diff options
author | Tom Sepez <tsepez@chromium.org> | 2018-08-09 21:32:47 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2018-08-09 21:32:47 +0000 |
commit | fd7cede17e027a83de2aff3bc0f5ee875271e444 (patch) | |
tree | c1576d19a8e8c99ebbcf9dba4e75ef4665e631cc /core/fxcrt/fx_string.cpp | |
parent | 60627d6eafd025dde711e532eee6866840c04bef (diff) | |
download | pdfium-fd7cede17e027a83de2aff3bc0f5ee875271e444.tar.xz |
Move all utf8 decoding under fx_string.h
Put encoder in cfx_utf8encoder.{h,cpp} to parallel decoder.
Add tests, and fix one corner case involving 0xff.
Change-Id: Ib97540afdc708bcc6280a79c76734ec68ea72690
Reviewed-on: https://pdfium-review.googlesource.com/39770
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Diffstat (limited to 'core/fxcrt/fx_string.cpp')
-rw-r--r-- | core/fxcrt/fx_string.cpp | 64 |
1 files changed, 13 insertions, 51 deletions
```diff
diff --git a/core/fxcrt/fx_string.cpp b/core/fxcrt/fx_string.cpp
index daf995560b..c9993f9ab8 100644
--- a/core/fxcrt/fx_string.cpp
+++ b/core/fxcrt/fx_string.cpp
@@ -7,61 +7,12 @@
 #include <limits>
 #include <vector>
 
+#include "core/fxcrt/cfx_utf8decoder.h"
+#include "core/fxcrt/cfx_utf8encoder.h"
 #include "core/fxcrt/fx_extension.h"
 #include "core/fxcrt/fx_string.h"
 #include "third_party/base/compiler_specific.h"
 
-namespace {
-
-class CFX_UTF8Encoder {
- public:
-  CFX_UTF8Encoder() {}
-  ~CFX_UTF8Encoder() {}
-
-  void Input(wchar_t unicodeAsWchar) {
-    uint32_t unicode = static_cast<uint32_t>(unicodeAsWchar);
-    if (unicode < 0x80) {
-      m_Buffer.push_back(unicode);
-    } else {
-      if (unicode >= 0x80000000)
-        return;
-
-      int nbytes = 0;
-      if (unicode < 0x800)
-        nbytes = 2;
-      else if (unicode < 0x10000)
-        nbytes = 3;
-      else if (unicode < 0x200000)
-        nbytes = 4;
-      else if (unicode < 0x4000000)
-        nbytes = 5;
-      else
-        nbytes = 6;
-
-      static const uint8_t prefix[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
-      int order = 1 << ((nbytes - 1) * 6);
-      int code = unicodeAsWchar;
-      m_Buffer.push_back(prefix[nbytes - 2] | (code / order));
-      for (int i = 0; i < nbytes - 1; i++) {
-        code = code % order;
-        order >>= 6;
-        m_Buffer.push_back(0x80 | (code / order));
-      }
-    }
-  }
-
-  // The data returned by GetResult() is invalidated when this is modified by
-  // appending any data.
-  ByteStringView GetResult() const {
-    return ByteStringView(m_Buffer.data(), m_Buffer.size());
-  }
-
- private:
-  std::vector<uint8_t> m_Buffer;
-};
-
-}  // namespace
-
 ByteString FX_UTF8Encode(const WideStringView& wsStr) {
   size_t len = wsStr.GetLength();
   const wchar_t* pStr = wsStr.unterminated_c_str();
@@ -72,6 +23,17 @@ ByteString FX_UTF8Encode(const WideStringView& wsStr) {
   return ByteString(encoder.GetResult());
 }
 
+WideString FX_UTF8Decode(const ByteStringView& bsStr) {
+  if (bsStr.IsEmpty())
+    return WideString();
+
+  CFX_UTF8Decoder decoder;
+  for (size_t i = 0; i < bsStr.GetLength(); i++)
+    decoder.Input(bsStr[i]);
+
+  return WideString(decoder.GetResult());
+}
+
 namespace {
 
 const float fraction_scales[] = {0.1f, 0.01f, 0.001f,
```