summary | refs | log | tree | commit | diff
path: root/core/fxcrt/fx_string.cpp
diff options
context:
space:
mode:
author: Tom Sepez <tsepez@chromium.org> 2018-08-09 21:32:47 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-08-09 21:32:47 +0000
commit: fd7cede17e027a83de2aff3bc0f5ee875271e444 (patch)
tree: c1576d19a8e8c99ebbcf9dba4e75ef4665e631cc /core/fxcrt/fx_string.cpp
parent: 60627d6eafd025dde711e532eee6866840c04bef (diff)
download: pdfium-fd7cede17e027a83de2aff3bc0f5ee875271e444.tar.xz
Move all utf8 decoding under fx_string.h
Put encoder in cfx_utf8encoder.{h,cpp} to parallel decoder. Add tests, and fix one corner case involving 0xff.

Change-Id: Ib97540afdc708bcc6280a79c76734ec68ea72690
Reviewed-on: https://pdfium-review.googlesource.com/39770
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Diffstat (limited to 'core/fxcrt/fx_string.cpp')
-rw-r--r-- core/fxcrt/fx_string.cpp 64
1 files changed, 13 insertions, 51 deletions
diff --git a/core/fxcrt/fx_string.cpp b/core/fxcrt/fx_string.cpp
index daf995560b..c9993f9ab8 100644
--- a/core/fxcrt/fx_string.cpp
+++ b/core/fxcrt/fx_string.cpp
@@ -7,61 +7,12 @@
#include <limits>
#include <vector>
+#include "core/fxcrt/cfx_utf8decoder.h"
+#include "core/fxcrt/cfx_utf8encoder.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_string.h"
#include "third_party/base/compiler_specific.h"
-namespace {
-
-class CFX_UTF8Encoder {
- public:
- CFX_UTF8Encoder() {}
- ~CFX_UTF8Encoder() {}
-
- void Input(wchar_t unicodeAsWchar) {
- uint32_t unicode = static_cast<uint32_t>(unicodeAsWchar);
- if (unicode < 0x80) {
- m_Buffer.push_back(unicode);
- } else {
- if (unicode >= 0x80000000)
- return;
-
- int nbytes = 0;
- if (unicode < 0x800)
- nbytes = 2;
- else if (unicode < 0x10000)
- nbytes = 3;
- else if (unicode < 0x200000)
- nbytes = 4;
- else if (unicode < 0x4000000)
- nbytes = 5;
- else
- nbytes = 6;
-
- static const uint8_t prefix[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
- int order = 1 << ((nbytes - 1) * 6);
- int code = unicodeAsWchar;
- m_Buffer.push_back(prefix[nbytes - 2] | (code / order));
- for (int i = 0; i < nbytes - 1; i++) {
- code = code % order;
- order >>= 6;
- m_Buffer.push_back(0x80 | (code / order));
- }
- }
- }
-
- // The data returned by GetResult() is invalidated when this is modified by
- // appending any data.
- ByteStringView GetResult() const {
- return ByteStringView(m_Buffer.data(), m_Buffer.size());
- }
-
- private:
- std::vector<uint8_t> m_Buffer;
-};
-
-} // namespace
-
ByteString FX_UTF8Encode(const WideStringView& wsStr) {
size_t len = wsStr.GetLength();
const wchar_t* pStr = wsStr.unterminated_c_str();
@@ -72,6 +23,17 @@ ByteString FX_UTF8Encode(const WideStringView& wsStr) {
return ByteString(encoder.GetResult());
}
+WideString FX_UTF8Decode(const ByteStringView& bsStr) {
+ if (bsStr.IsEmpty())
+ return WideString();
+
+ CFX_UTF8Decoder decoder;
+ for (size_t i = 0; i < bsStr.GetLength(); i++)
+ decoder.Input(bsStr[i]);
+
+ return WideString(decoder.GetResult());
+}
+
namespace {
const float fraction_scales[] = {0.1f, 0.01f, 0.001f,