diff options
author | Tom Sepez <tsepez@chromium.org> | 2018-08-09 21:32:47 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2018-08-09 21:32:47 +0000 |
commit | fd7cede17e027a83de2aff3bc0f5ee875271e444 (patch) | |
tree | c1576d19a8e8c99ebbcf9dba4e75ef4665e631cc /core/fxcrt/cfx_utf8encoder.cpp | |
parent | 60627d6eafd025dde711e532eee6866840c04bef (diff) | |
download | pdfium-fd7cede17e027a83de2aff3bc0f5ee875271e444.tar.xz |
Move all utf8 decoding under fx_string.h
Put encoder in cfx_utf8encoder.{h,cpp} to parallel decoder.
Add tests, and fix one corner case involving 0xff.
Change-Id: Ib97540afdc708bcc6280a79c76734ec68ea72690
Reviewed-on: https://pdfium-review.googlesource.com/39770
Commit-Queue: Lei Zhang <thestig@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
Diffstat (limited to 'core/fxcrt/cfx_utf8encoder.cpp')
-rw-r--r-- | core/fxcrt/cfx_utf8encoder.cpp | 43 |
1 files changed, 43 insertions, 0 deletions
diff --git a/core/fxcrt/cfx_utf8encoder.cpp b/core/fxcrt/cfx_utf8encoder.cpp new file mode 100644 index 0000000000..9ed149f1ad --- /dev/null +++ b/core/fxcrt/cfx_utf8encoder.cpp @@ -0,0 +1,43 @@ +// Copyright 2018 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fxcrt/cfx_utf8encoder.h" + +CFX_UTF8Encoder::CFX_UTF8Encoder() = default; + +CFX_UTF8Encoder::~CFX_UTF8Encoder() = default; + +void CFX_UTF8Encoder::Input(wchar_t unicodeAsWchar) { + uint32_t unicode = static_cast<uint32_t>(unicodeAsWchar); + if (unicode < 0x80) { + m_Buffer.push_back(unicode); + } else { + if (unicode >= 0x80000000) + return; + + int nbytes = 0; + if (unicode < 0x800) + nbytes = 2; + else if (unicode < 0x10000) + nbytes = 3; + else if (unicode < 0x200000) + nbytes = 4; + else if (unicode < 0x4000000) + nbytes = 5; + else + nbytes = 6; + + static const uint8_t prefix[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; + int order = 1 << ((nbytes - 1) * 6); + int code = unicodeAsWchar; + m_Buffer.push_back(prefix[nbytes - 2] | (code / order)); + for (int i = 0; i < nbytes - 1; i++) { + code = code % order; + order >>= 6; + m_Buffer.push_back(0x80 | (code / order)); + } + } +} |