summaryrefslogtreecommitdiff
path: root/core/fpdfapi/font/cpdf_tounicodemap.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdfapi/font/cpdf_tounicodemap.cpp')
-rw-r--r--core/fpdfapi/font/cpdf_tounicodemap.cpp233
1 files changed, 233 insertions, 0 deletions
diff --git a/core/fpdfapi/font/cpdf_tounicodemap.cpp b/core/fpdfapi/font/cpdf_tounicodemap.cpp
new file mode 100644
index 0000000000..8989f0be37
--- /dev/null
+++ b/core/fpdfapi/font/cpdf_tounicodemap.cpp
@@ -0,0 +1,233 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fpdfapi/font/cpdf_tounicodemap.h"
+
+#include "core/fpdfapi/cpdf_modulemgr.h"
+#include "core/fpdfapi/font/cpdf_cid2unicodemap.h"
+#include "core/fpdfapi/page/cpdf_pagemodule.h"
+#include "core/fpdfapi/parser/cpdf_simple_parser.h"
+#include "core/fxcrt/fx_extension.h"
+#include "core/fxcrt/fx_safe_types.h"
+#include "third_party/base/numerics/safe_conversions.h"
+
+// Returns the text mapped to |charcode|. An entry in |m_Map| whose low 16
+// bits are not 0xffff is a single character and is returned as such.
+// Otherwise the high 16 bits index a record in |m_MultiCharBuf| made of a
+// length followed by that many characters. Codes absent from |m_Map| are
+// forwarded to |m_pBaseMap| when it is set. Every other case, including an
+// out-of-range index or length, produces an empty string.
+CFX_WideString CPDF_ToUnicodeMap::Lookup(uint32_t charcode) const {
+  const auto entry = m_Map.find(charcode);
+  if (entry == m_Map.end()) {
+    if (!m_pBaseMap)
+      return CFX_WideString();
+    return m_pBaseMap->UnicodeFromCID(static_cast<uint16_t>(charcode));
+  }
+
+  const uint32_t packed = entry->second;
+  const wchar_t single = static_cast<wchar_t>(packed & 0xffff);
+  if (single != 0xffff)
+    return single;
+
+  const wchar_t* chars = m_MultiCharBuf.GetBuffer();
+  const uint32_t total = m_MultiCharBuf.GetLength();
+  if (!chars || total == 0)
+    return CFX_WideString();
+
+  const uint32_t start = packed >> 16;
+  if (start >= total)
+    return CFX_WideString();
+
+  // The record must fit entirely inside the buffer; the first comparison
+  // rejects unsigned wrap-around of |start| + |count|.
+  const uint32_t count = chars[start];
+  const uint32_t last = start + count;
+  if (last < start || last >= total)
+    return CFX_WideString();
+
+  return CFX_WideString(chars + start + 1, count);
+}
+
+// Walks |m_Map| in iteration order and returns the key of the first entry
+// whose stored value equals |unicode|. Returns 0 when nothing matches.
+uint32_t CPDF_ToUnicodeMap::ReverseLookup(wchar_t unicode) const {
+  const uint32_t wanted = static_cast<uint32_t>(unicode);
+  for (auto it = m_Map.begin(); it != m_Map.end(); ++it) {
+    if (it->second == wanted)
+      return it->first;
+  }
+  return 0;
+}
+
+// Static.
+// Parses the leading numeric part of |str|. If |str| starts with '<', the
+// hex digits that follow are accumulated; otherwise the leading decimal
+// digits are. Parsing stops at the first character that is not a digit of
+// the expected kind. An empty |str| yields 0.
+uint32_t CPDF_ToUnicodeMap::StringToCode(const CFX_ByteStringC& str) {
+  const int length = str.GetLength();
+  if (length == 0)
+    return 0;
+
+  uint32_t code = 0;
+  if (str[0] != '<') {
+    int pos = 0;
+    while (pos < length && std::isdigit(str[pos])) {
+      code = code * 10 + FXSYS_DecimalCharToInt(str.CharAt(pos));
+      ++pos;
+    }
+    return code;
+  }
+
+  // Hex form: skip the opening '<' and read digits until a non-hex character.
+  int pos = 1;
+  while (pos < length && std::isxdigit(str[pos])) {
+    code = code * 16 + FXSYS_HexCharToInt(str.CharAt(pos));
+    ++pos;
+  }
+  return code;
+}
+
+// Returns |str| incremented by one, treating it as a number whose least
+// significant digit is the last character. A character that wraps around on
+// increment becomes 0 and the carry moves one position to the left. A carry
+// out of the first character is prepended as an extra leading character.
+static CFX_WideString StringDataAdd(CFX_WideString str) {
+  CFX_WideString result;
+  wchar_t carry = 1;
+  for (int pos = str.GetLength() - 1; pos >= 0; --pos) {
+    const wchar_t sum = str[pos] + carry;
+    if (sum < str[pos]) {
+      // Wrapped around: emit 0 and keep carrying.
+      result.Insert(0, 0);
+      continue;
+    }
+    result.Insert(0, sum);
+    carry = 0;
+  }
+  if (carry)
+    result.Insert(0, carry);
+  return result;
+}
+
+// Static.
+// Converts a '<'-prefixed hex string into wide characters, producing one
+// character for each complete group of four hex digits. Conversion stops at
+// the first non-hex character, and trailing digits that do not fill a group
+// of four are discarded. Empty input, or input that does not start with '<',
+// yields an empty string.
+CFX_WideString CPDF_ToUnicodeMap::StringToWideString(
+    const CFX_ByteStringC& str) {
+  CFX_WideString text;
+  const int length = str.GetLength();
+  if (length == 0 || str[0] != '<')
+    return text;
+
+  wchar_t unit = 0;
+  int digits = 0;
+  for (int pos = 1; pos < length; ++pos) {
+    if (!std::isxdigit(str[pos]))
+      break;
+
+    unit = unit * 16 + FXSYS_HexCharToInt(str[pos]);
+    if (++digits != 4)
+      continue;
+
+    // Four digits collected: emit the character and start a new group.
+    text += unit;
+    digits = 0;
+    unit = 0;
+  }
+  return text;
+}
+
+// Starts out with no base map attached.
+CPDF_ToUnicodeMap::CPDF_ToUnicodeMap()
+    : m_pBaseMap(nullptr) {
+}
+
+// No explicit cleanup is performed here.
+CPDF_ToUnicodeMap::~CPDF_ToUnicodeMap() = default;
+
+// Builds the |m_Map| value that refers to the next record to be appended to
+// |m_MultiCharBuf|: the buffer's current length multiplied by 0x10000, plus
+// 0xffff in the low 16 bits. Returns 0 if that does not fit in a uint32_t.
+uint32_t CPDF_ToUnicodeMap::GetUnicode() {
+  const FX_SAFE_UINT32 buf_len = m_MultiCharBuf.GetLength();
+  // Convert back to a checked 32-bit value before reading it, so a result
+  // that is out of range is reported as invalid.
+  const FX_SAFE_UINT32 indicator = buf_len * 0x10000 + 0xffff;
+  return indicator.ValueOrDefault(0);
+}
+
+// Reads the data of |pStream| token by token and fills |m_Map| and
+// |m_MultiCharBuf|.
+//  - "beginbfchar" ... "endbfchar": pairs of source code and destination.
+//  - "beginbfrange" ... "endbfrange": a low code, a high code (only its low
+//    byte is used, combined with the upper bytes of the low code), then
+//    either a "["-introduced list of destinations or a single start
+//    destination that is incremented for each successive code.
+//  - One of the "/Adobe-*-UCS2" names selects the CID set used to fetch
+//    |m_pBaseMap|; if none is seen, |m_pBaseMap| is set to nullptr.
+// Destinations of exactly one character are stored directly in |m_Map|.
+// Other destinations are appended to |m_MultiCharBuf| as a length followed
+// by the characters, and |m_Map| holds the GetUnicode() value for them.
+void CPDF_ToUnicodeMap::Load(CPDF_Stream* pStream) {
+  // Appends |text|, preceded by its length, to the multi-character buffer
+  // and makes |code| refer to that new record.
+  auto add_multi = [this](uint32_t code, const CFX_WideString& text) {
+    m_Map[code] = GetUnicode();
+    m_MultiCharBuf.AppendChar(text.GetLength());
+    m_MultiCharBuf << text;
+  };
+
+  // Records |text| as the destination of |code|. Empty destinations are
+  // skipped and single characters are stored inline.
+  auto add_entry = [this, &add_multi](uint32_t code,
+                                      const CFX_WideString& text) {
+    const int text_len = text.GetLength();
+    if (text_len == 0)
+      return;
+    if (text_len == 1) {
+      m_Map[code] = text.GetAt(0);
+      return;
+    }
+    add_multi(code, text);
+  };
+
+  CIDSet cid_set = CIDSET_UNKNOWN;
+  auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
+  pAcc->LoadAllData(false);
+  CPDF_SimpleParser parser(pAcc->GetData(), pAcc->GetSize());
+  for (;;) {
+    const CFX_ByteStringC word = parser.GetWord();
+    if (word.IsEmpty())
+      break;
+
+    if (word == "beginbfchar") {
+      for (;;) {
+        const CFX_ByteStringC src = parser.GetWord();
+        if (src.IsEmpty() || src == "endbfchar")
+          break;
+
+        const uint32_t srccode = StringToCode(src);
+        add_entry(srccode, StringToWideString(parser.GetWord()));
+      }
+    } else if (word == "beginbfrange") {
+      for (;;) {
+        const CFX_ByteString low(parser.GetWord());
+        if (low.IsEmpty() || low == "endbfrange")
+          break;
+
+        const CFX_ByteString high(parser.GetWord());
+        const uint32_t lowcode = StringToCode(low.AsStringC());
+        const uint32_t highcode =
+            (lowcode & 0xffffff00) | (StringToCode(high.AsStringC()) & 0xff);
+        // With an upper bound of 0xffffffff the |code <= highcode| loops
+        // below could never terminate, so stop reading ranges here.
+        if (highcode == static_cast<uint32_t>(-1))
+          break;
+
+        const CFX_ByteString start(parser.GetWord());
+        if (start == "[") {
+          // One destination token is read for every code in the range.
+          for (uint32_t code = lowcode; code <= highcode; ++code)
+            add_entry(code, StringToWideString(parser.GetWord()));
+          // Discard the token after the list (presumably the closing "]").
+          parser.GetWord();
+          continue;
+        }
+
+        CFX_WideString destcode = StringToWideString(start.AsStringC());
+        if (destcode.GetLength() == 1) {
+          // Single-character start: successive codes get successive values.
+          uint32_t value = StringToCode(start.AsStringC());
+          for (uint32_t code = lowcode; code <= highcode; ++code)
+            m_Map[code] = value++;
+          continue;
+        }
+
+        // Any other start string: the first code receives it unchanged and
+        // each later code receives the previous string incremented by one.
+        for (uint32_t code = lowcode; code <= highcode; ++code) {
+          if (code != lowcode)
+            destcode = StringDataAdd(destcode);
+          add_multi(code, destcode);
+        }
+      }
+    } else if (word == "/Adobe-Korea1-UCS2") {
+      cid_set = CIDSET_KOREA1;
+    } else if (word == "/Adobe-Japan1-UCS2") {
+      cid_set = CIDSET_JAPAN1;
+    } else if (word == "/Adobe-CNS1-UCS2") {
+      cid_set = CIDSET_CNS1;
+    } else if (word == "/Adobe-GB1-UCS2") {
+      cid_set = CIDSET_GB1;
+    }
+  }
+
+  if (!cid_set) {
+    m_pBaseMap = nullptr;
+    return;
+  }
+  m_pBaseMap = CPDF_ModuleMgr::Get()
+                   ->GetPageModule()
+                   ->GetFontGlobals()
+                   ->m_CMapManager.GetCID2UnicodeMap(cid_set, false);
+}