// Copyright 2017 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "core/fpdfapi/font/cpdf_tounicodemap.h" #include "core/fpdfapi/cpdf_modulemgr.h" #include "core/fpdfapi/font/cpdf_cid2unicodemap.h" #include "core/fpdfapi/page/cpdf_pagemodule.h" #include "core/fpdfapi/parser/cpdf_simple_parser.h" #include "core/fxcrt/fx_extension.h" #include "core/fxcrt/fx_safe_types.h" #include "third_party/base/numerics/safe_conversions.h" WideString CPDF_ToUnicodeMap::Lookup(uint32_t charcode) const { auto it = m_Map.find(charcode); if (it != m_Map.end()) { uint32_t value = it->second; wchar_t unicode = (wchar_t)(value & 0xffff); if (unicode != 0xffff) { return unicode; } const wchar_t* buf = m_MultiCharBuf.GetBuffer(); uint32_t buf_len = m_MultiCharBuf.GetLength(); if (!buf || buf_len == 0) { return WideString(); } uint32_t index = value >> 16; if (index >= buf_len) { return WideString(); } uint32_t len = buf[index]; if (index + len < index || index + len >= buf_len) { return WideString(); } return WideString(buf + index + 1, len); } if (m_pBaseMap) { return m_pBaseMap->UnicodeFromCID((uint16_t)charcode); } return WideString(); } uint32_t CPDF_ToUnicodeMap::ReverseLookup(wchar_t unicode) const { for (const auto& pair : m_Map) { if (pair.second == static_cast(unicode)) return pair.first; } return 0; } // Static. uint32_t CPDF_ToUnicodeMap::StringToCode(const ByteStringView& str) { int len = str.GetLength(); if (len == 0) return 0; uint32_t result = 0; if (str[0] == '<') { for (int i = 1; i < len && std::isxdigit(str[i]); ++i) result = result * 16 + FXSYS_HexCharToInt(str.CharAt(i)); return result; } for (int i = 0; i < len && std::isdigit(str[i]); ++i) result = result * 10 + FXSYS_DecimalCharToInt(str.CharAt(i)); return result; } static WideString StringDataAdd(WideString str) { WideString ret; int len = str.GetLength(); wchar_t value = 1; for (int i = len - 1; i >= 0; --i) { wchar_t ch = str[i] + value; if (ch < str[i]) { ret.InsertAtFront(0); } else { ret.InsertAtFront(ch); value = 0; } } if (value) ret.InsertAtFront(value); return ret; } // Static. WideString CPDF_ToUnicodeMap::StringToWideString(const ByteStringView& str) { int len = str.GetLength(); if (len == 0) return WideString(); WideString result; if (str[0] == '<') { int byte_pos = 0; wchar_t ch = 0; for (int i = 1; i < len && std::isxdigit(str[i]); ++i) { ch = ch * 16 + FXSYS_HexCharToInt(str[i]); byte_pos++; if (byte_pos == 4) { result += ch; byte_pos = 0; ch = 0; } } return result; } return result; } CPDF_ToUnicodeMap::CPDF_ToUnicodeMap() : m_pBaseMap(nullptr) {} CPDF_ToUnicodeMap::~CPDF_ToUnicodeMap() {} uint32_t CPDF_ToUnicodeMap::GetUnicode() { FX_SAFE_UINT32 uni = m_MultiCharBuf.GetLength(); uni = uni * 0x10000 + 0xffff; return uni.ValueOrDefault(0); } void CPDF_ToUnicodeMap::Load(const CPDF_Stream* pStream) { CIDSet cid_set = CIDSET_UNKNOWN; auto pAcc = pdfium::MakeRetain(pStream); pAcc->LoadAllDataFiltered(); CPDF_SimpleParser parser(pAcc->GetSpan()); while (1) { ByteStringView word = parser.GetWord(); if (word.IsEmpty()) { break; } if (word == "beginbfchar") { while (1) { word = parser.GetWord(); if (word.IsEmpty() || word == "endbfchar") { break; } uint32_t srccode = StringToCode(word); word = parser.GetWord(); WideString destcode = StringToWideString(word); int len = destcode.GetLength(); if (len == 0) { continue; } if (len == 1) { m_Map[srccode] = destcode[0]; } else { m_Map[srccode] = GetUnicode(); m_MultiCharBuf.AppendChar(destcode.GetLength()); m_MultiCharBuf << destcode; } } } else if (word == "beginbfrange") { while (1) { ByteString low, high; low = parser.GetWord(); if (low.IsEmpty() || low == "endbfrange") { break; } high = parser.GetWord(); uint32_t lowcode = StringToCode(low.AsStringView()); uint32_t highcode = (lowcode & 0xffffff00) | (StringToCode(high.AsStringView()) & 0xff); if (highcode == (uint32_t)-1) { break; } ByteString start(parser.GetWord()); if (start == "[") { for (uint32_t code = lowcode; code <= highcode; code++) { ByteString dest(parser.GetWord()); WideString destcode = StringToWideString(dest.AsStringView()); int len = destcode.GetLength(); if (len == 0) { continue; } if (len == 1) { m_Map[code] = destcode[0]; } else { m_Map[code] = GetUnicode(); m_MultiCharBuf.AppendChar(destcode.GetLength()); m_MultiCharBuf << destcode; } } parser.GetWord(); } else { WideString destcode = StringToWideString(start.AsStringView()); int len = destcode.GetLength(); uint32_t value = 0; if (len == 1) { value = StringToCode(start.AsStringView()); for (uint32_t code = lowcode; code <= highcode; code++) { m_Map[code] = value++; } } else { for (uint32_t code = lowcode; code <= highcode; code++) { WideString retcode; if (code == lowcode) { retcode = destcode; } else { retcode = StringDataAdd(destcode); } m_Map[code] = GetUnicode(); m_MultiCharBuf.AppendChar(retcode.GetLength()); m_MultiCharBuf << retcode; destcode = retcode; } } } } } else if (word == "/Adobe-Korea1-UCS2") { cid_set = CIDSET_KOREA1; } else if (word == "/Adobe-Japan1-UCS2") { cid_set = CIDSET_JAPAN1; } else if (word == "/Adobe-CNS1-UCS2") { cid_set = CIDSET_CNS1; } else if (word == "/Adobe-GB1-UCS2") { cid_set = CIDSET_GB1; } } if (cid_set) { m_pBaseMap = CPDF_ModuleMgr::Get() ->GetPageModule() ->GetFontGlobals() ->GetCMapManager() ->GetCID2UnicodeMap(cid_set, false); } else { m_pBaseMap = nullptr; } }