summaryrefslogtreecommitdiff
path: root/core/fpdfapi/font/cpdf_cmapparser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdfapi/font/cpdf_cmapparser.cpp')
-rw-r--r--core/fpdfapi/font/cpdf_cmapparser.cpp204
1 files changed, 204 insertions, 0 deletions
diff --git a/core/fpdfapi/font/cpdf_cmapparser.cpp b/core/fpdfapi/font/cpdf_cmapparser.cpp
new file mode 100644
index 0000000000..0f9bca6018
--- /dev/null
+++ b/core/fpdfapi/font/cpdf_cmapparser.cpp
@@ -0,0 +1,204 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fpdfapi/font/cpdf_cmapparser.h"
+
+#include <cctype>
+#include <vector>
+
+#include "core/fpdfapi/cmaps/cmap_int.h"
+#include "core/fpdfapi/cpdf_modulemgr.h"
+#include "core/fpdfapi/page/cpdf_pagemodule.h"
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_simple_parser.h"
+#include "core/fxcrt/fx_extension.h"
+#include "core/fxge/fx_freetype.h"
+#include "third_party/base/logging.h"
+#include "third_party/base/stl_util.h"
+
+namespace {
+
+// Ordering names, one slot per CIDSet value. Slot 0 carries no name, which is
+// why lookups over this table start at index 1.
+const char* const g_CharsetNames[CIDSET_NUM_SETS] = {
+    nullptr,
+    "GB1",
+    "CNS1",
+    "Japan1",
+    "Korea1",
+    "UCS",
+};
+
+// Converts a table index into the matching CIDSet enumerator.
+// An index at or beyond CIDSET_NUM_SETS is a caller error: it trips
+// NOTREACHED() and yields CIDSET_UNKNOWN.
+CIDSet CIDSetFromSizeT(size_t index) {
+  if (index < CIDSET_NUM_SETS)
+    return static_cast<CIDSet>(index);
+
+  NOTREACHED();
+  return CIDSET_UNKNOWN;
+}
+
+// Returns |word| with its first and last characters removed (the delimiters
+// around a string token). Tokens of two characters or fewer have nothing
+// between the delimiters and produce an empty view.
+CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) {
+  const auto length = word.GetLength();
+  return length > 2 ? CFX_ByteStringC(&word[1], length - 2)
+                    : CFX_ByteStringC();
+}
+
+} // namespace
+
+// Binds the parser to the CMap it will populate and starts in the idle state
+// (status 0) with no code points collected.
+CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap)
+    : m_pCMap(pCMap),
+      m_Status(0),
+      m_CodeSeq(0) {
+}
+
+// Nothing to release explicitly; members clean up after themselves.
+CPDF_CMapParser::~CPDF_CMapParser() = default;
+
+// Consumes one token of a CMap stream.
+//
+// Keywords are matched first and only switch the parser state. Any other
+// token is interpreted according to the state left behind by the most recent
+// keyword. Empty tokens are ignored. Every token that is processed to the end
+// is remembered in |m_LastWord|; tokens that bail out early are not.
+void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) {
+  // Symbolic names for the values stored in |m_Status|.
+  enum : int {
+    kIdle = 0,
+    kCidChar = 1,         // After "begincidchar".
+    kCidRange = 2,        // After "begincidrange".
+    kRegistry = 3,        // After "/Registry".
+    kOrdering = 4,        // After "/Ordering".
+    kSupplement = 5,      // After "/Supplement".
+    kWMode = 6,           // After "/WMode".
+    kCodeSpaceRange = 7,  // After "begincodespacerange".
+  };
+
+  if (word.IsEmpty())
+    return;
+
+  // Pass 1: keywords. They take priority over whatever state is active.
+  bool is_keyword = true;
+  if (word == "begincidchar") {
+    m_Status = kCidChar;
+    m_CodeSeq = 0;
+  } else if (word == "begincidrange") {
+    m_Status = kCidRange;
+    m_CodeSeq = 0;
+  } else if (word == "endcidrange" || word == "endcidchar") {
+    m_Status = kIdle;
+  } else if (word == "/WMode") {
+    m_Status = kWMode;
+  } else if (word == "/Registry") {
+    m_Status = kRegistry;
+  } else if (word == "/Ordering") {
+    m_Status = kOrdering;
+  } else if (word == "/Supplement") {
+    m_Status = kSupplement;
+  } else if (word == "begincodespacerange") {
+    m_Status = kCodeSpaceRange;
+    m_CodeSeq = 0;
+  } else if (word == "usecmap") {
+    // Recognized, but no action is taken here.
+  } else {
+    is_keyword = false;
+  }
+
+  // Pass 2: operands, interpreted according to the current state.
+  if (!is_keyword) {
+    switch (m_Status) {
+      case kCidChar:
+      case kCidRange: {
+        m_CodePoints[m_CodeSeq] = CMap_GetCode(word);
+        m_CodeSeq++;
+
+        // A cidchar entry is <code> <cid>; a cidrange entry is
+        // <first code> <last code> <cid>. Wait until the entry is complete.
+        const bool single_code = (m_Status == kCidChar);
+        const int operands_needed = single_code ? 2 : 3;
+        if (m_CodeSeq < operands_needed)
+          return;
+
+        const uint32_t first_code = m_CodePoints[0];
+        const uint32_t last_code = single_code ? first_code : m_CodePoints[1];
+        const uint16_t first_cid =
+            static_cast<uint16_t>(m_CodePoints[operands_needed - 1]);
+
+        if (last_code < 0x10000) {
+          // The whole range fits the direct table: fill one slot per code.
+          for (uint32_t code = first_code; code <= last_code; code++) {
+            m_pCMap->m_DirectCharcodeToCIDTable[code] =
+                static_cast<uint16_t>(first_cid + code - first_code);
+          }
+        } else {
+          // Too large for the direct table: keep it as a range record.
+          m_AdditionalCharcodeToCIDMappings.push_back(
+              {first_code, last_code, first_cid});
+        }
+        m_CodeSeq = 0;
+        break;
+      }
+
+      case kRegistry:
+      case kSupplement:
+        // The value is consumed but not stored.
+        m_Status = kIdle;
+        break;
+
+      case kOrdering:
+        m_pCMap->m_Charset = CharsetFromOrdering(CMap_GetString(word));
+        m_Status = kIdle;
+        break;
+
+      case kWMode:
+        m_pCMap->m_bVertical = CMap_GetCode(word) != 0;
+        m_Status = kIdle;
+        break;
+
+      case kCodeSpaceRange: {
+        if (word == "endcodespacerange") {
+          // Pick the coding scheme from the ranges collected so far.
+          const uint32_t range_count =
+              pdfium::CollectionSize<uint32_t>(m_CodeRanges);
+          if (range_count > 1) {
+            m_pCMap->m_CodingScheme = CPDF_CMap::MixedFourBytes;
+            m_pCMap->m_MixedFourByteLeadingRanges = m_CodeRanges;
+          } else if (range_count == 1) {
+            m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2)
+                                          ? CPDF_CMap::TwoBytes
+                                          : CPDF_CMap::OneByte;
+          }
+          m_Status = kIdle;
+          break;
+        }
+
+        // Only hex tokens take part in a range. |word| is known to be
+        // non-empty at this point, so reading its first character is safe.
+        if (word.GetAt(0) != '<')
+          return;
+
+        // Ranges come in pairs: on every second hex token, combine it with
+        // the previous token kept in |m_LastWord|.
+        if (m_CodeSeq % 2) {
+          CPDF_CMap::CodeRange range;
+          if (CMap_GetCodeRange(range, m_LastWord.AsStringC(), word))
+            m_CodeRanges.push_back(range);
+        }
+        m_CodeSeq++;
+        break;
+      }
+
+      default:
+        break;
+    }
+  }
+
+  m_LastWord = word;
+}
+
+// Static.
+// Parses |word| as an unsigned 32-bit integer.
+//
+// A token starting with '<' is read as hexadecimal, beginning after the '<';
+// any other token is read as decimal from its first character. Parsing stops
+// at the first character that is not a digit of the selected base, so a token
+// without leading digits yields 0. An empty token, or a value that does not
+// fit in 32 bits, also yields 0.
+uint32_t CPDF_CMapParser::CMap_GetCode(const CFX_ByteStringC& word) {
+  // An empty token has no first character to inspect. Bail out before
+  // GetAt(0) is evaluated on it.
+  if (word.IsEmpty())
+    return 0;
+
+  pdfium::base::CheckedNumeric<uint32_t> num = 0;
+  if (word.GetAt(0) == '<') {
+    for (int i = 1; i < word.GetLength() && std::isxdigit(word.GetAt(i)); ++i) {
+      num = num * 16 + FXSYS_HexCharToInt(word.GetAt(i));
+      // Report overflow as 0 rather than letting ValueOrDie() abort.
+      if (!num.IsValid())
+        return 0;
+    }
+    return num.ValueOrDie();
+  }
+
+  for (int i = 0; i < word.GetLength() && std::isdigit(word.GetAt(i)); ++i) {
+    num =
+        num * 10 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(word.GetAt(i)));
+    // Report overflow as 0 rather than letting ValueOrDie() abort.
+    if (!num.IsValid())
+      return 0;
+  }
+  return num.ValueOrDie();
+}
+
+// Static.
+// Builds a code range from two hex tokens, |first| (lower bound) and |second|
+// (upper bound).
+//
+// |first| must begin with '<'. The character size is derived from the number
+// of characters between that '<' and the first '>' (or the end of the token),
+// two hex digits per byte. Sizes above 4 are rejected, but note that
+// |range.m_CharSize| has already been written by then. Digits missing from
+// |second| are treated as '0'.
+//
+// Returns true when |range| was filled in, false when the input was rejected.
+bool CPDF_CMapParser::CMap_GetCodeRange(CPDF_CMap::CodeRange& range,
+                                        const CFX_ByteStringC& first,
+                                        const CFX_ByteStringC& second) {
+  if (first.GetLength() == 0 || first.GetAt(0) != '<')
+    return false;
+
+  // Locate the closing '>' of |first|, or stop at the end of the token.
+  int close_pos = 1;
+  while (close_pos < first.GetLength() && first.GetAt(close_pos) != '>')
+    ++close_pos;
+
+  range.m_CharSize = (close_pos - 1) / 2;
+  if (range.m_CharSize > 4)
+    return false;
+
+  const uint32_t second_len = second.GetLength();
+  for (int byte_idx = 0; byte_idx < range.m_CharSize; ++byte_idx) {
+    // Positions of the high and low hex digit of this byte; the +1 skips '<'.
+    const int hi_pos = byte_idx * 2 + 1;
+    const int lo_pos = byte_idx * 2 + 2;
+
+    const uint8_t lower_hi = first.GetAt(hi_pos);
+    const uint8_t lower_lo = first.GetAt(lo_pos);
+    range.m_Lower[byte_idx] =
+        FXSYS_HexCharToInt(lower_hi) * 16 + FXSYS_HexCharToInt(lower_lo);
+
+    // |second| may be shorter than |first|; pad the missing digits with '0'.
+    const uint8_t upper_hi = (static_cast<uint32_t>(hi_pos) < second_len)
+                                 ? second.GetAt(static_cast<FX_STRSIZE>(hi_pos))
+                                 : '0';
+    const uint8_t upper_lo = (static_cast<uint32_t>(lo_pos) < second_len)
+                                 ? second.GetAt(static_cast<FX_STRSIZE>(lo_pos))
+                                 : '0';
+    range.m_Upper[byte_idx] =
+        FXSYS_HexCharToInt(upper_hi) * 16 + FXSYS_HexCharToInt(upper_lo);
+  }
+  return true;
+}
+
+// static
+// Maps an ordering name to its CIDSet by scanning g_CharsetNames.
+// Returns CIDSET_UNKNOWN when no entry matches.
+CIDSet CPDF_CMapParser::CharsetFromOrdering(const CFX_ByteStringC& ordering) {
+  const size_t name_count = FX_ArraySize(g_CharsetNames);
+
+  // Slot 0 holds nullptr rather than a name, so the scan begins at slot 1.
+  size_t slot = 1;
+  while (slot < name_count && !(ordering == g_CharsetNames[slot]))
+    ++slot;
+
+  return slot < name_count ? CIDSetFromSizeT(slot) : CIDSET_UNKNOWN;
+}