From 169b30187bf5798a6106b5ab16288c9d86861f8b Mon Sep 17 00:00:00 2001
From: Nicolas Pena <npm@chromium.org>
Date: Fri, 26 May 2017 14:38:03 -0400
Subject: Use proper file names in core/fpdfapi/font
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This CL splits up font_int.h into files by classes. It also renames the
unittests to match the class being tested. Finally, it renames the ttgsubtable
files to match the class name.

Change-Id: I6187caa9e82d12b9a66e955113fe327d52042ae0
Reviewed-on: https://pdfium-review.googlesource.com/6090
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: Nicolás Peña <npm@chromium.org>
---
 core/fpdfapi/font/cpdf_cmap.cpp | 488 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 488 insertions(+)
 create mode 100644 core/fpdfapi/font/cpdf_cmap.cpp

(limited to 'core/fpdfapi/font/cpdf_cmap.cpp')
diff --git a/core/fpdfapi/font/cpdf_cmap.cpp b/core/fpdfapi/font/cpdf_cmap.cpp
new file mode 100644
index 0000000000..55f5ccc5c5
--- /dev/null
+++ b/core/fpdfapi/font/cpdf_cmap.cpp
@@ -0,0 +1,488 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fpdfapi/font/cpdf_cmap.h"
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "core/fpdfapi/cmaps/cmap_int.h"
+#include "core/fpdfapi/font/cpdf_cmapmanager.h"
+#include "core/fpdfapi/font/cpdf_cmapparser.h"
+#include "core/fpdfapi/parser/cpdf_simple_parser.h"
+
+namespace {
+
+struct ByteRange {  // Closed interval of byte values.
+  uint8_t m_First;  // Lowest byte in the range.
+  uint8_t m_Last;  // Highest byte in the range (inclusive).
+};
+
+struct PredefinedCMap {  // Row type of g_PredefinedCMaps.
+  const char* m_pName;  // CMap name without its final two characters.
+  CIDSet m_Charset;  // Copied into CPDF_CMap::m_Charset on a match.
+  CIDCoding m_Coding;  // Copied into CPDF_CMap::m_Coding on a match.
+  CPDF_CMap::CodingScheme m_CodingScheme;  // Copied into m_CodingScheme.
+  uint8_t m_LeadingSegCount;  // Number of m_LeadingSegs entries in use.
+  ByteRange m_LeadingSegs[2];  // Lead-byte ranges; read for MixedTwoBytes.
+};
+
+const PredefinedCMap g_PredefinedCMaps[] = {  // Scanned by LoadPredefined().
+    {"GB-EUC",  // First of the CIDSET_GB1 rows.
+     CIDSET_GB1,
+     CIDCODING_GB,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0xa1, 0xfe}}},
+    {"GBpc-EUC",
+     CIDSET_GB1,
+     CIDCODING_GB,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0xa1, 0xfc}}},
+    {"GBK-EUC",
+     CIDSET_GB1,
+     CIDCODING_GB,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0x81, 0xfe}}},
+    {"GBKp-EUC",
+     CIDSET_GB1,
+     CIDCODING_GB,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0x81, 0xfe}}},
+    {"GBK2K-EUC",
+     CIDSET_GB1,
+     CIDCODING_GB,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0x81, 0xfe}}},
+    {"GBK2K",
+     CIDSET_GB1,
+     CIDCODING_GB,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0x81, 0xfe}}},
+    {"UniGB-UCS2", CIDSET_GB1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
+    {"UniGB-UTF16", CIDSET_GB1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
+    {"B5pc",  // First of the CIDSET_CNS1 rows.
+     CIDSET_CNS1,
+     CIDCODING_BIG5,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0xa1, 0xfc}}},
+    {"HKscs-B5",
+     CIDSET_CNS1,
+     CIDCODING_BIG5,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0x88, 0xfe}}},
+    {"ETen-B5",
+     CIDSET_CNS1,
+     CIDCODING_BIG5,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0xa1, 0xfe}}},
+    {"ETenms-B5",
+     CIDSET_CNS1,
+     CIDCODING_BIG5,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0xa1, 0xfe}}},
+    {"UniCNS-UCS2", CIDSET_CNS1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
+    {"UniCNS-UTF16", CIDSET_CNS1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
+    {"83pv-RKSJ",  // First of the CIDSET_JAPAN1 rows.
+     CIDSET_JAPAN1,
+     CIDCODING_JIS,
+     CPDF_CMap::MixedTwoBytes,
+     2,
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
+    {"90ms-RKSJ",
+     CIDSET_JAPAN1,
+     CIDCODING_JIS,
+     CPDF_CMap::MixedTwoBytes,
+     2,
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
+    {"90msp-RKSJ",
+     CIDSET_JAPAN1,
+     CIDCODING_JIS,
+     CPDF_CMap::MixedTwoBytes,
+     2,
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
+    {"90pv-RKSJ",
+     CIDSET_JAPAN1,
+     CIDCODING_JIS,
+     CPDF_CMap::MixedTwoBytes,
+     2,
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
+    {"Add-RKSJ",
+     CIDSET_JAPAN1,
+     CIDCODING_JIS,
+     CPDF_CMap::MixedTwoBytes,
+     2,
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
+    {"EUC",
+     CIDSET_JAPAN1,
+     CIDCODING_JIS,
+     CPDF_CMap::MixedTwoBytes,
+     2,
+     {{0x8e, 0x8e}, {0xa1, 0xfe}}},
+    {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {{0x21, 0x7e}}},
+    {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {{0x21, 0x7e}}},
+    {"Ext-RKSJ",
+     CIDSET_JAPAN1,
+     CIDCODING_JIS,
+     CPDF_CMap::MixedTwoBytes,
+     2,
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
+    {"UniJIS-UCS2", CIDSET_JAPAN1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
+    {"UniJIS-UCS2-HW",
+     CIDSET_JAPAN1,
+     CIDCODING_UCS2,
+     CPDF_CMap::TwoBytes,
+     0,
+     {}},
+    {"UniJIS-UTF16",
+     CIDSET_JAPAN1,
+     CIDCODING_UTF16,
+     CPDF_CMap::TwoBytes,
+     0,
+     {}},
+    {"KSC-EUC",  // First of the CIDSET_KOREA1 rows.
+     CIDSET_KOREA1,
+     CIDCODING_KOREA,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0xa1, 0xfe}}},
+    {"KSCms-UHC",
+     CIDSET_KOREA1,
+     CIDCODING_KOREA,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0x81, 0xfe}}},
+    {"KSCms-UHC-HW",
+     CIDSET_KOREA1,
+     CIDCODING_KOREA,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0x81, 0xfe}}},
+    {"KSCpc-EUC",
+     CIDSET_KOREA1,
+     CIDCODING_KOREA,
+     CPDF_CMap::MixedTwoBytes,
+     1,
+     {{0xa1, 0xfd}}},
+    {"UniKS-UCS2", CIDSET_KOREA1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
+    {"UniKS-UTF16", CIDSET_KOREA1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
+};
+
+// Tests the first |size| bytes of |codes| against |ranges|, scanning from the
+// last range to the first and skipping ranges shorter than |size|. The first
+// range that admits at least the leading byte decides the result: 2 if |size|
+// equals that range's length, else 1. Returns 0 if no range admits that byte.
+int CheckFourByteCodeRange(uint8_t* codes,
+                           int size,
+                           const std::vector<CPDF_CMap::CodeRange>& ranges) {
+  for (int idx = pdfium::CollectionSize<int>(ranges) - 1; idx >= 0; --idx) {
+    const CPDF_CMap::CodeRange& range = ranges[idx];
+    if (range.m_CharSize < size)
+      continue;
+
+    // Length of the prefix that lies within the per-byte bounds.
+    int matched = 0;
+    while (matched < size && codes[matched] >= range.m_Lower[matched] &&
+           codes[matched] <= range.m_Upper[matched]) {
+      ++matched;
+    }
+    if (matched == range.m_CharSize)
+      return 2;
+    if (matched > 0)
+      return size == range.m_CharSize ? 2 : 1;
+  }
+  return 0;
+}
+
+// Picks the byte length used to encode |charcode| (only its low 16 bits are
+// examined) under |ranges|. Lengths 4 down to 1 are tried in turn against the
+// zero-padded big-endian bytes of the code, and the first length for which
+// some range matches in full is returned. Falls back to 1 when |ranges| is
+// empty or nothing matches.
+int GetFourByteCharSizeImpl(uint32_t charcode,
+                            const std::vector<CPDF_CMap::CodeRange>& ranges) {
+  if (ranges.empty())
+    return 1;
+
+  const uint8_t bytes[4] = {0x00, 0x00,
+                            static_cast<uint8_t>(charcode >> 8 & 0xFF),
+                            static_cast<uint8_t>(charcode)};
+  for (int length = 4; length >= 1; --length) {
+    // The candidate code is the trailing |length| bytes of |bytes|.
+    const uint8_t* tail = bytes + (4 - length);
+    for (int idx = pdfium::CollectionSize<int>(ranges) - 1; idx >= 0; --idx) {
+      const CPDF_CMap::CodeRange& range = ranges[idx];
+      // Shorter ranges cannot hold |length| bytes.
+      if (range.m_CharSize < length)
+        continue;
+
+      // Count the leading bytes that sit within the range's bounds.
+      int matched = 0;
+      while (matched < length && tail[matched] >= range.m_Lower[matched] &&
+             tail[matched] <= range.m_Upper[matched]) {
+        ++matched;
+      }
+      // Equality requires the range length to be exactly |length|.
+      if (matched == range.m_CharSize)
+        return length;
+    }
+  }
+  return 1;
+}
+
+}  // namespace
+
+CPDF_CMap::CPDF_CMap()
+    : m_bLoaded(false),
+      m_bVertical(false),
+      m_Charset(CIDSET_UNKNOWN),
+      m_CodingScheme(TwoBytes),
+      m_Coding(CIDCODING_UNKNOWN),
+      m_pEmbedMap(nullptr) {}
+
+CPDF_CMap::~CPDF_CMap() = default;
+
+// Configures this CMap from a built-in name. "Identity-H" and "Identity-V"
+// are handled directly; any other name is looked up in g_PredefinedCMaps
+// after dropping its last two characters. m_bLoaded is set only for the
+// identity names or when FPDFAPI_FindEmbeddedCMap() yields an embedded map.
+void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
+                               const CFX_ByteString& bsName,
+                               bool bPromptCJK) {
+  m_PredefinedCMap = bsName;
+  if (m_PredefinedCMap == "Identity-H" || m_PredefinedCMap == "Identity-V") {
+    m_bVertical = m_PredefinedCMap == "Identity-V";
+    m_Coding = CIDCODING_CID;
+    m_bLoaded = true;
+    return;
+  }
+  CFX_ByteString key = m_PredefinedCMap;
+  m_bVertical = key.Right(1) == "V";
+  if (key.GetLength() > 2)
+    key = key.Left(key.GetLength() - 2);
+  const PredefinedCMap* pFound = nullptr;
+  for (const PredefinedCMap& entry : g_PredefinedCMaps) {
+    if (key == CFX_ByteStringC(entry.m_pName)) {
+      pFound = &entry;
+      break;
+    }
+  }
+  if (!pFound)
+    return;
+  m_Charset = pFound->m_Charset;
+  m_Coding = pFound->m_Coding;
+  m_CodingScheme = pFound->m_CodingScheme;
+  if (m_CodingScheme == MixedTwoBytes) {
+    m_MixedTwoByteLeadingBytes = std::vector<bool>(256);
+    for (uint32_t iSeg = 0; iSeg < pFound->m_LeadingSegCount; ++iSeg) {
+      const ByteRange& range = pFound->m_LeadingSegs[iSeg];
+      for (int lead = range.m_First; lead <= range.m_Last; ++lead)
+        m_MixedTwoByteLeadingBytes[lead] = true;
+    }
+  }
+  FPDFAPI_FindEmbeddedCMap(bsName, m_Charset, m_Coding, m_pEmbedMap);
+  if (m_pEmbedMap)
+    m_bLoaded = true;
+}
+
+// Feeds every word of |pData| to a CPDF_CMapParser bound to this object, then
+// for MixedFourBytes adopts the parser's extra mappings sorted by m_EndCode.
+void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) {
+  m_DirectCharcodeToCIDTable = std::vector<uint16_t>(65536);
+  CPDF_CMapParser parser(this);
+  CPDF_SimpleParser syntax(pData, size);
+  for (;;) {
+    const CFX_ByteStringC word = syntax.GetWord();
+    if (word.IsEmpty())
+      break;
+    parser.ParseWord(word);
+  }
+  if (m_CodingScheme != MixedFourBytes || !parser.HasAdditionalMappings())
+    return;
+  m_AdditionalCharcodeToCIDMappings = parser.TakeAdditionalMappings();
+  std::sort(m_AdditionalCharcodeToCIDMappings.begin(),
+            m_AdditionalCharcodeToCIDMappings.end(),
+            [](const CIDRange& lhs, const CIDRange& rhs) {
+              return lhs.m_EndCode < rhs.m_EndCode;
+            });
+}
+
+// Maps |charcode| to a CID. In order of precedence: identity coding returns
+// the code truncated to 16 bits, then the embedded map is consulted, then
+// the code is truncated if no direct table exists, then the direct table
+// serves codes below 0x10000. Larger codes use the sorted additional ranges
+uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const {
+  if (m_Coding == CIDCODING_CID)
+    return static_cast<uint16_t>(charcode);
+  if (m_pEmbedMap)
+    return FPDFAPI_CIDFromCharCode(m_pEmbedMap, charcode);
+  if (m_DirectCharcodeToCIDTable.empty())
+    return static_cast<uint16_t>(charcode);
+  if (charcode < 0x10000)
+    return m_DirectCharcodeToCIDTable[charcode];
+
+  // and yield 0 when no range covers them. Find the first range not ending
+  // below |charcode|.
+  const auto& extra = m_AdditionalCharcodeToCIDMappings;
+  auto endsBelow = [](const CIDRange& range, uint32_t code) {
+    return range.m_EndCode < code;
+  };
+  auto it = std::lower_bound(extra.begin(), extra.end(), charcode, endsBelow);
+  if (it != extra.end() && it->m_StartCode <= charcode)
+    return it->m_StartCID + charcode - it->m_StartCode;
+  return 0;
+}
+
+// Decodes one character code from |pString| starting at |offset| and moves
+// |offset| past the bytes consumed. Bytes at or beyond |nStrLen| are never
+// read: a missing byte decodes as 0 and leaves |offset| where it is, so a
+// truncated string cannot cause an out-of-bounds access. Returns 0 when a
+// MixedFourBytes sequence matches no code range or ends prematurely.
+uint32_t CPDF_CMap::GetNextChar(const char* pString,
+                                int nStrLen,
+                                int& offset) const {
+  auto* pBytes = reinterpret_cast<const uint8_t*>(pString);
+  auto next = [&] { return offset < nStrLen ? pBytes[offset++] : 0; };
+  switch (m_CodingScheme) {
+    case OneByte:
+      return next();
+    case TwoBytes: {
+      const int byte1 = next();
+      return 256 * byte1 + next();
+    }
+    case MixedTwoBytes: {
+      const int byte1 = next();
+      if (!m_MixedTwoByteLeadingBytes[byte1])
+        return byte1;
+      return 256 * byte1 + next();
+    }
+    case MixedFourBytes: {
+      uint8_t codes[4] = {};
+      uint32_t charcode = 0;
+      for (int char_size = 1; char_size <= 4; ++char_size) {
+        // After the first byte, running out of input means no valid code.
+        if (char_size > 1 && offset >= nStrLen)
+          return 0;
+        codes[char_size - 1] = static_cast<uint8_t>(next());
+        charcode = (charcode << 8) + codes[char_size - 1];
+        const int ret = CheckFourByteCodeRange(codes, char_size,
+                                               m_MixedFourByteLeadingRanges);
+        if (ret != 1)  // 2: complete code; 0: no range matches.
+          return ret == 2 ? charcode : 0;
+      }
+      return 0;
+    }
+  }
+  return 0;
+}
+
+// Returns how many bytes |charcode| occupies under the current coding
+// scheme. For the mixed schemes the answer depends only on the numeric
+// magnitude of |charcode|; the lead-byte tables are not consulted.
+int CPDF_CMap::GetCharSize(uint32_t charcode) const {
+  if (m_CodingScheme == OneByte)
+    return 1;
+  if (m_CodingScheme == TwoBytes)
+    return 2;
+  if (m_CodingScheme == MixedTwoBytes)
+    return charcode < 0x100 ? 1 : 2;
+
+  if (m_CodingScheme != MixedFourBytes)
+    return 1;
+
+  // Fewest bytes able to hold the value: 1 below 0x100, 2 below 0x10000,
+  // 3 below 0x1000000, otherwise 4.
+  int nBytes = 1;
+  for (uint32_t rest = charcode >> 8; rest != 0; rest >>= 8)
+    ++nBytes;
+  return nBytes;
+}
+
+// Counts the character codes encoded in the first |size| bytes of |pString|
+// under the current coding scheme.
+int CPDF_CMap::CountChar(const char* pString, int size) const {
+  switch (m_CodingScheme) {
+    case OneByte:
+      return size;
+    case TwoBytes:
+      return (size + 1) / 2;  // A trailing odd byte counts as one character.
+    case MixedTwoBytes: {
+      auto* pBytes = reinterpret_cast<const uint8_t*>(pString);
+      int nChars = 0;
+      int pos = 0;
+      while (pos < size) {
+        // A lead byte takes its trail byte with it.
+        pos += m_MixedTwoByteLeadingBytes[pBytes[pos]] ? 2 : 1;
+        ++nChars;
+      }
+      return nChars;
+    }
+    case MixedFourBytes: {
+      int nChars = 0;
+      for (int pos = 0; pos < size; ++nChars)
+        GetNextChar(pString, size, pos);
+      return nChars;
+    }
+  }
+  return size;
+}
+
+// Writes the byte encoding of |charcode| under the current coding scheme to
+// |str| and returns the number of bytes written, which is never more than 4
+// (0 only if the coding scheme is not one of the four known values). The
+// caller has to supply a large enough buffer; no terminator is written.
+//
+// OneByte and TwoBytes always emit 1 and 2 bytes. MixedTwoBytes emits a
+// single byte only for codes below 0x100 that are not lead bytes. For
+// MixedFourBytes, codes below 0x100 take the width reported by
+// GetFourByteCharSizeImpl() (zero-padded on the left); larger codes take the
+// fewest bytes that can hold them.
+int CPDF_CMap::AppendChar(char* str, uint32_t charcode) const {
+  // Stores the low |nBytes| bytes of |charcode|, most significant first.
+  auto emit = [str, charcode](int nBytes) {
+    for (int i = 0; i < nBytes; ++i) {
+      const int shift = 8 * (nBytes - 1 - i);
+      str[i] = static_cast<uint8_t>(charcode >> shift);
+    }
+    return nBytes;
+  };
+
+  switch (m_CodingScheme) {
+    case OneByte:
+      return emit(1);
+
+    case TwoBytes:
+      return emit(2);
+
+    case MixedTwoBytes: {
+      const bool bSingle =
+          charcode < 0x100 &&
+          !m_MixedTwoByteLeadingBytes[static_cast<uint8_t>(charcode)];
+      return emit(bSingle ? 1 : 2);
+    }
+
+    case MixedFourBytes: {
+      if (charcode < 0x100) {
+        const int iSize =
+            GetFourByteCharSizeImpl(charcode, m_MixedFourByteLeadingRanges);
+        return emit(iSize == 0 ? 1 : iSize);
+      }
+      if (charcode < 0x10000)
+        return emit(2);
+      return emit(charcode < 0x1000000 ? 3 : 4);
+    }
+  }
+  return 0;
+}
-- 
cgit v1.2.3