From ab20404199d3897f753861413014be28d259a6cc Mon Sep 17 00:00:00 2001
From: Tom Sepez <tsepez@chromium.org>
Date: Fri, 5 May 2017 14:31:12 -0700
Subject: Split out CPDF_CMap::m_pLeading into separate members.

Avoid casting willy-nilly over uint8_t*s, since there's at
least one place where the wrong cast could be applied.
Use std::vector<> as well.
Use ByteRange struct to avoid the seg[i * 2 + 1] anti-pattern.

Change-Id: I9278ccc3a545b2640f0daf101a0c8b1d5c5564f8
Reviewed-on: https://pdfium-review.googlesource.com/5012
Commit-Queue: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Lei Zhang <thestig@chromium.org>
---
 core/fpdfapi/font/font_int.h        |   8 +-
 core/fpdfapi/font/fpdf_font_cid.cpp | 202 +++++++++++++++++-------------------
 2 files changed, 98 insertions(+), 112 deletions(-)

(limited to 'core')
diff --git a/core/fpdfapi/font/font_int.h b/core/fpdfapi/font/font_int.h
index 72c892fbbd..f434f2b41d 100644
--- a/core/fpdfapi/font/font_int.h
+++ b/core/fpdfapi/font/font_int.h
@@ -81,8 +81,8 @@ class CPDF_CMap : public CFX_Retainable {
                       bool bPromptCJK);
   void LoadEmbedded(const uint8_t* pData, uint32_t dwSize);
 
-  bool IsLoaded() const;
-  bool IsVertWriting() const;
+  bool IsLoaded() const { return m_bLoaded; }
+  bool IsVertWriting() const { return m_bVertical; }
   uint16_t CIDFromCharCode(uint32_t charcode) const;
   int GetCharSize(uint32_t charcode) const;
   uint32_t GetNextChar(const char* pString, int nStrLen, int& offset) const;
@@ -102,8 +102,8 @@ class CPDF_CMap : public CFX_Retainable {
   CIDSet m_Charset;
   CodingScheme m_CodingScheme;
   int m_Coding;
-  int m_nCodeRanges;
-  uint8_t* m_pLeadingBytes;
+  std::vector<bool> m_MixedTwoByteLeadingBytes;
+  std::vector<CodeRange> m_MixedFourByteLeadingRanges;
   std::vector<uint16_t> m_DirectCharcodeToCIDTable;
   std::vector<CIDRange> m_AdditionalCharcodeToCIDMappings;
   const FXCMAP_CMap* m_pEmbedMap;
diff --git a/core/fpdfapi/font/fpdf_font_cid.cpp b/core/fpdfapi/font/fpdf_font_cid.cpp
index 6ac9587e8f..e45154600b 100644
--- a/core/fpdfapi/font/fpdf_font_cid.cpp
+++ b/core/fpdfapi/font/fpdf_font_cid.cpp
@@ -26,54 +26,57 @@ namespace {
 
 const char* const g_CharsetNames[CIDSET_NUM_SETS] = {nullptr,  "GB1",    "CNS1",
                                                      "Japan1", "Korea1", "UCS"};
+struct ByteRange {
+  uint8_t m_First;
+  uint8_t m_Last;  // Inclusive.
+};
 
-class CPDF_PredefinedCMap {
- public:
+struct PredefinedCMap {
   const char* m_pName;
   CIDSet m_Charset;
   CIDCoding m_Coding;
   CPDF_CMap::CodingScheme m_CodingScheme;
   uint8_t m_LeadingSegCount;
-  uint8_t m_LeadingSegs[4];
+  ByteRange m_LeadingSegs[2];
 };
 
-const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
+const PredefinedCMap g_PredefinedCMaps[] = {
     {"GB-EUC",
      CIDSET_GB1,
      CIDCODING_GB,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0xa1, 0xfe}},
+     {{0xa1, 0xfe}}},
     {"GBpc-EUC",
      CIDSET_GB1,
      CIDCODING_GB,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0xa1, 0xfc}},
+     {{0xa1, 0xfc}}},
     {"GBK-EUC",
      CIDSET_GB1,
      CIDCODING_GB,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0x81, 0xfe}},
+     {{0x81, 0xfe}}},
     {"GBKp-EUC",
      CIDSET_GB1,
      CIDCODING_GB,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0x81, 0xfe}},
+     {{0x81, 0xfe}}},
     {"GBK2K-EUC",
      CIDSET_GB1,
      CIDCODING_GB,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0x81, 0xfe}},
+     {{0x81, 0xfe}}},
     {"GBK2K",
      CIDSET_GB1,
      CIDCODING_GB,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0x81, 0xfe}},
+     {{0x81, 0xfe}}},
     {"UniGB-UCS2", CIDSET_GB1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
     {"UniGB-UTF16", CIDSET_GB1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
     {"B5pc",
@@ -81,25 +84,25 @@ const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
      CIDCODING_BIG5,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0xa1, 0xfc}},
+     {{0xa1, 0xfc}}},
     {"HKscs-B5",
      CIDSET_CNS1,
      CIDCODING_BIG5,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0x88, 0xfe}},
+     {{0x88, 0xfe}}},
     {"ETen-B5",
      CIDSET_CNS1,
      CIDCODING_BIG5,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0xa1, 0xfe}},
+     {{0xa1, 0xfe}}},
     {"ETenms-B5",
      CIDSET_CNS1,
      CIDCODING_BIG5,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0xa1, 0xfe}},
+     {{0xa1, 0xfe}}},
     {"UniCNS-UCS2", CIDSET_CNS1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
     {"UniCNS-UTF16", CIDSET_CNS1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
     {"83pv-RKSJ",
@@ -107,45 +110,45 @@ const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
      CIDCODING_JIS,
      CPDF_CMap::MixedTwoBytes,
      2,
-     {0x81, 0x9f, 0xe0, 0xfc}},
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"90ms-RKSJ",
      CIDSET_JAPAN1,
      CIDCODING_JIS,
      CPDF_CMap::MixedTwoBytes,
      2,
-     {0x81, 0x9f, 0xe0, 0xfc}},
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"90msp-RKSJ",
      CIDSET_JAPAN1,
      CIDCODING_JIS,
      CPDF_CMap::MixedTwoBytes,
      2,
-     {0x81, 0x9f, 0xe0, 0xfc}},
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"90pv-RKSJ",
      CIDSET_JAPAN1,
      CIDCODING_JIS,
      CPDF_CMap::MixedTwoBytes,
      2,
-     {0x81, 0x9f, 0xe0, 0xfc}},
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"Add-RKSJ",
      CIDSET_JAPAN1,
      CIDCODING_JIS,
      CPDF_CMap::MixedTwoBytes,
      2,
-     {0x81, 0x9f, 0xe0, 0xfc}},
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"EUC",
      CIDSET_JAPAN1,
      CIDCODING_JIS,
      CPDF_CMap::MixedTwoBytes,
      2,
-     {0x8e, 0x8e, 0xa1, 0xfe}},
-    {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
-    {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
+     {{0x8e, 0x8e}, {0xa1, 0xfe}}},
+    {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {{0x21, 0x7e}}},
+    {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {{0x21, 0x7e}}},
     {"Ext-RKSJ",
      CIDSET_JAPAN1,
      CIDCODING_JIS,
      CPDF_CMap::MixedTwoBytes,
      2,
-     {0x81, 0x9f, 0xe0, 0xfc}},
+     {{0x81, 0x9f}, {0xe0, 0xfc}}},
     {"UniJIS-UCS2", CIDSET_JAPAN1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
     {"UniJIS-UCS2-HW",
      CIDSET_JAPAN1,
@@ -164,25 +167,25 @@ const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
      CIDCODING_KOREA,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0xa1, 0xfe}},
+     {{0xa1, 0xfe}}},
     {"KSCms-UHC",
      CIDSET_KOREA1,
      CIDCODING_KOREA,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0x81, 0xfe}},
+     {{0x81, 0xfe}}},
     {"KSCms-UHC-HW",
      CIDSET_KOREA1,
      CIDCODING_KOREA,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0x81, 0xfe}},
+     {{0x81, 0xfe}}},
     {"KSCpc-EUC",
      CIDSET_KOREA1,
      CIDCODING_KOREA,
      CPDF_CMap::MixedTwoBytes,
      1,
-     {0xa1, 0xfd}},
+     {{0xa1, 0xfd}}},
     {"UniKS-UCS2", CIDSET_KOREA1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
     {"UniKS-UTF16", CIDSET_KOREA1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
 };
@@ -201,38 +204,35 @@ CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) {
   return CFX_ByteStringC(&word[1], word.GetLength() - 2);
 }
 
-int CheckCodeRange(uint8_t* codes,
-                   int size,
-                   CPDF_CMap::CodeRange* pRanges,
-                   int nRanges) {
-  int iSeg = nRanges - 1;
+int CheckFourByteCodeRange(uint8_t* codes,
+                           int size,
+                           const std::vector<CPDF_CMap::CodeRange>& ranges) {
+  int iSeg = pdfium::CollectionSize<int>(ranges) - 1;
   while (iSeg >= 0) {
-    if (pRanges[iSeg].m_CharSize < size) {
+    if (ranges[iSeg].m_CharSize < size) {
       --iSeg;
       continue;
     }
     int iChar = 0;
     while (iChar < size) {
-      if (codes[iChar] < pRanges[iSeg].m_Lower[iChar] ||
-          codes[iChar] > pRanges[iSeg].m_Upper[iChar]) {
+      if (codes[iChar] < ranges[iSeg].m_Lower[iChar] ||
+          codes[iChar] > ranges[iSeg].m_Upper[iChar]) {
         break;
       }
       ++iChar;
     }
-    if (iChar == pRanges[iSeg].m_CharSize)
+    if (iChar == ranges[iSeg].m_CharSize)
       return 2;
-
     if (iChar)
-      return (size == pRanges[iSeg].m_CharSize) ? 2 : 1;
+      return (size == ranges[iSeg].m_CharSize) ? 2 : 1;
     iSeg--;
   }
   return 0;
 }
 
-int GetCharSizeImpl(uint32_t charcode,
-                    CPDF_CMap::CodeRange* pRanges,
-                    int iRangesSize) {
-  if (!iRangesSize)
+int GetFourByteCharSizeImpl(uint32_t charcode,
+                            const std::vector<CPDF_CMap::CodeRange>& ranges) {
+  if (ranges.empty())
     return 1;
 
   uint8_t codes[4];
@@ -242,21 +242,21 @@ int GetCharSizeImpl(uint32_t charcode,
   int offset = 0;
   int size = 4;
   for (int i = 0; i < 4; ++i) {
-    int iSeg = iRangesSize - 1;
+    int iSeg = pdfium::CollectionSize<int>(ranges) - 1;
     while (iSeg >= 0) {
-      if (pRanges[iSeg].m_CharSize < size) {
+      if (ranges[iSeg].m_CharSize < size) {
         --iSeg;
         continue;
       }
       int iChar = 0;
       while (iChar < size) {
-        if (codes[offset + iChar] < pRanges[iSeg].m_Lower[iChar] ||
-            codes[offset + iChar] > pRanges[iSeg].m_Upper[iChar]) {
+        if (codes[offset + iChar] < ranges[iSeg].m_Lower[iChar] ||
+            codes[offset + iChar] > ranges[iSeg].m_Upper[iChar]) {
           break;
         }
         ++iChar;
       }
-      if (iChar == pRanges[iSeg].m_CharSize)
+      if (iChar == ranges[iSeg].m_CharSize)
         return size;
       --iSeg;
     }
@@ -387,12 +387,7 @@ void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) {
       uint32_t nSegs = pdfium::CollectionSize<uint32_t>(m_CodeRanges);
       if (nSegs > 1) {
         m_pCMap->m_CodingScheme = CPDF_CMap::MixedFourBytes;
-        m_pCMap->m_nCodeRanges = nSegs;
-        FX_Free(m_pCMap->m_pLeadingBytes);
-        m_pCMap->m_pLeadingBytes =
-            FX_Alloc2D(uint8_t, nSegs, sizeof(CPDF_CMap::CodeRange));
-        memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.data(),
-               nSegs * sizeof(CPDF_CMap::CodeRange));
+        m_pCMap->m_MixedFourByteLeadingRanges = m_CodeRanges;
       } else if (nSegs == 1) {
         m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2)
                                       ? CPDF_CMap::TwoBytes
@@ -479,21 +474,9 @@ CPDF_CMap::CPDF_CMap()
       m_Charset(CIDSET_UNKNOWN),
       m_CodingScheme(TwoBytes),
       m_Coding(CIDCODING_UNKNOWN),
-      m_nCodeRanges(0),
-      m_pLeadingBytes(nullptr),
       m_pEmbedMap(nullptr) {}
 
-CPDF_CMap::~CPDF_CMap() {
-  FX_Free(m_pLeadingBytes);
-}
-
-bool CPDF_CMap::IsLoaded() const {
-  return m_bLoaded;
-}
-
-bool CPDF_CMap::IsVertWriting() const {
-  return m_bVertical;
-}
+CPDF_CMap::~CPDF_CMap() {}
 
 void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
                                const CFX_ByteString& bsName,
@@ -510,7 +493,7 @@ void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
   if (cmapid.GetLength() > 2) {
     cmapid = cmapid.Left(cmapid.GetLength() - 2);
   }
-  const CPDF_PredefinedCMap* map = nullptr;
+  const PredefinedCMap* map = nullptr;
   for (size_t i = 0; i < FX_ArraySize(g_PredefinedCMaps); ++i) {
     if (cmapid == CFX_ByteStringC(g_PredefinedCMaps[i].m_pName)) {
       map = &g_PredefinedCMaps[i];
@@ -524,12 +507,11 @@ void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
   m_Coding = map->m_Coding;
   m_CodingScheme = map->m_CodingScheme;
   if (m_CodingScheme == MixedTwoBytes) {
-    m_pLeadingBytes = FX_Alloc(uint8_t, 256);
+    m_MixedTwoByteLeadingBytes = std::vector<bool>(256);
     for (uint32_t i = 0; i < map->m_LeadingSegCount; ++i) {
-      const uint8_t* segs = map->m_LeadingSegs;
-      for (int b = segs[i * 2]; b <= segs[i * 2 + 1]; ++b) {
-        m_pLeadingBytes[b] = 1;
-      }
+      const ByteRange& seg = map->m_LeadingSegs[i];
+      for (int b = seg.m_First; b <= seg.m_Last; ++b)
+        m_MixedTwoByteLeadingBytes[b] = true;
     }
   }
   FPDFAPI_FindEmbeddedCMap(bsName, m_Charset, m_Coding, m_pEmbedMap);
@@ -589,48 +571,46 @@ uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const {
 uint32_t CPDF_CMap::GetNextChar(const char* pString,
                                 int nStrLen,
                                 int& offset) const {
+  auto* pBytes = reinterpret_cast<const uint8_t*>(pString);
   switch (m_CodingScheme) {
-    case OneByte:
-      return ((uint8_t*)pString)[offset++];
-    case TwoBytes:
-      offset += 2;
-      return ((uint8_t*)pString)[offset - 2] * 256 +
-             ((uint8_t*)pString)[offset - 1];
+    case OneByte: {
+      return pBytes[offset++];
+    }
+    case TwoBytes: {
+      uint8_t byte1 = pBytes[offset++];
+      return 256 * byte1 + pBytes[offset++];
+    }
     case MixedTwoBytes: {
-      uint8_t byte1 = ((uint8_t*)pString)[offset++];
-      if (!m_pLeadingBytes[byte1]) {
+      uint8_t byte1 = pBytes[offset++];
+      if (!m_MixedTwoByteLeadingBytes[byte1])
         return byte1;
-      }
-      uint8_t byte2 = ((uint8_t*)pString)[offset++];
-      return byte1 * 256 + byte2;
+      return 256 * byte1 + pBytes[offset++];
     }
     case MixedFourBytes: {
       uint8_t codes[4];
       int char_size = 1;
-      codes[0] = ((uint8_t*)pString)[offset++];
-      auto* pRanges = reinterpret_cast<CPDF_CMap::CodeRange*>(m_pLeadingBytes);
+      codes[0] = pBytes[offset++];
       while (1) {
-        int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges);
-        if (ret == 0) {
+        int ret = CheckFourByteCodeRange(codes, char_size,
+                                         m_MixedFourByteLeadingRanges);
+        if (ret == 0)
           return 0;
-        }
         if (ret == 2) {
           uint32_t charcode = 0;
-          for (int i = 0; i < char_size; i++) {
+          for (int i = 0; i < char_size; i++)
             charcode = (charcode << 8) + codes[i];
-          }
           return charcode;
         }
-        if (char_size == 4 || offset == nStrLen) {
+        if (char_size == 4 || offset == nStrLen)
           return 0;
-        }
-        codes[char_size++] = ((uint8_t*)pString)[offset++];
+        codes[char_size++] = pBytes[offset++];
       }
       break;
     }
   }
   return 0;
 }
+
 int CPDF_CMap::GetCharSize(uint32_t charcode) const {
   switch (m_CodingScheme) {
     case OneByte:
@@ -638,20 +618,21 @@ int CPDF_CMap::GetCharSize(uint32_t charcode) const {
     case TwoBytes:
       return 2;
     case MixedTwoBytes:
+      if (charcode < 0x100)
+        return 1;
+      return 2;
     case MixedFourBytes:
-      if (charcode < 0x100) {
+      if (charcode < 0x100)
         return 1;
-      }
-      if (charcode < 0x10000) {
+      if (charcode < 0x10000)
         return 2;
-      }
-      if (charcode < 0x1000000) {
+      if (charcode < 0x1000000)
         return 3;
-      }
       return 4;
   }
   return 1;
 }
+
 int CPDF_CMap::CountChar(const char* pString, int size) const {
   switch (m_CodingScheme) {
     case OneByte:
@@ -662,7 +643,8 @@ int CPDF_CMap::CountChar(const char* pString, int size) const {
       int count = 0;
       for (int i = 0; i < size; i++) {
         count++;
-        if (m_pLeadingBytes[((uint8_t*)pString)[i]]) {
+        if (m_MixedTwoByteLeadingBytes[reinterpret_cast<const uint8_t*>(
+                pString)[i]]) {
           i++;
         }
       }
@@ -690,18 +672,22 @@ int CPDF_CMap::AppendChar(char* str, uint32_t charcode) const {
       str[1] = (uint8_t)(charcode % 256);
       return 2;
     case MixedTwoBytes:
+      if (charcode < 0x100 && !m_MixedTwoByteLeadingBytes[(uint8_t)charcode]) {
+        str[0] = (uint8_t)charcode;
+        return 1;
+      }
+      str[0] = (uint8_t)(charcode >> 8);
+      str[1] = (uint8_t)charcode;
+      return 2;
     case MixedFourBytes:
       if (charcode < 0x100) {
-        auto* pRanges =
-            reinterpret_cast<CPDF_CMap::CodeRange*>(m_pLeadingBytes);
-        int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges);
-        if (iSize == 0) {
+        int iSize =
+            GetFourByteCharSizeImpl(charcode, m_MixedFourByteLeadingRanges);
+        if (iSize == 0)
           iSize = 1;
-        }
-        if (iSize > 1) {
-          memset(str, 0, sizeof(uint8_t) * iSize);
-        }
         str[iSize - 1] = (uint8_t)charcode;
+        if (iSize > 1)
+          memset(str + 1, 0, iSize - 1);
         return iSize;
       }
       if (charcode < 0x10000) {
-- 
cgit v1.2.3