summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Sepez <tsepez@chromium.org>2017-05-05 14:31:12 -0700
committerChromium commit bot <commit-bot@chromium.org>2017-05-05 22:09:54 +0000
commitab20404199d3897f753861413014be28d259a6cc (patch)
tree8a6701159298db3cbe4391d0ec12677478e2a215
parent60cc3baa05136bccf5d77e0b2cf3000817915bcd (diff)
downloadpdfium-ab20404199d3897f753861413014be28d259a6cc.tar.xz
Split out CPDF_CMap::m_pLeading into separate members.chromium/3093chromium/3092
Avoid casting willy-nilly over uint8_t*s, since there's at least one place where the wrong cast could be applied. Use std::vector<> as well. Use ByteRange struct to avoid the seg[i * 2 + 1] anti-pattern. Change-Id: I9278ccc3a545b2640f0daf101a0c8b1d5c5564f8 Reviewed-on: https://pdfium-review.googlesource.com/5012 Commit-Queue: Tom Sepez <tsepez@chromium.org> Reviewed-by: Lei Zhang <thestig@chromium.org>
-rw-r--r--core/fpdfapi/font/font_int.h8
-rw-r--r--core/fpdfapi/font/fpdf_font_cid.cpp202
2 files changed, 98 insertions, 112 deletions
diff --git a/core/fpdfapi/font/font_int.h b/core/fpdfapi/font/font_int.h
index 72c892fbbd..f434f2b41d 100644
--- a/core/fpdfapi/font/font_int.h
+++ b/core/fpdfapi/font/font_int.h
@@ -81,8 +81,8 @@ class CPDF_CMap : public CFX_Retainable {
bool bPromptCJK);
void LoadEmbedded(const uint8_t* pData, uint32_t dwSize);
- bool IsLoaded() const;
- bool IsVertWriting() const;
+ bool IsLoaded() const { return m_bLoaded; }
+ bool IsVertWriting() const { return m_bVertical; }
uint16_t CIDFromCharCode(uint32_t charcode) const;
int GetCharSize(uint32_t charcode) const;
uint32_t GetNextChar(const char* pString, int nStrLen, int& offset) const;
@@ -102,8 +102,8 @@ class CPDF_CMap : public CFX_Retainable {
CIDSet m_Charset;
CodingScheme m_CodingScheme;
int m_Coding;
- int m_nCodeRanges;
- uint8_t* m_pLeadingBytes;
+ std::vector<bool> m_MixedTwoByteLeadingBytes;
+ std::vector<CodeRange> m_MixedFourByteLeadingRanges;
std::vector<uint16_t> m_DirectCharcodeToCIDTable;
std::vector<CIDRange> m_AdditionalCharcodeToCIDMappings;
const FXCMAP_CMap* m_pEmbedMap;
diff --git a/core/fpdfapi/font/fpdf_font_cid.cpp b/core/fpdfapi/font/fpdf_font_cid.cpp
index 6ac9587e8f..e45154600b 100644
--- a/core/fpdfapi/font/fpdf_font_cid.cpp
+++ b/core/fpdfapi/font/fpdf_font_cid.cpp
@@ -26,54 +26,57 @@ namespace {
const char* const g_CharsetNames[CIDSET_NUM_SETS] = {nullptr, "GB1", "CNS1",
"Japan1", "Korea1", "UCS"};
+struct ByteRange {
+ uint8_t m_First;
+ uint8_t m_Last; // Inclusive.
+};
-class CPDF_PredefinedCMap {
- public:
+struct PredefinedCMap {
const char* m_pName;
CIDSet m_Charset;
CIDCoding m_Coding;
CPDF_CMap::CodingScheme m_CodingScheme;
uint8_t m_LeadingSegCount;
- uint8_t m_LeadingSegs[4];
+ ByteRange m_LeadingSegs[2];
};
-const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
+const PredefinedCMap g_PredefinedCMaps[] = {
{"GB-EUC",
CIDSET_GB1,
CIDCODING_GB,
CPDF_CMap::MixedTwoBytes,
1,
- {0xa1, 0xfe}},
+ {{0xa1, 0xfe}}},
{"GBpc-EUC",
CIDSET_GB1,
CIDCODING_GB,
CPDF_CMap::MixedTwoBytes,
1,
- {0xa1, 0xfc}},
+ {{0xa1, 0xfc}}},
{"GBK-EUC",
CIDSET_GB1,
CIDCODING_GB,
CPDF_CMap::MixedTwoBytes,
1,
- {0x81, 0xfe}},
+ {{0x81, 0xfe}}},
{"GBKp-EUC",
CIDSET_GB1,
CIDCODING_GB,
CPDF_CMap::MixedTwoBytes,
1,
- {0x81, 0xfe}},
+ {{0x81, 0xfe}}},
{"GBK2K-EUC",
CIDSET_GB1,
CIDCODING_GB,
CPDF_CMap::MixedTwoBytes,
1,
- {0x81, 0xfe}},
+ {{0x81, 0xfe}}},
{"GBK2K",
CIDSET_GB1,
CIDCODING_GB,
CPDF_CMap::MixedTwoBytes,
1,
- {0x81, 0xfe}},
+ {{0x81, 0xfe}}},
{"UniGB-UCS2", CIDSET_GB1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
{"UniGB-UTF16", CIDSET_GB1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
{"B5pc",
@@ -81,25 +84,25 @@ const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
CIDCODING_BIG5,
CPDF_CMap::MixedTwoBytes,
1,
- {0xa1, 0xfc}},
+ {{0xa1, 0xfc}}},
{"HKscs-B5",
CIDSET_CNS1,
CIDCODING_BIG5,
CPDF_CMap::MixedTwoBytes,
1,
- {0x88, 0xfe}},
+ {{0x88, 0xfe}}},
{"ETen-B5",
CIDSET_CNS1,
CIDCODING_BIG5,
CPDF_CMap::MixedTwoBytes,
1,
- {0xa1, 0xfe}},
+ {{0xa1, 0xfe}}},
{"ETenms-B5",
CIDSET_CNS1,
CIDCODING_BIG5,
CPDF_CMap::MixedTwoBytes,
1,
- {0xa1, 0xfe}},
+ {{0xa1, 0xfe}}},
{"UniCNS-UCS2", CIDSET_CNS1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
{"UniCNS-UTF16", CIDSET_CNS1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
{"83pv-RKSJ",
@@ -107,45 +110,45 @@ const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
CIDCODING_JIS,
CPDF_CMap::MixedTwoBytes,
2,
- {0x81, 0x9f, 0xe0, 0xfc}},
+ {{0x81, 0x9f}, {0xe0, 0xfc}}},
{"90ms-RKSJ",
CIDSET_JAPAN1,
CIDCODING_JIS,
CPDF_CMap::MixedTwoBytes,
2,
- {0x81, 0x9f, 0xe0, 0xfc}},
+ {{0x81, 0x9f}, {0xe0, 0xfc}}},
{"90msp-RKSJ",
CIDSET_JAPAN1,
CIDCODING_JIS,
CPDF_CMap::MixedTwoBytes,
2,
- {0x81, 0x9f, 0xe0, 0xfc}},
+ {{0x81, 0x9f}, {0xe0, 0xfc}}},
{"90pv-RKSJ",
CIDSET_JAPAN1,
CIDCODING_JIS,
CPDF_CMap::MixedTwoBytes,
2,
- {0x81, 0x9f, 0xe0, 0xfc}},
+ {{0x81, 0x9f}, {0xe0, 0xfc}}},
{"Add-RKSJ",
CIDSET_JAPAN1,
CIDCODING_JIS,
CPDF_CMap::MixedTwoBytes,
2,
- {0x81, 0x9f, 0xe0, 0xfc}},
+ {{0x81, 0x9f}, {0xe0, 0xfc}}},
{"EUC",
CIDSET_JAPAN1,
CIDCODING_JIS,
CPDF_CMap::MixedTwoBytes,
2,
- {0x8e, 0x8e, 0xa1, 0xfe}},
- {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
- {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}},
+ {{0x8e, 0x8e}, {0xa1, 0xfe}}},
+ {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {{0x21, 0x7e}}},
+ {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {{0x21, 0x7e}}},
{"Ext-RKSJ",
CIDSET_JAPAN1,
CIDCODING_JIS,
CPDF_CMap::MixedTwoBytes,
2,
- {0x81, 0x9f, 0xe0, 0xfc}},
+ {{0x81, 0x9f}, {0xe0, 0xfc}}},
{"UniJIS-UCS2", CIDSET_JAPAN1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
{"UniJIS-UCS2-HW",
CIDSET_JAPAN1,
@@ -164,25 +167,25 @@ const CPDF_PredefinedCMap g_PredefinedCMaps[] = {
CIDCODING_KOREA,
CPDF_CMap::MixedTwoBytes,
1,
- {0xa1, 0xfe}},
+ {{0xa1, 0xfe}}},
{"KSCms-UHC",
CIDSET_KOREA1,
CIDCODING_KOREA,
CPDF_CMap::MixedTwoBytes,
1,
- {0x81, 0xfe}},
+ {{0x81, 0xfe}}},
{"KSCms-UHC-HW",
CIDSET_KOREA1,
CIDCODING_KOREA,
CPDF_CMap::MixedTwoBytes,
1,
- {0x81, 0xfe}},
+ {{0x81, 0xfe}}},
{"KSCpc-EUC",
CIDSET_KOREA1,
CIDCODING_KOREA,
CPDF_CMap::MixedTwoBytes,
1,
- {0xa1, 0xfd}},
+ {{0xa1, 0xfd}}},
{"UniKS-UCS2", CIDSET_KOREA1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}},
{"UniKS-UTF16", CIDSET_KOREA1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}},
};
@@ -201,38 +204,35 @@ CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) {
return CFX_ByteStringC(&word[1], word.GetLength() - 2);
}
-int CheckCodeRange(uint8_t* codes,
- int size,
- CPDF_CMap::CodeRange* pRanges,
- int nRanges) {
- int iSeg = nRanges - 1;
+int CheckFourByteCodeRange(uint8_t* codes,
+ int size,
+ const std::vector<CPDF_CMap::CodeRange>& ranges) {
+ int iSeg = pdfium::CollectionSize<int>(ranges) - 1;
while (iSeg >= 0) {
- if (pRanges[iSeg].m_CharSize < size) {
+ if (ranges[iSeg].m_CharSize < size) {
--iSeg;
continue;
}
int iChar = 0;
while (iChar < size) {
- if (codes[iChar] < pRanges[iSeg].m_Lower[iChar] ||
- codes[iChar] > pRanges[iSeg].m_Upper[iChar]) {
+ if (codes[iChar] < ranges[iSeg].m_Lower[iChar] ||
+ codes[iChar] > ranges[iSeg].m_Upper[iChar]) {
break;
}
++iChar;
}
- if (iChar == pRanges[iSeg].m_CharSize)
+ if (iChar == ranges[iSeg].m_CharSize)
return 2;
-
if (iChar)
- return (size == pRanges[iSeg].m_CharSize) ? 2 : 1;
+ return (size == ranges[iSeg].m_CharSize) ? 2 : 1;
iSeg--;
}
return 0;
}
-int GetCharSizeImpl(uint32_t charcode,
- CPDF_CMap::CodeRange* pRanges,
- int iRangesSize) {
- if (!iRangesSize)
+int GetFourByteCharSizeImpl(uint32_t charcode,
+ const std::vector<CPDF_CMap::CodeRange>& ranges) {
+ if (ranges.empty())
return 1;
uint8_t codes[4];
@@ -242,21 +242,21 @@ int GetCharSizeImpl(uint32_t charcode,
int offset = 0;
int size = 4;
for (int i = 0; i < 4; ++i) {
- int iSeg = iRangesSize - 1;
+ int iSeg = pdfium::CollectionSize<int>(ranges) - 1;
while (iSeg >= 0) {
- if (pRanges[iSeg].m_CharSize < size) {
+ if (ranges[iSeg].m_CharSize < size) {
--iSeg;
continue;
}
int iChar = 0;
while (iChar < size) {
- if (codes[offset + iChar] < pRanges[iSeg].m_Lower[iChar] ||
- codes[offset + iChar] > pRanges[iSeg].m_Upper[iChar]) {
+ if (codes[offset + iChar] < ranges[iSeg].m_Lower[iChar] ||
+ codes[offset + iChar] > ranges[iSeg].m_Upper[iChar]) {
break;
}
++iChar;
}
- if (iChar == pRanges[iSeg].m_CharSize)
+ if (iChar == ranges[iSeg].m_CharSize)
return size;
--iSeg;
}
@@ -387,12 +387,7 @@ void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) {
uint32_t nSegs = pdfium::CollectionSize<uint32_t>(m_CodeRanges);
if (nSegs > 1) {
m_pCMap->m_CodingScheme = CPDF_CMap::MixedFourBytes;
- m_pCMap->m_nCodeRanges = nSegs;
- FX_Free(m_pCMap->m_pLeadingBytes);
- m_pCMap->m_pLeadingBytes =
- FX_Alloc2D(uint8_t, nSegs, sizeof(CPDF_CMap::CodeRange));
- memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.data(),
- nSegs * sizeof(CPDF_CMap::CodeRange));
+ m_pCMap->m_MixedFourByteLeadingRanges = m_CodeRanges;
} else if (nSegs == 1) {
m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2)
? CPDF_CMap::TwoBytes
@@ -479,21 +474,9 @@ CPDF_CMap::CPDF_CMap()
m_Charset(CIDSET_UNKNOWN),
m_CodingScheme(TwoBytes),
m_Coding(CIDCODING_UNKNOWN),
- m_nCodeRanges(0),
- m_pLeadingBytes(nullptr),
m_pEmbedMap(nullptr) {}
-CPDF_CMap::~CPDF_CMap() {
- FX_Free(m_pLeadingBytes);
-}
-
-bool CPDF_CMap::IsLoaded() const {
- return m_bLoaded;
-}
-
-bool CPDF_CMap::IsVertWriting() const {
- return m_bVertical;
-}
+CPDF_CMap::~CPDF_CMap() {}
void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
const CFX_ByteString& bsName,
@@ -510,7 +493,7 @@ void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
if (cmapid.GetLength() > 2) {
cmapid = cmapid.Left(cmapid.GetLength() - 2);
}
- const CPDF_PredefinedCMap* map = nullptr;
+ const PredefinedCMap* map = nullptr;
for (size_t i = 0; i < FX_ArraySize(g_PredefinedCMaps); ++i) {
if (cmapid == CFX_ByteStringC(g_PredefinedCMaps[i].m_pName)) {
map = &g_PredefinedCMaps[i];
@@ -524,12 +507,11 @@ void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr,
m_Coding = map->m_Coding;
m_CodingScheme = map->m_CodingScheme;
if (m_CodingScheme == MixedTwoBytes) {
- m_pLeadingBytes = FX_Alloc(uint8_t, 256);
+ m_MixedTwoByteLeadingBytes = std::vector<bool>(256);
for (uint32_t i = 0; i < map->m_LeadingSegCount; ++i) {
- const uint8_t* segs = map->m_LeadingSegs;
- for (int b = segs[i * 2]; b <= segs[i * 2 + 1]; ++b) {
- m_pLeadingBytes[b] = 1;
- }
+ const ByteRange& seg = map->m_LeadingSegs[i];
+ for (int b = seg.m_First; b <= seg.m_Last; ++b)
+ m_MixedTwoByteLeadingBytes[b] = true;
}
}
FPDFAPI_FindEmbeddedCMap(bsName, m_Charset, m_Coding, m_pEmbedMap);
@@ -589,48 +571,46 @@ uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const {
uint32_t CPDF_CMap::GetNextChar(const char* pString,
int nStrLen,
int& offset) const {
+ auto* pBytes = reinterpret_cast<const uint8_t*>(pString);
switch (m_CodingScheme) {
- case OneByte:
- return ((uint8_t*)pString)[offset++];
- case TwoBytes:
- offset += 2;
- return ((uint8_t*)pString)[offset - 2] * 256 +
- ((uint8_t*)pString)[offset - 1];
+ case OneByte: {
+ return pBytes[offset++];
+ }
+ case TwoBytes: {
+ uint8_t byte1 = pBytes[offset++];
+ return 256 * byte1 + pBytes[offset++];
+ }
case MixedTwoBytes: {
- uint8_t byte1 = ((uint8_t*)pString)[offset++];
- if (!m_pLeadingBytes[byte1]) {
+ uint8_t byte1 = pBytes[offset++];
+ if (!m_MixedTwoByteLeadingBytes[byte1])
return byte1;
- }
- uint8_t byte2 = ((uint8_t*)pString)[offset++];
- return byte1 * 256 + byte2;
+ return 256 * byte1 + pBytes[offset++];
}
case MixedFourBytes: {
uint8_t codes[4];
int char_size = 1;
- codes[0] = ((uint8_t*)pString)[offset++];
- auto* pRanges = reinterpret_cast<CPDF_CMap::CodeRange*>(m_pLeadingBytes);
+ codes[0] = pBytes[offset++];
while (1) {
- int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges);
- if (ret == 0) {
+ int ret = CheckFourByteCodeRange(codes, char_size,
+ m_MixedFourByteLeadingRanges);
+ if (ret == 0)
return 0;
- }
if (ret == 2) {
uint32_t charcode = 0;
- for (int i = 0; i < char_size; i++) {
+ for (int i = 0; i < char_size; i++)
charcode = (charcode << 8) + codes[i];
- }
return charcode;
}
- if (char_size == 4 || offset == nStrLen) {
+ if (char_size == 4 || offset == nStrLen)
return 0;
- }
- codes[char_size++] = ((uint8_t*)pString)[offset++];
+ codes[char_size++] = pBytes[offset++];
}
break;
}
}
return 0;
}
+
int CPDF_CMap::GetCharSize(uint32_t charcode) const {
switch (m_CodingScheme) {
case OneByte:
@@ -638,20 +618,21 @@ int CPDF_CMap::GetCharSize(uint32_t charcode) const {
case TwoBytes:
return 2;
case MixedTwoBytes:
+ if (charcode < 0x100)
+ return 1;
+ return 2;
case MixedFourBytes:
- if (charcode < 0x100) {
+ if (charcode < 0x100)
return 1;
- }
- if (charcode < 0x10000) {
+ if (charcode < 0x10000)
return 2;
- }
- if (charcode < 0x1000000) {
+ if (charcode < 0x1000000)
return 3;
- }
return 4;
}
return 1;
}
+
int CPDF_CMap::CountChar(const char* pString, int size) const {
switch (m_CodingScheme) {
case OneByte:
@@ -662,7 +643,8 @@ int CPDF_CMap::CountChar(const char* pString, int size) const {
int count = 0;
for (int i = 0; i < size; i++) {
count++;
- if (m_pLeadingBytes[((uint8_t*)pString)[i]]) {
+ if (m_MixedTwoByteLeadingBytes[reinterpret_cast<const uint8_t*>(
+ pString)[i]]) {
i++;
}
}
@@ -690,18 +672,22 @@ int CPDF_CMap::AppendChar(char* str, uint32_t charcode) const {
str[1] = (uint8_t)(charcode % 256);
return 2;
case MixedTwoBytes:
+ if (charcode < 0x100 && !m_MixedTwoByteLeadingBytes[(uint8_t)charcode]) {
+ str[0] = (uint8_t)charcode;
+ return 1;
+ }
+ str[0] = (uint8_t)(charcode >> 8);
+ str[1] = (uint8_t)charcode;
+ return 2;
case MixedFourBytes:
if (charcode < 0x100) {
- auto* pRanges =
- reinterpret_cast<CPDF_CMap::CodeRange*>(m_pLeadingBytes);
- int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges);
- if (iSize == 0) {
+ int iSize =
+ GetFourByteCharSizeImpl(charcode, m_MixedFourByteLeadingRanges);
+ if (iSize == 0)
iSize = 1;
- }
- if (iSize > 1) {
- memset(str, 0, sizeof(uint8_t) * iSize);
- }
str[iSize - 1] = (uint8_t)charcode;
+ if (iSize > 1)
+ memset(str + 1, 0, iSize - 1);
return iSize;
}
if (charcode < 0x10000) {