From ab20404199d3897f753861413014be28d259a6cc Mon Sep 17 00:00:00 2001 From: Tom Sepez Date: Fri, 5 May 2017 14:31:12 -0700 Subject: Split out CPDF_CMap::m_pLeading into separate members. Avoid casting willy-nilly over uint8_t*s, since there's at least one place where the wrong cast could be applied. Use std::vector<> as well. Use ByteRange struct to avoid the seg[i * 2 + 1] anti-pattern. Change-Id: I9278ccc3a545b2640f0daf101a0c8b1d5c5564f8 Reviewed-on: https://pdfium-review.googlesource.com/5012 Commit-Queue: Tom Sepez Reviewed-by: Lei Zhang --- core/fpdfapi/font/font_int.h | 8 +- core/fpdfapi/font/fpdf_font_cid.cpp | 202 +++++++++++++++++------------------- 2 files changed, 98 insertions(+), 112 deletions(-) (limited to 'core') diff --git a/core/fpdfapi/font/font_int.h b/core/fpdfapi/font/font_int.h index 72c892fbbd..f434f2b41d 100644 --- a/core/fpdfapi/font/font_int.h +++ b/core/fpdfapi/font/font_int.h @@ -81,8 +81,8 @@ class CPDF_CMap : public CFX_Retainable { bool bPromptCJK); void LoadEmbedded(const uint8_t* pData, uint32_t dwSize); - bool IsLoaded() const; - bool IsVertWriting() const; + bool IsLoaded() const { return m_bLoaded; } + bool IsVertWriting() const { return m_bVertical; } uint16_t CIDFromCharCode(uint32_t charcode) const; int GetCharSize(uint32_t charcode) const; uint32_t GetNextChar(const char* pString, int nStrLen, int& offset) const; @@ -102,8 +102,8 @@ class CPDF_CMap : public CFX_Retainable { CIDSet m_Charset; CodingScheme m_CodingScheme; int m_Coding; - int m_nCodeRanges; - uint8_t* m_pLeadingBytes; + std::vector m_MixedTwoByteLeadingBytes; + std::vector m_MixedFourByteLeadingRanges; std::vector m_DirectCharcodeToCIDTable; std::vector m_AdditionalCharcodeToCIDMappings; const FXCMAP_CMap* m_pEmbedMap; diff --git a/core/fpdfapi/font/fpdf_font_cid.cpp b/core/fpdfapi/font/fpdf_font_cid.cpp index 6ac9587e8f..e45154600b 100644 --- a/core/fpdfapi/font/fpdf_font_cid.cpp +++ b/core/fpdfapi/font/fpdf_font_cid.cpp @@ -26,54 +26,57 @@ namespace { const 
char* const g_CharsetNames[CIDSET_NUM_SETS] = {nullptr, "GB1", "CNS1", "Japan1", "Korea1", "UCS"}; +struct ByteRange { + uint8_t m_First; + uint8_t m_Last; // Inclusive. +}; -class CPDF_PredefinedCMap { - public: +struct PredefinedCMap { const char* m_pName; CIDSet m_Charset; CIDCoding m_Coding; CPDF_CMap::CodingScheme m_CodingScheme; uint8_t m_LeadingSegCount; - uint8_t m_LeadingSegs[4]; + ByteRange m_LeadingSegs[2]; }; -const CPDF_PredefinedCMap g_PredefinedCMaps[] = { +const PredefinedCMap g_PredefinedCMaps[] = { {"GB-EUC", CIDSET_GB1, CIDCODING_GB, CPDF_CMap::MixedTwoBytes, 1, - {0xa1, 0xfe}}, + {{0xa1, 0xfe}}}, {"GBpc-EUC", CIDSET_GB1, CIDCODING_GB, CPDF_CMap::MixedTwoBytes, 1, - {0xa1, 0xfc}}, + {{0xa1, 0xfc}}}, {"GBK-EUC", CIDSET_GB1, CIDCODING_GB, CPDF_CMap::MixedTwoBytes, 1, - {0x81, 0xfe}}, + {{0x81, 0xfe}}}, {"GBKp-EUC", CIDSET_GB1, CIDCODING_GB, CPDF_CMap::MixedTwoBytes, 1, - {0x81, 0xfe}}, + {{0x81, 0xfe}}}, {"GBK2K-EUC", CIDSET_GB1, CIDCODING_GB, CPDF_CMap::MixedTwoBytes, 1, - {0x81, 0xfe}}, + {{0x81, 0xfe}}}, {"GBK2K", CIDSET_GB1, CIDCODING_GB, CPDF_CMap::MixedTwoBytes, 1, - {0x81, 0xfe}}, + {{0x81, 0xfe}}}, {"UniGB-UCS2", CIDSET_GB1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, {"UniGB-UTF16", CIDSET_GB1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}}, {"B5pc", @@ -81,25 +84,25 @@ const CPDF_PredefinedCMap g_PredefinedCMaps[] = { CIDCODING_BIG5, CPDF_CMap::MixedTwoBytes, 1, - {0xa1, 0xfc}}, + {{0xa1, 0xfc}}}, {"HKscs-B5", CIDSET_CNS1, CIDCODING_BIG5, CPDF_CMap::MixedTwoBytes, 1, - {0x88, 0xfe}}, + {{0x88, 0xfe}}}, {"ETen-B5", CIDSET_CNS1, CIDCODING_BIG5, CPDF_CMap::MixedTwoBytes, 1, - {0xa1, 0xfe}}, + {{0xa1, 0xfe}}}, {"ETenms-B5", CIDSET_CNS1, CIDCODING_BIG5, CPDF_CMap::MixedTwoBytes, 1, - {0xa1, 0xfe}}, + {{0xa1, 0xfe}}}, {"UniCNS-UCS2", CIDSET_CNS1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, {"UniCNS-UTF16", CIDSET_CNS1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}}, {"83pv-RKSJ", @@ -107,45 +110,45 @@ const CPDF_PredefinedCMap g_PredefinedCMaps[] 
= { CIDCODING_JIS, CPDF_CMap::MixedTwoBytes, 2, - {0x81, 0x9f, 0xe0, 0xfc}}, + {{0x81, 0x9f}, {0xe0, 0xfc}}}, {"90ms-RKSJ", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::MixedTwoBytes, 2, - {0x81, 0x9f, 0xe0, 0xfc}}, + {{0x81, 0x9f}, {0xe0, 0xfc}}}, {"90msp-RKSJ", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::MixedTwoBytes, 2, - {0x81, 0x9f, 0xe0, 0xfc}}, + {{0x81, 0x9f}, {0xe0, 0xfc}}}, {"90pv-RKSJ", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::MixedTwoBytes, 2, - {0x81, 0x9f, 0xe0, 0xfc}}, + {{0x81, 0x9f}, {0xe0, 0xfc}}}, {"Add-RKSJ", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::MixedTwoBytes, 2, - {0x81, 0x9f, 0xe0, 0xfc}}, + {{0x81, 0x9f}, {0xe0, 0xfc}}}, {"EUC", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::MixedTwoBytes, 2, - {0x8e, 0x8e, 0xa1, 0xfe}}, - {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}}, - {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {0x21, 0x7e}}, + {{0x8e, 0x8e}, {0xa1, 0xfe}}}, + {"H", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {{0x21, 0x7e}}}, + {"V", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::TwoBytes, 1, {{0x21, 0x7e}}}, {"Ext-RKSJ", CIDSET_JAPAN1, CIDCODING_JIS, CPDF_CMap::MixedTwoBytes, 2, - {0x81, 0x9f, 0xe0, 0xfc}}, + {{0x81, 0x9f}, {0xe0, 0xfc}}}, {"UniJIS-UCS2", CIDSET_JAPAN1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, {"UniJIS-UCS2-HW", CIDSET_JAPAN1, @@ -164,25 +167,25 @@ const CPDF_PredefinedCMap g_PredefinedCMaps[] = { CIDCODING_KOREA, CPDF_CMap::MixedTwoBytes, 1, - {0xa1, 0xfe}}, + {{0xa1, 0xfe}}}, {"KSCms-UHC", CIDSET_KOREA1, CIDCODING_KOREA, CPDF_CMap::MixedTwoBytes, 1, - {0x81, 0xfe}}, + {{0x81, 0xfe}}}, {"KSCms-UHC-HW", CIDSET_KOREA1, CIDCODING_KOREA, CPDF_CMap::MixedTwoBytes, 1, - {0x81, 0xfe}}, + {{0x81, 0xfe}}}, {"KSCpc-EUC", CIDSET_KOREA1, CIDCODING_KOREA, CPDF_CMap::MixedTwoBytes, 1, - {0xa1, 0xfd}}, + {{0xa1, 0xfd}}}, {"UniKS-UCS2", CIDSET_KOREA1, CIDCODING_UCS2, CPDF_CMap::TwoBytes, 0, {}}, {"UniKS-UTF16", CIDSET_KOREA1, CIDCODING_UTF16, CPDF_CMap::TwoBytes, 0, {}}, }; @@ -201,38 +204,35 
@@ CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) { return CFX_ByteStringC(&word[1], word.GetLength() - 2); } -int CheckCodeRange(uint8_t* codes, - int size, - CPDF_CMap::CodeRange* pRanges, - int nRanges) { - int iSeg = nRanges - 1; +int CheckFourByteCodeRange(uint8_t* codes, + int size, + const std::vector& ranges) { + int iSeg = pdfium::CollectionSize(ranges) - 1; while (iSeg >= 0) { - if (pRanges[iSeg].m_CharSize < size) { + if (ranges[iSeg].m_CharSize < size) { --iSeg; continue; } int iChar = 0; while (iChar < size) { - if (codes[iChar] < pRanges[iSeg].m_Lower[iChar] || - codes[iChar] > pRanges[iSeg].m_Upper[iChar]) { + if (codes[iChar] < ranges[iSeg].m_Lower[iChar] || + codes[iChar] > ranges[iSeg].m_Upper[iChar]) { break; } ++iChar; } - if (iChar == pRanges[iSeg].m_CharSize) + if (iChar == ranges[iSeg].m_CharSize) return 2; - if (iChar) - return (size == pRanges[iSeg].m_CharSize) ? 2 : 1; + return (size == ranges[iSeg].m_CharSize) ? 2 : 1; iSeg--; } return 0; } -int GetCharSizeImpl(uint32_t charcode, - CPDF_CMap::CodeRange* pRanges, - int iRangesSize) { - if (!iRangesSize) +int GetFourByteCharSizeImpl(uint32_t charcode, + const std::vector& ranges) { + if (ranges.empty()) return 1; uint8_t codes[4]; @@ -242,21 +242,21 @@ int GetCharSizeImpl(uint32_t charcode, int offset = 0; int size = 4; for (int i = 0; i < 4; ++i) { - int iSeg = iRangesSize - 1; + int iSeg = pdfium::CollectionSize(ranges) - 1; while (iSeg >= 0) { - if (pRanges[iSeg].m_CharSize < size) { + if (ranges[iSeg].m_CharSize < size) { --iSeg; continue; } int iChar = 0; while (iChar < size) { - if (codes[offset + iChar] < pRanges[iSeg].m_Lower[iChar] || - codes[offset + iChar] > pRanges[iSeg].m_Upper[iChar]) { + if (codes[offset + iChar] < ranges[iSeg].m_Lower[iChar] || + codes[offset + iChar] > ranges[iSeg].m_Upper[iChar]) { break; } ++iChar; } - if (iChar == pRanges[iSeg].m_CharSize) + if (iChar == ranges[iSeg].m_CharSize) return size; --iSeg; } @@ -387,12 +387,7 @@ void 
CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) { uint32_t nSegs = pdfium::CollectionSize(m_CodeRanges); if (nSegs > 1) { m_pCMap->m_CodingScheme = CPDF_CMap::MixedFourBytes; - m_pCMap->m_nCodeRanges = nSegs; - FX_Free(m_pCMap->m_pLeadingBytes); - m_pCMap->m_pLeadingBytes = - FX_Alloc2D(uint8_t, nSegs, sizeof(CPDF_CMap::CodeRange)); - memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.data(), - nSegs * sizeof(CPDF_CMap::CodeRange)); + m_pCMap->m_MixedFourByteLeadingRanges = m_CodeRanges; } else if (nSegs == 1) { m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2) ? CPDF_CMap::TwoBytes @@ -479,21 +474,9 @@ CPDF_CMap::CPDF_CMap() m_Charset(CIDSET_UNKNOWN), m_CodingScheme(TwoBytes), m_Coding(CIDCODING_UNKNOWN), - m_nCodeRanges(0), - m_pLeadingBytes(nullptr), m_pEmbedMap(nullptr) {} -CPDF_CMap::~CPDF_CMap() { - FX_Free(m_pLeadingBytes); -} - -bool CPDF_CMap::IsLoaded() const { - return m_bLoaded; -} - -bool CPDF_CMap::IsVertWriting() const { - return m_bVertical; -} +CPDF_CMap::~CPDF_CMap() {} void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr, const CFX_ByteString& bsName, @@ -510,7 +493,7 @@ void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr, if (cmapid.GetLength() > 2) { cmapid = cmapid.Left(cmapid.GetLength() - 2); } - const CPDF_PredefinedCMap* map = nullptr; + const PredefinedCMap* map = nullptr; for (size_t i = 0; i < FX_ArraySize(g_PredefinedCMaps); ++i) { if (cmapid == CFX_ByteStringC(g_PredefinedCMaps[i].m_pName)) { map = &g_PredefinedCMaps[i]; @@ -524,12 +507,11 @@ void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr, m_Coding = map->m_Coding; m_CodingScheme = map->m_CodingScheme; if (m_CodingScheme == MixedTwoBytes) { - m_pLeadingBytes = FX_Alloc(uint8_t, 256); + m_MixedTwoByteLeadingBytes = std::vector(256); for (uint32_t i = 0; i < map->m_LeadingSegCount; ++i) { - const uint8_t* segs = map->m_LeadingSegs; - for (int b = segs[i * 2]; b <= segs[i * 2 + 1]; ++b) { - m_pLeadingBytes[b] = 1; - } + const ByteRange& seg = map->m_LeadingSegs[i]; 
+ for (int b = seg.m_First; b <= seg.m_Last; ++b) + m_MixedTwoByteLeadingBytes[b] = true; } } FPDFAPI_FindEmbeddedCMap(bsName, m_Charset, m_Coding, m_pEmbedMap); @@ -589,48 +571,46 @@ uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const { uint32_t CPDF_CMap::GetNextChar(const char* pString, int nStrLen, int& offset) const { + auto* pBytes = reinterpret_cast(pString); switch (m_CodingScheme) { - case OneByte: - return ((uint8_t*)pString)[offset++]; - case TwoBytes: - offset += 2; - return ((uint8_t*)pString)[offset - 2] * 256 + - ((uint8_t*)pString)[offset - 1]; + case OneByte: { + return pBytes[offset++]; + } + case TwoBytes: { + uint8_t byte1 = pBytes[offset++]; + return 256 * byte1 + pBytes[offset++]; + } case MixedTwoBytes: { - uint8_t byte1 = ((uint8_t*)pString)[offset++]; - if (!m_pLeadingBytes[byte1]) { + uint8_t byte1 = pBytes[offset++]; + if (!m_MixedTwoByteLeadingBytes[byte1]) return byte1; - } - uint8_t byte2 = ((uint8_t*)pString)[offset++]; - return byte1 * 256 + byte2; + return 256 * byte1 + pBytes[offset++]; } case MixedFourBytes: { uint8_t codes[4]; int char_size = 1; - codes[0] = ((uint8_t*)pString)[offset++]; - auto* pRanges = reinterpret_cast(m_pLeadingBytes); + codes[0] = pBytes[offset++]; while (1) { - int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges); - if (ret == 0) { + int ret = CheckFourByteCodeRange(codes, char_size, + m_MixedFourByteLeadingRanges); + if (ret == 0) return 0; - } if (ret == 2) { uint32_t charcode = 0; - for (int i = 0; i < char_size; i++) { + for (int i = 0; i < char_size; i++) charcode = (charcode << 8) + codes[i]; - } return charcode; } - if (char_size == 4 || offset == nStrLen) { + if (char_size == 4 || offset == nStrLen) return 0; - } - codes[char_size++] = ((uint8_t*)pString)[offset++]; + codes[char_size++] = pBytes[offset++]; } break; } } return 0; } + int CPDF_CMap::GetCharSize(uint32_t charcode) const { switch (m_CodingScheme) { case OneByte: @@ -638,20 +618,21 @@ int 
CPDF_CMap::GetCharSize(uint32_t charcode) const { case TwoBytes: return 2; case MixedTwoBytes: + if (charcode < 0x100) + return 1; + return 2; case MixedFourBytes: - if (charcode < 0x100) { + if (charcode < 0x100) return 1; - } - if (charcode < 0x10000) { + if (charcode < 0x10000) return 2; - } - if (charcode < 0x1000000) { + if (charcode < 0x1000000) return 3; - } return 4; } return 1; } + int CPDF_CMap::CountChar(const char* pString, int size) const { switch (m_CodingScheme) { case OneByte: @@ -662,7 +643,8 @@ int CPDF_CMap::CountChar(const char* pString, int size) const { int count = 0; for (int i = 0; i < size; i++) { count++; - if (m_pLeadingBytes[((uint8_t*)pString)[i]]) { + if (m_MixedTwoByteLeadingBytes[reinterpret_cast( + pString)[i]]) { i++; } } @@ -690,18 +672,22 @@ int CPDF_CMap::AppendChar(char* str, uint32_t charcode) const { str[1] = (uint8_t)(charcode % 256); return 2; case MixedTwoBytes: + if (charcode < 0x100 && !m_MixedTwoByteLeadingBytes[(uint8_t)charcode]) { + str[0] = (uint8_t)charcode; + return 1; + } + str[0] = (uint8_t)(charcode >> 8); + str[1] = (uint8_t)charcode; + return 2; case MixedFourBytes: if (charcode < 0x100) { - auto* pRanges = - reinterpret_cast(m_pLeadingBytes); - int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges); - if (iSize == 0) { + int iSize = + GetFourByteCharSizeImpl(charcode, m_MixedFourByteLeadingRanges); + if (iSize == 0) iSize = 1; - } - if (iSize > 1) { - memset(str, 0, sizeof(uint8_t) * iSize); - } str[iSize - 1] = (uint8_t)charcode; + if (iSize > 1) + memset(str + 1, 0, iSize - 1); return iSize; } if (charcode < 0x10000) { -- cgit v1.2.3