From 4c64450875263a1f48e84f6d2223d8c36484f7cf Mon Sep 17 00:00:00 2001 From: Tom Sepez Date: Thu, 4 May 2017 15:32:21 -0700 Subject: Fix CPDF_CMap::m_pAddMapping lack of type information. Using a ByteBuffer and an array of uint8_t's isn't how one would represent an array of structured data. Packing uint16_t's into a uint32_t via / and % isn't ideal, either. Bug: Change-Id: Ib09ae2659ba2f027724546bb7aef99bdfd2dea25 Reviewed-on: https://pdfium-review.googlesource.com/4951 Commit-Queue: Tom Sepez Reviewed-by: Lei Zhang --- core/fpdfapi/font/font_int.h | 170 ++++++++++++++------------- core/fpdfapi/font/fpdf_font_cid.cpp | 104 ++++++---------- core/fpdfapi/font/fpdf_font_cid_unittest.cpp | 2 +- 3 files changed, 131 insertions(+), 145 deletions(-) diff --git a/core/fpdfapi/font/font_int.h b/core/fpdfapi/font/font_int.h index 926ea91a9e..8bf7ef5594 100644 --- a/core/fpdfapi/font/font_int.h +++ b/core/fpdfapi/font/font_int.h @@ -9,6 +9,7 @@ #include #include +#include #include #include "core/fpdfapi/font/cpdf_cidfont.h" @@ -16,9 +17,10 @@ #include "core/fxcrt/fx_basic.h" class CPDF_CID2UnicodeMap; -class CPDF_CMap; +class CPDF_CMapManager; class CPDF_Font; class CPDF_Stream; +struct FXCMAP_CMap; using FXFT_Library = void*; @@ -26,25 +28,6 @@ int TT2PDF(int m, FXFT_Face face); bool FT_UseTTCharmap(FXFT_Face face, int platform_id, int encoding_id); CIDSet CharsetFromOrdering(const CFX_ByteStringC& ordering); -class CPDF_CMapManager { - public: - CPDF_CMapManager(); - ~CPDF_CMapManager(); - - CFX_RetainPtr GetPredefinedCMap(const CFX_ByteString& name, - bool bPromptCJK); - CPDF_CID2UnicodeMap* GetCID2UnicodeMap(CIDSet charset, bool bPromptCJK); - - private: - CFX_RetainPtr LoadPredefinedCMap(const CFX_ByteString& name, - bool bPromptCJK); - std::unique_ptr LoadCID2UnicodeMap(CIDSet charset, - bool bPromptCJK); - - std::map> m_CMaps; - std::unique_ptr m_CID2UnicodeMaps[6]; -}; - class CFX_StockFontArray { public: CFX_StockFontArray(); @@ -58,64 +41,6 @@ class CFX_StockFontArray { std::unique_ptr m_StockFonts[14]; }; -class CPDF_FontGlobals { - public: - CPDF_FontGlobals(); - ~CPDF_FontGlobals(); - - void Clear(CPDF_Document* pDoc); - CPDF_Font* Find(CPDF_Document* pDoc, uint32_t index); - - // Takes ownership of |pFont|, returns unowned pointer to it. - CPDF_Font* Set(CPDF_Document* key, - uint32_t index, - std::unique_ptr pFont); - - CPDF_CMapManager m_CMapManager; - struct { - const struct FXCMAP_CMap* m_pMapList; - uint32_t m_Count; - } m_EmbeddedCharsets[CIDSET_NUM_SETS]; - struct { - const uint16_t* m_pMap; - uint32_t m_Count; - } m_EmbeddedToUnicodes[CIDSET_NUM_SETS]; - - private: - std::map> m_StockMap; -}; - -struct CMap_CodeRange { - int m_CharSize; - uint8_t m_Lower[4]; - uint8_t m_Upper[4]; -}; - -class CPDF_CMapParser { - public: - CPDF_CMapParser(); - ~CPDF_CMapParser(); - void Initialize(CPDF_CMap* pMap); - void ParseWord(const CFX_ByteStringC& str); - CFX_BinaryBuf m_AddMaps; - - private: - friend class fpdf_font_cid_CMap_GetCode_Test; - friend class fpdf_font_cid_CMap_GetCodeRange_Test; - - static uint32_t CMap_GetCode(const CFX_ByteStringC& word); - static bool CMap_GetCodeRange(CMap_CodeRange& range, - const CFX_ByteStringC& first, - const CFX_ByteStringC& second); - - CPDF_CMap* m_pCMap; - int m_Status; - int m_CodeSeq; - uint32_t m_CodePoints[4]; - std::vector m_CodeRanges; - CFX_ByteString m_LastWord; -}; - enum CIDCoding : uint8_t { CIDCODING_UNKNOWN = 0, CIDCODING_GB, @@ -136,6 +61,18 @@ class CPDF_CMap : public CFX_Retainable { MixedFourBytes }; + struct CodeRange { + int m_CharSize; + uint8_t m_Lower[4]; + uint8_t m_Upper[4]; + }; + + struct CIDRange { + uint32_t m_StartCode; + uint32_t m_EndCode; + uint16_t m_StartCID; + }; + template friend CFX_RetainPtr pdfium::MakeRetain(Args&&... args); @@ -167,11 +104,59 @@ class CPDF_CMap : public CFX_Retainable { int m_nCodeRanges; uint8_t* m_pLeadingBytes; uint16_t* m_pMapping; - uint8_t* m_pAddMapping; bool m_bLoaded; + std::vector m_AddMapping; const FXCMAP_CMap* m_pEmbedMap; }; +class CPDF_CMapManager { + public: + CPDF_CMapManager(); + ~CPDF_CMapManager(); + + CFX_RetainPtr GetPredefinedCMap(const CFX_ByteString& name, + bool bPromptCJK); + CPDF_CID2UnicodeMap* GetCID2UnicodeMap(CIDSet charset, bool bPromptCJK); + + private: + CFX_RetainPtr LoadPredefinedCMap(const CFX_ByteString& name, + bool bPromptCJK); + std::unique_ptr LoadCID2UnicodeMap(CIDSet charset, + bool bPromptCJK); + + std::map> m_CMaps; + std::unique_ptr m_CID2UnicodeMaps[6]; +}; + +class CPDF_CMapParser { + public: + explicit CPDF_CMapParser(CPDF_CMap* pMap); + ~CPDF_CMapParser(); + + void ParseWord(const CFX_ByteStringC& str); + bool HasAddMaps() const { return !m_AddMaps.empty(); } + std::vector TakeAddMaps() { + return std::move(m_AddMaps); + } + + private: + friend class fpdf_font_cid_CMap_GetCode_Test; + friend class fpdf_font_cid_CMap_GetCodeRange_Test; + + static uint32_t CMap_GetCode(const CFX_ByteStringC& word); + static bool CMap_GetCodeRange(CPDF_CMap::CodeRange& range, + const CFX_ByteStringC& first, + const CFX_ByteStringC& second); + + CPDF_CMap* const m_pCMap; + int m_Status; + int m_CodeSeq; + uint32_t m_CodePoints[4]; + std::vector m_CodeRanges; + std::vector m_AddMaps; + CFX_ByteString m_LastWord; +}; + class CPDF_CID2UnicodeMap { public: CPDF_CID2UnicodeMap(); @@ -211,4 +196,31 @@ class CPDF_ToUnicodeMap { CFX_WideTextBuf m_MultiCharBuf; }; +class CPDF_FontGlobals { + public: + CPDF_FontGlobals(); + ~CPDF_FontGlobals(); + + void Clear(CPDF_Document* pDoc); + CPDF_Font* Find(CPDF_Document* pDoc, uint32_t index); + + // Takes ownership of |pFont|, returns unowned pointer to it. + CPDF_Font* Set(CPDF_Document* key, + uint32_t index, + std::unique_ptr pFont); + + CPDF_CMapManager m_CMapManager; + struct { + const FXCMAP_CMap* m_pMapList; + uint32_t m_Count; + } m_EmbeddedCharsets[CIDSET_NUM_SETS]; + struct { + const uint16_t* m_pMap; + uint32_t m_Count; + } m_EmbeddedToUnicodes[CIDSET_NUM_SETS]; + + private: + std::map> m_StockMap; +}; + #endif // CORE_FPDFAPI_FONT_FONT_INT_H_ diff --git a/core/fpdfapi/font/fpdf_font_cid.cpp b/core/fpdfapi/font/fpdf_font_cid.cpp index 001e8acd57..3f8e919bed 100644 --- a/core/fpdfapi/font/fpdf_font_cid.cpp +++ b/core/fpdfapi/font/fpdf_font_cid.cpp @@ -200,24 +200,9 @@ CFX_ByteStringC CMap_GetString(const CFX_ByteStringC& word) { return CFX_ByteStringC(&word[1], word.GetLength() - 2); } -int CompareDWORD(const void* data1, const void* data2) { - return (*(uint32_t*)data1) - (*(uint32_t*)data2); -} - -int CompareCID(const void* key, const void* element) { - if ((*(uint32_t*)key) < (*(uint32_t*)element)) { - return -1; - } - if ((*(uint32_t*)key) > - (*(uint32_t*)element) + ((uint32_t*)element)[1] / 65536) { - return 1; - } - return 0; -} - int CheckCodeRange(uint8_t* codes, int size, - CMap_CodeRange* pRanges, + CPDF_CMap::CodeRange* pRanges, int nRanges) { int iSeg = nRanges - 1; while (iSeg >= 0) { @@ -244,7 +229,7 @@ int CheckCodeRange(uint8_t* codes, } int GetCharSizeImpl(uint32_t charcode, - CMap_CodeRange* pRanges, + CPDF_CMap::CodeRange* pRanges, int iRangesSize) { if (!iRangesSize) return 1; @@ -328,18 +313,11 @@ std::unique_ptr CPDF_CMapManager::LoadCID2UnicodeMap( return pMap; } -CPDF_CMapParser::CPDF_CMapParser() - : m_pCMap(nullptr), m_Status(0), m_CodeSeq(0) {} +CPDF_CMapParser::CPDF_CMapParser(CPDF_CMap* pCMap) + : m_pCMap(pCMap), m_Status(0), m_CodeSeq(0) {} CPDF_CMapParser::~CPDF_CMapParser() {} -void CPDF_CMapParser::Initialize(CPDF_CMap* pCMap) { - m_pCMap = pCMap; - m_Status = 0; - m_CodeSeq = 0; - m_AddMaps.EstimateSize(0, 10240); -} - void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) { if (word.IsEmpty()) { return; @@ -388,10 +366,7 @@ void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) { m_pCMap->m_pMapping[code] = (uint16_t)(StartCID + code - StartCode); } } else { - uint32_t buf[2]; - buf[0] = StartCode; - buf[1] = ((EndCode - StartCode) << 16) + StartCID; - m_AddMaps.AppendBlock(buf, sizeof buf); + m_AddMaps.push_back({StartCode, EndCode, StartCID}); } m_CodeSeq = 0; } else if (m_Status == 3) { @@ -412,9 +387,9 @@ void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) { m_pCMap->m_nCodeRanges = nSegs; FX_Free(m_pCMap->m_pLeadingBytes); m_pCMap->m_pLeadingBytes = - FX_Alloc2D(uint8_t, nSegs, sizeof(CMap_CodeRange)); + FX_Alloc2D(uint8_t, nSegs, sizeof(CPDF_CMap::CodeRange)); memcpy(m_pCMap->m_pLeadingBytes, m_CodeRanges.data(), - nSegs * sizeof(CMap_CodeRange)); + nSegs * sizeof(CPDF_CMap::CodeRange)); } else if (nSegs == 1) { m_pCMap->m_CodingScheme = (m_CodeRanges[0].m_CharSize == 2) ? CPDF_CMap::TwoBytes @@ -426,7 +401,7 @@ void CPDF_CMapParser::ParseWord(const CFX_ByteStringC& word) { return; } if (m_CodeSeq % 2) { - CMap_CodeRange range; + CPDF_CMap::CodeRange range; if (CMap_GetCodeRange(range, m_LastWord.AsStringC(), word)) m_CodeRanges.push_back(range); } @@ -458,7 +433,7 @@ uint32_t CPDF_CMapParser::CMap_GetCode(const CFX_ByteStringC& word) { } // Static. -bool CPDF_CMapParser::CMap_GetCodeRange(CMap_CodeRange& range, +bool CPDF_CMapParser::CMap_GetCodeRange(CPDF_CMap::CodeRange& range, const CFX_ByteStringC& first, const CFX_ByteStringC& second) { if (first.GetLength() == 0 || first.GetAt(0) != '<') @@ -503,13 +478,12 @@ CPDF_CMap::CPDF_CMap() { m_bLoaded = false; m_pMapping = nullptr; m_pLeadingBytes = nullptr; - m_pAddMapping = nullptr; m_pEmbedMap = nullptr; m_nCodeRanges = 0; } + CPDF_CMap::~CPDF_CMap() { FX_Free(m_pMapping); - FX_Free(m_pAddMapping); FX_Free(m_pLeadingBytes); } @@ -567,8 +541,7 @@ void CPDF_CMap::LoadPredefined(CPDF_CMapManager* pMgr, void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) { m_pMapping = FX_Alloc(uint16_t, 65536); - CPDF_CMapParser parser; - parser.Initialize(this); + CPDF_CMapParser parser(this); CPDF_SimpleParser syntax(pData, size); while (1) { CFX_ByteStringC word = syntax.GetWord(); @@ -577,37 +550,37 @@ void CPDF_CMap::LoadEmbedded(const uint8_t* pData, uint32_t size) { } parser.ParseWord(word); } - if (m_CodingScheme == MixedFourBytes && parser.m_AddMaps.GetSize()) { - m_pAddMapping = FX_Alloc(uint8_t, parser.m_AddMaps.GetSize() + 4); - *(uint32_t*)m_pAddMapping = parser.m_AddMaps.GetSize() / 8; - memcpy(m_pAddMapping + 4, parser.m_AddMaps.GetBuffer(), - parser.m_AddMaps.GetSize()); - qsort(m_pAddMapping + 4, parser.m_AddMaps.GetSize() / 8, 8, CompareDWORD); + if (m_CodingScheme == MixedFourBytes && parser.HasAddMaps()) { + m_AddMapping = parser.TakeAddMaps(); + std::sort( + m_AddMapping.begin(), m_AddMapping.end(), + [](const CPDF_CMap::CIDRange& arg1, const CPDF_CMap::CIDRange& arg2) { + return arg1.m_EndCode < arg2.m_EndCode; + }); } } uint16_t CPDF_CMap::CIDFromCharCode(uint32_t charcode) const { - if (m_Coding == CIDCODING_CID) { - return (uint16_t)charcode; - } - if (m_pEmbedMap) { + if (m_Coding == CIDCODING_CID) + return static_cast(charcode); + + if (m_pEmbedMap) return FPDFAPI_CIDFromCharCode(m_pEmbedMap, charcode); - } - if (!m_pMapping) { - return (uint16_t)charcode; - } - if (charcode >> 16) { - if (m_pAddMapping) { - void* found = bsearch(&charcode, m_pAddMapping + 4, - *(uint32_t*)m_pAddMapping, 8, CompareCID); - if (!found) - return 0; - return (uint16_t)(((uint32_t*)found)[1] % 65536 + charcode - - *(uint32_t*)found); - } + + if (!m_pMapping) + return static_cast(charcode); + + if (charcode < 0x10000) + return m_pMapping[charcode]; + + auto it = std::lower_bound(m_AddMapping.begin(), m_AddMapping.end(), charcode, + [](const CPDF_CMap::CIDRange& arg, uint32_t val) { + return arg.m_EndCode < val; + }); + if (it == m_AddMapping.end() || it->m_StartCode > charcode) return 0; - } - return m_pMapping[charcode]; + + return it->m_StartCID + charcode - it->m_StartCode; } uint32_t CPDF_CMap::GetNextChar(const char* pString, @@ -632,7 +605,7 @@ uint32_t CPDF_CMap::GetNextChar(const char* pString, uint8_t codes[4]; int char_size = 1; codes[0] = ((uint8_t*)pString)[offset++]; - CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes; + auto* pRanges = reinterpret_cast(m_pLeadingBytes); while (1) { int ret = CheckCodeRange(codes, char_size, pRanges, m_nCodeRanges); if (ret == 0) { @@ -716,7 +689,8 @@ int CPDF_CMap::AppendChar(char* str, uint32_t charcode) const { case MixedTwoBytes: case MixedFourBytes: if (charcode < 0x100) { - CMap_CodeRange* pRanges = (CMap_CodeRange*)m_pLeadingBytes; + auto* pRanges = + reinterpret_cast(m_pLeadingBytes); int iSize = GetCharSizeImpl(charcode, pRanges, m_nCodeRanges); if (iSize == 0) { iSize = 1; diff --git a/core/fpdfapi/font/fpdf_font_cid_unittest.cpp b/core/fpdfapi/font/fpdf_font_cid_unittest.cpp index 53f5e47f92..813a3a9d9b 100644 --- a/core/fpdfapi/font/fpdf_font_cid_unittest.cpp +++ b/core/fpdfapi/font/fpdf_font_cid_unittest.cpp @@ -36,7 +36,7 @@ TEST(fpdf_font_cid, CMap_GetCode) { } TEST(fpdf_font_cid, CMap_GetCodeRange) { - CMap_CodeRange range; + CPDF_CMap::CodeRange range; // Must start with a < EXPECT_FALSE(CPDF_CMapParser::CMap_GetCodeRange(range, "", "")); -- cgit v1.2.3