From 0c8e6c1f39f20985a3efb17292e46c476194183a Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Thu, 19 Nov 2015 23:12:02 -0800 Subject: Cache object numbers in CPDF_Parser::ParseIndirectObject(). R=jun_fang@foxitsoftware.com, tsepez@chromium.org Review URL: https://codereview.chromium.org/1458633004 . --- core/include/fpdfapi/fpdf_parser.h | 17 ++++++++++++-- .../src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 27 +++++++++++++++------- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h index 174430e440..efb955be21 100644 --- a/core/include/fpdfapi/fpdf_parser.h +++ b/core/include/fpdfapi/fpdf_parser.h @@ -7,6 +7,8 @@ #ifndef CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ #define CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_ +#include <map> + #include "core/include/fxcrt/fx_system.h" #include "fpdf_objects.h" #include "third_party/base/nonstd_unique_ptr.h" @@ -498,8 +500,6 @@ class CPDF_Parser { FX_DWORD LoadLinearizedMainXRefTable(); - CFX_MapPtrToPtr m_ObjectStreamMap; - CPDF_StreamAcc* GetObjectStream(FX_DWORD number); FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset); @@ -543,9 +543,22 @@ class CPDF_Parser { FX_DWORD m_dwFirstPageNo; FX_DWORD m_dwXrefStartObjNum; + + // A map of object numbers to indirect streams. Map owns the streams. + CFX_MapPtrToPtr m_ObjectStreamMap; + + // Mapping of object numbers to offsets. The offsets are relative to the first + // object in the stream. + using StreamObjectCache = std::map<FX_DWORD, FX_DWORD>; + + // Mapping of streams to their object caches. This is valid as long as the + // streams in |m_ObjectStreamMap| are valid. 
+ std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache; + friend class CPDF_Creator; friend class CPDF_DataAvail; }; + #define FXCIPHER_NONE 0 #define FXCIPHER_RC4 1 #define FXCIPHER_AES 2 diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index 6251748d3e..2105635687 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -141,6 +141,8 @@ void CPDF_Parser::CloseParser(FX_BOOL bReParse) { delete pStream; } m_ObjectStreamMap.RemoveAll(); + m_ObjCache.clear(); + m_SortedOffset.RemoveAll(); m_CrossRef.RemoveAll(); m_V5Type.RemoveAll(); @@ -1200,16 +1202,23 @@ CPDF_Object* CPDF_Parser::ParseIndirectObject(CPDF_IndirectObjects* pObjList, (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE)); CPDF_SyntaxParser syntax; syntax.InitParser(file.get(), 0); - int32_t offset = GetStreamFirst(pObjStream); - for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { - FX_DWORD thisnum = syntax.GetDirectNum(); - FX_DWORD thisoff = syntax.GetDirectNum(); - if (thisnum == objnum) { - syntax.RestorePos(offset + thisoff); - return syntax.GetObject(pObjList, 0, 0, pContext); + const int32_t offset = GetStreamFirst(pObjStream); + + // Read object numbers from |pObjStream| into a cache. 
+ if (m_ObjCache.find(pObjStream) == m_ObjCache.end()) { + for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { + FX_DWORD thisnum = syntax.GetDirectNum(); + FX_DWORD thisoff = syntax.GetDirectNum(); + m_ObjCache[pObjStream][thisnum] = thisoff; } } - return nullptr; + + const auto it = m_ObjCache[pObjStream].find(objnum); + if (it == m_ObjCache[pObjStream].end()) + return nullptr; + + syntax.RestorePos(offset + it->second); + return syntax.GetObject(pObjList, 0, 0, pContext); } CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) { @@ -1659,6 +1668,8 @@ FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable() { delete pStream; } m_ObjectStreamMap.RemoveAll(); + m_ObjCache.clear(); + if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { m_LastXRefOffset = 0; -- cgit v1.2.3