diff options
author | Artem Strygin <art-snake@yandex-team.ru> | 2018-06-27 18:15:10 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2018-06-27 18:15:10 +0000 |
commit | 20eca1e383b7dce69cd791d42bda3558a3966301 (patch) | |
tree | d3fded7303e8326d3883ca51b6bed2ad2438b6da /core/fpdfapi/parser | |
parent | 00ba8bbea0ff57d6f11257736408e530e54ef642 (diff) | |
download | pdfium-20eca1e383b7dce69cd791d42bda3558a3966301.tar.xz |
Rework the loading of CPDF_Document.
Improve CPDF_Document interface.
Fix relationship between CPDF_Document and CPDF_Parser.
This CL changes CPDF_Document to internally create the CPDF_Parser
and removes the need for the CPDF_Parser to know about the CPDF_Document.
Change-Id: Iec7aef19575c90f30b9a6c919dfd4f4417e4caf2
Reviewed-on: https://pdfium-review.googlesource.com/35630
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Reviewed-by: dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'core/fpdfapi/parser')
-rw-r--r-- | core/fpdfapi/parser/cpdf_data_avail.cpp | 12 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_data_avail.h | 1 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_document.cpp | 39 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_document.h | 21 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_document_unittest.cpp | 16 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.cpp | 103 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.h | 29 |
7 files changed, 137 insertions, 84 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index acc01c65ed..3b39e3f54b 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -214,7 +214,7 @@ std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum, std::unique_ptr<CPDF_Object> pRet; if (pParser) { const CPDF_ReadValidator::Session read_session(GetValidator().Get()); - pRet = pParser->ParseIndirectObject(nullptr, objnum); + pRet = pParser->ParseIndirectObject(objnum); if (GetValidator()->has_read_problems()) return nullptr; } @@ -233,7 +233,7 @@ bool CPDF_DataAvail::CheckInfo() { } const CPDF_ReadValidator::Session read_session(GetValidator().Get()); - m_parser.ParseIndirectObject(nullptr, dwInfoObjNum); + m_parser.ParseIndirectObject(dwInfoObjNum); if (GetValidator()->has_read_problems()) return false; @@ -249,7 +249,7 @@ bool CPDF_DataAvail::CheckRoot() { } const CPDF_ReadValidator::Session read_session(GetValidator().Get()); - m_pRoot = ToDictionary(m_parser.ParseIndirectObject(nullptr, dwRootObjNum)); + m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum)); if (GetValidator()->has_read_problems()) return false; @@ -1011,11 +1011,11 @@ CPDF_DataAvail::ParseDocument(const char* password) { } auto parser = pdfium::MakeUnique<CPDF_Parser>(); parser->SetPassword(password); - auto document = pdfium::MakeUnique<CPDF_Document>(std::move(parser)); + auto document = pdfium::MakeUnique<CPDF_Document>(); CPDF_ReadValidator::Session read_session(GetValidator().Get()); - CPDF_Parser::Error error = document->GetParser()->StartLinearizedParse( - GetValidator(), document.get()); + CPDF_Parser::Error error = + document->LoadLinearizedDoc(GetValidator(), password); // Additional check, that all ok. 
if (GetValidator()->has_read_problems()) { diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index 8872bb103f..b6396bf82a 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -19,6 +19,7 @@ class CPDF_CrossRefAvail; class CPDF_Dictionary; +class CPDF_Document; class CPDF_HintTables; class CPDF_IndirectObjectHolder; class CPDF_LinearizedHeader; diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp index 8f727ed14a..91c6b9c6f9 100644 --- a/core/fpdfapi/parser/cpdf_document.cpp +++ b/core/fpdfapi/parser/cpdf_document.cpp @@ -22,6 +22,7 @@ #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_parser.h" +#include "core/fpdfapi/parser/cpdf_read_validator.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/cpdf_string.h" @@ -185,17 +186,13 @@ std::unique_ptr<CPDF_Dictionary> CalculateFontDesc( } // namespace -CPDF_Document::CPDF_Document(std::unique_ptr<CPDF_Parser> pParser) - : CPDF_IndirectObjectHolder(), - m_pParser(std::move(pParser)), +CPDF_Document::CPDF_Document() + : ParsedObjectsHolder(), m_pRootDict(nullptr), m_iNextPageToTraverse(0), m_bReachedMaxPageLevel(false), m_pDocPage(pdfium::MakeUnique<CPDF_DocPageData>(this)), - m_pDocRender(pdfium::MakeUnique<CPDF_DocRenderData>(this)) { - if (pParser) - SetLastObjNum(m_pParser->GetLastObjNum()); -} + m_pDocRender(pdfium::MakeUnique<CPDF_DocRenderData>(this)) {} CPDF_Document::~CPDF_Document() { CPDF_ModuleMgr::Get()->GetPageModule()->ClearStockFont(this); @@ -203,7 +200,7 @@ CPDF_Document::~CPDF_Document() { std::unique_ptr<CPDF_Object> CPDF_Document::ParseIndirectObject( uint32_t objnum) { - return m_pParser ? m_pParser->ParseIndirectObject(this, objnum) : nullptr; + return m_pParser ? 
m_pParser->ParseIndirectObject(objnum) : nullptr; } void CPDF_Document::LoadDocInternal() { @@ -218,9 +215,28 @@ void CPDF_Document::LoadDocInternal() { return; } -void CPDF_Document::LoadDoc() { +bool CPDF_Document::TryInit() { LoadDocInternal(); LoadPages(); + return GetRoot() && (GetPageCount() > 0); +} + +CPDF_Parser::Error CPDF_Document::LoadDoc( + const RetainPtr<IFX_SeekableReadStream>& pFileAccess, + const char* password) { + if (!m_pParser) + SetParser(pdfium::MakeUnique<CPDF_Parser>(this)); + + return m_pParser->StartParse(pFileAccess, password); +} + +CPDF_Parser::Error CPDF_Document::LoadLinearizedDoc( + const RetainPtr<CPDF_ReadValidator>& validator, + const char* password) { + if (!m_pParser) + SetParser(pdfium::MakeUnique<CPDF_Parser>(this)); + + return m_pParser->StartLinearizedParse(validator, password); } void CPDF_Document::LoadPages() { @@ -309,6 +325,11 @@ void CPDF_Document::ResetTraversal() { m_pTreeTraversal.clear(); } +void CPDF_Document::SetParser(std::unique_ptr<CPDF_Parser> pParser) { + DCHECK(!m_pParser); + m_pParser = std::move(pParser); +} + const CPDF_Dictionary* CPDF_Document::GetPagesDict() const { const CPDF_Dictionary* pRoot = GetRoot(); return pRoot ? 
pRoot->GetDictFor("Pages") : nullptr; diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h index 8d0aa08009..328f09b6ef 100644 --- a/core/fpdfapi/parser/cpdf_document.h +++ b/core/fpdfapi/parser/cpdf_document.h @@ -15,9 +15,10 @@ #include "core/fpdfapi/page/cpdf_image.h" #include "core/fpdfapi/page/cpdf_page.h" -#include "core/fpdfapi/parser/cpdf_indirect_object_holder.h" #include "core/fpdfapi/parser/cpdf_object.h" +#include "core/fpdfapi/parser/cpdf_parser.h" #include "core/fpdfdoc/cpdf_linklist.h" +#include "core/fxcrt/retain_ptr.h" class CFX_Font; class CFX_Matrix; @@ -30,7 +31,9 @@ class CPDF_IccProfile; class CPDF_LinearizedHeader; class CPDF_Parser; class CPDF_Pattern; +class CPDF_ReadValidator; class CPDF_StreamAcc; +class IFX_SeekableReadStream; class JBig2_DocumentContext; #define FPDFPERM_MODIFY 0x0008 @@ -38,7 +41,7 @@ class JBig2_DocumentContext; #define FPDFPERM_FILL_FORM 0x0100 #define FPDFPERM_EXTRACT_ACCESS 0x0200 -class CPDF_Document : public CPDF_IndirectObjectHolder { +class CPDF_Document : public CPDF_Parser::ParsedObjectsHolder { public: // Type from which the XFA extension can subclass itself. 
class Extension { @@ -52,7 +55,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { static const int kPageMaxNum = 0xFFFFF; - explicit CPDF_Document(std::unique_ptr<CPDF_Parser> pParser); + CPDF_Document(); ~CPDF_Document() override; Extension* GetExtension() const { return m_pExtension.get(); } @@ -96,7 +99,16 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { RetainPtr<CPDF_StreamAcc> LoadFontFile(const CPDF_Stream* pStream); RetainPtr<CPDF_IccProfile> LoadIccProfile(const CPDF_Stream* pStream); - void LoadDoc(); + // CPDF_Parser::ParsedObjectsHolder overrides: + bool TryInit() override; + + CPDF_Parser::Error LoadDoc( + const RetainPtr<IFX_SeekableReadStream>& pFileAccess, + const char* password); + CPDF_Parser::Error LoadLinearizedDoc( + const RetainPtr<CPDF_ReadValidator>& validator, + const char* password); + void LoadPages(); void CreateNewDoc(); @@ -144,6 +156,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { std::set<CPDF_Dictionary*>* pVisited); bool InsertNewPage(int iPage, CPDF_Dictionary* pPageDict); void ResetTraversal(); + void SetParser(std::unique_ptr<CPDF_Parser> pParser); std::unique_ptr<CPDF_Parser> m_pParser; UnownedPtr<CPDF_Dictionary> m_pRootDict; diff --git a/core/fpdfapi/parser/cpdf_document_unittest.cpp b/core/fpdfapi/parser/cpdf_document_unittest.cpp index 522aa65fc9..c64da6382a 100644 --- a/core/fpdfapi/parser/cpdf_document_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_document_unittest.cpp @@ -49,7 +49,7 @@ std::unique_ptr<CPDF_Dictionary> CreateNumberedPage(size_t number) { class CPDF_TestDocumentForPages : public CPDF_Document { public: - CPDF_TestDocumentForPages() : CPDF_Document(nullptr) { + CPDF_TestDocumentForPages() : CPDF_Document() { // Set up test auto zeroToTwo = pdfium::MakeUnique<CPDF_Array>(); zeroToTwo->AddNew<CPDF_Reference>( @@ -101,7 +101,7 @@ class CPDF_TestDocumentForPages : public CPDF_Document { class CPDF_TestDocumentWithPageWithoutPageNum : public CPDF_Document { public: - 
CPDF_TestDocumentWithPageWithoutPageNum() : CPDF_Document(nullptr) { + CPDF_TestDocumentWithPageWithoutPageNum() : CPDF_Document() { // Set up test auto allPages = pdfium::MakeUnique<CPDF_Array>(); allPages->AddNew<CPDF_Reference>( @@ -132,7 +132,7 @@ class TestLinearized : public CPDF_LinearizedHeader { class CPDF_TestDocPagesWithoutKids : public CPDF_Document { public: - CPDF_TestDocPagesWithoutKids() : CPDF_Document(nullptr) { + CPDF_TestDocPagesWithoutKids() : CPDF_Document() { CPDF_Dictionary* pagesDict = NewIndirect<CPDF_Dictionary>(); pagesDict->SetNewFor<CPDF_Name>("Type", "Pages"); pagesDict->SetNewFor<CPDF_Number>("Count", 3); @@ -143,6 +143,11 @@ class CPDF_TestDocPagesWithoutKids : public CPDF_Document { } }; +class CPDF_TestDocumentAllowSetParser : public CPDF_Document { + public: + using CPDF_Document::SetParser; +}; + } // namespace class cpdf_document_test : public testing::Test { @@ -222,8 +227,9 @@ TEST_F(cpdf_document_test, UseCachedPageObjNumIfHaveNotPagesDict) { auto linearized = pdfium::MakeUnique<TestLinearized>(dict.get()); auto parser = pdfium::MakeUnique<CPDF_Parser>(); parser->SetLinearizedHeader(std::move(linearized)); - CPDF_Document document(std::move(parser)); - document.LoadDoc(); + CPDF_TestDocumentAllowSetParser document; + document.SetParser(std::move(parser)); + document.LoadPages(); ASSERT_EQ(page_count, document.GetPageCount()); CPDF_Object* page_stub = document.NewIndirect<CPDF_Dictionary>(); const uint32_t obj_num = page_stub->GetObjNum(); diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index c7a3fe16c8..3986f3684f 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -47,6 +47,13 @@ uint32_t GetVarInt(const uint8_t* p, int32_t n) { return result; } +class ObjectsHolderStub : public CPDF_Parser::ParsedObjectsHolder { + public: + ObjectsHolderStub() = default; + ~ObjectsHolderStub() override = default; + bool TryInit() override { return true; } +}; 
+ } // namespace class CPDF_Parser::TrailerData { @@ -117,12 +124,20 @@ class CPDF_Parser::TrailerData { uint32_t last_root_obj_num_ = CPDF_Object::kInvalidObjNum; }; -CPDF_Parser::CPDF_Parser() +CPDF_Parser::CPDF_Parser(ParsedObjectsHolder* holder) : m_pSyntax(pdfium::MakeUnique<CPDF_SyntaxParser>()), + m_pObjectsHolder(holder), m_bHasParsed(false), m_bXRefStream(false), m_FileVersion(0), - m_TrailerData(pdfium::MakeUnique<TrailerData>()) {} + m_TrailerData(pdfium::MakeUnique<TrailerData>()) { + if (!holder) { + m_pOwnedObjectsHolder = pdfium::MakeUnique<ObjectsHolderStub>(); + m_pObjectsHolder = m_pOwnedObjectsHolder.get(); + } +} + +CPDF_Parser::CPDF_Parser() : CPDF_Parser(nullptr) {} CPDF_Parser::~CPDF_Parser() { ReleaseEncryptHandler(); @@ -225,20 +240,19 @@ bool CPDF_Parser::ParseFileVersion() { CPDF_Parser::Error CPDF_Parser::StartParse( const RetainPtr<IFX_SeekableReadStream>& pFileAccess, - CPDF_Document* pDocument) { + const char* password) { if (!InitSyntaxParser( pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr))) return FORMAT_ERROR; - return StartParseInternal(pDocument); + SetPassword(password); + return StartParseInternal(); } -CPDF_Parser::Error CPDF_Parser::StartParseInternal(CPDF_Document* pDocument) { +CPDF_Parser::Error CPDF_Parser::StartParseInternal() { ASSERT(!m_bHasParsed); m_bHasParsed = true; m_bXRefStream = false; - m_pDocument = pDocument; - bool bXRefRebuilt = false; m_LastXRefOffset = ParseStartXRef(); @@ -262,8 +276,7 @@ CPDF_Parser::Error CPDF_Parser::StartParseInternal(CPDF_Document* pDocument) { if (eRet != SUCCESS) return eRet; - m_pDocument->LoadDoc(); - if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { + if (!GetRoot() || !m_pObjectsHolder->TryInit()) { if (bXRefRebuilt) return FORMAT_ERROR; @@ -275,8 +288,8 @@ CPDF_Parser::Error CPDF_Parser::StartParseInternal(CPDF_Document* pDocument) { if (eRet != SUCCESS) return eRet; - m_pDocument->LoadDoc(); - if (!m_pDocument->GetRoot()) + 
m_pObjectsHolder->TryInit(); + if (!GetRoot()) return FORMAT_ERROR; } if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) { @@ -290,7 +303,7 @@ CPDF_Parser::Error CPDF_Parser::StartParseInternal(CPDF_Document* pDocument) { } if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { CPDF_Reference* pMetadata = - ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")); + ToReference(GetRoot()->GetObjectFor("Metadata")); if (pMetadata) m_MetadataObjnum = pMetadata->GetRefObjNum(); } @@ -330,7 +343,8 @@ CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { SetEncryptDictionary(pEncryptDict); } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { - pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum()); + pEncryptObj = + m_pObjectsHolder->GetOrParseIndirectObject(pRef->GetRefObjNum()); if (pEncryptObj) SetEncryptDictionary(pEncryptObj->GetDict()); } @@ -955,8 +969,7 @@ bool CPDF_Parser::RebuildCrossRef() { } bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) { - std::unique_ptr<CPDF_Object> pObject( - ParseIndirectObjectAt(m_pDocument.Get(), *pos, 0)); + std::unique_ptr<CPDF_Object> pObject(ParseIndirectObjectAt(*pos, 0)); if (!pObject) return false; @@ -965,14 +978,12 @@ bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) { return false; CPDF_Object* pUnownedObject = pObject.get(); - if (m_pDocument) { - const CPDF_Dictionary* pRootDict = m_pDocument->GetRoot(); - if (pRootDict && pRootDict->GetObjNum() == objnum) - return false; - if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration( - objnum, std::move(pObject))) { - return false; - } + const CPDF_Dictionary* pRootDict = GetRoot(); + if (pRootDict && pRootDict->GetObjNum() == objnum) + return false; + if (!m_pObjectsHolder->ReplaceIndirectObjectIfHigherGeneration( + objnum, std::move(pObject))) { + return false; } CPDF_Stream* pStream = pUnownedObject->AsStream(); @@ 
-1104,6 +1115,12 @@ const CPDF_Array* CPDF_Parser::GetIDArray() const { return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr; } +CPDF_Dictionary* CPDF_Parser::GetRoot() const { + CPDF_Object* obj = + m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum()); + return obj ? obj->GetDict() : nullptr; +} + CPDF_Dictionary* CPDF_Parser::GetTrailer() const { return m_TrailerData->GetMainTrailer(); } @@ -1121,7 +1138,6 @@ uint32_t CPDF_Parser::GetRootObjNum() const { } std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject( - CPDF_IndirectObjectHolder* pObjList, uint32_t objnum) { if (!IsValidObjectNumber(objnum)) return nullptr; @@ -1135,22 +1151,20 @@ std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject( FX_FILESIZE pos = m_ObjectInfo[objnum].pos; if (pos <= 0) return nullptr; - return ParseIndirectObjectAt(pObjList, pos, objnum); + return ParseIndirectObjectAt(pos, objnum); } if (GetObjectType(objnum) != ObjectType::kCompressed) return nullptr; const CPDF_ObjectStream* pObjStream = - GetObjectStream(pObjList, m_ObjectInfo[objnum].pos); + GetObjectStream(m_ObjectInfo[objnum].pos); if (!pObjStream) return nullptr; - return pObjStream->ParseObject(pObjList, objnum); + return pObjStream->ParseObject(m_pObjectsHolder.Get(), objnum); } -const CPDF_ObjectStream* CPDF_Parser::GetObjectStream( - CPDF_IndirectObjectHolder* pObjList, - uint32_t object_number) { +const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) { // Prevent circular parsing the same object. 
if (pdfium::ContainsKey(m_ParsingObjNums, object_number)) return nullptr; @@ -1167,7 +1181,7 @@ const CPDF_ObjectStream* CPDF_Parser::GetObjectStream( return nullptr; std::unique_ptr<CPDF_Object> object = - ParseIndirectObjectAt(pObjList, object_pos, object_number); + ParseIndirectObjectAt(object_pos, object_number); if (!object) return nullptr; @@ -1180,22 +1194,21 @@ const CPDF_ObjectStream* CPDF_Parser::GetObjectStream( } std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt( - CPDF_IndirectObjectHolder* pObjList, FX_FILESIZE pos, uint32_t objnum) { return ParseIndirectObjectAtInternal( - pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kLoose, nullptr); + pos, objnum, CPDF_SyntaxParser::ParseType::kLoose, nullptr); } std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtInternal( - CPDF_IndirectObjectHolder* pObjList, FX_FILESIZE pos, uint32_t objnum, CPDF_SyntaxParser::ParseType parse_type, FX_FILESIZE* pResultPos) { const FX_FILESIZE saved_pos = m_pSyntax->GetPos(); m_pSyntax->SetPos(pos); - auto result = m_pSyntax->GetIndirectObject(pObjList, parse_type); + auto result = + m_pSyntax->GetIndirectObject(m_pObjectsHolder.Get(), parse_type); if (pResultPos) *pResultPos = m_pSyntax->GetPos(); @@ -1215,12 +1228,11 @@ std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtInternal( } std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtByStrict( - CPDF_IndirectObjectHolder* pObjList, FX_FILESIZE pos, uint32_t objnum, FX_FILESIZE* pResultPos) { return ParseIndirectObjectAtInternal( - pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kStrict, pResultPos); + pos, objnum, CPDF_SyntaxParser::ParseType::kStrict, pResultPos); } uint32_t CPDF_Parser::GetFirstPageNo() const { @@ -1236,7 +1248,7 @@ std::unique_ptr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() { if (m_pSyntax->GetKeyword() != "trailer") return nullptr; - return ToDictionary(m_pSyntax->GetObjectBody(m_pDocument.Get())); + return 
ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder.Get())); } uint32_t CPDF_Parser::GetPermissions() const { @@ -1258,8 +1270,9 @@ std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() { CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( const RetainPtr<CPDF_ReadValidator>& validator, - CPDF_Document* pDocument) { + const char* password) { ASSERT(!m_bHasParsed); + SetPassword(password); m_bXRefStream = false; m_LastXRefOffset = 0; @@ -1268,10 +1281,9 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( m_pLinearized = ParseLinearizedHeader(); if (!m_pLinearized) - return StartParseInternal(std::move(pDocument)); + return StartParseInternal(); m_bHasParsed = true; - m_pDocument = pDocument; m_LastXRefOffset = m_pLinearized->GetLastXRefOffset(); FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset; @@ -1299,8 +1311,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( if (eRet != SUCCESS) return eRet; - m_pDocument->LoadDoc(); - if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { + if (!GetRoot() || !m_pObjectsHolder->TryInit()) { if (bXRefRebuilt) return FORMAT_ERROR; @@ -1312,8 +1323,8 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( if (eRet != SUCCESS) return eRet; - m_pDocument->LoadDoc(); - if (!m_pDocument->GetRoot()) + m_pObjectsHolder->TryInit(); + if (!GetRoot()) return FORMAT_ERROR; } @@ -1329,7 +1340,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { if (CPDF_Reference* pMetadata = - ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"))) + ToReference(GetRoot()->GetObjectFor("Metadata"))) m_MetadataObjnum = pMetadata->GetRefObjNum(); } return SUCCESS; diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h index f859db5d16..1a1f9b0de4 100644 --- a/core/fpdfapi/parser/cpdf_parser.h +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -14,6 +14,7 @@ #include <vector> #include 
"core/fpdfapi/parser/cpdf_cross_ref_table.h" +#include "core/fpdfapi/parser/cpdf_indirect_object_holder.h" #include "core/fpdfapi/parser/cpdf_syntax_parser.h" #include "core/fxcrt/fx_string.h" #include "core/fxcrt/fx_system.h" @@ -23,8 +24,6 @@ class CPDF_Array; class CPDF_CryptoHandler; class CPDF_Dictionary; -class CPDF_Document; -class CPDF_IndirectObjectHolder; class CPDF_LinearizedHeader; class CPDF_Object; class CPDF_ObjectStream; @@ -35,6 +34,11 @@ class IFX_SeekableReadStream; class CPDF_Parser { public: + class ParsedObjectsHolder : public CPDF_IndirectObjectHolder { + public: + virtual bool TryInit() = 0; + }; + enum Error { SUCCESS = 0, FILE_ERROR, @@ -49,13 +53,14 @@ class CPDF_Parser { static const size_t kInvalidPos = std::numeric_limits<size_t>::max(); + explicit CPDF_Parser(ParsedObjectsHolder* holder); CPDF_Parser(); ~CPDF_Parser(); Error StartParse(const RetainPtr<IFX_SeekableReadStream>& pFile, - CPDF_Document* pDocument); + const char* password); Error StartLinearizedParse(const RetainPtr<CPDF_ReadValidator>& validator, - CPDF_Document* pDocument); + const char* password); void SetPassword(const char* password) { m_Password = password; } ByteString GetPassword() const { return m_Password; } @@ -72,12 +77,11 @@ class CPDF_Parser { uint32_t GetRootObjNum() const; uint32_t GetInfoObjNum() const; const CPDF_Array* GetIDArray() const; + CPDF_Dictionary* GetRoot() const; CPDF_Dictionary* GetEncryptDict() const { return m_pEncryptDict.Get(); } - std::unique_ptr<CPDF_Object> ParseIndirectObject( - CPDF_IndirectObjectHolder* pObjList, - uint32_t objnum); + std::unique_ptr<CPDF_Object> ParseIndirectObject(uint32_t objnum); uint32_t GetLastObjNum() const; bool IsValidObjectNumber(uint32_t objnum) const; @@ -96,12 +100,10 @@ class CPDF_Parser { bool IsXRefStream() const { return m_bXRefStream; } std::unique_ptr<CPDF_Object> ParseIndirectObjectAt( - CPDF_IndirectObjectHolder* pObjList, FX_FILESIZE pos, uint32_t objnum); std::unique_ptr<CPDF_Object> 
ParseIndirectObjectAtByStrict( - CPDF_IndirectObjectHolder* pObjList, FX_FILESIZE pos, uint32_t objnum, FX_FILESIZE* pResultPos); @@ -150,7 +152,7 @@ class CPDF_Parser { ObjectInfo info; }; - Error StartParseInternal(CPDF_Document* pDocument); + Error StartParseInternal(); FX_FILESIZE ParseStartXRef(); bool LoadAllCrossRefV4(FX_FILESIZE pos); bool LoadAllCrossRefV5(FX_FILESIZE pos); @@ -161,8 +163,7 @@ class CPDF_Parser { bool LoadLinearizedAllCrossRefV4(FX_FILESIZE pos); bool LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); Error LoadLinearizedMainXRefTable(); - const CPDF_ObjectStream* GetObjectStream(CPDF_IndirectObjectHolder* pObjList, - uint32_t object_number); + const CPDF_ObjectStream* GetObjectStream(uint32_t object_number); std::unique_ptr<CPDF_LinearizedHeader> ParseLinearizedHeader(); void SetEncryptDictionary(CPDF_Dictionary* pDict); void ShrinkObjectMap(uint32_t size); @@ -180,7 +181,6 @@ class CPDF_Parser { void MergeCrossRefObjectsData(const std::vector<CrossRefObjData>& objects); std::unique_ptr<CPDF_Object> ParseIndirectObjectAtInternal( - CPDF_IndirectObjectHolder* pObjList, FX_FILESIZE pos, uint32_t objnum, CPDF_SyntaxParser::ParseType parse_type, @@ -193,7 +193,8 @@ class CPDF_Parser { ObjectType GetObjectTypeFromCrossRefStreamType( int cross_ref_stream_type) const; - UnownedPtr<CPDF_Document> m_pDocument; + std::unique_ptr<ParsedObjectsHolder> m_pOwnedObjectsHolder; + UnownedPtr<ParsedObjectsHolder> m_pObjectsHolder; bool m_bHasParsed; bool m_bXRefStream; |