diff options
Diffstat (limited to 'core/fpdfapi/parser')
-rw-r--r-- | core/fpdfapi/parser/cpdf_document.cpp | 88 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_document.h | 14 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_document_unittest.cpp | 27 |
3 files changed, 97 insertions, 32 deletions
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp index 64574047e5..8e181de97c 100644 --- a/core/fpdfapi/parser/cpdf_document.cpp +++ b/core/fpdfapi/parser/cpdf_document.cpp @@ -336,6 +336,7 @@ CPDF_Document::CPDF_Document(std::unique_ptr<CPDF_Parser> pParser) m_pParser(std::move(pParser)), m_pRootDict(nullptr), m_pInfoDict(nullptr), + m_iNextPageToTraverse(0), m_bLinearized(false), m_iFirstPageNo(0), m_dwFirstPageObjNum(0), @@ -400,40 +401,72 @@ void CPDF_Document::LoadPages() { m_PageList.SetSize(RetrievePageCount()); } -CPDF_Dictionary* CPDF_Document::FindPDFPage(CPDF_Dictionary* pPages, - int iPage, - int nPagesToGo, - int level) { +CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage, + int* nPagesToGo, + size_t level) { + if (*nPagesToGo < 0) + return nullptr; + CPDF_Dictionary* pPages = m_pTreeTraversal[level].first; CPDF_Array* pKidList = pPages->GetArrayFor("Kids"); - if (!pKidList) - return nPagesToGo == 0 ? pPages : nullptr; + if (!pKidList) { + if (*nPagesToGo != 1) + return nullptr; + m_PageList.SetAt(iPage, pPages->GetObjNum()); + return pPages; + } - if (level >= FX_MAX_PAGE_LEVEL) + if (level >= FX_MAX_PAGE_LEVEL) { + m_pTreeTraversal.pop_back(); return nullptr; + } - for (size_t i = 0; i < pKidList->GetCount(); i++) { + CPDF_Dictionary* page = nullptr; + for (size_t i = m_pTreeTraversal[level].second; i < pKidList->GetCount(); + i++) { + if (*nPagesToGo == 0) + break; CPDF_Dictionary* pKid = pKidList->GetDictAt(i); if (!pKid) { - nPagesToGo--; + (*nPagesToGo)--; + m_pTreeTraversal[level].second++; continue; } - if (pKid == pPages) + if (pKid == pPages) { + m_pTreeTraversal[level].second++; continue; + } if (!pKid->KeyExist("Kids")) { - if (nPagesToGo == 0) - return pKid; - - m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum()); - nPagesToGo--; + m_PageList.SetAt(iPage - (*nPagesToGo) + 1, pKid->GetObjNum()); + (*nPagesToGo)--; + m_pTreeTraversal[level].second++; + if (*nPagesToGo == 0) { + page = 
pKid; + break; + } } else { - int nPages = pKid->GetIntegerFor("Count"); - if (nPagesToGo < nPages) - return FindPDFPage(pKid, iPage, nPagesToGo, level + 1); - - nPagesToGo -= nPages; + // If the vector has size level+1, the child is not in yet + if (m_pTreeTraversal.size() == level + 1) + m_pTreeTraversal.push_back(std::make_pair(pKid, 0)); + // Now m_pTreeTraversal[level+1] should exist and be equal to pKid. + CPDF_Dictionary* pageKid = TraversePDFPages(iPage, nPagesToGo, level + 1); + // Check if child was completely processed, i.e. it popped itself out + if (m_pTreeTraversal.size() == level + 1) + m_pTreeTraversal[level].second++; + // If child did not finish or if no pages to go, we are done + if (m_pTreeTraversal.size() != level + 1 || *nPagesToGo == 0) { + page = pageKid; + break; + } } } - return nullptr; + if (m_pTreeTraversal[level].second == pKidList->GetCount()) + m_pTreeTraversal.pop_back(); + return page; +} + +void CPDF_Document::ResetTraversal() { + m_iNextPageToTraverse = 0; + m_pTreeTraversal.clear(); } CPDF_Dictionary* CPDF_Document::GetPagesDict() const { @@ -460,17 +493,18 @@ CPDF_Dictionary* CPDF_Document::GetPage(int iPage) { if (objnum) { if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum))) return pDict; + return nullptr; } CPDF_Dictionary* pPages = GetPagesDict(); if (!pPages) return nullptr; - CPDF_Dictionary* pPage = FindPDFPage(pPages, iPage, iPage, 0); - if (!pPage) - return nullptr; - - m_PageList.SetAt(iPage, pPage->GetObjNum()); + if (m_pTreeTraversal.empty()) + m_pTreeTraversal.push_back(std::make_pair(pPages, 0)); + int nPagesToGo = iPage - m_iNextPageToTraverse + 1; + CPDF_Dictionary* pPage = TraversePDFPages(iPage, &nPagesToGo, 0); + m_iNextPageToTraverse = iPage + 1; return pPage; } @@ -664,6 +698,7 @@ bool CPDF_Document::InsertDeletePDFPage(CPDF_Dictionary* pPages, } pPages->SetIntegerFor( "Count", pPages->GetIntegerFor("Count") + (bInsert ? 
1 : -1)); + ResetTraversal(); break; } int nPages = pKid->GetIntegerFor("Count"); @@ -704,6 +739,7 @@ bool CPDF_Document::InsertNewPage(int iPage, CPDF_Dictionary* pPageDict) { pPagesList->Add(new CPDF_Reference(this, pPageDict->GetObjNum())); pPages->SetIntegerFor("Count", nPages + 1); pPageDict->SetReferenceFor("Parent", this, pPages->GetObjNum()); + ResetTraversal(); } else { std::set<CPDF_Dictionary*> stack = {pPages}; if (!InsertDeletePDFPage(pPages, iPage, pPageDict, true, &stack)) diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h index e1135260ee..0a99e42c3f 100644 --- a/core/fpdfapi/parser/cpdf_document.h +++ b/core/fpdfapi/parser/cpdf_document.h @@ -105,10 +105,8 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { protected: // Retrieve page count information by getting count value from the tree nodes int RetrievePageCount() const; - CPDF_Dictionary* FindPDFPage(CPDF_Dictionary* pPages, - int iPage, - int nPagesToGo, - int level); + // When this method is called, m_pTreeTraversal[level] exists. + CPDF_Dictionary* TraversePDFPages(int iPage, int* nPagesToGo, size_t level); int FindPageIndex(CPDF_Dictionary* pNode, uint32_t& skip_count, uint32_t objnum, @@ -130,10 +128,18 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { bool bInsert, std::set<CPDF_Dictionary*>* pVisited); bool InsertNewPage(int iPage, CPDF_Dictionary* pPageDict); + void ResetTraversal(); std::unique_ptr<CPDF_Parser> m_pParser; CPDF_Dictionary* m_pRootDict; CPDF_Dictionary* m_pInfoDict; + // Vector of pairs to know current position in the page tree. The index in the + // vector corresponds to the level being described. The pair contains a + // pointer to the dictionary being processed at the level, and an index of the + // child being processed within the dictionary's /Kids array. + std::vector<std::pair<CPDF_Dictionary*, size_t>> m_pTreeTraversal; + // Index of the next page that will be traversed from the page tree. 
+ int m_iNextPageToTraverse; bool m_bLinearized; int m_iFirstPageNo; uint32_t m_dwFirstPageObjNum; diff --git a/core/fpdfapi/parser/cpdf_document_unittest.cpp b/core/fpdfapi/parser/cpdf_document_unittest.cpp index c09665b716..71716a649e 100644 --- a/core/fpdfapi/parser/cpdf_document_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_document_unittest.cpp @@ -95,7 +95,7 @@ TEST_F(cpdf_document_test, GetPages) { for (int i = 0; i < 7; i++) { CPDF_Dictionary* page = document->GetPage(i); ASSERT_TRUE(page); - ASSERT_TRUE(page->GetObjectFor("PageNumbering")); + ASSERT_TRUE(page->KeyExist("PageNumbering")); EXPECT_EQ(i, page->GetIntegerFor("PageNumbering")); } CPDF_Dictionary* page = document->GetPage(7); @@ -108,13 +108,36 @@ TEST_F(cpdf_document_test, GetPagesReverseOrder) { for (int i = 6; i >= 0; i--) { CPDF_Dictionary* page = document->GetPage(i); ASSERT_TRUE(page); - ASSERT_TRUE(page->GetObjectFor("PageNumbering")); + ASSERT_TRUE(page->KeyExist("PageNumbering")); EXPECT_EQ(i, page->GetIntegerFor("PageNumbering")); } CPDF_Dictionary* page = document->GetPage(7); EXPECT_FALSE(page); } +TEST(cpdf_document, GetPagesInDisorder) { + std::unique_ptr<CPDF_TestDocumentForPages> document = + pdfium::MakeUnique<CPDF_TestDocumentForPages>(); + + CPDF_Dictionary* page = document->GetPage(1); + ASSERT_TRUE(page); + ASSERT_TRUE(page->KeyExist("PageNumbering")); + EXPECT_EQ(1, page->GetIntegerFor("PageNumbering")); + + page = document->GetPage(3); + ASSERT_TRUE(page); + ASSERT_TRUE(page->KeyExist("PageNumbering")); + EXPECT_EQ(3, page->GetIntegerFor("PageNumbering")); + + page = document->GetPage(7); + EXPECT_FALSE(page); + + page = document->GetPage(6); + ASSERT_TRUE(page); + ASSERT_TRUE(page->KeyExist("PageNumbering")); + EXPECT_EQ(6, page->GetIntegerFor("PageNumbering")); +} + TEST_F(cpdf_document_test, UseCachedPageObjNumIfHaveNotPagesDict) { // ObjNum can be added in CPDF_DataAvail::IsPageAvail, and PagesDict // can be not exists in this case. |