From 7403f8a2a0d87179a1ccd57ceccd2d84fa59c319 Mon Sep 17 00:00:00 2001 From: dsinclair Date: Thu, 20 Oct 2016 10:58:52 -0700 Subject: Revert of Traverse PDF page tree only once in CPDF_Document (patchset #4 id:60001 of https://codereview.chromium.org/2414423002/ ) Reason for revert: Possible cause of crbug.com/657897 reverting to find out. BUG=657897 Original issue's description: > Traverse PDF page tree only once in CPDF_Document > > In our current implementation of CPDF_Document::GetPage, we traverse > the PDF page tree until we find the index we are looking for. This is > slow when we do calls GetPage(0), GetPage(1), ... since in this case > the page tree will be traversed n times if there are n pages. This CL > makes sure the page tree is only traversed once. > > Time to load the PDF from the bug below in chrome official build: > Before this CL: 1 minute 40 seconds > After this CL: 5 seconds > > BUG=chromium:638513 > > Committed: https://pdfium.googlesource.com/pdfium/+/7c29e27dae139a205755c1a29b7f3ac8b36ec0da TBR=thestig@chromium.org,tsepez@chromium.org,npm@chromium.org # Not skipping CQ checks because original CL landed more than 1 days ago. BUG=chromium:638513 Review-Url: https://chromiumcodereview.appspot.com/2430313006 --- core/fpdfapi/parser/cpdf_document.cpp | 245 +++++++++++++++------------------- core/fpdfapi/parser/cpdf_document.h | 17 +-- 2 files changed, 115 insertions(+), 147 deletions(-) diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp index ba20ff6223..c5f64a790c 100644 --- a/core/fpdfapi/parser/cpdf_document.cpp +++ b/core/fpdfapi/parser/cpdf_document.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include "core/fpdfapi/cpdf_modulemgr.h" @@ -241,6 +240,83 @@ void InsertWidthArray1(CFX_Font* pFont, InsertWidthArrayImpl(widths, size, pWidthArray); } +int InsertDeletePDFPage(CPDF_Document* pDoc, + CPDF_Dictionary* pPages, + int nPagesToGo, + CPDF_Dictionary* pPage, + FX_BOOL bInsert, + std::set* pVisited) { + CPDF_Array* pKidList = pPages->GetArrayFor("Kids"); + if (!pKidList) + return -1; + + for (size_t i = 0; i < pKidList->GetCount(); i++) { + CPDF_Dictionary* pKid = pKidList->GetDictAt(i); + if (pKid->GetStringFor("Type") == "Page") { + if (nPagesToGo == 0) { + if (bInsert) { + pKidList->InsertAt(i, new CPDF_Reference(pDoc, pPage->GetObjNum())); + pPage->SetReferenceFor("Parent", pDoc, pPages->GetObjNum()); + } else { + pKidList->RemoveAt(i); + } + pPages->SetIntegerFor( + "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1)); + return 1; + } + nPagesToGo--; + } else { + int nPages = pKid->GetIntegerFor("Count"); + if (nPagesToGo < nPages) { + if (pdfium::ContainsKey(*pVisited, pKid)) + return -1; + + pdfium::ScopedSetInsertion insertion(pVisited, pKid); + if (InsertDeletePDFPage(pDoc, pKid, nPagesToGo, pPage, bInsert, + pVisited) < 0) { + return -1; + } + pPages->SetIntegerFor( + "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1)); + return 1; + } + nPagesToGo -= nPages; + } + } + return 0; +} + +int InsertNewPage(CPDF_Document* pDoc, + int iPage, + CPDF_Dictionary* pPageDict, + CFX_ArrayTemplate& pageList) { + CPDF_Dictionary* pRoot = pDoc->GetRoot(); + CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr; + if (!pPages) + return -1; + + int nPages = pDoc->GetPageCount(); + if (iPage < 0 || iPage > nPages) + return -1; + + if (iPage == nPages) { + CPDF_Array* pPagesList = pPages->GetArrayFor("Kids"); + if (!pPagesList) { + pPagesList = new CPDF_Array; + pPages->SetFor("Kids", pPagesList); + } + pPagesList->Add(new CPDF_Reference(pDoc, pPageDict->GetObjNum())); + pPages->SetIntegerFor("Count", nPages + 1); + pPageDict->SetReferenceFor("Parent", pDoc, pPages->GetObjNum()); + } else { + std::set stack = {pPages}; + if (InsertDeletePDFPage(pDoc, pPages, iPage, pPageDict, TRUE, &stack) < 0) + return -1; + } + pageList.InsertAt(iPage, pPageDict->GetObjNum()); + return iPage; +} + int CountPages(CPDF_Dictionary* pPages, std::set* visited_pages) { int count = pPages->GetIntegerFor("Count"); @@ -337,7 +413,6 @@ CPDF_Document::CPDF_Document(std::unique_ptr pParser) m_pParser(std::move(pParser)), m_pRootDict(nullptr), m_pInfoDict(nullptr), - m_iLastPageTraversed(-1), m_bLinearized(false), m_iFirstPageNo(0), m_dwFirstPageObjNum(0), @@ -402,63 +477,40 @@ void CPDF_Document::LoadPages() { m_PageList.SetSize(RetrievePageCount()); } -CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage, int nPagesToGo) { - std::pair* lastProc = &m_pTreeTraversal.top(); - CPDF_Dictionary* pPages = lastProc->first; +CPDF_Dictionary* CPDF_Document::FindPDFPage(CPDF_Dictionary* pPages, + int iPage, + int nPagesToGo, + int level) { CPDF_Array* pKidList = pPages->GetArrayFor("Kids"); - if (!pKidList) { - m_pTreeTraversal.pop(); - if (nPagesToGo != 1) - return nullptr; - m_PageList.SetAt(iPage, pPages->GetObjNum()); - return pPages; - } + if (!pKidList) + return nPagesToGo == 0 ? pPages : nullptr; - if (m_pTreeTraversal.size() >= FX_MAX_PAGE_LEVEL) { - m_pTreeTraversal.pop(); + if (level >= FX_MAX_PAGE_LEVEL) return nullptr; - } - bool shouldFinish = pPages->GetIntegerFor("Count") <= nPagesToGo; - CPDF_Dictionary* page = nullptr; - for (size_t i = lastProc->second + 1; i < pKidList->GetCount(); i++) { + for (size_t i = 0; i < pKidList->GetCount(); i++) { CPDF_Dictionary* pKid = pKidList->GetDictAt(i); if (!pKid) { nPagesToGo--; - lastProc->second++; continue; } - if (pKid == pPages) { - lastProc->second++; + if (pKid == pPages) continue; - } if (!pKid->KeyExist("Kids")) { - m_PageList.SetAt(iPage - nPagesToGo + 1, pKid->GetObjNum()); + if (nPagesToGo == 0) + return pKid; + + m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum()); nPagesToGo--; - lastProc->second++; - if (nPagesToGo == 0) { - page = pKid; - break; - } } else { int nPages = pKid->GetIntegerFor("Count"); - m_pTreeTraversal.push(std::make_pair(pKid, -1)); - CPDF_Dictionary* pageKid = TraversePDFPages(iPage, nPagesToGo); - // If the stack top is current element, kid was completely processed. - if (lastProc == &m_pTreeTraversal.top()) - lastProc->second++; - if (nPagesToGo <= nPages) { - page = pageKid; - break; - } + if (nPagesToGo < nPages) + return FindPDFPage(pKid, iPage, nPagesToGo, level + 1); + nPagesToGo -= nPages; } } - if (shouldFinish || - lastProc->second == static_cast(pKidList->GetCount() - 1)) { - m_pTreeTraversal.pop(); - } - return page; + return nullptr; } CPDF_Dictionary* CPDF_Document::GetPagesDict() const { @@ -482,20 +534,21 @@ CPDF_Dictionary* CPDF_Document::GetPage(int iPage) { } int objnum = m_PageList.GetAt(iPage); - if (!objnum || m_iLastPageTraversed < iPage) { - CPDF_Dictionary* pPages = GetPagesDict(); - if (!pPages) - return nullptr; - if (m_pTreeTraversal.empty()) - m_pTreeTraversal.push(std::make_pair(pPages, -1)); - CPDF_Dictionary* page = TraversePDFPages(m_iLastPageTraversed + 1, - iPage - m_iLastPageTraversed); - m_iLastPageTraversed = iPage; - return page; + if (objnum) { + if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum))) + return pDict; } - if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum))) - return pDict; - return nullptr; + + CPDF_Dictionary* pPages = GetPagesDict(); + if (!pPages) + return nullptr; + + CPDF_Dictionary* pPage = FindPDFPage(pPages, iPage, iPage, 0); + if (!pPage) + return nullptr; + + m_PageList.SetAt(iPage, pPage->GetObjNum()); + return pPage; } void CPDF_Document::SetPageObjNum(int iPage, uint32_t objNum) { @@ -657,91 +710,13 @@ CPDF_Dictionary* CPDF_Document::CreateNewPage(int iPage) { CPDF_Dictionary* pDict = new CPDF_Dictionary(m_pByteStringPool); pDict->SetNameFor("Type", "Page"); uint32_t dwObjNum = AddIndirectObject(pDict); - if (InsertNewPage(iPage, pDict, m_PageList) < 0) { + if (InsertNewPage(this, iPage, pDict, m_PageList) < 0) { ReleaseIndirectObject(dwObjNum); return nullptr; } return pDict; } -int CPDF_Document::InsertDeletePDFPage(CPDF_Dictionary* pPages, - int nPagesToGo, - CPDF_Dictionary* pPage, - FX_BOOL bInsert, - std::set* pVisited) { - CPDF_Array* pKidList = pPages->GetArrayFor("Kids"); - if (!pKidList) - return -1; - - for (size_t i = 0; i < pKidList->GetCount(); i++) { - CPDF_Dictionary* pKid = pKidList->GetDictAt(i); - if (pKid->GetStringFor("Type") == "Page") { - if (nPagesToGo == 0) { - if (bInsert) { - pKidList->InsertAt(i, new CPDF_Reference(this, pPage->GetObjNum())); - pPage->SetReferenceFor("Parent", this, pPages->GetObjNum()); - } else { - pKidList->RemoveAt(i); - } - pPages->SetIntegerFor( - "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1)); - // Tree will change, so reset tree transversal variables - m_iLastPageTraversed = -1; - m_pTreeTraversal = std::stack>(); - return 1; - } - nPagesToGo--; - } else { - int nPages = pKid->GetIntegerFor("Count"); - if (nPagesToGo < nPages) { - if (pdfium::ContainsKey(*pVisited, pKid)) - return -1; - - pdfium::ScopedSetInsertion insertion(pVisited, pKid); - if (InsertDeletePDFPage(pKid, nPagesToGo, pPage, bInsert, pVisited) < - 0) { - return -1; - } - pPages->SetIntegerFor( - "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1)); - return 1; - } - nPagesToGo -= nPages; - } - } - return 0; -} - -int CPDF_Document::InsertNewPage(int iPage, - CPDF_Dictionary* pPageDict, - CFX_ArrayTemplate& pageList) { - CPDF_Dictionary* pRoot = GetRoot(); - CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr; - if (!pPages) - return -1; - - int nPages = GetPageCount(); - if (iPage < 0 || iPage > nPages) - return -1; - - if (iPage == nPages) { - CPDF_Array* pPagesList = pPages->GetArrayFor("Kids"); - if (!pPagesList) { - pPagesList = new CPDF_Array; - pPages->SetFor("Kids", pPagesList); - } - pPagesList->Add(new CPDF_Reference(this, pPageDict->GetObjNum())); - pPages->SetIntegerFor("Count", nPages + 1); - pPageDict->SetReferenceFor("Parent", this, pPages->GetObjNum()); - } else { - std::set stack = {pPages}; - if (InsertDeletePDFPage(pPages, iPage, pPageDict, TRUE, &stack) < 0) - return -1; - } - pageList.InsertAt(iPage, pPageDict->GetObjNum()); - return iPage; -} - void CPDF_Document::DeletePage(int iPage) { CPDF_Dictionary* pPages = GetPagesDict(); if (!pPages) @@ -752,7 +727,7 @@ void CPDF_Document::DeletePage(int iPage) { return; std::set stack = {pPages}; - if (InsertDeletePDFPage(pPages, iPage, nullptr, FALSE, &stack) < 0) + if (InsertDeletePDFPage(this, pPages, iPage, nullptr, FALSE, &stack) < 0) return; m_PageList.RemoveAt(iPage); diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h index e845ea8eef..c557a56013 100644 --- a/core/fpdfapi/parser/cpdf_document.h +++ b/core/fpdfapi/parser/cpdf_document.h @@ -9,7 +9,6 @@ #include #include -#include #include "core/fpdfapi/parser/cpdf_indirect_object_holder.h" #include "core/fpdfapi/parser/cpdf_object.h" @@ -108,7 +107,10 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { // Retrieve page count information by getting count value from the tree nodes int RetrievePageCount() const; - CPDF_Dictionary* TraversePDFPages(int iPage, int nPagesToGo); + CPDF_Dictionary* FindPDFPage(CPDF_Dictionary* pPages, + int iPage, + int nPagesToGo, + int level); int FindPageIndex(CPDF_Dictionary* pNode, uint32_t& skip_count, uint32_t objnum, @@ -124,19 +126,10 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { FX_BOOL bVert, CFX_ByteString basefont, std::function Insert); - int InsertDeletePDFPage(CPDF_Dictionary* pPages, - int nPagesToGo, - CPDF_Dictionary* pPage, - FX_BOOL bInsert, - std::set* pVisited); - int InsertNewPage(int iPage, - CPDF_Dictionary* pPageDict, - CFX_ArrayTemplate& pageList); + std::unique_ptr m_pParser; CPDF_Dictionary* m_pRootDict; CPDF_Dictionary* m_pInfoDict; - std::stack> m_pTreeTraversal; - int m_iLastPageTraversed; bool m_bLinearized; int m_iFirstPageNo; uint32_t m_dwFirstPageObjNum; -- cgit v1.2.3