diff options
author | npm <npm@chromium.org> | 2016-10-26 11:03:43 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-10-26 11:03:43 -0700 |
commit | d3a2009d75eac3cda442f545ef0865afae7b35cf (patch) | |
tree | 9bbc5bbfbd48e1e63acd1bf55cac09d65cef8882 /core/fpdfapi/parser/cpdf_document.cpp | |
parent | 1842be87408b06bf0b4c521044c09452caac5c80 (diff) | |
download | pdfium-d3a2009d75eac3cda442f545ef0865afae7b35cf.tar.xz |
Traverse PDF page tree only once in CPDF_Document
Try 2: main fix was recursively popping elements from the stack. Since
the Traverse method can be called on non-root nodes from GetPage(), we
have to make sure to properly update the parents.
Try 1 at https://codereview.chromium.org/2414423002/
In our current implementation of CPDF_Document::GetPage, we traverse
the PDF page tree until we find the index we are looking for. This is
slow when we do calls GetPage(0), GetPage(1), ... since in this case
the page tree will be traversed n times if there are n pages. This CL
makes sure the page tree is only traversed once.
Time to load the PDF from the bug below in chrome official build:
Before this CL: around 1 minute 25 seconds
After this CL: around 4 seconds
BUG=chromium:638513
Review-Url: https://codereview.chromium.org/2442403002
Diffstat (limited to 'core/fpdfapi/parser/cpdf_document.cpp')
-rw-r--r-- | core/fpdfapi/parser/cpdf_document.cpp | 256 |
1 files changed, 146 insertions, 110 deletions
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp index c5f64a790c..f548a26038 100644 --- a/core/fpdfapi/parser/cpdf_document.cpp +++ b/core/fpdfapi/parser/cpdf_document.cpp @@ -8,6 +8,7 @@ #include <memory> #include <set> +#include <utility> #include <vector> #include "core/fpdfapi/cpdf_modulemgr.h" @@ -240,83 +241,6 @@ void InsertWidthArray1(CFX_Font* pFont, InsertWidthArrayImpl(widths, size, pWidthArray); } -int InsertDeletePDFPage(CPDF_Document* pDoc, - CPDF_Dictionary* pPages, - int nPagesToGo, - CPDF_Dictionary* pPage, - FX_BOOL bInsert, - std::set<CPDF_Dictionary*>* pVisited) { - CPDF_Array* pKidList = pPages->GetArrayFor("Kids"); - if (!pKidList) - return -1; - - for (size_t i = 0; i < pKidList->GetCount(); i++) { - CPDF_Dictionary* pKid = pKidList->GetDictAt(i); - if (pKid->GetStringFor("Type") == "Page") { - if (nPagesToGo == 0) { - if (bInsert) { - pKidList->InsertAt(i, new CPDF_Reference(pDoc, pPage->GetObjNum())); - pPage->SetReferenceFor("Parent", pDoc, pPages->GetObjNum()); - } else { - pKidList->RemoveAt(i); - } - pPages->SetIntegerFor( - "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1)); - return 1; - } - nPagesToGo--; - } else { - int nPages = pKid->GetIntegerFor("Count"); - if (nPagesToGo < nPages) { - if (pdfium::ContainsKey(*pVisited, pKid)) - return -1; - - pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid); - if (InsertDeletePDFPage(pDoc, pKid, nPagesToGo, pPage, bInsert, - pVisited) < 0) { - return -1; - } - pPages->SetIntegerFor( - "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1)); - return 1; - } - nPagesToGo -= nPages; - } - } - return 0; -} - -int InsertNewPage(CPDF_Document* pDoc, - int iPage, - CPDF_Dictionary* pPageDict, - CFX_ArrayTemplate<uint32_t>& pageList) { - CPDF_Dictionary* pRoot = pDoc->GetRoot(); - CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr; - if (!pPages) - return -1; - - int nPages = pDoc->GetPageCount(); - if (iPage < 0 || iPage > nPages) - return -1; - - if (iPage == nPages) { - CPDF_Array* pPagesList = pPages->GetArrayFor("Kids"); - if (!pPagesList) { - pPagesList = new CPDF_Array; - pPages->SetFor("Kids", pPagesList); - } - pPagesList->Add(new CPDF_Reference(pDoc, pPageDict->GetObjNum())); - pPages->SetIntegerFor("Count", nPages + 1); - pPageDict->SetReferenceFor("Parent", pDoc, pPages->GetObjNum()); - } else { - std::set<CPDF_Dictionary*> stack = {pPages}; - if (InsertDeletePDFPage(pDoc, pPages, iPage, pPageDict, TRUE, &stack) < 0) - return -1; - } - pageList.InsertAt(iPage, pPageDict->GetObjNum()); - return iPage; -} - int CountPages(CPDF_Dictionary* pPages, std::set<CPDF_Dictionary*>* visited_pages) { int count = pPages->GetIntegerFor("Count"); @@ -413,6 +337,7 @@ CPDF_Document::CPDF_Document(std::unique_ptr<CPDF_Parser> pParser) m_pParser(std::move(pParser)), m_pRootDict(nullptr), m_pInfoDict(nullptr), + m_iLastPageTraversed(-1), m_bLinearized(false), m_iFirstPageNo(0), m_dwFirstPageObjNum(0), @@ -477,40 +402,71 @@ void CPDF_Document::LoadPages() { m_PageList.SetSize(RetrievePageCount()); } -CPDF_Dictionary* CPDF_Document::FindPDFPage(CPDF_Dictionary* pPages, - int iPage, - int nPagesToGo, - int level) { +void CPDF_Document::PopAndPropagate() { + m_pTreeTraversal.pop(); + if (m_pTreeTraversal.empty()) + return; + std::pair<CPDF_Dictionary*, int>* top = &m_pTreeTraversal.top(); + top->second++; + if (top->second == + static_cast<int>(top->first->GetArrayFor("Kids")->GetCount() - 1)) { + PopAndPropagate(); + } +} + +CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage, int nPagesToGo) { + std::pair<CPDF_Dictionary*, int>* lastProc = &m_pTreeTraversal.top(); + CPDF_Dictionary* pPages = lastProc->first; CPDF_Array* pKidList = pPages->GetArrayFor("Kids"); - if (!pKidList) - return nPagesToGo == 0 ? pPages : nullptr; + if (!pKidList) { + PopAndPropagate(); + if (nPagesToGo != 1) + return nullptr; + m_PageList.SetAt(iPage, pPages->GetObjNum()); + return pPages; + } - if (level >= FX_MAX_PAGE_LEVEL) + if (m_pTreeTraversal.size() >= FX_MAX_PAGE_LEVEL) { + PopAndPropagate(); return nullptr; + } - for (size_t i = 0; i < pKidList->GetCount(); i++) { + CPDF_Dictionary* page = nullptr; + for (size_t i = lastProc->second + 1; i < pKidList->GetCount(); i++) { CPDF_Dictionary* pKid = pKidList->GetDictAt(i); if (!pKid) { nPagesToGo--; + lastProc->second++; continue; } - if (pKid == pPages) + if (pKid == pPages) { + lastProc->second++; continue; + } if (!pKid->KeyExist("Kids")) { - if (nPagesToGo == 0) - return pKid; - - m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum()); + m_PageList.SetAt(iPage - nPagesToGo + 1, pKid->GetObjNum()); nPagesToGo--; + lastProc->second++; + if (nPagesToGo == 0) { + page = pKid; + break; + } } else { int nPages = pKid->GetIntegerFor("Count"); - if (nPagesToGo < nPages) - return FindPDFPage(pKid, iPage, nPagesToGo, level + 1); - + m_pTreeTraversal.push(std::make_pair(pKid, -1)); + CPDF_Dictionary* pageKid = TraversePDFPages(iPage, nPagesToGo); + if (nPagesToGo <= nPages) { + page = pageKid; + break; + } nPagesToGo -= nPages; } } - return nullptr; + if (!m_pTreeTraversal.empty() && lastProc == &m_pTreeTraversal.top() && + lastProc->second == static_cast<int>(pKidList->GetCount() - 1)) { + PopAndPropagate(); + } + return page; } CPDF_Dictionary* CPDF_Document::GetPagesDict() const { @@ -534,21 +490,20 @@ CPDF_Dictionary* CPDF_Document::GetPage(int iPage) { } int objnum = m_PageList.GetAt(iPage); - if (objnum) { - if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum))) - return pDict; + if (!objnum) { + CPDF_Dictionary* pPages = GetPagesDict(); + if (!pPages) + return nullptr; + if (m_pTreeTraversal.empty()) + m_pTreeTraversal.push(std::make_pair(pPages, -1)); + CPDF_Dictionary* page = + TraversePDFPages(iPage, iPage - m_iLastPageTraversed); + m_iLastPageTraversed = iPage; + return page; } - - CPDF_Dictionary* pPages = GetPagesDict(); - if (!pPages) - return nullptr; - - CPDF_Dictionary* pPage = FindPDFPage(pPages, iPage, iPage, 0); - if (!pPage) - return nullptr; - - m_PageList.SetAt(iPage, pPage->GetObjNum()); - return pPage; + if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum))) + return pDict; + return nullptr; } void CPDF_Document::SetPageObjNum(int iPage, uint32_t objNum) { @@ -710,13 +665,94 @@ CPDF_Dictionary* CPDF_Document::CreateNewPage(int iPage) { CPDF_Dictionary* pDict = new CPDF_Dictionary(m_pByteStringPool); pDict->SetNameFor("Type", "Page"); uint32_t dwObjNum = AddIndirectObject(pDict); - if (InsertNewPage(this, iPage, pDict, m_PageList) < 0) { + if (InsertNewPage(iPage, pDict, m_PageList) < 0) { ReleaseIndirectObject(dwObjNum); return nullptr; } return pDict; } +int CPDF_Document::InsertDeletePDFPage(CPDF_Dictionary* pPages, + int nPagesToGo, + CPDF_Dictionary* pPage, + FX_BOOL bInsert, + std::set<CPDF_Dictionary*>* pVisited) { + CPDF_Array* pKidList = pPages->GetArrayFor("Kids"); + if (!pKidList) + return -1; + + for (size_t i = 0; i < pKidList->GetCount(); i++) { + CPDF_Dictionary* pKid = pKidList->GetDictAt(i); + if (pKid->GetStringFor("Type") == "Page") { + if (nPagesToGo == 0) { + if (bInsert) { + pKidList->InsertAt(i, new CPDF_Reference(this, pPage->GetObjNum())); + pPage->SetReferenceFor("Parent", this, pPages->GetObjNum()); + } else { + pKidList->RemoveAt(i); + } + pPages->SetIntegerFor( + "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1)); + // Tree will change, so reset tree transversal variables + m_iLastPageTraversed = -1; + m_pTreeTraversal = std::stack<std::pair<CPDF_Dictionary*, int>>(); + return 1; + } + nPagesToGo--; + } else { + int nPages = pKid->GetIntegerFor("Count"); + if (nPagesToGo < nPages) { + if (pdfium::ContainsKey(*pVisited, pKid)) + return -1; + + pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid); + if (InsertDeletePDFPage(pKid, nPagesToGo, pPage, bInsert, pVisited) < + 0) { + return -1; + } + pPages->SetIntegerFor( + "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1)); + return 1; + } + nPagesToGo -= nPages; + } + } + return 0; +} + +int CPDF_Document::InsertNewPage(int iPage, + CPDF_Dictionary* pPageDict, + CFX_ArrayTemplate<uint32_t>& pageList) { + CPDF_Dictionary* pRoot = GetRoot(); + CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr; + if (!pPages) + return -1; + + int nPages = GetPageCount(); + if (iPage < 0 || iPage > nPages) + return -1; + + if (iPage == nPages) { + CPDF_Array* pPagesList = pPages->GetArrayFor("Kids"); + if (!pPagesList) { + pPagesList = new CPDF_Array; + pPages->SetFor("Kids", pPagesList); + } + pPagesList->Add(new CPDF_Reference(this, pPageDict->GetObjNum())); + pPages->SetIntegerFor("Count", nPages + 1); + pPageDict->SetReferenceFor("Parent", this, pPages->GetObjNum()); + // Reset tree transversal variables + m_iLastPageTraversed = -1; + m_pTreeTraversal = std::stack<std::pair<CPDF_Dictionary*, int>>(); + } else { + std::set<CPDF_Dictionary*> stack = {pPages}; + if (InsertDeletePDFPage(pPages, iPage, pPageDict, TRUE, &stack) < 0) + return -1; + } + pageList.InsertAt(iPage, pPageDict->GetObjNum()); + return iPage; +} + void CPDF_Document::DeletePage(int iPage) { CPDF_Dictionary* pPages = GetPagesDict(); if (!pPages) @@ -727,7 +763,7 @@ void CPDF_Document::DeletePage(int iPage) { return; std::set<CPDF_Dictionary*> stack = {pPages}; - if (InsertDeletePDFPage(this, pPages, iPage, nullptr, FALSE, &stack) < 0) + if (InsertDeletePDFPage(pPages, iPage, nullptr, FALSE, &stack) < 0) return; m_PageList.RemoveAt(iPage); |