summaryrefslogtreecommitdiff
path: root/core/fpdfapi/parser
diff options
context:
space:
mode:
authornpm <npm@chromium.org>2016-10-28 14:30:44 -0700
committerCommit bot <commit-bot@chromium.org>2016-10-28 14:30:44 -0700
commit900f421e29daf2ab62de3ae8dc821f031bc7bdb3 (patch)
tree7f9020dd8a2172b523d8a35864a5c6b0053ec658 /core/fpdfapi/parser
parentdc0401a0da446a0744419d71f2bb216f3b90ad0f (diff)
downloadpdfium-900f421e29daf2ab62de3ae8dc821f031bc7bdb3.tar.xz
Revert of Traverse PDF page tree only once in CPDF_Document Try 2 (patchset #3 id:40001 of https://codereview.chromium.org/2442403002/ )
Reason for revert: Not quite right yet. Original issue's description: > Traverse PDF page tree only once in CPDF_Document > > Try 2: main fix was recursively popping elements from the stack. Since > the Traverse method can be called on non-root nodes from GetPage(), we > have to make sure to properly update the parents. > > Try 1 at https://codereview.chromium.org/2414423002/ > > In our current implementation of CPDF_Document::GetPage, we traverse > the PDF page tree until we find the index we are looking for. This is > slow when we do calls GetPage(0), GetPage(1), ... since in this case > the page tree will be traversed n times if there are n pages. This CL > makes sure the page tree is only traversed once. > > Time to load the PDF from the bug below in chrome official build: > Before this CL: around 1 minute 25 seconds > After this CL: around 4 seconds > > BUG=chromium:638513 > > Committed: https://pdfium.googlesource.com/pdfium/+/d3a2009d75eac3cda442f545ef0865afae7b35cf TBR=tsepez@chromium.org,weili@chromium.org,thestig@chromium.org # Not skipping CQ checks because original CL landed more than 1 days ago. BUG=chromium:638513 Review-Url: https://codereview.chromium.org/2461063003
Diffstat (limited to 'core/fpdfapi/parser')
-rw-r--r--core/fpdfapi/parser/cpdf_document.cpp256
-rw-r--r--core/fpdfapi/parser/cpdf_document.h21
2 files changed, 115 insertions, 162 deletions
diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp
index f548a26038..c5f64a790c 100644
--- a/core/fpdfapi/parser/cpdf_document.cpp
+++ b/core/fpdfapi/parser/cpdf_document.cpp
@@ -8,7 +8,6 @@
#include <memory>
#include <set>
-#include <utility>
#include <vector>
#include "core/fpdfapi/cpdf_modulemgr.h"
@@ -241,6 +240,83 @@ void InsertWidthArray1(CFX_Font* pFont,
InsertWidthArrayImpl(widths, size, pWidthArray);
}
+int InsertDeletePDFPage(CPDF_Document* pDoc,
+ CPDF_Dictionary* pPages,
+ int nPagesToGo,
+ CPDF_Dictionary* pPage,
+ FX_BOOL bInsert,
+ std::set<CPDF_Dictionary*>* pVisited) {
+ CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
+ if (!pKidList)
+ return -1;
+
+ for (size_t i = 0; i < pKidList->GetCount(); i++) {
+ CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
+ if (pKid->GetStringFor("Type") == "Page") {
+ if (nPagesToGo == 0) {
+ if (bInsert) {
+ pKidList->InsertAt(i, new CPDF_Reference(pDoc, pPage->GetObjNum()));
+ pPage->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());
+ } else {
+ pKidList->RemoveAt(i);
+ }
+ pPages->SetIntegerFor(
+ "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
+ return 1;
+ }
+ nPagesToGo--;
+ } else {
+ int nPages = pKid->GetIntegerFor("Count");
+ if (nPagesToGo < nPages) {
+ if (pdfium::ContainsKey(*pVisited, pKid))
+ return -1;
+
+ pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);
+ if (InsertDeletePDFPage(pDoc, pKid, nPagesToGo, pPage, bInsert,
+ pVisited) < 0) {
+ return -1;
+ }
+ pPages->SetIntegerFor(
+ "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
+ return 1;
+ }
+ nPagesToGo -= nPages;
+ }
+ }
+ return 0;
+}
+
+int InsertNewPage(CPDF_Document* pDoc,
+ int iPage,
+ CPDF_Dictionary* pPageDict,
+ CFX_ArrayTemplate<uint32_t>& pageList) {
+ CPDF_Dictionary* pRoot = pDoc->GetRoot();
+ CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;
+ if (!pPages)
+ return -1;
+
+ int nPages = pDoc->GetPageCount();
+ if (iPage < 0 || iPage > nPages)
+ return -1;
+
+ if (iPage == nPages) {
+ CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");
+ if (!pPagesList) {
+ pPagesList = new CPDF_Array;
+ pPages->SetFor("Kids", pPagesList);
+ }
+ pPagesList->Add(new CPDF_Reference(pDoc, pPageDict->GetObjNum()));
+ pPages->SetIntegerFor("Count", nPages + 1);
+ pPageDict->SetReferenceFor("Parent", pDoc, pPages->GetObjNum());
+ } else {
+ std::set<CPDF_Dictionary*> stack = {pPages};
+ if (InsertDeletePDFPage(pDoc, pPages, iPage, pPageDict, TRUE, &stack) < 0)
+ return -1;
+ }
+ pageList.InsertAt(iPage, pPageDict->GetObjNum());
+ return iPage;
+}
+
int CountPages(CPDF_Dictionary* pPages,
std::set<CPDF_Dictionary*>* visited_pages) {
int count = pPages->GetIntegerFor("Count");
@@ -337,7 +413,6 @@ CPDF_Document::CPDF_Document(std::unique_ptr<CPDF_Parser> pParser)
m_pParser(std::move(pParser)),
m_pRootDict(nullptr),
m_pInfoDict(nullptr),
- m_iLastPageTraversed(-1),
m_bLinearized(false),
m_iFirstPageNo(0),
m_dwFirstPageObjNum(0),
@@ -402,71 +477,40 @@ void CPDF_Document::LoadPages() {
m_PageList.SetSize(RetrievePageCount());
}
-void CPDF_Document::PopAndPropagate() {
- m_pTreeTraversal.pop();
- if (m_pTreeTraversal.empty())
- return;
- std::pair<CPDF_Dictionary*, int>* top = &m_pTreeTraversal.top();
- top->second++;
- if (top->second ==
- static_cast<int>(top->first->GetArrayFor("Kids")->GetCount() - 1)) {
- PopAndPropagate();
- }
-}
-
-CPDF_Dictionary* CPDF_Document::TraversePDFPages(int iPage, int nPagesToGo) {
- std::pair<CPDF_Dictionary*, int>* lastProc = &m_pTreeTraversal.top();
- CPDF_Dictionary* pPages = lastProc->first;
+CPDF_Dictionary* CPDF_Document::FindPDFPage(CPDF_Dictionary* pPages,
+ int iPage,
+ int nPagesToGo,
+ int level) {
CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
- if (!pKidList) {
- PopAndPropagate();
- if (nPagesToGo != 1)
- return nullptr;
- m_PageList.SetAt(iPage, pPages->GetObjNum());
- return pPages;
- }
+ if (!pKidList)
+ return nPagesToGo == 0 ? pPages : nullptr;
- if (m_pTreeTraversal.size() >= FX_MAX_PAGE_LEVEL) {
- PopAndPropagate();
+ if (level >= FX_MAX_PAGE_LEVEL)
return nullptr;
- }
- CPDF_Dictionary* page = nullptr;
- for (size_t i = lastProc->second + 1; i < pKidList->GetCount(); i++) {
+ for (size_t i = 0; i < pKidList->GetCount(); i++) {
CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
if (!pKid) {
nPagesToGo--;
- lastProc->second++;
continue;
}
- if (pKid == pPages) {
- lastProc->second++;
+ if (pKid == pPages)
continue;
- }
if (!pKid->KeyExist("Kids")) {
- m_PageList.SetAt(iPage - nPagesToGo + 1, pKid->GetObjNum());
+ if (nPagesToGo == 0)
+ return pKid;
+
+ m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum());
nPagesToGo--;
- lastProc->second++;
- if (nPagesToGo == 0) {
- page = pKid;
- break;
- }
} else {
int nPages = pKid->GetIntegerFor("Count");
- m_pTreeTraversal.push(std::make_pair(pKid, -1));
- CPDF_Dictionary* pageKid = TraversePDFPages(iPage, nPagesToGo);
- if (nPagesToGo <= nPages) {
- page = pageKid;
- break;
- }
+ if (nPagesToGo < nPages)
+ return FindPDFPage(pKid, iPage, nPagesToGo, level + 1);
+
nPagesToGo -= nPages;
}
}
- if (!m_pTreeTraversal.empty() && lastProc == &m_pTreeTraversal.top() &&
- lastProc->second == static_cast<int>(pKidList->GetCount() - 1)) {
- PopAndPropagate();
- }
- return page;
+ return nullptr;
}
CPDF_Dictionary* CPDF_Document::GetPagesDict() const {
@@ -490,20 +534,21 @@ CPDF_Dictionary* CPDF_Document::GetPage(int iPage) {
}
int objnum = m_PageList.GetAt(iPage);
- if (!objnum) {
- CPDF_Dictionary* pPages = GetPagesDict();
- if (!pPages)
- return nullptr;
- if (m_pTreeTraversal.empty())
- m_pTreeTraversal.push(std::make_pair(pPages, -1));
- CPDF_Dictionary* page =
- TraversePDFPages(iPage, iPage - m_iLastPageTraversed);
- m_iLastPageTraversed = iPage;
- return page;
+ if (objnum) {
+ if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum)))
+ return pDict;
}
- if (CPDF_Dictionary* pDict = ToDictionary(GetOrParseIndirectObject(objnum)))
- return pDict;
- return nullptr;
+
+ CPDF_Dictionary* pPages = GetPagesDict();
+ if (!pPages)
+ return nullptr;
+
+ CPDF_Dictionary* pPage = FindPDFPage(pPages, iPage, iPage, 0);
+ if (!pPage)
+ return nullptr;
+
+ m_PageList.SetAt(iPage, pPage->GetObjNum());
+ return pPage;
}
void CPDF_Document::SetPageObjNum(int iPage, uint32_t objNum) {
@@ -665,94 +710,13 @@ CPDF_Dictionary* CPDF_Document::CreateNewPage(int iPage) {
CPDF_Dictionary* pDict = new CPDF_Dictionary(m_pByteStringPool);
pDict->SetNameFor("Type", "Page");
uint32_t dwObjNum = AddIndirectObject(pDict);
- if (InsertNewPage(iPage, pDict, m_PageList) < 0) {
+ if (InsertNewPage(this, iPage, pDict, m_PageList) < 0) {
ReleaseIndirectObject(dwObjNum);
return nullptr;
}
return pDict;
}
-int CPDF_Document::InsertDeletePDFPage(CPDF_Dictionary* pPages,
- int nPagesToGo,
- CPDF_Dictionary* pPage,
- FX_BOOL bInsert,
- std::set<CPDF_Dictionary*>* pVisited) {
- CPDF_Array* pKidList = pPages->GetArrayFor("Kids");
- if (!pKidList)
- return -1;
-
- for (size_t i = 0; i < pKidList->GetCount(); i++) {
- CPDF_Dictionary* pKid = pKidList->GetDictAt(i);
- if (pKid->GetStringFor("Type") == "Page") {
- if (nPagesToGo == 0) {
- if (bInsert) {
- pKidList->InsertAt(i, new CPDF_Reference(this, pPage->GetObjNum()));
- pPage->SetReferenceFor("Parent", this, pPages->GetObjNum());
- } else {
- pKidList->RemoveAt(i);
- }
- pPages->SetIntegerFor(
- "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
- // Tree will change, so reset tree transversal variables
- m_iLastPageTraversed = -1;
- m_pTreeTraversal = std::stack<std::pair<CPDF_Dictionary*, int>>();
- return 1;
- }
- nPagesToGo--;
- } else {
- int nPages = pKid->GetIntegerFor("Count");
- if (nPagesToGo < nPages) {
- if (pdfium::ContainsKey(*pVisited, pKid))
- return -1;
-
- pdfium::ScopedSetInsertion<CPDF_Dictionary*> insertion(pVisited, pKid);
- if (InsertDeletePDFPage(pKid, nPagesToGo, pPage, bInsert, pVisited) <
- 0) {
- return -1;
- }
- pPages->SetIntegerFor(
- "Count", pPages->GetIntegerFor("Count") + (bInsert ? 1 : -1));
- return 1;
- }
- nPagesToGo -= nPages;
- }
- }
- return 0;
-}
-
-int CPDF_Document::InsertNewPage(int iPage,
- CPDF_Dictionary* pPageDict,
- CFX_ArrayTemplate<uint32_t>& pageList) {
- CPDF_Dictionary* pRoot = GetRoot();
- CPDF_Dictionary* pPages = pRoot ? pRoot->GetDictFor("Pages") : nullptr;
- if (!pPages)
- return -1;
-
- int nPages = GetPageCount();
- if (iPage < 0 || iPage > nPages)
- return -1;
-
- if (iPage == nPages) {
- CPDF_Array* pPagesList = pPages->GetArrayFor("Kids");
- if (!pPagesList) {
- pPagesList = new CPDF_Array;
- pPages->SetFor("Kids", pPagesList);
- }
- pPagesList->Add(new CPDF_Reference(this, pPageDict->GetObjNum()));
- pPages->SetIntegerFor("Count", nPages + 1);
- pPageDict->SetReferenceFor("Parent", this, pPages->GetObjNum());
- // Reset tree transversal variables
- m_iLastPageTraversed = -1;
- m_pTreeTraversal = std::stack<std::pair<CPDF_Dictionary*, int>>();
- } else {
- std::set<CPDF_Dictionary*> stack = {pPages};
- if (InsertDeletePDFPage(pPages, iPage, pPageDict, TRUE, &stack) < 0)
- return -1;
- }
- pageList.InsertAt(iPage, pPageDict->GetObjNum());
- return iPage;
-}
-
void CPDF_Document::DeletePage(int iPage) {
CPDF_Dictionary* pPages = GetPagesDict();
if (!pPages)
@@ -763,7 +727,7 @@ void CPDF_Document::DeletePage(int iPage) {
return;
std::set<CPDF_Dictionary*> stack = {pPages};
- if (InsertDeletePDFPage(pPages, iPage, nullptr, FALSE, &stack) < 0)
+ if (InsertDeletePDFPage(this, pPages, iPage, nullptr, FALSE, &stack) < 0)
return;
m_PageList.RemoveAt(iPage);
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index ef9f663c3b..ea7bd328aa 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -9,7 +9,6 @@
#include <functional>
#include <memory>
-#include <stack>
#include "core/fpdfapi/parser/cpdf_indirect_object_holder.h"
#include "core/fpdfapi/parser/cpdf_object.h"
@@ -106,7 +105,10 @@ class CPDF_Document : public CPDF_IndirectObjectHolder {
protected:
// Retrieve page count information by getting count value from the tree nodes
int RetrievePageCount() const;
- CPDF_Dictionary* TraversePDFPages(int iPage, int nPagesToGo);
+ CPDF_Dictionary* FindPDFPage(CPDF_Dictionary* pPages,
+ int iPage,
+ int nPagesToGo,
+ int level);
int FindPageIndex(CPDF_Dictionary* pNode,
uint32_t& skip_count,
uint32_t objnum,
@@ -122,23 +124,10 @@ class CPDF_Document : public CPDF_IndirectObjectHolder {
FX_BOOL bVert,
CFX_ByteString basefont,
std::function<void(FX_WCHAR, FX_WCHAR, CPDF_Array*)> Insert);
- int InsertDeletePDFPage(CPDF_Dictionary* pPages,
- int nPagesToGo,
- CPDF_Dictionary* pPage,
- FX_BOOL bInsert,
- std::set<CPDF_Dictionary*>* pVisited);
- int InsertNewPage(int iPage,
- CPDF_Dictionary* pPageDict,
- CFX_ArrayTemplate<uint32_t>& pageList);
- void PopAndPropagate();
+
std::unique_ptr<CPDF_Parser> m_pParser;
CPDF_Dictionary* m_pRootDict;
CPDF_Dictionary* m_pInfoDict;
- // Stack of page nodes to know current position in page tree. Int is the index
- // of last processed child.
- std::stack<std::pair<CPDF_Dictionary*, int>> m_pTreeTraversal;
- // Index of last page (leaf) processed from page tree.
- int m_iLastPageTraversed;
bool m_bLinearized;
int m_iFirstPageNo;
uint32_t m_dwFirstPageObjNum;