Traverse PDF page tree only once in CPDF_Document

Try 2: the main fix was recursively popping elements from the stack. Since the Traverse method can be called on non-root nodes from GetPage(), we have to make sure to properly update the parents. (Try 1: https://codereview.chromium.org/2414423002/ ) In our current implementation of CPDF_Document::GetPage, we traverse the PDF page tree until we find the index we are looking for. This is slow when we make the calls GetPage(0), GetPage(1), ... in sequence, since in this case the page tree will be traversed n times if there are n pages. This CL makes sure the page tree is only traversed once. Time to load the PDF from the bug below in a Chrome official build: before this CL, around 1 minute 25 seconds; after this CL, around 4 seconds. BUG=chromium:638513 Review-Url: https://codereview.chromium.org/2442403002
author: npm <npm@chromium.org> 2016-10-26 11:03:43 -0700
committer: Commit bot <commit-bot@chromium.org> 2016-10-26 11:03:43 -0700
commit: d3a2009d75eac3cda442f545ef0865afae7b35cf (patch)
tree: 9bbc5bbfbd48e1e63acd1bf55cac09d65cef8882 /core/fpdfapi/parser/cpdf_document.h
parent: 1842be87408b06bf0b4c521044c09452caac5c80 (diff)
download: pdfium-d3a2009d75eac3cda442f545ef0865afae7b35cf.tar.xz
1 files changed, 16 insertions, 5 deletions
diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h
index ea7bd328aa..ef9f663c3b 100644
--- a/core/fpdfapi/parser/cpdf_document.h
+++ b/core/fpdfapi/parser/cpdf_document.h
@@ -9,6 +9,7 @@
 
 #include <functional>
 #include <memory>
+#include <stack>
 
 #include "core/fpdfapi/parser/cpdf_indirect_object_holder.h"
 #include "core/fpdfapi/parser/cpdf_object.h"
@@ -105,10 +106,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder {
  protected:
   // Retrieve page count information by getting count value from the tree nodes
   int RetrievePageCount() const;
-  CPDF_Dictionary* FindPDFPage(CPDF_Dictionary* pPages,
-                               int iPage,
-                               int nPagesToGo,
-                               int level);
+  CPDF_Dictionary* TraversePDFPages(int iPage, int nPagesToGo);
   int FindPageIndex(CPDF_Dictionary* pNode,
                     uint32_t& skip_count,
                     uint32_t objnum,
@@ -124,10 +122,23 @@ class CPDF_Document : public CPDF_IndirectObjectHolder {
       FX_BOOL bVert,
       CFX_ByteString basefont,
       std::function<void(FX_WCHAR, FX_WCHAR, CPDF_Array*)> Insert);
-
+  int InsertDeletePDFPage(CPDF_Dictionary* pPages,
+                          int nPagesToGo,
+                          CPDF_Dictionary* pPage,
+                          FX_BOOL bInsert,
+                          std::set<CPDF_Dictionary*>* pVisited);
+  int InsertNewPage(int iPage,
+                    CPDF_Dictionary* pPageDict,
+                    CFX_ArrayTemplate<uint32_t>& pageList);
+  void PopAndPropagate();
   std::unique_ptr<CPDF_Parser> m_pParser;
   CPDF_Dictionary* m_pRootDict;
   CPDF_Dictionary* m_pInfoDict;
+  // Stack of page nodes to know current position in page tree. Int is the index
+  // of last processed child.
+  std::stack<std::pair<CPDF_Dictionary*, int>> m_pTreeTraversal;
+  // Index of last page (leaf) processed from page tree.
+  int m_iLastPageTraversed;
   bool m_bLinearized;
   int m_iFirstPageNo;
   uint32_t m_dwFirstPageObjNum;
author	npm <npm@chromium.org>	2016-10-26 11:03:43 -0700
committer	Commit bot <commit-bot@chromium.org>	2016-10-26 11:03:43 -0700
commit	d3a2009d75eac3cda442f545ef0865afae7b35cf (patch)
tree	9bbc5bbfbd48e1e63acd1bf55cac09d65cef8882 /core/fpdfapi/parser/cpdf_document.h
parent	1842be87408b06bf0b4c521044c09452caac5c80 (diff)
download	pdfium-d3a2009d75eac3cda442f545ef0865afae7b35cf.tar.xz