From 215816b7450a577e186cc0e5f1634c4c6610b86b Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 14 Jan 2016 12:29:02 -0800 Subject: Merge to XFA: Correct the way to count pages and to avoid infinite loop BUG=pdfium:360 TBR=thestig@chromium.org Review URL: https://codereview.chromium.org/1585823003 . (cherry picked from commit 5d5d9fe7bd1c9566b9d6570015b7c60894d9fc0c) Review URL: https://codereview.chromium.org/1583073004 . --- core/include/fpdfapi/fpdf_parser.h | 18 +++++- .../fpdfapi/fpdf_parser/fpdf_parser_document.cpp | 75 +++++++++++++--------- .../src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 14 ---- fpdfsdk/src/fpdfview_embeddertest.cpp | 5 ++ testing/resources/bug_360.pdf | 14 ++++ 5 files changed, 80 insertions(+), 46 deletions(-) create mode 100644 testing/resources/bug_360.pdf diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h index fda4557119..bdddfc37d7 100644 --- a/core/include/fpdfapi/fpdf_parser.h +++ b/core/include/fpdfapi/fpdf_parser.h @@ -61,6 +61,20 @@ inline bool PDFCharIsLineEnding(uint8_t c) { return c == '\r' || c == '\n'; } +template +class ScopedSetInsertion { + public: + ScopedSetInsertion(std::set* org_set, T elem) + : m_Set(org_set), m_Entry(elem) { + m_Set->insert(m_Entry); + } + ~ScopedSetInsertion() { m_Set->erase(m_Entry); } + + private: + std::set* const m_Set; + const T m_Entry; +}; + // Indexed by 8-bit char code, contains unicode code points. extern const FX_WORD PDFDocEncoding[256]; @@ -168,7 +182,9 @@ class CPDF_Document : public CFX_PrivateData, public CPDF_IndirectObjectHolder { CFX_DWordArray m_PageList; - int _GetPageCount() const; + // Retrieve page count information by getting count value from the tree nodes + // or walking through the tree nodes to calculate it. + int RetrievePageCount() const; CPDF_Dictionary* _FindPDFPage(CPDF_Dictionary* pPages, int iPage, int nPagesToGo, diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_document.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_document.cpp index d8091e8861..6fc3440482 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_document.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_document.cpp @@ -6,7 +6,43 @@ #include "core/include/fpdfapi/fpdf_parser.h" +#include + #include "core/include/fpdfapi/fpdf_module.h" +#include "third_party/base/stl_util.h" + +namespace { + +int CountPages(CPDF_Dictionary* pPages, + std::set* visited_pages) { + int count = pPages->GetInteger("Count"); + if (count > 0 && count < FPDF_PAGE_MAX_NUM) { + return count; + } + CPDF_Array* pKidList = pPages->GetArray("Kids"); + if (!pKidList) { + return 0; + } + count = 0; + for (FX_DWORD i = 0; i < pKidList->GetCount(); i++) { + CPDF_Dictionary* pKid = pKidList->GetDict(i); + if (!pKid || pdfium::ContainsKey(*visited_pages, pKid)) { + continue; + } + if (pKid->KeyExist("Kids")) { + // Use |visited_pages| to help detect circular references of pages. + ScopedSetInsertion local_add(visited_pages, pKid); + count += CountPages(pKid, visited_pages); + } else { + // This page is a leaf node. + count++; + } + } + pPages->SetAtInteger("Count", count); + return count; +} + +} // namespace CPDF_Document::CPDF_Document(CPDF_Parser* pParser) : CPDF_IndirectObjectHolder(pParser) { @@ -54,7 +90,7 @@ void CPDF_Document::LoadDoc() { m_ID1 = pIDArray->GetString(0); m_ID2 = pIDArray->GetString(1); } - m_PageList.SetSize(_GetPageCount()); + m_PageList.SetSize(RetrievePageCount()); } void CPDF_Document::LoadAsynDoc(CPDF_Dictionary* pLinearized) { m_bLinearized = TRUE; @@ -87,7 +123,7 @@ void CPDF_Document::LoadAsynDoc(CPDF_Dictionary* pLinearized) { m_dwFirstPageObjNum = pObjNum->GetInteger(); } void CPDF_Document::LoadPages() { - m_PageList.SetSize(_GetPageCount()); + m_PageList.SetSize(RetrievePageCount()); } CPDF_Document::~CPDF_Document() { if (m_pDocPage) { @@ -256,34 +292,8 @@ int CPDF_Document::GetPageIndex(FX_DWORD objnum) { int CPDF_Document::GetPageCount() const { return m_PageList.GetSize(); } -static int _CountPages(CPDF_Dictionary* pPages, int level) { - if (level > 128) { - return 0; - } - int count = pPages->GetInteger("Count"); - if (count > 0 && count < FPDF_PAGE_MAX_NUM) { - return count; - } - CPDF_Array* pKidList = pPages->GetArray("Kids"); - if (!pKidList) { - return 0; - } - count = 0; - for (FX_DWORD i = 0; i < pKidList->GetCount(); i++) { - CPDF_Dictionary* pKid = pKidList->GetDict(i); - if (!pKid) { - continue; - } - if (!pKid->KeyExist("Kids")) { - count++; - } else { - count += _CountPages(pKid, level + 1); - } - } - pPages->SetAtInteger("Count", count); - return count; -} -int CPDF_Document::_GetPageCount() const { + +int CPDF_Document::RetrievePageCount() const { CPDF_Dictionary* pRoot = GetRoot(); if (!pRoot) { return 0; @@ -295,8 +305,11 @@ int CPDF_Document::_GetPageCount() const { if (!pPages->KeyExist("Kids")) { return 1; } - return _CountPages(pPages, 0); + std::set visited_pages; + visited_pages.insert(pPages); + return CountPages(pPages, &visited_pages); } + FX_BOOL CPDF_Document::IsContentUsedElsewhere(FX_DWORD objnum, CPDF_Dictionary* pThisPageDict) { for (int i = 0; i < m_PageList.GetSize(); i++) { diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index 6f0fc76fc0..03bc9aec58 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -36,20 +36,6 @@ struct SearchTagRecord { FX_DWORD m_Offset; }; -template -class ScopedSetInsertion { - public: - ScopedSetInsertion(std::set* org_set, T elem) - : m_Set(org_set), m_Entry(elem) { - m_Set->insert(m_Entry); - } - ~ScopedSetInsertion() { m_Set->erase(m_Entry); } - - private: - std::set* const m_Set; - const T m_Entry; -}; - int CompareFileSize(const void* p1, const void* p2) { return *(FX_FILESIZE*)p1 - *(FX_FILESIZE*)p2; } diff --git a/fpdfsdk/src/fpdfview_embeddertest.cpp b/fpdfsdk/src/fpdfview_embeddertest.cpp index 3147c01660..670c640668 100644 --- a/fpdfsdk/src/fpdfview_embeddertest.cpp +++ b/fpdfsdk/src/fpdfview_embeddertest.cpp @@ -227,4 +227,9 @@ TEST_F(FPDFViewEmbeddertest, Hang_343) { // dictionary will not cause an infinite loop in CPDF_SyntaxParser::GetObject(). TEST_F(FPDFViewEmbeddertest, Hang_344) { EXPECT_FALSE(OpenDocument("bug_344.pdf")); +} + +// The test should pass even when the file has circular references to pages. +TEST_F(FPDFViewEmbeddertest, Hang_360) { + EXPECT_FALSE(OpenDocument("bug_360.pdf")); } \ No newline at end of file diff --git a/testing/resources/bug_360.pdf b/testing/resources/bug_360.pdf new file mode 100644 index 0000000000..6e69c7bfed --- /dev/null +++ b/testing/resources/bug_360.pdf @@ -0,0 +1,14 @@ +%PDF-1.2 +xref +0 3 +0000%PDF-1.2 +xref +0 3 +0000000000 65535 f 0000000009 00000 n +00000000000 0000 n +trailer << /Size 3 /Root 1 0 R %PDF-1.5 +%âãÏÓ +1 0 obj +<>/Lang(es-ES)/Pages 1 0 R/Type/Catalog>endob +2 0 obj +<> -- cgit v1.2.3