From a031357eaab7c934ac03717968cf78ff556c819b Mon Sep 17 00:00:00 2001 From: weili Date: Wed, 4 May 2016 09:36:11 -0700 Subject: Reland of Relax a couple checks to allow certain non-standard PDF files. (patchset #1 id:1 of https://codereview.chromium.org/1946693002/ ) Reason for revert: The culprit was found and confirmed, not this one. Original issue's description: > Revert of Relax a couple checks to allow certain non-standard PDF files. (patchset #1 id:1 of https://codereview.chromium.org/1926823002/ ) > > Reason for revert: > Speculatively revert due to high volume of crashes on Chromium. > > Original issue's description: > > Relax a couple checks to allow certain non-standard PDF files. > > > > Some non-standard PDF files misuse the size of the cross reference table, > > and reuse an object number whose old object is still in use. PDFium > > can relax the check on reuse for xref objects only, since an xref object is not referred to in > > the pdf document. When the size of the cross reference table is larger > > than defined, PDFium will try to continue rather than abort. > > > > BUG=chromium:596947 > > > > Committed: https://pdfium.googlesource.com/pdfium/+/cd1e9ff4f432cbc29ed279e6891fb7ddc2ea3734 > > TBR=thestig@chromium.org,dsinclair@chromium.org > # Not skipping CQ checks because original CL landed more than 1 days ago. > BUG=chromium:596947 > > Committed: https://pdfium.googlesource.com/pdfium/+/5fc4f31285c3a88fc157fd2d9b9cf2eb5c7cabed TBR=thestig@chromium.org,dsinclair@chromium.org # Skipping CQ checks because original CL landed less than 1 days ago. 
NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=chromium:596947 Review-Url: https://codereview.chromium.org/1947983002 --- .../fpdf_parser/cpdf_indirect_object_holder.cpp | 20 ++++++++++++++++---- core/fpdfapi/fpdf_parser/cpdf_parser.cpp | 7 ++++++- .../fpdf_parser/cpdf_parser_embeddertest.cpp | 16 ++++++++++++++++ testing/resources/bug_596947.pdf | Bin 0 -> 971 bytes 4 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 testing/resources/bug_596947.pdf diff --git a/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp b/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp index ef3395d3ae..4020b003bb 100644 --- a/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp +++ b/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp @@ -6,6 +6,7 @@ #include "core/fpdfapi/fpdf_parser/include/cpdf_indirect_object_holder.h" +#include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_object.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_parser.h" @@ -24,17 +25,28 @@ CPDF_Object* CPDF_IndirectObjectHolder::GetIndirectObject(uint32_t objnum) { if (objnum == 0) return nullptr; + CPDF_Object* result_obj = nullptr; auto it = m_IndirectObjs.find(objnum); - if (it != m_IndirectObjs.end()) - return it->second->GetObjNum() != CPDF_Object::kInvalidObjNum ? it->second - : nullptr; + if (it != m_IndirectObjs.end()) { + CPDF_Object* obj = it->second; + result_obj = + obj->GetObjNum() != CPDF_Object::kInvalidObjNum ? it->second : nullptr; + // Xref object is not used by the pdf document itself. Some software thus + // reuse an object number for xref object. So when we get an xref object, + // try again to see whether another object with the same number is defined. + // If so, use that object instead. See chromium:596947. + CPDF_Dictionary* dict = + obj->IsStream() ? 
obj->GetDict() : obj->AsDictionary(); + if (!dict || dict->GetStringBy("Type") != "XRef") + return result_obj; + } if (!m_pParser) return nullptr; CPDF_Object* pObj = m_pParser->ParseIndirectObject(this, objnum); if (!pObj) - return nullptr; + return result_obj; pObj->m_ObjNum = objnum; m_LastObjNum = std::max(m_LastObjNum, objnum); diff --git a/core/fpdfapi/fpdf_parser/cpdf_parser.cpp b/core/fpdfapi/fpdf_parser/cpdf_parser.cpp index acf51de1ea..c66647846d 100644 --- a/core/fpdfapi/fpdf_parser/cpdf_parser.cpp +++ b/core/fpdfapi/fpdf_parser/cpdf_parser.cpp @@ -1077,8 +1077,13 @@ FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { FX_SAFE_UINT32 dwMaxObjNum = startnum; dwMaxObjNum += count; uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; - if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) + if (!dwMaxObjNum.IsValid()) continue; + // When the max object number is larger than the defined size, try to + // increase the size to accomodate more objects. + // Some software messes this up, see chromium:596947. + if (dwMaxObjNum.ValueOrDie() > dwV5Size) + ShrinkObjectMap(dwMaxObjNum.ValueOrDie()); for (uint32_t j = 0; j < count; j++) { int32_t type = 1; diff --git a/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp b/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp index 042b221554..d070bd6a4c 100644 --- a/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp +++ b/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp @@ -54,3 +54,19 @@ TEST_F(CPDFParserEmbeddertest, Bug_602650) { FPDFText_ClosePage(text_page); UnloadPage(page); } + +TEST_F(CPDFParserEmbeddertest, Bug_596947) { + // Test the case that the size of cross reference entries doesn't match with + // what is defined, and a certain case of reuse object number for cross + // reference object. 
+ EXPECT_TRUE(OpenDocument("bug_596947.pdf")); + FPDF_PAGE page = LoadPage(0); + EXPECT_NE(nullptr, page); + FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page); + EXPECT_NE(nullptr, text_page); + // The page should not be blank. + EXPECT_LT(0, FPDFText_CountChars(text_page)); + + FPDFText_ClosePage(text_page); + UnloadPage(page); +} diff --git a/testing/resources/bug_596947.pdf b/testing/resources/bug_596947.pdf new file mode 100644 index 0000000000..b3cbd19bf2 Binary files /dev/null and b/testing/resources/bug_596947.pdf differ -- cgit v1.2.3