From cd1e9ff4f432cbc29ed279e6891fb7ddc2ea3734 Mon Sep 17 00:00:00 2001 From: weili Date: Fri, 29 Apr 2016 10:24:02 -0700 Subject: Relax a couple checks to allow certain non-standard PDF files. Some non-standard PDF files misuse the size of cross reference table, and reuse some object number which the old one is still in use. PDFium can relax the reusing of xref objects only since it is not referred in the pdf document. When the size of cross reference table is larger than defined, PDFium will try to continue other than abort. BUG=chromium:596947 Review-Url: https://codereview.chromium.org/1926823002 --- .../fpdf_parser/cpdf_indirect_object_holder.cpp | 20 ++++++++++++++++---- core/fpdfapi/fpdf_parser/cpdf_parser.cpp | 7 ++++++- .../fpdf_parser/cpdf_parser_embeddertest.cpp | 16 ++++++++++++++++ testing/resources/bug_596947.pdf | Bin 0 -> 971 bytes 4 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 testing/resources/bug_596947.pdf diff --git a/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp b/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp index ef3395d3ae..4020b003bb 100644 --- a/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp +++ b/core/fpdfapi/fpdf_parser/cpdf_indirect_object_holder.cpp @@ -6,6 +6,7 @@ #include "core/fpdfapi/fpdf_parser/include/cpdf_indirect_object_holder.h" +#include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_object.h" #include "core/fpdfapi/fpdf_parser/include/cpdf_parser.h" @@ -24,17 +25,28 @@ CPDF_Object* CPDF_IndirectObjectHolder::GetIndirectObject(uint32_t objnum) { if (objnum == 0) return nullptr; + CPDF_Object* result_obj = nullptr; auto it = m_IndirectObjs.find(objnum); - if (it != m_IndirectObjs.end()) - return it->second->GetObjNum() != CPDF_Object::kInvalidObjNum ? it->second - : nullptr; + if (it != m_IndirectObjs.end()) { + CPDF_Object* obj = it->second; + result_obj = + obj->GetObjNum() != CPDF_Object::kInvalidObjNum ? it->second : nullptr; + // Xref object is not used by the pdf document itself. Some software thus + // reuse an object number for xref object. So when we get an xref object, + // try again to see whether another object with the same number is defined. + // If so, use that object instead. See chromium:596947. + CPDF_Dictionary* dict = + obj->IsStream() ? obj->GetDict() : obj->AsDictionary(); + if (!dict || dict->GetStringBy("Type") != "XRef") + return result_obj; + } if (!m_pParser) return nullptr; CPDF_Object* pObj = m_pParser->ParseIndirectObject(this, objnum); if (!pObj) - return nullptr; + return result_obj; pObj->m_ObjNum = objnum; m_LastObjNum = std::max(m_LastObjNum, objnum); diff --git a/core/fpdfapi/fpdf_parser/cpdf_parser.cpp b/core/fpdfapi/fpdf_parser/cpdf_parser.cpp index acf51de1ea..c66647846d 100644 --- a/core/fpdfapi/fpdf_parser/cpdf_parser.cpp +++ b/core/fpdfapi/fpdf_parser/cpdf_parser.cpp @@ -1077,8 +1077,13 @@ FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { FX_SAFE_UINT32 dwMaxObjNum = startnum; dwMaxObjNum += count; uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; - if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) + if (!dwMaxObjNum.IsValid()) continue; + // When the max object number is larger than the defined size, try to + // increase the size to accomodate more objects. + // Some software messes this up, see chromium:596947. + if (dwMaxObjNum.ValueOrDie() > dwV5Size) + ShrinkObjectMap(dwMaxObjNum.ValueOrDie()); for (uint32_t j = 0; j < count; j++) { int32_t type = 1; diff --git a/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp b/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp index 042b221554..d070bd6a4c 100644 --- a/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp +++ b/core/fpdfapi/fpdf_parser/cpdf_parser_embeddertest.cpp @@ -54,3 +54,19 @@ TEST_F(CPDFParserEmbeddertest, Bug_602650) { FPDFText_ClosePage(text_page); UnloadPage(page); } + +TEST_F(CPDFParserEmbeddertest, Bug_596947) { + // Test the case that the size of cross reference entries doesn't match with + // what is defined, and a certain case of reuse object number for cross + // reference object. + EXPECT_TRUE(OpenDocument("bug_596947.pdf")); + FPDF_PAGE page = LoadPage(0); + EXPECT_NE(nullptr, page); + FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page); + EXPECT_NE(nullptr, text_page); + // The page should not be blank. + EXPECT_LT(0, FPDFText_CountChars(text_page)); + + FPDFText_ClosePage(text_page); + UnloadPage(page); +} diff --git a/testing/resources/bug_596947.pdf b/testing/resources/bug_596947.pdf new file mode 100644 index 0000000000..b3cbd19bf2 Binary files /dev/null and b/testing/resources/bug_596947.pdf differ -- cgit v1.2.3