From 240dec52b2e6502e7deb27a3535af3b1a3e23428 Mon Sep 17 00:00:00 2001 From: art-snake Date: Mon, 7 Nov 2016 08:42:04 -0800 Subject: Reland of Unify some code Unify some code Move parsing of linearized header into separate CPDF_Linearized class. Original review: https://codereview.chromium.org/2466023002/ Revert review: https://codereview.chromium.org/2474283005/ Revert reason was: Breaking the chrome roll. See https://build.chromium.org/p/tryserver.chromium.linux/builders/linux_chromium_rel_ng/builds/331856 ___ Added Fix for fuzzers. Review-Url: https://codereview.chromium.org/2477213003 --- BUILD.gn | 2 + core/fpdfapi/parser/cpdf_data_avail.cpp | 147 ++++++------------------- core/fpdfapi/parser/cpdf_data_avail.h | 5 +- core/fpdfapi/parser/cpdf_document.cpp | 21 +--- core/fpdfapi/parser/cpdf_document.h | 3 +- core/fpdfapi/parser/cpdf_document_unittest.cpp | 15 ++- core/fpdfapi/parser/cpdf_hint_tables.cpp | 32 ++---- core/fpdfapi/parser/cpdf_hint_tables.h | 9 +- core/fpdfapi/parser/cpdf_linearized.cpp | 71 ++++++++++++ core/fpdfapi/parser/cpdf_linearized.h | 55 +++++++++ core/fpdfapi/parser/cpdf_parser.cpp | 46 +++----- core/fpdfapi/parser/cpdf_parser.h | 6 +- testing/libfuzzer/pdf_hint_table_fuzzer.cc | 83 +++++++------- 13 files changed, 251 insertions(+), 244 deletions(-) create mode 100644 core/fpdfapi/parser/cpdf_linearized.cpp create mode 100644 core/fpdfapi/parser/cpdf_linearized.h diff --git a/BUILD.gn b/BUILD.gn index a54f6e4018..0cf5f54c77 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -500,6 +500,8 @@ static_library("fpdfapi") { "core/fpdfapi/parser/cpdf_hint_tables.h", "core/fpdfapi/parser/cpdf_indirect_object_holder.cpp", "core/fpdfapi/parser/cpdf_indirect_object_holder.h", + "core/fpdfapi/parser/cpdf_linearized.cpp", + "core/fpdfapi/parser/cpdf_linearized.h", "core/fpdfapi/parser/cpdf_name.cpp", "core/fpdfapi/parser/cpdf_name.h", "core/fpdfapi/parser/cpdf_null.cpp", diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index c4ed95e17f..e6d2c61ad3 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -15,6 +15,7 @@ #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_hint_tables.h" +#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_reference.h" @@ -43,7 +44,6 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail, m_dwCurrentOffset = 0; m_dwXRefOffset = 0; m_bufferOffset = 0; - m_dwFirstPageNo = 0; m_bufferSize = 0; m_PagesObjNum = 0; m_dwCurrentXRefSteam = 0; @@ -56,7 +56,6 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail, m_bDocAvail = false; m_bMainXRefLoadTried = false; m_bDocAvail = false; - m_bLinearized = false; m_bPagesLoad = false; m_bPagesTreeLoad = false; m_bMainXRefLoadedOK = false; @@ -66,7 +65,6 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail, m_bPageLoadedOK = false; m_bNeedDownLoadResource = false; m_bLinearizedFormParamLoad = false; - m_pLinearized = nullptr; m_pRoot = nullptr; m_pTrailer = nullptr; m_pCurrentParser = nullptr; @@ -83,9 +81,6 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail, CPDF_DataAvail::~CPDF_DataAvail() { m_pHintTables.reset(); - delete m_pLinearized; - delete m_pRoot; - delete m_pTrailer; for (CPDF_Object* pObject : m_arrayAcroforms) delete pObject; @@ -613,48 +608,27 @@ bool CPDF_DataAvail::CheckHeader(DownloadHints* pHints) { } bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) { - CPDF_Dictionary* pDict = m_pLinearized->GetDict(); - CPDF_Object* pEndOffSet = pDict ? pDict->GetObjectFor("E") : nullptr; - if (!pEndOffSet) { - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } - - CPDF_Object* pXRefOffset = pDict ? pDict->GetObjectFor("T") : nullptr; - if (!pXRefOffset) { - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } - - CPDF_Object* pFileLen = pDict ? pDict->GetObjectFor("L") : nullptr; - if (!pFileLen) { + if (!m_pLinearized->GetFirstPageEndOffset() || + !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) { m_docStatus = PDF_DATAAVAIL_ERROR; return false; } bool bNeedDownLoad = false; - if (pEndOffSet->IsNumber()) { - uint32_t dwEnd = pEndOffSet->GetInteger(); - dwEnd += 512; - if ((FX_FILESIZE)dwEnd > m_dwFileLen) - dwEnd = (uint32_t)m_dwFileLen; - - int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); - int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; - if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { - pHints->AddSegment(iStartPos, iSize); - bNeedDownLoad = true; - } - } + uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset(); + dwEnd += 512; + if ((FX_FILESIZE)dwEnd > m_dwFileLen) + dwEnd = (uint32_t)m_dwFileLen; - m_dwLastXRefOffset = 0; - FX_FILESIZE dwFileLen = 0; - if (pXRefOffset->IsNumber()) - m_dwLastXRefOffset = pXRefOffset->GetInteger(); - - if (pFileLen->IsNumber()) - dwFileLen = pFileLen->GetInteger(); + int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); + int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; + if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { + pHints->AddSegment(iStartPos, iSize); + bNeedDownLoad = true; + } + m_dwLastXRefOffset = m_pLinearized->GetLastXRefOffset(); + FX_FILESIZE dwFileLen = m_pLinearized->GetFileSize(); if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, (uint32_t)(dwFileLen - m_dwLastXRefOffset))) { if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) { @@ -702,52 +676,17 @@ bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, } bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) { - CPDF_Dictionary* pDict = m_pLinearized->GetDict(); - if (!pDict) { - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } - - // The actual value is not required here, but validate its existence and type. - CPDF_Number* pFirstPage = ToNumber(pDict->GetDirectObjectFor("O")); - if (!pFirstPage || !pFirstPage->IsInteger()) { - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } - - CPDF_Number* pPageCount = ToNumber(pDict->GetDirectObjectFor("N")); - if (!pPageCount || !pPageCount->IsInteger()) { - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } - - int nPageCount = pPageCount->GetInteger(); - if (nPageCount <= 1) { + if (m_pLinearized->GetPageCount() <= 1) { m_docStatus = PDF_DATAAVAIL_DONE; return true; } - - CPDF_Array* pHintStreamRange = pDict->GetArrayFor("H"); - size_t nHintStreamSize = pHintStreamRange ? pHintStreamRange->GetCount() : 0; - if (nHintStreamSize != 2 && nHintStreamSize != 4) { + if (!m_pLinearized->HasHintTable()) { m_docStatus = PDF_DATAAVAIL_ERROR; return false; } - for (const CPDF_Object* pArrayObject : *pHintStreamRange) { - const CPDF_Number* pNumber = ToNumber(pArrayObject->GetDirect()); - if (!pNumber || !pNumber->IsInteger()) { - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } - } - - FX_FILESIZE szHintStart = pHintStreamRange->GetIntegerAt(0); - FX_FILESIZE szHintLength = pHintStreamRange->GetIntegerAt(1); - if (szHintStart < 0 || szHintLength <= 0) { - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } + FX_FILESIZE szHintStart = m_pLinearized->GetHintStart(); + FX_FILESIZE szHintLength = m_pLinearized->GetHintLength(); if (!IsDataAvail(szHintStart, szHintLength, pHints)) return false; @@ -755,7 +694,7 @@ bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) { m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset); std::unique_ptr pHintTables( - new CPDF_HintTables(this, pDict)); + new CPDF_HintTables(this, m_pLinearized.get())); std::unique_ptr pHintStream( ParseIndirectObjectAt(szHintStart, 0)); CPDF_Stream* pStream = ToStream(pHintStream.get()); @@ -819,12 +758,12 @@ CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() { } bool CPDF_DataAvail::IsLinearized() { - return m_bLinearized; + return !!m_pLinearized; } bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) { if (m_pLinearized) - return m_bLinearized; + return true; ScopedFileStream file(FX_CreateMemoryStream(pData, (size_t)dwLen, false)); @@ -844,27 +783,13 @@ bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) { return false; uint32_t objnum = FXSYS_atoui(wordObjNum.c_str()); - m_pLinearized = - ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum); - if (!m_pLinearized) - return false; - - CPDF_Dictionary* pDict = m_pLinearized->GetDict(); - if (!pDict || !pDict->GetObjectFor("Linearized")) + m_pLinearized = CPDF_Linearized::CreateForObject(pdfium::WrapUnique( + ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum))); + if (!m_pLinearized || + m_pLinearized->GetFileSize() != m_pFileRead->GetSize()) { + m_pLinearized.reset(); return false; - - CPDF_Object* pLen = pDict->GetObjectFor("L"); - if (!pLen) - return false; - - if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) - return false; - - m_bLinearized = true; - - if (CPDF_Number* pNo = ToNumber(pDict->GetObjectFor("P"))) - m_dwFirstPageNo = pNo->GetInteger(); - + } return true; } @@ -1600,8 +1525,8 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( if (pdfium::ContainsKey(m_pagesLoadState, dwPage)) return DataAvailable; - if (m_bLinearized) { - if (dwPage == m_dwFirstPageNo) { + if (m_pLinearized) { + if (dwPage == m_pLinearized->GetFirstPageNo()) { DocAvailStatus nRet = CheckLinearizedFirstPage(dwPage, pHints); if (nRet == DataAvailable) m_pagesLoadState.insert(dwPage); @@ -1731,11 +1656,8 @@ void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, } int CPDF_DataAvail::GetPageCount() const { - if (m_pLinearized) { - CPDF_Dictionary* pDict = m_pLinearized->GetDict(); - CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("N") : nullptr; - return pObj ? pObj->GetInteger() : 0; - } + if (m_pLinearized) + return m_pLinearized->GetPageCount(); return m_pDocument ? m_pDocument->GetPageCount() : 0; } @@ -1748,10 +1670,7 @@ CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { if (!m_pLinearized || !m_pHintTables) return nullptr; - CPDF_Dictionary* pDict = m_pLinearized->GetDict(); - CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("P") : nullptr; - int firstPageNum = pObj ? pObj->GetInteger() : 0; - if (index == firstPageNum) + if (index == static_cast(m_pLinearized->GetFirstPageNo())) return nullptr; FX_FILESIZE szPageStartPos = 0; FX_FILESIZE szPageLength = 0; diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index 0a8716a498..bff9f2943b 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -16,6 +16,7 @@ class CPDF_Dictionary; class CPDF_HintTables; class CPDF_IndirectObjectHolder; +class CPDF_Linearized; class CPDF_Parser; enum PDF_DATAAVAIL_STATUS { @@ -199,7 +200,7 @@ class CPDF_DataAvail final { CPDF_Object* m_pRoot; uint32_t m_dwRootObjNum; uint32_t m_dwInfoObjNum; - CPDF_Object* m_pLinearized; + std::unique_ptr m_pLinearized; CPDF_Object* m_pTrailer; bool m_bDocAvail; FX_FILESIZE m_dwHeaderOffset; @@ -220,8 +221,6 @@ class CPDF_DataAvail final { CFX_ArrayTemplate m_XRefStreamList; CFX_ArrayTemplate m_PageObjList; uint32_t m_PagesObjNum; - bool m_bLinearized; - uint32_t m_dwFirstPageNo; bool m_bLinearedDataOK; bool m_bMainXRefLoadTried; bool m_bMainXRefLoadedOK; diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp index 8e181de97c..d9ffc0b28a 100644 --- a/core/fpdfapi/parser/cpdf_document.cpp +++ b/core/fpdfapi/parser/cpdf_document.cpp @@ -17,6 +17,7 @@ #include "core/fpdfapi/page/pageint.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_parser.h" #include "core/fpdfapi/parser/cpdf_reference.h" @@ -378,23 +379,13 @@ void CPDF_Document::LoadDoc() { m_PageList.SetSize(RetrievePageCount()); } -void CPDF_Document::LoadLinearizedDoc(CPDF_Dictionary* pLinearizationParams) { +void CPDF_Document::LoadLinearizedDoc( + const CPDF_Linearized* pLinearizationParams) { m_bLinearized = true; LoadDocInternal(); - - uint32_t dwPageCount = 0; - CPDF_Object* pCount = pLinearizationParams->GetObjectFor("N"); - if (ToNumber(pCount)) - dwPageCount = pCount->GetInteger(); - m_PageList.SetSize(dwPageCount); - - CPDF_Object* pNo = pLinearizationParams->GetObjectFor("P"); - if (ToNumber(pNo)) - m_iFirstPageNo = pNo->GetInteger(); - - CPDF_Object* pObjNum = pLinearizationParams->GetObjectFor("O"); - if (ToNumber(pObjNum)) - m_dwFirstPageObjNum = pObjNum->GetInteger(); + m_PageList.SetSize(pLinearizationParams->GetPageCount()); + m_iFirstPageNo = pLinearizationParams->GetFirstPageNo(); + m_dwFirstPageObjNum = pLinearizationParams->GetFirstPageObjNum(); } void CPDF_Document::LoadPages() { diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h index 0a99e42c3f..1b18015200 100644 --- a/core/fpdfapi/parser/cpdf_document.h +++ b/core/fpdfapi/parser/cpdf_document.h @@ -26,6 +26,7 @@ class CPDF_Font; class CPDF_FontEncoding; class CPDF_IccProfile; class CPDF_Image; +class CPDF_Linearized; class CPDF_Parser; class CPDF_Pattern; class CPDF_StreamAcc; @@ -85,7 +86,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { CPDF_IccProfile* LoadIccProfile(CPDF_Stream* pStream); void LoadDoc(); - void LoadLinearizedDoc(CPDF_Dictionary* pLinearizationParams); + void LoadLinearizedDoc(const CPDF_Linearized* pLinearizationParams); void LoadPages(); void CreateNewDoc(); diff --git a/core/fpdfapi/parser/cpdf_document_unittest.cpp b/core/fpdfapi/parser/cpdf_document_unittest.cpp index 16d0ade069..e20a5a1d1d 100644 --- a/core/fpdfapi/parser/cpdf_document_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_document_unittest.cpp @@ -9,15 +9,13 @@ #include "core/fpdfapi/cpdf_modulemgr.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_parser.h" #include "core/fxcrt/fx_memory.h" #include "testing/gtest/include/gtest/gtest.h" namespace { -using ScopedDictionary = - std::unique_ptr>; - CPDF_Dictionary* CreatePageTreeNode(CPDF_Array* kids, CPDF_Document* pDoc, int count) { @@ -77,6 +75,11 @@ class CPDF_TestDocumentForPages : public CPDF_Document { private: std::unique_ptr m_pOwnedRootDict; }; + +class TestLinearized : public CPDF_Linearized { + public: + explicit TestLinearized(CPDF_Dictionary* dict) : CPDF_Linearized(dict) {} +}; } // namespace class cpdf_document_test : public testing::Test { @@ -142,10 +145,12 @@ TEST_F(cpdf_document_test, UseCachedPageObjNumIfHaveNotPagesDict) { // can be not exists in this case. // (case, when hint table is used to page check in CPDF_DataAvail). CPDF_Document document(pdfium::MakeUnique()); - std::unique_ptr dict(new CPDF_Dictionary()); + auto dict = pdfium::MakeUnique(); + dict->SetBooleanFor("Linearized", true); const int page_count = 100; dict->SetIntegerFor("N", page_count); - document.LoadLinearizedDoc(dict.get()); + TestLinearized linearized(dict.get()); + document.LoadLinearizedDoc(&linearized); ASSERT_EQ(page_count, document.GetPageCount()); CPDF_Object* page_stub = new CPDF_Dictionary(); const uint32_t obj_num = document.AddIndirectObject(page_stub); diff --git a/core/fpdfapi/parser/cpdf_hint_tables.cpp b/core/fpdfapi/parser/cpdf_hint_tables.cpp index 045b94cac5..bbc57dace9 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables.cpp +++ b/core/fpdfapi/parser/cpdf_hint_tables.cpp @@ -12,6 +12,7 @@ #include "core/fpdfapi/parser/cpdf_data_avail.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/cpdf_stream_acc.h" #include "core/fxcrt/fx_safe_types.h" @@ -34,12 +35,12 @@ bool IsValidPageOffsetHintTableBitCount(uint32_t bits) { } // namespace CPDF_HintTables::CPDF_HintTables(CPDF_DataAvail* pDataAvail, - CPDF_Dictionary* pLinearized) + CPDF_Linearized* pLinearized) : m_pDataAvail(pDataAvail), - m_pLinearizedDict(pLinearized), + m_pLinearized(pLinearized), m_nFirstPageSharedObjs(0), m_szFirstPageObjOffset(0) { - ASSERT(m_pLinearizedDict); + ASSERT(m_pLinearized); } CPDF_HintTables::~CPDF_HintTables() {} @@ -487,38 +488,25 @@ bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) { } int CPDF_HintTables::GetEndOfFirstPageOffset() const { - CPDF_Object* pOffsetE = m_pLinearizedDict->GetDirectObjectFor("E"); - return pOffsetE ? pOffsetE->GetInteger() : -1; + return static_cast(m_pLinearized->GetFirstPageEndOffset()); } int CPDF_HintTables::GetNumberOfPages() const { - CPDF_Object* pPageNum = m_pLinearizedDict->GetDirectObjectFor("N"); - return pPageNum ? pPageNum->GetInteger() : 0; + return static_cast(m_pLinearized->GetPageCount()); } int CPDF_HintTables::GetFirstPageObjectNumber() const { - CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetDirectObjectFor("O"); - return pFirstPageObj ? pFirstPageObj->GetInteger() : -1; + return static_cast(m_pLinearized->GetFirstPageObjNum()); } int CPDF_HintTables::GetFirstPageNumber() const { - CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetDirectObjectFor("P"); - return pFirstPageNum ? pFirstPageNum->GetInteger() : 0; + return static_cast(m_pLinearized->GetFirstPageNo()); } int CPDF_HintTables::ReadPrimaryHintStreamOffset() const { - return ReadPrimaryHintStream(0); + return static_cast(m_pLinearized->GetHintStart()); } int CPDF_HintTables::ReadPrimaryHintStreamLength() const { - return ReadPrimaryHintStream(1); -} - -int CPDF_HintTables::ReadPrimaryHintStream(int index) const { - CPDF_Array* pRange = m_pLinearizedDict->GetArrayFor("H"); - if (!pRange) - return -1; - - CPDF_Object* pStreamLen = pRange->GetDirectObjectAt(index); - return pStreamLen ? pStreamLen->GetInteger() : -1; + return static_cast(m_pLinearized->GetHintLength()); } diff --git a/core/fpdfapi/parser/cpdf_hint_tables.h b/core/fpdfapi/parser/cpdf_hint_tables.h index 84f48cf41f..68b5059b05 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables.h +++ b/core/fpdfapi/parser/cpdf_hint_tables.h @@ -14,12 +14,12 @@ #include "core/fxcrt/fx_stream.h" class CFX_BitStream; -class CPDF_Dictionary; +class CPDF_Linearized; class CPDF_Stream; class CPDF_HintTables { public: - CPDF_HintTables(CPDF_DataAvail* pDataAvail, CPDF_Dictionary* pLinearized); + CPDF_HintTables(CPDF_DataAvail* pDataAvail, CPDF_Linearized* pLinearized); virtual ~CPDF_HintTables(); bool GetPagePos(uint32_t index, @@ -46,9 +46,6 @@ class CPDF_HintTables { virtual int ReadPrimaryHintStreamOffset() const; virtual int ReadPrimaryHintStreamLength() const; - // Helper for the ReadPrimaryHintStream methods above. - int ReadPrimaryHintStream(int index) const; - uint32_t GetItemLength(uint32_t index, const std::vector& szArray); @@ -56,7 +53,7 @@ class CPDF_HintTables { CPDF_DataAvail* const m_pDataAvail; // Owned by |m_pDataAvail|. - CPDF_Dictionary* const m_pLinearizedDict; + CPDF_Linearized* const m_pLinearized; uint32_t m_nFirstPageSharedObjs; FX_FILESIZE m_szFirstPageObjOffset; diff --git a/core/fpdfapi/parser/cpdf_linearized.cpp b/core/fpdfapi/parser/cpdf_linearized.cpp new file mode 100644 index 0000000000..ec57f100cb --- /dev/null +++ b/core/fpdfapi/parser/cpdf_linearized.cpp @@ -0,0 +1,71 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfapi/parser/cpdf_linearized.h" + +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "third_party/base/ptr_util.h" + +namespace { + +template +bool IsValidNumericDictionaryValue(const CPDF_Dictionary* pDict, + const char* key, + T min_value, + bool must_exist = true) { + if (!pDict->KeyExist(key)) + return !must_exist; + const CPDF_Number* pNum = ToNumber(pDict->GetObjectFor(key)); + if (!pNum || !pNum->IsInteger()) + return false; + const int raw_value = pNum->GetInteger(); + if (!pdfium::base::IsValueInRangeForNumericType(raw_value)) + return false; + return static_cast(raw_value) >= min_value; +} + +} // namespace + +// static +std::unique_ptr CPDF_Linearized::CreateForObject( + std::unique_ptr pObj) { + auto pDict = ToDictionary(std::move(pObj)); + if (!pDict || !pDict->KeyExist("Linearized") || + !IsValidNumericDictionaryValue(pDict.get(), "L", 1) || + !IsValidNumericDictionaryValue(pDict.get(), "P", 0, false) || + !IsValidNumericDictionaryValue(pDict.get(), "T", 1) || + !IsValidNumericDictionaryValue(pDict.get(), "N", 0) || + !IsValidNumericDictionaryValue(pDict.get(), "E", 1) || + !IsValidNumericDictionaryValue(pDict.get(), "O", 1)) + return nullptr; + return pdfium::WrapUnique(new CPDF_Linearized(pDict.get())); +} + +CPDF_Linearized::CPDF_Linearized(const CPDF_Dictionary* pDict) { + if (!pDict) + return; + m_szFileSize = pDict->GetIntegerFor("L"); + m_dwFirstPageNo = pDict->GetIntegerFor("P"); + m_szLastXRefOffset = pDict->GetIntegerFor("T"); + m_PageCount = pDict->GetIntegerFor("N"); + m_szFirstPageEndOffset = pDict->GetIntegerFor("E"); + m_FirstPageObjNum = pDict->GetIntegerFor("O"); + const CPDF_Array* pHintStreamRange = pDict->GetArrayFor("H"); + const size_t nHintStreamSize = + pHintStreamRange ? pHintStreamRange->GetCount() : 0; + if (nHintStreamSize == 2 || nHintStreamSize == 4) { + m_szHintStart = std::max(pHintStreamRange->GetIntegerAt(0), 0); + m_szHintLength = std::max(pHintStreamRange->GetIntegerAt(1), 0); + } +} + +CPDF_Linearized::~CPDF_Linearized() {} + +bool CPDF_Linearized::HasHintTable() const { + return GetPageCount() > 1 && GetHintStart() > 0 && GetHintLength() > 0; +} diff --git a/core/fpdfapi/parser/cpdf_linearized.h b/core/fpdfapi/parser/cpdf_linearized.h new file mode 100644 index 0000000000..caf538e3fe --- /dev/null +++ b/core/fpdfapi/parser/cpdf_linearized.h @@ -0,0 +1,55 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_H_ +#define CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_H_ + +#include + +#include "core/fxcrt/fx_memory.h" +#include "core/fxcrt/fx_stream.h" + +class CPDF_Dictionary; +class CPDF_Object; + +class CPDF_Linearized { + public: + ~CPDF_Linearized(); + static std::unique_ptr CreateForObject( + std::unique_ptr pObj); + + // Will only return values > 0. + FX_FILESIZE GetFileSize() const { return m_szFileSize; } + uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; } + // Will only return values > 0. + FX_FILESIZE GetLastXRefOffset() const { return m_szLastXRefOffset; } + uint32_t GetPageCount() const { return m_PageCount; } + // Will only return values > 0. + FX_FILESIZE GetFirstPageEndOffset() const { return m_szFirstPageEndOffset; } + // Will only return values > 0. + uint32_t GetFirstPageObjNum() const { return m_FirstPageObjNum; } + + bool HasHintTable() const; + // Will only return values > 0. + FX_FILESIZE GetHintStart() const { return m_szHintStart; } + // Will only return values > 0. + FX_FILESIZE GetHintLength() const { return m_szHintLength; } + + protected: + explicit CPDF_Linearized(const CPDF_Dictionary* pDict); + + private: + FX_FILESIZE m_szFileSize = 0; + uint32_t m_dwFirstPageNo = 0; + FX_FILESIZE m_szLastXRefOffset = 0; + uint32_t m_PageCount = 0; + FX_FILESIZE m_szFirstPageEndOffset = 0; + uint32_t m_FirstPageObjNum = 0; + FX_FILESIZE m_szHintStart = 0; + FX_FILESIZE m_szHintLength = 0; +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_H_ diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index c8c07bd0b7..044d12d6ef 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -12,6 +12,7 @@ #include "core/fpdfapi/parser/cpdf_crypto_handler.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_security_handler.h" @@ -54,8 +55,6 @@ CPDF_Parser::CPDF_Parser() m_pTrailer(nullptr), m_pEncryptDict(nullptr), m_bVersionUpdated(false), - m_pLinearized(nullptr), - m_dwFirstPageNo(0), m_dwXrefStartObjNum(0) { m_pSyntax.reset(new CPDF_SyntaxParser); } @@ -72,8 +71,6 @@ CPDF_Parser::~CPDF_Parser() { for (CPDF_Dictionary* trailer : m_Trailers) delete trailer; - - delete m_pLinearized; } uint32_t CPDF_Parser::GetLastObjNum() const { @@ -1402,6 +1399,10 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( return pObj; } +uint32_t CPDF_Parser::GetFirstPageNo() const { + return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0; +} + CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { if (m_pSyntax->GetKeyword() != "trailer") return nullptr; @@ -1448,35 +1449,14 @@ bool CPDF_Parser::IsLinearizedFile(IFX_SeekableReadStream* pFileAccess, return false; } - m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true); + m_pLinearized = CPDF_Linearized::CreateForObject( + pdfium::WrapUnique(m_pSyntax->GetObject(nullptr, objnum, gennum, true))); if (!m_pLinearized) return false; - - CPDF_Dictionary* pDict = m_pLinearized->GetDict(); - if (pDict && pDict->GetObjectFor("Linearized")) { - m_pSyntax->GetNextWord(nullptr); - - CPDF_Object* pLen = pDict->GetObjectFor("L"); - if (!pLen) { - delete m_pLinearized; - m_pLinearized = nullptr; - return false; - } - - if (pLen->GetInteger() != (int)pFileAccess->GetSize()) - return false; - - if (CPDF_Number* pNo = ToNumber(pDict->GetObjectFor("P"))) - m_dwFirstPageNo = pNo->GetInteger(); - - if (CPDF_Number* pTable = ToNumber(pDict->GetObjectFor("T"))) - m_LastXRefOffset = pTable->GetInteger(); - - return true; - } - delete m_pLinearized; - m_pLinearized = nullptr; - return false; + m_LastXRefOffset = m_pLinearized->GetLastXRefOffset(); + // Move parser onto first page xref table start. + m_pSyntax->GetNextWord(nullptr); + return true; } CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( @@ -1525,7 +1505,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( if (eRet != SUCCESS) return eRet; - m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict()); + m_pDocument->LoadLinearizedDoc(m_pLinearized.get()); if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { if (bXRefRebuilt) return FORMAT_ERROR; @@ -1538,7 +1518,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( if (eRet != SUCCESS) return eRet; - m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict()); + m_pDocument->LoadLinearizedDoc(m_pLinearized.get()); if (!m_pDocument->GetRoot()) return FORMAT_ERROR; } diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h index 3be157a1a2..b8ce7ab294 100644 --- a/core/fpdfapi/parser/cpdf_parser.h +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -19,6 +19,7 @@ class CPDF_CryptoHandler; class CPDF_Dictionary; class CPDF_Document; class CPDF_IndirectObjectHolder; +class CPDF_Linearized; class CPDF_Object; class CPDF_SecurityHandler; class CPDF_StreamAcc; @@ -88,7 +89,7 @@ class CPDF_Parser { uint32_t objnum, FX_FILESIZE* pResultPos); - uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; } + uint32_t GetFirstPageNo() const; protected: struct ObjectInfo { @@ -157,8 +158,7 @@ class CPDF_Parser { std::set m_SortedOffset; std::vector m_Trailers; bool m_bVersionUpdated; - CPDF_Object* m_pLinearized; - uint32_t m_dwFirstPageNo; + std::unique_ptr m_pLinearized; uint32_t m_dwXrefStartObjNum; // A map of object numbers to indirect streams. Map owns the streams. diff --git a/testing/libfuzzer/pdf_hint_table_fuzzer.cc b/testing/libfuzzer/pdf_hint_table_fuzzer.cc index b01c87216b..ec51517fc0 100644 --- a/testing/libfuzzer/pdf_hint_table_fuzzer.cc +++ b/testing/libfuzzer/pdf_hint_table_fuzzer.cc @@ -4,18 +4,11 @@ #include +#include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_hint_tables.h" - -struct DummyLinearizedDictionary { - int end_of_first_page_offset; - int number_of_pages; - int first_page_object_number; - int first_page_number; - int primary_hint_stream_offset; - int primary_hint_stream_length; - int shared_hint_table_offset; -}; +#include "core/fpdfapi/parser/cpdf_linearized.h" +#include "third_party/base/ptr_util.h" int32_t GetData(const int32_t** data32, const uint8_t** data, size_t* size) { const int32_t* ret = *data32; @@ -27,64 +20,70 @@ int32_t GetData(const int32_t** data32, const uint8_t** data, size_t* size) { class HintTableForFuzzing : public CPDF_HintTables { public: - HintTableForFuzzing(DummyLinearizedDictionary* dict, - CPDF_Dictionary* linearized_dict) - : CPDF_HintTables(nullptr, linearized_dict), dict_(dict) {} + HintTableForFuzzing(CPDF_Linearized* pLinearized, + int shared_hint_table_offset) + : CPDF_HintTables(nullptr, pLinearized), + shared_hint_table_offset_(shared_hint_table_offset) {} ~HintTableForFuzzing() {} void Fuzz(const uint8_t* data, size_t size) { - if (dict_->shared_hint_table_offset <= 0) + if (shared_hint_table_offset_ <= 0) return; - if (size < static_cast(dict_->shared_hint_table_offset)) + if (size < static_cast(shared_hint_table_offset_)) return; CFX_BitStream bs; bs.Init(data, size); if (!ReadPageHintTable(&bs)) return; - ReadSharedObjHintTable(&bs, dict_->shared_hint_table_offset); + ReadSharedObjHintTable(&bs, shared_hint_table_offset_); } private: - int GetEndOfFirstPageOffset() const override { - return dict_->end_of_first_page_offset; - } - int GetNumberOfPages() const override { return dict_->number_of_pages; } - int GetFirstPageObjectNumber() const override { - return dict_->first_page_object_number; - } - int GetFirstPageNumber() const override { return dict_->first_page_number; } - int ReadPrimaryHintStreamOffset() const override { - return dict_->primary_hint_stream_offset; - } - int ReadPrimaryHintStreamLength() const override { - return dict_->primary_hint_stream_length; - } + int shared_hint_table_offset_; +}; - DummyLinearizedDictionary* const dict_; +class FakeLinearized : public CPDF_Linearized { + public: + explicit FakeLinearized(CPDF_Dictionary* linearized_dict) + : CPDF_Linearized(linearized_dict) {} }; extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { - // Need 28 bytes for |dummy_dict|. + // Need 28 bytes for |linearized_dict|. // The header section of page offset hint table is 36 bytes. // The header section of shared object hint table is 24 bytes. if (size < 28 + 36 + 24) return 0; const int32_t* data32 = reinterpret_cast(data); - DummyLinearizedDictionary dummy_dict; - dummy_dict.end_of_first_page_offset = GetData(&data32, &data, &size); - dummy_dict.number_of_pages = GetData(&data32, &data, &size); - dummy_dict.first_page_object_number = GetData(&data32, &data, &size); - dummy_dict.first_page_number = GetData(&data32, &data, &size); - dummy_dict.primary_hint_stream_offset = GetData(&data32, &data, &size); - dummy_dict.primary_hint_stream_length = GetData(&data32, &data, &size); - dummy_dict.shared_hint_table_offset = GetData(&data32, &data, &size); - std::unique_ptr dummy_linearized_dict(new CPDF_Dictionary); + auto linearized_dict = pdfium::MakeUnique(); + // Set initial value. + linearized_dict->SetBooleanFor("Linearized", true); + // Set first page end offset + linearized_dict->SetIntegerFor("E", GetData(&data32, &data, &size)); + // Set page count + linearized_dict->SetIntegerFor("N", GetData(&data32, &data, &size)); + // Set first page obj num + linearized_dict->SetIntegerFor("O", GetData(&data32, &data, &size)); + // Set first page no + linearized_dict->SetIntegerFor("P", GetData(&data32, &data, &size)); + + auto hint_info = pdfium::MakeUnique(); + // Add primary hint stream offset + hint_info->AddInteger(GetData(&data32, &data, &size)); + // Add primary hint stream size + hint_info->AddInteger(GetData(&data32, &data, &size)); + // Set hint stream info. + linearized_dict->SetFor("H", hint_info.release()); + + const int shared_hint_table_offset = GetData(&data32, &data, &size); + { - HintTableForFuzzing hint_table(&dummy_dict, dummy_linearized_dict.get()); + FakeLinearized linearized(linearized_dict.get()); + HintTableForFuzzing hint_table(&linearized, shared_hint_table_offset); hint_table.Fuzz(data, size); } return 0; -- cgit v1.2.3