From 19cd7cc42700b844e6d9fc170a65bf7122d468b8 Mon Sep 17 00:00:00 2001 From: dsinclair Date: Fri, 4 Nov 2016 21:06:05 -0700 Subject: Revert of Unify some code (patchset #14 id:260001 of https://codereview.chromium.org/2466023002/ ) Reason for revert: Breaking the chrome roll. See https://build.chromium.org/p/tryserver.chromium.linux/builders/linux_chromium_rel_ng/builds/331856 Original issue's description: > Unify some code > > Move parsing of linearized header into separate CPDF_Linearized class. > > Committed: https://pdfium.googlesource.com/pdfium/+/71333dc57ac7e4cf7963c83333730b3882ab371f TBR=thestig@chromium.org,brucedawson@chromium.org,art-snake@yandex-team.ru # Skipping CQ checks because original CL landed less than 1 days ago. NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true Review-Url: https://codereview.chromium.org/2474283005 --- BUILD.gn | 2 - core/fpdfapi/parser/cpdf_data_avail.cpp | 147 +++++++++++++++++++------ core/fpdfapi/parser/cpdf_data_avail.h | 5 +- core/fpdfapi/parser/cpdf_document.cpp | 21 +++- core/fpdfapi/parser/cpdf_document.h | 3 +- core/fpdfapi/parser/cpdf_document_unittest.cpp | 15 +-- core/fpdfapi/parser/cpdf_hint_tables.cpp | 32 ++++-- core/fpdfapi/parser/cpdf_hint_tables.h | 9 +- core/fpdfapi/parser/cpdf_linearized.cpp | 69 ------------ core/fpdfapi/parser/cpdf_linearized.h | 55 --------- core/fpdfapi/parser/cpdf_parser.cpp | 46 +++++--- core/fpdfapi/parser/cpdf_parser.h | 6 +- 12 files changed, 202 insertions(+), 208 deletions(-) delete mode 100644 core/fpdfapi/parser/cpdf_linearized.cpp delete mode 100644 core/fpdfapi/parser/cpdf_linearized.h diff --git a/BUILD.gn b/BUILD.gn index 0cf5f54c77..a54f6e4018 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -500,8 +500,6 @@ static_library("fpdfapi") { "core/fpdfapi/parser/cpdf_hint_tables.h", "core/fpdfapi/parser/cpdf_indirect_object_holder.cpp", "core/fpdfapi/parser/cpdf_indirect_object_holder.h", - "core/fpdfapi/parser/cpdf_linearized.cpp", - "core/fpdfapi/parser/cpdf_linearized.h", "core/fpdfapi/parser/cpdf_name.cpp", "core/fpdfapi/parser/cpdf_name.h", "core/fpdfapi/parser/cpdf_null.cpp", diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index e6d2c61ad3..c4ed95e17f 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -15,7 +15,6 @@ #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_hint_tables.h" -#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_reference.h" @@ -44,6 +43,7 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail, m_dwCurrentOffset = 0; m_dwXRefOffset = 0; m_bufferOffset = 0; + m_dwFirstPageNo = 0; m_bufferSize = 0; m_PagesObjNum = 0; m_dwCurrentXRefSteam = 0; @@ -56,6 +56,7 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail, m_bDocAvail = false; m_bMainXRefLoadTried = false; m_bDocAvail = false; + m_bLinearized = false; m_bPagesLoad = false; m_bPagesTreeLoad = false; m_bMainXRefLoadedOK = false; @@ -65,6 +66,7 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail, m_bPageLoadedOK = false; m_bNeedDownLoadResource = false; m_bLinearizedFormParamLoad = false; + m_pLinearized = nullptr; m_pRoot = nullptr; m_pTrailer = nullptr; m_pCurrentParser = nullptr; @@ -81,6 +83,9 @@ CPDF_DataAvail::CPDF_DataAvail(FileAvail* pFileAvail, CPDF_DataAvail::~CPDF_DataAvail() { m_pHintTables.reset(); + delete m_pLinearized; + delete m_pRoot; + delete m_pTrailer; for (CPDF_Object* pObject : m_arrayAcroforms) delete pObject; @@ -608,27 +613,48 @@ bool CPDF_DataAvail::CheckHeader(DownloadHints* pHints) { } bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) { - if (!m_pLinearized->GetFirstPageEndOffset() || - !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pEndOffSet = pDict ? pDict->GetObjectFor("E") : nullptr; + if (!pEndOffSet) { m_docStatus = PDF_DATAAVAIL_ERROR; return false; } - bool bNeedDownLoad = false; - uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset(); - dwEnd += 512; - if ((FX_FILESIZE)dwEnd > m_dwFileLen) - dwEnd = (uint32_t)m_dwFileLen; + CPDF_Object* pXRefOffset = pDict ? pDict->GetObjectFor("T") : nullptr; + if (!pXRefOffset) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return false; + } - int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); - int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; - if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { - pHints->AddSegment(iStartPos, iSize); - bNeedDownLoad = true; + CPDF_Object* pFileLen = pDict ? pDict->GetObjectFor("L") : nullptr; + if (!pFileLen) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return false; } - m_dwLastXRefOffset = m_pLinearized->GetLastXRefOffset(); - FX_FILESIZE dwFileLen = m_pLinearized->GetFileSize(); + bool bNeedDownLoad = false; + if (pEndOffSet->IsNumber()) { + uint32_t dwEnd = pEndOffSet->GetInteger(); + dwEnd += 512; + if ((FX_FILESIZE)dwEnd > m_dwFileLen) + dwEnd = (uint32_t)m_dwFileLen; + + int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); + int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; + if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { + pHints->AddSegment(iStartPos, iSize); + bNeedDownLoad = true; + } + } + + m_dwLastXRefOffset = 0; + FX_FILESIZE dwFileLen = 0; + if (pXRefOffset->IsNumber()) + m_dwLastXRefOffset = pXRefOffset->GetInteger(); + + if (pFileLen->IsNumber()) + dwFileLen = pFileLen->GetInteger(); + if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, (uint32_t)(dwFileLen - m_dwLastXRefOffset))) { if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) { @@ -676,17 +702,52 @@ bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, } bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) { - if (m_pLinearized->GetPageCount() <= 1) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + if (!pDict) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return false; + } + + // The actual value is not required here, but validate its existence and type. + CPDF_Number* pFirstPage = ToNumber(pDict->GetDirectObjectFor("O")); + if (!pFirstPage || !pFirstPage->IsInteger()) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return false; + } + + CPDF_Number* pPageCount = ToNumber(pDict->GetDirectObjectFor("N")); + if (!pPageCount || !pPageCount->IsInteger()) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return false; + } + + int nPageCount = pPageCount->GetInteger(); + if (nPageCount <= 1) { m_docStatus = PDF_DATAAVAIL_DONE; return true; } - if (!m_pLinearized->HasHintTable()) { + + CPDF_Array* pHintStreamRange = pDict->GetArrayFor("H"); + size_t nHintStreamSize = pHintStreamRange ? pHintStreamRange->GetCount() : 0; + if (nHintStreamSize != 2 && nHintStreamSize != 4) { m_docStatus = PDF_DATAAVAIL_ERROR; return false; } - FX_FILESIZE szHintStart = m_pLinearized->GetHintStart(); - FX_FILESIZE szHintLength = m_pLinearized->GetHintLength(); + for (const CPDF_Object* pArrayObject : *pHintStreamRange) { + const CPDF_Number* pNumber = ToNumber(pArrayObject->GetDirect()); + if (!pNumber || !pNumber->IsInteger()) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return false; + } + } + + FX_FILESIZE szHintStart = pHintStreamRange->GetIntegerAt(0); + FX_FILESIZE szHintLength = pHintStreamRange->GetIntegerAt(1); + if (szHintStart < 0 || szHintLength <= 0) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return false; + } if (!IsDataAvail(szHintStart, szHintLength, pHints)) return false; @@ -694,7 +755,7 @@ bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) { m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset); std::unique_ptr pHintTables( - new CPDF_HintTables(this, m_pLinearized.get())); + new CPDF_HintTables(this, pDict)); std::unique_ptr pHintStream( ParseIndirectObjectAt(szHintStart, 0)); CPDF_Stream* pStream = ToStream(pHintStream.get()); @@ -758,12 +819,12 @@ CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() { } bool CPDF_DataAvail::IsLinearized() { - return !!m_pLinearized; + return m_bLinearized; } bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) { if (m_pLinearized) - return true; + return m_bLinearized; ScopedFileStream file(FX_CreateMemoryStream(pData, (size_t)dwLen, false)); @@ -783,13 +844,27 @@ bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) { return false; uint32_t objnum = FXSYS_atoui(wordObjNum.c_str()); - m_pLinearized = CPDF_Linearized::CreateForObject(pdfium::WrapUnique( - ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum))); - if (!m_pLinearized || - m_pLinearized->GetFileSize() != m_pFileRead->GetSize()) { - m_pLinearized.reset(); + m_pLinearized = + ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum); + if (!m_pLinearized) return false; - } + + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + if (!pDict || !pDict->GetObjectFor("Linearized")) + return false; + + CPDF_Object* pLen = pDict->GetObjectFor("L"); + if (!pLen) + return false; + + if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) + return false; + + m_bLinearized = true; + + if (CPDF_Number* pNo = ToNumber(pDict->GetObjectFor("P"))) + m_dwFirstPageNo = pNo->GetInteger(); + return true; } @@ -1525,8 +1600,8 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( if (pdfium::ContainsKey(m_pagesLoadState, dwPage)) return DataAvailable; - if (m_pLinearized) { - if (dwPage == m_pLinearized->GetFirstPageNo()) { + if (m_bLinearized) { + if (dwPage == m_dwFirstPageNo) { DocAvailStatus nRet = CheckLinearizedFirstPage(dwPage, pHints); if (nRet == DataAvailable) m_pagesLoadState.insert(dwPage); @@ -1656,8 +1731,11 @@ void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, } int CPDF_DataAvail::GetPageCount() const { - if (m_pLinearized) - return m_pLinearized->GetPageCount(); + if (m_pLinearized) { + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("N") : nullptr; + return pObj ? pObj->GetInteger() : 0; + } return m_pDocument ? m_pDocument->GetPageCount() : 0; } @@ -1670,7 +1748,10 @@ CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { if (!m_pLinearized || !m_pHintTables) return nullptr; - if (index == static_cast(m_pLinearized->GetFirstPageNo())) + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + CPDF_Object* pObj = pDict ? pDict->GetDirectObjectFor("P") : nullptr; + int firstPageNum = pObj ? pObj->GetInteger() : 0; + if (index == firstPageNum) return nullptr; FX_FILESIZE szPageStartPos = 0; FX_FILESIZE szPageLength = 0; diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index bff9f2943b..0a8716a498 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -16,7 +16,6 @@ class CPDF_Dictionary; class CPDF_HintTables; class CPDF_IndirectObjectHolder; -class CPDF_Linearized; class CPDF_Parser; enum PDF_DATAAVAIL_STATUS { @@ -200,7 +199,7 @@ class CPDF_DataAvail final { CPDF_Object* m_pRoot; uint32_t m_dwRootObjNum; uint32_t m_dwInfoObjNum; - std::unique_ptr m_pLinearized; + CPDF_Object* m_pLinearized; CPDF_Object* m_pTrailer; bool m_bDocAvail; FX_FILESIZE m_dwHeaderOffset; @@ -221,6 +220,8 @@ class CPDF_DataAvail final { CFX_ArrayTemplate m_XRefStreamList; CFX_ArrayTemplate m_PageObjList; uint32_t m_PagesObjNum; + bool m_bLinearized; + uint32_t m_dwFirstPageNo; bool m_bLinearedDataOK; bool m_bMainXRefLoadTried; bool m_bMainXRefLoadedOK; diff --git a/core/fpdfapi/parser/cpdf_document.cpp b/core/fpdfapi/parser/cpdf_document.cpp index d9ffc0b28a..8e181de97c 100644 --- a/core/fpdfapi/parser/cpdf_document.cpp +++ b/core/fpdfapi/parser/cpdf_document.cpp @@ -17,7 +17,6 @@ #include "core/fpdfapi/page/pageint.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" -#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_parser.h" #include "core/fpdfapi/parser/cpdf_reference.h" @@ -379,13 +378,23 @@ void CPDF_Document::LoadDoc() { m_PageList.SetSize(RetrievePageCount()); } -void CPDF_Document::LoadLinearizedDoc( - const CPDF_Linearized* pLinearizationParams) { +void CPDF_Document::LoadLinearizedDoc(CPDF_Dictionary* pLinearizationParams) { m_bLinearized = true; LoadDocInternal(); - m_PageList.SetSize(pLinearizationParams->GetPageCount()); - m_iFirstPageNo = pLinearizationParams->GetFirstPageNo(); - m_dwFirstPageObjNum = pLinearizationParams->GetFirstPageObjNum(); + + uint32_t dwPageCount = 0; + CPDF_Object* pCount = pLinearizationParams->GetObjectFor("N"); + if (ToNumber(pCount)) + dwPageCount = pCount->GetInteger(); + m_PageList.SetSize(dwPageCount); + + CPDF_Object* pNo = pLinearizationParams->GetObjectFor("P"); + if (ToNumber(pNo)) + m_iFirstPageNo = pNo->GetInteger(); + + CPDF_Object* pObjNum = pLinearizationParams->GetObjectFor("O"); + if (ToNumber(pObjNum)) + m_dwFirstPageObjNum = pObjNum->GetInteger(); } void CPDF_Document::LoadPages() { diff --git a/core/fpdfapi/parser/cpdf_document.h b/core/fpdfapi/parser/cpdf_document.h index 1b18015200..0a99e42c3f 100644 --- a/core/fpdfapi/parser/cpdf_document.h +++ b/core/fpdfapi/parser/cpdf_document.h @@ -26,7 +26,6 @@ class CPDF_Font; class CPDF_FontEncoding; class CPDF_IccProfile; class CPDF_Image; -class CPDF_Linearized; class CPDF_Parser; class CPDF_Pattern; class CPDF_StreamAcc; @@ -86,7 +85,7 @@ class CPDF_Document : public CPDF_IndirectObjectHolder { CPDF_IccProfile* LoadIccProfile(CPDF_Stream* pStream); void LoadDoc(); - void LoadLinearizedDoc(const CPDF_Linearized* pLinearizationParams); + void LoadLinearizedDoc(CPDF_Dictionary* pLinearizationParams); void LoadPages(); void CreateNewDoc(); diff --git a/core/fpdfapi/parser/cpdf_document_unittest.cpp b/core/fpdfapi/parser/cpdf_document_unittest.cpp index e20a5a1d1d..16d0ade069 100644 --- a/core/fpdfapi/parser/cpdf_document_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_document_unittest.cpp @@ -9,13 +9,15 @@ #include "core/fpdfapi/cpdf_modulemgr.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" -#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_parser.h" #include "core/fxcrt/fx_memory.h" #include "testing/gtest/include/gtest/gtest.h" namespace { +using ScopedDictionary = + std::unique_ptr>; + CPDF_Dictionary* CreatePageTreeNode(CPDF_Array* kids, CPDF_Document* pDoc, int count) { @@ -75,11 +77,6 @@ class CPDF_TestDocumentForPages : public CPDF_Document { private: std::unique_ptr m_pOwnedRootDict; }; - -class TestLinearized : public CPDF_Linearized { - public: - explicit TestLinearized(CPDF_Dictionary* dict) : CPDF_Linearized(dict) {} -}; } // namespace class cpdf_document_test : public testing::Test { @@ -145,12 +142,10 @@ TEST_F(cpdf_document_test, UseCachedPageObjNumIfHaveNotPagesDict) { // can be not exists in this case. // (case, when hint table is used to page check in CPDF_DataAvail). CPDF_Document document(pdfium::MakeUnique()); - auto dict = pdfium::MakeUnique(); - dict->SetBooleanFor("Linearized", true); + std::unique_ptr dict(new CPDF_Dictionary()); const int page_count = 100; dict->SetIntegerFor("N", page_count); - TestLinearized linearized(dict.get()); - document.LoadLinearizedDoc(&linearized); + document.LoadLinearizedDoc(dict.get()); ASSERT_EQ(page_count, document.GetPageCount()); CPDF_Object* page_stub = new CPDF_Dictionary(); const uint32_t obj_num = document.AddIndirectObject(page_stub); diff --git a/core/fpdfapi/parser/cpdf_hint_tables.cpp b/core/fpdfapi/parser/cpdf_hint_tables.cpp index bbc57dace9..045b94cac5 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables.cpp +++ b/core/fpdfapi/parser/cpdf_hint_tables.cpp @@ -12,7 +12,6 @@ #include "core/fpdfapi/parser/cpdf_data_avail.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" -#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/cpdf_stream_acc.h" #include "core/fxcrt/fx_safe_types.h" @@ -35,12 +34,12 @@ bool IsValidPageOffsetHintTableBitCount(uint32_t bits) { } // namespace CPDF_HintTables::CPDF_HintTables(CPDF_DataAvail* pDataAvail, - CPDF_Linearized* pLinearized) + CPDF_Dictionary* pLinearized) : m_pDataAvail(pDataAvail), - m_pLinearized(pLinearized), + m_pLinearizedDict(pLinearized), m_nFirstPageSharedObjs(0), m_szFirstPageObjOffset(0) { - ASSERT(m_pLinearized); + ASSERT(m_pLinearizedDict); } CPDF_HintTables::~CPDF_HintTables() {} @@ -488,25 +487,38 @@ bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) { } int CPDF_HintTables::GetEndOfFirstPageOffset() const { - return static_cast(m_pLinearized->GetFirstPageEndOffset()); + CPDF_Object* pOffsetE = m_pLinearizedDict->GetDirectObjectFor("E"); + return pOffsetE ? pOffsetE->GetInteger() : -1; } int CPDF_HintTables::GetNumberOfPages() const { - return static_cast(m_pLinearized->GetPageCount()); + CPDF_Object* pPageNum = m_pLinearizedDict->GetDirectObjectFor("N"); + return pPageNum ? pPageNum->GetInteger() : 0; } int CPDF_HintTables::GetFirstPageObjectNumber() const { - return static_cast(m_pLinearized->GetFirstPageObjNum()); + CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetDirectObjectFor("O"); + return pFirstPageObj ? pFirstPageObj->GetInteger() : -1; } int CPDF_HintTables::GetFirstPageNumber() const { - return static_cast(m_pLinearized->GetFirstPageNo()); + CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetDirectObjectFor("P"); + return pFirstPageNum ? pFirstPageNum->GetInteger() : 0; } int CPDF_HintTables::ReadPrimaryHintStreamOffset() const { - return static_cast(m_pLinearized->GetHintStart()); + return ReadPrimaryHintStream(0); } int CPDF_HintTables::ReadPrimaryHintStreamLength() const { - return static_cast(m_pLinearized->GetHintLength()); + return ReadPrimaryHintStream(1); +} + +int CPDF_HintTables::ReadPrimaryHintStream(int index) const { + CPDF_Array* pRange = m_pLinearizedDict->GetArrayFor("H"); + if (!pRange) + return -1; + + CPDF_Object* pStreamLen = pRange->GetDirectObjectAt(index); + return pStreamLen ? pStreamLen->GetInteger() : -1; } diff --git a/core/fpdfapi/parser/cpdf_hint_tables.h b/core/fpdfapi/parser/cpdf_hint_tables.h index 68b5059b05..84f48cf41f 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables.h +++ b/core/fpdfapi/parser/cpdf_hint_tables.h @@ -14,12 +14,12 @@ #include "core/fxcrt/fx_stream.h" class CFX_BitStream; -class CPDF_Linearized; +class CPDF_Dictionary; class CPDF_Stream; class CPDF_HintTables { public: - CPDF_HintTables(CPDF_DataAvail* pDataAvail, CPDF_Linearized* pLinearized); + CPDF_HintTables(CPDF_DataAvail* pDataAvail, CPDF_Dictionary* pLinearized); virtual ~CPDF_HintTables(); bool GetPagePos(uint32_t index, @@ -46,6 +46,9 @@ class CPDF_HintTables { virtual int ReadPrimaryHintStreamOffset() const; virtual int ReadPrimaryHintStreamLength() const; + // Helper for the ReadPrimaryHintStream methods above. + int ReadPrimaryHintStream(int index) const; + uint32_t GetItemLength(uint32_t index, const std::vector& szArray); @@ -53,7 +56,7 @@ class CPDF_HintTables { CPDF_DataAvail* const m_pDataAvail; // Owned by |m_pDataAvail|. - CPDF_Linearized* const m_pLinearized; + CPDF_Dictionary* const m_pLinearizedDict; uint32_t m_nFirstPageSharedObjs; FX_FILESIZE m_szFirstPageObjOffset; diff --git a/core/fpdfapi/parser/cpdf_linearized.cpp b/core/fpdfapi/parser/cpdf_linearized.cpp deleted file mode 100644 index aa0ef45bcc..0000000000 --- a/core/fpdfapi/parser/cpdf_linearized.cpp +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2016 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fpdfapi/parser/cpdf_linearized.h" - -#include "core/fpdfapi/parser/cpdf_array.h" -#include "core/fpdfapi/parser/cpdf_dictionary.h" -#include "core/fpdfapi/parser/cpdf_number.h" -#include "third_party/base/ptr_util.h" - -namespace { - -template -bool IsValidNumericDictionaryValue(const CPDF_Dictionary* pDict, - const char* key, - T min_value, - bool must_exist = true) { - if (!pDict->KeyExist(key)) - return !must_exist; - const CPDF_Number* pNum = ToNumber(pDict->GetObjectFor(key)); - if (!pNum || !pNum->IsInteger()) - return false; - const int raw_value = pNum->GetInteger(); - if (!pdfium::base::IsValueInRangeForNumericType(raw_value)) - return false; - return static_cast(raw_value) >= min_value; -} - -} // namespace - -// static -std::unique_ptr CPDF_Linearized::CreateForObject( - std::unique_ptr pObj) { - auto pDict = ToDictionary(std::move(pObj)); - if (!pDict || !pDict->KeyExist("Linearized") || - !IsValidNumericDictionaryValue(pDict.get(), "L", 1) || - !IsValidNumericDictionaryValue(pDict.get(), "P", 0, false) || - !IsValidNumericDictionaryValue(pDict.get(), "T", 1) || - !IsValidNumericDictionaryValue(pDict.get(), "N", 0) || - !IsValidNumericDictionaryValue(pDict.get(), "E", 1) || - !IsValidNumericDictionaryValue(pDict.get(), "O", 1)) - return nullptr; - return pdfium::WrapUnique(new CPDF_Linearized(pDict.get())); -} - -CPDF_Linearized::CPDF_Linearized(const CPDF_Dictionary* pDict) { - m_szFileSize = pDict->GetIntegerFor("L"); - m_dwFirstPageNo = pDict->GetIntegerFor("P"); - m_szLastXRefOffset = pDict->GetIntegerFor("T"); - m_PageCount = pDict->GetIntegerFor("N"); - m_szFirstPageEndOffset = pDict->GetIntegerFor("E"); - m_FirstPageObjNum = pDict->GetIntegerFor("O"); - const CPDF_Array* pHintStreamRange = pDict->GetArrayFor("H"); - const size_t nHintStreamSize = - pHintStreamRange ? pHintStreamRange->GetCount() : 0; - if (nHintStreamSize == 2 || nHintStreamSize == 4) { - m_szHintStart = std::max(pHintStreamRange->GetIntegerAt(0), 0); - m_szHintLength = std::max(pHintStreamRange->GetIntegerAt(1), 0); - } -} - -CPDF_Linearized::~CPDF_Linearized() {} - -bool CPDF_Linearized::HasHintTable() const { - return GetPageCount() > 1 && GetHintStart() > 0 && GetHintLength() > 0; -} diff --git a/core/fpdfapi/parser/cpdf_linearized.h b/core/fpdfapi/parser/cpdf_linearized.h deleted file mode 100644 index caf538e3fe..0000000000 --- a/core/fpdfapi/parser/cpdf_linearized.h +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2016 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_H_ -#define CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_H_ - -#include - -#include "core/fxcrt/fx_memory.h" -#include "core/fxcrt/fx_stream.h" - -class CPDF_Dictionary; -class CPDF_Object; - -class CPDF_Linearized { - public: - ~CPDF_Linearized(); - static std::unique_ptr CreateForObject( - std::unique_ptr pObj); - - // Will only return values > 0. - FX_FILESIZE GetFileSize() const { return m_szFileSize; } - uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; } - // Will only return values > 0. - FX_FILESIZE GetLastXRefOffset() const { return m_szLastXRefOffset; } - uint32_t GetPageCount() const { return m_PageCount; } - // Will only return values > 0. - FX_FILESIZE GetFirstPageEndOffset() const { return m_szFirstPageEndOffset; } - // Will only return values > 0. - uint32_t GetFirstPageObjNum() const { return m_FirstPageObjNum; } - - bool HasHintTable() const; - // Will only return values > 0. - FX_FILESIZE GetHintStart() const { return m_szHintStart; } - // Will only return values > 0. - FX_FILESIZE GetHintLength() const { return m_szHintLength; } - - protected: - explicit CPDF_Linearized(const CPDF_Dictionary* pDict); - - private: - FX_FILESIZE m_szFileSize = 0; - uint32_t m_dwFirstPageNo = 0; - FX_FILESIZE m_szLastXRefOffset = 0; - uint32_t m_PageCount = 0; - FX_FILESIZE m_szFirstPageEndOffset = 0; - uint32_t m_FirstPageObjNum = 0; - FX_FILESIZE m_szHintStart = 0; - FX_FILESIZE m_szHintLength = 0; -}; - -#endif // CORE_FPDFAPI_PARSER_CPDF_LINEARIZED_H_ diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index 044d12d6ef..c8c07bd0b7 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -12,7 +12,6 @@ #include "core/fpdfapi/parser/cpdf_crypto_handler.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" -#include "core/fpdfapi/parser/cpdf_linearized.h" #include "core/fpdfapi/parser/cpdf_number.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_security_handler.h" @@ -55,6 +54,8 @@ CPDF_Parser::CPDF_Parser() m_pTrailer(nullptr), m_pEncryptDict(nullptr), m_bVersionUpdated(false), + m_pLinearized(nullptr), + m_dwFirstPageNo(0), m_dwXrefStartObjNum(0) { m_pSyntax.reset(new CPDF_SyntaxParser); } @@ -71,6 +72,8 @@ CPDF_Parser::~CPDF_Parser() { for (CPDF_Dictionary* trailer : m_Trailers) delete trailer; + + delete m_pLinearized; } uint32_t CPDF_Parser::GetLastObjNum() const { @@ -1399,10 +1402,6 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( return pObj; } -uint32_t CPDF_Parser::GetFirstPageNo() const { - return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0; -} - CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { if (m_pSyntax->GetKeyword() != "trailer") return nullptr; @@ -1449,14 +1448,35 @@ bool CPDF_Parser::IsLinearizedFile(IFX_SeekableReadStream* pFileAccess, return false; } - m_pLinearized = CPDF_Linearized::CreateForObject( - pdfium::WrapUnique(m_pSyntax->GetObject(nullptr, objnum, gennum, true))); + m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true); if (!m_pLinearized) return false; - m_LastXRefOffset = m_pLinearized->GetLastXRefOffset(); - // Move parser onto first page xref table start. - m_pSyntax->GetNextWord(nullptr); - return true; + + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + if (pDict && pDict->GetObjectFor("Linearized")) { + m_pSyntax->GetNextWord(nullptr); + + CPDF_Object* pLen = pDict->GetObjectFor("L"); + if (!pLen) { + delete m_pLinearized; + m_pLinearized = nullptr; + return false; + } + + if (pLen->GetInteger() != (int)pFileAccess->GetSize()) + return false; + + if (CPDF_Number* pNo = ToNumber(pDict->GetObjectFor("P"))) + m_dwFirstPageNo = pNo->GetInteger(); + + if (CPDF_Number* pTable = ToNumber(pDict->GetObjectFor("T"))) + m_LastXRefOffset = pTable->GetInteger(); + + return true; + } + delete m_pLinearized; + m_pLinearized = nullptr; + return false; } CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( @@ -1505,7 +1525,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( if (eRet != SUCCESS) return eRet; - m_pDocument->LoadLinearizedDoc(m_pLinearized.get()); + m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict()); if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { if (bXRefRebuilt) return FORMAT_ERROR; @@ -1518,7 +1538,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( if (eRet != SUCCESS) return eRet; - m_pDocument->LoadLinearizedDoc(m_pLinearized.get()); + m_pDocument->LoadLinearizedDoc(m_pLinearized->GetDict()); if (!m_pDocument->GetRoot()) return FORMAT_ERROR; } diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h index b8ce7ab294..3be157a1a2 100644 --- a/core/fpdfapi/parser/cpdf_parser.h +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -19,7 +19,6 @@ class CPDF_CryptoHandler; class CPDF_Dictionary; class CPDF_Document; class CPDF_IndirectObjectHolder; -class CPDF_Linearized; class CPDF_Object; class CPDF_SecurityHandler; class CPDF_StreamAcc; @@ -89,7 +88,7 @@ class CPDF_Parser { uint32_t objnum, FX_FILESIZE* pResultPos); - uint32_t GetFirstPageNo() const; + uint32_t GetFirstPageNo() const { return m_dwFirstPageNo; } protected: struct ObjectInfo { @@ -158,7 +157,8 @@ class CPDF_Parser { std::set m_SortedOffset; std::vector m_Trailers; bool m_bVersionUpdated; - std::unique_ptr m_pLinearized; + CPDF_Object* m_pLinearized; + uint32_t m_dwFirstPageNo; uint32_t m_dwXrefStartObjNum; // A map of object numbers to indirect streams. Map owns the streams. -- cgit v1.2.3