From c4642d4ffe8e6d315663119b5317d3a889e59f1a Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Fri, 29 Jun 2018 12:56:29 +0000 Subject: Implement CPDF_HintTables::PageInfo. Merge page info data from Hints Table into CPDF_HintTables::PageInfo class. Change-Id: I468996346ee153e3fa8ada6a83770614362d1b92 Reviewed-on: https://pdfium-review.googlesource.com/15813 Commit-Queue: Art Snake Reviewed-by: Lei Zhang --- core/fpdfapi/parser/cpdf_hint_tables.cpp | 103 ++++++++++------------ core/fpdfapi/parser/cpdf_hint_tables.h | 48 +++++++++- core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp | 29 ++++++ 3 files changed, 118 insertions(+), 62 deletions(-) (limited to 'core/fpdfapi/parser') diff --git a/core/fpdfapi/parser/cpdf_hint_tables.cpp b/core/fpdfapi/parser/cpdf_hint_tables.cpp index 7d371909e9..fc9a46d68a 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables.cpp +++ b/core/fpdfapi/parser/cpdf_hint_tables.cpp @@ -37,6 +37,9 @@ bool IsValidPageOffsetHintTableBitCount(uint32_t bits) { } // namespace +CPDF_HintTables::PageInfo::PageInfo() = default; +CPDF_HintTables::PageInfo::~PageInfo() = default; + CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator, CPDF_LinearizedHeader* pLinearized) : m_pValidator(pValidator), @@ -59,6 +62,14 @@ uint32_t CPDF_HintTables::GetItemLength( } bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { + const uint32_t nPages = m_pLinearized->GetPageCount(); + if (nPages < 1 || nPages >= CPDF_Document::kPageMaxNum) + return false; + + const uint32_t nFirstPageNum = m_pLinearized->GetFirstPageNo(); + if (nFirstPageNum >= nPages) + return false; + if (!hStream || hStream->IsEOF()) return false; @@ -122,83 +133,78 @@ bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { // Item 13: Skip Item 13 which has 16 bits. 
hStream->SkipBits(16); - const uint32_t nPages = m_pLinearized->GetPageCount(); - if (nPages < 1 || nPages >= CPDF_Document::kPageMaxNum) - return false; - - const uint32_t dwPages = pdfium::base::checked_cast(nPages); FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits; - required_bits *= dwPages; + required_bits *= nPages; if (!CanReadFromBitStream(hStream, required_bits)) return false; + m_PageInfos = std::vector(nPages); + m_PageInfos[nFirstPageNum].set_start_obj_num( + m_pLinearized->GetFirstPageObjNum()); + // The object number of remaining pages starts from 1. + uint32_t dwStartObjNum = 1; for (uint32_t i = 0; i < nPages; ++i) { FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits); safeDeltaObj += dwObjLeastNum; if (!safeDeltaObj.IsValid()) return false; - m_dwDeltaNObjsArray.push_back(safeDeltaObj.ValueOrDie()); + m_PageInfos[i].set_objects_count(safeDeltaObj.ValueOrDie()); + if (i == nFirstPageNum) + continue; + m_PageInfos[i].set_start_obj_num(dwStartObjNum); + dwStartObjNum += m_PageInfos[i].objects_count(); } hStream->ByteAlign(); required_bits = dwDeltaPageLenBits; - required_bits *= dwPages; + required_bits *= nPages; if (!CanReadFromBitStream(hStream, required_bits)) return false; - std::vector dwPageLenArray; for (uint32_t i = 0; i < nPages; ++i) { FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits); safePageLen += dwPageLeastLen; if (!safePageLen.IsValid()) return false; - - dwPageLenArray.push_back(safePageLen.ValueOrDie()); + m_PageInfos[i].set_page_length(safePageLen.ValueOrDie()); } - const uint32_t nFirstPageNum = m_pLinearized->GetFirstPageNo(); - if (nFirstPageNum >= nPages) - return false; - - m_szPageOffsetArray.resize(nPages, 0); ASSERT(m_szFirstPageObjOffset); - m_szPageOffsetArray[nFirstPageNum] = m_szFirstPageObjOffset; - FX_FILESIZE prev_page_offset = m_pLinearized->GetFirstPageEndOffset(); + m_PageInfos[nFirstPageNum].set_page_offset(m_szFirstPageObjOffset); + FX_FILESIZE prev_page_end = 
m_pLinearized->GetFirstPageEndOffset(); for (uint32_t i = 0; i < nPages; ++i) { if (i == nFirstPageNum) continue; - - m_szPageOffsetArray[i] = prev_page_offset; - prev_page_offset += dwPageLenArray[i]; + m_PageInfos[i].set_page_offset(prev_page_end); + prev_page_end += m_PageInfos[i].page_length(); } - m_szPageOffsetArray.push_back(m_szPageOffsetArray[nPages - 1] + - dwPageLenArray[nPages - 1]); hStream->ByteAlign(); // Number of shared objects. required_bits = dwSharedObjBits; - required_bits *= dwPages; + required_bits *= nPages; if (!CanReadFromBitStream(hStream, required_bits)) return false; + std::vector dwNSharedObjsArray(nPages); for (uint32_t i = 0; i < nPages; i++) - m_dwNSharedObjsArray.push_back(hStream->GetBits(dwSharedObjBits)); + dwNSharedObjsArray[i] = hStream->GetBits(dwSharedObjBits); hStream->ByteAlign(); // Array of identifiers, size = nshared_objects. for (uint32_t i = 0; i < nPages; i++) { required_bits = dwSharedIdBits; - required_bits *= m_dwNSharedObjsArray[i]; + required_bits *= dwNSharedObjsArray[i]; if (!CanReadFromBitStream(hStream, required_bits)) return false; - for (uint32_t j = 0; j < m_dwNSharedObjsArray[i]; j++) - m_dwIdentifierArray.push_back(hStream->GetBits(dwSharedIdBits)); + for (uint32_t j = 0; j < dwNSharedObjsArray[i]; j++) + m_PageInfos[i].AddIdentifier(hStream->GetBits(dwSharedIdBits)); } hStream->ByteAlign(); for (uint32_t i = 0; i < nPages; i++) { - FX_SAFE_UINT32 safeSize = m_dwNSharedObjsArray[i]; + FX_SAFE_UINT32 safeSize = dwNSharedObjsArray[i]; safeSize *= dwSharedNumeratorBits; if (!CanReadFromBitStream(hStream, safeSize)) return false; @@ -207,7 +213,7 @@ bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { } hStream->ByteAlign(); - FX_SAFE_UINT32 safeTotalPageLen = dwPages; + FX_SAFE_UINT32 safeTotalPageLen = nPages; safeTotalPageLen *= dwDeltaPageLenBits; if (!CanReadFromBitStream(hStream, safeTotalPageLen)) return false; @@ -343,24 +349,9 @@ bool CPDF_HintTables::GetPagePos(uint32_t index, if 
(index >= m_pLinearized->GetPageCount()) return false; - *szPageStartPos = m_szPageOffsetArray[index]; - *szPageLength = GetItemLength(index, m_szPageOffsetArray); - - const uint32_t nFirstPageObjNum = m_pLinearized->GetFirstPageObjNum(); - - const uint32_t dwFirstPageNum = m_pLinearized->GetFirstPageNo(); - if (index == dwFirstPageNum) { - *dwObjNum = nFirstPageObjNum; - return true; - } - - // The object number of remaining pages starts from 1. - *dwObjNum = 1; - for (uint32_t i = 0; i < index; ++i) { - if (i == dwFirstPageNum) - continue; - *dwObjNum += m_dwDeltaNObjsArray[i]; - } + *szPageStartPos = m_PageInfos[index].page_offset(); + *szPageLength = m_PageInfos[index].page_length(); + *dwObjNum = m_PageInfos[index].start_obj_num(); return true; } @@ -368,27 +359,23 @@ CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) { if (index == m_pLinearized->GetFirstPageNo()) return CPDF_DataAvail::DataAvailable; - uint32_t dwLength = GetItemLength(index, m_szPageOffsetArray); - // If two pages have the same offset, it should be treated as an error. + if (index >= m_pLinearized->GetPageCount()) + return CPDF_DataAvail::DataError; + + uint32_t dwLength = m_PageInfos[index].page_length(); if (!dwLength) return CPDF_DataAvail::DataError; if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable( - m_szPageOffsetArray[index], dwLength)) { + m_PageInfos[index].page_offset(), dwLength)) { return CPDF_DataAvail::DataNotAvailable; } // Download data of shared objects in the page. 
- uint32_t offset = 0; - for (uint32_t i = 0; i < index; ++i) - offset += m_dwNSharedObjsArray[i]; - const uint32_t nFirstPageObjNum = m_pLinearized->GetFirstPageObjNum(); - uint32_t dwIndex = 0; uint32_t dwObjNum = 0; - for (uint32_t j = 0; j < m_dwNSharedObjsArray[index]; ++j) { - dwIndex = m_dwIdentifierArray[offset + j]; + for (const uint32_t dwIndex : m_PageInfos[index].Identifiers()) { if (dwIndex >= m_dwSharedObjNumArray.size()) continue; diff --git a/core/fpdfapi/parser/cpdf_hint_tables.h b/core/fpdfapi/parser/cpdf_hint_tables.h index c51d95a255..0db190b189 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables.h +++ b/core/fpdfapi/parser/cpdf_hint_tables.h @@ -20,6 +20,46 @@ class CPDF_ReadValidator; class CPDF_HintTables { public: + class PageInfo { + public: + PageInfo(); + ~PageInfo(); + + void set_objects_count(uint32_t objects_count) { + m_nObjectsCount = objects_count; + } + uint32_t objects_count() const { return m_nObjectsCount; } + + void set_page_offset(FX_FILESIZE offset) { m_szOffset = offset; } + FX_FILESIZE page_offset() const { return m_szOffset; } + + void set_page_length(uint32_t length) { m_dwLength = length; } + uint32_t page_length() const { return m_dwLength; } + + void set_start_obj_num(uint32_t start_obj_num) { + m_dwStartObjNum = start_obj_num; + } + uint32_t start_obj_num() const { return m_dwStartObjNum; } + + void AddIdentifier(uint32_t Identifier) { + m_dwIdentifierArray.push_back(Identifier); + } + + const std::vector& Identifiers() const { + return m_dwIdentifierArray; + } + + private: + uint32_t m_nObjectsCount = 0; + FX_FILESIZE m_szOffset = 0; + uint32_t m_dwLength = 0; + uint32_t m_dwStartObjNum = 0; + std::vector m_dwIdentifierArray; + + PageInfo(const PageInfo& other) = delete; + PageInfo& operator=(const PageInfo&) = delete; + }; + CPDF_HintTables(CPDF_ReadValidator* pValidator, CPDF_LinearizedHeader* pLinearized); virtual ~CPDF_HintTables(); @@ -33,6 +73,8 @@ class CPDF_HintTables { bool LoadHintStream(CPDF_Stream* 
pHintStream); + const std::vector& PageInfos() const { return m_PageInfos; } + protected: bool ReadPageHintTable(CFX_BitStream* hStream); bool ReadSharedObjHintTable(CFX_BitStream* hStream, uint32_t offset); @@ -51,11 +93,9 @@ class CPDF_HintTables { uint32_t m_nFirstPageSharedObjs; FX_FILESIZE m_szFirstPageObjOffset; - std::vector m_dwDeltaNObjsArray; - std::vector m_dwNSharedObjsArray; + + std::vector m_PageInfos; std::vector m_dwSharedObjNumArray; - std::vector m_dwIdentifierArray; - std::vector m_szPageOffsetArray; std::vector m_szSharedObjOffsetArray; }; diff --git a/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp b/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp index e45722950e..3d98196093 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp @@ -67,3 +67,32 @@ TEST_F(CPDF_HintTablesTest, Load) { ASSERT_FALSE( hint_tables->GetPagePos(2, &page_start, &page_length, &page_obj_num)); } + +TEST_F(CPDF_HintTablesTest, PageInfos) { + auto data_avail = MakeDataAvailFromFile("feature_linearized_loading.pdf"); + ASSERT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, + data_avail->IsDocAvail(nullptr)); + + const CPDF_HintTables* hint_tables = data_avail->GetHintTables(); + ASSERT_TRUE(hint_tables); + ASSERT_EQ(2u, hint_tables->PageInfos().size()); + + EXPECT_EQ(5u, hint_tables->PageInfos()[0].objects_count()); + EXPECT_EQ(777, hint_tables->PageInfos()[0].page_offset()); + EXPECT_EQ(4328u, hint_tables->PageInfos()[0].page_length()); + EXPECT_EQ(39u, hint_tables->PageInfos()[0].start_obj_num()); + ASSERT_EQ(2u, hint_tables->PageInfos()[0].Identifiers().size()); + + EXPECT_EQ(0u, hint_tables->PageInfos()[0].Identifiers()[0]); + EXPECT_EQ(0u, hint_tables->PageInfos()[0].Identifiers()[1]); + + EXPECT_EQ(3u, hint_tables->PageInfos()[1].objects_count()); + EXPECT_EQ(5105, hint_tables->PageInfos()[1].page_offset()); + EXPECT_EQ(767u, hint_tables->PageInfos()[1].page_length()); + EXPECT_EQ(1u, 
hint_tables->PageInfos()[1].start_obj_num()); + ASSERT_EQ(3u, hint_tables->PageInfos()[1].Identifiers().size()); + + EXPECT_EQ(2u, hint_tables->PageInfos()[1].Identifiers()[0]); + EXPECT_EQ(5u, hint_tables->PageInfos()[1].Identifiers()[1]); + EXPECT_EQ(3u, hint_tables->PageInfos()[1].Identifiers()[2]); +} -- cgit v1.2.3