diff options
-rw-r--r-- | core/fpdfapi/parser/cpdf_hint_tables.cpp | 24 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_hint_tables.h | 2 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp | 64 | ||||
-rw-r--r-- | testing/resources/hint_table_102p.bin | bin | 0 -> 3797 bytes |
4 files changed, 76 insertions, 14 deletions
diff --git a/core/fpdfapi/parser/cpdf_hint_tables.cpp b/core/fpdfapi/parser/cpdf_hint_tables.cpp index 71a6d3688e..04e673bc97 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables.cpp +++ b/core/fpdfapi/parser/cpdf_hint_tables.cpp @@ -117,7 +117,7 @@ bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { // shared object referenced from a page, there is an indication of // where in the page's content stream the object is first referenced. const uint32_t dwSharedNumeratorBits = hStream->GetBits(16); - if (!IsValidPageOffsetHintTableBitCount(dwSharedNumeratorBits)) + if (dwSharedNumeratorBits > 32) return false; // Item 13: Skip Item 13 which has 16 bits. @@ -193,15 +193,17 @@ bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { } hStream->ByteAlign(); - for (uint32_t i = 0; i < nPages; i++) { - FX_SAFE_UINT32 safeSize = dwNSharedObjsArray[i]; - safeSize *= dwSharedNumeratorBits; - if (!CanReadFromBitStream(hStream, safeSize)) - return false; + if (dwSharedNumeratorBits) { + for (uint32_t i = 0; i < nPages; i++) { + FX_SAFE_UINT32 safeSize = dwNSharedObjsArray[i]; + safeSize *= dwSharedNumeratorBits; + if (!CanReadFromBitStream(hStream, safeSize)) + return false; - hStream->SkipBits(safeSize.ValueOrDie()); + hStream->SkipBits(safeSize.ValueOrDie()); + } + hStream->ByteAlign(); } - hStream->ByteAlign(); FX_SAFE_UINT32 safeTotalPageLen = nPages; safeTotalPageLen *= dwDeltaPageLenBits; @@ -403,7 +405,11 @@ FX_FILESIZE CPDF_HintTables::HintsOffsetToFileOffset( // offset shall have the hint stream length added to it to determine the // actual offset relative to the beginning of the file. // See specification PDF 32000-1:2008 Annex F.4 (Hint tables). - if (file_offset.ValueOrDie() > m_pLinearized->GetHintStart()) + // Note: The PDF spec does not mention this, but positions equal to the hint + // stream offset also need to have the hint stream length added to it. e.g. 
+ // There exists linearized PDFs generated by Adobe software that have this + // property. + if (file_offset.ValueOrDie() >= m_pLinearized->GetHintStart()) file_offset += m_pLinearized->GetHintLength(); return file_offset.ValueOrDefault(0); diff --git a/core/fpdfapi/parser/cpdf_hint_tables.h b/core/fpdfapi/parser/cpdf_hint_tables.h index a161dc68f0..5b978f99b2 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables.h +++ b/core/fpdfapi/parser/cpdf_hint_tables.h @@ -83,6 +83,8 @@ class CPDF_HintTables { return m_SharedObjGroupInfos; } + FX_FILESIZE GetFirstPageObjOffset() const { return m_szFirstPageObjOffset; } + protected: bool ReadPageHintTable(CFX_BitStream* hStream); bool ReadSharedObjHintTable(CFX_BitStream* hStream, uint32_t offset); diff --git a/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp b/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp index af0e9ff745..8a7331d29b 100644 --- a/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp +++ b/core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp @@ -10,9 +10,14 @@ #include "core/fpdfapi/cpdf_modulemgr.h" #include "core/fpdfapi/parser/cpdf_data_avail.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_linearized_header.h" #include "core/fpdfapi/parser/cpdf_object.h" +#include "core/fpdfapi/parser/cpdf_read_validator.h" +#include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/cpdf_syntax_parser.h" #include "core/fxcrt/fx_stream.h" +#include "testing/fx_string_testhelpers.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/utils/path_service.h" @@ -20,16 +25,42 @@ namespace { -std::unique_ptr<CPDF_DataAvail> MakeDataAvailFromFile( +RetainPtr<CPDF_ReadValidator> MakeValidatorFromFile( const std::string& file_name) { std::string file_path; - if (!PathService::GetTestFilePath(file_name, &file_path)) - return nullptr; + PathService::GetTestFilePath(file_name, &file_path); + ASSERT(!file_path.empty()); + 
return pdfium::MakeRetain<CPDF_ReadValidator>( + IFX_SeekableReadStream::CreateFromFilename(file_path.c_str()), nullptr); +} + +std::unique_ptr<CPDF_DataAvail> MakeDataAvailFromFile( + const std::string& file_name) { return pdfium::MakeUnique<CPDF_DataAvail>( - nullptr, IFX_SeekableReadStream::CreateFromFilename(file_path.c_str()), - true); + nullptr, MakeValidatorFromFile(file_name), true); } +class TestLinearizedHeader : public CPDF_LinearizedHeader { + public: + TestLinearizedHeader(const CPDF_Dictionary* pDict, + FX_FILESIZE szLastXRefOffset) + : CPDF_LinearizedHeader(pDict, szLastXRefOffset) {} + + static std::unique_ptr<CPDF_LinearizedHeader> MakeHeader( + const std::string& inline_data) { + CPDF_SyntaxParser parser; + parser.InitParser( + pdfium::MakeRetain<CFX_BufferSeekableReadStream>( + reinterpret_cast<const unsigned char*>(inline_data.data()), + inline_data.size()), + 0); + std::unique_ptr<CPDF_Dictionary> dict = + ToDictionary(parser.GetObjectBody(nullptr)); + ASSERT(dict); + return pdfium::MakeUnique<TestLinearizedHeader>(dict.get(), 0); + } +}; + } // namespace class CPDF_HintTablesTest : public testing::Test { @@ -119,3 +150,26 @@ TEST_F(CPDF_HintTablesTest, PageAndGroupInfos) { EXPECT_EQ(10939, hint_tables->SharedGroupInfos()[5].m_szOffset); EXPECT_EQ(544u, hint_tables->SharedGroupInfos()[5].m_dwLength); } + +TEST_F(CPDF_HintTablesTest, FirstPageOffset) { + // Test that valid hint table is loaded, and have correct offset of first page + // object. + const auto linearized_header = TestLinearizedHeader::MakeHeader( + "<< /Linearized 1 /L 19326762 /H [ 123730 3816 ] /O 5932 /E 639518 /N " + "102 /T 19220281 >>"); + ASSERT_TRUE(linearized_header); + // This hint table is extracted from linearized file, generated by qpdf tool. 
+ RetainPtr<CPDF_ReadValidator> validator = + MakeValidatorFromFile("hint_table_102p.bin"); + CPDF_SyntaxParser parser; + parser.InitParserWithValidator(validator, 0); + std::unique_ptr<CPDF_Stream> stream = ToStream(parser.GetObjectBody(nullptr)); + ASSERT_TRUE(stream); + auto hint_tables = pdfium::MakeUnique<CPDF_HintTables>( + validator.Get(), linearized_header.get()); + // Check that hint table will load. + ASSERT_TRUE(hint_tables->LoadHintStream(stream.get())); + // Check that hint table have correct first page offset. + // 127546 is predefined real value from original file. + EXPECT_EQ(127546, hint_tables->GetFirstPageObjOffset()); +} diff --git a/testing/resources/hint_table_102p.bin b/testing/resources/hint_table_102p.bin Binary files differ new file mode 100644 index 0000000000..4008b0daca --- /dev/null +++ b/testing/resources/hint_table_102p.bin |