diff options
author | Artem Strygin <art-snake@yandex-team.ru> | 2017-10-02 22:08:44 +0300 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-10-04 15:41:16 +0000 |
commit | a5fc8975c865dc3cc90de8ff46ca13fb46c13391 (patch) | |
tree | 36b38e781140fc31eeec8a55d85299911117ac2b /core/fpdfapi/parser | |
parent | 4db6e37b18648dfe2c94b672276c7bf6554fd9d4 (diff) | |
download | pdfium-a5fc8975c865dc3cc90de8ff46ca13fb46c13391.tar.xz |
Unify parsing of cross ref table v4.
We can use the 'Prev' value of the first-page cross-reference table trailer
to load the main cross-reference table,
instead of the 'T' value of the Linearized header (offset of the first entry in the main cross-reference table).
This is a better solution, because it allows us to check the entry count in the main cross-ref table and
unify loading of the main cross-ref table with the loading methods used for non-linearized documents.
See PDF specification:
http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf#678
(page 678, Example 3/part 3)
Change-Id: I59dcf3c73a0fb561221ded78e827e40535dbd717
Reviewed-on: https://pdfium-review.googlesource.com/13810
Commit-Queue: Art Snake <art-snake@yandex-team.ru>
Reviewed-by: dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'core/fpdfapi/parser')
-rw-r--r-- | core/fpdfapi/parser/cpdf_data_avail.cpp | 13 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.cpp | 58 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser.h | 4 | ||||
-rw-r--r-- | core/fpdfapi/parser/cpdf_parser_embeddertest.cpp | 16 |
4 files changed, 43 insertions, 48 deletions
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 38e857f22f..91fc8c1087 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -1171,13 +1171,22 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() { return DataError; if (!m_bMainXRefLoadTried) { + ASSERT(m_pDocument->GetParser()->GetTrailer()); + const FX_SAFE_FILESIZE main_xref_offset = + m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev"); + if (!main_xref_offset.IsValid()) + return DataError; + + if (main_xref_offset.ValueOrDie() == 0) + return DataAvailable; + FX_SAFE_SIZE_T data_size = m_dwFileLen; - data_size -= m_pLinearized->GetLastXRefOffset(); + data_size -= main_xref_offset.ValueOrDie(); if (!data_size.IsValid()) return DataError; if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable( - m_pLinearized->GetLastXRefOffset(), data_size.ValueOrDie())) + main_xref_offset.ValueOrDie(), data_size.ValueOrDie())) return DataNotAvailable; CPDF_Parser::Error eRet = diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index c443c8baf2..b6231a7a6a 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -102,8 +102,7 @@ CPDF_Parser::CPDF_Parser() m_bXRefStream(false), m_FileVersion(0), m_pEncryptDict(nullptr), - m_TrailerData(pdfium::MakeUnique<TrailerData>()), - m_dwLinearizedFirstPageXRefStartObjNum(0) {} + m_TrailerData(pdfium::MakeUnique<TrailerData>()) {} CPDF_Parser::~CPDF_Parser() { ReleaseEncryptHandler(); @@ -421,9 +420,8 @@ bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { return true; } -bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, - uint32_t dwObjCount) { - if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) +bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) { + if (!LoadCrossRefV4(xrefpos, false)) return false; std::unique_ptr<CPDF_Dictionary> trailer = 
LoadTrailerV4(); @@ -477,18 +475,6 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, return true; } -bool CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, - uint32_t dwObjCount) { - FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset; - - m_pSyntax->SetPos(dwStartPos); - std::vector<CrossRefObjData> objects; - if (!ParseAndAppendCrossRefSubsectionData(0, dwObjCount, &objects)) - return false; - MergeCrossRefObjectsData(objects); - return true; -} - bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData( uint32_t start_objnum, uint32_t count, @@ -1318,9 +1304,10 @@ bool CPDF_Parser::ParseLinearizedHeader() { if (!m_pLinearized) return false; - m_LastXRefOffset = m_pLinearized->GetLastXRefOffset(); // Move parser onto first page xref table start. m_pSyntax->GetNextWord(nullptr); + + m_LastXRefOffset = m_pSyntax->GetPos(); return true; } @@ -1340,7 +1327,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( m_bHasParsed = true; m_pDocument = pDocument; - FX_FILESIZE dwFirstXRefOffset = m_pSyntax->GetPos(); + FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset; bool bXRefRebuilt = false; bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false); if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) { @@ -1350,8 +1337,6 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( bXRefRebuilt = true; m_LastXRefOffset = 0; } - m_dwLinearizedFirstPageXRefStartObjNum = - m_ObjectInfo.empty() ? 
0 : m_ObjectInfo.begin()->first; if (bLoadV4) { std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4(); if (!trailer) @@ -1423,33 +1408,20 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { } CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { + const FX_SAFE_FILESIZE main_xref_offset = GetTrailer()->GetIntegerFor("Prev"); + if (!main_xref_offset.IsValid()) + return FORMAT_ERROR; + + if (main_xref_offset.ValueOrDie() == 0) + return SUCCESS; + const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum); m_MetadataObjnum = 0; - m_pSyntax->SetPos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); - - uint8_t ch = 0; - uint32_t dwCount = 0; - m_pSyntax->GetNextChar(ch); - while (PDFCharIsWhitespace(ch)) { - ++dwCount; - if (m_pSyntax->m_FileLen <= - (FX_FILESIZE)(m_pSyntax->GetPos() + m_pSyntax->m_HeaderOffset)) { - break; - } - if (!m_pSyntax->GetNextChar(ch)) - return HANDLER_ERROR; - } - m_LastXRefOffset += dwCount; m_ObjectStreamMap.clear(); m_ObjCache.clear(); - // In linearized document, the main cross ref always should start from 0 - // objnum. - // And should have count equals to first obj number of first page cross ref - // table. 
- if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, - m_dwLinearizedFirstPageXRefStartObjNum) && - !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { + if (!LoadLinearizedAllCrossRefV4(main_xref_offset.ValueOrDie()) && + !LoadLinearizedAllCrossRefV5(main_xref_offset.ValueOrDie())) { m_LastXRefOffset = 0; return FORMAT_ERROR; } diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h index 7c6a75d8c5..96bd6f43a9 100644 --- a/core/fpdfapi/parser/cpdf_parser.h +++ b/core/fpdfapi/parser/cpdf_parser.h @@ -162,8 +162,7 @@ class CPDF_Parser { std::unique_ptr<CPDF_Dictionary> LoadTrailerV4(); Error SetEncryptHandler(); void ReleaseEncryptHandler(); - bool LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); - bool LoadLinearizedCrossRefV4(FX_FILESIZE pos, uint32_t dwObjCount); + bool LoadLinearizedAllCrossRefV4(FX_FILESIZE pos); bool LoadLinearizedAllCrossRefV5(FX_FILESIZE pos); Error LoadLinearizedMainXRefTable(); RetainPtr<CPDF_StreamAcc> GetObjectStream(uint32_t number); @@ -207,7 +206,6 @@ class CPDF_Parser { ByteString m_Password; std::unique_ptr<TrailerData> m_TrailerData; std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized; - uint32_t m_dwLinearizedFirstPageXRefStartObjNum; // A map of object numbers to indirect streams. 
std::map<uint32_t, RetainPtr<CPDF_StreamAcc>> m_ObjectStreamMap; diff --git a/core/fpdfapi/parser/cpdf_parser_embeddertest.cpp b/core/fpdfapi/parser/cpdf_parser_embeddertest.cpp index 99bc2c2d42..6aa3e2785a 100644 --- a/core/fpdfapi/parser/cpdf_parser_embeddertest.cpp +++ b/core/fpdfapi/parser/cpdf_parser_embeddertest.cpp @@ -57,3 +57,19 @@ TEST_F(CPDFParserEmbeddertest, Bug_602650) { TEST_F(CPDFParserEmbeddertest, Bug_757705) { EXPECT_TRUE(OpenDocument("bug_757705.pdf")); } + +TEST_F(CPDFParserEmbeddertest, LoadMainCrossRefTable) { + EXPECT_TRUE(OpenDocument("feature_linearized_loading.pdf", nullptr, true)); + // To check that main cross ref table is loaded correctly,will be enough to + // check that the second page was correctly loaded. Because it is contains + // crossrefs for second page. + EXPECT_EQ(2, GetPageCount()); + FPDF_PAGE page = LoadPage(1); + EXPECT_NE(nullptr, page); + FPDF_TEXTPAGE text_page = FPDFText_LoadPage(page); + EXPECT_NE(nullptr, text_page); + // The page should not be blank. + EXPECT_LT(0, FPDFText_CountChars(text_page)); + FPDFText_ClosePage(text_page); + UnloadPage(page); +} |