From 626c2a528fdbb53ddc6fede8ce879f56bfe87716 Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Thu, 2 Nov 2017 19:59:38 +0000 Subject: Refactoring of cross refs availability check. Use CPDF_CrossRefAvail to check crossrefs. Change-Id: Ia333cff4e86eaab5bad17424c1bb8ef9bdbca8ff Reviewed-on: https://pdfium-review.googlesource.com/15510 Commit-Queue: Art Snake Reviewed-by: dsinclair --- core/fpdfapi/parser/cpdf_cross_ref_avail.cpp | 211 +++++++++++++ core/fpdfapi/parser/cpdf_cross_ref_avail.h | 56 ++++ .../parser/cpdf_cross_ref_avail_unittest.cpp | 339 +++++++++++++++++++++ core/fpdfapi/parser/cpdf_data_avail.cpp | 154 +++------- core/fpdfapi/parser/cpdf_data_avail.h | 18 +- 5 files changed, 644 insertions(+), 134 deletions(-) create mode 100644 core/fpdfapi/parser/cpdf_cross_ref_avail.cpp create mode 100644 core/fpdfapi/parser/cpdf_cross_ref_avail.h create mode 100644 core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp (limited to 'core') diff --git a/core/fpdfapi/parser/cpdf_cross_ref_avail.cpp b/core/fpdfapi/parser/cpdf_cross_ref_avail.cpp new file mode 100644 index 0000000000..be9818ae21 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_cross_ref_avail.cpp @@ -0,0 +1,211 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h" + +#include +#include + +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_name.h" +#include "core/fpdfapi/parser/cpdf_read_validator.h" +#include "core/fpdfapi/parser/cpdf_reference.h" +#include "core/fpdfapi/parser/cpdf_syntax_parser.h" +#include "core/fpdfapi/parser/fpdf_parser_utility.h" + +namespace { + +constexpr char kCrossRefKeyword[] = "xref"; +constexpr char kTrailerKeyword[] = "trailer"; +constexpr char kPrevCrossRefFieldKey[] = "Prev"; +constexpr char kTypeFieldKey[] = "Type"; +constexpr char kPrevCrossRefStreamOffsetFieldKey[] = "XRefStm"; +constexpr char kXRefKeyword[] = "XRef"; +constexpr char kEncryptKey[] = "Encrypt"; + +} // namespace + +CPDF_CrossRefAvail::CPDF_CrossRefAvail(CPDF_SyntaxParser* parser, + FX_FILESIZE last_crossref_offset) + : parser_(parser), last_crossref_offset_(last_crossref_offset) { + ASSERT(parser_); + AddCrossRefForCheck(last_crossref_offset); +} + +CPDF_CrossRefAvail::~CPDF_CrossRefAvail() {} + +CPDF_DataAvail::DocAvailStatus CPDF_CrossRefAvail::CheckAvail() { + if (current_status_ == CPDF_DataAvail::DataAvailable) + return CPDF_DataAvail::DataAvailable; + + const CPDF_ReadValidator::Session read_session(GetValidator().Get()); + while (true) { + bool check_result = false; + switch (current_state_) { + case State::kCrossRefCheck: + check_result = CheckCrossRef(); + break; + case State::kCrossRefV4ItemCheck: + check_result = CheckCrossRefV4Item(); + break; + case State::kCrossRefV4TrailerCheck: + check_result = CheckCrossRefV4Trailer(); + break; + case State::kDone: + break; + default: { + current_status_ = CPDF_DataAvail::DataError; + NOTREACHED(); + break; + } + } + if (!check_result) + break; + + ASSERT(!GetValidator()->has_read_problems()); + } + return current_status_; +} + +bool CPDF_CrossRefAvail::CheckReadProblems() { + if (GetValidator()->read_error()) { + current_status_ = CPDF_DataAvail::DataError; + return true; + } + return GetValidator()->has_unavailable_data(); +} + +bool CPDF_CrossRefAvail::CheckCrossRef() { + if (cross_refs_for_check_.empty()) { + // All cross refs were checked. + current_state_ = State::kDone; + current_status_ = CPDF_DataAvail::DataAvailable; + return true; + } + parser_->SetPos(cross_refs_for_check_.front()); + + const ByteString first_word = parser_->PeekNextWord(nullptr); + if (CheckReadProblems()) + return false; + + const bool result = (first_word == kCrossRefKeyword) ? CheckCrossRefV4() + : CheckCrossRefStream(); + + if (result) + cross_refs_for_check_.pop(); + + return result; +} + +bool CPDF_CrossRefAvail::CheckCrossRefV4() { + const ByteString keyword = parser_->GetKeyword(); + if (CheckReadProblems()) + return false; + + if (keyword != kCrossRefKeyword) { + current_status_ = CPDF_DataAvail::DataError; + return false; + } + + current_state_ = State::kCrossRefV4ItemCheck; + current_offset_ = parser_->GetPos(); + return true; +} + +bool CPDF_CrossRefAvail::CheckCrossRefV4Item() { + parser_->SetPos(current_offset_); + const ByteString keyword = parser_->GetKeyword(); + if (CheckReadProblems()) + return false; + + if (keyword.IsEmpty()) { + current_status_ = CPDF_DataAvail::DataError; + return false; + } + + if (keyword == kTrailerKeyword) + current_state_ = State::kCrossRefV4TrailerCheck; + + // Go to next item. + current_offset_ = parser_->GetPos(); + return true; +} + +bool CPDF_CrossRefAvail::CheckCrossRefV4Trailer() { + parser_->SetPos(current_offset_); + + std::unique_ptr trailer = + ToDictionary(parser_->GetObjectBody(nullptr)); + if (CheckReadProblems()) + return false; + + if (!trailer) { + current_status_ = CPDF_DataAvail::DataError; + return false; + } + + if (ToReference(trailer->GetObjectFor(kEncryptKey))) { + current_status_ = CPDF_DataAvail::DataError; + return false; + } + + const int32_t xrefpos = + GetDirectInteger(trailer.get(), kPrevCrossRefFieldKey); + if (xrefpos && + pdfium::base::IsValueInRangeForNumericType(xrefpos)) + AddCrossRefForCheck(static_cast(xrefpos)); + + const int32_t stream_xref_offset = + GetDirectInteger(trailer.get(), kPrevCrossRefStreamOffsetFieldKey); + if (stream_xref_offset && + pdfium::base::IsValueInRangeForNumericType( + stream_xref_offset)) + AddCrossRefForCheck(static_cast(stream_xref_offset)); + + // Goto check next crossref + current_state_ = State::kCrossRefCheck; + return true; +} + +bool CPDF_CrossRefAvail::CheckCrossRefStream() { + auto cross_ref = + parser_->GetIndirectObject(nullptr, CPDF_SyntaxParser::ParseType::kLoose); + if (CheckReadProblems()) + return false; + + const CPDF_Dictionary* trailer = + cross_ref && cross_ref->IsStream() ? cross_ref->GetDict() : nullptr; + if (!trailer) { + current_status_ = CPDF_DataAvail::DataError; + return false; + } + + if (ToReference(trailer->GetObjectFor(kEncryptKey))) { + current_status_ = CPDF_DataAvail::DataError; + return false; + } + + CPDF_Name* type_name = ToName(trailer->GetObjectFor(kTypeFieldKey)); + if (type_name && type_name->GetString() == kXRefKeyword) { + const int32_t xrefpos = trailer->GetIntegerFor(kPrevCrossRefFieldKey); + if (xrefpos && + pdfium::base::IsValueInRangeForNumericType(xrefpos)) + AddCrossRefForCheck(static_cast(xrefpos)); + } + // Goto check next crossref + current_state_ = State::kCrossRefCheck; + return true; +} + +void CPDF_CrossRefAvail::AddCrossRefForCheck(FX_FILESIZE crossref_offset) { + if (registered_crossrefs_.count(crossref_offset)) + return; + + cross_refs_for_check_.push(crossref_offset); + registered_crossrefs_.insert(crossref_offset); +} + +fxcrt::RetainPtr CPDF_CrossRefAvail::GetValidator() { + return parser_->GetValidator(); +} diff --git a/core/fpdfapi/parser/cpdf_cross_ref_avail.h b/core/fpdfapi/parser/cpdf_cross_ref_avail.h new file mode 100644 index 0000000000..aad58f35e7 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_cross_ref_avail.h @@ -0,0 +1,56 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_AVAIL_H_ +#define CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_AVAIL_H_ + +#include +#include +#include + +#include "core/fpdfapi/parser/cpdf_data_avail.h" +#include "core/fxcrt/unowned_ptr.h" + +class CPDF_SyntaxParser; + +class CPDF_CrossRefAvail { + public: + CPDF_CrossRefAvail(CPDF_SyntaxParser* parser, + FX_FILESIZE last_crossref_offset); + ~CPDF_CrossRefAvail(); + + FX_FILESIZE last_crossref_offset() const { return last_crossref_offset_; } + + CPDF_DataAvail::DocAvailStatus CheckAvail(); + + private: + enum class State { + kCrossRefCheck, + kCrossRefV4ItemCheck, + kCrossRefV4TrailerCheck, + kDone, + }; + + bool CheckReadProblems(); + bool CheckCrossRef(); + bool CheckCrossRefV4(); + bool CheckCrossRefV4Item(); + bool CheckCrossRefV4Trailer(); + bool CheckCrossRefStream(); + + void AddCrossRefForCheck(FX_FILESIZE crossref_offset); + + fxcrt::RetainPtr GetValidator(); + + fxcrt::UnownedPtr parser_; + const FX_FILESIZE last_crossref_offset_ = 0; + CPDF_DataAvail::DocAvailStatus current_status_ = + CPDF_DataAvail::DataNotAvailable; + State current_state_ = State::kCrossRefCheck; + FX_FILESIZE current_offset_ = 0; + std::queue cross_refs_for_check_; + std::set registered_crossrefs_; +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_AVAIL_H_ diff --git a/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp b/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp new file mode 100644 index 0000000000..b798e17377 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp @@ -0,0 +1,339 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h" + +#include +#include + +#include "core/fpdfapi/parser/cpdf_syntax_parser.h" +#include "testing/fx_string_testhelpers.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/base/ptr_util.h" + +namespace { + +std::unique_ptr MakeParserForBuffer( + const unsigned char* buffer, + size_t buffer_size) { + auto parser = pdfium::MakeUnique(); + parser->InitParser( + pdfium::MakeRetain(buffer, buffer_size), 0); + return parser; +} + +} // namespace + +TEST(CPDF_CrossRefAvailTest, CheckCrossRefV4) { + const unsigned char xref_table[] = + "xref \n" + "0 6 \n" + "0000000003 65535 f \n" + "0000000017 00000 n \n" + "0000000081 00000 n \n" + "0000000000 00007 f \n" + "0000000331 00000 n \n" + "0000000409 00000 n \n" + "trailer\n" + "<<4f9bb2e7978401808f8f1f2a75c322c8>]" + "/Info 15 0 R/Size 16>>"; + const FX_FILESIZE last_crossref_offset = 0; + + auto parser = MakeParserForBuffer(xref_table, FX_ArraySize(xref_table)); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + + EXPECT_EQ(CPDF_DataAvail::DataAvailable, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, CheckCrossRefStream) { + const unsigned char xref_stream[] = + "16 0 obj\n" + "<>" + " stream \n" + "STREAM DATA STREAM DATA STREAM DATA\n" + "endstream\n" + "endobj\n"; + const FX_FILESIZE last_crossref_offset = 0; + + auto parser = MakeParserForBuffer(xref_stream, FX_ArraySize(xref_stream)); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + + EXPECT_EQ(CPDF_DataAvail::DataAvailable, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, IncorrectStartOffset) { + const unsigned char xref_stream[] = + "16 0 obj\n" + "<>" + " stream \n" + "STREAM DATA STREAM DATA STREAM DATA\n" + "endstream\n" + "endobj\n"; + + const FX_FILESIZE last_crossref_offset = 70000; + + auto parser = MakeParserForBuffer(xref_stream, FX_ArraySize(xref_stream)); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + + EXPECT_EQ(CPDF_DataAvail::DataError, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, IncorrectPrevOffset) { + const unsigned char xref_stream[] = + "16 0 obj\n" + "<>" + " stream \n" + "STREAM DATA STREAM DATA STREAM DATA\n" + "endstream\n" + "endobj\n"; + const FX_FILESIZE last_crossref_offset = 0; + + auto parser = MakeParserForBuffer(xref_stream, FX_ArraySize(xref_stream)); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + EXPECT_EQ(CPDF_DataAvail::DataError, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, IncorrectPrevStreamOffset) { + const unsigned char xref_table[] = + "xref \n" + "0 6 \n" + "0000000003 65535 f \n" + "0000000017 00000 n \n" + "0000000081 00000 n \n" + "0000000000 00007 f \n" + "0000000331 00000 n \n" + "0000000409 00000 n \n" + "trailer\n" + "<<4f9bb2e7978401808f8f1f2a75c322c8>]" + "/Info 15 0 R/Size 16 /XRefStm 70000>>"; + const FX_FILESIZE last_crossref_offset = 0; + + auto parser = MakeParserForBuffer(xref_table, FX_ArraySize(xref_table)); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + EXPECT_EQ(CPDF_DataAvail::DataError, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, IncorrectData) { + const unsigned char incorrect_data[] = + "fiajaoilf w9ifaoihwoiafhja wfijaofijoiaw fhj oiawhfoiah " + "wfoihoiwfghouiafghwoigahfi"; + const FX_FILESIZE last_crossref_offset = 0; + + auto parser = + MakeParserForBuffer(incorrect_data, FX_ArraySize(incorrect_data)); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + EXPECT_EQ(CPDF_DataAvail::DataError, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, ThreeCrossRefV4) { + char int_buffer[100]; + int prev_offset = 0; + int cur_offset = 0; + std::string table = "pdf blah blah blah\n"; + prev_offset = cur_offset; + cur_offset = static_cast(table.size()); + table += + "xref \n" + "0 6 \n" + "0000000003 65535 f \n" + "trailer\n" + "<<4f9bb2e7978401808f8f1f2a75c322c8>]" + "/Info 15 0 R/Size 16>>\n"; + table += "Dummy Data jgwhughouiwbahng"; + prev_offset = cur_offset; + cur_offset = static_cast(table.size()); + table += std::string( + "xref \n" + "0 6 \n" + "0000000003 65535 f \n" + "trailer\n" + "<<" + "4f9bb2e7978401808f8f1f2a75c322c8>]" + "/Info 15 0 R/Size 16" + "/Prev ") + + FXSYS_itoa(prev_offset, int_buffer, 10) + ">>\n"; + table += "More Dummy Data jgwhughouiwbahng"; + prev_offset = cur_offset; + cur_offset = static_cast(table.size()); + table += std::string( + "xref \n" + "0 6 \n" + "0000000003 65535 f \n" + "trailer\n" + "<<" + "4f9bb2e7978401808f8f1f2a75c322c8>]" + "/Info 15 0 R/Size 16" + "/Prev ") + + FXSYS_itoa(prev_offset, int_buffer, 10) + ">>\n"; + const FX_FILESIZE last_crossref_offset = cur_offset; + + auto parser = MakeParserForBuffer( + reinterpret_cast(table.data()), table.size()); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + EXPECT_EQ(CPDF_DataAvail::DataAvailable, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, ThreeCrossRefV5) { + char int_buffer[100]; + int prev_offset = 0; + int cur_offset = 0; + std::string table = "pdf blah blah blah\n"; + prev_offset = cur_offset; + cur_offset = static_cast(table.size()); + table += + "16 0 obj\n" + "<>" + " stream \n" + "STREAM DATA STREAM DATA STREAM DATA ahfcuabfkuabfu\n" + "endstream\n" + "endobj\n"; + table += "Dummy Data jgwhughouiwbahng"; + + prev_offset = cur_offset; + cur_offset = static_cast(table.size()); + table += std::string( + "55 0 obj\n" + "<>" + " stream \n" + "STREAM DATA STREAM DATA STREAM DATA\n" + "endstream\n" + "endobj\n"; + table += "More Dummy Data jgwhughouiwbahng"; + prev_offset = cur_offset; + cur_offset = static_cast(table.size()); + table += std::string( + "88 0 obj\n" + "<>" + " stream \n" + "STREAM DATA STREAM DATA STREAM DATA favav\n" + "endstream\n" + "endobj\n"; + const FX_FILESIZE last_crossref_offset = cur_offset; + + auto parser = MakeParserForBuffer( + reinterpret_cast(table.data()), table.size()); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + EXPECT_EQ(CPDF_DataAvail::DataAvailable, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, Mixed) { + char int_buffer[100]; + std::string table = "pdf blah blah blah\n"; + + const int first_v5_table_offset = static_cast(table.size()); + table += + "16 0 obj\n" + "<>" + " stream \n" + "STREAM DATA STREAM DATA STREAM DATA ahfcuabfkuabfu\n" + "endstream\n" + "endobj\n"; + table += "Dummy Data jgwhughouiwbahng"; + + const int second_v4_table_offset = static_cast(table.size()); + table += std::string( + "xref \n" + "0 6 \n" + "0000000003 65535 f \n" + "trailer\n" + "<<" + "4f9bb2e7978401808f8f1f2a75c322c8>]" + "/Info 15 0 R/Size 16" + "/Prev ") + + FXSYS_itoa(first_v5_table_offset, int_buffer, 10) + ">>\n"; + table += "More Dummy Data jgwhughouiwbahng"; + + const int last_v4_table_offset = static_cast(table.size()); + table += std::string( + "xref \n" + "0 6 \n" + "0000000003 65535 f \n" + "trailer\n" + "<<" + "4f9bb2e7978401808f8f1f2a75c322c8>]" + "/Info 15 0 R/Size 16" + "/Prev ") + + FXSYS_itoa(second_v4_table_offset, int_buffer, 10) + " /XRefStm " + + FXSYS_itoa(first_v5_table_offset, int_buffer, 10) + ">>\n"; + const FX_FILESIZE last_crossref_offset = last_v4_table_offset; + + auto parser = MakeParserForBuffer( + reinterpret_cast(table.data()), table.size()); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + EXPECT_EQ(CPDF_DataAvail::DataAvailable, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, CrossRefV5IsNotStream) { + const unsigned char invalid_xref_stream[] = + "16 0 obj\n" + "[/array /object]\n" + "endstream\n" + "endobj\n"; + const FX_FILESIZE last_crossref_offset = 0; + + auto parser = MakeParserForBuffer(invalid_xref_stream, + FX_ArraySize(invalid_xref_stream)); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + EXPECT_EQ(CPDF_DataAvail::DataError, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, CrossRefV4WithEncryptRef) { + const unsigned char xref_table[] = + "xref \n" + "0 6 \n" + "0000000003 65535 f \n" + "0000000017 00000 n \n" + "0000000081 00000 n \n" + "0000000000 00007 f \n" + "0000000331 00000 n \n" + "0000000409 00000 n \n" + "trailer\n" + "<<4f9bb2e7978401808f8f1f2a75c322c8>]" + "/Encrypt 77 0 R" + "/Info 15 0 R/Size 16>>"; + const FX_FILESIZE last_crossref_offset = 0; + + auto parser = MakeParserForBuffer(xref_table, FX_ArraySize(xref_table)); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + EXPECT_EQ(CPDF_DataAvail::DataError, cross_ref_avail->CheckAvail()); +} + +TEST(CPDF_CrossRefAvailTest, CrossRefStreamWithEncryptRef) { + const unsigned char xref_stream[] = + "16 0 obj\n" + "<>" + " stream \n" + "STREAM DATA STREAM DATA STREAM DATA\n" + "endstream\n" + "endobj\n"; + const FX_FILESIZE last_crossref_offset = 0; + + auto parser = MakeParserForBuffer(xref_stream, FX_ArraySize(xref_stream)); + auto cross_ref_avail = pdfium::MakeUnique( + parser.get(), last_crossref_offset); + EXPECT_EQ(CPDF_DataAvail::DataError, cross_ref_avail->CheckAvail()); +} diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 61927888cb..e50a7f6369 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -12,6 +12,7 @@ #include "core/fpdfapi/cpdf_modulemgr.h" #include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_hint_tables.h" @@ -83,18 +84,12 @@ CPDF_DataAvail::CPDF_DataAvail( m_pFileRead = pdfium::MakeRetain(pFileRead, m_pFileAvail); m_Pos = 0; m_dwFileLen = m_pFileRead->GetSize(); - m_dwCurrentOffset = 0; - m_dwXRefOffset = 0; - m_dwTrailerOffset = 0; m_bufferOffset = 0; m_bufferSize = 0; m_PagesObjNum = 0; - m_dwCurrentXRefSteam = 0; m_dwInfoObjNum = 0; m_pDocument = 0; m_dwEncryptObjNum = 0; - m_dwPrevXRefOffset = 0; - m_dwLastXRefOffset = 0; m_bDocAvail = false; m_bMainXRefLoadTried = false; m_bDocAvail = false; @@ -142,16 +137,8 @@ bool CPDF_DataAvail::CheckDocStatus() { return CheckFirstPage(); case PDF_DATAAVAIL_HINTTABLE: return CheckHintTables(); - case PDF_DATAAVAIL_END: - return CheckEnd(); - case PDF_DATAAVAIL_CROSSREF: - return CheckCrossRef(); - case PDF_DATAAVAIL_CROSSREF_ITEM: - return CheckCrossRefItem(); - case PDF_DATAAVAIL_TRAILER: - return CheckTrailer(); case PDF_DATAAVAIL_LOADALLCROSSREF: - return LoadAllXref(); + return CheckAndLoadAllXref(); case PDF_DATAAVAIL_LOADALLFILE: return LoadAllFile(); case PDF_DATAAVAIL_ROOT: @@ -200,9 +187,37 @@ bool CPDF_DataAvail::LoadAllFile() { return false; } -bool CPDF_DataAvail::LoadAllXref() { - if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && - !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) { +bool CPDF_DataAvail::CheckAndLoadAllXref() { + if (!m_pCrossRefAvail) { + const CPDF_ReadValidator::Session read_session(GetValidator().Get()); + const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef(); + if (GetValidator()->has_read_problems()) + return false; + + if (last_xref_offset <= 0) { + m_docStatus = PDF_DATAAVAIL_ERROR; + return false; + } + + m_pCrossRefAvail = pdfium::MakeUnique(GetSyntaxParser(), + last_xref_offset); + } + + switch (m_pCrossRefAvail->CheckAvail()) { + case DocAvailStatus::DataAvailable: + break; + case DocAvailStatus::DataNotAvailable: + return false; + case DocAvailStatus::DataError: + m_docStatus = PDF_DATAAVAIL_ERROR; + return false; + default: + NOTREACHED(); + return false; + } + + if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) && + !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) { m_docStatus = PDF_DATAAVAIL_LOADALLFILE; return false; } @@ -415,7 +430,8 @@ bool CPDF_DataAvail::CheckPages() { bool CPDF_DataAvail::CheckHeader() { switch (CheckHeaderAndLinearized()) { case DocAvailStatus::DataAvailable: - m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE : PDF_DATAAVAIL_END; + m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE + : PDF_DATAAVAIL_LOADALLCROSSREF; return true; case DocAvailStatus::DataNotAvailable: return false; @@ -531,25 +547,6 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() { return DocAvailStatus::DataAvailable; } -bool CPDF_DataAvail::CheckEnd() { - const CPDF_ReadValidator::Session read_session(GetValidator().Get()); - const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef(); - - if (GetValidator()->has_read_problems()) - return false; - - m_dwLastXRefOffset = last_xref_offset; - m_dwXRefOffset = last_xref_offset; - SetStartOffset(last_xref_offset); - m_docStatus = - (last_xref_offset > 0) ? PDF_DATAAVAIL_CROSSREF : PDF_DATAAVAIL_ERROR; - return true; -} - -void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) { - m_Pos = dwOffset; -} - bool CPDF_DataAvail::GetNextToken(ByteString* token) { uint8_t ch; if (!GetNextChar(ch)) @@ -653,87 +650,6 @@ bool CPDF_DataAvail::GetNextChar(uint8_t& ch) { return true; } -bool CPDF_DataAvail::CheckCrossRefItem() { - ByteString token; - while (1) { - const CPDF_ReadValidator::Session read_session(GetValidator().Get()); - if (!GetNextToken(&token)) { - if (!GetValidator()->has_read_problems()) - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } - - if (token == "trailer") { - m_dwTrailerOffset = m_Pos; - m_docStatus = PDF_DATAAVAIL_TRAILER; - return true; - } - } -} - -bool CPDF_DataAvail::CheckCrossRef() { - const CPDF_ReadValidator::Session read_session(GetValidator().Get()); - ByteString token; - if (!GetNextToken(&token)) { - if (!GetValidator()->has_read_problems()) - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } - - if (token != "xref") { - m_docStatus = PDF_DATAAVAIL_LOADALLFILE; - return true; - } - - m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM; - return true; -} - -bool CPDF_DataAvail::CheckTrailer() { - const CPDF_ReadValidator::Session read_session(GetValidator().Get()); - GetSyntaxParser()->SetPos(m_dwTrailerOffset); - const std::unique_ptr pTrailer = - GetSyntaxParser()->GetObjectBody(nullptr); - if (!pTrailer) { - if (!GetValidator()->has_read_problems()) - m_docStatus = PDF_DATAAVAIL_ERROR; - return false; - } - - if (!pTrailer->IsDictionary()) - return false; - - CPDF_Dictionary* pTrailerDict = pTrailer->GetDict(); - CPDF_Object* pEncrypt = pTrailerDict->GetObjectFor("Encrypt"); - if (ToReference(pEncrypt)) { - m_docStatus = PDF_DATAAVAIL_LOADALLFILE; - return true; - } - - // Prevent infinite-looping between Prev entries. - uint32_t xrefpos = GetDirectInteger(pTrailerDict, "Prev"); - if (!xrefpos || !m_SeenPrevPositions.insert(xrefpos).second) { - m_dwPrevXRefOffset = 0; - m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF; - return true; - } - - m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm"); - if (m_dwPrevXRefOffset) { - m_docStatus = PDF_DATAAVAIL_LOADALLFILE; - return true; - } - - m_dwPrevXRefOffset = xrefpos; - if (m_dwPrevXRefOffset >= m_dwFileLen) { - m_docStatus = PDF_DATAAVAIL_LOADALLFILE; - } else { - SetStartOffset(m_dwPrevXRefOffset); - m_docStatus = PDF_DATAAVAIL_CROSSREF; - } - return true; -} - bool CPDF_DataAvail::CheckPage(uint32_t dwPage) { while (true) { switch (m_docStatus) { diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h index 772d3350b4..aaf3250f3a 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.h +++ b/core/fpdfapi/parser/cpdf_data_avail.h @@ -16,6 +16,7 @@ #include "core/fpdfapi/parser/cpdf_syntax_parser.h" #include "core/fxcrt/unowned_ptr.h" +class CPDF_CrossRefAvail; class CPDF_Dictionary; class CPDF_HintTables; class CPDF_IndirectObjectHolder; @@ -28,10 +29,6 @@ enum PDF_DATAAVAIL_STATUS { PDF_DATAAVAIL_HEADER = 0, PDF_DATAAVAIL_FIRSTPAGE, PDF_DATAAVAIL_HINTTABLE, - PDF_DATAAVAIL_END, - PDF_DATAAVAIL_CROSSREF, - PDF_DATAAVAIL_CROSSREF_ITEM, - PDF_DATAAVAIL_TRAILER, PDF_DATAAVAIL_LOADALLCROSSREF, PDF_DATAAVAIL_ROOT, PDF_DATAAVAIL_INFO, @@ -127,10 +124,6 @@ class CPDF_DataAvail final { bool CheckHeader(); bool CheckFirstPage(); bool CheckHintTables(); - bool CheckEnd(); - bool CheckCrossRef(); - bool CheckCrossRefItem(); - bool CheckTrailer(); bool CheckRoot(); bool CheckInfo(); bool CheckPages(); @@ -152,7 +145,7 @@ class CPDF_DataAvail final { bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages); bool PreparePageItem(); bool LoadPages(); - bool LoadAllXref(); + bool CheckAndLoadAllXref(); bool LoadAllFile(); DocAvailStatus CheckLinearizedData(); @@ -180,10 +173,7 @@ class CPDF_DataAvail final { std::unique_ptr m_pLinearized; UnownedPtr m_pTrailer; bool m_bDocAvail; - FX_FILESIZE m_dwLastXRefOffset; - FX_FILESIZE m_dwXRefOffset; - FX_FILESIZE m_dwTrailerOffset; - FX_FILESIZE m_dwCurrentOffset; + std::unique_ptr m_pCrossRefAvail; PDF_DATAAVAIL_STATUS m_docStatus; FX_FILESIZE m_dwFileLen; CPDF_Document* m_pDocument; @@ -201,11 +191,9 @@ class CPDF_DataAvail final { bool m_bPagesTreeLoad; bool m_bPagesLoad; CPDF_Parser* m_pCurrentParser; - FX_FILESIZE m_dwCurrentXRefSteam; std::unique_ptr m_pFormAvail; std::vector> m_PagesArray; uint32_t m_dwEncryptObjNum; - FX_FILESIZE m_dwPrevXRefOffset; bool m_bTotalLoadPageTree; bool m_bCurPageDictLoadOK; PageNode m_PageNode; -- cgit v1.2.3