summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArtem Strygin <art-snake@yandex-team.ru>2017-11-02 19:59:38 +0000
committerChromium commit bot <commit-bot@chromium.org>2017-11-02 19:59:38 +0000
commit626c2a528fdbb53ddc6fede8ce879f56bfe87716 (patch)
treef97b6d2de024e882443d512d3e4788f6c29467af
parentf1be1e87045da36b52326fb269f7006670c0c9ab (diff)
downloadpdfium-626c2a528fdbb53ddc6fede8ce879f56bfe87716.tar.xz
Refactoring of cross refs availability check.
Use CPDF_CrossRefAvail to check crossrefs. Change-Id: Ia333cff4e86eaab5bad17424c1bb8ef9bdbca8ff Reviewed-on: https://pdfium-review.googlesource.com/15510 Commit-Queue: Art Snake <art-snake@yandex-team.ru> Reviewed-by: dsinclair <dsinclair@chromium.org>
-rw-r--r--BUILD.gn3
-rw-r--r--core/fpdfapi/parser/cpdf_cross_ref_avail.cpp211
-rw-r--r--core/fpdfapi/parser/cpdf_cross_ref_avail.h56
-rw-r--r--core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp339
-rw-r--r--core/fpdfapi/parser/cpdf_data_avail.cpp154
-rw-r--r--core/fpdfapi/parser/cpdf_data_avail.h18
6 files changed, 647 insertions, 134 deletions
diff --git a/BUILD.gn b/BUILD.gn
index 877b622ffd..60b5776e43 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -576,6 +576,8 @@ static_library("fpdfapi") {
"core/fpdfapi/parser/cpdf_array.h",
"core/fpdfapi/parser/cpdf_boolean.cpp",
"core/fpdfapi/parser/cpdf_boolean.h",
+ "core/fpdfapi/parser/cpdf_cross_ref_avail.cpp",
+ "core/fpdfapi/parser/cpdf_cross_ref_avail.h",
"core/fpdfapi/parser/cpdf_crypto_handler.cpp",
"core/fpdfapi/parser/cpdf_crypto_handler.h",
"core/fpdfapi/parser/cpdf_data_avail.cpp",
@@ -1941,6 +1943,7 @@ test("pdfium_unittests") {
"core/fpdfapi/page/cpdf_streamcontentparser_unittest.cpp",
"core/fpdfapi/page/cpdf_streamparser_unittest.cpp",
"core/fpdfapi/parser/cpdf_array_unittest.cpp",
+ "core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp",
"core/fpdfapi/parser/cpdf_document_unittest.cpp",
"core/fpdfapi/parser/cpdf_hint_tables_unittest.cpp",
"core/fpdfapi/parser/cpdf_indirect_object_holder_unittest.cpp",
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_avail.cpp b/core/fpdfapi/parser/cpdf_cross_ref_avail.cpp
new file mode 100644
index 0000000000..be9818ae21
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_cross_ref_avail.cpp
@@ -0,0 +1,211 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_name.h"
+#include "core/fpdfapi/parser/cpdf_read_validator.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
+#include "core/fpdfapi/parser/fpdf_parser_utility.h"
+
+namespace {
+
+// Keywords that delimit a table-style ("V4") cross reference section.
+constexpr char kCrossRefKeyword[] = "xref";
+constexpr char kTrailerKeyword[] = "trailer";
+
+// Dictionary keys read from a trailer or cross reference stream dictionary.
+constexpr char kEncryptKey[] = "Encrypt";
+constexpr char kPrevCrossRefFieldKey[] = "Prev";
+constexpr char kPrevCrossRefStreamOffsetFieldKey[] = "XRefStm";
+constexpr char kTypeFieldKey[] = "Type";
+
+// Value the "Type" entry is compared against for cross reference streams.
+constexpr char kXRefKeyword[] = "XRef";
+
+}  // namespace
+
+// Binds the checker to |parser| (must be non-null) and queues
+// |last_crossref_offset| as the first cross reference section to examine.
+CPDF_CrossRefAvail::CPDF_CrossRefAvail(CPDF_SyntaxParser* parser,
+                                       FX_FILESIZE last_crossref_offset)
+    : parser_(parser), last_crossref_offset_(last_crossref_offset) {
+  ASSERT(parser_);
+  // Seed the work queue with the starting section.
+  AddCrossRefForCheck(last_crossref_offset_);
+}
+
+CPDF_CrossRefAvail::~CPDF_CrossRefAvail() = default;
+
+// Runs the checking state machine until a step cannot make progress and
+// returns the resulting status. Once DataAvailable has been reached, later
+// calls return it immediately without touching the parser.
+CPDF_DataAvail::DocAvailStatus CPDF_CrossRefAvail::CheckAvail() {
+  if (current_status_ == CPDF_DataAvail::DataAvailable)
+    return current_status_;
+
+  // NOTE(review): the session presumably scopes the validator's read-problem
+  // flags to this call - confirm against CPDF_ReadValidator::Session.
+  const CPDF_ReadValidator::Session read_session(GetValidator().Get());
+
+  bool keep_going = true;
+  while (keep_going) {
+    switch (current_state_) {
+      case State::kCrossRefCheck:
+        keep_going = CheckCrossRef();
+        break;
+      case State::kCrossRefV4ItemCheck:
+        keep_going = CheckCrossRefV4Item();
+        break;
+      case State::kCrossRefV4TrailerCheck:
+        keep_going = CheckCrossRefV4Trailer();
+        break;
+      case State::kDone:
+        // Nothing left to do; leave the loop with the current status.
+        keep_going = false;
+        break;
+      default:
+        current_status_ = CPDF_DataAvail::DataError;
+        NOTREACHED();
+        keep_going = false;
+        break;
+    }
+
+    // A step that reported progress must not have hit any read problem.
+    if (keep_going) {
+      ASSERT(!GetValidator()->has_read_problems());
+    }
+  }
+  return current_status_;
+}
+
+// Returns true when the last parser operation could not be trusted: either a
+// read error occurred (the status becomes DataError) or some of the requested
+// data is not available yet (the status is left unchanged).
+bool CPDF_CrossRefAvail::CheckReadProblems() {
+  auto validator = GetValidator();
+  if (!validator->read_error())
+    return validator->has_unavailable_data();
+
+  current_status_ = CPDF_DataAvail::DataError;
+  return true;
+}
+
+// Starts checking the cross reference section at the front of the queue.
+// When the queue is empty the whole check is finished and the status becomes
+// DataAvailable. Returns true when the state machine may continue.
+bool CPDF_CrossRefAvail::CheckCrossRef() {
+  if (cross_refs_for_check_.empty()) {
+    // Every queued cross reference section has been checked.
+    current_state_ = State::kDone;
+    current_status_ = CPDF_DataAvail::DataAvailable;
+    return true;
+  }
+
+  parser_->SetPos(cross_refs_for_check_.front());
+  const ByteString leading_word = parser_->PeekNextWord(nullptr);
+  if (CheckReadProblems())
+    return false;
+
+  // A section starting with "xref" is a table; anything else is handled as a
+  // cross reference stream object.
+  bool section_ok;
+  if (leading_word == kCrossRefKeyword)
+    section_ok = CheckCrossRefV4();
+  else
+    section_ok = CheckCrossRefStream();
+
+  if (!section_ok)
+    return false;
+
+  // Only drop the offset once it was handled, so a later call can retry it.
+  cross_refs_for_check_.pop();
+  return true;
+}
+
+// Consumes the "xref" keyword of a table-style section and switches to item
+// checking, remembering the position right after the keyword.
+bool CPDF_CrossRefAvail::CheckCrossRefV4() {
+  const ByteString token = parser_->GetKeyword();
+  if (CheckReadProblems())
+    return false;
+
+  const bool is_xref_keyword = (token == kCrossRefKeyword);
+  if (!is_xref_keyword) {
+    current_status_ = CPDF_DataAvail::DataError;
+    return false;
+  }
+
+  current_offset_ = parser_->GetPos();
+  current_state_ = State::kCrossRefV4ItemCheck;
+  return true;
+}
+
+// Reads one keyword of the table body starting at |current_offset_|. An empty
+// keyword is an error; "trailer" switches to trailer checking. In both
+// non-error cases the saved offset advances past the keyword just read.
+bool CPDF_CrossRefAvail::CheckCrossRefV4Item() {
+  parser_->SetPos(current_offset_);
+
+  const ByteString token = parser_->GetKeyword();
+  if (CheckReadProblems())
+    return false;
+
+  if (token.IsEmpty()) {
+    current_status_ = CPDF_DataAvail::DataError;
+    return false;
+  }
+
+  const bool reached_trailer = (token == kTrailerKeyword);
+  if (reached_trailer)
+    current_state_ = State::kCrossRefV4TrailerCheck;
+
+  // Resume from here on the next step.
+  current_offset_ = parser_->GetPos();
+  return true;
+}
+
+// Parses the trailer dictionary that follows a table-style cross reference
+// section and queues the sections it points to through "Prev" and "XRefStm".
+//
+// Returns false and sets DataError when the trailer is not a dictionary or
+// its "Encrypt" entry is an indirect reference. Returns false without
+// changing the status when the data is simply not available yet. On success
+// the state machine goes back to checking the next queued section.
+bool CPDF_CrossRefAvail::CheckCrossRefV4Trailer() {
+  parser_->SetPos(current_offset_);
+
+  std::unique_ptr<CPDF_Dictionary> trailer =
+      ToDictionary(parser_->GetObjectBody(nullptr));
+  if (CheckReadProblems())
+    return false;
+
+  if (!trailer) {
+    current_status_ = CPDF_DataAvail::DataError;
+    return false;
+  }
+
+  if (ToReference(trailer->GetObjectFor(kEncryptKey))) {
+    current_status_ = CPDF_DataAvail::DataError;
+    return false;
+  }
+
+  // Offsets must be strictly positive: FX_FILESIZE is signed, so a negative
+  // value would pass the range check and end up as a parser position.
+  const int32_t xrefpos =
+      GetDirectInteger(trailer.get(), kPrevCrossRefFieldKey);
+  if (xrefpos > 0 &&
+      pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(xrefpos)) {
+    AddCrossRefForCheck(static_cast<FX_FILESIZE>(xrefpos));
+  }
+
+  const int32_t stream_xref_offset =
+      GetDirectInteger(trailer.get(), kPrevCrossRefStreamOffsetFieldKey);
+  if (stream_xref_offset > 0 &&
+      pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(
+          stream_xref_offset)) {
+    AddCrossRefForCheck(static_cast<FX_FILESIZE>(stream_xref_offset));
+  }
+
+  // Go on with the next queued cross reference section.
+  current_state_ = State::kCrossRefCheck;
+  return true;
+}
+
+// Parses the indirect object at the current parser position as a cross
+// reference stream and queues the section referenced by its "Prev" entry.
+//
+// Returns false and sets DataError when the object is not a stream with a
+// dictionary or its "Encrypt" entry is an indirect reference. Returns false
+// without changing the status when the data is not available yet. "Prev" is
+// only honoured when the dictionary's "Type" is "XRef".
+bool CPDF_CrossRefAvail::CheckCrossRefStream() {
+  auto cross_ref =
+      parser_->GetIndirectObject(nullptr, CPDF_SyntaxParser::ParseType::kLoose);
+  if (CheckReadProblems())
+    return false;
+
+  const CPDF_Dictionary* trailer =
+      cross_ref && cross_ref->IsStream() ? cross_ref->GetDict() : nullptr;
+  if (!trailer) {
+    current_status_ = CPDF_DataAvail::DataError;
+    return false;
+  }
+
+  if (ToReference(trailer->GetObjectFor(kEncryptKey))) {
+    current_status_ = CPDF_DataAvail::DataError;
+    return false;
+  }
+
+  CPDF_Name* type_name = ToName(trailer->GetObjectFor(kTypeFieldKey));
+  if (type_name && type_name->GetString() == kXRefKeyword) {
+    // Offsets must be strictly positive: FX_FILESIZE is signed, so a negative
+    // value would pass the range check and end up as a parser position.
+    const int32_t xrefpos = trailer->GetIntegerFor(kPrevCrossRefFieldKey);
+    if (xrefpos > 0 &&
+        pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(xrefpos)) {
+      AddCrossRefForCheck(static_cast<FX_FILESIZE>(xrefpos));
+    }
+  }
+
+  // Go on with the next queued cross reference section.
+  current_state_ = State::kCrossRefCheck;
+  return true;
+}
+
+// Queues |crossref_offset| for checking unless it was queued before. The set
+// of registered offsets is what stops "Prev" chains that loop back on
+// themselves from being followed forever.
+void CPDF_CrossRefAvail::AddCrossRefForCheck(FX_FILESIZE crossref_offset) {
+  const bool newly_registered =
+      registered_crossrefs_.insert(crossref_offset).second;
+  if (newly_registered)
+    cross_refs_for_check_.push(crossref_offset);
+}
+
+// Returns the read validator used by the underlying syntax parser.
+fxcrt::RetainPtr<CPDF_ReadValidator> CPDF_CrossRefAvail::GetValidator() {
+  auto validator = parser_->GetValidator();
+  return validator;
+}
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_avail.h b/core/fpdfapi/parser/cpdf_cross_ref_avail.h
new file mode 100644
index 0000000000..aad58f35e7
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_cross_ref_avail.h
@@ -0,0 +1,56 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_AVAIL_H_
+#define CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_AVAIL_H_
+
+#include <memory>
+#include <queue>
+#include <set>
+
+#include "core/fpdfapi/parser/cpdf_data_avail.h"
+#include "core/fxcrt/unowned_ptr.h"
+
+class CPDF_SyntaxParser;
+
+// Checks, step by step, whether all cross reference sections reachable from a
+// starting offset can be read. CheckAvail() may be called repeatedly; it
+// resumes from the state reached by the previous call.
+class CPDF_CrossRefAvail {
+ public:
+  // |parser| must be non-null. |last_crossref_offset| is the offset of the
+  // first cross reference section to check.
+  CPDF_CrossRefAvail(CPDF_SyntaxParser* parser,
+                     FX_FILESIZE last_crossref_offset);
+  ~CPDF_CrossRefAvail();
+
+  // The offset this checker was created with.
+  FX_FILESIZE last_crossref_offset() const { return last_crossref_offset_; }
+
+  // Advances the check as far as possible and returns the current status.
+  CPDF_DataAvail::DocAvailStatus CheckAvail();
+
+ private:
+  // Steps of the checking state machine driven by CheckAvail().
+  enum class State {
+    kCrossRefCheck,           // Pick the next queued section.
+    kCrossRefV4ItemCheck,     // Walk the keywords of a cross reference table.
+    kCrossRefV4TrailerCheck,  // Parse the trailer that follows a table.
+    kDone,                    // Nothing left to check.
+  };
+
+  // Each step returns true when the state machine may continue.
+  bool CheckCrossRef();
+  bool CheckCrossRefV4();
+  bool CheckCrossRefV4Item();
+  bool CheckCrossRefV4Trailer();
+  bool CheckCrossRefStream();
+
+  // Returns true when the last read failed or hit unavailable data.
+  bool CheckReadProblems();
+
+  // Queues |crossref_offset| unless it has been queued before.
+  void AddCrossRefForCheck(FX_FILESIZE crossref_offset);
+
+  fxcrt::RetainPtr<CPDF_ReadValidator> GetValidator();
+
+  fxcrt::UnownedPtr<CPDF_SyntaxParser> parser_;
+  const FX_FILESIZE last_crossref_offset_ = 0;
+  CPDF_DataAvail::DocAvailStatus current_status_ =
+      CPDF_DataAvail::DataNotAvailable;
+  State current_state_ = State::kCrossRefCheck;
+  // Parser position to resume from inside a table-style section.
+  FX_FILESIZE current_offset_ = 0;
+  // Offsets still waiting to be checked, in discovery order.
+  std::queue<FX_FILESIZE> cross_refs_for_check_;
+  // Every offset ever queued; used to skip repeats.
+  std::set<FX_FILESIZE> registered_crossrefs_;
+};
+
+#endif // CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_AVAIL_H_
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp b/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp
new file mode 100644
index 0000000000..b798e17377
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_cross_ref_avail_unittest.cpp
@@ -0,0 +1,339 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
+
+#include <memory>
+#include <string>
+
+#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
+#include "testing/fx_string_testhelpers.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/base/ptr_util.h"
+
+namespace {
+
+// Creates a syntax parser reading from an in-memory stream over |buffer|
+// (|buffer_size| bytes); 0 is passed as the second InitParser() argument.
+std::unique_ptr<CPDF_SyntaxParser> MakeParserForBuffer(
+    const unsigned char* buffer,
+    size_t buffer_size) {
+  auto stream =
+      pdfium::MakeRetain<CFX_BufferSeekableReadStream>(buffer, buffer_size);
+  auto parser = pdfium::MakeUnique<CPDF_SyntaxParser>();
+  parser->InitParser(stream, 0);
+  return parser;
+}
+
+}  // namespace
+
+// A single table-style cross reference section with a plain trailer is
+// reported as available.
+TEST(CPDF_CrossRefAvailTest, CheckCrossRefV4) {
+  const unsigned char kXRefTable[] =
+      "xref \n"
+      "0 6 \n"
+      "0000000003 65535 f \n"
+      "0000000017 00000 n \n"
+      "0000000081 00000 n \n"
+      "0000000000 00007 f \n"
+      "0000000331 00000 n \n"
+      "0000000409 00000 n \n"
+      "trailer\n"
+      "<</Root 14 0 R/ID "
+      "[<afbb0f593c2d2aea5b519cb61da1c17b><4f9bb2e7978401808f8f1f2a75c322c8>]"
+      "/Info 15 0 R/Size 16>>";
+  const FX_FILESIZE start_offset = 0;
+
+  auto parser = MakeParserForBuffer(kXRefTable, FX_ArraySize(kXRefTable));
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataAvailable, checker.CheckAvail());
+}
+
+// A single stream object at the start offset is reported as available.
+TEST(CPDF_CrossRefAvailTest, CheckCrossRefStream) {
+  const unsigned char kXRefStream[] =
+      "16 0 obj\n"
+      "<</Filter /FlateDecode>>"
+      " stream \n"
+      "STREAM DATA STREAM DATA STREAM DATA\n"
+      "endstream\n"
+      "endobj\n";
+  const FX_FILESIZE start_offset = 0;
+
+  auto parser = MakeParserForBuffer(kXRefStream, FX_ArraySize(kXRefStream));
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataAvailable, checker.CheckAvail());
+}
+
+// A start offset far beyond the end of the buffer yields an error.
+TEST(CPDF_CrossRefAvailTest, IncorrectStartOffset) {
+  const unsigned char kXRefStream[] =
+      "16 0 obj\n"
+      "<</Filter /FlateDecode>>"
+      " stream \n"
+      "STREAM DATA STREAM DATA STREAM DATA\n"
+      "endstream\n"
+      "endobj\n";
+  const FX_FILESIZE start_offset = 70000;
+
+  auto parser = MakeParserForBuffer(kXRefStream, FX_ArraySize(kXRefStream));
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataError, checker.CheckAvail());
+}
+
+// A cross reference stream whose /Prev points beyond the end of the buffer
+// yields an error.
+TEST(CPDF_CrossRefAvailTest, IncorrectPrevOffset) {
+  const unsigned char kXRefStream[] =
+      "16 0 obj\n"
+      "<</Type /XRef /Prev 70000>>"
+      " stream \n"
+      "STREAM DATA STREAM DATA STREAM DATA\n"
+      "endstream\n"
+      "endobj\n";
+  const FX_FILESIZE start_offset = 0;
+
+  auto parser = MakeParserForBuffer(kXRefStream, FX_ArraySize(kXRefStream));
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataError, checker.CheckAvail());
+}
+
+// A trailer whose /XRefStm points beyond the end of the buffer yields an
+// error.
+TEST(CPDF_CrossRefAvailTest, IncorrectPrevStreamOffset) {
+  const unsigned char kXRefTable[] =
+      "xref \n"
+      "0 6 \n"
+      "0000000003 65535 f \n"
+      "0000000017 00000 n \n"
+      "0000000081 00000 n \n"
+      "0000000000 00007 f \n"
+      "0000000331 00000 n \n"
+      "0000000409 00000 n \n"
+      "trailer\n"
+      "<</Root 14 0 R/ID "
+      "[<afbb0f593c2d2aea5b519cb61da1c17b><4f9bb2e7978401808f8f1f2a75c322c8>]"
+      "/Info 15 0 R/Size 16 /XRefStm 70000>>";
+  const FX_FILESIZE start_offset = 0;
+
+  auto parser = MakeParserForBuffer(kXRefTable, FX_ArraySize(kXRefTable));
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataError, checker.CheckAvail());
+}
+
+// Arbitrary text that is neither a table nor a stream object yields an error.
+TEST(CPDF_CrossRefAvailTest, IncorrectData) {
+  const unsigned char kGarbage[] =
+      "fiajaoilf w9ifaoihwoiafhja wfijaofijoiaw fhj oiawhfoiah "
+      "wfoihoiwfghouiafghwoigahfi";
+  const FX_FILESIZE start_offset = 0;
+
+  auto parser = MakeParserForBuffer(kGarbage, FX_ArraySize(kGarbage));
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataError, checker.CheckAvail());
+}
+
+// Three table-style sections chained through /Prev, separated by filler
+// text, are all found and reported as available.
+TEST(CPDF_CrossRefAvailTest, ThreeCrossRefV4) {
+  char int_buffer[100];
+  std::string table = "pdf blah blah blah\n";
+
+  // Oldest section: no /Prev entry.
+  const int first_table_offset = static_cast<int>(table.size());
+  table +=
+      "xref \n"
+      "0 6 \n"
+      "0000000003 65535 f \n"
+      "trailer\n"
+      "<</Root 14 0 R/ID "
+      "[<afbb0f593c2d2aea5b519cb61da1c17b><4f9bb2e7978401808f8f1f2a75c322c8>]"
+      "/Info 15 0 R/Size 16>>\n";
+  table += "Dummy Data jgwhughouiwbahng";
+
+  // Middle section: /Prev points at the first one.
+  const int second_table_offset = static_cast<int>(table.size());
+  table += std::string(
+               "xref \n"
+               "0 6 \n"
+               "0000000003 65535 f \n"
+               "trailer\n"
+               "<</Root 14 0 R/ID "
+               "[<afbb0f593c2d2aea5b519cb61da1c17b><"
+               "4f9bb2e7978401808f8f1f2a75c322c8>]"
+               "/Info 15 0 R/Size 16"
+               "/Prev ") +
+           FXSYS_itoa(first_table_offset, int_buffer, 10) + ">>\n";
+  table += "More Dummy Data jgwhughouiwbahng";
+
+  // Newest section: /Prev points at the middle one.
+  const int third_table_offset = static_cast<int>(table.size());
+  table += std::string(
+               "xref \n"
+               "0 6 \n"
+               "0000000003 65535 f \n"
+               "trailer\n"
+               "<</Root 14 0 R/ID "
+               "[<afbb0f593c2d2aea5b519cb61da1c17b><"
+               "4f9bb2e7978401808f8f1f2a75c322c8>]"
+               "/Info 15 0 R/Size 16"
+               "/Prev ") +
+           FXSYS_itoa(second_table_offset, int_buffer, 10) + ">>\n";
+  const FX_FILESIZE start_offset = third_table_offset;
+
+  auto parser = MakeParserForBuffer(
+      reinterpret_cast<const unsigned char*>(table.data()), table.size());
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataAvailable, checker.CheckAvail());
+}
+
+// Three cross reference streams chained through /Prev, separated by filler
+// text, are all found and reported as available.
+TEST(CPDF_CrossRefAvailTest, ThreeCrossRefV5) {
+  char int_buffer[100];
+  std::string table = "pdf blah blah blah\n";
+
+  // Oldest stream: no /Prev entry.
+  const int first_stream_offset = static_cast<int>(table.size());
+  table +=
+      "16 0 obj\n"
+      "<</Type /XRef>>"
+      " stream \n"
+      "STREAM DATA STREAM DATA STREAM DATA ahfcuabfkuabfu\n"
+      "endstream\n"
+      "endobj\n";
+  table += "Dummy Data jgwhughouiwbahng";
+
+  // Middle stream: /Prev points at the first one.
+  const int second_stream_offset = static_cast<int>(table.size());
+  table += std::string(
+               "55 0 obj\n"
+               "<</Type /XRef /Prev ") +
+           FXSYS_itoa(first_stream_offset, int_buffer, 10) +
+           ">>"
+           " stream \n"
+           "STREAM DATA STREAM DATA STREAM DATA\n"
+           "endstream\n"
+           "endobj\n";
+  table += "More Dummy Data jgwhughouiwbahng";
+
+  // Newest stream: /Prev points at the middle one.
+  const int third_stream_offset = static_cast<int>(table.size());
+  table += std::string(
+               "88 0 obj\n"
+               "<</Type /XRef /NNNN /Prev ") +
+           FXSYS_itoa(second_stream_offset, int_buffer, 10) +
+           ">>"
+           " stream \n"
+           "STREAM DATA STREAM DATA STREAM DATA favav\n"
+           "endstream\n"
+           "endobj\n";
+  const FX_FILESIZE start_offset = third_stream_offset;
+
+  auto parser = MakeParserForBuffer(
+      reinterpret_cast<const unsigned char*>(table.data()), table.size());
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataAvailable, checker.CheckAvail());
+}
+
+// A cross reference stream followed by two table-style sections: the newest
+// table references the older table through /Prev and the stream through
+// /XRefStm. The whole chain is reported as available.
+TEST(CPDF_CrossRefAvailTest, Mixed) {
+  char int_buffer[100];
+  std::string table = "pdf blah blah blah\n";
+
+  const int first_v5_table_offset = static_cast<int>(table.size());
+  table +=
+      "16 0 obj\n"
+      "<</Type /XRef>>"
+      " stream \n"
+      "STREAM DATA STREAM DATA STREAM DATA ahfcuabfkuabfu\n"
+      "endstream\n"
+      "endobj\n";
+  table += "Dummy Data jgwhughouiwbahng";
+
+  const int second_v4_table_offset = static_cast<int>(table.size());
+  table += std::string(
+               "xref \n"
+               "0 6 \n"
+               "0000000003 65535 f \n"
+               "trailer\n"
+               "<</Root 14 0 R/ID "
+               "[<afbb0f593c2d2aea5b519cb61da1c17b><"
+               "4f9bb2e7978401808f8f1f2a75c322c8>]"
+               "/Info 15 0 R/Size 16"
+               "/Prev ") +
+           FXSYS_itoa(first_v5_table_offset, int_buffer, 10) + ">>\n";
+  table += "More Dummy Data jgwhughouiwbahng";
+
+  // Both numbers are formatted through the same |int_buffer|. Copy each
+  // result in its own statement: inside a single `+` expression the order in
+  // which the two FXSYS_itoa() calls run is unspecified, so both fields could
+  // otherwise end up showing the same number.
+  const std::string prev_field =
+      FXSYS_itoa(second_v4_table_offset, int_buffer, 10);
+  const std::string xref_stm_field =
+      FXSYS_itoa(first_v5_table_offset, int_buffer, 10);
+
+  const int last_v4_table_offset = static_cast<int>(table.size());
+  table += std::string(
+               "xref \n"
+               "0 6 \n"
+               "0000000003 65535 f \n"
+               "trailer\n"
+               "<</Root 14 0 R/ID "
+               "[<afbb0f593c2d2aea5b519cb61da1c17b><"
+               "4f9bb2e7978401808f8f1f2a75c322c8>]"
+               "/Info 15 0 R/Size 16"
+               "/Prev ") +
+           prev_field + " /XRefStm " + xref_stm_field + ">>\n";
+  const FX_FILESIZE last_crossref_offset = last_v4_table_offset;
+
+  auto parser = MakeParserForBuffer(
+      reinterpret_cast<const unsigned char*>(table.data()), table.size());
+  auto cross_ref_avail = pdfium::MakeUnique<CPDF_CrossRefAvail>(
+      parser.get(), last_crossref_offset);
+  EXPECT_EQ(CPDF_DataAvail::DataAvailable, cross_ref_avail->CheckAvail());
+}
+
+// An indirect object holding an array instead of a stream yields an error.
+TEST(CPDF_CrossRefAvailTest, CrossRefV5IsNotStream) {
+  const unsigned char kNotAStream[] =
+      "16 0 obj\n"
+      "[/array /object]\n"
+      "endstream\n"
+      "endobj\n";
+  const FX_FILESIZE start_offset = 0;
+
+  auto parser = MakeParserForBuffer(kNotAStream, FX_ArraySize(kNotAStream));
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataError, checker.CheckAvail());
+}
+
+// A table trailer whose /Encrypt entry is an indirect reference yields an
+// error.
+TEST(CPDF_CrossRefAvailTest, CrossRefV4WithEncryptRef) {
+  const unsigned char kXRefTable[] =
+      "xref \n"
+      "0 6 \n"
+      "0000000003 65535 f \n"
+      "0000000017 00000 n \n"
+      "0000000081 00000 n \n"
+      "0000000000 00007 f \n"
+      "0000000331 00000 n \n"
+      "0000000409 00000 n \n"
+      "trailer\n"
+      "<</Root 14 0 R/ID "
+      "[<afbb0f593c2d2aea5b519cb61da1c17b><4f9bb2e7978401808f8f1f2a75c322c8>]"
+      "/Encrypt 77 0 R"
+      "/Info 15 0 R/Size 16>>";
+  const FX_FILESIZE start_offset = 0;
+
+  auto parser = MakeParserForBuffer(kXRefTable, FX_ArraySize(kXRefTable));
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataError, checker.CheckAvail());
+}
+
+// A stream dictionary whose /Encrypt entry is an indirect reference yields
+// an error.
+TEST(CPDF_CrossRefAvailTest, CrossRefStreamWithEncryptRef) {
+  const unsigned char kXRefStream[] =
+      "16 0 obj\n"
+      "<</Filter /FlateDecode /Encrypt 77 0 R>>"
+      " stream \n"
+      "STREAM DATA STREAM DATA STREAM DATA\n"
+      "endstream\n"
+      "endobj\n";
+  const FX_FILESIZE start_offset = 0;
+
+  auto parser = MakeParserForBuffer(kXRefStream, FX_ArraySize(kXRefStream));
+  CPDF_CrossRefAvail checker(parser.get(), start_offset);
+
+  EXPECT_EQ(CPDF_DataAvail::DataError, checker.CheckAvail());
+}
diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp
index 61927888cb..e50a7f6369 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.cpp
+++ b/core/fpdfapi/parser/cpdf_data_avail.cpp
@@ -12,6 +12,7 @@
#include "core/fpdfapi/cpdf_modulemgr.h"
#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_hint_tables.h"
@@ -83,18 +84,12 @@ CPDF_DataAvail::CPDF_DataAvail(
m_pFileRead = pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, m_pFileAvail);
m_Pos = 0;
m_dwFileLen = m_pFileRead->GetSize();
- m_dwCurrentOffset = 0;
- m_dwXRefOffset = 0;
- m_dwTrailerOffset = 0;
m_bufferOffset = 0;
m_bufferSize = 0;
m_PagesObjNum = 0;
- m_dwCurrentXRefSteam = 0;
m_dwInfoObjNum = 0;
m_pDocument = 0;
m_dwEncryptObjNum = 0;
- m_dwPrevXRefOffset = 0;
- m_dwLastXRefOffset = 0;
m_bDocAvail = false;
m_bMainXRefLoadTried = false;
m_bDocAvail = false;
@@ -142,16 +137,8 @@ bool CPDF_DataAvail::CheckDocStatus() {
return CheckFirstPage();
case PDF_DATAAVAIL_HINTTABLE:
return CheckHintTables();
- case PDF_DATAAVAIL_END:
- return CheckEnd();
- case PDF_DATAAVAIL_CROSSREF:
- return CheckCrossRef();
- case PDF_DATAAVAIL_CROSSREF_ITEM:
- return CheckCrossRefItem();
- case PDF_DATAAVAIL_TRAILER:
- return CheckTrailer();
case PDF_DATAAVAIL_LOADALLCROSSREF:
- return LoadAllXref();
+ return CheckAndLoadAllXref();
case PDF_DATAAVAIL_LOADALLFILE:
return LoadAllFile();
case PDF_DATAAVAIL_ROOT:
@@ -200,9 +187,37 @@ bool CPDF_DataAvail::LoadAllFile() {
return false;
}
-bool CPDF_DataAvail::LoadAllXref() {
- if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&
- !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
+bool CPDF_DataAvail::CheckAndLoadAllXref() {
+ if (!m_pCrossRefAvail) {
+ const CPDF_ReadValidator::Session read_session(GetValidator().Get());
+ const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
+ if (GetValidator()->has_read_problems())
+ return false;
+
+ if (last_xref_offset <= 0) {
+ m_docStatus = PDF_DATAAVAIL_ERROR;
+ return false;
+ }
+
+ m_pCrossRefAvail = pdfium::MakeUnique<CPDF_CrossRefAvail>(GetSyntaxParser(),
+ last_xref_offset);
+ }
+
+ switch (m_pCrossRefAvail->CheckAvail()) {
+ case DocAvailStatus::DataAvailable:
+ break;
+ case DocAvailStatus::DataNotAvailable:
+ return false;
+ case DocAvailStatus::DataError:
+ m_docStatus = PDF_DATAAVAIL_ERROR;
+ return false;
+ default:
+ NOTREACHED();
+ return false;
+ }
+
+ if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
+ !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
return false;
}
@@ -415,7 +430,8 @@ bool CPDF_DataAvail::CheckPages() {
bool CPDF_DataAvail::CheckHeader() {
switch (CheckHeaderAndLinearized()) {
case DocAvailStatus::DataAvailable:
- m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE : PDF_DATAAVAIL_END;
+ m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE
+ : PDF_DATAAVAIL_LOADALLCROSSREF;
return true;
case DocAvailStatus::DataNotAvailable:
return false;
@@ -531,25 +547,6 @@ CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
return DocAvailStatus::DataAvailable;
}
-bool CPDF_DataAvail::CheckEnd() {
- const CPDF_ReadValidator::Session read_session(GetValidator().Get());
- const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
-
- if (GetValidator()->has_read_problems())
- return false;
-
- m_dwLastXRefOffset = last_xref_offset;
- m_dwXRefOffset = last_xref_offset;
- SetStartOffset(last_xref_offset);
- m_docStatus =
- (last_xref_offset > 0) ? PDF_DATAAVAIL_CROSSREF : PDF_DATAAVAIL_ERROR;
- return true;
-}
-
-void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) {
- m_Pos = dwOffset;
-}
-
bool CPDF_DataAvail::GetNextToken(ByteString* token) {
uint8_t ch;
if (!GetNextChar(ch))
@@ -653,87 +650,6 @@ bool CPDF_DataAvail::GetNextChar(uint8_t& ch) {
return true;
}
-bool CPDF_DataAvail::CheckCrossRefItem() {
- ByteString token;
- while (1) {
- const CPDF_ReadValidator::Session read_session(GetValidator().Get());
- if (!GetNextToken(&token)) {
- if (!GetValidator()->has_read_problems())
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
-
- if (token == "trailer") {
- m_dwTrailerOffset = m_Pos;
- m_docStatus = PDF_DATAAVAIL_TRAILER;
- return true;
- }
- }
-}
-
-bool CPDF_DataAvail::CheckCrossRef() {
- const CPDF_ReadValidator::Session read_session(GetValidator().Get());
- ByteString token;
- if (!GetNextToken(&token)) {
- if (!GetValidator()->has_read_problems())
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
-
- if (token != "xref") {
- m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
- return true;
- }
-
- m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
- return true;
-}
-
-bool CPDF_DataAvail::CheckTrailer() {
- const CPDF_ReadValidator::Session read_session(GetValidator().Get());
- GetSyntaxParser()->SetPos(m_dwTrailerOffset);
- const std::unique_ptr<CPDF_Object> pTrailer =
- GetSyntaxParser()->GetObjectBody(nullptr);
- if (!pTrailer) {
- if (!GetValidator()->has_read_problems())
- m_docStatus = PDF_DATAAVAIL_ERROR;
- return false;
- }
-
- if (!pTrailer->IsDictionary())
- return false;
-
- CPDF_Dictionary* pTrailerDict = pTrailer->GetDict();
- CPDF_Object* pEncrypt = pTrailerDict->GetObjectFor("Encrypt");
- if (ToReference(pEncrypt)) {
- m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
- return true;
- }
-
- // Prevent infinite-looping between Prev entries.
- uint32_t xrefpos = GetDirectInteger(pTrailerDict, "Prev");
- if (!xrefpos || !m_SeenPrevPositions.insert(xrefpos).second) {
- m_dwPrevXRefOffset = 0;
- m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
- return true;
- }
-
- m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm");
- if (m_dwPrevXRefOffset) {
- m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
- return true;
- }
-
- m_dwPrevXRefOffset = xrefpos;
- if (m_dwPrevXRefOffset >= m_dwFileLen) {
- m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
- } else {
- SetStartOffset(m_dwPrevXRefOffset);
- m_docStatus = PDF_DATAAVAIL_CROSSREF;
- }
- return true;
-}
-
bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
while (true) {
switch (m_docStatus) {
diff --git a/core/fpdfapi/parser/cpdf_data_avail.h b/core/fpdfapi/parser/cpdf_data_avail.h
index 772d3350b4..aaf3250f3a 100644
--- a/core/fpdfapi/parser/cpdf_data_avail.h
+++ b/core/fpdfapi/parser/cpdf_data_avail.h
@@ -16,6 +16,7 @@
#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
#include "core/fxcrt/unowned_ptr.h"
+class CPDF_CrossRefAvail;
class CPDF_Dictionary;
class CPDF_HintTables;
class CPDF_IndirectObjectHolder;
@@ -28,10 +29,6 @@ enum PDF_DATAAVAIL_STATUS {
PDF_DATAAVAIL_HEADER = 0,
PDF_DATAAVAIL_FIRSTPAGE,
PDF_DATAAVAIL_HINTTABLE,
- PDF_DATAAVAIL_END,
- PDF_DATAAVAIL_CROSSREF,
- PDF_DATAAVAIL_CROSSREF_ITEM,
- PDF_DATAAVAIL_TRAILER,
PDF_DATAAVAIL_LOADALLCROSSREF,
PDF_DATAAVAIL_ROOT,
PDF_DATAAVAIL_INFO,
@@ -127,10 +124,6 @@ class CPDF_DataAvail final {
bool CheckHeader();
bool CheckFirstPage();
bool CheckHintTables();
- bool CheckEnd();
- bool CheckCrossRef();
- bool CheckCrossRefItem();
- bool CheckTrailer();
bool CheckRoot();
bool CheckInfo();
bool CheckPages();
@@ -152,7 +145,7 @@ class CPDF_DataAvail final {
bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages);
bool PreparePageItem();
bool LoadPages();
- bool LoadAllXref();
+ bool CheckAndLoadAllXref();
bool LoadAllFile();
DocAvailStatus CheckLinearizedData();
@@ -180,10 +173,7 @@ class CPDF_DataAvail final {
std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
UnownedPtr<CPDF_Object> m_pTrailer;
bool m_bDocAvail;
- FX_FILESIZE m_dwLastXRefOffset;
- FX_FILESIZE m_dwXRefOffset;
- FX_FILESIZE m_dwTrailerOffset;
- FX_FILESIZE m_dwCurrentOffset;
+ std::unique_ptr<CPDF_CrossRefAvail> m_pCrossRefAvail;
PDF_DATAAVAIL_STATUS m_docStatus;
FX_FILESIZE m_dwFileLen;
CPDF_Document* m_pDocument;
@@ -201,11 +191,9 @@ class CPDF_DataAvail final {
bool m_bPagesTreeLoad;
bool m_bPagesLoad;
CPDF_Parser* m_pCurrentParser;
- FX_FILESIZE m_dwCurrentXRefSteam;
std::unique_ptr<CPDF_PageObjectAvail> m_pFormAvail;
std::vector<std::unique_ptr<CPDF_Object>> m_PagesArray;
uint32_t m_dwEncryptObjNum;
- FX_FILESIZE m_dwPrevXRefOffset;
bool m_bTotalLoadPageTree;
bool m_bCurPageDictLoadOK;
PageNode m_PageNode;