From 304eefb58759e56be3fb357c78204accd4fa98fc Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Tue, 29 Aug 2017 00:26:42 +0300 Subject: Implement CPDF_ObjectAvail. This is non recursive replacement for CPDF_DataAvail::AreObjectsAvailable. Also added tests. Change-Id: I546289fc0963d2343253755850f55af8c0bd8e4c Reviewed-on: https://pdfium-review.googlesource.com/11430 Reviewed-by: dsinclair Commit-Queue: Art Snake --- BUILD.gn | 6 + core/fpdfapi/parser/cpdf_data_avail.cpp | 13 +- core/fpdfapi/parser/cpdf_indirect_object_holder.h | 2 +- core/fpdfapi/parser/cpdf_object_avail.cpp | 148 +++++++++ core/fpdfapi/parser/cpdf_object_avail.h | 52 +++ core/fpdfapi/parser/cpdf_object_avail_unittest.cpp | 363 +++++++++++++++++++++ core/fpdfapi/parser/cpdf_page_object_avail.cpp | 17 + core/fpdfapi/parser/cpdf_page_object_avail.h | 21 ++ .../parser/cpdf_page_object_avail_unittest.cpp | 137 ++++++++ core/fpdfapi/parser/cpdf_read_validator.h | 3 +- 10 files changed, 752 insertions(+), 10 deletions(-) create mode 100644 core/fpdfapi/parser/cpdf_object_avail.cpp create mode 100644 core/fpdfapi/parser/cpdf_object_avail.h create mode 100644 core/fpdfapi/parser/cpdf_object_avail_unittest.cpp create mode 100644 core/fpdfapi/parser/cpdf_page_object_avail.cpp create mode 100644 core/fpdfapi/parser/cpdf_page_object_avail.h create mode 100644 core/fpdfapi/parser/cpdf_page_object_avail_unittest.cpp diff --git a/BUILD.gn b/BUILD.gn index 9eb7fb04af..e498e2f614 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -604,8 +604,12 @@ static_library("fpdfapi") { "core/fpdfapi/parser/cpdf_number.h", "core/fpdfapi/parser/cpdf_object.cpp", "core/fpdfapi/parser/cpdf_object.h", + "core/fpdfapi/parser/cpdf_object_avail.cpp", + "core/fpdfapi/parser/cpdf_object_avail.h", "core/fpdfapi/parser/cpdf_object_walker.cpp", "core/fpdfapi/parser/cpdf_object_walker.h", + "core/fpdfapi/parser/cpdf_page_object_avail.cpp", + "core/fpdfapi/parser/cpdf_page_object_avail.h", "core/fpdfapi/parser/cpdf_parser.cpp", 
"core/fpdfapi/parser/cpdf_parser.h", "core/fpdfapi/parser/cpdf_read_validator.cpp", @@ -1896,8 +1900,10 @@ test("pdfium_unittests") { "core/fpdfapi/page/cpdf_streamparser_unittest.cpp", "core/fpdfapi/parser/cpdf_array_unittest.cpp", "core/fpdfapi/parser/cpdf_document_unittest.cpp", + "core/fpdfapi/parser/cpdf_object_avail_unittest.cpp", "core/fpdfapi/parser/cpdf_object_unittest.cpp", "core/fpdfapi/parser/cpdf_object_walker_unittest.cpp", + "core/fpdfapi/parser/cpdf_page_object_avail_unittest.cpp", "core/fpdfapi/parser/cpdf_parser_unittest.cpp", "core/fpdfapi/parser/cpdf_read_validator_unittest.cpp", "core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp", diff --git a/core/fpdfapi/parser/cpdf_data_avail.cpp b/core/fpdfapi/parser/cpdf_data_avail.cpp index 43897a464c..c9cb1d75fc 100644 --- a/core/fpdfapi/parser/cpdf_data_avail.cpp +++ b/core/fpdfapi/parser/cpdf_data_avail.cpp @@ -18,6 +18,7 @@ #include "core/fpdfapi/parser/cpdf_linearized_header.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_page_object_avail.h" #include "core/fpdfapi/parser/cpdf_read_validator.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_stream.h" @@ -1514,10 +1515,8 @@ bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) { CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); if (!pPageDict) return false; - std::vector obj_array; - obj_array.push_back(pPageDict); - std::vector dummy; - return AreObjectsAvailable(obj_array, true, dummy); + CPDF_PageObjectAvail obj_avail(GetValidator().Get(), m_pDocument, pPageDict); + return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable; } bool CPDF_DataAvail::ValidateForm() { @@ -1527,10 +1526,8 @@ bool CPDF_DataAvail::ValidateForm() { CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm"); if (!pAcroForm) return false; - std::vector obj_array; - obj_array.push_back(pAcroForm); - std::vector dummy; - return 
AreObjectsAvailable(obj_array, true, dummy); + CPDF_PageObjectAvail obj_avail(GetValidator().Get(), m_pDocument, pAcroForm); + return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable; } CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {} diff --git a/core/fpdfapi/parser/cpdf_indirect_object_holder.h b/core/fpdfapi/parser/cpdf_indirect_object_holder.h index b6d33a3cd5..b82377e5b7 100644 --- a/core/fpdfapi/parser/cpdf_indirect_object_holder.h +++ b/core/fpdfapi/parser/cpdf_indirect_object_holder.h @@ -28,7 +28,7 @@ class CPDF_IndirectObjectHolder { virtual ~CPDF_IndirectObjectHolder(); CPDF_Object* GetIndirectObject(uint32_t objnum) const; - CPDF_Object* GetOrParseIndirectObject(uint32_t objnum); + virtual CPDF_Object* GetOrParseIndirectObject(uint32_t objnum); void DeleteIndirectObject(uint32_t objnum); // Creates and adds a new object owned by the indirect object holder, diff --git a/core/fpdfapi/parser/cpdf_object_avail.cpp b/core/fpdfapi/parser/cpdf_object_avail.cpp new file mode 100644 index 0000000000..35e7f45850 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_object_avail.cpp @@ -0,0 +1,148 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "core/fpdfapi/parser/cpdf_object_avail.h" + +#include + +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_indirect_object_holder.h" +#include "core/fpdfapi/parser/cpdf_object_walker.h" +#include "core/fpdfapi/parser/cpdf_read_validator.h" +#include "core/fpdfapi/parser/cpdf_reference.h" + +CPDF_ObjectAvail::CPDF_ObjectAvail(CPDF_ReadValidator* validator, + CPDF_IndirectObjectHolder* holder, + const CPDF_Object* root) + : validator_(validator), holder_(holder), root_(root) { + ASSERT(validator_); + ASSERT(holder); + ASSERT(root_); + if (!root_->IsInline()) + parsed_objnums_.insert(root_->GetObjNum()); +} + +CPDF_ObjectAvail::CPDF_ObjectAvail(CPDF_ReadValidator* validator, + CPDF_IndirectObjectHolder* holder, + uint32_t obj_num) + : validator_(validator), + holder_(holder), + root_(pdfium::MakeUnique(holder, obj_num)) { + ASSERT(validator_); + ASSERT(holder); +} + +CPDF_ObjectAvail::~CPDF_ObjectAvail() {} + +CPDF_DataAvail::DocAvailStatus CPDF_ObjectAvail::CheckAvail() { + if (!LoadRootObject()) + return CPDF_DataAvail::DocAvailStatus::DataNotAvailable; + + if (CheckObjects()) { + CleanMemory(); + return CPDF_DataAvail::DocAvailStatus::DataAvailable; + } + return CPDF_DataAvail::DocAvailStatus::DataNotAvailable; +} + +bool CPDF_ObjectAvail::LoadRootObject() { + if (!non_parsed_objects_.empty()) + return true; + + while (root_ && root_->IsReference()) { + const uint32_t ref_obj_num = root_->AsReference()->GetRefObjNum(); + if (HasObjectParsed(ref_obj_num)) { + root_ = nullptr; + return true; + } + + const CPDF_ReadValidator::Session parse_session(validator_.Get()); + const CPDF_Object* direct = holder_->GetOrParseIndirectObject(ref_obj_num); + if (validator_->has_read_problems()) + return false; + + parsed_objnums_.insert(ref_obj_num); + root_ = direct; + } + std::stack non_parsed_objects_in_root; + if (AppendObjectSubRefs(root_.Get(), &non_parsed_objects_in_root)) { + non_parsed_objects_ = 
std::move(non_parsed_objects_in_root); + return true; + } + return false; +} + +bool CPDF_ObjectAvail::CheckObjects() { + std::stack objects_to_check = std::move(non_parsed_objects_); + std::set checked_objects; + while (!objects_to_check.empty()) { + const uint32_t obj_num = objects_to_check.top(); + objects_to_check.pop(); + + if (HasObjectParsed(obj_num)) + continue; + + if (!checked_objects.insert(obj_num).second) + continue; + + const CPDF_ReadValidator::Session parse_session(validator_.Get()); + const CPDF_Object* direct = holder_->GetOrParseIndirectObject(obj_num); + if (direct == root_.Get()) + continue; + + if (validator_->has_read_problems() || + !AppendObjectSubRefs(direct, &objects_to_check)) { + non_parsed_objects_.push(obj_num); + continue; + } + parsed_objnums_.insert(obj_num); + } + return non_parsed_objects_.empty(); +} + +bool CPDF_ObjectAvail::AppendObjectSubRefs(const CPDF_Object* object, + std::stack* refs) const { + ASSERT(refs); + if (!object) + return true; + + CPDF_ObjectWalker walker(object); + while (const CPDF_Object* obj = walker.GetNext()) { + const CPDF_ReadValidator::Session parse_session(validator_.Get()); + + // Skip this object if it's an inlined root, the parent object or + // explicitly excluded. + const bool skip = (walker.GetParent() && obj == root_.Get()) || + walker.dictionary_key() == "Parent" || + (obj != root_.Get() && ExcludeObject(obj)); + + // We need to parse the object before we can do the exclusion check. + // This is because the exclusion check may check against a referenced + // field of the object which we need to make sure is loaded. 
+ if (validator_->has_read_problems()) + return false; + + if (skip) { + walker.SkipWalkIntoCurrentObject(); + continue; + } + + if (obj->IsReference()) + refs->push(obj->AsReference()->GetRefObjNum()); + } + return true; +} + +void CPDF_ObjectAvail::CleanMemory() { + root_.Reset(); + parsed_objnums_.clear(); +} + +bool CPDF_ObjectAvail::ExcludeObject(const CPDF_Object* object) const { + return false; +} + +bool CPDF_ObjectAvail::HasObjectParsed(uint32_t obj_num) const { + return parsed_objnums_.count(obj_num) > 0; +} diff --git a/core/fpdfapi/parser/cpdf_object_avail.h b/core/fpdfapi/parser/cpdf_object_avail.h new file mode 100644 index 0000000000..233d180c94 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_object_avail.h @@ -0,0 +1,52 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CORE_FPDFAPI_PARSER_CPDF_OBJECT_AVAIL_H_ +#define CORE_FPDFAPI_PARSER_CPDF_OBJECT_AVAIL_H_ + +#include +#include +#include + +#include "core/fpdfapi/parser/cpdf_data_avail.h" +#include "core/fxcrt/cfx_maybe_owned.h" +#include "core/fxcrt/cfx_unowned_ptr.h" + +class CPDF_Object; +class CPDF_Reference; +class CPDF_IndirectObjectHolder; +class CPDF_ReadValidator; + +// Helper for checking the availability of an object tree. 
+class CPDF_ObjectAvail { + public: + CPDF_ObjectAvail(CPDF_ReadValidator* validator, + CPDF_IndirectObjectHolder* holder, + const CPDF_Object* root); + CPDF_ObjectAvail(CPDF_ReadValidator* validator, + CPDF_IndirectObjectHolder* holder, + uint32_t obj_num); + virtual ~CPDF_ObjectAvail(); + + CPDF_DataAvail::DocAvailStatus CheckAvail(); + + protected: + virtual bool ExcludeObject(const CPDF_Object* object) const; + + private: + bool LoadRootObject(); + bool CheckObjects(); + bool AppendObjectSubRefs(const CPDF_Object* object, + std::stack* refs) const; + void CleanMemory(); + bool HasObjectParsed(uint32_t obj_num) const; + + CFX_UnownedPtr validator_; + CFX_UnownedPtr holder_; + CFX_MaybeOwned root_; + std::set parsed_objnums_; + std::stack non_parsed_objects_; +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_OBJECT_AVAIL_H_ diff --git a/core/fpdfapi/parser/cpdf_object_avail_unittest.cpp b/core/fpdfapi/parser/cpdf_object_avail_unittest.cpp new file mode 100644 index 0000000000..cb9ceac101 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_object_avail_unittest.cpp @@ -0,0 +1,363 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_object_avail.h" + +#include +#include + +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_indirect_object_holder.h" +#include "core/fpdfapi/parser/cpdf_read_validator.h" +#include "core/fpdfapi/parser/cpdf_reference.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "core/fxcrt/fx_stream.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/base/ptr_util.h" + +namespace { + +class InvalidReader : public IFX_SeekableReadStream { + public: + template + friend CFX_RetainPtr pdfium::MakeRetain(Args&&... 
args); + + // IFX_SeekableReadStream overrides: + bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override { + return false; + } + FX_FILESIZE GetSize() override { return 100; } + + private: + InvalidReader() {} + ~InvalidReader() override {} +}; + +class TestReadValidator : public CPDF_ReadValidator { + public: + template + friend CFX_RetainPtr pdfium::MakeRetain(Args&&... args); + + void SimulateReadError() { ReadBlock(nullptr, 0, 1); } + + protected: + TestReadValidator() + : CPDF_ReadValidator(pdfium::MakeRetain(), nullptr) {} + ~TestReadValidator() override {} +}; + +class TestHolder : public CPDF_IndirectObjectHolder { + public: + enum class ObjectState { + Unavailable, + Available, + }; + TestHolder() : validator_(pdfium::MakeRetain()) {} + ~TestHolder() override {} + + // CPDF_IndirectObjectHolder overrides: + CPDF_Object* GetOrParseIndirectObject(uint32_t objnum) override { + auto it = objects_data_.find(objnum); + if (it == objects_data_.end()) + return nullptr; + + ObjectData& obj_data = it->second; + if (obj_data.state == ObjectState::Unavailable) { + validator_->SimulateReadError(); + return nullptr; + } + return obj_data.object.get(); + } + + CFX_RetainPtr GetValidator() { return validator_; } + + void AddObject(uint32_t objnum, + std::unique_ptr object, + ObjectState state) { + ObjectData object_data; + object_data.object = std::move(object); + object_data.state = state; + ASSERT(objects_data_.find(objnum) == objects_data_.end()); + objects_data_[objnum] = std::move(object_data); + } + + void SetObjectState(uint32_t objnum, ObjectState state) { + auto it = objects_data_.find(objnum); + ASSERT(it != objects_data_.end()); + ObjectData& obj_data = it->second; + obj_data.state = state; + } + + CPDF_Object* GetTestObject(uint32_t objnum) { + auto it = objects_data_.find(objnum); + if (it == objects_data_.end()) + return nullptr; + return it->second.object.get(); + } + + private: + struct ObjectData { + std::unique_ptr object; + ObjectState 
state = ObjectState::Unavailable; + }; + std::map objects_data_; + CFX_RetainPtr validator_; +}; + +class CPDF_ObjectAvailFailOnExclude : public CPDF_ObjectAvail { + public: + using CPDF_ObjectAvail::CPDF_ObjectAvail; + ~CPDF_ObjectAvailFailOnExclude() override {} + bool ExcludeObject(const CPDF_Object* object) const override { + NOTREACHED(); + return false; + } +}; + +class CPDF_ObjectAvailExcludeArray : public CPDF_ObjectAvail { + public: + using CPDF_ObjectAvail::CPDF_ObjectAvail; + ~CPDF_ObjectAvailExcludeArray() override {} + bool ExcludeObject(const CPDF_Object* object) const override { + return object->IsArray(); + } +}; + +class CPDF_ObjectAvailExcludeTypeKey : public CPDF_ObjectAvail { + public: + using CPDF_ObjectAvail::CPDF_ObjectAvail; + ~CPDF_ObjectAvailExcludeTypeKey() override {} + bool ExcludeObject(const CPDF_Object* object) const override { + // The value of "Type" may be a reference, and if it is not available, + // we can incorrectly filter objects. + // In this case CPDF_ObjectAvail should wait for availability of this item + // and call ExcludeObject again. 
+ return object->IsDictionary() && + object->GetDict()->GetStringFor("Type") == "Exclude me"; + } +}; + +} // namespace + +TEST(CPDF_ObjectAvailTest, OneObject) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(nullptr, "string", false), + TestHolder::ObjectState::Unavailable); + CPDF_ObjectAvail avail(holder.GetValidator().Get(), &holder, 1); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + holder.SetObjectState(1, TestHolder::ObjectState::Available); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, OneReferencedObject) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(&holder, 2), + TestHolder::ObjectState::Unavailable); + holder.AddObject(2, pdfium::MakeUnique(nullptr, "string", false), + TestHolder::ObjectState::Unavailable); + CPDF_ObjectAvail avail(holder.GetValidator().Get(), &holder, 1); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + + holder.SetObjectState(1, TestHolder::ObjectState::Available); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + + holder.SetObjectState(2, TestHolder::ObjectState::Available); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, CycledReferences) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(&holder, 2), + TestHolder::ObjectState::Unavailable); + holder.AddObject(2, pdfium::MakeUnique(&holder, 3), + TestHolder::ObjectState::Unavailable); + holder.AddObject(3, pdfium::MakeUnique(&holder, 1), + TestHolder::ObjectState::Unavailable); + + CPDF_ObjectAvail avail(holder.GetValidator().Get(), &holder, 1); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + + holder.SetObjectState(1, TestHolder::ObjectState::Available); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + + 
holder.SetObjectState(2, TestHolder::ObjectState::Available); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + + holder.SetObjectState(3, TestHolder::ObjectState::Available); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, DoNotCheckParent) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(), + TestHolder::ObjectState::Unavailable); + holder.AddObject(2, pdfium::MakeUnique(), + TestHolder::ObjectState::Unavailable); + + holder.GetTestObject(2)->GetDict()->SetNewFor("Parent", + &holder, 1); + + CPDF_ObjectAvail avail(holder.GetValidator().Get(), &holder, 2); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + + holder.SetObjectState(2, TestHolder::ObjectState::Available); + // Object should be available in case when "Parent" object is unavailable. + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, Generic) { + TestHolder holder; + const uint32_t kDepth = 100; + for (uint32_t i = 1; i < kDepth; ++i) { + holder.AddObject(i, pdfium::MakeUnique(), + TestHolder::ObjectState::Unavailable); + // Add ref to next dictionary. 
+ holder.GetTestObject(i)->GetDict()->SetNewFor( + "Child", &holder, i + 1); + } + // Add final object + holder.AddObject(kDepth, pdfium::MakeUnique(), + TestHolder::ObjectState::Unavailable); + + CPDF_ObjectAvail avail(holder.GetValidator().Get(), &holder, 1); + + for (uint32_t i = 1; i <= kDepth; ++i) { + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + holder.SetObjectState(i, TestHolder::ObjectState::Available); + } + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, NotExcludeRoot) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + CPDF_ObjectAvailFailOnExclude avail(holder.GetValidator().Get(), &holder, 1); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, NotExcludeReferedRoot) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(&holder, 2), + TestHolder::ObjectState::Available); + holder.AddObject(2, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + CPDF_ObjectAvailFailOnExclude avail(holder.GetValidator().Get(), &holder, 1); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, Exclude) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + holder.GetTestObject(1)->GetDict()->SetNewFor("ArrayRef", + &holder, 2); + holder.AddObject(2, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + holder.GetTestObject(2)->AsArray()->AddNew(&holder, 2); + + // Add string, which is referred to by array item. It should not be checked. 
+ holder.AddObject( + 3, + pdfium::MakeUnique(nullptr, "Not available string", false), + TestHolder::ObjectState::Unavailable); + CPDF_ObjectAvailExcludeArray avail(holder.GetValidator().Get(), &holder, 1); + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, ReadErrorOnExclude) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + holder.GetTestObject(1)->GetDict()->SetNewFor("DictRef", + &holder, 2); + holder.AddObject(2, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + + holder.GetTestObject(2)->GetDict()->SetNewFor("Type", &holder, + 3); + // The value of "Type" key is not available at start + holder.AddObject( + 3, pdfium::MakeUnique(nullptr, "Exclude me", false), + TestHolder::ObjectState::Unavailable); + + holder.GetTestObject(2)->GetDict()->SetNewFor("OtherData", + &holder, 4); + // Add string, which is referred to by dictionary item. It should not be + // checked, because the dictionary containing it should be skipped. + holder.AddObject( + 4, + pdfium::MakeUnique(nullptr, "Not available string", false), + TestHolder::ObjectState::Unavailable); + + CPDF_ObjectAvailExcludeTypeKey avail(holder.GetValidator().Get(), &holder, 1); + + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + + // Make "Type" value object available. + holder.SetObjectState(3, TestHolder::ObjectState::Available); + + // Now object should be available, although the object '4' is not available, + // because it is in a skipped dictionary. 
+ EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, IgnoreNotExistsObject) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + holder.GetTestObject(1)->GetDict()->SetNewFor( + "NotExistsObjRef", &holder, 2); + CPDF_ObjectAvail avail(holder.GetValidator().Get(), &holder, 1); + // Now object should be available, although the object '2' does not exist. + // All related data that exists in the file has been checked. + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, CheckTwice) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(nullptr, "string", false), + TestHolder::ObjectState::Unavailable); + + CPDF_ObjectAvail avail(holder.GetValidator().Get(), &holder, 1); + EXPECT_EQ(avail.CheckAvail(), avail.CheckAvail()); + + holder.SetObjectState(1, TestHolder::ObjectState::Available); + EXPECT_EQ(avail.CheckAvail(), avail.CheckAvail()); +} + +TEST(CPDF_ObjectAvailTest, SelfReferedInlinedObject) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + + holder.GetTestObject(1)->GetDict()->SetNewFor("Data", &holder, + 2); + auto* root = + holder.GetTestObject(1)->GetDict()->SetNewFor("Dict"); + + root->SetNewFor("Self", &holder, 1); + + holder.AddObject(2, pdfium::MakeUnique(nullptr, "Data", false), + TestHolder::ObjectState::Unavailable); + + CPDF_ObjectAvail avail(holder.GetValidator().Get(), &holder, root); + + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataNotAvailable, + avail.CheckAvail()); + + holder.SetObjectState(2, TestHolder::ObjectState::Available); + + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} diff --git a/core/fpdfapi/parser/cpdf_page_object_avail.cpp b/core/fpdfapi/parser/cpdf_page_object_avail.cpp new file mode 100644 index 0000000000..6673885f61 --- /dev/null +++ 
b/core/fpdfapi/parser/cpdf_page_object_avail.cpp @@ -0,0 +1,17 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_page_object_avail.h" + +#include "core/fpdfapi/parser/cpdf_dictionary.h" + +CPDF_PageObjectAvail::~CPDF_PageObjectAvail() {} + +bool CPDF_PageObjectAvail::ExcludeObject(const CPDF_Object* object) const { + if (CPDF_ObjectAvail::ExcludeObject(object)) + return true; + + return object->IsDictionary() && + object->GetDict()->GetStringFor("Type") == "Page"; +} diff --git a/core/fpdfapi/parser/cpdf_page_object_avail.h b/core/fpdfapi/parser/cpdf_page_object_avail.h new file mode 100644 index 0000000000..8c740ed045 --- /dev/null +++ b/core/fpdfapi/parser/cpdf_page_object_avail.h @@ -0,0 +1,21 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CORE_FPDFAPI_PARSER_CPDF_PAGE_OBJECT_AVAIL_H_ +#define CORE_FPDFAPI_PARSER_CPDF_PAGE_OBJECT_AVAIL_H_ + +#include "core/fpdfapi/parser/cpdf_object_avail.h" + +// Helper for checking the availability of a page's object tree. +// Excludes references to pages. +class CPDF_PageObjectAvail : public CPDF_ObjectAvail { + public: + using CPDF_ObjectAvail::CPDF_ObjectAvail; + ~CPDF_PageObjectAvail() override; + + protected: + bool ExcludeObject(const CPDF_Object* object) const override; +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_PAGE_OBJECT_AVAIL_H_ diff --git a/core/fpdfapi/parser/cpdf_page_object_avail_unittest.cpp b/core/fpdfapi/parser/cpdf_page_object_avail_unittest.cpp new file mode 100644 index 0000000000..a6454cd9fe --- /dev/null +++ b/core/fpdfapi/parser/cpdf_page_object_avail_unittest.cpp @@ -0,0 +1,137 @@ +// Copyright 2017 PDFium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_page_object_avail.h" + +#include +#include +#include + +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_indirect_object_holder.h" +#include "core/fpdfapi/parser/cpdf_read_validator.h" +#include "core/fpdfapi/parser/cpdf_reference.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "core/fxcrt/fx_stream.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/base/ptr_util.h" + +namespace { + +class InvalidReader : public IFX_SeekableReadStream { + public: + template + friend CFX_RetainPtr pdfium::MakeRetain(Args&&... args); + + // IFX_SeekableReadStream overrides: + bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override { + return false; + } + FX_FILESIZE GetSize() override { return 100; } + + private: + InvalidReader() {} + ~InvalidReader() override {} +}; + +class TestReadValidator : public CPDF_ReadValidator { + public: + template + friend CFX_RetainPtr pdfium::MakeRetain(Args&&... 
args); + + void SimulateReadError() { ReadBlock(nullptr, 0, 1); } + + protected: + TestReadValidator() + : CPDF_ReadValidator(pdfium::MakeRetain(), nullptr) {} + ~TestReadValidator() override {} +}; + +class TestHolder : public CPDF_IndirectObjectHolder { + public: + enum class ObjectState { + Unavailable, + Available, + }; + TestHolder() : validator_(pdfium::MakeRetain()) {} + ~TestHolder() override {} + + // CPDF_IndirectObjectHolder overrides: + CPDF_Object* GetOrParseIndirectObject(uint32_t objnum) override { + auto it = objects_data_.find(objnum); + if (it == objects_data_.end()) + return nullptr; + + ObjectData& obj_data = it->second; + if (obj_data.state == ObjectState::Unavailable) { + validator_->SimulateReadError(); + return nullptr; + } + return obj_data.object.get(); + } + + CFX_RetainPtr GetValidator() { return validator_; } + + void AddObject(uint32_t objnum, + std::unique_ptr object, + ObjectState state) { + ObjectData object_data; + object_data.object = std::move(object); + object_data.state = state; + ASSERT(objects_data_.find(objnum) == objects_data_.end()); + objects_data_[objnum] = std::move(object_data); + } + + void SetObjectState(uint32_t objnum, ObjectState state) { + auto it = objects_data_.find(objnum); + ASSERT(it != objects_data_.end()); + ObjectData& obj_data = it->second; + obj_data.state = state; + } + + CPDF_Object* GetTestObject(uint32_t objnum) { + auto it = objects_data_.find(objnum); + if (it == objects_data_.end()) + return nullptr; + return it->second.object.get(); + } + + private: + struct ObjectData { + std::unique_ptr object; + ObjectState state = ObjectState::Unavailable; + }; + std::map objects_data_; + CFX_RetainPtr validator_; +}; + +} // namespace + +TEST(CPDF_PageObjectAvailTest, ExcludePages) { + TestHolder holder; + holder.AddObject(1, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + holder.GetTestObject(1)->GetDict()->SetNewFor("Kids", &holder, + 2); + holder.AddObject(2, pdfium::MakeUnique(), + 
TestHolder::ObjectState::Available); + holder.GetTestObject(2)->AsArray()->AddNew(&holder, 3); + + holder.AddObject(3, pdfium::MakeUnique(), + TestHolder::ObjectState::Available); + holder.GetTestObject(3)->GetDict()->SetFor( + "Type", pdfium::MakeUnique(nullptr, "Page", false)); + holder.GetTestObject(3)->GetDict()->SetNewFor("OtherPageData", + &holder, 4); + // Add unavailable object related to other page. + holder.AddObject( + 4, pdfium::MakeUnique(nullptr, "Other page data", false), + TestHolder::ObjectState::Unavailable); + + CPDF_PageObjectAvail avail(holder.GetValidator().Get(), &holder, 1); + // Now object should be available, although the object '4' is not available, + // because it belongs to another page, which is skipped. + EXPECT_EQ(CPDF_DataAvail::DocAvailStatus::DataAvailable, avail.CheckAvail()); +} diff --git a/core/fpdfapi/parser/cpdf_read_validator.h b/core/fpdfapi/parser/cpdf_read_validator.h index 106f6e437b..20ec866a59 100644 --- a/core/fpdfapi/parser/cpdf_read_validator.h +++ b/core/fpdfapi/parser/cpdf_read_validator.h @@ -46,11 +46,12 @@ class CPDF_ReadValidator : public IFX_SeekableReadStream { bool ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override; FX_FILESIZE GetSize() override; - private: + protected: CPDF_ReadValidator(const CFX_RetainPtr& file_read, CPDF_DataAvail::FileAvail* file_avail); ~CPDF_ReadValidator() override; + private: void ScheduleDownload(FX_FILESIZE offset, size_t size); CFX_RetainPtr file_read_; -- cgit v1.2.3