From f57cad4f5dd0436d5b207d362afb34fc5f3f9acb Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Mon, 14 Aug 2017 23:35:52 +0300 Subject: Add CPDF_ObjectWalker. It is allow us to walk on all non-null sub-objects in an object in depth, include itself, Change-Id: Ia23051073984411668112422b47cf7a4460aa078 Reviewed-on: https://pdfium-review.googlesource.com/8910 Commit-Queue: Art Snake Reviewed-by: dsinclair --- core/fpdfapi/parser/cpdf_null.cpp | 4 + core/fpdfapi/parser/cpdf_null.h | 1 + core/fpdfapi/parser/cpdf_object.cpp | 4 + core/fpdfapi/parser/cpdf_object.h | 1 + core/fpdfapi/parser/cpdf_object_walker.cpp | 165 +++++++++++++++++++++ core/fpdfapi/parser/cpdf_object_walker.h | 69 +++++++++ .../fpdfapi/parser/cpdf_object_walker_unittest.cpp | 142 ++++++++++++++++++ 7 files changed, 386 insertions(+) create mode 100644 core/fpdfapi/parser/cpdf_object_walker.cpp create mode 100644 core/fpdfapi/parser/cpdf_object_walker.h create mode 100644 core/fpdfapi/parser/cpdf_object_walker_unittest.cpp (limited to 'core/fpdfapi/parser') diff --git a/core/fpdfapi/parser/cpdf_null.cpp b/core/fpdfapi/parser/cpdf_null.cpp index 254c86fd7a..7c1cdac486 100644 --- a/core/fpdfapi/parser/cpdf_null.cpp +++ b/core/fpdfapi/parser/cpdf_null.cpp @@ -20,3 +20,7 @@ std::unique_ptr CPDF_Null::Clone() const { bool CPDF_Null::WriteTo(IFX_ArchiveStream* archive) const { return archive->WriteString(" null"); } + +bool CPDF_Null::IsNull() const { + return true; +} diff --git a/core/fpdfapi/parser/cpdf_null.h b/core/fpdfapi/parser/cpdf_null.h index 92917281fb..2ec05ec82c 100644 --- a/core/fpdfapi/parser/cpdf_null.h +++ b/core/fpdfapi/parser/cpdf_null.h @@ -19,6 +19,7 @@ class CPDF_Null : public CPDF_Object { Type GetType() const override; std::unique_ptr Clone() const override; bool WriteTo(IFX_ArchiveStream* archive) const override; + bool IsNull() const override; }; #endif // CORE_FPDFAPI_PARSER_CPDF_NULL_H_ diff --git a/core/fpdfapi/parser/cpdf_object.cpp b/core/fpdfapi/parser/cpdf_object.cpp index 82c3b09416..b35c6e5b6f 100644 --- a/core/fpdfapi/parser/cpdf_object.cpp +++ b/core/fpdfapi/parser/cpdf_object.cpp @@ -95,6 +95,10 @@ bool CPDF_Object::IsString() const { return false; } +bool CPDF_Object::IsNull() const { + return false; +} + CPDF_Array* CPDF_Object::AsArray() { return nullptr; } diff --git a/core/fpdfapi/parser/cpdf_object.h b/core/fpdfapi/parser/cpdf_object.h index b0c1adf955..04b77ec706 100644 --- a/core/fpdfapi/parser/cpdf_object.h +++ b/core/fpdfapi/parser/cpdf_object.h @@ -71,6 +71,7 @@ class CPDF_Object { virtual bool IsReference() const; virtual bool IsStream() const; virtual bool IsString() const; + virtual bool IsNull() const; virtual CPDF_Array* AsArray(); virtual const CPDF_Array* AsArray() const; diff --git a/core/fpdfapi/parser/cpdf_object_walker.cpp b/core/fpdfapi/parser/cpdf_object_walker.cpp new file mode 100644 index 0000000000..c6e0f0091d --- /dev/null +++ b/core/fpdfapi/parser/cpdf_object_walker.cpp @@ -0,0 +1,165 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_object_walker.h" + +#include + +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_stream.h" + +namespace { + +class StreamIterator : public CPDF_ObjectWalker::SubobjectIterator { + public: + explicit StreamIterator(const CPDF_Stream* stream) + : SubobjectIterator(stream) {} + ~StreamIterator() override {} + + bool IsFinished() const override { return IsStarted() && is_finished_; } + + const CPDF_Object* IncrementImpl() override { + ASSERT(IsStarted()); + ASSERT(!IsFinished()); + is_finished_ = true; + return object()->GetDict(); + } + + void Start() override {} + + private: + bool is_finished_ = false; +}; + +class DictionaryIterator : public CPDF_ObjectWalker::SubobjectIterator { + public: + explicit DictionaryIterator(const CPDF_Dictionary* dictionary) + : SubobjectIterator(dictionary) {} + ~DictionaryIterator() override {} + + bool IsFinished() const override { + return IsStarted() && dict_iterator_ == object()->GetDict()->end(); + } + + const CPDF_Object* IncrementImpl() override { + ASSERT(IsStarted()); + ASSERT(!IsFinished()); + const CPDF_Object* result = dict_iterator_->second.get(); + dict_key_ = dict_iterator_->first; + ++dict_iterator_; + return result; + } + + void Start() override { + ASSERT(!IsStarted()); + dict_iterator_ = object()->GetDict()->begin(); + } + + const CFX_ByteString& dict_key() const { return dict_key_; } + + private: + CPDF_Dictionary::const_iterator dict_iterator_; + CFX_ByteString dict_key_; +}; + +class ArrayIterator : public CPDF_ObjectWalker::SubobjectIterator { + public: + explicit ArrayIterator(const CPDF_Array* array) : SubobjectIterator(array) {} + + ~ArrayIterator() override {} + + bool IsFinished() const override { + return IsStarted() && arr_iterator_ == object()->AsArray()->end(); + } + + const CPDF_Object* IncrementImpl() override { + ASSERT(IsStarted()); + ASSERT(!IsFinished()); + const CPDF_Object* result = arr_iterator_->get(); + ++arr_iterator_; + return result; + } + + void Start() override { arr_iterator_ = object()->AsArray()->begin(); } + + public: + CPDF_Array::const_iterator arr_iterator_; +}; + +} // namespace + +CPDF_ObjectWalker::SubobjectIterator::~SubobjectIterator() {} + +const CPDF_Object* CPDF_ObjectWalker::SubobjectIterator::Increment() { + if (!IsStarted()) { + Start(); + is_started_ = true; + } + while (!IsFinished()) { + const CPDF_Object* result = IncrementImpl(); + if (result) + return result; + } + return nullptr; +} + +CPDF_ObjectWalker::SubobjectIterator::SubobjectIterator( + const CPDF_Object* object) + : object_(object) { + ASSERT(object_); +} + +// static +std::unique_ptr +CPDF_ObjectWalker::MakeIterator(const CPDF_Object* object) { + if (object->IsStream()) + return pdfium::MakeUnique(object->AsStream()); + if (object->IsDictionary()) + return pdfium::MakeUnique(object->AsDictionary()); + if (object->IsArray()) + return pdfium::MakeUnique(object->AsArray()); + return nullptr; +} + +CPDF_ObjectWalker::CPDF_ObjectWalker(const CPDF_Object* root) + : next_object_(root), parent_object_(nullptr), current_depth_(0) {} + +CPDF_ObjectWalker::~CPDF_ObjectWalker() {} + +const CPDF_Object* CPDF_ObjectWalker::GetNext() { + while (!stack_.empty() || next_object_) { + if (next_object_) { + auto new_iterator = MakeIterator(next_object_); + if (new_iterator) { + // Schedule walk within composite objects. + stack_.push(std::move(new_iterator)); + } + auto* result = next_object_; + next_object_ = nullptr; + return result; + } + + SubobjectIterator* it = stack_.top().get(); + if (it->IsFinished()) { + stack_.pop(); + } else { + next_object_ = it->Increment(); + parent_object_ = it->object(); + dict_key_ = parent_object_->IsDictionary() + ? static_cast(it)->dict_key() + : CFX_ByteString(); + current_depth_ = stack_.size(); + } + } + dict_key_ = CFX_ByteString(); + current_depth_ = 0; + return nullptr; +} + +void CPDF_ObjectWalker::SkipWalkIntoCurrentObject() { + if (stack_.empty() || stack_.top()->IsStarted()) + return; + stack_.pop(); +} diff --git a/core/fpdfapi/parser/cpdf_object_walker.h b/core/fpdfapi/parser/cpdf_object_walker.h new file mode 100644 index 0000000000..5590440f8f --- /dev/null +++ b/core/fpdfapi/parser/cpdf_object_walker.h @@ -0,0 +1,69 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CORE_FPDFAPI_PARSER_CPDF_OBJECT_WALKER_H_ +#define CORE_FPDFAPI_PARSER_CPDF_OBJECT_WALKER_H_ + +#include +#include + +#include "core/fpdfapi/parser/cpdf_dictionary.h" + +// Walk on all non-null sub-objects in an object in depth, include itself, +// like in flat list. +class CPDF_ObjectWalker { + public: + class SubobjectIterator { + public: + virtual ~SubobjectIterator(); + bool IsStarted() const { return is_started_; } + bool virtual IsFinished() const = 0; + const CPDF_Object* Increment(); + const CPDF_Object* object() const { return object_; } + + protected: + explicit SubobjectIterator(const CPDF_Object* object); + + virtual const CPDF_Object* IncrementImpl() = 0; + virtual void Start() = 0; + + private: + const CPDF_Object* object_; + bool is_started_ = false; + }; + + explicit CPDF_ObjectWalker(const CPDF_Object* root); + ~CPDF_ObjectWalker(); + + const CPDF_Object* GetNext(); + void SkipWalkIntoCurrentObject(); + + size_t current_depth() const { return current_depth_; } + const CPDF_Object* GetParent() const { return parent_object_; } + const CFX_ByteString& dictionary_key() const { return dict_key_; } + + private: + static std::unique_ptr MakeIterator( + const CPDF_Object* object); + + const CPDF_Object* next_object_; + const CPDF_Object* parent_object_; + + CFX_ByteString dict_key_; + size_t current_depth_; + + std::stack> stack_; +}; + +class CPDF_NonConstObjectWalker : public CPDF_ObjectWalker { + public: + explicit CPDF_NonConstObjectWalker(CPDF_Object* root) + : CPDF_ObjectWalker(root) {} + + CPDF_Object* GetNext() { + return const_cast(CPDF_ObjectWalker::GetNext()); + } +}; + +#endif // CORE_FPDFAPI_PARSER_CPDF_OBJECT_WALKER_H_ diff --git a/core/fpdfapi/parser/cpdf_object_walker_unittest.cpp b/core/fpdfapi/parser/cpdf_object_walker_unittest.cpp new file mode 100644 index 0000000000..66c559d3ca --- /dev/null +++ b/core/fpdfapi/parser/cpdf_object_walker_unittest.cpp @@ -0,0 +1,142 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/parser/cpdf_object_walker.h" + +#include +#include +#include + +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_boolean.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_name.h" +#include "core/fpdfapi/parser/cpdf_null.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_reference.h" +#include "core/fpdfapi/parser/cpdf_stream.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/base/ptr_util.h" + +namespace { + +std::string Walk(CPDF_Object* object) { + std::ostringstream result; + CPDF_ObjectWalker walker(object); + while (const CPDF_Object* obj = walker.GetNext()) { + if (obj->IsDictionary()) + result << " Dict"; + else if (obj->IsArray()) + result << " Arr"; + else if (obj->IsString()) + result << " Str"; + else if (obj->IsBoolean()) + result << " Bool"; + else if (obj->IsStream()) + result << " Stream"; + else if (obj->IsReference()) + result << " Ref"; + else if (obj->IsNull()) + result << " Null"; + else + result << " Unknown"; + } + std::string result_str = result.str(); + if (!result_str.empty()) { + result_str.erase(result_str.begin()); // remove start space + } + return result_str; +} + +} // namespace + +TEST(CPDF_ObjectWalkerTest, Simple) { + EXPECT_EQ(Walk(pdfium::MakeUnique().get()), "Null"); + EXPECT_EQ(Walk(pdfium::MakeUnique().get()), "Dict"); + EXPECT_EQ(Walk(pdfium::MakeUnique().get()), "Arr"); + EXPECT_EQ(Walk(pdfium::MakeUnique().get()), "Str"); + EXPECT_EQ(Walk(pdfium::MakeUnique().get()), "Bool"); + EXPECT_EQ(Walk(pdfium::MakeUnique().get()), "Stream"); + EXPECT_EQ(Walk(pdfium::MakeUnique(nullptr, 0).get()), "Ref"); +} + +TEST(CPDF_ObjectWalkerTest, CombinedObject) { + auto dict = pdfium::MakeUnique(); + dict->SetFor("1", pdfium::MakeUnique()); + dict->SetFor("2", pdfium::MakeUnique()); + auto array = pdfium::MakeUnique(); + array->Add(pdfium::MakeUnique(nullptr, 0)); + array->Add(pdfium::MakeUnique()); + array->Add(pdfium::MakeUnique( + nullptr, 0, pdfium::MakeUnique())); + dict->SetFor("3", std::move(array)); + EXPECT_EQ(Walk(dict.get()), "Dict Str Bool Arr Ref Null Stream Dict"); +} + +TEST(CPDF_ObjectWalkerTest, GetParent) { + auto level_4 = pdfium::MakeUnique(); + auto level_3 = pdfium::MakeUnique(); + level_3->SetFor("AnyObj", std::move(level_4)); + auto level_2 = + pdfium::MakeUnique(nullptr, 0, std::move(level_3)); + auto level_1 = pdfium::MakeUnique(); + level_1->Add(std::move(level_2)); + auto level_0 = pdfium::MakeUnique(); + level_0->SetFor("Array", std::move(level_1)); + + // We have <>) ]>> + // In this case each step will increase depth. + // And on each step the prev object should be parent for current. + const CPDF_Object* cur_parent = nullptr; + CPDF_ObjectWalker walker(level_0.get()); + while (const CPDF_Object* obj = walker.GetNext()) { + EXPECT_EQ(cur_parent, walker.GetParent()); + cur_parent = obj; + } +} + +TEST(CPDF_ObjectWalkerTest, SkipWalkIntoCurrentObject) { + auto root_array = pdfium::MakeUnique(); + // Add 2 null objects into |root_array|. [ null1, null2 ] + root_array->AddNew(); + root_array->AddNew(); + // |root_array| will contain 4 null objects after this. + // [ null1, null2, [ null3, null4 ] ] + root_array->Add(root_array->Clone()); + + int non_array_objects = 0; + CPDF_ObjectWalker walker(root_array.get()); + while (const CPDF_Object* obj = walker.GetNext()) { + if (obj != root_array.get() && obj->IsArray()) { + // skip other array except root. + walker.SkipWalkIntoCurrentObject(); + } + if (!obj->IsArray()) + ++non_array_objects; + } + // 2 objects from child array should be skipped. + EXPECT_EQ(2, non_array_objects); +} + +TEST(CPDF_ObjectWalkerTest, DictionaryKey) { + auto dict = pdfium::MakeUnique(); + dict->SetFor("1", pdfium::MakeUnique()); + dict->SetFor("2", pdfium::MakeUnique()); + dict->SetFor("3", pdfium::MakeUnique()); + dict->SetFor("4", pdfium::MakeUnique()); + dict->SetFor("5", pdfium::MakeUnique()); + + CPDF_ObjectWalker walker(dict.get()); + while (const CPDF_Object* obj = walker.GetNext()) { + if (obj == dict.get()) { + // Ignore root dictinary object + continue; + } + // Test that, dictionary key is correct. + EXPECT_EQ(walker.GetParent()->AsDictionary()->GetObjectFor( + walker.dictionary_key()), + obj); + } +} -- cgit v1.2.3