summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArtem Strygin <art-snake@yandex-team.ru>2017-08-14 23:35:52 +0300
committerChromium commit bot <commit-bot@chromium.org>2017-08-14 22:30:41 +0000
commitf57cad4f5dd0436d5b207d362afb34fc5f3f9acb (patch)
tree6beef6ce505b2baf0d976e545a5ac8a625e5ff95
parent93c886b7ce59f6e65fe885330558c52f51cfcab9 (diff)
downloadpdfium-f57cad4f5dd0436d5b207d362afb34fc5f3f9acb.tar.xz
Add CPDF_ObjectWalker.
It allows us to walk, depth-first, over all non-null sub-objects of an object, including the object itself. Change-Id: Ia23051073984411668112422b47cf7a4460aa078 Reviewed-on: https://pdfium-review.googlesource.com/8910 Commit-Queue: Art Snake <art-snake@yandex-team.ru> Reviewed-by: dsinclair <dsinclair@chromium.org>
-rw-r--r--BUILD.gn3
-rw-r--r--core/fpdfapi/parser/cpdf_null.cpp4
-rw-r--r--core/fpdfapi/parser/cpdf_null.h1
-rw-r--r--core/fpdfapi/parser/cpdf_object.cpp4
-rw-r--r--core/fpdfapi/parser/cpdf_object.h1
-rw-r--r--core/fpdfapi/parser/cpdf_object_walker.cpp165
-rw-r--r--core/fpdfapi/parser/cpdf_object_walker.h69
-rw-r--r--core/fpdfapi/parser/cpdf_object_walker_unittest.cpp142
8 files changed, 389 insertions, 0 deletions
diff --git a/BUILD.gn b/BUILD.gn
index 93196a491a..bfe38046a5 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -604,6 +604,8 @@ static_library("fpdfapi") {
"core/fpdfapi/parser/cpdf_number.h",
"core/fpdfapi/parser/cpdf_object.cpp",
"core/fpdfapi/parser/cpdf_object.h",
+ "core/fpdfapi/parser/cpdf_object_walker.cpp",
+ "core/fpdfapi/parser/cpdf_object_walker.h",
"core/fpdfapi/parser/cpdf_parser.cpp",
"core/fpdfapi/parser/cpdf_parser.h",
"core/fpdfapi/parser/cpdf_read_validator.cpp",
@@ -1897,6 +1899,7 @@ test("pdfium_unittests") {
"core/fpdfapi/parser/cpdf_array_unittest.cpp",
"core/fpdfapi/parser/cpdf_document_unittest.cpp",
"core/fpdfapi/parser/cpdf_object_unittest.cpp",
+ "core/fpdfapi/parser/cpdf_object_walker_unittest.cpp",
"core/fpdfapi/parser/cpdf_parser_unittest.cpp",
"core/fpdfapi/parser/cpdf_read_validator_unittest.cpp",
"core/fpdfapi/parser/cpdf_simple_parser_unittest.cpp",
diff --git a/core/fpdfapi/parser/cpdf_null.cpp b/core/fpdfapi/parser/cpdf_null.cpp
index 254c86fd7a..7c1cdac486 100644
--- a/core/fpdfapi/parser/cpdf_null.cpp
+++ b/core/fpdfapi/parser/cpdf_null.cpp
@@ -20,3 +20,7 @@ std::unique_ptr<CPDF_Object> CPDF_Null::Clone() const {
bool CPDF_Null::WriteTo(IFX_ArchiveStream* archive) const {
return archive->WriteString(" null");
}
+
+// Overrides CPDF_Object::IsNull(): a CPDF_Null always reports itself as null.
+bool CPDF_Null::IsNull() const {
+ return true;
+}
diff --git a/core/fpdfapi/parser/cpdf_null.h b/core/fpdfapi/parser/cpdf_null.h
index 92917281fb..2ec05ec82c 100644
--- a/core/fpdfapi/parser/cpdf_null.h
+++ b/core/fpdfapi/parser/cpdf_null.h
@@ -19,6 +19,7 @@ class CPDF_Null : public CPDF_Object {
Type GetType() const override;
std::unique_ptr<CPDF_Object> Clone() const override;
bool WriteTo(IFX_ArchiveStream* archive) const override;
+ bool IsNull() const override;
};
#endif // CORE_FPDFAPI_PARSER_CPDF_NULL_H_
diff --git a/core/fpdfapi/parser/cpdf_object.cpp b/core/fpdfapi/parser/cpdf_object.cpp
index 82c3b09416..b35c6e5b6f 100644
--- a/core/fpdfapi/parser/cpdf_object.cpp
+++ b/core/fpdfapi/parser/cpdf_object.cpp
@@ -95,6 +95,10 @@ bool CPDF_Object::IsString() const {
return false;
}
+// Base-class default: an object is not null unless a subclass overrides this.
+bool CPDF_Object::IsNull() const {
+ return false;
+}
+
CPDF_Array* CPDF_Object::AsArray() {
return nullptr;
}
diff --git a/core/fpdfapi/parser/cpdf_object.h b/core/fpdfapi/parser/cpdf_object.h
index b0c1adf955..04b77ec706 100644
--- a/core/fpdfapi/parser/cpdf_object.h
+++ b/core/fpdfapi/parser/cpdf_object.h
@@ -71,6 +71,7 @@ class CPDF_Object {
virtual bool IsReference() const;
virtual bool IsStream() const;
virtual bool IsString() const;
+ virtual bool IsNull() const;
virtual CPDF_Array* AsArray();
virtual const CPDF_Array* AsArray() const;
diff --git a/core/fpdfapi/parser/cpdf_object_walker.cpp b/core/fpdfapi/parser/cpdf_object_walker.cpp
new file mode 100644
index 0000000000..c6e0f0091d
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_object_walker.cpp
@@ -0,0 +1,165 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_object_walker.h"
+
+#include <utility>
+
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+
+namespace {
+
+// Sub-object iterator for a stream. A stream exposes exactly one sub-object
+// to the walker: whatever its GetDict() returns.
+class StreamIterator : public CPDF_ObjectWalker::SubobjectIterator {
+ public:
+  explicit StreamIterator(const CPDF_Stream* stream)
+      : SubobjectIterator(stream) {}
+  ~StreamIterator() override {}
+
+  // Finished only after iteration has started and the dictionary has been
+  // handed out.
+  bool IsFinished() const override {
+    if (!IsStarted())
+      return false;
+    return dict_returned_;
+  }
+
+  // Hands out the stream's dictionary and records that nothing is left.
+  const CPDF_Object* IncrementImpl() override {
+    ASSERT(IsStarted());
+    ASSERT(!IsFinished());
+    const CPDF_Object* stream_dict = object()->GetDict();
+    dict_returned_ = true;
+    return stream_dict;
+  }
+
+  // No positioning is needed before the first increment.
+  void Start() override {}
+
+ private:
+  // Set once IncrementImpl() has returned the dictionary.
+  bool dict_returned_ = false;
+};
+
+// Sub-object iterator for a dictionary. Visits the dictionary's entries in
+// the order given by its begin()/end() iterators and remembers the key of the
+// entry most recently handed out.
+class DictionaryIterator : public CPDF_ObjectWalker::SubobjectIterator {
+ public:
+ explicit DictionaryIterator(const CPDF_Dictionary* dictionary)
+ : SubobjectIterator(dictionary) {}
+ ~DictionaryIterator() override {}
+
+ // Finished once iteration has started and the position has reached end().
+ bool IsFinished() const override {
+ return IsStarted() && dict_iterator_ == object()->GetDict()->end();
+ }
+
+ // Returns the value of the current entry (possibly null; Increment() skips
+ // null results), stores that entry's key, and advances to the next entry.
+ const CPDF_Object* IncrementImpl() override {
+ ASSERT(IsStarted());
+ ASSERT(!IsFinished());
+ const CPDF_Object* result = dict_iterator_->second.get();
+ dict_key_ = dict_iterator_->first;
+ ++dict_iterator_;
+ return result;
+ }
+
+ // Positions the iterator at the first entry. Must run before IsStarted()
+ // becomes true.
+ void Start() override {
+ ASSERT(!IsStarted());
+ dict_iterator_ = object()->GetDict()->begin();
+ }
+
+ // Key of the entry returned by the latest IncrementImpl() call; empty until
+ // the first call.
+ const CFX_ByteString& dict_key() const { return dict_key_; }
+
+ private:
+ CPDF_Dictionary::const_iterator dict_iterator_;
+ CFX_ByteString dict_key_;
+};
+
+// Sub-object iterator for an array. Visits the elements in the order given by
+// the array's begin()/end() iterators.
+class ArrayIterator : public CPDF_ObjectWalker::SubobjectIterator {
+ public:
+  explicit ArrayIterator(const CPDF_Array* array) : SubobjectIterator(array) {}
+
+  ~ArrayIterator() override {}
+
+  // Finished once iteration has started and the position has reached end().
+  bool IsFinished() const override {
+    return IsStarted() && arr_iterator_ == object()->AsArray()->end();
+  }
+
+  // Returns the element at the current position (possibly null; Increment()
+  // skips null results) and advances to the next element.
+  const CPDF_Object* IncrementImpl() override {
+    ASSERT(IsStarted());
+    ASSERT(!IsFinished());
+    const CPDF_Object* result = arr_iterator_->get();
+    ++arr_iterator_;
+    return result;
+  }
+
+  // Positions the iterator at the first element. Must run before IsStarted()
+  // becomes true, matching DictionaryIterator::Start().
+  void Start() override {
+    ASSERT(!IsStarted());
+    arr_iterator_ = object()->AsArray()->begin();
+  }
+
+ private:
+  // Iteration state is an implementation detail, so it is kept private like
+  // the state of the other iterators in this file.
+  CPDF_Array::const_iterator arr_iterator_;
+};
+
+} // namespace
+
+// Out-of-line definition of the virtual destructor; there is nothing to
+// release here.
+CPDF_ObjectWalker::SubobjectIterator::~SubobjectIterator() {}
+
+// Returns the next non-null sub-object, or nullptr once none remain. The
+// first call starts the iterator before anything is fetched.
+const CPDF_Object* CPDF_ObjectWalker::SubobjectIterator::Increment() {
+  if (!is_started_) {
+    // Start() has to run while IsStarted() is still false.
+    Start();
+    is_started_ = true;
+  }
+
+  // Keep fetching until a non-null sub-object turns up or nothing is left.
+  const CPDF_Object* found = nullptr;
+  while (!found && !IsFinished())
+    found = IncrementImpl();
+  return found;
+}
+
+// Binds the iterator to the composite |object| whose sub-objects it will
+// produce. |object| must not be null.
+CPDF_ObjectWalker::SubobjectIterator::SubobjectIterator(
+ const CPDF_Object* object)
+ : object_(object) {
+ ASSERT(object_);
+}
+
+// static
+// Creates the sub-object iterator matching the type of |object|: stream,
+// dictionary or array, checked in that order. Any other object yields
+// nullptr, meaning there is nothing to descend into.
+std::unique_ptr<CPDF_ObjectWalker::SubobjectIterator>
+CPDF_ObjectWalker::MakeIterator(const CPDF_Object* object) {
+  std::unique_ptr<SubobjectIterator> iterator;
+  if (object->IsStream()) {
+    iterator = pdfium::MakeUnique<StreamIterator>(object->AsStream());
+  } else if (object->IsDictionary()) {
+    iterator = pdfium::MakeUnique<DictionaryIterator>(object->AsDictionary());
+  } else if (object->IsArray()) {
+    iterator = pdfium::MakeUnique<ArrayIterator>(object->AsArray());
+  }
+  return iterator;
+}
+
+// Schedules |root| as the first object GetNext() will return. The root has no
+// parent and sits at depth 0.
+CPDF_ObjectWalker::CPDF_ObjectWalker(const CPDF_Object* root)
+ : next_object_(root), parent_object_(nullptr), current_depth_(0) {}
+
+// Nothing to do explicitly; the iterators held in |stack_| are owned through
+// std::unique_ptr.
+CPDF_ObjectWalker::~CPDF_ObjectWalker() {}
+
+// Returns the next object of the walk, or nullptr when the walk is over.
+// An object is returned before any of its sub-objects. When a composite
+// object is returned, an iterator over its sub-objects is pushed onto
+// |stack_| so that the following calls descend into it.
+const CPDF_Object* CPDF_ObjectWalker::GetNext() {
+ while (!stack_.empty() || next_object_) {
+ if (next_object_) {
+ auto new_iterator = MakeIterator(next_object_);
+ if (new_iterator) {
+ // Schedule walk within composite objects.
+ stack_.push(std::move(new_iterator));
+ }
+ // Hand out the pending object and clear the slot so that the next call
+ // pulls from the iterator stack.
+ auto* result = next_object_;
+ next_object_ = nullptr;
+ return result;
+ }
+
+ SubobjectIterator* it = stack_.top().get();
+ if (it->IsFinished()) {
+ // This container is exhausted; resume with its parent's iterator.
+ stack_.pop();
+ } else {
+ // Increment() may return nullptr when only null sub-objects remain; the
+ // loop then comes back here and pops the finished iterator.
+ next_object_ = it->Increment();
+ parent_object_ = it->object();
+ // The cast relies on MakeIterator() having built a DictionaryIterator
+ // for every object that reports IsDictionary().
+ dict_key_ = parent_object_->IsDictionary()
+ ? static_cast<DictionaryIterator*>(it)->dict_key()
+ : CFX_ByteString();
+ current_depth_ = stack_.size();
+ }
+ }
+ // End of walk: clear the key and depth. |parent_object_| is left as it was.
+ dict_key_ = CFX_ByteString();
+ current_depth_ = 0;
+ return nullptr;
+}
+
+// Prevents the walk from descending into the object most recently returned by
+// GetNext(). The iterator pushed for that object has not been started yet, so
+// it is simply discarded; an already started iterator on top of the stack
+// belongs to an enclosing object and is left alone.
+void CPDF_ObjectWalker::SkipWalkIntoCurrentObject() {
+  if (!stack_.empty() && !stack_.top()->IsStarted())
+    stack_.pop();
+}
diff --git a/core/fpdfapi/parser/cpdf_object_walker.h b/core/fpdfapi/parser/cpdf_object_walker.h
new file mode 100644
index 0000000000..5590440f8f
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_object_walker.h
@@ -0,0 +1,69 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_PARSER_CPDF_OBJECT_WALKER_H_
+#define CORE_FPDFAPI_PARSER_CPDF_OBJECT_WALKER_H_
+
+#include <memory>
+#include <stack>
+
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+
+// Walks, depth-first, over an object and all of its non-null sub-objects,
+// presenting them as a flat sequence. The root object itself is returned
+// first, and every object is returned before its own sub-objects.
+class CPDF_ObjectWalker {
+ public:
+  // Produces the direct sub-objects of one composite object, one at a time.
+  class SubobjectIterator {
+   public:
+    virtual ~SubobjectIterator();
+    // True once Increment() has been called at least once.
+    bool IsStarted() const { return is_started_; }
+    // True when the iterator has nothing more to hand out.
+    virtual bool IsFinished() const = 0;
+    // Returns the next non-null sub-object, or nullptr if none remain. The
+    // first call starts the iterator.
+    const CPDF_Object* Increment();
+    // The composite object whose sub-objects are being iterated.
+    const CPDF_Object* object() const { return object_; }
+
+   protected:
+    // |object| must not be null.
+    explicit SubobjectIterator(const CPDF_Object* object);
+
+    // Returns the sub-object at the current position (possibly null) and
+    // advances past it.
+    virtual const CPDF_Object* IncrementImpl() = 0;
+    // Called once by the first Increment(), while IsStarted() is still false.
+    virtual void Start() = 0;
+
+   private:
+    const CPDF_Object* object_;
+    bool is_started_ = false;
+  };
+
+  // |root| is the first object GetNext() returns. A null |root| produces an
+  // empty walk.
+  explicit CPDF_ObjectWalker(const CPDF_Object* root);
+  ~CPDF_ObjectWalker();
+
+  // Returns the next object of the walk, or nullptr when the walk is over.
+  const CPDF_Object* GetNext();
+  // Call right after GetNext() to avoid descending into the object it just
+  // returned. Has no effect if that object is not a composite.
+  void SkipWalkIntoCurrentObject();
+
+  // Nesting depth of the object last returned by GetNext(): 0 for the root,
+  // 1 for its direct sub-objects, and so on. Reset to 0 when the walk ends.
+  size_t current_depth() const { return current_depth_; }
+  // Container that holds the object last returned by GetNext(); nullptr for
+  // the root. The value is not reset when the walk ends.
+  const CPDF_Object* GetParent() const { return parent_object_; }
+  // Key of the object last returned by GetNext() when its parent is a
+  // dictionary; empty otherwise and after the walk ends.
+  const CFX_ByteString& dictionary_key() const { return dict_key_; }
+
+ private:
+  // Returns an iterator over the sub-objects of |object|, or nullptr if
+  // |object| is not a composite.
+  static std::unique_ptr<SubobjectIterator> MakeIterator(
+      const CPDF_Object* object);
+
+  // Object scheduled to be returned by the next GetNext() call, if any.
+  const CPDF_Object* next_object_;
+  const CPDF_Object* parent_object_;
+
+  CFX_ByteString dict_key_;
+  size_t current_depth_;
+
+  // Iterators of the composites currently being walked, innermost on top.
+  std::stack<std::unique_ptr<SubobjectIterator>> stack_;
+};
+
+// Variant of CPDF_ObjectWalker for callers that hold a mutable root and need
+// mutable access to the objects visited.
+class CPDF_NonConstObjectWalker : public CPDF_ObjectWalker {
+ public:
+ explicit CPDF_NonConstObjectWalker(CPDF_Object* root)
+ : CPDF_ObjectWalker(root) {}
+
+ // Hides the non-virtual CPDF_ObjectWalker::GetNext() and strips the const
+ // from its result. The walk was started from a non-const root.
+ CPDF_Object* GetNext() {
+ return const_cast<CPDF_Object*>(CPDF_ObjectWalker::GetNext());
+ }
+};
+
+#endif // CORE_FPDFAPI_PARSER_CPDF_OBJECT_WALKER_H_
diff --git a/core/fpdfapi/parser/cpdf_object_walker_unittest.cpp b/core/fpdfapi/parser/cpdf_object_walker_unittest.cpp
new file mode 100644
index 0000000000..66c559d3ca
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_object_walker_unittest.cpp
@@ -0,0 +1,142 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_object_walker.h"
+
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_boolean.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_name.h"
+#include "core/fpdfapi/parser/cpdf_null.h"
+#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fpdfapi/parser/cpdf_string.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/base/ptr_util.h"
+
+namespace {
+
+// Returns the space-prefixed label used by Walk() for the type of |obj|.
+const char* LabelFor(const CPDF_Object* obj) {
+  if (obj->IsDictionary())
+    return " Dict";
+  if (obj->IsArray())
+    return " Arr";
+  if (obj->IsString())
+    return " Str";
+  if (obj->IsBoolean())
+    return " Bool";
+  if (obj->IsStream())
+    return " Stream";
+  if (obj->IsReference())
+    return " Ref";
+  if (obj->IsNull())
+    return " Null";
+  return " Unknown";
+}
+
+// Walks |object| and returns the type labels of every visited object, in
+// visiting order, separated by single spaces.
+std::string Walk(CPDF_Object* object) {
+  std::string labels;
+  CPDF_ObjectWalker walker(object);
+  while (const CPDF_Object* obj = walker.GetNext())
+    labels += LabelFor(obj);
+
+  // Every label starts with a space; drop the one in front of the first.
+  if (!labels.empty())
+    labels.erase(0, 1);
+  return labels;
+}
+
+} // namespace
+
+// Walking a single object with no sub-objects yields just that object.
+TEST(CPDF_ObjectWalkerTest, Simple) {
+ EXPECT_EQ(Walk(pdfium::MakeUnique<CPDF_Null>().get()), "Null");
+ EXPECT_EQ(Walk(pdfium::MakeUnique<CPDF_Dictionary>().get()), "Dict");
+ EXPECT_EQ(Walk(pdfium::MakeUnique<CPDF_Array>().get()), "Arr");
+ EXPECT_EQ(Walk(pdfium::MakeUnique<CPDF_String>().get()), "Str");
+ EXPECT_EQ(Walk(pdfium::MakeUnique<CPDF_Boolean>().get()), "Bool");
+ EXPECT_EQ(Walk(pdfium::MakeUnique<CPDF_Stream>().get()), "Stream");
+ EXPECT_EQ(Walk(pdfium::MakeUnique<CPDF_Reference>(nullptr, 0).get()), "Ref");
+}
+
+// Walks a dictionary holding a string, a boolean and an array; the array in
+// turn holds a reference, a null and a stream with its own dictionary. Each
+// container must be reported before its contents.
+TEST(CPDF_ObjectWalkerTest, CombinedObject) {
+ auto dict = pdfium::MakeUnique<CPDF_Dictionary>();
+ dict->SetFor("1", pdfium::MakeUnique<CPDF_String>());
+ dict->SetFor("2", pdfium::MakeUnique<CPDF_Boolean>());
+ auto array = pdfium::MakeUnique<CPDF_Array>();
+ array->Add(pdfium::MakeUnique<CPDF_Reference>(nullptr, 0));
+ array->Add(pdfium::MakeUnique<CPDF_Null>());
+ array->Add(pdfium::MakeUnique<CPDF_Stream>(
+ nullptr, 0, pdfium::MakeUnique<CPDF_Dictionary>()));
+ dict->SetFor("3", std::move(array));
+ EXPECT_EQ(Walk(dict.get()), "Dict Str Bool Arr Ref Null Stream Dict");
+}
+
+// Builds a chain in which every container holds exactly one sub-object and
+// checks that GetParent() always names the previously returned object.
+TEST(CPDF_ObjectWalkerTest, GetParent) {
+ auto level_4 = pdfium::MakeUnique<CPDF_Null>();
+ auto level_3 = pdfium::MakeUnique<CPDF_Dictionary>();
+ level_3->SetFor("AnyObj", std::move(level_4));
+ auto level_2 =
+ pdfium::MakeUnique<CPDF_Stream>(nullptr, 0, std::move(level_3));
+ auto level_1 = pdfium::MakeUnique<CPDF_Array>();
+ level_1->Add(std::move(level_2));
+ auto level_0 = pdfium::MakeUnique<CPDF_Dictionary>();
+ level_0->SetFor("Array", std::move(level_1));
+
+ // We have <</Array [ stream( << /AnyObj null >>) ]>>
+ // In this case each step increases the depth by one, so on each step the
+ // previously returned object must be the parent of the current one.
+ // The root is expected to have a null parent.
+ const CPDF_Object* cur_parent = nullptr;
+ CPDF_ObjectWalker walker(level_0.get());
+ while (const CPDF_Object* obj = walker.GetNext()) {
+ EXPECT_EQ(cur_parent, walker.GetParent());
+ cur_parent = obj;
+ }
+}
+
+// Checks that SkipWalkIntoCurrentObject() keeps the walker out of a nested
+// array while the rest of the root array is still visited.
+TEST(CPDF_ObjectWalkerTest, SkipWalkIntoCurrentObject) {
+ auto root_array = pdfium::MakeUnique<CPDF_Array>();
+ // Add 2 null objects into |root_array|. [ null1, null2 ]
+ root_array->AddNew<CPDF_Null>();
+ root_array->AddNew<CPDF_Null>();
+ // |root_array| will contain 4 null objects after this.
+ // [ null1, null2, [ null3, null4 ] ]
+ root_array->Add(root_array->Clone());
+
+ int non_array_objects = 0;
+ CPDF_ObjectWalker walker(root_array.get());
+ while (const CPDF_Object* obj = walker.GetNext()) {
+ if (obj != root_array.get() && obj->IsArray()) {
+ // Skip every array other than the root.
+ walker.SkipWalkIntoCurrentObject();
+ }
+ if (!obj->IsArray())
+ ++non_array_objects;
+ }
+ // The 2 objects in the child array should be skipped, leaving only the 2
+ // nulls held directly by the root.
+ EXPECT_EQ(2, non_array_objects);
+}
+
+// Checks that dictionary_key() names the entry of the parent dictionary that
+// holds the object just returned by GetNext().
+TEST(CPDF_ObjectWalkerTest, DictionaryKey) {
+  // Populate a dictionary with null values under the keys "1" through "5".
+  static const char* const kKeys[] = {"1", "2", "3", "4", "5"};
+  auto dict = pdfium::MakeUnique<CPDF_Dictionary>();
+  for (const char* key : kKeys)
+    dict->SetFor(key, pdfium::MakeUnique<CPDF_Null>());
+
+  CPDF_ObjectWalker walker(dict.get());
+  while (const CPDF_Object* obj = walker.GetNext()) {
+    // The root dictionary object has no key of its own, so only its entries
+    // are checked.
+    if (obj != dict.get()) {
+      // Looking the reported key up in the parent must give back |obj|.
+      EXPECT_EQ(walker.GetParent()->AsDictionary()->GetObjectFor(
+                    walker.dictionary_key()),
+                obj);
+    }
+  }
+}