summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
Diffstat (limited to 'core')
-rw-r--r-- core/fpdfdoc/cpdf_structelement.cpp (renamed from core/fpdfdoc/doc_tagged.cpp) | 312
-rw-r--r-- core/fpdfdoc/cpdf_structelement.h (renamed from core/fpdfdoc/tagged_int.h) | 73
-rw-r--r-- core/fpdfdoc/cpdf_structtree.cpp | 155
-rw-r--r-- core/fpdfdoc/cpdf_structtree.h | 51
-rw-r--r-- core/fpdfdoc/fpdf_tagged.h | 75
5 files changed, 312 insertions, 354 deletions
diff --git a/core/fpdfdoc/doc_tagged.cpp b/core/fpdfdoc/cpdf_structelement.cpp
index 418fab411a..418f75b3a9 100644
--- a/core/fpdfdoc/doc_tagged.cpp
+++ b/core/fpdfdoc/cpdf_structelement.cpp
@@ -1,33 +1,49 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
+// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-#include <map>
-#include <memory>
-#include <utility>
+#include "core/fpdfdoc/cpdf_structelement.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
-#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_object.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
-#include "core/fpdfdoc/cpdf_numbertree.h"
-#include "core/fpdfdoc/fpdf_tagged.h"
-#include "core/fpdfdoc/tagged_int.h"
-#include "third_party/base/ptr_util.h"
+#include "core/fpdfdoc/cpdf_structtree.h"
+#include "third_party/base/stl_util.h"
namespace {
const int nMaxRecursion = 32;
-bool IsTagged(const CPDF_Document* pDoc) {
- CPDF_Dictionary* pCatalog = pDoc->GetRoot();
- CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo");
- return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
+// Looks through |pAttrs| for an attribute dictionary whose "O" entry equals
+// |owner|. |pAttrs| may be a dictionary, a stream (its dictionary is used), or
+// an array whose elements are searched recursively in order. Returns the first
+// match, or nullptr when nothing matches, |pAttrs| is null, or |nLevel|
+// exceeds nMaxRecursion.
+CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs,
+                              const CFX_ByteStringC& owner,
+                              float nLevel = 0.0F) {
+  if (!pAttrs || nLevel > nMaxRecursion)
+    return nullptr;
+
+  CPDF_Dictionary* pCandidate = nullptr;
+  if (pAttrs->IsDictionary()) {
+    pCandidate = pAttrs->AsDictionary();
+  } else if (CPDF_Stream* pStream = pAttrs->AsStream()) {
+    pCandidate = pStream->GetDict();
+  } else if (CPDF_Array* pArray = pAttrs->AsArray()) {
+    // Depth-first search: the first element that yields a match wins.
+    for (uint32_t idx = 0; idx < pArray->GetCount(); ++idx) {
+      CPDF_Dictionary* pFound =
+          FindAttrDict(pArray->GetDirectObjectAt(idx), owner, nLevel + 1);
+      if (pFound)
+        return pFound;
+    }
+    return nullptr;
+  }
+
+  if (!pCandidate)
+    return nullptr;
+  return pCandidate->GetStringFor("O") == owner ? pCandidate : nullptr;
 }
} // namespace
@@ -41,134 +57,7 @@ CPDF_StructKid::CPDF_StructKid()
CPDF_StructKid::CPDF_StructKid(const CPDF_StructKid& that) = default;
-CPDF_StructKid::~CPDF_StructKid() {}
-
-// static
-std::unique_ptr<IPDF_StructTree> IPDF_StructTree::LoadPage(
- const CPDF_Document* pDoc,
- const CPDF_Dictionary* pPageDict) {
- if (!IsTagged(pDoc))
- return nullptr;
-
- auto pTree = pdfium::MakeUnique<CPDF_StructTree>(pDoc);
- pTree->LoadPageTree(pPageDict);
- return std::move(pTree);
-}
-
-CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
- : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
- m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr),
- m_pPage(nullptr) {}
-
-CPDF_StructTree::~CPDF_StructTree() {}
-
-int CPDF_StructTree::CountTopElements() const {
- return pdfium::CollectionSize<int>(m_Kids);
-}
-
-IPDF_StructElement* CPDF_StructTree::GetTopElement(int i) const {
- return m_Kids[i].Get();
-}
-
-void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) {
- m_pPage = pPageDict;
- if (!m_pTreeRoot)
- return;
-
- CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K");
- if (!pKids)
- return;
-
- uint32_t dwKids = 0;
- if (pKids->IsDictionary())
- dwKids = 1;
- else if (CPDF_Array* pArray = pKids->AsArray())
- dwKids = pArray->GetCount();
- else
- return;
-
- m_Kids.clear();
- m_Kids.resize(dwKids);
- CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree");
- if (!pParentTree)
- return;
-
- CPDF_NumberTree parent_tree(pParentTree);
- int parents_id = pPageDict->GetIntegerFor("StructParents", -1);
- if (parents_id < 0)
- return;
-
- CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id));
- if (!pParentArray)
- return;
-
- std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>> element_map;
- for (size_t i = 0; i < pParentArray->GetCount(); i++) {
- if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i))
- AddPageNode(pParent, &element_map);
- }
-}
-
-CFX_RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
- CPDF_Dictionary* pDict,
- std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map,
- int nLevel) {
- if (nLevel > nMaxRecursion)
- return nullptr;
-
- auto it = map->find(pDict);
- if (it != map->end())
- return it->second;
-
- auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict);
- (*map)[pDict] = pElement;
- CPDF_Dictionary* pParent = pDict->GetDictFor("P");
- if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") {
- if (!AddTopLevelNode(pDict, pElement))
- map->erase(pDict);
- return pElement;
- }
-
- CFX_RetainPtr<CPDF_StructElement> pParentElement =
- AddPageNode(pParent, map, nLevel + 1);
- bool bSave = false;
- for (CPDF_StructKid& kid : *pParentElement->GetKids()) {
- if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) {
- kid.m_pElement = pElement;
- bSave = true;
- }
- }
- if (!bSave)
- map->erase(pDict);
- return pElement;
-}
-
-bool CPDF_StructTree::AddTopLevelNode(
- CPDF_Dictionary* pDict,
- const CFX_RetainPtr<CPDF_StructElement>& pElement) {
- CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K");
- if (!pObj)
- return false;
-
- if (pObj->IsDictionary()) {
- if (pObj->GetObjNum() != pDict->GetObjNum())
- return false;
- m_Kids[0] = pElement;
- }
- if (CPDF_Array* pTopKids = pObj->AsArray()) {
- bool bSave = false;
- for (size_t i = 0; i < pTopKids->GetCount(); i++) {
- CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i));
- if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
- m_Kids[i] = pElement;
- bSave = true;
- }
- }
- if (!bSave)
- return false;
- }
- return true;
-}
+CPDF_StructKid::~CPDF_StructKid() = default;
CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree,
CPDF_StructElement* pParent,
@@ -177,43 +66,26 @@ CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree,
m_pParent(pParent),
m_pDict(pDict),
m_Type(pDict->GetStringFor("S")) {
- if (pTree->m_pRoleMap) {
- CFX_ByteString mapped = pTree->m_pRoleMap->GetStringFor(m_Type);
+ if (pTree->GetRoleMap()) {
+ CFX_ByteString mapped = pTree->GetRoleMap()->GetStringFor(m_Type);
if (!mapped.IsEmpty())
m_Type = mapped;
}
LoadKids(pDict);
}
-IPDF_StructTree* CPDF_StructElement::GetTree() const {
- return m_pTree;
-}
-
-const CFX_ByteString& CPDF_StructElement::GetType() const {
- return m_Type;
-}
-
-IPDF_StructElement* CPDF_StructElement::GetParent() const {
- return m_pParent;
-}
-
-CPDF_Dictionary* CPDF_StructElement::GetDict() const {
- return m_pDict;
-}
+CPDF_StructElement::~CPDF_StructElement() = default;
int CPDF_StructElement::CountKids() const {
return pdfium::CollectionSize<int>(m_Kids);
}
-IPDF_StructElement* CPDF_StructElement::GetKidIfElement(int index) const {
- if (m_Kids[index].m_Type != CPDF_StructKid::Element)
- return nullptr;
-
- return m_Kids[index].m_pElement.Get();
+// Returns the child element stored at |index| when that kid is of type
+// Element, and nullptr for every other kid type. |index| is not range-checked.
+CPDF_StructElement* CPDF_StructElement::GetKidIfElement(int index) const {
+  const CPDF_StructKid& kid = m_Kids[index];
+  if (kid.m_Type != CPDF_StructKid::Element)
+    return nullptr;
+  return kid.m_pElement.Get();
 }
-CPDF_StructElement::~CPDF_StructElement() {}
-
void CPDF_StructElement::LoadKids(CPDF_Dictionary* pDict) {
CPDF_Object* pObj = pDict->GetObjectFor("Pg");
uint32_t PageObjNum = 0;
@@ -231,11 +103,13 @@ void CPDF_StructElement::LoadKids(CPDF_Dictionary* pDict) {
CPDF_Object* pKid = pArray->GetDirectObjectAt(i);
LoadKid(PageObjNum, pKid, &m_Kids[i]);
}
- } else {
- m_Kids.resize(1);
- LoadKid(PageObjNum, pKids, &m_Kids[0]);
+ return;
}
+
+ m_Kids.resize(1);
+ LoadKid(PageObjNum, pKids, &m_Kids[0]);
}
+
void CPDF_StructElement::LoadKid(uint32_t PageObjNum,
CPDF_Object* pKidObj,
CPDF_StructKid* pKid) {
@@ -244,9 +118,9 @@ void CPDF_StructElement::LoadKid(uint32_t PageObjNum,
return;
if (pKidObj->IsNumber()) {
- if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
+ if (m_pTree->GetPage() && m_pTree->GetPage()->GetObjNum() != PageObjNum)
return;
- }
+
pKid->m_Type = CPDF_StructKid::PageContent;
pKid->m_ContentId = pKidObj->GetInteger();
pKid->m_PageObjNum = PageObjNum;
@@ -256,96 +130,70 @@ void CPDF_StructElement::LoadKid(uint32_t PageObjNum,
CPDF_Dictionary* pKidDict = pKidObj->AsDictionary();
if (!pKidDict)
return;
-
if (CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Pg")))
PageObjNum = pRef->GetRefObjNum();
CFX_ByteString type = pKidDict->GetStringFor("Type");
+ if ((type == "MCR" || type == "OBJR") && m_pTree->GetPage() &&
+ m_pTree->GetPage()->GetObjNum() != PageObjNum) {
+ return;
+ }
+
if (type == "MCR") {
- if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
- return;
- }
pKid->m_Type = CPDF_StructKid::StreamContent;
CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Stm"));
pKid->m_RefObjNum = pRef ? pRef->GetRefObjNum() : 0;
pKid->m_PageObjNum = PageObjNum;
pKid->m_ContentId = pKidDict->GetIntegerFor("MCID");
- } else if (type == "OBJR") {
- if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
- return;
- }
+ return;
+ }
+
+ if (type == "OBJR") {
pKid->m_Type = CPDF_StructKid::Object;
CPDF_Reference* pObj = ToReference(pKidDict->GetObjectFor("Obj"));
pKid->m_RefObjNum = pObj ? pObj->GetRefObjNum() : 0;
pKid->m_PageObjNum = PageObjNum;
- } else {
- pKid->m_Type = CPDF_StructKid::Element;
- pKid->m_pDict = pKidDict;
- if (!m_pTree->m_pPage) {
- pKid->m_pElement =
- pdfium::MakeRetain<CPDF_StructElement>(m_pTree, this, pKidDict);
- } else {
- pKid->m_pElement = nullptr;
- }
+ return;
}
-}
-static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs,
- const CFX_ByteStringC& owner,
- float nLevel = 0.0F) {
- if (nLevel > nMaxRecursion)
- return nullptr;
- if (!pAttrs)
- return nullptr;
- CPDF_Dictionary* pDict = nullptr;
- if (pAttrs->IsDictionary()) {
- pDict = pAttrs->AsDictionary();
- } else if (CPDF_Stream* pStream = pAttrs->AsStream()) {
- pDict = pStream->GetDict();
- } else if (CPDF_Array* pArray = pAttrs->AsArray()) {
- for (uint32_t i = 0; i < pArray->GetCount(); i++) {
- CPDF_Object* pElement = pArray->GetDirectObjectAt(i);
- pDict = FindAttrDict(pElement, owner, nLevel + 1);
- if (pDict)
- return pDict;
- }
+ pKid->m_Type = CPDF_StructKid::Element;
+ pKid->m_pDict = pKidDict;
+ if (m_pTree->GetPage()) {
+ pKid->m_pElement = nullptr;
+ return;
}
- if (pDict && pDict->GetStringFor("O") == owner)
- return pDict;
- return nullptr;
+
+ pKid->m_pElement =
+ pdfium::MakeRetain<CPDF_StructElement>(m_pTree, this, pKidDict);
}
+
CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
bool bInheritable,
float fLevel) {
- if (fLevel > nMaxRecursion) {
+ if (fLevel > nMaxRecursion)
return nullptr;
- }
+
if (bInheritable) {
- CPDF_Object* pAttr = GetAttr(owner, name, false);
- if (pAttr) {
+ if (CPDF_Object* pAttr = GetAttr(owner, name, false))
return pAttr;
- }
- if (!m_pParent) {
+ if (!m_pParent)
return nullptr;
- }
return m_pParent->GetAttr(owner, name, true, fLevel + 1);
}
- CPDF_Object* pA = m_pDict->GetDirectObjectFor("A");
- if (pA) {
- CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
- if (pAttrDict) {
- CPDF_Object* pAttr = pAttrDict->GetDirectObjectFor(CFX_ByteString(name));
- if (pAttr) {
- return pAttr;
- }
+
+ if (CPDF_Object* pA = m_pDict->GetDirectObjectFor("A")) {
+ if (CPDF_Dictionary* dict = FindAttrDict(pA, owner)) {
+ if (CPDF_Object* attr = dict->GetDirectObjectFor(CFX_ByteString(name)))
+ return attr;
}
}
+
CPDF_Object* pC = m_pDict->GetDirectObjectFor("C");
if (!pC)
return nullptr;
- CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDictFor("ClassMap");
+ CPDF_Dictionary* pClassMap = m_pTree->GetTreeRoot()->GetDictFor("ClassMap");
if (!pClassMap)
return nullptr;
@@ -358,12 +206,14 @@ CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
}
return nullptr;
}
+
CFX_ByteString class_name = pC->GetString();
CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name);
if (pClassDict && pClassDict->GetStringFor("O") == owner)
return pClassDict->GetDirectObjectFor(CFX_ByteString(name));
return nullptr;
}
+
CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
bool bInheritable,
@@ -372,11 +222,11 @@ CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
CPDF_Array* pArray = ToArray(pAttr);
if (!pArray || subindex == -1)
return pAttr;
-
if (subindex >= static_cast<int>(pArray->GetCount()))
return pAttr;
return pArray->GetDirectObjectAt(subindex);
}
+
CFX_ByteString CPDF_StructElement::GetName(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
const CFX_ByteStringC& default_value,
@@ -396,10 +246,11 @@ FX_ARGB CPDF_StructElement::GetColor(const CFX_ByteStringC& owner,
CPDF_Array* pArray = ToArray(GetAttr(owner, name, bInheritable, subindex));
if (!pArray)
return default_value;
- return 0xff000000 | ((int)(pArray->GetNumberAt(0) * 255) << 16) |
- ((int)(pArray->GetNumberAt(1) * 255) << 8) |
- (int)(pArray->GetNumberAt(2) * 255);
+ return 0xff000000 | (static_cast<int>(pArray->GetNumberAt(0) * 255) << 16) |
+ (static_cast<int>(pArray->GetNumberAt(1) * 255) << 8) |
+ static_cast<int>(pArray->GetNumberAt(2) * 255);
}
+
float CPDF_StructElement::GetNumber(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
float default_value,
@@ -408,6 +259,7 @@ float CPDF_StructElement::GetNumber(const CFX_ByteStringC& owner,
CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
return ToNumber(pAttr) ? pAttr->GetNumber() : default_value;
}
+
int CPDF_StructElement::GetInteger(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
int default_value,
diff --git a/core/fpdfdoc/tagged_int.h b/core/fpdfdoc/cpdf_structelement.h
index cafcbd42aa..b227397431 100644
--- a/core/fpdfdoc/tagged_int.h
+++ b/core/fpdfdoc/cpdf_structelement.h
@@ -1,23 +1,25 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
+// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-#ifndef CORE_FPDFDOC_TAGGED_INT_H_
-#define CORE_FPDFDOC_TAGGED_INT_H_
+#ifndef CORE_FPDFDOC_CPDF_STRUCTELEMENT_H_
+#define CORE_FPDFDOC_CPDF_STRUCTELEMENT_H_
-#include <map>
-#include <memory>
#include <vector>
-#include "core/fpdfdoc/fpdf_tagged.h"
#include "core/fxcrt/cfx_retain_ptr.h"
-#include "third_party/base/stl_util.h"
+#include "core/fxcrt/fx_string.h"
+#include "core/fxge/fx_dib.h"
+class CPDF_Dictionary;
+class CPDF_Object;
class CPDF_StructElement;
+class CPDF_StructTree;
-struct CPDF_StructKid {
+class CPDF_StructKid {
+ public:
CPDF_StructKid();
CPDF_StructKid(const CPDF_StructKid& that);
~CPDF_StructKid();
@@ -31,69 +33,42 @@ struct CPDF_StructKid {
uint32_t m_ContentId; // For PageContent, StreamContent.
};
-class CPDF_StructTree final : public IPDF_StructTree {
- public:
- explicit CPDF_StructTree(const CPDF_Document* pDoc);
- ~CPDF_StructTree() override;
-
- // IPDF_StructTree:
- int CountTopElements() const override;
- IPDF_StructElement* GetTopElement(int i) const override;
-
- void LoadPageTree(const CPDF_Dictionary* pPageDict);
- CFX_RetainPtr<CPDF_StructElement> AddPageNode(
- CPDF_Dictionary* pElement,
- std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map,
- int nLevel = 0);
- bool AddTopLevelNode(CPDF_Dictionary* pDict,
- const CFX_RetainPtr<CPDF_StructElement>& pElement);
-
- protected:
- const CPDF_Dictionary* const m_pTreeRoot;
- const CPDF_Dictionary* const m_pRoleMap;
- const CPDF_Dictionary* m_pPage;
- std::vector<CFX_RetainPtr<CPDF_StructElement>> m_Kids;
-
- friend class CPDF_StructElement;
-};
-
-class CPDF_StructElement final : public CFX_Retainable,
- public IPDF_StructElement {
+class CPDF_StructElement : public CFX_Retainable {
public:
template <typename T, typename... Args>
friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);
- // IPDF_StructElement
- IPDF_StructTree* GetTree() const override;
- const CFX_ByteString& GetType() const override;
- IPDF_StructElement* GetParent() const override;
- CPDF_Dictionary* GetDict() const override;
- int CountKids() const override;
- IPDF_StructElement* GetKidIfElement(int index) const override;
+ CPDF_StructTree* GetTree() const { return m_pTree; }
+ const CFX_ByteString& GetType() const { return m_Type; }
+ CPDF_StructElement* GetParent() const { return m_pParent; }
+ CPDF_Dictionary* GetDict() const { return m_pDict; }
+
+ int CountKids() const;
+ CPDF_StructElement* GetKidIfElement(int index) const;
CPDF_Object* GetAttr(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
bool bInheritable = false,
- float fLevel = 0.0F) override;
+ float fLevel = 0.0F);
CFX_ByteString GetName(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
const CFX_ByteStringC& default_value,
bool bInheritable = false,
- int subindex = -1) override;
+ int subindex = -1);
FX_ARGB GetColor(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
FX_ARGB default_value,
bool bInheritable = false,
- int subindex = -1) override;
+ int subindex = -1);
float GetNumber(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
float default_value,
bool bInheritable = false,
- int subindex = -1) override;
+ int subindex = -1);
int GetInteger(const CFX_ByteStringC& owner,
const CFX_ByteStringC& name,
int default_value,
bool bInheritable = false,
- int subindex = -1) override;
+ int subindex = -1);
std::vector<CPDF_StructKid>* GetKids() { return &m_Kids; }
void LoadKids(CPDF_Dictionary* pDict);
@@ -116,4 +91,4 @@ class CPDF_StructElement final : public CFX_Retainable,
std::vector<CPDF_StructKid> m_Kids;
};
-#endif // CORE_FPDFDOC_TAGGED_INT_H_
+#endif // CORE_FPDFDOC_CPDF_STRUCTELEMENT_H_
diff --git a/core/fpdfdoc/cpdf_structtree.cpp b/core/fpdfdoc/cpdf_structtree.cpp
new file mode 100644
index 0000000000..51ad2c775c
--- /dev/null
+++ b/core/fpdfdoc/cpdf_structtree.cpp
@@ -0,0 +1,155 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fpdfdoc/cpdf_structtree.h"
+
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfdoc/cpdf_numbertree.h"
+#include "core/fpdfdoc/cpdf_structelement.h"
+#include "third_party/base/stl_util.h"
+
+namespace {
+
+const int nMaxRecursion = 32;
+
+// Returns true when the document catalog has a "MarkInfo" dictionary whose
+// integer "Marked" entry is non-zero.
+bool IsTagged(const CPDF_Document* pDoc) {
+  CPDF_Dictionary* pMarkInfo = pDoc->GetRoot()->GetDictFor("MarkInfo");
+  if (!pMarkInfo)
+    return false;
+  return pMarkInfo->GetIntegerFor("Marked") != 0;
+}
+
+} // namespace
+
+// static
+// Builds the structure tree for |pPageDict|. Returns nullptr when |pDoc| is
+// not tagged; otherwise returns a tree on which LoadPageTree() has been run.
+std::unique_ptr<CPDF_StructTree> CPDF_StructTree::LoadPage(
+    const CPDF_Document* pDoc,
+    const CPDF_Dictionary* pPageDict) {
+  std::unique_ptr<CPDF_StructTree> pResult;
+  if (IsTagged(pDoc)) {
+    pResult = pdfium::MakeUnique<CPDF_StructTree>(pDoc);
+    pResult->LoadPageTree(pPageDict);
+  }
+  return pResult;
+}
+
+// Caches the catalog's "StructTreeRoot" dictionary and, when that exists, its
+// "RoleMap" dictionary. No page is associated until LoadPageTree() is called.
+CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
+    : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
+      m_pRoleMap(!m_pTreeRoot ? nullptr : m_pTreeRoot->GetDictFor("RoleMap")),
+      m_pPage(nullptr) {}
+
+// Nothing to release by hand; the members clean up after themselves.
+CPDF_StructTree::~CPDF_StructTree() = default;
+
+// Returns the number of top-level slots, including slots that are still null.
+int CPDF_StructTree::CountTopElements() const {
+  const int nTopLevel = pdfium::CollectionSize<int>(m_Kids);
+  return nTopLevel;
+}
+
+// Returns the element held in top-level slot |i|, which may be null. |i| is
+// not range-checked.
+CPDF_StructElement* CPDF_StructTree::GetTopElement(int i) const {
+  const CFX_RetainPtr<CPDF_StructElement>& pSlot = m_Kids[i];
+  return pSlot.Get();
+}
+
+// Records |pPageDict| as the current page and populates m_Kids for it.
+//
+// m_Kids is sized from the tree root's "K" entry (one slot for a dictionary,
+// one slot per entry for an array). The page's "StructParents" key is then
+// looked up in the root's "ParentTree" number tree, and every dictionary in
+// the resulting array is passed to AddPageNode(). Returns early, leaving the
+// slots null, whenever one of those pieces is missing.
+void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) {
+  m_pPage = pPageDict;
+  if (!m_pTreeRoot)
+    return;
+
+  CPDF_Object* pTopKids = m_pTreeRoot->GetDirectObjectFor("K");
+  if (!pTopKids)
+    return;
+
+  uint32_t nTopKids = 0;
+  if (pTopKids->IsDictionary()) {
+    nTopKids = 1;
+  } else {
+    CPDF_Array* pKidArray = pTopKids->AsArray();
+    if (!pKidArray)
+      return;
+    nTopKids = pKidArray->GetCount();
+  }
+
+  // Start from a clean set of null slots before looking at the parent tree.
+  m_Kids.clear();
+  m_Kids.resize(nTopKids);
+
+  CPDF_Dictionary* pParentTreeDict = m_pTreeRoot->GetDictFor("ParentTree");
+  if (!pParentTreeDict)
+    return;
+
+  CPDF_NumberTree parent_tree(pParentTreeDict);
+  const int nStructParents = pPageDict->GetIntegerFor("StructParents", -1);
+  if (nStructParents < 0)
+    return;
+
+  CPDF_Array* pPageParents = ToArray(parent_tree.LookupValue(nStructParents));
+  if (!pPageParents)
+    return;
+
+  std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>> element_map;
+  for (size_t idx = 0; idx < pPageParents->GetCount(); ++idx) {
+    CPDF_Dictionary* pParentDict = pPageParents->GetDictAt(idx);
+    if (!pParentDict)
+      continue;
+    AddPageNode(pParentDict, &element_map);
+  }
+}
+
+// Returns the struct element for |pDict|, creating it when |map| does not
+// already hold one, and tries to attach it to the tree.
+//
+// An element whose "P" entry is missing or is the StructTreeRoot is attached
+// through AddTopLevelNode(). Otherwise the parent element is obtained
+// recursively and the new element is stored in the parent kid(s) that refer
+// to |pDict|. When the element cannot be attached, its entry is removed from
+// |map| again; the element itself is still returned.
+//
+// |nLevel| is the recursion depth. Returns nullptr only when it exceeds
+// nMaxRecursion.
+CFX_RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
+    CPDF_Dictionary* pDict,
+    std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map,
+    int nLevel) {
+  if (nLevel > nMaxRecursion)
+    return nullptr;
+
+  auto it = map->find(pDict);
+  if (it != map->end())
+    return it->second;
+
+  auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict);
+  (*map)[pDict] = pElement;
+  CPDF_Dictionary* pParent = pDict->GetDictFor("P");
+  if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") {
+    if (!AddTopLevelNode(pDict, pElement))
+      map->erase(pDict);
+    return pElement;
+  }
+
+  CFX_RetainPtr<CPDF_StructElement> pParentElement =
+      AddPageNode(pParent, map, nLevel + 1);
+  if (!pParentElement) {
+    // The recursive call gave up at the depth limit, so there is no parent to
+    // attach to. Dereferencing the null result would crash on a "P" chain
+    // deeper than nMaxRecursion; treat it like any other unattached element.
+    map->erase(pDict);
+    return pElement;
+  }
+
+  bool bSave = false;
+  for (CPDF_StructKid& kid : *pParentElement->GetKids()) {
+    if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) {
+      kid.m_pElement = pElement;
+      bSave = true;
+    }
+  }
+  if (!bSave)
+    map->erase(pDict);
+  return pElement;
+}
+
+// Stores |pElement| in the top-level slot(s) of m_Kids that correspond to
+// |pDict|, matching by object number against the tree root's "K" entry.
+//
+// Returns false when "K" is missing, when "K" is a dictionary with a different
+// object number, or when "K" is an array containing no reference to |pDict|.
+// Returns true otherwise.
+bool CPDF_StructTree::AddTopLevelNode(
+    CPDF_Dictionary* pDict,
+    const CFX_RetainPtr<CPDF_StructElement>& pElement) {
+  CPDF_Object* pRootKids = m_pTreeRoot->GetDirectObjectFor("K");
+  if (!pRootKids)
+    return false;
+
+  const uint32_t dwTargetObjNum = pDict->GetObjNum();
+  if (pRootKids->IsDictionary()) {
+    if (pRootKids->GetObjNum() != dwTargetObjNum)
+      return false;
+    m_Kids[0] = pElement;
+  }
+
+  CPDF_Array* pKidArray = pRootKids->AsArray();
+  if (!pKidArray)
+    return true;
+
+  // Every array entry referring to |pDict| receives the element.
+  bool bAttached = false;
+  for (size_t idx = 0; idx < pKidArray->GetCount(); ++idx) {
+    CPDF_Reference* pRef = ToReference(pKidArray->GetObjectAt(idx));
+    if (!pRef || pRef->GetRefObjNum() != dwTargetObjNum)
+      continue;
+    m_Kids[idx] = pElement;
+    bAttached = true;
+  }
+  return bAttached;
+}
diff --git a/core/fpdfdoc/cpdf_structtree.h b/core/fpdfdoc/cpdf_structtree.h
new file mode 100644
index 0000000000..20bf41e7ee
--- /dev/null
+++ b/core/fpdfdoc/cpdf_structtree.h
@@ -0,0 +1,51 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FPDFDOC_CPDF_STRUCTTREE_H_
+#define CORE_FPDFDOC_CPDF_STRUCTTREE_H_
+
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "core/fxcrt/cfx_retain_ptr.h"
+
+class CPDF_Dictionary;
+class CPDF_Document;
+class CPDF_StructElement;
+
+class CPDF_StructTree {  // View of a document's "StructTreeRoot", loaded for one page.
+ public:
+ static std::unique_ptr<CPDF_StructTree> LoadPage(  // Returns nullptr when the document is not tagged.
+ const CPDF_Document* pDoc,
+ const CPDF_Dictionary* pPageDict);
+
+ explicit CPDF_StructTree(const CPDF_Document* pDoc);  // Reads "StructTreeRoot" and its "RoleMap" from the catalog.
+ ~CPDF_StructTree();
+
+ int CountTopElements() const;  // Number of top-level slots in m_Kids.
+ CPDF_StructElement* GetTopElement(int i) const;  // |i| is not range-checked; the slot may be null.
+
+ void LoadPageTree(const CPDF_Dictionary* pPageDict);  // Records the page and fills m_Kids for it.
+ CFX_RetainPtr<CPDF_StructElement> AddPageNode(  // Walks up the "P" chain; |nLevel| is the recursion depth.
+ CPDF_Dictionary* pElement,
+ std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map,
+ int nLevel = 0);
+ bool AddTopLevelNode(CPDF_Dictionary* pDict,  // Places |pElement| in the m_Kids slot(s) matching |pDict|.
+ const CFX_RetainPtr<CPDF_StructElement>& pElement);
+
+ const CPDF_Dictionary* GetRoleMap() const { return m_pRoleMap; }  // May be null.
+ const CPDF_Dictionary* GetPage() const { return m_pPage; }  // Null until LoadPageTree() is called.
+ const CPDF_Dictionary* GetTreeRoot() const { return m_pTreeRoot; }  // May be null.
+
+ private:
+ const CPDF_Dictionary* const m_pTreeRoot;  // Catalog "StructTreeRoot"; declared first because m_pRoleMap is initialized from it.
+ const CPDF_Dictionary* const m_pRoleMap;  // "RoleMap" of the tree root, or null.
+ const CPDF_Dictionary* m_pPage;  // Page dictionary given to LoadPageTree().
+ std::vector<CFX_RetainPtr<CPDF_StructElement>> m_Kids;  // One slot per entry of the root's "K".
+};
+
+#endif // CORE_FPDFDOC_CPDF_STRUCTTREE_H_
diff --git a/core/fpdfdoc/fpdf_tagged.h b/core/fpdfdoc/fpdf_tagged.h
deleted file mode 100644
index 5e7b1827c5..0000000000
--- a/core/fpdfdoc/fpdf_tagged.h
+++ /dev/null
@@ -1,75 +0,0 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#ifndef CORE_FPDFDOC_FPDF_TAGGED_H_
-#define CORE_FPDFDOC_FPDF_TAGGED_H_
-
-#include <memory>
-
-#include "core/fxge/fx_dib.h"
-
-class CPDF_Dictionary;
-class CPDF_Document;
-class IPDF_StructElement;
-
-class IPDF_StructTree {
- public:
- static std::unique_ptr<IPDF_StructTree> LoadPage(
- const CPDF_Document* pDoc,
- const CPDF_Dictionary* pPageDict);
-
-
- virtual int CountTopElements() const = 0;
- virtual IPDF_StructElement* GetTopElement(int i) const = 0;
-
- protected:
- friend std::default_delete<IPDF_StructTree>;
- virtual ~IPDF_StructTree() {}
-};
-
-class IPDF_StructElement {
- public:
- virtual IPDF_StructTree* GetTree() const = 0;
- virtual const CFX_ByteString& GetType() const = 0;
- virtual IPDF_StructElement* GetParent() const = 0;
- virtual CPDF_Dictionary* GetDict() const = 0;
- virtual int CountKids() const = 0;
- virtual IPDF_StructElement* GetKidIfElement(int index) const = 0;
-
- virtual CPDF_Object* GetAttr(const CFX_ByteStringC& owner,
- const CFX_ByteStringC& name,
- bool bInheritable = false,
- float fLevel = 0.0F) = 0;
-
- virtual CFX_ByteString GetName(const CFX_ByteStringC& owner,
- const CFX_ByteStringC& name,
- const CFX_ByteStringC& default_value,
- bool bInheritable = false,
- int subindex = -1) = 0;
-
- virtual FX_ARGB GetColor(const CFX_ByteStringC& owner,
- const CFX_ByteStringC& name,
- FX_ARGB default_value,
- bool bInheritable = false,
- int subindex = -1) = 0;
-
- virtual float GetNumber(const CFX_ByteStringC& owner,
- const CFX_ByteStringC& name,
- float default_value,
- bool bInheritable = false,
- int subindex = -1) = 0;
-
- virtual int GetInteger(const CFX_ByteStringC& owner,
- const CFX_ByteStringC& name,
- int default_value,
- bool bInheritable = false,
- int subindex = -1) = 0;
-
- protected:
- virtual ~IPDF_StructElement() {}
-};
-
-#endif // CORE_FPDFDOC_FPDF_TAGGED_H_