summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--BUILD.gn3
-rw-r--r--core/fpdfdoc/doc_tagged.cpp459
-rw-r--r--core/fpdfdoc/include/fpdf_tagged.h93
-rw-r--r--core/fpdfdoc/tagged_int.h107
-rw-r--r--pdfium.gyp3
5 files changed, 665 insertions, 0 deletions
diff --git a/BUILD.gn b/BUILD.gn
index c8da0dc35e..c898f4d413 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -272,6 +272,7 @@ static_library("fpdfdoc") {
"core/fpdfdoc/csection.h",
"core/fpdfdoc/ctypeset.cpp",
"core/fpdfdoc/ctypeset.h",
+ "core/fpdfdoc/doc_tagged.cpp",
"core/fpdfdoc/include/cpdf_aaction.h",
"core/fpdfdoc/include/cpdf_action.h",
"core/fpdfdoc/include/cpdf_actionfields.h",
@@ -301,9 +302,11 @@ static_library("fpdfdoc") {
"core/fpdfdoc/include/cpvt_wordplace.h",
"core/fpdfdoc/include/cpvt_wordprops.h",
"core/fpdfdoc/include/cpvt_wordrange.h",
+ "core/fpdfdoc/include/fpdf_tagged.h",
"core/fpdfdoc/include/ipdf_formnotify.h",
"core/fpdfdoc/ipdf_formnotify.cpp",
"core/fpdfdoc/ipvt_fontmap.h",
+ "core/fpdfdoc/tagged_int.h",
]
configs += [ ":pdfium_core_config" ]
}
diff --git a/core/fpdfdoc/doc_tagged.cpp b/core/fpdfdoc/doc_tagged.cpp
new file mode 100644
index 0000000000..80a296af60
--- /dev/null
+++ b/core/fpdfdoc/doc_tagged.cpp
@@ -0,0 +1,459 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include <map>
+
+#include "core/fpdfapi/fpdf_parser/include/cpdf_array.h"
+#include "core/fpdfapi/fpdf_parser/include/cpdf_dictionary.h"
+#include "core/fpdfapi/fpdf_parser/include/cpdf_document.h"
+#include "core/fpdfapi/fpdf_parser/include/cpdf_reference.h"
+#include "core/fpdfapi/fpdf_parser/include/cpdf_stream.h"
+#include "core/fpdfdoc/cpdf_numbertree.h"
+#include "core/fpdfdoc/include/fpdf_tagged.h"
+#include "core/fpdfdoc/tagged_int.h"
+
+namespace {
+
+// Upper bound on recursion depth, shared by the parent-chain walk in
+// AddPageNode(), the nested-array walk in FindAttrDict() and the inherited
+// attribute lookup in GetAttr().
+const int nMaxRecursion = 32;
+
+// Returns true when the document catalog contains a /MarkInfo dictionary
+// whose /Marked entry is non-zero.
+//
+// A null document, or a document without a catalog, is reported as untagged
+// rather than dereferenced. Both LoadPage() and LoadDoc() call this before
+// constructing a CPDF_StructTreeImpl, whose constructor dereferences
+// pDoc->GetRoot() unconditionally.
+bool IsTagged(const CPDF_Document* pDoc) {
+  if (!pDoc)
+    return false;
+
+  CPDF_Dictionary* pCatalog = pDoc->GetRoot();
+  if (!pCatalog)
+    return false;
+
+  CPDF_Dictionary* pMarkInfo = pCatalog->GetDictBy("MarkInfo");
+  return pMarkInfo && pMarkInfo->GetIntegerBy("Marked");
+}
+
+}  // namespace
+
+// static
+// Creates a structure tree restricted to the page described by |pPageDict|.
+// Yields nullptr when the document is not marked as tagged; otherwise the
+// result is a freshly allocated CPDF_StructTreeImpl.
+IPDF_StructTree* IPDF_StructTree::LoadPage(const CPDF_Document* pDoc,
+                                           const CPDF_Dictionary* pPageDict) {
+  if (IsTagged(pDoc)) {
+    CPDF_StructTreeImpl* pPageTree = new CPDF_StructTreeImpl(pDoc);
+    pPageTree->LoadPageTree(pPageDict);
+    return pPageTree;
+  }
+  return nullptr;
+}
+
+// static
+// Creates a structure tree covering the whole document. Yields nullptr when
+// the document is not marked as tagged; otherwise the result is a freshly
+// allocated CPDF_StructTreeImpl.
+IPDF_StructTree* IPDF_StructTree::LoadDoc(const CPDF_Document* pDoc) {
+  if (IsTagged(pDoc)) {
+    CPDF_StructTreeImpl* pDocTree = new CPDF_StructTreeImpl(pDoc);
+    pDocTree->LoadDocTree();
+    return pDocTree;
+  }
+  return nullptr;
+}
+
+// Captures the catalog's /StructTreeRoot dictionary and, when that exists,
+// its /RoleMap dictionary. No page is associated with the tree yet.
+// |pDoc| and its catalog are dereferenced without a check.
+CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc)
+    : m_pTreeRoot(pDoc->GetRoot()->GetDictBy("StructTreeRoot")),
+      m_pRoleMap(!m_pTreeRoot ? nullptr : m_pTreeRoot->GetDictBy("RoleMap")),
+      m_pPage(nullptr) {}
+
+// Nothing to do by hand: the entries of m_Kids drop their references when
+// the vector is destroyed.
+CPDF_StructTreeImpl::~CPDF_StructTreeImpl() = default;
+
+// Returns the number of slots in m_Kids. After LoadPageTree() this is the
+// number of entries under the tree root's /K, including slots that were never
+// filled for the current page.
+int CPDF_StructTreeImpl::CountTopElements() const {
+ return pdfium::CollectionSize<int>(m_Kids);
+}
+
+// Returns the top-level element stored in slot |i|.
+//
+// Returns nullptr when |i| is outside [0, CountTopElements()) instead of
+// indexing past the end of m_Kids. A slot that LoadPageTree() sized but never
+// filled holds an empty CFX_RetainPtr, whose Get() result is returned as-is.
+IPDF_StructElement* CPDF_StructTreeImpl::GetTopElement(int i) const {
+  if (i < 0 || i >= CountTopElements())
+    return nullptr;
+  return m_Kids[i].Get();
+}
+
+// Builds the document-wide structure tree from the /K entry of the structure
+// tree root. /K may be a single element dictionary or an array of them; any
+// other kind of object leaves the tree empty.
+//
+// With m_pPage cleared, each CPDF_StructElementImpl constructed here loads
+// its own descendants eagerly (see LoadKid()).
+// NOTE(review): that eager descent has no depth limit, so a /K graph that
+// refers back to one of its ancestors would recurse without bound - confirm
+// whether callers can feed such input.
+void CPDF_StructTreeImpl::LoadDocTree() {
+  m_pPage = nullptr;
+  if (!m_pTreeRoot)
+    return;
+
+  CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectBy("K");
+  if (!pKids)
+    return;
+
+  if (CPDF_Dictionary* pDict = pKids->AsDictionary()) {
+    m_Kids.push_back(CFX_RetainPtr<CPDF_StructElementImpl>(
+        new CPDF_StructElementImpl(this, nullptr, pDict)));
+    return;
+  }
+
+  CPDF_Array* pArray = pKids->AsArray();
+  if (!pArray)
+    return;
+
+  for (size_t i = 0; i < pArray->GetCount(); i++) {
+    CPDF_Dictionary* pKidDict = pArray->GetDictAt(i);
+    if (!pKidDict) {
+      // The array entry is not a dictionary. The element constructor
+      // dereferences its dictionary, so store an empty slot instead; this
+      // also keeps index i aligned with the /K array, matching the
+      // pre-sized layout LoadPageTree() uses.
+      m_Kids.push_back(CFX_RetainPtr<CPDF_StructElementImpl>());
+      continue;
+    }
+    m_Kids.push_back(CFX_RetainPtr<CPDF_StructElementImpl>(
+        new CPDF_StructElementImpl(this, nullptr, pKidDict)));
+  }
+}
+
+// Builds the part of the structure tree that is reachable from one page.
+//
+// m_Kids is sized to match the tree root's /K entry (one slot for a single
+// dictionary, one per item for an array). The page's /StructParents number is
+// then looked up in the root's /ParentTree, and every dictionary in the
+// resulting array is handed to AddPageNode(), which creates the element and
+// links it up through its chain of /P parents.
+//
+// While loading, |element_map| holds one reference on every element it
+// contains, so no map entry can point at a destroyed element. Those
+// references are dropped once all nodes have been added; elements that ended
+// up attached nowhere are destroyed at that point.
+void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict) {
+  m_pPage = pPageDict;
+  if (!m_pTreeRoot)
+    return;
+
+  CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectBy("K");
+  if (!pKids)
+    return;
+
+  uint32_t dwKids = 0;
+  if (pKids->IsDictionary())
+    dwKids = 1;
+  else if (CPDF_Array* pArray = pKids->AsArray())
+    dwKids = pArray->GetCount();
+  else
+    return;
+
+  m_Kids.clear();
+  m_Kids.resize(dwKids);
+  CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictBy("ParentTree");
+  if (!pParentTree)
+    return;
+
+  CPDF_NumberTree parent_tree(pParentTree);
+  int parents_id = pPageDict->GetIntegerBy("StructParents", -1);
+  if (parents_id < 0)
+    return;
+
+  CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id));
+  if (!pParentArray)
+    return;
+
+  std::map<CPDF_Dictionary*, CPDF_StructElementImpl*> element_map;
+  for (size_t i = 0; i < pParentArray->GetCount(); i++) {
+    if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i))
+      AddPageNode(pParent, element_map);
+  }
+
+  // Drop the references AddPageNode() took on behalf of the map. Each entry
+  // is touched only while the map's own reference on it is still held.
+  for (auto& entry : element_map)
+    entry.second->Release();
+}
+
+// Returns the element for |pDict|, creating it on first use and attaching it
+// either to a top-level slot of the tree or to the matching kid entry of its
+// /P parent element (which is created recursively when needed).
+//
+// |map| caches the elements created so far and owns one reference on each of
+// them; the caller releases those references when loading is finished (see
+// LoadPageTree()). |nLevel| is the current depth of the parent-chain walk.
+//
+// Returns nullptr when the walk exceeds nMaxRecursion, when the parent could
+// not be produced, or when the new element could not be attached anywhere. In
+// the latter two cases the element is removed from |map| and released, so no
+// pointer to a destroyed element is returned or left behind.
+CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(
+    CPDF_Dictionary* pDict,
+    std::map<CPDF_Dictionary*, CPDF_StructElementImpl*>& map,
+    int nLevel) {
+  if (nLevel > nMaxRecursion)
+    return nullptr;
+
+  auto it = map.find(pDict);
+  if (it != map.end())
+    return it->second;
+
+  CPDF_StructElementImpl* pElement =
+      new CPDF_StructElementImpl(this, nullptr, pDict);
+  // Registered before walking up so that a /P chain that loops back to
+  // |pDict| finds this entry instead of recursing again. The map's reference
+  // keeps the element alive even if the parent it gets attached to is later
+  // destroyed.
+  map[pDict] = pElement->Retain();
+
+  bool bSaved = false;
+  CPDF_Dictionary* pParent = pDict->GetDictBy("P");
+  if (!pParent || pParent->GetStringBy("Type") == "StructTreeRoot") {
+    bSaved = !!AddTopLevelNode(pDict, pElement);
+  } else if (CPDF_StructElementImpl* pParentElement =
+                 AddPageNode(pParent, map, nLevel + 1)) {
+    // Fill in every kid slot of the parent that refers to this dictionary.
+    for (CPDF_StructKid& kid : pParentElement->m_Kids) {
+      if (kid.m_Type != CPDF_StructKid::Element)
+        continue;
+      if (kid.m_Element.m_pDict != pDict)
+        continue;
+      kid.m_Element.m_pElement = pElement->Retain();
+      bSaved = true;
+    }
+  }
+  if (bSaved)
+    return pElement;
+
+  // Nothing references the element except the map: forget and destroy it,
+  // and tell the caller there is no usable element.
+  map.erase(pDict);
+  pElement->Release();
+  return nullptr;
+}
+// Stores |pElement| in the m_Kids slot(s) that correspond to |pDict| under
+// the tree root's /K entry.
+//  - /K missing: FALSE.
+//  - /K is a dictionary: slot 0 is filled when its object number equals that
+//    of |pDict|, otherwise FALSE.
+//  - /K is an array: every slot whose array item is a reference to |pDict|'s
+//    object number is filled; FALSE when no item matched.
+//  - /K is anything else: TRUE without storing anything.
+FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict,
+                                             CPDF_StructElementImpl* pElement) {
+  CPDF_Object* pRootKids = m_pTreeRoot->GetDirectObjectBy("K");
+  if (!pRootKids)
+    return FALSE;
+
+  if (pRootKids->IsDictionary()) {
+    if (pRootKids->GetObjNum() != pDict->GetObjNum())
+      return FALSE;
+    m_Kids[0].Reset(pElement);
+    return TRUE;
+  }
+
+  CPDF_Array* pTopKids = pRootKids->AsArray();
+  if (!pTopKids)
+    return TRUE;
+
+  FX_BOOL bStored = FALSE;
+  for (size_t i = 0; i < pTopKids->GetCount(); i++) {
+    CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i));
+    if (!pKidRef || pKidRef->GetRefObjNum() != pDict->GetObjNum())
+      continue;
+    m_Kids[i].Reset(pElement);
+    bStored = TRUE;
+  }
+  return bStored;
+}
+
+// Creates an element for |pDict| with a reference count of zero. The type is
+// taken from the dictionary's /S entry and replaced by the tree's role-map
+// entry for that name when one exists and is non-empty. The kids are loaded
+// right away. |pDict| and |pTree| are dereferenced without a check.
+CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree,
+                                               CPDF_StructElementImpl* pParent,
+                                               CPDF_Dictionary* pDict)
+    : m_RefCount(0),
+      m_pTree(pTree),
+      m_pParent(pParent),
+      m_pDict(pDict),
+      m_Type(pDict->GetStringBy("S")) {
+  if (const CPDF_Dictionary* pRoleMap = pTree->m_pRoleMap) {
+    CFX_ByteString mappedType = pRoleMap->GetStringBy(m_Type);
+    if (!mappedType.IsEmpty())
+      m_Type = mappedType;
+  }
+  LoadKids(pDict);
+}
+
+// Returns the tree this element was constructed for.
+IPDF_StructTree* CPDF_StructElementImpl::GetTree() const {
+ return m_pTree;
+}
+
+// Returns the element's type: the /S value of its dictionary, or the
+// role-map replacement chosen by the constructor.
+const CFX_ByteString& CPDF_StructElementImpl::GetType() const {
+ return m_Type;
+}
+
+// Returns the parent passed to the constructor. This is nullptr for elements
+// created by LoadDocTree() and AddPageNode(), which both pass nullptr.
+IPDF_StructElement* CPDF_StructElementImpl::GetParent() const {
+ return m_pParent;
+}
+
+// Returns the structure element dictionary this object wraps.
+CPDF_Dictionary* CPDF_StructElementImpl::GetDict() const {
+ return m_pDict;
+}
+
+// Returns the number of kid slots, including slots whose type is Invalid.
+int CPDF_StructElementImpl::CountKids() const {
+ return pdfium::CollectionSize<int>(m_Kids);
+}
+
+// Returns the kid record at |index|. The index is not range-checked here;
+// callers are expected to stay within [0, CountKids()).
+const CPDF_StructKid& CPDF_StructElementImpl::GetKid(int index) const {
+ return m_Kids[index];
+}
+
+// Drops one reference on every child element held by a kid slot of type
+// Element. Other kid types own nothing.
+CPDF_StructElementImpl::~CPDF_StructElementImpl() {
+  for (CPDF_StructKid& kid : m_Kids) {
+    if (kid.m_Type != CPDF_StructKid::Element)
+      continue;
+    IPDF_StructElement* pChild = kid.m_Element.m_pElement;
+    if (pChild)
+      static_cast<CPDF_StructElementImpl*>(pChild)->Release();
+  }
+}
+
+// Adds one reference and returns this element so the call can be chained
+// into an assignment.
+CPDF_StructElementImpl* CPDF_StructElementImpl::Retain() {
+  ++m_RefCount;
+  return this;
+}
+// Removes one reference and destroys the element once the count is zero or
+// lower. Because elements start with a count of zero, releasing one that was
+// never retained destroys it as well.
+void CPDF_StructElementImpl::Release() {
+  --m_RefCount;
+  if (m_RefCount <= 0)
+    delete this;
+}
+// Fills m_Kids from the /K entry of |pDict|. /K may be an array (one kid per
+// item) or a single object (exactly one kid). The object number referenced by
+// the element's own /Pg entry, or 0 when /Pg is not a reference, is passed to
+// LoadKid() as the default page for each kid. Without a /K entry m_Kids is
+// left untouched.
+void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict) {
+  uint32_t dwPageObjNum = 0;
+  if (CPDF_Reference* pPageRef = ToReference(pDict->GetObjectBy("Pg")))
+    dwPageObjNum = pPageRef->GetRefObjNum();
+
+  CPDF_Object* pKids = pDict->GetDirectObjectBy("K");
+  if (!pKids)
+    return;
+
+  m_Kids.clear();
+  CPDF_Array* pKidArray = pKids->AsArray();
+  if (!pKidArray) {
+    m_Kids.resize(1);
+    LoadKid(dwPageObjNum, pKids, &m_Kids[0]);
+    return;
+  }
+
+  m_Kids.resize(pKidArray->GetCount());
+  for (uint32_t i = 0; i < pKidArray->GetCount(); i++)
+    LoadKid(dwPageObjNum, pKidArray->GetDirectObjectAt(i), &m_Kids[i]);
+}
+// Classifies one /K item and fills |pKid| accordingly.
+//  - number: marked-content id on page |PageObjNum| (PageContent).
+//  - dictionary with /Type "MCR": marked-content reference (StreamContent).
+//  - dictionary with /Type "OBJR": object reference (Object).
+//  - any other dictionary: child structure element (Element).
+//  - anything else, or a null object: |pKid| stays Invalid.
+// A dictionary's own /Pg reference overrides |PageObjNum|. When the tree is
+// loaded for a single page, content kids that belong to a different page are
+// left Invalid and child elements are recorded by dictionary only (their
+// element pointer stays null). Without a page, child elements are constructed
+// immediately, which loads their kids in turn.
+// NOTE(review): that immediate construction has no depth limit - confirm that
+// cyclic /K graphs cannot reach this path.
+void CPDF_StructElementImpl::LoadKid(uint32_t PageObjNum,
+                                     CPDF_Object* pKidObj,
+                                     CPDF_StructKid* pKid) {
+  pKid->m_Type = CPDF_StructKid::Invalid;
+  if (!pKidObj)
+    return;
+
+  const CPDF_Dictionary* pPage = m_pTree->m_pPage;
+  if (pKidObj->IsNumber()) {
+    if (pPage && pPage->GetObjNum() != PageObjNum)
+      return;
+    pKid->m_Type = CPDF_StructKid::PageContent;
+    pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
+    pKid->m_PageContent.m_PageObjNum = PageObjNum;
+    return;
+  }
+
+  CPDF_Dictionary* pKidDict = pKidObj->AsDictionary();
+  if (!pKidDict)
+    return;
+
+  if (CPDF_Reference* pPageRef = ToReference(pKidDict->GetObjectBy("Pg")))
+    PageObjNum = pPageRef->GetRefObjNum();
+
+  CFX_ByteString type = pKidDict->GetStringBy("Type");
+  const bool bIsMCR = type == "MCR";
+  const bool bIsOBJR = !bIsMCR && type == "OBJR";
+  if (!bIsMCR && !bIsOBJR) {
+    pKid->m_Type = CPDF_StructKid::Element;
+    pKid->m_Element.m_pDict = pKidDict;
+    pKid->m_Element.m_pElement =
+        pPage ? nullptr : new CPDF_StructElementImpl(m_pTree, this, pKidDict);
+    return;
+  }
+
+  // Content kids are only kept when they belong to the page being loaded.
+  if (pPage && pPage->GetObjNum() != PageObjNum)
+    return;
+
+  if (bIsMCR) {
+    CPDF_Reference* pStmRef = ToReference(pKidDict->GetObjectBy("Stm"));
+    pKid->m_Type = CPDF_StructKid::StreamContent;
+    pKid->m_StreamContent.m_RefObjNum = pStmRef ? pStmRef->GetRefObjNum() : 0;
+    pKid->m_StreamContent.m_PageObjNum = PageObjNum;
+    pKid->m_StreamContent.m_ContentId = pKidDict->GetIntegerBy("MCID");
+    return;
+  }
+
+  CPDF_Reference* pObjRef = ToReference(pKidDict->GetObjectBy("Obj"));
+  pKid->m_Type = CPDF_StructKid::Object;
+  pKid->m_Object.m_RefObjNum = pObjRef ? pObjRef->GetRefObjNum() : 0;
+  pKid->m_Object.m_PageObjNum = PageObjNum;
+}
+// Searches |pAttrs| for an attribute dictionary whose /O entry equals
+// |owner|. |pAttrs| may be a dictionary, a stream (its dictionary is
+// examined) or an array that is searched depth-first, returning the first
+// match. Returns nullptr when nothing matches, when |pAttrs| is null, or when
+// |nLevel| exceeds nMaxRecursion.
+static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs,
+                                     const CFX_ByteStringC& owner,
+                                     FX_FLOAT nLevel = 0.0F) {
+  if (!pAttrs || nLevel > nMaxRecursion)
+    return nullptr;
+
+  if (CPDF_Array* pArray = pAttrs->AsArray()) {
+    for (uint32_t i = 0; i < pArray->GetCount(); i++) {
+      CPDF_Dictionary* pFound =
+          FindAttrDict(pArray->GetDirectObjectAt(i), owner, nLevel + 1);
+      if (pFound)
+        return pFound;
+    }
+    return nullptr;
+  }
+
+  CPDF_Dictionary* pCandidate = nullptr;
+  if (pAttrs->IsDictionary()) {
+    pCandidate = pAttrs->AsDictionary();
+  } else if (CPDF_Stream* pStream = pAttrs->AsStream()) {
+    pCandidate = pStream->GetDict();
+  }
+  if (!pCandidate)
+    return nullptr;
+  return pCandidate->GetStringBy("O") == owner ? pCandidate : nullptr;
+}
+// Looks up attribute |name| belonging to |owner|.
+//
+// Non-inheritable lookup: first the element's /A attribute object(s), then
+// the class or classes named by /C, resolved through the tree root's
+// /ClassMap. For an array of class names the first class whose /O equals
+// |owner| decides the result, even when it lacks |name|.
+// Inheritable lookup: the element itself is tried first, then its parent
+// chain, with |fLevel| counting the hops so the walk stops after
+// nMaxRecursion.
+// Returns nullptr when the attribute is not found.
+CPDF_Object* CPDF_StructElementImpl::GetAttr(const CFX_ByteStringC& owner,
+                                             const CFX_ByteStringC& name,
+                                             FX_BOOL bInheritable,
+                                             FX_FLOAT fLevel) {
+  if (fLevel > nMaxRecursion)
+    return nullptr;
+
+  if (bInheritable) {
+    if (CPDF_Object* pOwnAttr = GetAttr(owner, name, FALSE))
+      return pOwnAttr;
+    return m_pParent ? m_pParent->GetAttr(owner, name, TRUE, fLevel + 1)
+                     : nullptr;
+  }
+
+  if (CPDF_Object* pA = m_pDict->GetDirectObjectBy("A")) {
+    if (CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner)) {
+      if (CPDF_Object* pDirectAttr =
+              pAttrDict->GetDirectObjectBy(CFX_ByteString(name))) {
+        return pDirectAttr;
+      }
+    }
+  }
+
+  CPDF_Object* pC = m_pDict->GetDirectObjectBy("C");
+  if (!pC)
+    return nullptr;
+
+  CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDictBy("ClassMap");
+  if (!pClassMap)
+    return nullptr;
+
+  CPDF_Array* pClassNames = pC->AsArray();
+  if (!pClassNames) {
+    // /C holds a single class name.
+    CFX_ByteString single_class = pC->GetString();
+    CPDF_Dictionary* pSingleDict = pClassMap->GetDictBy(single_class);
+    if (pSingleDict && pSingleDict->GetStringBy("O") == owner)
+      return pSingleDict->GetDirectObjectBy(CFX_ByteString(name));
+    return nullptr;
+  }
+
+  for (uint32_t i = 0; i < pClassNames->GetCount(); i++) {
+    CFX_ByteString listed_class = pClassNames->GetStringAt(i);
+    CPDF_Dictionary* pListedDict = pClassMap->GetDictBy(listed_class);
+    if (pListedDict && pListedDict->GetStringBy("O") == owner)
+      return pListedDict->GetDirectObjectBy(CFX_ByteString(name));
+  }
+  return nullptr;
+}
+// Looks up the attribute as the other GetAttr() overload does and, when the
+// value is an array and |subindex| is a valid position other than -1,
+// returns that array item. In every other case the attribute itself is
+// returned (nullptr when it does not exist).
+CPDF_Object* CPDF_StructElementImpl::GetAttr(const CFX_ByteStringC& owner,
+                                             const CFX_ByteStringC& name,
+                                             FX_BOOL bInheritable,
+                                             int subindex) {
+  CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
+  CPDF_Array* pArray = ToArray(pAttr);
+  const bool bPickItem = pArray && subindex != -1 &&
+                         subindex < static_cast<int>(pArray->GetCount());
+  return bPickItem ? pArray->GetDirectObjectAt(subindex) : pAttr;
+}
+// Returns the attribute's string value when the attribute (or the selected
+// array item) is a name object; otherwise returns |default_value|.
+CFX_ByteString CPDF_StructElementImpl::GetName(
+    const CFX_ByteStringC& owner,
+    const CFX_ByteStringC& name,
+    const CFX_ByteStringC& default_value,
+    FX_BOOL bInheritable,
+    int subindex) {
+  CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
+  if (!ToName(pAttr))
+    return CFX_ByteString(default_value);
+  return pAttr->GetString();
+}
+
+// Interprets the attribute as an array of three colour components and packs
+// them into an opaque ARGB value: each of the first three numbers is
+// multiplied by 255 and truncated to int, with no clamping. Returns
+// |default_value| when the attribute is not an array.
+FX_ARGB CPDF_StructElementImpl::GetColor(const CFX_ByteStringC& owner,
+                                         const CFX_ByteStringC& name,
+                                         FX_ARGB default_value,
+                                         FX_BOOL bInheritable,
+                                         int subindex) {
+  CPDF_Array* pComponents =
+      ToArray(GetAttr(owner, name, bInheritable, subindex));
+  if (!pComponents)
+    return default_value;
+
+  const int nRed = static_cast<int>(pComponents->GetNumberAt(0) * 255);
+  const int nGreen = static_cast<int>(pComponents->GetNumberAt(1) * 255);
+  const int nBlue = static_cast<int>(pComponents->GetNumberAt(2) * 255);
+  return 0xff000000 | (nRed << 16) | (nGreen << 8) | nBlue;
+}
+// Returns the attribute as a float when it is a number object; otherwise
+// returns |default_value|.
+FX_FLOAT CPDF_StructElementImpl::GetNumber(const CFX_ByteStringC& owner,
+                                           const CFX_ByteStringC& name,
+                                           FX_FLOAT default_value,
+                                           FX_BOOL bInheritable,
+                                           int subindex) {
+  CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
+  if (!ToNumber(pAttr))
+    return default_value;
+  return pAttr->GetNumber();
+}
+// Returns the attribute as an integer when it is a number object; otherwise
+// returns |default_value|.
+int CPDF_StructElementImpl::GetInteger(const CFX_ByteStringC& owner,
+                                       const CFX_ByteStringC& name,
+                                       int default_value,
+                                       FX_BOOL bInheritable,
+                                       int subindex) {
+  CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
+  if (!ToNumber(pAttr))
+    return default_value;
+  return pAttr->GetInteger();
+}
diff --git a/core/fpdfdoc/include/fpdf_tagged.h b/core/fpdfdoc/include/fpdf_tagged.h
new file mode 100644
index 0000000000..43d69e8db9
--- /dev/null
+++ b/core/fpdfdoc/include/fpdf_tagged.h
@@ -0,0 +1,93 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FPDFDOC_INCLUDE_FPDF_TAGGED_H_
+#define CORE_FPDFDOC_INCLUDE_FPDF_TAGGED_H_
+
+#include "core/fxge/include/fx_dib.h"
+
+class CPDF_Dictionary;
+class CPDF_Document;
+class IPDF_StructElement;
+
+// Read-only view of a tagged PDF's logical structure tree, either for the
+// whole document or restricted to a single page.
+class IPDF_StructTree {
+ public:
+ // Both factories return nullptr when the document is not marked as tagged.
+ // Otherwise they return a newly allocated tree as a raw pointer.
+ // NOTE(review): presumably the caller owns and deletes the result - confirm
+ // against the call sites.
+ static IPDF_StructTree* LoadDoc(const CPDF_Document* pDoc);
+ static IPDF_StructTree* LoadPage(const CPDF_Document* pDoc,
+ const CPDF_Dictionary* pPageDict);
+
+ virtual ~IPDF_StructTree() {}
+
+ // Number of top-level slots and access to the element in slot |i|.
+ virtual int CountTopElements() const = 0;
+ virtual IPDF_StructElement* GetTopElement(int i) const = 0;
+};
+
+// One child entry of a structure element. m_Type selects which member of the
+// anonymous union is meaningful; for Invalid none of them is.
+struct CPDF_StructKid {
+ enum { Invalid, Element, PageContent, StreamContent, Object } m_Type;
+
+ union {
+ // Element: a child structure element. m_pDict is the child's dictionary;
+ // m_pElement is the loaded child and may be null when the tree was loaded
+ // for a single page and the child has not been attached.
+ struct {
+ IPDF_StructElement* m_pElement;
+ CPDF_Dictionary* m_pDict;
+ } m_Element;
+ // PageContent: a marked-content id given directly as a number, together
+ // with the object number of the page it belongs to.
+ struct {
+ uint32_t m_PageObjNum;
+ uint32_t m_ContentId;
+ } m_PageContent;
+ // StreamContent: a marked-content reference (/Type MCR). m_ContentId is
+ // its /MCID and m_RefObjNum the object number of its /Stm reference, or 0
+ // when there is none.
+ struct {
+ uint32_t m_PageObjNum;
+ uint32_t m_ContentId;
+ uint32_t m_RefObjNum;
+ } m_StreamContent;
+ // Object: an object reference (/Type OBJR). m_RefObjNum is the object
+ // number of its /Obj reference, or 0 when there is none.
+ struct {
+ uint32_t m_PageObjNum;
+ uint32_t m_RefObjNum;
+ } m_Object;
+ };
+};
+
+// Read-only view of one structure element and its attributes.
+// NOTE(review): CPDF_Object is used below but is not among this header's
+// forward declarations - confirm that an included header provides it.
+class IPDF_StructElement {
+ public:
+ virtual ~IPDF_StructElement() {}
+
+ // Basic accessors: owning tree, element type, parent element, the wrapped
+ // dictionary, and the list of kids.
+ virtual IPDF_StructTree* GetTree() const = 0;
+ virtual const CFX_ByteString& GetType() const = 0;
+ virtual IPDF_StructElement* GetParent() const = 0;
+ virtual CPDF_Dictionary* GetDict() const = 0;
+ virtual int CountKids() const = 0;
+ virtual const CPDF_StructKid& GetKid(int index) const = 0;
+
+ // Returns attribute |name| of attribute owner |owner|, or nullptr when it is
+ // not found. With |bInheritable| the parent chain is searched as well;
+ // |fLevel| is the recursion depth of that search and callers normally leave
+ // it at its default.
+ virtual CPDF_Object* GetAttr(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ FX_BOOL bInheritable = FALSE,
+ FX_FLOAT fLevel = 0.0F) = 0;
+
+ // Typed attribute getters. Each returns |default_value| when the attribute
+ // is missing or has a different type. When the attribute is an array,
+ // |subindex| selects one of its items; -1 means the attribute itself.
+ virtual CFX_ByteString GetName(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ const CFX_ByteStringC& default_value,
+ FX_BOOL bInheritable = FALSE,
+ int subindex = -1) = 0;
+
+ virtual FX_ARGB GetColor(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ FX_ARGB default_value,
+ FX_BOOL bInheritable = FALSE,
+ int subindex = -1) = 0;
+
+ virtual FX_FLOAT GetNumber(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ FX_FLOAT default_value,
+ FX_BOOL bInheritable = FALSE,
+ int subindex = -1) = 0;
+
+ virtual int GetInteger(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ int default_value,
+ FX_BOOL bInheritable = FALSE,
+ int subindex = -1) = 0;
+};
+
+#endif // CORE_FPDFDOC_INCLUDE_FPDF_TAGGED_H_
diff --git a/core/fpdfdoc/tagged_int.h b/core/fpdfdoc/tagged_int.h
new file mode 100644
index 0000000000..354a93cb76
--- /dev/null
+++ b/core/fpdfdoc/tagged_int.h
@@ -0,0 +1,107 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FPDFDOC_TAGGED_INT_H_
+#define CORE_FPDFDOC_TAGGED_INT_H_
+
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "core/fpdfdoc/include/fpdf_tagged.h"
+#include "core/fxcrt/include/cfx_retain_ptr.h"
+#include "third_party/base/stl_util.h"
+
+class CPDF_StructElementImpl;
+
+// Concrete structure tree. It is populated either for the whole document
+// (LoadDocTree) or for a single page (LoadPageTree).
+class CPDF_StructTreeImpl final : public IPDF_StructTree {
+ public:
+ // Reads /StructTreeRoot and its /RoleMap from the catalog of |pDoc|.
+ explicit CPDF_StructTreeImpl(const CPDF_Document* pDoc);
+ ~CPDF_StructTreeImpl() override;
+
+ // IPDF_StructTree:
+ int CountTopElements() const override;
+ IPDF_StructElement* GetTopElement(int i) const override;
+
+ // Populate m_Kids from the tree root's /K entry.
+ void LoadDocTree();
+ void LoadPageTree(const CPDF_Dictionary* pPageDict);
+ // Returns the element for a structure element dictionary, creating it and
+ // hooking it under its /P parent (or a top-level slot) when needed. |map|
+ // caches the elements created so far; |nLevel| is the recursion depth of
+ // the walk up the parent chain.
+ CPDF_StructElementImpl* AddPageNode(
+ CPDF_Dictionary* pElement,
+ std::map<CPDF_Dictionary*, CPDF_StructElementImpl*>& map,
+ int nLevel = 0);
+ // Stores |pElement| in the top-level slot(s) that match |pDict|.
+ FX_BOOL AddTopLevelNode(CPDF_Dictionary* pDict,
+ CPDF_StructElementImpl* pElement);
+
+ protected:
+ // m_pRoleMap is initialised from m_pTreeRoot in the constructor, so
+ // m_pTreeRoot must stay declared before it.
+ const CPDF_Dictionary* const m_pTreeRoot;
+ const CPDF_Dictionary* const m_pRoleMap;
+ // Page the tree was loaded for; nullptr for a document-wide tree.
+ const CPDF_Dictionary* m_pPage;
+ // Top-level elements, one slot per entry under the tree root's /K.
+ std::vector<CFX_RetainPtr<CPDF_StructElementImpl>> m_Kids;
+
+ friend class CPDF_StructElementImpl;
+};
+
+// Concrete structure element. Instances are reference counted through
+// Retain()/Release() and destroy themselves from Release(), which is why the
+// destructor is not public.
+class CPDF_StructElementImpl final : public IPDF_StructElement {
+ public:
+ // Wraps |pDict| as an element of |pTree| and loads its kids immediately.
+ CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree,
+ CPDF_StructElementImpl* pParent,
+ CPDF_Dictionary* pDict);
+
+ // IPDF_StructElement
+ IPDF_StructTree* GetTree() const override;
+ const CFX_ByteString& GetType() const override;
+ IPDF_StructElement* GetParent() const override;
+ CPDF_Dictionary* GetDict() const override;
+ int CountKids() const override;
+ const CPDF_StructKid& GetKid(int index) const override;
+ CPDF_Object* GetAttr(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ FX_BOOL bInheritable = FALSE,
+ FX_FLOAT fLevel = 0.0F) override;
+ CFX_ByteString GetName(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ const CFX_ByteStringC& default_value,
+ FX_BOOL bInheritable = FALSE,
+ int subindex = -1) override;
+ FX_ARGB GetColor(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ FX_ARGB default_value,
+ FX_BOOL bInheritable = FALSE,
+ int subindex = -1) override;
+ FX_FLOAT GetNumber(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ FX_FLOAT default_value,
+ FX_BOOL bInheritable = FALSE,
+ int subindex = -1) override;
+ int GetInteger(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ int default_value,
+ FX_BOOL bInheritable = FALSE,
+ int subindex = -1) override;
+
+ // Fill m_Kids from the /K entry of |pDict|; LoadKid() classifies one item.
+ void LoadKids(CPDF_Dictionary* pDict);
+ void LoadKid(uint32_t PageObjNum, CPDF_Object* pObj, CPDF_StructKid* pKid);
+ // Overload taking an array subindex. The fourth parameter is an int here and
+ // an FX_FLOAT in the overriding overload above, so the type of the fourth
+ // argument decides which one a four-argument call resolves to.
+ CPDF_Object* GetAttr(const CFX_ByteStringC& owner,
+ const CFX_ByteStringC& name,
+ FX_BOOL bInheritable,
+ int subindex);
+ // Reference counting. Release() deletes the element when the count drops
+ // below one.
+ CPDF_StructElementImpl* Retain();
+ void Release();
+
+ protected:
+ ~CPDF_StructElementImpl() override;
+
+ // Starts at zero; see Retain()/Release().
+ int m_RefCount;
+ CPDF_StructTreeImpl* const m_pTree;
+ CPDF_StructElementImpl* const m_pParent;
+ CPDF_Dictionary* const m_pDict;
+ // /S of the dictionary, or its role-map replacement.
+ CFX_ByteString m_Type;
+ std::vector<CPDF_StructKid> m_Kids;
+
+ friend class CPDF_StructTreeImpl;
+};
+
+#endif // CORE_FPDFDOC_TAGGED_INT_H_
diff --git a/pdfium.gyp b/pdfium.gyp
index 87c4fc6b4a..658d2dde74 100644
--- a/pdfium.gyp
+++ b/pdfium.gyp
@@ -253,6 +253,7 @@
'core/fpdfdoc/csection.h',
'core/fpdfdoc/ctypeset.cpp',
'core/fpdfdoc/ctypeset.h',
+ 'core/fpdfdoc/doc_tagged.cpp',
'core/fpdfdoc/include/cpdf_aaction.h',
'core/fpdfdoc/include/cpdf_action.h',
'core/fpdfdoc/include/cpdf_actionfields.h',
@@ -282,9 +283,11 @@
'core/fpdfdoc/include/cpvt_wordplace.h',
'core/fpdfdoc/include/cpvt_wordprops.h',
'core/fpdfdoc/include/cpvt_wordrange.h',
+ 'core/fpdfdoc/include/fpdf_tagged.h',
'core/fpdfdoc/include/ipvt_fontmap.h',
'core/fpdfdoc/include/ipdf_formnotify.h',
'core/fpdfdoc/ipdf_formnotify.cpp',
+ 'core/fpdfdoc/tagged_int.h',
],
},
{