diff options
author | dan sinclair <dsinclair@chromium.org> | 2017-04-06 13:38:54 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-04-06 18:09:57 +0000 |
commit | 7f389615a0fca78532482d6f4070d18c5d2f9f5d (patch) | |
tree | 9e47feb57a5bfd57abe48eb0104c5952fe394ab0 | |
parent | 25553aa8f74ddccbfcb3bb7b8f55d3f2ac00338f (diff) | |
download | pdfium-7f389615a0fca78532482d6f4070d18c5d2f9f5d.tar.xz |
Clean up the tagged code
This CL removes the IPDF_Struct* classes in favour of their only implementation.
The tagged code was split out into files matching the classes they contain. The
friendship between CPDF_StructTree and CPDF_StructElement was broken in favour
of accessors.
Bug: pdfium:672
Change-Id: Iade83b608fb7168b3b0f41338d10d5fd8ab91a6e
Reviewed-on: https://pdfium-review.googlesource.com/3820
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Nicolás Peña <npm@chromium.org>
Commit-Queue: dsinclair <dsinclair@chromium.org>
-rw-r--r-- | BUILD.gn | 7 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_structelement.cpp (renamed from core/fpdfdoc/doc_tagged.cpp) | 312 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_structelement.h (renamed from core/fpdfdoc/tagged_int.h) | 73 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_structtree.cpp | 155 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_structtree.h | 51 | ||||
-rw-r--r-- | core/fpdfdoc/fpdf_tagged.h | 75 | ||||
-rw-r--r-- | fpdfsdk/fpdf_structtree.cpp | 27 |
7 files changed, 330 insertions, 370 deletions
@@ -325,6 +325,10 @@ static_library("fpdfdoc") { "core/fpdfdoc/cpdf_occontext.h", "core/fpdfdoc/cpdf_pagelabel.cpp", "core/fpdfdoc/cpdf_pagelabel.h", + "core/fpdfdoc/cpdf_structelement.cpp", + "core/fpdfdoc/cpdf_structelement.h", + "core/fpdfdoc/cpdf_structtree.cpp", + "core/fpdfdoc/cpdf_structtree.h", "core/fpdfdoc/cpdf_variabletext.cpp", "core/fpdfdoc/cpdf_variabletext.h", "core/fpdfdoc/cpdf_viewerpreferences.cpp", @@ -353,11 +357,8 @@ static_library("fpdfdoc") { "core/fpdfdoc/csection.h", "core/fpdfdoc/ctypeset.cpp", "core/fpdfdoc/ctypeset.h", - "core/fpdfdoc/doc_tagged.cpp", - "core/fpdfdoc/fpdf_tagged.h", "core/fpdfdoc/ipdf_formnotify.h", "core/fpdfdoc/ipvt_fontmap.h", - "core/fpdfdoc/tagged_int.h", ] configs += [ ":pdfium_core_config" ] deps = [ diff --git a/core/fpdfdoc/doc_tagged.cpp b/core/fpdfdoc/cpdf_structelement.cpp index 418fab411a..418f75b3a9 100644 --- a/core/fpdfdoc/doc_tagged.cpp +++ b/core/fpdfdoc/cpdf_structelement.cpp @@ -1,33 +1,49 @@ -// Copyright 2014 PDFium Authors. All rights reserved. +// Copyright 2017 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com -#include <map> -#include <memory> -#include <utility> +#include "core/fpdfdoc/cpdf_structelement.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" -#include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_name.h" #include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_object.h" #include "core/fpdfapi/parser/cpdf_reference.h" #include "core/fpdfapi/parser/cpdf_stream.h" -#include "core/fpdfdoc/cpdf_numbertree.h" -#include "core/fpdfdoc/fpdf_tagged.h" -#include "core/fpdfdoc/tagged_int.h" -#include "third_party/base/ptr_util.h" +#include "core/fpdfdoc/cpdf_structtree.h" +#include "third_party/base/stl_util.h" namespace { const int nMaxRecursion = 32; -bool IsTagged(const CPDF_Document* pDoc) { - CPDF_Dictionary* pCatalog = pDoc->GetRoot(); - CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo"); - return pMarkInfo && pMarkInfo->GetIntegerFor("Marked"); +CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, + const CFX_ByteStringC& owner, + float nLevel = 0.0F) { + if (nLevel > nMaxRecursion) + return nullptr; + if (!pAttrs) + return nullptr; + + CPDF_Dictionary* pDict = nullptr; + if (pAttrs->IsDictionary()) { + pDict = pAttrs->AsDictionary(); + } else if (CPDF_Stream* pStream = pAttrs->AsStream()) { + pDict = pStream->GetDict(); + } else if (CPDF_Array* pArray = pAttrs->AsArray()) { + for (uint32_t i = 0; i < pArray->GetCount(); i++) { + CPDF_Object* pElement = pArray->GetDirectObjectAt(i); + pDict = FindAttrDict(pElement, owner, nLevel + 1); + if (pDict) + return pDict; + } + } + if (pDict && pDict->GetStringFor("O") == owner) + return pDict; + return nullptr; } } // namespace @@ -41,134 +57,7 @@ CPDF_StructKid::CPDF_StructKid() CPDF_StructKid::CPDF_StructKid(const CPDF_StructKid& that) = default; -CPDF_StructKid::~CPDF_StructKid() {} - -// static -std::unique_ptr<IPDF_StructTree> IPDF_StructTree::LoadPage( - const CPDF_Document* pDoc, 
- const CPDF_Dictionary* pPageDict) { - if (!IsTagged(pDoc)) - return nullptr; - - auto pTree = pdfium::MakeUnique<CPDF_StructTree>(pDoc); - pTree->LoadPageTree(pPageDict); - return std::move(pTree); -} - -CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc) - : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")), - m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr), - m_pPage(nullptr) {} - -CPDF_StructTree::~CPDF_StructTree() {} - -int CPDF_StructTree::CountTopElements() const { - return pdfium::CollectionSize<int>(m_Kids); -} - -IPDF_StructElement* CPDF_StructTree::GetTopElement(int i) const { - return m_Kids[i].Get(); -} - -void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) { - m_pPage = pPageDict; - if (!m_pTreeRoot) - return; - - CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K"); - if (!pKids) - return; - - uint32_t dwKids = 0; - if (pKids->IsDictionary()) - dwKids = 1; - else if (CPDF_Array* pArray = pKids->AsArray()) - dwKids = pArray->GetCount(); - else - return; - - m_Kids.clear(); - m_Kids.resize(dwKids); - CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree"); - if (!pParentTree) - return; - - CPDF_NumberTree parent_tree(pParentTree); - int parents_id = pPageDict->GetIntegerFor("StructParents", -1); - if (parents_id < 0) - return; - - CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id)); - if (!pParentArray) - return; - - std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>> element_map; - for (size_t i = 0; i < pParentArray->GetCount(); i++) { - if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i)) - AddPageNode(pParent, &element_map); - } -} - -CFX_RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode( - CPDF_Dictionary* pDict, - std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map, - int nLevel) { - if (nLevel > nMaxRecursion) - return nullptr; - - auto it = map->find(pDict); - if (it != map->end()) - return it->second; - - 
auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict); - (*map)[pDict] = pElement; - CPDF_Dictionary* pParent = pDict->GetDictFor("P"); - if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") { - if (!AddTopLevelNode(pDict, pElement)) - map->erase(pDict); - return pElement; - } - - CFX_RetainPtr<CPDF_StructElement> pParentElement = - AddPageNode(pParent, map, nLevel + 1); - bool bSave = false; - for (CPDF_StructKid& kid : *pParentElement->GetKids()) { - if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) { - kid.m_pElement = pElement; - bSave = true; - } - } - if (!bSave) - map->erase(pDict); - return pElement; -} - -bool CPDF_StructTree::AddTopLevelNode( - CPDF_Dictionary* pDict, - const CFX_RetainPtr<CPDF_StructElement>& pElement) { - CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K"); - if (!pObj) - return false; - - if (pObj->IsDictionary()) { - if (pObj->GetObjNum() != pDict->GetObjNum()) - return false; - m_Kids[0] = pElement; - } - if (CPDF_Array* pTopKids = pObj->AsArray()) { - bool bSave = false; - for (size_t i = 0; i < pTopKids->GetCount(); i++) { - CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i)); - if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) { - m_Kids[i] = pElement; - bSave = true; - } - } - if (!bSave) - return false; - } - return true; -} +CPDF_StructKid::~CPDF_StructKid() = default; CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree, CPDF_StructElement* pParent, @@ -177,43 +66,26 @@ CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree, m_pParent(pParent), m_pDict(pDict), m_Type(pDict->GetStringFor("S")) { - if (pTree->m_pRoleMap) { - CFX_ByteString mapped = pTree->m_pRoleMap->GetStringFor(m_Type); + if (pTree->GetRoleMap()) { + CFX_ByteString mapped = pTree->GetRoleMap()->GetStringFor(m_Type); if (!mapped.IsEmpty()) m_Type = mapped; } LoadKids(pDict); } -IPDF_StructTree* CPDF_StructElement::GetTree() const { - return m_pTree; -} - -const 
CFX_ByteString& CPDF_StructElement::GetType() const { - return m_Type; -} - -IPDF_StructElement* CPDF_StructElement::GetParent() const { - return m_pParent; -} - -CPDF_Dictionary* CPDF_StructElement::GetDict() const { - return m_pDict; -} +CPDF_StructElement::~CPDF_StructElement() = default; int CPDF_StructElement::CountKids() const { return pdfium::CollectionSize<int>(m_Kids); } -IPDF_StructElement* CPDF_StructElement::GetKidIfElement(int index) const { - if (m_Kids[index].m_Type != CPDF_StructKid::Element) - return nullptr; - - return m_Kids[index].m_pElement.Get(); +CPDF_StructElement* CPDF_StructElement::GetKidIfElement(int index) const { + return m_Kids[index].m_Type == CPDF_StructKid::Element + ? m_Kids[index].m_pElement.Get() + : nullptr; } -CPDF_StructElement::~CPDF_StructElement() {} - void CPDF_StructElement::LoadKids(CPDF_Dictionary* pDict) { CPDF_Object* pObj = pDict->GetObjectFor("Pg"); uint32_t PageObjNum = 0; @@ -231,11 +103,13 @@ void CPDF_StructElement::LoadKids(CPDF_Dictionary* pDict) { CPDF_Object* pKid = pArray->GetDirectObjectAt(i); LoadKid(PageObjNum, pKid, &m_Kids[i]); } - } else { - m_Kids.resize(1); - LoadKid(PageObjNum, pKids, &m_Kids[0]); + return; } + + m_Kids.resize(1); + LoadKid(PageObjNum, pKids, &m_Kids[0]); } + void CPDF_StructElement::LoadKid(uint32_t PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid) { @@ -244,9 +118,9 @@ void CPDF_StructElement::LoadKid(uint32_t PageObjNum, return; if (pKidObj->IsNumber()) { - if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { + if (m_pTree->GetPage() && m_pTree->GetPage()->GetObjNum() != PageObjNum) return; - } + pKid->m_Type = CPDF_StructKid::PageContent; pKid->m_ContentId = pKidObj->GetInteger(); pKid->m_PageObjNum = PageObjNum; @@ -256,96 +130,70 @@ void CPDF_StructElement::LoadKid(uint32_t PageObjNum, CPDF_Dictionary* pKidDict = pKidObj->AsDictionary(); if (!pKidDict) return; - if (CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Pg"))) PageObjNum = 
pRef->GetRefObjNum(); CFX_ByteString type = pKidDict->GetStringFor("Type"); + if ((type == "MCR" || type == "OBJR") && m_pTree->GetPage() && + m_pTree->GetPage()->GetObjNum() != PageObjNum) { + return; + } + if (type == "MCR") { - if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { - return; - } pKid->m_Type = CPDF_StructKid::StreamContent; CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Stm")); pKid->m_RefObjNum = pRef ? pRef->GetRefObjNum() : 0; pKid->m_PageObjNum = PageObjNum; pKid->m_ContentId = pKidDict->GetIntegerFor("MCID"); - } else if (type == "OBJR") { - if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { - return; - } + return; + } + + if (type == "OBJR") { pKid->m_Type = CPDF_StructKid::Object; CPDF_Reference* pObj = ToReference(pKidDict->GetObjectFor("Obj")); pKid->m_RefObjNum = pObj ? pObj->GetRefObjNum() : 0; pKid->m_PageObjNum = PageObjNum; - } else { - pKid->m_Type = CPDF_StructKid::Element; - pKid->m_pDict = pKidDict; - if (!m_pTree->m_pPage) { - pKid->m_pElement = - pdfium::MakeRetain<CPDF_StructElement>(m_pTree, this, pKidDict); - } else { - pKid->m_pElement = nullptr; - } + return; } -} -static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, - const CFX_ByteStringC& owner, - float nLevel = 0.0F) { - if (nLevel > nMaxRecursion) - return nullptr; - if (!pAttrs) - return nullptr; - CPDF_Dictionary* pDict = nullptr; - if (pAttrs->IsDictionary()) { - pDict = pAttrs->AsDictionary(); - } else if (CPDF_Stream* pStream = pAttrs->AsStream()) { - pDict = pStream->GetDict(); - } else if (CPDF_Array* pArray = pAttrs->AsArray()) { - for (uint32_t i = 0; i < pArray->GetCount(); i++) { - CPDF_Object* pElement = pArray->GetDirectObjectAt(i); - pDict = FindAttrDict(pElement, owner, nLevel + 1); - if (pDict) - return pDict; - } + pKid->m_Type = CPDF_StructKid::Element; + pKid->m_pDict = pKidDict; + if (m_pTree->GetPage()) { + pKid->m_pElement = nullptr; + return; } - if (pDict && pDict->GetStringFor("O") == 
owner) - return pDict; - return nullptr; + + pKid->m_pElement = + pdfium::MakeRetain<CPDF_StructElement>(m_pTree, this, pKidDict); } + CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, bool bInheritable, float fLevel) { - if (fLevel > nMaxRecursion) { + if (fLevel > nMaxRecursion) return nullptr; - } + if (bInheritable) { - CPDF_Object* pAttr = GetAttr(owner, name, false); - if (pAttr) { + if (CPDF_Object* pAttr = GetAttr(owner, name, false)) return pAttr; - } - if (!m_pParent) { + if (!m_pParent) return nullptr; - } return m_pParent->GetAttr(owner, name, true, fLevel + 1); } - CPDF_Object* pA = m_pDict->GetDirectObjectFor("A"); - if (pA) { - CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner); - if (pAttrDict) { - CPDF_Object* pAttr = pAttrDict->GetDirectObjectFor(CFX_ByteString(name)); - if (pAttr) { - return pAttr; - } + + if (CPDF_Object* pA = m_pDict->GetDirectObjectFor("A")) { + if (CPDF_Dictionary* dict = FindAttrDict(pA, owner)) { + if (CPDF_Object* attr = dict->GetDirectObjectFor(CFX_ByteString(name))) + return attr; } } + CPDF_Object* pC = m_pDict->GetDirectObjectFor("C"); if (!pC) return nullptr; - CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDictFor("ClassMap"); + CPDF_Dictionary* pClassMap = m_pTree->GetTreeRoot()->GetDictFor("ClassMap"); if (!pClassMap) return nullptr; @@ -358,12 +206,14 @@ CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner, } return nullptr; } + CFX_ByteString class_name = pC->GetString(); CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name); if (pClassDict && pClassDict->GetStringFor("O") == owner) return pClassDict->GetDirectObjectFor(CFX_ByteString(name)); return nullptr; } + CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, bool bInheritable, @@ -372,11 +222,11 @@ CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner, CPDF_Array* pArray = ToArray(pAttr); if (!pArray || 
subindex == -1) return pAttr; - if (subindex >= static_cast<int>(pArray->GetCount())) return pAttr; return pArray->GetDirectObjectAt(subindex); } + CFX_ByteString CPDF_StructElement::GetName(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, const CFX_ByteStringC& default_value, @@ -396,10 +246,11 @@ FX_ARGB CPDF_StructElement::GetColor(const CFX_ByteStringC& owner, CPDF_Array* pArray = ToArray(GetAttr(owner, name, bInheritable, subindex)); if (!pArray) return default_value; - return 0xff000000 | ((int)(pArray->GetNumberAt(0) * 255) << 16) | - ((int)(pArray->GetNumberAt(1) * 255) << 8) | - (int)(pArray->GetNumberAt(2) * 255); + return 0xff000000 | (static_cast<int>(pArray->GetNumberAt(0) * 255) << 16) | + (static_cast<int>(pArray->GetNumberAt(1) * 255) << 8) | + static_cast<int>(pArray->GetNumberAt(2) * 255); } + float CPDF_StructElement::GetNumber(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, float default_value, @@ -408,6 +259,7 @@ float CPDF_StructElement::GetNumber(const CFX_ByteStringC& owner, CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); return ToNumber(pAttr) ? pAttr->GetNumber() : default_value; } + int CPDF_StructElement::GetInteger(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, int default_value, diff --git a/core/fpdfdoc/tagged_int.h b/core/fpdfdoc/cpdf_structelement.h index cafcbd42aa..b227397431 100644 --- a/core/fpdfdoc/tagged_int.h +++ b/core/fpdfdoc/cpdf_structelement.h @@ -1,23 +1,25 @@ -// Copyright 2014 PDFium Authors. All rights reserved. +// Copyright 2017 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com -#ifndef CORE_FPDFDOC_TAGGED_INT_H_ -#define CORE_FPDFDOC_TAGGED_INT_H_ +#ifndef CORE_FPDFDOC_CPDF_STRUCTELEMENT_H_ +#define CORE_FPDFDOC_CPDF_STRUCTELEMENT_H_ -#include <map> -#include <memory> #include <vector> -#include "core/fpdfdoc/fpdf_tagged.h" #include "core/fxcrt/cfx_retain_ptr.h" -#include "third_party/base/stl_util.h" +#include "core/fxcrt/fx_string.h" +#include "core/fxge/fx_dib.h" +class CPDF_Dictionary; +class CPDF_Object; class CPDF_StructElement; +class CPDF_StructTree; -struct CPDF_StructKid { +class CPDF_StructKid { + public: CPDF_StructKid(); CPDF_StructKid(const CPDF_StructKid& that); ~CPDF_StructKid(); @@ -31,69 +33,42 @@ struct CPDF_StructKid { uint32_t m_ContentId; // For PageContent, StreamContent. }; -class CPDF_StructTree final : public IPDF_StructTree { - public: - explicit CPDF_StructTree(const CPDF_Document* pDoc); - ~CPDF_StructTree() override; - - // IPDF_StructTree: - int CountTopElements() const override; - IPDF_StructElement* GetTopElement(int i) const override; - - void LoadPageTree(const CPDF_Dictionary* pPageDict); - CFX_RetainPtr<CPDF_StructElement> AddPageNode( - CPDF_Dictionary* pElement, - std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map, - int nLevel = 0); - bool AddTopLevelNode(CPDF_Dictionary* pDict, - const CFX_RetainPtr<CPDF_StructElement>& pElement); - - protected: - const CPDF_Dictionary* const m_pTreeRoot; - const CPDF_Dictionary* const m_pRoleMap; - const CPDF_Dictionary* m_pPage; - std::vector<CFX_RetainPtr<CPDF_StructElement>> m_Kids; - - friend class CPDF_StructElement; -}; - -class CPDF_StructElement final : public CFX_Retainable, - public IPDF_StructElement { +class CPDF_StructElement : public CFX_Retainable { public: template <typename T, typename... Args> friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... 
args); - // IPDF_StructElement - IPDF_StructTree* GetTree() const override; - const CFX_ByteString& GetType() const override; - IPDF_StructElement* GetParent() const override; - CPDF_Dictionary* GetDict() const override; - int CountKids() const override; - IPDF_StructElement* GetKidIfElement(int index) const override; + CPDF_StructTree* GetTree() const { return m_pTree; } + const CFX_ByteString& GetType() const { return m_Type; } + CPDF_StructElement* GetParent() const { return m_pParent; } + CPDF_Dictionary* GetDict() const { return m_pDict; } + + int CountKids() const; + CPDF_StructElement* GetKidIfElement(int index) const; CPDF_Object* GetAttr(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, bool bInheritable = false, - float fLevel = 0.0F) override; + float fLevel = 0.0F); CFX_ByteString GetName(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, const CFX_ByteStringC& default_value, bool bInheritable = false, - int subindex = -1) override; + int subindex = -1); FX_ARGB GetColor(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, FX_ARGB default_value, bool bInheritable = false, - int subindex = -1) override; + int subindex = -1); float GetNumber(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, float default_value, bool bInheritable = false, - int subindex = -1) override; + int subindex = -1); int GetInteger(const CFX_ByteStringC& owner, const CFX_ByteStringC& name, int default_value, bool bInheritable = false, - int subindex = -1) override; + int subindex = -1); std::vector<CPDF_StructKid>* GetKids() { return &m_Kids; } void LoadKids(CPDF_Dictionary* pDict); @@ -116,4 +91,4 @@ class CPDF_StructElement final : public CFX_Retainable, std::vector<CPDF_StructKid> m_Kids; }; -#endif // CORE_FPDFDOC_TAGGED_INT_H_ +#endif // CORE_FPDFDOC_CPDF_STRUCTELEMENT_H_ diff --git a/core/fpdfdoc/cpdf_structtree.cpp b/core/fpdfdoc/cpdf_structtree.cpp new file mode 100644 index 0000000000..51ad2c775c --- /dev/null +++ 
b/core/fpdfdoc/cpdf_structtree.cpp @@ -0,0 +1,155 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfdoc/cpdf_structtree.h" + +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_reference.h" +#include "core/fpdfdoc/cpdf_numbertree.h" +#include "core/fpdfdoc/cpdf_structelement.h" +#include "third_party/base/stl_util.h" + +namespace { + +const int nMaxRecursion = 32; + +bool IsTagged(const CPDF_Document* pDoc) { + CPDF_Dictionary* pCatalog = pDoc->GetRoot(); + CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo"); + return pMarkInfo && pMarkInfo->GetIntegerFor("Marked"); +} + +} // namespace + +// static +std::unique_ptr<CPDF_StructTree> CPDF_StructTree::LoadPage( + const CPDF_Document* pDoc, + const CPDF_Dictionary* pPageDict) { + if (!IsTagged(pDoc)) + return nullptr; + + auto pTree = pdfium::MakeUnique<CPDF_StructTree>(pDoc); + pTree->LoadPageTree(pPageDict); + return pTree; +} + +CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc) + : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")), + m_pRoleMap(m_pTreeRoot ? 
m_pTreeRoot->GetDictFor("RoleMap") : nullptr), + m_pPage(nullptr) {} + +CPDF_StructTree::~CPDF_StructTree() {} + +int CPDF_StructTree::CountTopElements() const { + return pdfium::CollectionSize<int>(m_Kids); +} + +CPDF_StructElement* CPDF_StructTree::GetTopElement(int i) const { + return m_Kids[i].Get(); +} + +void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) { + m_pPage = pPageDict; + if (!m_pTreeRoot) + return; + + CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K"); + if (!pKids) + return; + + uint32_t dwKids = 0; + if (pKids->IsDictionary()) + dwKids = 1; + else if (CPDF_Array* pArray = pKids->AsArray()) + dwKids = pArray->GetCount(); + else + return; + + m_Kids.clear(); + m_Kids.resize(dwKids); + CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree"); + if (!pParentTree) + return; + + CPDF_NumberTree parent_tree(pParentTree); + int parents_id = pPageDict->GetIntegerFor("StructParents", -1); + if (parents_id < 0) + return; + + CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id)); + if (!pParentArray) + return; + + std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>> element_map; + for (size_t i = 0; i < pParentArray->GetCount(); i++) { + if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i)) + AddPageNode(pParent, &element_map); + } +} + +CFX_RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode( + CPDF_Dictionary* pDict, + std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map, + int nLevel) { + if (nLevel > nMaxRecursion) + return nullptr; + + auto it = map->find(pDict); + if (it != map->end()) + return it->second; + + auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict); + (*map)[pDict] = pElement; + CPDF_Dictionary* pParent = pDict->GetDictFor("P"); + if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") { + if (!AddTopLevelNode(pDict, pElement)) + map->erase(pDict); + return pElement; + } + + CFX_RetainPtr<CPDF_StructElement> 
pParentElement = + AddPageNode(pParent, map, nLevel + 1); + bool bSave = false; + for (CPDF_StructKid& kid : *pParentElement->GetKids()) { + if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) { + kid.m_pElement = pElement; + bSave = true; + } + } + if (!bSave) + map->erase(pDict); + return pElement; +} + +bool CPDF_StructTree::AddTopLevelNode( + CPDF_Dictionary* pDict, + const CFX_RetainPtr<CPDF_StructElement>& pElement) { + CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K"); + if (!pObj) + return false; + + if (pObj->IsDictionary()) { + if (pObj->GetObjNum() != pDict->GetObjNum()) + return false; + m_Kids[0] = pElement; + } + + CPDF_Array* pTopKids = pObj->AsArray(); + if (!pTopKids) + return true; + + bool bSave = false; + for (size_t i = 0; i < pTopKids->GetCount(); i++) { + CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i)); + if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) { + m_Kids[i] = pElement; + bSave = true; + } + } + return bSave; +} diff --git a/core/fpdfdoc/cpdf_structtree.h b/core/fpdfdoc/cpdf_structtree.h new file mode 100644 index 0000000000..20bf41e7ee --- /dev/null +++ b/core/fpdfdoc/cpdf_structtree.h @@ -0,0 +1,51 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#ifndef CORE_FPDFDOC_CPDF_STRUCTTREE_H_ +#define CORE_FPDFDOC_CPDF_STRUCTTREE_H_ + +#include <map> +#include <memory> +#include <vector> + +#include "core/fxcrt/cfx_retain_ptr.h" + +class CPDF_Dictionary; +class CPDF_Document; +class CPDF_StructElement; + +class CPDF_StructTree { + public: + static std::unique_ptr<CPDF_StructTree> LoadPage( + const CPDF_Document* pDoc, + const CPDF_Dictionary* pPageDict); + + explicit CPDF_StructTree(const CPDF_Document* pDoc); + ~CPDF_StructTree(); + + int CountTopElements() const; + CPDF_StructElement* GetTopElement(int i) const; + + void LoadPageTree(const CPDF_Dictionary* pPageDict); + CFX_RetainPtr<CPDF_StructElement> AddPageNode( + CPDF_Dictionary* pElement, + std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map, + int nLevel = 0); + bool AddTopLevelNode(CPDF_Dictionary* pDict, + const CFX_RetainPtr<CPDF_StructElement>& pElement); + + const CPDF_Dictionary* GetRoleMap() const { return m_pRoleMap; } + const CPDF_Dictionary* GetPage() const { return m_pPage; } + const CPDF_Dictionary* GetTreeRoot() const { return m_pTreeRoot; } + + private: + const CPDF_Dictionary* const m_pTreeRoot; + const CPDF_Dictionary* const m_pRoleMap; + const CPDF_Dictionary* m_pPage; + std::vector<CFX_RetainPtr<CPDF_StructElement>> m_Kids; +}; + +#endif // CORE_FPDFDOC_CPDF_STRUCTTREE_H_ diff --git a/core/fpdfdoc/fpdf_tagged.h b/core/fpdfdoc/fpdf_tagged.h deleted file mode 100644 index 5e7b1827c5..0000000000 --- a/core/fpdfdoc/fpdf_tagged.h +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#ifndef CORE_FPDFDOC_FPDF_TAGGED_H_ -#define CORE_FPDFDOC_FPDF_TAGGED_H_ - -#include <memory> - -#include "core/fxge/fx_dib.h" - -class CPDF_Dictionary; -class CPDF_Document; -class IPDF_StructElement; - -class IPDF_StructTree { - public: - static std::unique_ptr<IPDF_StructTree> LoadPage( - const CPDF_Document* pDoc, - const CPDF_Dictionary* pPageDict); - - - virtual int CountTopElements() const = 0; - virtual IPDF_StructElement* GetTopElement(int i) const = 0; - - protected: - friend std::default_delete<IPDF_StructTree>; - virtual ~IPDF_StructTree() {} -}; - -class IPDF_StructElement { - public: - virtual IPDF_StructTree* GetTree() const = 0; - virtual const CFX_ByteString& GetType() const = 0; - virtual IPDF_StructElement* GetParent() const = 0; - virtual CPDF_Dictionary* GetDict() const = 0; - virtual int CountKids() const = 0; - virtual IPDF_StructElement* GetKidIfElement(int index) const = 0; - - virtual CPDF_Object* GetAttr(const CFX_ByteStringC& owner, - const CFX_ByteStringC& name, - bool bInheritable = false, - float fLevel = 0.0F) = 0; - - virtual CFX_ByteString GetName(const CFX_ByteStringC& owner, - const CFX_ByteStringC& name, - const CFX_ByteStringC& default_value, - bool bInheritable = false, - int subindex = -1) = 0; - - virtual FX_ARGB GetColor(const CFX_ByteStringC& owner, - const CFX_ByteStringC& name, - FX_ARGB default_value, - bool bInheritable = false, - int subindex = -1) = 0; - - virtual float GetNumber(const CFX_ByteStringC& owner, - const CFX_ByteStringC& name, - float default_value, - bool bInheritable = false, - int subindex = -1) = 0; - - virtual int GetInteger(const CFX_ByteStringC& owner, - const CFX_ByteStringC& name, - int default_value, - bool bInheritable = false, - int subindex = -1) = 0; - - protected: - virtual ~IPDF_StructElement() {} -}; - -#endif // CORE_FPDFDOC_FPDF_TAGGED_H_ diff --git a/fpdfsdk/fpdf_structtree.cpp b/fpdfsdk/fpdf_structtree.cpp index 8a93d2299d..96d40b41c2 100644 --- 
a/fpdfsdk/fpdf_structtree.cpp +++ b/fpdfsdk/fpdf_structtree.cpp @@ -8,17 +8,18 @@ #include "core/fpdfapi/page/cpdf_page.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" -#include "core/fpdfdoc/fpdf_tagged.h" +#include "core/fpdfdoc/cpdf_structelement.h" +#include "core/fpdfdoc/cpdf_structtree.h" #include "fpdfsdk/fsdk_define.h" namespace { -IPDF_StructTree* ToStructTree(FPDF_STRUCTTREE struct_tree) { - return reinterpret_cast<IPDF_StructTree*>(struct_tree); +CPDF_StructTree* ToStructTree(FPDF_STRUCTTREE struct_tree) { + return reinterpret_cast<CPDF_StructTree*>(struct_tree); } -IPDF_StructElement* ToStructTreeElement(FPDF_STRUCTELEMENT struct_element) { - return reinterpret_cast<IPDF_StructElement*>(struct_element); +CPDF_StructElement* ToStructTreeElement(FPDF_STRUCTELEMENT struct_element) { + return reinterpret_cast<CPDF_StructElement*>(struct_element); } unsigned long WideStringToBuffer(const CFX_WideString& str, @@ -40,23 +41,23 @@ DLLEXPORT FPDF_STRUCTTREE STDCALL FPDF_StructTree_GetForPage(FPDF_PAGE page) { CPDF_Page* pPage = CPDFPageFromFPDFPage(page); if (!pPage) return nullptr; - return IPDF_StructTree::LoadPage(pPage->m_pDocument, pPage->m_pFormDict) + return CPDF_StructTree::LoadPage(pPage->m_pDocument, pPage->m_pFormDict) .release(); } DLLEXPORT void STDCALL FPDF_StructTree_Close(FPDF_STRUCTTREE struct_tree) { - std::unique_ptr<IPDF_StructTree>(ToStructTree(struct_tree)); + std::unique_ptr<CPDF_StructTree>(ToStructTree(struct_tree)); } DLLEXPORT int STDCALL FPDF_StructTree_CountChildren(FPDF_STRUCTTREE struct_tree) { - IPDF_StructTree* tree = ToStructTree(struct_tree); + CPDF_StructTree* tree = ToStructTree(struct_tree); return tree ? 
tree->CountTopElements() : -1; } DLLEXPORT FPDF_STRUCTELEMENT STDCALL FPDF_StructTree_GetChildAtIndex(FPDF_STRUCTTREE struct_tree, int index) { - IPDF_StructTree* tree = ToStructTree(struct_tree); + CPDF_StructTree* tree = ToStructTree(struct_tree); if (!tree || index < 0 || index >= tree->CountTopElements()) return nullptr; return tree->GetTopElement(index); @@ -66,7 +67,7 @@ DLLEXPORT unsigned long STDCALL FPDF_StructElement_GetAltText(FPDF_STRUCTELEMENT struct_element, void* buffer, unsigned long buflen) { - IPDF_StructElement* elem = ToStructTreeElement(struct_element); + CPDF_StructElement* elem = ToStructTreeElement(struct_element); return (elem && elem->GetDict()) ? WideStringToBuffer(elem->GetDict()->GetUnicodeTextFor("Alt"), buffer, buflen) @@ -77,21 +78,21 @@ DLLEXPORT unsigned long STDCALL FPDF_StructElement_GetType(FPDF_STRUCTELEMENT struct_element, void* buffer, unsigned long buflen) { - IPDF_StructElement* elem = ToStructTreeElement(struct_element); + CPDF_StructElement* elem = ToStructTreeElement(struct_element); return elem ? WideStringToBuffer(elem->GetType().UTF8Decode(), buffer, buflen) : 0; } DLLEXPORT int STDCALL FPDF_StructElement_CountChildren(FPDF_STRUCTELEMENT struct_element) { - IPDF_StructElement* elem = ToStructTreeElement(struct_element); + CPDF_StructElement* elem = ToStructTreeElement(struct_element); return elem ? elem->CountKids() : -1; } DLLEXPORT FPDF_STRUCTELEMENT STDCALL FPDF_StructElement_GetChildAtIndex(FPDF_STRUCTELEMENT struct_element, int index) { - IPDF_StructElement* elem = ToStructTreeElement(struct_element); + CPDF_StructElement* elem = ToStructTreeElement(struct_element); if (!elem || index < 0 || index >= elem->CountKids()) return nullptr; |