From 7f389615a0fca78532482d6f4070d18c5d2f9f5d Mon Sep 17 00:00:00 2001 From: dan sinclair Date: Thu, 6 Apr 2017 13:38:54 -0400 Subject: Cleanup the tagged code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL removes the IPDF_Struct* classes in favour of their only implementation. The tagged code was split out into files matching the classes they contain. The friendship between CPDF_StructTree and CPDF_StructElement was broken in favour of accessors. Bug: pdfium:672 Change-Id: Iade83b608fb7168b3b0f41338d10d5fd8ab91a6e Reviewed-on: https://pdfium-review.googlesource.com/3820 Reviewed-by: Tom Sepez Reviewed-by: Nicolás Peña Commit-Queue: dsinclair --- core/fpdfdoc/cpdf_structtree.cpp | 155 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 core/fpdfdoc/cpdf_structtree.cpp (limited to 'core/fpdfdoc/cpdf_structtree.cpp') diff --git a/core/fpdfdoc/cpdf_structtree.cpp b/core/fpdfdoc/cpdf_structtree.cpp new file mode 100644 index 0000000000..51ad2c775c --- /dev/null +++ b/core/fpdfdoc/cpdf_structtree.cpp @@ -0,0 +1,155 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fpdfdoc/cpdf_structtree.h" + +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_reference.h" +#include "core/fpdfdoc/cpdf_numbertree.h" +#include "core/fpdfdoc/cpdf_structelement.h" +#include "third_party/base/stl_util.h" + +namespace { + +const int nMaxRecursion = 32; + +bool IsTagged(const CPDF_Document* pDoc) { + CPDF_Dictionary* pCatalog = pDoc->GetRoot(); + CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo"); + return pMarkInfo && pMarkInfo->GetIntegerFor("Marked"); +} + +} // namespace + +// static +std::unique_ptr CPDF_StructTree::LoadPage( + const CPDF_Document* pDoc, + const CPDF_Dictionary* pPageDict) { + if (!IsTagged(pDoc)) + return nullptr; + + auto pTree = pdfium::MakeUnique(pDoc); + pTree->LoadPageTree(pPageDict); + return pTree; +} + +CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc) + : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")), + m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr), + m_pPage(nullptr) {} + +CPDF_StructTree::~CPDF_StructTree() {} + +int CPDF_StructTree::CountTopElements() const { + return pdfium::CollectionSize(m_Kids); +} + +CPDF_StructElement* CPDF_StructTree::GetTopElement(int i) const { + return m_Kids[i].Get(); +} + +void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) { + m_pPage = pPageDict; + if (!m_pTreeRoot) + return; + + CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K"); + if (!pKids) + return; + + uint32_t dwKids = 0; + if (pKids->IsDictionary()) + dwKids = 1; + else if (CPDF_Array* pArray = pKids->AsArray()) + dwKids = pArray->GetCount(); + else + return; + + m_Kids.clear(); + m_Kids.resize(dwKids); + CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree"); + if (!pParentTree) + return; + + CPDF_NumberTree parent_tree(pParentTree); + int parents_id = pPageDict->GetIntegerFor("StructParents", -1); + if (parents_id < 0) + return; + + CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id)); + if (!pParentArray) + return; + + std::map> element_map; + for (size_t i = 0; i < pParentArray->GetCount(); i++) { + if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i)) + AddPageNode(pParent, &element_map); + } +} + +CFX_RetainPtr CPDF_StructTree::AddPageNode( + CPDF_Dictionary* pDict, + std::map>* map, + int nLevel) { + if (nLevel > nMaxRecursion) + return nullptr; + + auto it = map->find(pDict); + if (it != map->end()) + return it->second; + + auto pElement = pdfium::MakeRetain(this, nullptr, pDict); + (*map)[pDict] = pElement; + CPDF_Dictionary* pParent = pDict->GetDictFor("P"); + if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") { + if (!AddTopLevelNode(pDict, pElement)) + map->erase(pDict); + return pElement; + } + + CFX_RetainPtr pParentElement = + AddPageNode(pParent, map, nLevel + 1); + bool bSave = false; + for (CPDF_StructKid& kid : *pParentElement->GetKids()) { + if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) { + kid.m_pElement = pElement; + bSave = true; + } + } + if (!bSave) + map->erase(pDict); + return pElement; +} + +bool CPDF_StructTree::AddTopLevelNode( + CPDF_Dictionary* pDict, + const CFX_RetainPtr& pElement) { + CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K"); + if (!pObj) + return false; + + if (pObj->IsDictionary()) { + if (pObj->GetObjNum() != pDict->GetObjNum()) + return false; + m_Kids[0] = pElement; + } + + CPDF_Array* pTopKids = pObj->AsArray(); + if (!pTopKids) + return true; + + bool bSave = false; + for (size_t i = 0; i < pTopKids->GetCount(); i++) { + CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i)); + if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) { + m_Kids[i] = pElement; + bSave = true; + } + } + return bSave; +} -- cgit v1.2.3