From a5d2929220bb08bfdc34505ca4edf5be6d9de94f Mon Sep 17 00:00:00 2001 From: Jane Liu Date: Mon, 24 Jul 2017 11:47:23 -0400 Subject: Added CPDF_NameTree::AddValueAndName() 1. Added CPDF_NameTree::AddValueAndName() for inserting new name and values pairs into a nametree while following nametree order rules. This function will be used for the API for adding embedded files. * Added anonymous helper functions. * Added two unit tests. Bug=pdfium:174 Change-Id: Ic3e2f32f7147ef5c164cf5c2a28e0a652ffed74a Reviewed-on: https://pdfium-review.googlesource.com/8271 Reviewed-by: dsinclair Commit-Queue: Jane Liu --- core/fpdfdoc/cpdf_nametree.cpp | 140 +++++++++++++++++++++++--- core/fpdfdoc/cpdf_nametree.h | 5 + core/fpdfdoc/cpdf_nametree_unittest.cpp | 167 ++++++++++++++++++++++++++++++++ 3 files changed, 299 insertions(+), 13 deletions(-) diff --git a/core/fpdfdoc/cpdf_nametree.cpp b/core/fpdfdoc/cpdf_nametree.cpp index f30a27c681..4b4e7e81c2 100644 --- a/core/fpdfdoc/cpdf_nametree.cpp +++ b/core/fpdfdoc/cpdf_nametree.cpp @@ -6,38 +6,96 @@ #include "core/fpdfdoc/cpdf_nametree.h" +#include +#include + #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/cpdf_string.h" #include "core/fpdfapi/parser/fpdf_parser_decode.h" namespace { const int nMaxRecursion = 32; +// Get the limit arrays that leaf array |pFind| is under in the tree with root +// |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before +// the root. Return true if successful. +bool GetNodeLimits(const CPDF_Dictionary* pNode, + const CPDF_Array* pFind, + int nLevel, + std::vector* pLimits) { + if (nLevel > nMaxRecursion) + return false; + + if (pNode->GetArrayFor("Names") == pFind) { + pLimits->push_back(pNode->GetArrayFor("Limits")); + return true; + } + + CPDF_Array* pKids = pNode->GetArrayFor("Kids"); + if (!pKids) + return false; + + for (size_t i = 0; i < pKids->GetCount(); ++i) { + CPDF_Dictionary* pKid = pKids->GetDictAt(i); + if (!pKid) + continue; + + if (GetNodeLimits(pKid, pFind, nLevel + 1, pLimits)) { + pLimits->push_back(pNode->GetArrayFor("Limits")); + return true; + } + } + return false; +} + +// Search for |csName| in the tree with root |pNode|. If successful, return the +// value that |csName| points to; |nIndex| will be the index of |csName|, +// |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex| +// will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind| +// will be the leaf array that |csName| should be added to, and |pFindIndex| +// will be the index that it should be added at. CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, const CFX_WideString& csName, size_t& nIndex, + int nLevel, CPDF_Array** ppFind, - int nLevel = 0) { + int* pFindIndex) { if (nLevel > nMaxRecursion) return nullptr; CPDF_Array* pLimits = pNode->GetArrayFor("Limits"); + CPDF_Array* pNames = pNode->GetArrayFor("Names"); if (pLimits) { CFX_WideString csLeft = pLimits->GetUnicodeTextAt(0); CFX_WideString csRight = pLimits->GetUnicodeTextAt(1); + // If the lower limit is greater than the higher limit, swap them. if (csLeft.Compare(csRight) > 0) { - CFX_WideString csTmp = csRight; - csRight = csLeft; - csLeft = csTmp; + pLimits->SetNewAt(0, csRight); + pLimits->SetNewAt(1, csLeft); + csLeft = pLimits->GetUnicodeTextAt(0); + csRight = pLimits->GetUnicodeTextAt(1); } - if (csName.Compare(csLeft) < 0 || csName.Compare(csRight) > 0) { + // Skip this node if the name to look for is smaller than its lower limit. + if (csName.Compare(csLeft) < 0) + return nullptr; + + // Skip this node if the name to look for is greater than its higher limit, + // and the node itself is a leaf node. + if (csName.Compare(csRight) > 0 && pNames) { + if (ppFind) + *ppFind = pNames; + + if (pFindIndex) + *pFindIndex = pNames->GetCount() / 2 - 1; + return nullptr; } } - CPDF_Array* pNames = pNode->GetArrayFor("Names"); + // If the node is a leaf node, look for the name in its names array. if (pNames) { size_t dwCount = pNames->GetCount() / 2; for (size_t i = 0; i < dwCount; i++) { @@ -48,6 +106,10 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, if (ppFind) *ppFind = pNames; + + if (pFindIndex) + *pFindIndex = i; + if (iCompare < 0) continue; @@ -58,6 +120,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, return nullptr; } + // Search through the node's children. CPDF_Array* pKids = pNode->GetArrayFor("Kids"); if (!pKids) return nullptr; @@ -68,19 +131,22 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, continue; CPDF_Object* pFound = - SearchNameNode(pKid, csName, nIndex, ppFind, nLevel + 1); + SearchNameNode(pKid, csName, nIndex, nLevel + 1, ppFind, pFindIndex); if (pFound) return pFound; } return nullptr; } +// Get the key-value pair at |nIndex| in the tree with root |pNode|. If +// successful, return the value object; |csName| will be the key, and |ppFind| +// will be the leaf array that this pair is in. CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, size_t nIndex, size_t& nCurIndex, + int nLevel, CFX_WideString* csName, - CPDF_Array** ppFind, - int nLevel = 0) { + CPDF_Array** ppFind) { if (nLevel > nMaxRecursion) return nullptr; @@ -104,13 +170,14 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, if (!pKid) continue; CPDF_Object* pFound = - SearchNameNode(pKid, nIndex, nCurIndex, csName, ppFind, nLevel + 1); + SearchNameNode(pKid, nIndex, nCurIndex, nLevel + 1, csName, ppFind); if (pFound) return pFound; } return nullptr; } +// Get the total number of key-value pairs in the tree with root |pNode|. size_t CountNames(CPDF_Dictionary* pNode, int nLevel = 0) { if (nLevel > nMaxRecursion) return 0; @@ -163,11 +230,58 @@ int CPDF_NameTree::GetIndex(const CFX_WideString& csName) const { return -1; size_t nIndex = 0; - if (!SearchNameNode(m_pRoot.Get(), csName, nIndex, nullptr)) + if (!SearchNameNode(m_pRoot.Get(), csName, nIndex, 0, nullptr, nullptr)) return -1; return nIndex; } +bool CPDF_NameTree::AddValueAndName(std::unique_ptr pObj, + const CFX_WideString& name) { + if (!m_pRoot) + return false; + + size_t nIndex = 0; + CPDF_Array* pFind = nullptr; + int nFindIndex = -1; + // Fail if the tree already contains this name or if the tree is too deep. + if (SearchNameNode(m_pRoot.Get(), name, nIndex, 0, &pFind, &nFindIndex)) + return false; + + // If the returned |pFind| is a nullptr, then |name| is smaller than all + // existing entries in the tree, and we did not find a leaf array to place + // |name| into. We instead will find the leftmost leaf array in which to place + // |name| and |pObj|. + if (!pFind) { + size_t nCurIndex = 0; + CFX_WideString csName; + SearchNameNode(m_pRoot.Get(), 0, nCurIndex, 0, &csName, &pFind); + } + ASSERT(pFind); + + // Insert the name and the object into the leaf array found. Note that the + // insertion position is right after the key-value pair returned by |index|. + size_t nNameIndex = (nFindIndex + 1) * 2; + size_t nValueIndex = nNameIndex + 1; + pFind->InsertNewAt(nNameIndex, name); + pFind->InsertAt(nValueIndex, std::move(pObj)); + + // Expand the limits that the newly added name is under, if the name falls + // outside of the limits of its leaf array or any arrays above it. + std::vector pLimits; + GetNodeLimits(m_pRoot.Get(), pFind, 0, &pLimits); + for (auto* pLimit : pLimits) { + if (!pLimit) + continue; + + if (name.Compare(pLimit->GetUnicodeTextAt(0)) < 0) + pLimit->SetNewAt(0, name); + + if (name.Compare(pLimit->GetUnicodeTextAt(1)) > 0) + pLimit->SetNewAt(1, name); + } + return true; +} + CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex, CFX_WideString* csName) const { csName->clear(); @@ -175,7 +289,7 @@ CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex, return nullptr; size_t nCurIndex = 0; - return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, csName, nullptr); + return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, 0, csName, nullptr); } CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const { @@ -183,7 +297,7 @@ CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const { return nullptr; size_t nIndex = 0; - return SearchNameNode(m_pRoot.Get(), csName, nIndex, nullptr); + return SearchNameNode(m_pRoot.Get(), csName, nIndex, 0, nullptr, nullptr); } CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc, diff --git a/core/fpdfdoc/cpdf_nametree.h b/core/fpdfdoc/cpdf_nametree.h index a56f511783..28239233ea 100644 --- a/core/fpdfdoc/cpdf_nametree.h +++ b/core/fpdfdoc/cpdf_nametree.h @@ -7,6 +7,8 @@ #ifndef CORE_FPDFDOC_CPDF_NAMETREE_H_ #define CORE_FPDFDOC_CPDF_NAMETREE_H_ +#include + #include "core/fxcrt/cfx_unowned_ptr.h" #include "core/fxcrt/fx_string.h" @@ -21,6 +23,9 @@ class CPDF_NameTree { CPDF_NameTree(CPDF_Document* pDoc, const CFX_ByteString& category); ~CPDF_NameTree(); + bool AddValueAndName(std::unique_ptr pObj, + const CFX_WideString& name); + CPDF_Object* LookupValueAndName(int nIndex, CFX_WideString* csName) const; CPDF_Object* LookupValue(const CFX_WideString& csName) const; CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_WideString& sName); diff --git a/core/fpdfdoc/cpdf_nametree_unittest.cpp b/core/fpdfdoc/cpdf_nametree_unittest.cpp index bffb496843..4842f06a8e 100644 --- a/core/fpdfdoc/cpdf_nametree_unittest.cpp +++ b/core/fpdfdoc/cpdf_nametree_unittest.cpp @@ -9,6 +9,40 @@ #include "core/fpdfapi/parser/cpdf_string.h" #include "testing/gtest/include/gtest/gtest.h" +namespace { + +void AddNameKeyValue(CPDF_Array* pNames, const char* key, const int value) { + pNames->AddNew(key, false); + pNames->AddNew(value); +} + +void CheckNameKeyValue(CPDF_Array* pNames, + const int index, + const char* key, + const int value) { + EXPECT_STREQ(key, pNames->GetStringAt(index * 2).c_str()); + EXPECT_EQ(value, pNames->GetIntegerAt(index * 2 + 1)); +} + +void AddLimitsArray(CPDF_Dictionary* pNode, + const char* least, + const char* greatest) { + CPDF_Array* pLimits = pNode->SetNewFor("Limits"); + pLimits->AddNew(least, false); + pLimits->AddNew(greatest, false); +} + +void CheckLimitsArray(CPDF_Dictionary* pNode, + const char* least, + const char* greatest) { + CPDF_Array* pLimits = pNode->GetArrayFor("Limits"); + ASSERT_TRUE(pLimits); + EXPECT_STREQ(least, pLimits->GetStringAt(0).c_str()); + EXPECT_STREQ(greatest, pLimits->GetStringAt(1).c_str()); +} + +} // namespace + TEST(cpdf_nametree, GetUnicodeNameWithBOM) { // Set up the root dictionary with a Names array. auto pRootDict = pdfium::MakeUnique(); @@ -34,3 +68,136 @@ TEST(cpdf_nametree, GetUnicodeNameWithBOM) { ASSERT_TRUE(pObj->IsNumber()); EXPECT_EQ(100, pObj->AsNumber()->GetInteger()); } + +TEST(cpdf_nametree, AddIntoNames) { + // Set up a name tree with a single Names array. + auto pRootDict = pdfium::MakeUnique(); + CPDF_Array* pNames = pRootDict->SetNewFor("Names"); + AddNameKeyValue(pNames, "2.txt", 222); + AddNameKeyValue(pNames, "7.txt", 777); + + CPDF_NameTree nameTree(pRootDict.get()); + pNames = nameTree.GetRoot()->GetArrayFor("Names"); + + // Insert a name that already exists in the names array. + EXPECT_FALSE( + nameTree.AddValueAndName(pdfium::MakeUnique(111), L"2.txt")); + + // Insert in the beginning of the names array. + EXPECT_TRUE( + nameTree.AddValueAndName(pdfium::MakeUnique(111), L"1.txt")); + + // Insert in the middle of the names array. + EXPECT_TRUE( + nameTree.AddValueAndName(pdfium::MakeUnique(555), L"5.txt")); + + // Insert at the end of the names array. + EXPECT_TRUE( + nameTree.AddValueAndName(pdfium::MakeUnique(999), L"9.txt")); + + // Check that the names array has the expected key-value pairs. + CheckNameKeyValue(pNames, 0, "1.txt", 111); + CheckNameKeyValue(pNames, 1, "2.txt", 222); + CheckNameKeyValue(pNames, 2, "5.txt", 555); + CheckNameKeyValue(pNames, 3, "7.txt", 777); + CheckNameKeyValue(pNames, 4, "9.txt", 999); +} + +TEST(cpdf_nametree, AddIntoKids) { + // Set up a name tree with five nodes of three levels. + auto pRootDict = pdfium::MakeUnique(); + CPDF_Array* pKids = pRootDict->SetNewFor("Kids"); + CPDF_Dictionary* pKid1 = pKids->AddNew(); + + AddLimitsArray(pKid1, "1.txt", "9.txt"); + pKids = pKid1->SetNewFor("Kids"); + CPDF_Dictionary* pKid2 = pKids->AddNew(); + CPDF_Dictionary* pKid3 = pKids->AddNew(); + + AddLimitsArray(pKid2, "1.txt", "5.txt"); + pKids = pKid2->SetNewFor("Kids"); + CPDF_Dictionary* pKid4 = pKids->AddNew(); + CPDF_Dictionary* pKid5 = pKids->AddNew(); + + AddLimitsArray(pKid3, "9.txt", "9.txt"); + CPDF_Array* pNames = pKid3->SetNewFor("Names"); + AddNameKeyValue(pNames, "9.txt", 999); + + AddLimitsArray(pKid4, "1.txt", "2.txt"); + pNames = pKid4->SetNewFor("Names"); + AddNameKeyValue(pNames, "1.txt", 111); + AddNameKeyValue(pNames, "2.txt", 222); + + AddLimitsArray(pKid5, "3.txt", "5.txt"); + pNames = pKid5->SetNewFor("Names"); + AddNameKeyValue(pNames, "3.txt", 333); + AddNameKeyValue(pNames, "5.txt", 555); + + CPDF_NameTree nameTree(pRootDict.get()); + + // Check that adding an existing name would fail. + EXPECT_FALSE( + nameTree.AddValueAndName(pdfium::MakeUnique(444), L"9.txt")); + + // Add a name within the limits of a leaf node. + EXPECT_TRUE( + nameTree.AddValueAndName(pdfium::MakeUnique(444), L"4.txt")); + ASSERT_TRUE(nameTree.LookupValue(L"4.txt")); + EXPECT_EQ(444, nameTree.LookupValue(L"4.txt")->GetInteger()); + + // Add a name that requires changing the limits of two bottom levels. + EXPECT_TRUE( + nameTree.AddValueAndName(pdfium::MakeUnique(666), L"6.txt")); + ASSERT_TRUE(nameTree.LookupValue(L"6.txt")); + EXPECT_EQ(666, nameTree.LookupValue(L"6.txt")->GetInteger()); + + // Add a name that requires changing the limits of two top levels. + EXPECT_TRUE( + nameTree.AddValueAndName(pdfium::MakeUnique(99), L"99.txt")); + ASSERT_TRUE(nameTree.LookupValue(L"99.txt")); + EXPECT_EQ(99, nameTree.LookupValue(L"99.txt")->GetInteger()); + + // Add a name that requires changing the lower limit of all levels. + EXPECT_TRUE( + nameTree.AddValueAndName(pdfium::MakeUnique(-5), L"0.txt")); + ASSERT_TRUE(nameTree.LookupValue(L"0.txt")); + EXPECT_EQ(-5, nameTree.LookupValue(L"0.txt")->GetInteger()); + + // Check that the node on the first level has the expected limits. + pKid1 = nameTree.GetRoot()->GetArrayFor("Kids")->GetDictAt(0); + ASSERT_TRUE(pKid1); + CheckLimitsArray(pKid1, "0.txt", "99.txt"); + + // Check that the nodes on the second level has the expected limits and names. + pKid2 = pKid1->GetArrayFor("Kids")->GetDictAt(0); + ASSERT_TRUE(pKid2); + CheckLimitsArray(pKid2, "0.txt", "6.txt"); + + pKid3 = pKid1->GetArrayFor("Kids")->GetDictAt(1); + ASSERT_TRUE(pKid3); + CheckLimitsArray(pKid3, "9.txt", "99.txt"); + pNames = pKid3->GetArrayFor("Names"); + ASSERT_TRUE(pNames); + CheckNameKeyValue(pNames, 0, "9.txt", 999); + CheckNameKeyValue(pNames, 1, "99.txt", 99); + + // Check that the nodes on the third level has the expected limits and names. + pKid4 = pKid2->GetArrayFor("Kids")->GetDictAt(0); + ASSERT_TRUE(pKid4); + CheckLimitsArray(pKid4, "0.txt", "2.txt"); + pNames = pKid4->GetArrayFor("Names"); + ASSERT_TRUE(pNames); + CheckNameKeyValue(pNames, 0, "0.txt", -5); + CheckNameKeyValue(pNames, 1, "1.txt", 111); + CheckNameKeyValue(pNames, 2, "2.txt", 222); + + pKid5 = pKid2->GetArrayFor("Kids")->GetDictAt(1); + ASSERT_TRUE(pKid5); + CheckLimitsArray(pKid5, "3.txt", "6.txt"); + pNames = pKid5->GetArrayFor("Names"); + ASSERT_TRUE(pNames); + CheckNameKeyValue(pNames, 0, "3.txt", 333); + CheckNameKeyValue(pNames, 1, "4.txt", 444); + CheckNameKeyValue(pNames, 2, "5.txt", 555); + CheckNameKeyValue(pNames, 3, "6.txt", 666); +} -- cgit v1.2.3