From a5d2929220bb08bfdc34505ca4edf5be6d9de94f Mon Sep 17 00:00:00 2001
From: Jane Liu <janeliulwq@google.com>
Date: Mon, 24 Jul 2017 11:47:23 -0400
Subject: Added CPDF_NameTree::AddValueAndName()

1. Added CPDF_NameTree::AddValueAndName() for inserting new name and
values pairs into a nametree while following nametree order rules. This
function will be used for the API for adding embedded files.
    * Added anonymous helper functions.
    * Added two unit tests.

Bug=pdfium:174

Change-Id: Ic3e2f32f7147ef5c164cf5c2a28e0a652ffed74a
Reviewed-on: https://pdfium-review.googlesource.com/8271
Reviewed-by: dsinclair <dsinclair@chromium.org>
Commit-Queue: Jane Liu <janeliulwq@google.com>
---
 core/fpdfdoc/cpdf_nametree.cpp          | 140 +++++++++++++++++++++++---
 core/fpdfdoc/cpdf_nametree.h            |   5 +
 core/fpdfdoc/cpdf_nametree_unittest.cpp | 167 ++++++++++++++++++++++++++++++++
 3 files changed, 299 insertions(+), 13 deletions(-)
diff --git a/core/fpdfdoc/cpdf_nametree.cpp b/core/fpdfdoc/cpdf_nametree.cpp
index f30a27c681..4b4e7e81c2 100644
--- a/core/fpdfdoc/cpdf_nametree.cpp
+++ b/core/fpdfdoc/cpdf_nametree.cpp
@@ -6,38 +6,96 @@
 
 #include "core/fpdfdoc/cpdf_nametree.h"
 
+#include <utility>
+#include <vector>
+
 #include "core/fpdfapi/parser/cpdf_array.h"
 #include "core/fpdfapi/parser/cpdf_dictionary.h"
 #include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/cpdf_string.h"
 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
 
 namespace {
 
 const int nMaxRecursion = 32;
 
+// Get the limit arrays that leaf array |pFind| is under in the tree with root
+// |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before
+// the root. Return true if successful.
+bool GetNodeLimits(const CPDF_Dictionary* pNode,
+                   const CPDF_Array* pFind,
+                   int nLevel,
+                   std::vector<CPDF_Array*>* pLimits) {
+  if (nLevel > nMaxRecursion)
+    return false;
+
+  if (pNode->GetArrayFor("Names") == pFind) {
+    pLimits->push_back(pNode->GetArrayFor("Limits"));
+    return true;
+  }
+
+  CPDF_Array* pKids = pNode->GetArrayFor("Kids");
+  if (!pKids)
+    return false;
+
+  for (size_t i = 0; i < pKids->GetCount(); ++i) {
+    CPDF_Dictionary* pKid = pKids->GetDictAt(i);
+    if (!pKid)
+      continue;
+
+    if (GetNodeLimits(pKid, pFind, nLevel + 1, pLimits)) {
+      pLimits->push_back(pNode->GetArrayFor("Limits"));
+      return true;
+    }
+  }
+  return false;
+}
+
+// Search for |csName| in the tree with root |pNode|. If successful, return the
+// value that |csName| points to; |nIndex| will be the index of |csName|,
+// |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex|
+// will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind|
+// will be the leaf array that |csName| should be added to, and |pFindIndex|
+// will be the index that it should be added at.
 CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
                             const CFX_WideString& csName,
                             size_t& nIndex,
+                            int nLevel,
                             CPDF_Array** ppFind,
-                            int nLevel = 0) {
+                            int* pFindIndex) {
   if (nLevel > nMaxRecursion)
     return nullptr;
 
   CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
+  CPDF_Array* pNames = pNode->GetArrayFor("Names");
   if (pLimits) {
     CFX_WideString csLeft = pLimits->GetUnicodeTextAt(0);
     CFX_WideString csRight = pLimits->GetUnicodeTextAt(1);
+    // If the lower limit is greater than the higher limit, swap them.
     if (csLeft.Compare(csRight) > 0) {
-      CFX_WideString csTmp = csRight;
-      csRight = csLeft;
-      csLeft = csTmp;
+      pLimits->SetNewAt<CPDF_String>(0, csRight);
+      pLimits->SetNewAt<CPDF_String>(1, csLeft);
+      csLeft = pLimits->GetUnicodeTextAt(0);
+      csRight = pLimits->GetUnicodeTextAt(1);
     }
-    if (csName.Compare(csLeft) < 0 || csName.Compare(csRight) > 0) {
+    // Skip this node if the name to look for is smaller than its lower limit.
+    if (csName.Compare(csLeft) < 0)
+      return nullptr;
+
+    // Skip this node if the name to look for is greater than its higher limit,
+    // and the node itself is a leaf node.
+    if (csName.Compare(csRight) > 0 && pNames) {
+      if (ppFind)
+        *ppFind = pNames;
+
+      if (pFindIndex)
+        *pFindIndex = pNames->GetCount() / 2 - 1;
+
       return nullptr;
     }
   }
 
-  CPDF_Array* pNames = pNode->GetArrayFor("Names");
+  // If the node is a leaf node, look for the name in its names array.
   if (pNames) {
     size_t dwCount = pNames->GetCount() / 2;
     for (size_t i = 0; i < dwCount; i++) {
@@ -48,6 +106,10 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
 
       if (ppFind)
         *ppFind = pNames;
+
+      if (pFindIndex)
+        *pFindIndex = i;
+
       if (iCompare < 0)
         continue;
 
@@ -58,6 +120,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
     return nullptr;
   }
 
+  // Search through the node's children.
   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
   if (!pKids)
     return nullptr;
@@ -68,19 +131,22 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
       continue;
 
     CPDF_Object* pFound =
-        SearchNameNode(pKid, csName, nIndex, ppFind, nLevel + 1);
+        SearchNameNode(pKid, csName, nIndex, nLevel + 1, ppFind, pFindIndex);
     if (pFound)
       return pFound;
   }
   return nullptr;
 }
 
+// Get the key-value pair at |nIndex| in the tree with root |pNode|. If
+// successful, return the value object; |csName| will be the key, and |ppFind|
+// will be the leaf array that this pair is in.
 CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
                             size_t nIndex,
                             size_t& nCurIndex,
+                            int nLevel,
                             CFX_WideString* csName,
-                            CPDF_Array** ppFind,
-                            int nLevel = 0) {
+                            CPDF_Array** ppFind) {
   if (nLevel > nMaxRecursion)
     return nullptr;
 
@@ -104,13 +170,14 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
     if (!pKid)
       continue;
     CPDF_Object* pFound =
-        SearchNameNode(pKid, nIndex, nCurIndex, csName, ppFind, nLevel + 1);
+        SearchNameNode(pKid, nIndex, nCurIndex, nLevel + 1, csName, ppFind);
     if (pFound)
       return pFound;
   }
   return nullptr;
 }
 
+// Get the total number of key-value pairs in the tree with root |pNode|.
 size_t CountNames(CPDF_Dictionary* pNode, int nLevel = 0) {
   if (nLevel > nMaxRecursion)
     return 0;
@@ -163,11 +230,58 @@ int CPDF_NameTree::GetIndex(const CFX_WideString& csName) const {
     return -1;
 
   size_t nIndex = 0;
-  if (!SearchNameNode(m_pRoot.Get(), csName, nIndex, nullptr))
+  if (!SearchNameNode(m_pRoot.Get(), csName, nIndex, 0, nullptr, nullptr))
     return -1;
   return nIndex;
 }
 
+bool CPDF_NameTree::AddValueAndName(std::unique_ptr<CPDF_Object> pObj,
+                                    const CFX_WideString& name) {
+  if (!m_pRoot)
+    return false;
+
+  size_t nIndex = 0;
+  CPDF_Array* pFind = nullptr;
+  int nFindIndex = -1;
+  // Fail if the tree already contains this name or if the tree is too deep.
+  if (SearchNameNode(m_pRoot.Get(), name, nIndex, 0, &pFind, &nFindIndex))
+    return false;
+
+  // If the returned |pFind| is a nullptr, then |name| is smaller than all
+  // existing entries in the tree, and we did not find a leaf array to place
+  // |name| into. We instead will find the leftmost leaf array in which to place
+  // |name| and |pObj|.
+  if (!pFind) {
+    size_t nCurIndex = 0;
+    CFX_WideString csName;
+    SearchNameNode(m_pRoot.Get(), 0, nCurIndex, 0, &csName, &pFind);
+  }
+  ASSERT(pFind);
+
+  // Insert the name and the object into the leaf array found. Note that the
+  // insertion position is right after the key-value pair returned by |index|.
+  size_t nNameIndex = (nFindIndex + 1) * 2;
+  size_t nValueIndex = nNameIndex + 1;
+  pFind->InsertNewAt<CPDF_String>(nNameIndex, name);
+  pFind->InsertAt(nValueIndex, std::move(pObj));
+
+  // Expand the limits that the newly added name is under, if the name falls
+  // outside of the limits of its leaf array or any arrays above it.
+  std::vector<CPDF_Array*> pLimits;
+  GetNodeLimits(m_pRoot.Get(), pFind, 0, &pLimits);
+  for (auto* pLimit : pLimits) {
+    if (!pLimit)
+      continue;
+
+    if (name.Compare(pLimit->GetUnicodeTextAt(0)) < 0)
+      pLimit->SetNewAt<CPDF_String>(0, name);
+
+    if (name.Compare(pLimit->GetUnicodeTextAt(1)) > 0)
+      pLimit->SetNewAt<CPDF_String>(1, name);
+  }
+  return true;
+}
+
 CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
                                                CFX_WideString* csName) const {
   csName->clear();
@@ -175,7 +289,7 @@ CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
     return nullptr;
 
   size_t nCurIndex = 0;
-  return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, csName, nullptr);
+  return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, 0, csName, nullptr);
 }
 
 CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const {
@@ -183,7 +297,7 @@ CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const {
     return nullptr;
 
   size_t nIndex = 0;
-  return SearchNameNode(m_pRoot.Get(), csName, nIndex, nullptr);
+  return SearchNameNode(m_pRoot.Get(), csName, nIndex, 0, nullptr, nullptr);
 }
 
 CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
diff --git a/core/fpdfdoc/cpdf_nametree.h b/core/fpdfdoc/cpdf_nametree.h
index a56f511783..28239233ea 100644
--- a/core/fpdfdoc/cpdf_nametree.h
+++ b/core/fpdfdoc/cpdf_nametree.h
@@ -7,6 +7,8 @@
 #ifndef CORE_FPDFDOC_CPDF_NAMETREE_H_
 #define CORE_FPDFDOC_CPDF_NAMETREE_H_
 
+#include <memory>
+
 #include "core/fxcrt/cfx_unowned_ptr.h"
 #include "core/fxcrt/fx_string.h"
 
@@ -21,6 +23,9 @@ class CPDF_NameTree {
   CPDF_NameTree(CPDF_Document* pDoc, const CFX_ByteString& category);
   ~CPDF_NameTree();
 
+  bool AddValueAndName(std::unique_ptr<CPDF_Object> pObj,
+                       const CFX_WideString& name);
+
   CPDF_Object* LookupValueAndName(int nIndex, CFX_WideString* csName) const;
   CPDF_Object* LookupValue(const CFX_WideString& csName) const;
   CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_WideString& sName);
diff --git a/core/fpdfdoc/cpdf_nametree_unittest.cpp b/core/fpdfdoc/cpdf_nametree_unittest.cpp
index bffb496843..4842f06a8e 100644
--- a/core/fpdfdoc/cpdf_nametree_unittest.cpp
+++ b/core/fpdfdoc/cpdf_nametree_unittest.cpp
@@ -9,6 +9,40 @@
 #include "core/fpdfapi/parser/cpdf_string.h"
 #include "testing/gtest/include/gtest/gtest.h"
 
+namespace {
+
+void AddNameKeyValue(CPDF_Array* pNames, const char* key, const int value) {
+  pNames->AddNew<CPDF_String>(key, false);
+  pNames->AddNew<CPDF_Number>(value);
+}
+
+void CheckNameKeyValue(CPDF_Array* pNames,
+                       const int index,
+                       const char* key,
+                       const int value) {
+  EXPECT_STREQ(key, pNames->GetStringAt(index * 2).c_str());
+  EXPECT_EQ(value, pNames->GetIntegerAt(index * 2 + 1));
+}
+
+void AddLimitsArray(CPDF_Dictionary* pNode,
+                    const char* least,
+                    const char* greatest) {
+  CPDF_Array* pLimits = pNode->SetNewFor<CPDF_Array>("Limits");
+  pLimits->AddNew<CPDF_String>(least, false);
+  pLimits->AddNew<CPDF_String>(greatest, false);
+}
+
+void CheckLimitsArray(CPDF_Dictionary* pNode,
+                      const char* least,
+                      const char* greatest) {
+  CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
+  ASSERT_TRUE(pLimits);
+  EXPECT_STREQ(least, pLimits->GetStringAt(0).c_str());
+  EXPECT_STREQ(greatest, pLimits->GetStringAt(1).c_str());
+}
+
+}  // namespace
+
 TEST(cpdf_nametree, GetUnicodeNameWithBOM) {
   // Set up the root dictionary with a Names array.
   auto pRootDict = pdfium::MakeUnique<CPDF_Dictionary>();
@@ -34,3 +68,136 @@ TEST(cpdf_nametree, GetUnicodeNameWithBOM) {
   ASSERT_TRUE(pObj->IsNumber());
   EXPECT_EQ(100, pObj->AsNumber()->GetInteger());
 }
+
+TEST(cpdf_nametree, AddIntoNames) {
+  // Set up a name tree with a single Names array.
+  auto pRootDict = pdfium::MakeUnique<CPDF_Dictionary>();
+  CPDF_Array* pNames = pRootDict->SetNewFor<CPDF_Array>("Names");
+  AddNameKeyValue(pNames, "2.txt", 222);
+  AddNameKeyValue(pNames, "7.txt", 777);
+
+  CPDF_NameTree nameTree(pRootDict.get());
+  pNames = nameTree.GetRoot()->GetArrayFor("Names");
+
+  // Insert a name that already exists in the names array.
+  EXPECT_FALSE(
+      nameTree.AddValueAndName(pdfium::MakeUnique<CPDF_Number>(111), L"2.txt"));
+
+  // Insert in the beginning of the names array.
+  EXPECT_TRUE(
+      nameTree.AddValueAndName(pdfium::MakeUnique<CPDF_Number>(111), L"1.txt"));
+
+  // Insert in the middle of the names array.
+  EXPECT_TRUE(
+      nameTree.AddValueAndName(pdfium::MakeUnique<CPDF_Number>(555), L"5.txt"));
+
+  // Insert at the end of the names array.
+  EXPECT_TRUE(
+      nameTree.AddValueAndName(pdfium::MakeUnique<CPDF_Number>(999), L"9.txt"));
+
+  // Check that the names array has the expected key-value pairs.
+  CheckNameKeyValue(pNames, 0, "1.txt", 111);
+  CheckNameKeyValue(pNames, 1, "2.txt", 222);
+  CheckNameKeyValue(pNames, 2, "5.txt", 555);
+  CheckNameKeyValue(pNames, 3, "7.txt", 777);
+  CheckNameKeyValue(pNames, 4, "9.txt", 999);
+}
+
+TEST(cpdf_nametree, AddIntoKids) {
+  // Set up a name tree with five nodes of three levels.
+  auto pRootDict = pdfium::MakeUnique<CPDF_Dictionary>();
+  CPDF_Array* pKids = pRootDict->SetNewFor<CPDF_Array>("Kids");
+  CPDF_Dictionary* pKid1 = pKids->AddNew<CPDF_Dictionary>();
+
+  AddLimitsArray(pKid1, "1.txt", "9.txt");
+  pKids = pKid1->SetNewFor<CPDF_Array>("Kids");
+  CPDF_Dictionary* pKid2 = pKids->AddNew<CPDF_Dictionary>();
+  CPDF_Dictionary* pKid3 = pKids->AddNew<CPDF_Dictionary>();
+
+  AddLimitsArray(pKid2, "1.txt", "5.txt");
+  pKids = pKid2->SetNewFor<CPDF_Array>("Kids");
+  CPDF_Dictionary* pKid4 = pKids->AddNew<CPDF_Dictionary>();
+  CPDF_Dictionary* pKid5 = pKids->AddNew<CPDF_Dictionary>();
+
+  AddLimitsArray(pKid3, "9.txt", "9.txt");
+  CPDF_Array* pNames = pKid3->SetNewFor<CPDF_Array>("Names");
+  AddNameKeyValue(pNames, "9.txt", 999);
+
+  AddLimitsArray(pKid4, "1.txt", "2.txt");
+  pNames = pKid4->SetNewFor<CPDF_Array>("Names");
+  AddNameKeyValue(pNames, "1.txt", 111);
+  AddNameKeyValue(pNames, "2.txt", 222);
+
+  AddLimitsArray(pKid5, "3.txt", "5.txt");
+  pNames = pKid5->SetNewFor<CPDF_Array>("Names");
+  AddNameKeyValue(pNames, "3.txt", 333);
+  AddNameKeyValue(pNames, "5.txt", 555);
+
+  CPDF_NameTree nameTree(pRootDict.get());
+
+  // Check that adding an existing name would fail.
+  EXPECT_FALSE(
+      nameTree.AddValueAndName(pdfium::MakeUnique<CPDF_Number>(444), L"9.txt"));
+
+  // Add a name within the limits of a leaf node.
+  EXPECT_TRUE(
+      nameTree.AddValueAndName(pdfium::MakeUnique<CPDF_Number>(444), L"4.txt"));
+  ASSERT_TRUE(nameTree.LookupValue(L"4.txt"));
+  EXPECT_EQ(444, nameTree.LookupValue(L"4.txt")->GetInteger());
+
+  // Add a name that requires changing the limits of two bottom levels.
+  EXPECT_TRUE(
+      nameTree.AddValueAndName(pdfium::MakeUnique<CPDF_Number>(666), L"6.txt"));
+  ASSERT_TRUE(nameTree.LookupValue(L"6.txt"));
+  EXPECT_EQ(666, nameTree.LookupValue(L"6.txt")->GetInteger());
+
+  // Add a name that requires changing the limits of two top levels.
+  EXPECT_TRUE(
+      nameTree.AddValueAndName(pdfium::MakeUnique<CPDF_Number>(99), L"99.txt"));
+  ASSERT_TRUE(nameTree.LookupValue(L"99.txt"));
+  EXPECT_EQ(99, nameTree.LookupValue(L"99.txt")->GetInteger());
+
+  // Add a name that requires changing the lower limit of all levels.
+  EXPECT_TRUE(
+      nameTree.AddValueAndName(pdfium::MakeUnique<CPDF_Number>(-5), L"0.txt"));
+  ASSERT_TRUE(nameTree.LookupValue(L"0.txt"));
+  EXPECT_EQ(-5, nameTree.LookupValue(L"0.txt")->GetInteger());
+
+  // Check that the node on the first level has the expected limits.
+  pKid1 = nameTree.GetRoot()->GetArrayFor("Kids")->GetDictAt(0);
+  ASSERT_TRUE(pKid1);
+  CheckLimitsArray(pKid1, "0.txt", "99.txt");
+
+  // Check that the nodes on the second level has the expected limits and names.
+  pKid2 = pKid1->GetArrayFor("Kids")->GetDictAt(0);
+  ASSERT_TRUE(pKid2);
+  CheckLimitsArray(pKid2, "0.txt", "6.txt");
+
+  pKid3 = pKid1->GetArrayFor("Kids")->GetDictAt(1);
+  ASSERT_TRUE(pKid3);
+  CheckLimitsArray(pKid3, "9.txt", "99.txt");
+  pNames = pKid3->GetArrayFor("Names");
+  ASSERT_TRUE(pNames);
+  CheckNameKeyValue(pNames, 0, "9.txt", 999);
+  CheckNameKeyValue(pNames, 1, "99.txt", 99);
+
+  // Check that the nodes on the third level has the expected limits and names.
+  pKid4 = pKid2->GetArrayFor("Kids")->GetDictAt(0);
+  ASSERT_TRUE(pKid4);
+  CheckLimitsArray(pKid4, "0.txt", "2.txt");
+  pNames = pKid4->GetArrayFor("Names");
+  ASSERT_TRUE(pNames);
+  CheckNameKeyValue(pNames, 0, "0.txt", -5);
+  CheckNameKeyValue(pNames, 1, "1.txt", 111);
+  CheckNameKeyValue(pNames, 2, "2.txt", 222);
+
+  pKid5 = pKid2->GetArrayFor("Kids")->GetDictAt(1);
+  ASSERT_TRUE(pKid5);
+  CheckLimitsArray(pKid5, "3.txt", "6.txt");
+  pNames = pKid5->GetArrayFor("Names");
+  ASSERT_TRUE(pNames);
+  CheckNameKeyValue(pNames, 0, "3.txt", 333);
+  CheckNameKeyValue(pNames, 1, "4.txt", 444);
+  CheckNameKeyValue(pNames, 2, "5.txt", 555);
+  CheckNameKeyValue(pNames, 3, "6.txt", 666);
+}
-- 
cgit v1.2.3