Use CFX_WideString in CPDF_NameTree functions to strip BOMchromium/3162

PDFium doesn't strip BOMs during parsing, but we should strip BOMs when retrieving parsed strings in CPDF_NameTree to ensure consistency and appropriate function behavior. See the bug for more info. As outlined in Bug=pdfium:593, the solution is to call GetUnicodeText() instead of GetString(). I added a GetUnicodeTextAt() function in CPDF_Array, which is symmetrical to GetUnicodeTextFor() in CPDF_Dictionary. I then changed the input variable types to CPDF_NameTree functions to be CFX_WideString instead of CFX_ByteString, and modified all the calls to them. I also added a unit test for nametree, which would fail prior to this change. Nametrees with non-unicode names are already tested by embedder tests. Bug=pdfium:820 Change-Id: Id69d7343632f83d1f5180348c0eea290f478183f Reviewed-on: https://pdfium-review.googlesource.com/8091 Reviewed-by: dsinclair <dsinclair@chromium.org> Commit-Queue: Jane Liu <janeliulwq@google.com>
author: Jane Liu <janeliulwq@google.com> 2017-07-19 13:10:50 -0400
committer: Chromium commit bot <commit-bot@chromium.org> 2017-07-19 19:09:39 +0000
commit: 67ccef73bf664b7cdb4c6eed7acbaa4163c22a80 (patch)
tree: 718061bc21fd52eab1bc70a8b9be97585f1d79f8 /core/fpdfdoc/cpdf_nametree.cpp
parent: eed247e9cb3b0e9ce5dcb8bf6ee7673c9dd3e544 (diff)
download: pdfium-67ccef73bf664b7cdb4c6eed7acbaa4163c22a80.tar.xz
1 files changed, 17 insertions, 17 deletions
diff --git a/core/fpdfdoc/cpdf_nametree.cpp b/core/fpdfdoc/cpdf_nametree.cpp
index b3808bddba..04cb1b9e40 100644
--- a/core/fpdfdoc/cpdf_nametree.cpp
+++ b/core/fpdfdoc/cpdf_nametree.cpp
@@ -9,13 +9,14 @@
 #include "core/fpdfapi/parser/cpdf_array.h"
 #include "core/fpdfapi/parser/cpdf_dictionary.h"
 #include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/fpdf_parser_decode.h"
 
 namespace {
 
 const int nMaxRecursion = 32;
 
 CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
-                            const CFX_ByteString& csName,
+                            const CFX_WideString& csName,
                             size_t& nIndex,
                             CPDF_Array** ppFind,
                             int nLevel = 0) {
@@ -24,15 +25,14 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
 
   CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
   if (pLimits) {
-    CFX_ByteString csLeft = pLimits->GetStringAt(0);
-    CFX_ByteString csRight = pLimits->GetStringAt(1);
-    if (csLeft.Compare(csRight.AsStringC()) > 0) {
-      CFX_ByteString csTmp = csRight;
+    CFX_WideString csLeft = pLimits->GetUnicodeTextAt(0);
+    CFX_WideString csRight = pLimits->GetUnicodeTextAt(1);
+    if (csLeft.Compare(csRight) > 0) {
+      CFX_WideString csTmp = csRight;
       csRight = csLeft;
       csLeft = csTmp;
     }
-    if (csName.Compare(csLeft.AsStringC()) < 0 ||
-        csName.Compare(csRight.AsStringC()) > 0) {
+    if (csName.Compare(csLeft) < 0 || csName.Compare(csRight) > 0) {
       return nullptr;
     }
   }
@@ -41,8 +41,8 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
   if (pNames) {
     size_t dwCount = pNames->GetCount() / 2;
     for (size_t i = 0; i < dwCount; i++) {
-      CFX_ByteString csValue = pNames->GetStringAt(i * 2);
-      int32_t iCompare = csValue.Compare(csName.AsStringC());
+      CFX_WideString csValue = pNames->GetUnicodeTextAt(i * 2);
+      int32_t iCompare = csValue.Compare(csName);
       if (iCompare <= 0) {
         if (ppFind)
           *ppFind = pNames;
@@ -78,7 +78,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
 CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
                             size_t nIndex,
                             size_t& nCurIndex,
-                            CFX_ByteString* csName,
+                            CFX_WideString* csName,
                             CPDF_Array** ppFind,
                             int nLevel = 0) {
   if (nLevel > nMaxRecursion)
@@ -93,7 +93,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
     }
     if (ppFind)
       *ppFind = pNames;
-    *csName = pNames->GetStringAt((nIndex - nCurIndex) * 2);
+    *csName = pNames->GetUnicodeTextAt((nIndex - nCurIndex) * 2);
     return pNames->GetDirectObjectAt((nIndex - nCurIndex) * 2 + 1);
   }
   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
@@ -158,7 +158,7 @@ size_t CPDF_NameTree::GetCount() const {
   return m_pRoot ? ::CountNames(m_pRoot.Get()) : 0;
 }
 
-int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const {
+int CPDF_NameTree::GetIndex(const CFX_WideString& csName) const {
   if (!m_pRoot)
     return -1;
 
@@ -169,8 +169,8 @@ int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const {
 }
 
 CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
-                                               CFX_ByteString* csName) const {
-  *csName = CFX_ByteString();
+                                               CFX_WideString* csName) const {
+  *csName = CFX_WideString();
   if (!m_pRoot)
     return nullptr;
 
@@ -178,7 +178,7 @@ CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
   return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, csName, nullptr);
 }
 
-CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const {
+CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const {
   if (!m_pRoot)
     return nullptr;
 
@@ -187,13 +187,13 @@ CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const {
 }
 
 CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
-                                           const CFX_ByteString& sName) {
+                                           const CFX_WideString& sName) {
   CPDF_Object* pValue = LookupValue(sName);
   if (!pValue) {
     CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests");
     if (!pDests)
       return nullptr;
-    pValue = pDests->GetDirectObjectFor(sName);
+    pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName));
   }
   if (!pValue)
     return nullptr;
author	Jane Liu <janeliulwq@google.com>	2017-07-19 13:10:50 -0400
committer	Chromium commit bot <commit-bot@chromium.org>	2017-07-19 19:09:39 +0000
commit	67ccef73bf664b7cdb4c6eed7acbaa4163c22a80 (patch)
tree	718061bc21fd52eab1bc70a8b9be97585f1d79f8 /core/fpdfdoc/cpdf_nametree.cpp
parent	eed247e9cb3b0e9ce5dcb8bf6ee7673c9dd3e544 (diff)
download	pdfium-67ccef73bf664b7cdb4c6eed7acbaa4163c22a80.tar.xz