summary | refs | log | tree | commit | diff
path: root/core/fpdfdoc
diff options
context:
space:
mode:
author: Jane Liu <janeliulwq@google.com> 2017-07-19 13:10:50 -0400
committer: Chromium commit bot <commit-bot@chromium.org> 2017-07-19 19:09:39 +0000
commit: 67ccef73bf664b7cdb4c6eed7acbaa4163c22a80 (patch)
tree: 718061bc21fd52eab1bc70a8b9be97585f1d79f8 /core/fpdfdoc
parent: eed247e9cb3b0e9ce5dcb8bf6ee7673c9dd3e544 (diff)
download: pdfium-67ccef73bf664b7cdb4c6eed7acbaa4163c22a80.tar.xz
Use CFX_WideString in CPDF_NameTree functions to strip BOM [chromium/3162]
PDFium doesn't strip BOMs during parsing, but we should strip BOMs when retrieving parsed strings in CPDF_NameTree to ensure consistency and appropriate function behavior. See the bug for more info.

As outlined in Bug=pdfium:593, the solution is to call GetUnicodeText() instead of GetString(). I added a GetUnicodeTextAt() function in CPDF_Array, which is symmetrical to GetUnicodeTextFor() in CPDF_Dictionary. I then changed the input variable types to CPDF_NameTree functions to be CFX_WideString instead of CFX_ByteString, and modified all the calls to them.

I also added a unit test for nametree, which would fail prior to this change. Nametrees with non-unicode names are already tested by embedder tests.

Bug=pdfium:820
Change-Id: Id69d7343632f83d1f5180348c0eea290f478183f
Reviewed-on: https://pdfium-review.googlesource.com/8091
Reviewed-by: dsinclair <dsinclair@chromium.org>
Commit-Queue: Jane Liu <janeliulwq@google.com>
Diffstat (limited to 'core/fpdfdoc')
-rw-r--r-- core/fpdfdoc/cpdf_action.cpp | 2
-rw-r--r-- core/fpdfdoc/cpdf_bookmark.cpp | 3
-rw-r--r-- core/fpdfdoc/cpdf_docjsactions.cpp | 6
-rw-r--r-- core/fpdfdoc/cpdf_docjsactions.h | 6
-rw-r--r-- core/fpdfdoc/cpdf_link.cpp | 2
-rw-r--r-- core/fpdfdoc/cpdf_nametree.cpp | 34
-rw-r--r-- core/fpdfdoc/cpdf_nametree.h | 8
-rw-r--r-- core/fpdfdoc/cpdf_nametree_unittest.cpp | 35
8 files changed, 66 insertions, 30 deletions
diff --git a/core/fpdfdoc/cpdf_action.cpp b/core/fpdfdoc/cpdf_action.cpp
index 88d0781e80..2357580548 100644
--- a/core/fpdfdoc/cpdf_action.cpp
+++ b/core/fpdfdoc/cpdf_action.cpp
@@ -42,7 +42,7 @@ CPDF_Dest CPDF_Action::GetDest(CPDF_Document* pDoc) const {
return CPDF_Dest();
if (pDest->IsString() || pDest->IsName()) {
CPDF_NameTree name_tree(pDoc, "Dests");
- return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString()));
+ return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText()));
}
if (CPDF_Array* pArray = pDest->AsArray())
return CPDF_Dest(pArray);
diff --git a/core/fpdfdoc/cpdf_bookmark.cpp b/core/fpdfdoc/cpdf_bookmark.cpp
index 29303f1d32..e84001f846 100644
--- a/core/fpdfdoc/cpdf_bookmark.cpp
+++ b/core/fpdfdoc/cpdf_bookmark.cpp
@@ -70,7 +70,8 @@ CPDF_Dest CPDF_Bookmark::GetDest(CPDF_Document* pDocument) const {
return CPDF_Dest();
if (pDest->IsString() || pDest->IsName()) {
CPDF_NameTree name_tree(pDocument, "Dests");
- return CPDF_Dest(name_tree.LookupNamedDest(pDocument, pDest->GetString()));
+ return CPDF_Dest(
+ name_tree.LookupNamedDest(pDocument, pDest->GetUnicodeText()));
}
if (CPDF_Array* pArray = pDest->AsArray())
return CPDF_Dest(pArray);
diff --git a/core/fpdfdoc/cpdf_docjsactions.cpp b/core/fpdfdoc/cpdf_docjsactions.cpp
index 59dbccce85..669ed7055c 100644
--- a/core/fpdfdoc/cpdf_docjsactions.cpp
+++ b/core/fpdfdoc/cpdf_docjsactions.cpp
@@ -20,7 +20,7 @@ int CPDF_DocJSActions::CountJSActions() const {
CPDF_Action CPDF_DocJSActions::GetJSActionAndName(
int index,
- CFX_ByteString* csName) const {
+ CFX_WideString* csName) const {
ASSERT(m_pDocument);
CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript");
CPDF_Object* pAction = name_tree.LookupValueAndName(index, csName);
@@ -28,7 +28,7 @@ CPDF_Action CPDF_DocJSActions::GetJSActionAndName(
: CPDF_Action();
}
-CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_ByteString& csName) const {
+CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_WideString& csName) const {
ASSERT(m_pDocument);
CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript");
CPDF_Object* pAction = name_tree.LookupValue(csName);
@@ -36,7 +36,7 @@ CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_ByteString& csName) const {
: CPDF_Action();
}
-int CPDF_DocJSActions::FindJSAction(const CFX_ByteString& csName) const {
+int CPDF_DocJSActions::FindJSAction(const CFX_WideString& csName) const {
ASSERT(m_pDocument);
CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript");
return name_tree.GetIndex(csName);
diff --git a/core/fpdfdoc/cpdf_docjsactions.h b/core/fpdfdoc/cpdf_docjsactions.h
index 328b8869f6..73c0a1ef1c 100644
--- a/core/fpdfdoc/cpdf_docjsactions.h
+++ b/core/fpdfdoc/cpdf_docjsactions.h
@@ -19,9 +19,9 @@ class CPDF_DocJSActions {
~CPDF_DocJSActions();
int CountJSActions() const;
- CPDF_Action GetJSActionAndName(int index, CFX_ByteString* csName) const;
- CPDF_Action GetJSAction(const CFX_ByteString& csName) const;
- int FindJSAction(const CFX_ByteString& csName) const;
+ CPDF_Action GetJSActionAndName(int index, CFX_WideString* csName) const;
+ CPDF_Action GetJSAction(const CFX_WideString& csName) const;
+ int FindJSAction(const CFX_WideString& csName) const;
CPDF_Document* GetDocument() const { return m_pDocument.Get(); }
private:
diff --git a/core/fpdfdoc/cpdf_link.cpp b/core/fpdfdoc/cpdf_link.cpp
index b622094a73..f7aec4087e 100644
--- a/core/fpdfdoc/cpdf_link.cpp
+++ b/core/fpdfdoc/cpdf_link.cpp
@@ -28,7 +28,7 @@ CPDF_Dest CPDF_Link::GetDest(CPDF_Document* pDoc) {
if (pDest->IsString() || pDest->IsName()) {
CPDF_NameTree name_tree(pDoc, "Dests");
- return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString()));
+ return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText()));
}
if (CPDF_Array* pArray = pDest->AsArray())
return CPDF_Dest(pArray);
diff --git a/core/fpdfdoc/cpdf_nametree.cpp b/core/fpdfdoc/cpdf_nametree.cpp
index b3808bddba..04cb1b9e40 100644
--- a/core/fpdfdoc/cpdf_nametree.cpp
+++ b/core/fpdfdoc/cpdf_nametree.cpp
@@ -9,13 +9,14 @@
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
+#include "core/fpdfapi/parser/fpdf_parser_decode.h"
namespace {
const int nMaxRecursion = 32;
CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
- const CFX_ByteString& csName,
+ const CFX_WideString& csName,
size_t& nIndex,
CPDF_Array** ppFind,
int nLevel = 0) {
@@ -24,15 +25,14 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
if (pLimits) {
- CFX_ByteString csLeft = pLimits->GetStringAt(0);
- CFX_ByteString csRight = pLimits->GetStringAt(1);
- if (csLeft.Compare(csRight.AsStringC()) > 0) {
- CFX_ByteString csTmp = csRight;
+ CFX_WideString csLeft = pLimits->GetUnicodeTextAt(0);
+ CFX_WideString csRight = pLimits->GetUnicodeTextAt(1);
+ if (csLeft.Compare(csRight) > 0) {
+ CFX_WideString csTmp = csRight;
csRight = csLeft;
csLeft = csTmp;
}
- if (csName.Compare(csLeft.AsStringC()) < 0 ||
- csName.Compare(csRight.AsStringC()) > 0) {
+ if (csName.Compare(csLeft) < 0 || csName.Compare(csRight) > 0) {
return nullptr;
}
}
@@ -41,8 +41,8 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
if (pNames) {
size_t dwCount = pNames->GetCount() / 2;
for (size_t i = 0; i < dwCount; i++) {
- CFX_ByteString csValue = pNames->GetStringAt(i * 2);
- int32_t iCompare = csValue.Compare(csName.AsStringC());
+ CFX_WideString csValue = pNames->GetUnicodeTextAt(i * 2);
+ int32_t iCompare = csValue.Compare(csName);
if (iCompare <= 0) {
if (ppFind)
*ppFind = pNames;
@@ -78,7 +78,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
size_t nIndex,
size_t& nCurIndex,
- CFX_ByteString* csName,
+ CFX_WideString* csName,
CPDF_Array** ppFind,
int nLevel = 0) {
if (nLevel > nMaxRecursion)
@@ -93,7 +93,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode,
}
if (ppFind)
*ppFind = pNames;
- *csName = pNames->GetStringAt((nIndex - nCurIndex) * 2);
+ *csName = pNames->GetUnicodeTextAt((nIndex - nCurIndex) * 2);
return pNames->GetDirectObjectAt((nIndex - nCurIndex) * 2 + 1);
}
CPDF_Array* pKids = pNode->GetArrayFor("Kids");
@@ -158,7 +158,7 @@ size_t CPDF_NameTree::GetCount() const {
return m_pRoot ? ::CountNames(m_pRoot.Get()) : 0;
}
-int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const {
+int CPDF_NameTree::GetIndex(const CFX_WideString& csName) const {
if (!m_pRoot)
return -1;
@@ -169,8 +169,8 @@ int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const {
}
CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
- CFX_ByteString* csName) const {
- *csName = CFX_ByteString();
+ CFX_WideString* csName) const {
+ *csName = CFX_WideString();
if (!m_pRoot)
return nullptr;
@@ -178,7 +178,7 @@ CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, csName, nullptr);
}
-CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const {
+CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const {
if (!m_pRoot)
return nullptr;
@@ -187,13 +187,13 @@ CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const {
}
CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
- const CFX_ByteString& sName) {
+ const CFX_WideString& sName) {
CPDF_Object* pValue = LookupValue(sName);
if (!pValue) {
CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests");
if (!pDests)
return nullptr;
- pValue = pDests->GetDirectObjectFor(sName);
+ pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName));
}
if (!pValue)
return nullptr;
diff --git a/core/fpdfdoc/cpdf_nametree.h b/core/fpdfdoc/cpdf_nametree.h
index 69000f32b2..a56f511783 100644
--- a/core/fpdfdoc/cpdf_nametree.h
+++ b/core/fpdfdoc/cpdf_nametree.h
@@ -21,11 +21,11 @@ class CPDF_NameTree {
CPDF_NameTree(CPDF_Document* pDoc, const CFX_ByteString& category);
~CPDF_NameTree();
- CPDF_Object* LookupValueAndName(int nIndex, CFX_ByteString* csName) const;
- CPDF_Object* LookupValue(const CFX_ByteString& csName) const;
- CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_ByteString& sName);
+ CPDF_Object* LookupValueAndName(int nIndex, CFX_WideString* csName) const;
+ CPDF_Object* LookupValue(const CFX_WideString& csName) const;
+ CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_WideString& sName);
- int GetIndex(const CFX_ByteString& csName) const;
+ int GetIndex(const CFX_WideString& csName) const;
size_t GetCount() const;
CPDF_Dictionary* GetRoot() const { return m_pRoot.Get(); }
diff --git a/core/fpdfdoc/cpdf_nametree_unittest.cpp b/core/fpdfdoc/cpdf_nametree_unittest.cpp
new file mode 100644
index 0000000000..28af9e078d
--- /dev/null
+++ b/core/fpdfdoc/cpdf_nametree_unittest.cpp
@@ -0,0 +1,35 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfdoc/cpdf_nametree.h"
+#include "core/fpdfapi/parser/cpdf_array.h"
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_string.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(cpdf_nametree, GetUnicodeNameWithBOM) {
+ // Set up the root dictionary with a Names array.
+ auto pRootDict = pdfium::MakeUnique<CPDF_Dictionary>();
+ CPDF_Array* pNames = pRootDict->SetNewFor<CPDF_Array>("Names");
+
+ // Add the key "1" (with BOM) and value 100 into the array.
+ std::ostringstream buf;
+ buf << static_cast<unsigned char>(254) << static_cast<unsigned char>(255)
+ << static_cast<unsigned char>(0) << static_cast<unsigned char>(49);
+ pNames->AddNew<CPDF_String>(CFX_ByteString(buf), true);
+ pNames->AddNew<CPDF_Number>(100);
+
+ // Check that the key is as expected.
+ CPDF_NameTree nameTree(pRootDict.get());
+ CFX_WideString storedName;
+ nameTree.LookupValueAndName(0, &storedName);
+ EXPECT_STREQ(L"1", storedName.c_str());
+
+ // Check that the correct value object can be obtained by looking up "1".
+ CFX_WideString matchName = L"1";
+ CPDF_Object* pObj = nameTree.LookupValue(matchName);
+ ASSERT_TRUE(pObj->IsNumber());
+ EXPECT_EQ(100, pObj->AsNumber()->GetInteger());
+}