From 67ccef73bf664b7cdb4c6eed7acbaa4163c22a80 Mon Sep 17 00:00:00 2001 From: Jane Liu Date: Wed, 19 Jul 2017 13:10:50 -0400 Subject: Use CFX_WideString in CPDF_NameTree functions to strip BOM PDFium doesn't strip BOMs during parsing, but we should strip BOMs when retrieving parsed strings in CPDF_NameTree to ensure consistency and appropriate function behavior. See the bug for more info. As outlined in Bug=pdfium:593, the solution is to call GetUnicodeText() instead of GetString(). I added a GetUnicodeTextAt() function in CPDF_Array, which is symmetrical to GetUnicodeTextFor() in CPDF_Dictionary. I then changed the input variable types to CPDF_NameTree functions to be CFX_WideString instead of CFX_ByteString, and modified all the calls to them. I also added a unit test for nametree, which would fail prior to this change. Nametrees with non-unicode names are already tested by embedder tests. Bug=pdfium:820 Change-Id: Id69d7343632f83d1f5180348c0eea290f478183f Reviewed-on: https://pdfium-review.googlesource.com/8091 Reviewed-by: dsinclair Commit-Queue: Jane Liu --- core/fpdfapi/parser/cpdf_array.cpp | 6 ++++++ core/fpdfapi/parser/cpdf_array.h | 1 + core/fpdfdoc/cpdf_action.cpp | 2 +- core/fpdfdoc/cpdf_bookmark.cpp | 3 ++- core/fpdfdoc/cpdf_docjsactions.cpp | 6 +++--- core/fpdfdoc/cpdf_docjsactions.h | 6 +++--- core/fpdfdoc/cpdf_link.cpp | 2 +- core/fpdfdoc/cpdf_nametree.cpp | 34 ++++++++++++++++---------------- core/fpdfdoc/cpdf_nametree.h | 8 ++++---- core/fpdfdoc/cpdf_nametree_unittest.cpp | 35 +++++++++++++++++++++++++++++++++ 10 files changed, 73 insertions(+), 30 deletions(-) create mode 100644 core/fpdfdoc/cpdf_nametree_unittest.cpp (limited to 'core') diff --git a/core/fpdfapi/parser/cpdf_array.cpp b/core/fpdfapi/parser/cpdf_array.cpp index ea4ca7eaeb..1f2740ea9c 100644 --- a/core/fpdfapi/parser/cpdf_array.cpp +++ b/core/fpdfapi/parser/cpdf_array.cpp @@ -105,6 +105,12 @@ CFX_ByteString CPDF_Array::GetStringAt(size_t i) const { return m_Objects[i]->GetString(); } +CFX_WideString CPDF_Array::GetUnicodeTextAt(size_t i) const { + if (i >= m_Objects.size()) + return CFX_WideString(); + return m_Objects[i]->GetUnicodeText(); +} + int CPDF_Array::GetIntegerAt(size_t i) const { if (i >= m_Objects.size()) return 0; diff --git a/core/fpdfapi/parser/cpdf_array.h b/core/fpdfapi/parser/cpdf_array.h index bb17c0a427..2590971d80 100644 --- a/core/fpdfapi/parser/cpdf_array.h +++ b/core/fpdfapi/parser/cpdf_array.h @@ -41,6 +41,7 @@ class CPDF_Array : public CPDF_Object { CPDF_Object* GetObjectAt(size_t index) const; CPDF_Object* GetDirectObjectAt(size_t index) const; CFX_ByteString GetStringAt(size_t index) const; + CFX_WideString GetUnicodeTextAt(size_t index) const; int GetIntegerAt(size_t index) const; float GetNumberAt(size_t index) const; CPDF_Dictionary* GetDictAt(size_t index) const; diff --git a/core/fpdfdoc/cpdf_action.cpp b/core/fpdfdoc/cpdf_action.cpp index 88d0781e80..2357580548 100644 --- a/core/fpdfdoc/cpdf_action.cpp +++ b/core/fpdfdoc/cpdf_action.cpp @@ -42,7 +42,7 @@ CPDF_Dest CPDF_Action::GetDest(CPDF_Document* pDoc) const { return CPDF_Dest(); if (pDest->IsString() || pDest->IsName()) { CPDF_NameTree name_tree(pDoc, "Dests"); - return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString())); + return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText())); } if (CPDF_Array* pArray = pDest->AsArray()) return CPDF_Dest(pArray); diff --git a/core/fpdfdoc/cpdf_bookmark.cpp b/core/fpdfdoc/cpdf_bookmark.cpp index 29303f1d32..e84001f846 100644 --- a/core/fpdfdoc/cpdf_bookmark.cpp +++ b/core/fpdfdoc/cpdf_bookmark.cpp @@ -70,7 +70,8 @@ CPDF_Dest CPDF_Bookmark::GetDest(CPDF_Document* pDocument) const { return CPDF_Dest(); if (pDest->IsString() || pDest->IsName()) { CPDF_NameTree name_tree(pDocument, "Dests"); - return CPDF_Dest(name_tree.LookupNamedDest(pDocument, pDest->GetString())); + return CPDF_Dest( + name_tree.LookupNamedDest(pDocument, pDest->GetUnicodeText())); } if (CPDF_Array* pArray = pDest->AsArray()) return CPDF_Dest(pArray); diff --git a/core/fpdfdoc/cpdf_docjsactions.cpp b/core/fpdfdoc/cpdf_docjsactions.cpp index 59dbccce85..669ed7055c 100644 --- a/core/fpdfdoc/cpdf_docjsactions.cpp +++ b/core/fpdfdoc/cpdf_docjsactions.cpp @@ -20,7 +20,7 @@ int CPDF_DocJSActions::CountJSActions() const { CPDF_Action CPDF_DocJSActions::GetJSActionAndName( int index, - CFX_ByteString* csName) const { + CFX_WideString* csName) const { ASSERT(m_pDocument); CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript"); CPDF_Object* pAction = name_tree.LookupValueAndName(index, csName); @@ -28,7 +28,7 @@ CPDF_Action CPDF_DocJSActions::GetJSActionAndName( : CPDF_Action(); } -CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_ByteString& csName) const { +CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_WideString& csName) const { ASSERT(m_pDocument); CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript"); CPDF_Object* pAction = name_tree.LookupValue(csName); @@ -36,7 +36,7 @@ CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_ByteString& csName) const { : CPDF_Action(); } -int CPDF_DocJSActions::FindJSAction(const CFX_ByteString& csName) const { +int CPDF_DocJSActions::FindJSAction(const CFX_WideString& csName) const { ASSERT(m_pDocument); CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript"); return name_tree.GetIndex(csName); diff --git a/core/fpdfdoc/cpdf_docjsactions.h b/core/fpdfdoc/cpdf_docjsactions.h index 328b8869f6..73c0a1ef1c 100644 --- a/core/fpdfdoc/cpdf_docjsactions.h +++ b/core/fpdfdoc/cpdf_docjsactions.h @@ -19,9 +19,9 @@ class CPDF_DocJSActions { ~CPDF_DocJSActions(); int CountJSActions() const; - CPDF_Action GetJSActionAndName(int index, CFX_ByteString* csName) const; - CPDF_Action GetJSAction(const CFX_ByteString& csName) const; - int FindJSAction(const CFX_ByteString& csName) const; + CPDF_Action GetJSActionAndName(int index, CFX_WideString* csName) const; + CPDF_Action GetJSAction(const CFX_WideString& csName) const; + int FindJSAction(const CFX_WideString& csName) const; CPDF_Document* GetDocument() const { return m_pDocument.Get(); } private: diff --git a/core/fpdfdoc/cpdf_link.cpp b/core/fpdfdoc/cpdf_link.cpp index b622094a73..f7aec4087e 100644 --- a/core/fpdfdoc/cpdf_link.cpp +++ b/core/fpdfdoc/cpdf_link.cpp @@ -28,7 +28,7 @@ CPDF_Dest CPDF_Link::GetDest(CPDF_Document* pDoc) { if (pDest->IsString() || pDest->IsName()) { CPDF_NameTree name_tree(pDoc, "Dests"); - return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString())); + return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText())); } if (CPDF_Array* pArray = pDest->AsArray()) return CPDF_Dest(pArray); diff --git a/core/fpdfdoc/cpdf_nametree.cpp b/core/fpdfdoc/cpdf_nametree.cpp index b3808bddba..04cb1b9e40 100644 --- a/core/fpdfdoc/cpdf_nametree.cpp +++ b/core/fpdfdoc/cpdf_nametree.cpp @@ -9,13 +9,14 @@ #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/fpdf_parser_decode.h" namespace { const int nMaxRecursion = 32; CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, - const CFX_ByteString& csName, + const CFX_WideString& csName, size_t& nIndex, CPDF_Array** ppFind, int nLevel = 0) { @@ -24,15 +25,14 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, CPDF_Array* pLimits = pNode->GetArrayFor("Limits"); if (pLimits) { - CFX_ByteString csLeft = pLimits->GetStringAt(0); - CFX_ByteString csRight = pLimits->GetStringAt(1); - if (csLeft.Compare(csRight.AsStringC()) > 0) { - CFX_ByteString csTmp = csRight; + CFX_WideString csLeft = pLimits->GetUnicodeTextAt(0); + CFX_WideString csRight = pLimits->GetUnicodeTextAt(1); + if (csLeft.Compare(csRight) > 0) { + CFX_WideString csTmp = csRight; csRight = csLeft; csLeft = csTmp; } - if (csName.Compare(csLeft.AsStringC()) < 0 || - csName.Compare(csRight.AsStringC()) > 0) { + if (csName.Compare(csLeft) < 0 || csName.Compare(csRight) > 0) { return nullptr; } } @@ -41,8 +41,8 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, if (pNames) { size_t dwCount = pNames->GetCount() / 2; for (size_t i = 0; i < dwCount; i++) { - CFX_ByteString csValue = pNames->GetStringAt(i * 2); - int32_t iCompare = csValue.Compare(csName.AsStringC()); + CFX_WideString csValue = pNames->GetUnicodeTextAt(i * 2); + int32_t iCompare = csValue.Compare(csName); if (iCompare <= 0) { if (ppFind) *ppFind = pNames; @@ -78,7 +78,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, size_t nIndex, size_t& nCurIndex, - CFX_ByteString* csName, + CFX_WideString* csName, CPDF_Array** ppFind, int nLevel = 0) { if (nLevel > nMaxRecursion) @@ -93,7 +93,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, } if (ppFind) *ppFind = pNames; - *csName = pNames->GetStringAt((nIndex - nCurIndex) * 2); + *csName = pNames->GetUnicodeTextAt((nIndex - nCurIndex) * 2); return pNames->GetDirectObjectAt((nIndex - nCurIndex) * 2 + 1); } CPDF_Array* pKids = pNode->GetArrayFor("Kids"); @@ -158,7 +158,7 @@ size_t CPDF_NameTree::GetCount() const { return m_pRoot ? ::CountNames(m_pRoot.Get()) : 0; } -int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const { +int CPDF_NameTree::GetIndex(const CFX_WideString& csName) const { if (!m_pRoot) return -1; @@ -169,8 +169,8 @@ int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const { } CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex, - CFX_ByteString* csName) const { - *csName = CFX_ByteString(); + CFX_WideString* csName) const { + *csName = CFX_WideString(); if (!m_pRoot) return nullptr; @@ -178,7 +178,7 @@ CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex, return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, csName, nullptr); } -CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const { +CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const { if (!m_pRoot) return nullptr; @@ -187,13 +187,13 @@ CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const { } CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc, - const CFX_ByteString& sName) { + const CFX_WideString& sName) { CPDF_Object* pValue = LookupValue(sName); if (!pValue) { CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests"); if (!pDests) return nullptr; - pValue = pDests->GetDirectObjectFor(sName); + pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName)); } if (!pValue) return nullptr; diff --git a/core/fpdfdoc/cpdf_nametree.h b/core/fpdfdoc/cpdf_nametree.h index 69000f32b2..a56f511783 100644 --- a/core/fpdfdoc/cpdf_nametree.h +++ b/core/fpdfdoc/cpdf_nametree.h @@ -21,11 +21,11 @@ class CPDF_NameTree { CPDF_NameTree(CPDF_Document* pDoc, const CFX_ByteString& category); ~CPDF_NameTree(); - CPDF_Object* LookupValueAndName(int nIndex, CFX_ByteString* csName) const; - CPDF_Object* LookupValue(const CFX_ByteString& csName) const; - CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_ByteString& sName); + CPDF_Object* LookupValueAndName(int nIndex, CFX_WideString* csName) const; + CPDF_Object* LookupValue(const CFX_WideString& csName) const; + CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_WideString& sName); - int GetIndex(const CFX_ByteString& csName) const; + int GetIndex(const CFX_WideString& csName) const; size_t GetCount() const; CPDF_Dictionary* GetRoot() const { return m_pRoot.Get(); } diff --git a/core/fpdfdoc/cpdf_nametree_unittest.cpp b/core/fpdfdoc/cpdf_nametree_unittest.cpp new file mode 100644 index 0000000000..28af9e078d --- /dev/null +++ b/core/fpdfdoc/cpdf_nametree_unittest.cpp @@ -0,0 +1,35 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfdoc/cpdf_nametree.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(cpdf_nametree, GetUnicodeNameWithBOM) { + // Set up the root dictionary with a Names array. + auto pRootDict = pdfium::MakeUnique(); + CPDF_Array* pNames = pRootDict->SetNewFor("Names"); + + // Add the key "1" (with BOM) and value 100 into the array. + std::ostringstream buf; + buf << static_cast(254) << static_cast(255) + << static_cast(0) << static_cast(49); + pNames->AddNew(CFX_ByteString(buf), true); + pNames->AddNew(100); + + // Check that the key is as expected. + CPDF_NameTree nameTree(pRootDict.get()); + CFX_WideString storedName; + nameTree.LookupValueAndName(0, &storedName); + EXPECT_STREQ(L"1", storedName.c_str()); + + // Check that the correct value object can be obtained by looking up "1". + CFX_WideString matchName = L"1"; + CPDF_Object* pObj = nameTree.LookupValue(matchName); + ASSERT_TRUE(pObj->IsNumber()); + EXPECT_EQ(100, pObj->AsNumber()->GetInteger()); +} -- cgit v1.2.3