From 67ccef73bf664b7cdb4c6eed7acbaa4163c22a80 Mon Sep 17 00:00:00 2001 From: Jane Liu Date: Wed, 19 Jul 2017 13:10:50 -0400 Subject: Use CFX_WideString in CPDF_NameTree functions to strip BOM PDFium doesn't strip BOMs during parsing, but we should strip BOMs when retrieving parsed strings in CPDF_NameTree to ensure consistency and appropriate function behavior. See the bug for more info. As outlined in Bug=pdfium:593, the solution is to call GetUnicodeText() instead of GetString(). I added a GetUnicodeTextAt() function in CPDF_Array, which is symmetrical to GetUnicodeTextFor() in CPDF_Dictionary. I then changed the input variable types to CPDF_NameTree functions to be CFX_WideString instead of CFX_ByteString, and modified all the calls to them. I also added a unit test for nametree, which would fail prior to this change. Nametrees with non-unicode names are already tested by embedder tests. Bug=pdfium:820 Change-Id: Id69d7343632f83d1f5180348c0eea290f478183f Reviewed-on: https://pdfium-review.googlesource.com/8091 Reviewed-by: dsinclair Commit-Queue: Jane Liu --- BUILD.gn | 1 + core/fpdfapi/parser/cpdf_array.cpp | 6 ++++++ core/fpdfapi/parser/cpdf_array.h | 1 + core/fpdfdoc/cpdf_action.cpp | 2 +- core/fpdfdoc/cpdf_bookmark.cpp | 3 ++- core/fpdfdoc/cpdf_docjsactions.cpp | 6 +++--- core/fpdfdoc/cpdf_docjsactions.h | 6 +++--- core/fpdfdoc/cpdf_link.cpp | 2 +- core/fpdfdoc/cpdf_nametree.cpp | 34 ++++++++++++++++---------------- core/fpdfdoc/cpdf_nametree.h | 8 ++++---- core/fpdfdoc/cpdf_nametree_unittest.cpp | 35 +++++++++++++++++++++++++++++++++ fpdfsdk/cpdfsdk_formfillenvironment.cpp | 5 ++--- fpdfsdk/fpdfattachment.cpp | 2 +- fpdfsdk/fpdfview.cpp | 7 ++++--- fpdfsdk/javascript/Document.cpp | 3 +-- xfa/fxfa/cxfa_ffdoc.cpp | 9 ++++----- 16 files changed, 86 insertions(+), 44 deletions(-) create mode 100644 core/fpdfdoc/cpdf_nametree_unittest.cpp diff --git a/BUILD.gn b/BUILD.gn index e02882085b..6bf3947867 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -1917,6 +1917,7 @@ test("pdfium_unittests") { "core/fpdfdoc/cpdf_dest_unittest.cpp", "core/fpdfdoc/cpdf_filespec_unittest.cpp", "core/fpdfdoc/cpdf_formfield_unittest.cpp", + "core/fpdfdoc/cpdf_nametree_unittest.cpp", "core/fpdftext/cpdf_linkextract_unittest.cpp", "core/fxcodec/codec/fx_codec_a85_unittest.cpp", "core/fxcodec/codec/fx_codec_jpx_unittest.cpp", diff --git a/core/fpdfapi/parser/cpdf_array.cpp b/core/fpdfapi/parser/cpdf_array.cpp index ea4ca7eaeb..1f2740ea9c 100644 --- a/core/fpdfapi/parser/cpdf_array.cpp +++ b/core/fpdfapi/parser/cpdf_array.cpp @@ -105,6 +105,12 @@ CFX_ByteString CPDF_Array::GetStringAt(size_t i) const { return m_Objects[i]->GetString(); } +CFX_WideString CPDF_Array::GetUnicodeTextAt(size_t i) const { + if (i >= m_Objects.size()) + return CFX_WideString(); + return m_Objects[i]->GetUnicodeText(); +} + int CPDF_Array::GetIntegerAt(size_t i) const { if (i >= m_Objects.size()) return 0; diff --git a/core/fpdfapi/parser/cpdf_array.h b/core/fpdfapi/parser/cpdf_array.h index bb17c0a427..2590971d80 100644 --- a/core/fpdfapi/parser/cpdf_array.h +++ b/core/fpdfapi/parser/cpdf_array.h @@ -41,6 +41,7 @@ class CPDF_Array : public CPDF_Object { CPDF_Object* GetObjectAt(size_t index) const; CPDF_Object* GetDirectObjectAt(size_t index) const; CFX_ByteString GetStringAt(size_t index) const; + CFX_WideString GetUnicodeTextAt(size_t index) const; int GetIntegerAt(size_t index) const; float GetNumberAt(size_t index) const; CPDF_Dictionary* GetDictAt(size_t index) const; diff --git a/core/fpdfdoc/cpdf_action.cpp b/core/fpdfdoc/cpdf_action.cpp index 88d0781e80..2357580548 100644 --- a/core/fpdfdoc/cpdf_action.cpp +++ b/core/fpdfdoc/cpdf_action.cpp @@ -42,7 +42,7 @@ CPDF_Dest CPDF_Action::GetDest(CPDF_Document* pDoc) const { return CPDF_Dest(); if (pDest->IsString() || pDest->IsName()) { CPDF_NameTree name_tree(pDoc, "Dests"); - return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString())); + return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText())); } if (CPDF_Array* pArray = pDest->AsArray()) return CPDF_Dest(pArray); diff --git a/core/fpdfdoc/cpdf_bookmark.cpp b/core/fpdfdoc/cpdf_bookmark.cpp index 29303f1d32..e84001f846 100644 --- a/core/fpdfdoc/cpdf_bookmark.cpp +++ b/core/fpdfdoc/cpdf_bookmark.cpp @@ -70,7 +70,8 @@ CPDF_Dest CPDF_Bookmark::GetDest(CPDF_Document* pDocument) const { return CPDF_Dest(); if (pDest->IsString() || pDest->IsName()) { CPDF_NameTree name_tree(pDocument, "Dests"); - return CPDF_Dest(name_tree.LookupNamedDest(pDocument, pDest->GetString())); + return CPDF_Dest( + name_tree.LookupNamedDest(pDocument, pDest->GetUnicodeText())); } if (CPDF_Array* pArray = pDest->AsArray()) return CPDF_Dest(pArray); diff --git a/core/fpdfdoc/cpdf_docjsactions.cpp b/core/fpdfdoc/cpdf_docjsactions.cpp index 59dbccce85..669ed7055c 100644 --- a/core/fpdfdoc/cpdf_docjsactions.cpp +++ b/core/fpdfdoc/cpdf_docjsactions.cpp @@ -20,7 +20,7 @@ int CPDF_DocJSActions::CountJSActions() const { CPDF_Action CPDF_DocJSActions::GetJSActionAndName( int index, - CFX_ByteString* csName) const { + CFX_WideString* csName) const { ASSERT(m_pDocument); CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript"); CPDF_Object* pAction = name_tree.LookupValueAndName(index, csName); @@ -28,7 +28,7 @@ CPDF_Action CPDF_DocJSActions::GetJSActionAndName( : CPDF_Action(); } -CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_ByteString& csName) const { +CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_WideString& csName) const { ASSERT(m_pDocument); CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript"); CPDF_Object* pAction = name_tree.LookupValue(csName); @@ -36,7 +36,7 @@ CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_ByteString& csName) const { : CPDF_Action(); } -int CPDF_DocJSActions::FindJSAction(const CFX_ByteString& csName) const { +int CPDF_DocJSActions::FindJSAction(const CFX_WideString& csName) const { ASSERT(m_pDocument); CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript"); return name_tree.GetIndex(csName); diff --git a/core/fpdfdoc/cpdf_docjsactions.h b/core/fpdfdoc/cpdf_docjsactions.h index 328b8869f6..73c0a1ef1c 100644 --- a/core/fpdfdoc/cpdf_docjsactions.h +++ b/core/fpdfdoc/cpdf_docjsactions.h @@ -19,9 +19,9 @@ class CPDF_DocJSActions { ~CPDF_DocJSActions(); int CountJSActions() const; - CPDF_Action GetJSActionAndName(int index, CFX_ByteString* csName) const; - CPDF_Action GetJSAction(const CFX_ByteString& csName) const; - int FindJSAction(const CFX_ByteString& csName) const; + CPDF_Action GetJSActionAndName(int index, CFX_WideString* csName) const; + CPDF_Action GetJSAction(const CFX_WideString& csName) const; + int FindJSAction(const CFX_WideString& csName) const; CPDF_Document* GetDocument() const { return m_pDocument.Get(); } private: diff --git a/core/fpdfdoc/cpdf_link.cpp b/core/fpdfdoc/cpdf_link.cpp index b622094a73..f7aec4087e 100644 --- a/core/fpdfdoc/cpdf_link.cpp +++ b/core/fpdfdoc/cpdf_link.cpp @@ -28,7 +28,7 @@ CPDF_Dest CPDF_Link::GetDest(CPDF_Document* pDoc) { if (pDest->IsString() || pDest->IsName()) { CPDF_NameTree name_tree(pDoc, "Dests"); - return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString())); + return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText())); } if (CPDF_Array* pArray = pDest->AsArray()) return CPDF_Dest(pArray); diff --git a/core/fpdfdoc/cpdf_nametree.cpp b/core/fpdfdoc/cpdf_nametree.cpp index b3808bddba..04cb1b9e40 100644 --- a/core/fpdfdoc/cpdf_nametree.cpp +++ b/core/fpdfdoc/cpdf_nametree.cpp @@ -9,13 +9,14 @@ #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/fpdf_parser_decode.h" namespace { const int nMaxRecursion = 32; CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, - const CFX_ByteString& csName, + const CFX_WideString& csName, size_t& nIndex, CPDF_Array** ppFind, int nLevel = 0) { @@ -24,15 +25,14 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, CPDF_Array* pLimits = pNode->GetArrayFor("Limits"); if (pLimits) { - CFX_ByteString csLeft = pLimits->GetStringAt(0); - CFX_ByteString csRight = pLimits->GetStringAt(1); - if (csLeft.Compare(csRight.AsStringC()) > 0) { - CFX_ByteString csTmp = csRight; + CFX_WideString csLeft = pLimits->GetUnicodeTextAt(0); + CFX_WideString csRight = pLimits->GetUnicodeTextAt(1); + if (csLeft.Compare(csRight) > 0) { + CFX_WideString csTmp = csRight; csRight = csLeft; csLeft = csTmp; } - if (csName.Compare(csLeft.AsStringC()) < 0 || - csName.Compare(csRight.AsStringC()) > 0) { + if (csName.Compare(csLeft) < 0 || csName.Compare(csRight) > 0) { return nullptr; } } @@ -41,8 +41,8 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, if (pNames) { size_t dwCount = pNames->GetCount() / 2; for (size_t i = 0; i < dwCount; i++) { - CFX_ByteString csValue = pNames->GetStringAt(i * 2); - int32_t iCompare = csValue.Compare(csName.AsStringC()); + CFX_WideString csValue = pNames->GetUnicodeTextAt(i * 2); + int32_t iCompare = csValue.Compare(csName); if (iCompare <= 0) { if (ppFind) *ppFind = pNames; @@ -78,7 +78,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, size_t nIndex, size_t& nCurIndex, - CFX_ByteString* csName, + CFX_WideString* csName, CPDF_Array** ppFind, int nLevel = 0) { if (nLevel > nMaxRecursion) @@ -93,7 +93,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, } if (ppFind) *ppFind = pNames; - *csName = pNames->GetStringAt((nIndex - nCurIndex) * 2); + *csName = pNames->GetUnicodeTextAt((nIndex - nCurIndex) * 2); return pNames->GetDirectObjectAt((nIndex - nCurIndex) * 2 + 1); } CPDF_Array* pKids = pNode->GetArrayFor("Kids"); @@ -158,7 +158,7 @@ size_t CPDF_NameTree::GetCount() const { return m_pRoot ? ::CountNames(m_pRoot.Get()) : 0; } -int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const { +int CPDF_NameTree::GetIndex(const CFX_WideString& csName) const { if (!m_pRoot) return -1; @@ -169,8 +169,8 @@ int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const { } CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex, - CFX_ByteString* csName) const { - *csName = CFX_ByteString(); + CFX_WideString* csName) const { + *csName = CFX_WideString(); if (!m_pRoot) return nullptr; @@ -178,7 +178,7 @@ CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex, return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, csName, nullptr); } -CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const { +CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const { if (!m_pRoot) return nullptr; @@ -187,13 +187,13 @@ CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const { } CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc, - const CFX_ByteString& sName) { + const CFX_WideString& sName) { CPDF_Object* pValue = LookupValue(sName); if (!pValue) { CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests"); if (!pDests) return nullptr; - pValue = pDests->GetDirectObjectFor(sName); + pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName)); } if (!pValue) return nullptr; diff --git a/core/fpdfdoc/cpdf_nametree.h b/core/fpdfdoc/cpdf_nametree.h index 69000f32b2..a56f511783 100644 --- a/core/fpdfdoc/cpdf_nametree.h +++ b/core/fpdfdoc/cpdf_nametree.h @@ -21,11 +21,11 @@ class CPDF_NameTree { CPDF_NameTree(CPDF_Document* pDoc, const CFX_ByteString& category); ~CPDF_NameTree(); - CPDF_Object* LookupValueAndName(int nIndex, CFX_ByteString* csName) const; - CPDF_Object* LookupValue(const CFX_ByteString& csName) const; - CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_ByteString& sName); + CPDF_Object* LookupValueAndName(int nIndex, CFX_WideString* csName) const; + CPDF_Object* LookupValue(const CFX_WideString& csName) const; + CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_WideString& sName); - int GetIndex(const CFX_ByteString& csName) const; + int GetIndex(const CFX_WideString& csName) const; size_t GetCount() const; CPDF_Dictionary* GetRoot() const { return m_pRoot.Get(); } diff --git a/core/fpdfdoc/cpdf_nametree_unittest.cpp b/core/fpdfdoc/cpdf_nametree_unittest.cpp new file mode 100644 index 0000000000..28af9e078d --- /dev/null +++ b/core/fpdfdoc/cpdf_nametree_unittest.cpp @@ -0,0 +1,35 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfdoc/cpdf_nametree.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(cpdf_nametree, GetUnicodeNameWithBOM) { + // Set up the root dictionary with a Names array. + auto pRootDict = pdfium::MakeUnique(); + CPDF_Array* pNames = pRootDict->SetNewFor("Names"); + + // Add the key "1" (with BOM) and value 100 into the array. + std::ostringstream buf; + buf << static_cast(254) << static_cast(255) + << static_cast(0) << static_cast(49); + pNames->AddNew(CFX_ByteString(buf), true); + pNames->AddNew(100); + + // Check that the key is as expected. + CPDF_NameTree nameTree(pRootDict.get()); + CFX_WideString storedName; + nameTree.LookupValueAndName(0, &storedName); + EXPECT_STREQ(L"1", storedName.c_str()); + + // Check that the correct value object can be obtained by looking up "1". + CFX_WideString matchName = L"1"; + CPDF_Object* pObj = nameTree.LookupValue(matchName); + ASSERT_TRUE(pObj->IsNumber()); + EXPECT_EQ(100, pObj->AsNumber()->GetInteger()); +} diff --git a/fpdfsdk/cpdfsdk_formfillenvironment.cpp b/fpdfsdk/cpdfsdk_formfillenvironment.cpp index b0ee903ead..de7947556d 100644 --- a/fpdfsdk/cpdfsdk_formfillenvironment.cpp +++ b/fpdfsdk/cpdfsdk_formfillenvironment.cpp @@ -603,10 +603,9 @@ void CPDFSDK_FormFillEnvironment::ProcJavascriptFun() { CPDF_DocJSActions docJS(pPDFDoc); int iCount = docJS.CountJSActions(); for (int i = 0; i < iCount; i++) { - CFX_ByteString csJSName; + CFX_WideString csJSName; CPDF_Action jsAction = docJS.GetJSActionAndName(i, &csJSName); - GetActionHandler()->DoAction_JavaScript( - jsAction, CFX_WideString::FromLocal(csJSName.AsStringC()), this); + GetActionHandler()->DoAction_JavaScript(jsAction, csJSName, this); } } diff --git a/fpdfsdk/fpdfattachment.cpp b/fpdfsdk/fpdfattachment.cpp index 337ab35e0f..7d8cce736b 100644 --- a/fpdfsdk/fpdfattachment.cpp +++ b/fpdfsdk/fpdfattachment.cpp @@ -30,7 +30,7 @@ DLLEXPORT FPDF_ATTACHMENT STDCALL FPDFDoc_GetAttachment(FPDF_DOCUMENT document, if (static_cast(index) >= nameTree.GetCount()) return nullptr; - CFX_ByteString csName; + CFX_WideString csName; return nameTree.LookupValueAndName(index, &csName); } diff --git a/fpdfsdk/fpdfview.cpp b/fpdfsdk/fpdfview.cpp index 06e72b3a71..7dbaf7f9e7 100644 --- a/fpdfsdk/fpdfview.cpp +++ b/fpdfsdk/fpdfview.cpp @@ -1322,7 +1322,7 @@ DLLEXPORT FPDF_DEST STDCALL FPDF_GetNamedDestByName(FPDF_DOCUMENT document, return nullptr; CPDF_NameTree name_tree(pDoc, "Dests"); - return name_tree.LookupNamedDest(pDoc, name); + return name_tree.LookupNamedDest(pDoc, PDF_DecodeText(CFX_ByteString(name))); } #ifdef PDF_ENABLE_XFA @@ -1398,6 +1398,7 @@ DLLEXPORT FPDF_DEST STDCALL FPDF_GetNamedDest(FPDF_DOCUMENT document, CPDF_Object* pDestObj = nullptr; CFX_ByteString bsName; + CFX_WideString wsName; CPDF_NameTree nameTree(pDoc, "Dests"); int count = nameTree.GetCount(); if (index >= count) { @@ -1421,8 +1422,9 @@ DLLEXPORT FPDF_DEST STDCALL FPDF_GetNamedDest(FPDF_DOCUMENT document, break; i++; } + wsName = PDF_DecodeText(bsName); } else { - pDestObj = nameTree.LookupValueAndName(index, &bsName); + pDestObj = nameTree.LookupValueAndName(index, &wsName); } if (!pDestObj) return nullptr; @@ -1434,7 +1436,6 @@ DLLEXPORT FPDF_DEST STDCALL FPDF_GetNamedDest(FPDF_DOCUMENT document, if (!pDestObj->IsArray()) return nullptr; - CFX_WideString wsName = PDF_DecodeText(bsName); CFX_ByteString utf16Name = wsName.UTF16LE_Encode(); int len = utf16Name.GetLength(); if (!buffer) { diff --git a/fpdfsdk/javascript/Document.cpp b/fpdfsdk/javascript/Document.cpp index d5b075da62..ba4b2ae622 100644 --- a/fpdfsdk/javascript/Document.cpp +++ b/fpdfsdk/javascript/Document.cpp @@ -1594,13 +1594,12 @@ bool Document::gotoNamedDest(CJS_Runtime* pRuntime, return false; } CFX_WideString wideName = params[0].ToCFXWideString(pRuntime); - CFX_ByteString utf8Name = wideName.UTF8Encode(); CPDF_Document* pDocument = m_pFormFillEnv->GetPDFDocument(); if (!pDocument) return false; CPDF_NameTree nameTree(pDocument, "Dests"); - CPDF_Array* destArray = nameTree.LookupNamedDest(pDocument, utf8Name); + CPDF_Array* destArray = nameTree.LookupNamedDest(pDocument, wideName); if (!destArray) return false; diff --git a/xfa/fxfa/cxfa_ffdoc.cpp b/xfa/fxfa/cxfa_ffdoc.cpp index 01c570adbe..326fc7f228 100644 --- a/xfa/fxfa/cxfa_ffdoc.cpp +++ b/xfa/fxfa/cxfa_ffdoc.cpp @@ -365,13 +365,12 @@ CFX_RetainPtr CXFA_FFDoc::GetPDFNamedImage( return nullptr; CPDF_NameTree nametree(pXFAImages); - CFX_ByteString bsName = PDF_EncodeText(wsName.c_str(), wsName.GetLength()); - CPDF_Object* pObject = nametree.LookupValue(bsName); + CPDF_Object* pObject = nametree.LookupValue(CFX_WideString(wsName)); if (!pObject) { for (size_t i = 0; i < nametree.GetCount(); i++) { - CFX_ByteString bsTemp; - CPDF_Object* pTempObject = nametree.LookupValueAndName(i, &bsTemp); - if (bsTemp == bsName) { + CFX_WideString wsTemp; + CPDF_Object* pTempObject = nametree.LookupValueAndName(i, &wsTemp); + if (wsTemp == wsName) { pObject = pTempObject; break; } -- cgit v1.2.3