diff options
author | Jane Liu <janeliulwq@google.com> | 2017-07-19 13:10:50 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-07-19 19:09:39 +0000 |
commit | 67ccef73bf664b7cdb4c6eed7acbaa4163c22a80 (patch) | |
tree | 718061bc21fd52eab1bc70a8b9be97585f1d79f8 /core/fpdfdoc | |
parent | eed247e9cb3b0e9ce5dcb8bf6ee7673c9dd3e544 (diff) | |
download | pdfium-67ccef73bf664b7cdb4c6eed7acbaa4163c22a80.tar.xz |
Use CFX_WideString in CPDF_NameTree functions to strip BOMchromium/3162
PDFium doesn't strip BOMs during parsing, but we should strip BOMs when
retrieving parsed strings in CPDF_NameTree to ensure consistency and
appropriate function behavior. See the bug for more info.
As outlined in Bug=pdfium:593, the solution is to call GetUnicodeText()
instead of GetString(). I added a GetUnicodeTextAt() function in
CPDF_Array, which is symmetrical to GetUnicodeTextFor() in
CPDF_Dictionary.
I then changed the input variable types to CPDF_NameTree functions to
be CFX_WideString instead of CFX_ByteString, and modified all the
calls to them.
I also added a unit test for nametree, which would fail prior to this
change. Nametrees with non-unicode names are already tested by embedder
tests.
Bug=pdfium:820
Change-Id: Id69d7343632f83d1f5180348c0eea290f478183f
Reviewed-on: https://pdfium-review.googlesource.com/8091
Reviewed-by: dsinclair <dsinclair@chromium.org>
Commit-Queue: Jane Liu <janeliulwq@google.com>
Diffstat (limited to 'core/fpdfdoc')
-rw-r--r-- | core/fpdfdoc/cpdf_action.cpp | 2 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_bookmark.cpp | 3 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_docjsactions.cpp | 6 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_docjsactions.h | 6 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_link.cpp | 2 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_nametree.cpp | 34 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_nametree.h | 8 | ||||
-rw-r--r-- | core/fpdfdoc/cpdf_nametree_unittest.cpp | 35 |
8 files changed, 66 insertions, 30 deletions
diff --git a/core/fpdfdoc/cpdf_action.cpp b/core/fpdfdoc/cpdf_action.cpp index 88d0781e80..2357580548 100644 --- a/core/fpdfdoc/cpdf_action.cpp +++ b/core/fpdfdoc/cpdf_action.cpp @@ -42,7 +42,7 @@ CPDF_Dest CPDF_Action::GetDest(CPDF_Document* pDoc) const { return CPDF_Dest(); if (pDest->IsString() || pDest->IsName()) { CPDF_NameTree name_tree(pDoc, "Dests"); - return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString())); + return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText())); } if (CPDF_Array* pArray = pDest->AsArray()) return CPDF_Dest(pArray); diff --git a/core/fpdfdoc/cpdf_bookmark.cpp b/core/fpdfdoc/cpdf_bookmark.cpp index 29303f1d32..e84001f846 100644 --- a/core/fpdfdoc/cpdf_bookmark.cpp +++ b/core/fpdfdoc/cpdf_bookmark.cpp @@ -70,7 +70,8 @@ CPDF_Dest CPDF_Bookmark::GetDest(CPDF_Document* pDocument) const { return CPDF_Dest(); if (pDest->IsString() || pDest->IsName()) { CPDF_NameTree name_tree(pDocument, "Dests"); - return CPDF_Dest(name_tree.LookupNamedDest(pDocument, pDest->GetString())); + return CPDF_Dest( + name_tree.LookupNamedDest(pDocument, pDest->GetUnicodeText())); } if (CPDF_Array* pArray = pDest->AsArray()) return CPDF_Dest(pArray); diff --git a/core/fpdfdoc/cpdf_docjsactions.cpp b/core/fpdfdoc/cpdf_docjsactions.cpp index 59dbccce85..669ed7055c 100644 --- a/core/fpdfdoc/cpdf_docjsactions.cpp +++ b/core/fpdfdoc/cpdf_docjsactions.cpp @@ -20,7 +20,7 @@ int CPDF_DocJSActions::CountJSActions() const { CPDF_Action CPDF_DocJSActions::GetJSActionAndName( int index, - CFX_ByteString* csName) const { + CFX_WideString* csName) const { ASSERT(m_pDocument); CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript"); CPDF_Object* pAction = name_tree.LookupValueAndName(index, csName); @@ -28,7 +28,7 @@ CPDF_Action CPDF_DocJSActions::GetJSActionAndName( : CPDF_Action(); } -CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_ByteString& csName) const { +CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_WideString& csName) const { ASSERT(m_pDocument); CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript"); CPDF_Object* pAction = name_tree.LookupValue(csName); @@ -36,7 +36,7 @@ CPDF_Action CPDF_DocJSActions::GetJSAction(const CFX_ByteString& csName) const { : CPDF_Action(); } -int CPDF_DocJSActions::FindJSAction(const CFX_ByteString& csName) const { +int CPDF_DocJSActions::FindJSAction(const CFX_WideString& csName) const { ASSERT(m_pDocument); CPDF_NameTree name_tree(m_pDocument.Get(), "JavaScript"); return name_tree.GetIndex(csName); diff --git a/core/fpdfdoc/cpdf_docjsactions.h b/core/fpdfdoc/cpdf_docjsactions.h index 328b8869f6..73c0a1ef1c 100644 --- a/core/fpdfdoc/cpdf_docjsactions.h +++ b/core/fpdfdoc/cpdf_docjsactions.h @@ -19,9 +19,9 @@ class CPDF_DocJSActions { ~CPDF_DocJSActions(); int CountJSActions() const; - CPDF_Action GetJSActionAndName(int index, CFX_ByteString* csName) const; - CPDF_Action GetJSAction(const CFX_ByteString& csName) const; - int FindJSAction(const CFX_ByteString& csName) const; + CPDF_Action GetJSActionAndName(int index, CFX_WideString* csName) const; + CPDF_Action GetJSAction(const CFX_WideString& csName) const; + int FindJSAction(const CFX_WideString& csName) const; CPDF_Document* GetDocument() const { return m_pDocument.Get(); } private: diff --git a/core/fpdfdoc/cpdf_link.cpp b/core/fpdfdoc/cpdf_link.cpp index b622094a73..f7aec4087e 100644 --- a/core/fpdfdoc/cpdf_link.cpp +++ b/core/fpdfdoc/cpdf_link.cpp @@ -28,7 +28,7 @@ CPDF_Dest CPDF_Link::GetDest(CPDF_Document* pDoc) { if (pDest->IsString() || pDest->IsName()) { CPDF_NameTree name_tree(pDoc, "Dests"); - return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetString())); + return CPDF_Dest(name_tree.LookupNamedDest(pDoc, pDest->GetUnicodeText())); } if (CPDF_Array* pArray = pDest->AsArray()) return CPDF_Dest(pArray); diff --git a/core/fpdfdoc/cpdf_nametree.cpp b/core/fpdfdoc/cpdf_nametree.cpp index b3808bddba..04cb1b9e40 100644 --- a/core/fpdfdoc/cpdf_nametree.cpp +++ b/core/fpdfdoc/cpdf_nametree.cpp @@ -9,13 +9,14 @@ #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" +#include "core/fpdfapi/parser/fpdf_parser_decode.h" namespace { const int nMaxRecursion = 32; CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, - const CFX_ByteString& csName, + const CFX_WideString& csName, size_t& nIndex, CPDF_Array** ppFind, int nLevel = 0) { @@ -24,15 +25,14 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, CPDF_Array* pLimits = pNode->GetArrayFor("Limits"); if (pLimits) { - CFX_ByteString csLeft = pLimits->GetStringAt(0); - CFX_ByteString csRight = pLimits->GetStringAt(1); - if (csLeft.Compare(csRight.AsStringC()) > 0) { - CFX_ByteString csTmp = csRight; + CFX_WideString csLeft = pLimits->GetUnicodeTextAt(0); + CFX_WideString csRight = pLimits->GetUnicodeTextAt(1); + if (csLeft.Compare(csRight) > 0) { + CFX_WideString csTmp = csRight; csRight = csLeft; csLeft = csTmp; } - if (csName.Compare(csLeft.AsStringC()) < 0 || - csName.Compare(csRight.AsStringC()) > 0) { + if (csName.Compare(csLeft) < 0 || csName.Compare(csRight) > 0) { return nullptr; } } @@ -41,8 +41,8 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, if (pNames) { size_t dwCount = pNames->GetCount() / 2; for (size_t i = 0; i < dwCount; i++) { - CFX_ByteString csValue = pNames->GetStringAt(i * 2); - int32_t iCompare = csValue.Compare(csName.AsStringC()); + CFX_WideString csValue = pNames->GetUnicodeTextAt(i * 2); + int32_t iCompare = csValue.Compare(csName); if (iCompare <= 0) { if (ppFind) *ppFind = pNames; @@ -78,7 +78,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, size_t nIndex, size_t& nCurIndex, - CFX_ByteString* csName, + CFX_WideString* csName, CPDF_Array** ppFind, int nLevel = 0) { if (nLevel > nMaxRecursion) @@ -93,7 +93,7 @@ CPDF_Object* SearchNameNode(CPDF_Dictionary* pNode, } if (ppFind) *ppFind = pNames; - *csName = pNames->GetStringAt((nIndex - nCurIndex) * 2); + *csName = pNames->GetUnicodeTextAt((nIndex - nCurIndex) * 2); return pNames->GetDirectObjectAt((nIndex - nCurIndex) * 2 + 1); } CPDF_Array* pKids = pNode->GetArrayFor("Kids"); @@ -158,7 +158,7 @@ size_t CPDF_NameTree::GetCount() const { return m_pRoot ? ::CountNames(m_pRoot.Get()) : 0; } -int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const { +int CPDF_NameTree::GetIndex(const CFX_WideString& csName) const { if (!m_pRoot) return -1; @@ -169,8 +169,8 @@ int CPDF_NameTree::GetIndex(const CFX_ByteString& csName) const { } CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex, - CFX_ByteString* csName) const { - *csName = CFX_ByteString(); + CFX_WideString* csName) const { + *csName = CFX_WideString(); if (!m_pRoot) return nullptr; @@ -178,7 +178,7 @@ CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex, return SearchNameNode(m_pRoot.Get(), nIndex, nCurIndex, csName, nullptr); } -CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const { +CPDF_Object* CPDF_NameTree::LookupValue(const CFX_WideString& csName) const { if (!m_pRoot) return nullptr; @@ -187,13 +187,13 @@ CPDF_Object* CPDF_NameTree::LookupValue(const CFX_ByteString& csName) const { } CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc, - const CFX_ByteString& sName) { + const CFX_WideString& sName) { CPDF_Object* pValue = LookupValue(sName); if (!pValue) { CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests"); if (!pDests) return nullptr; - pValue = pDests->GetDirectObjectFor(sName); + pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName)); } if (!pValue) return nullptr; diff --git a/core/fpdfdoc/cpdf_nametree.h b/core/fpdfdoc/cpdf_nametree.h index 69000f32b2..a56f511783 100644 --- a/core/fpdfdoc/cpdf_nametree.h +++ b/core/fpdfdoc/cpdf_nametree.h @@ -21,11 +21,11 @@ class CPDF_NameTree { CPDF_NameTree(CPDF_Document* pDoc, const CFX_ByteString& category); ~CPDF_NameTree(); - CPDF_Object* LookupValueAndName(int nIndex, CFX_ByteString* csName) const; - CPDF_Object* LookupValue(const CFX_ByteString& csName) const; - CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_ByteString& sName); + CPDF_Object* LookupValueAndName(int nIndex, CFX_WideString* csName) const; + CPDF_Object* LookupValue(const CFX_WideString& csName) const; + CPDF_Array* LookupNamedDest(CPDF_Document* pDoc, const CFX_WideString& sName); - int GetIndex(const CFX_ByteString& csName) const; + int GetIndex(const CFX_WideString& csName) const; size_t GetCount() const; CPDF_Dictionary* GetRoot() const { return m_pRoot.Get(); } diff --git a/core/fpdfdoc/cpdf_nametree_unittest.cpp b/core/fpdfdoc/cpdf_nametree_unittest.cpp new file mode 100644 index 0000000000..28af9e078d --- /dev/null +++ b/core/fpdfdoc/cpdf_nametree_unittest.cpp @@ -0,0 +1,35 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfdoc/cpdf_nametree.h" +#include "core/fpdfapi/parser/cpdf_array.h" +#include "core/fpdfapi/parser/cpdf_dictionary.h" +#include "core/fpdfapi/parser/cpdf_number.h" +#include "core/fpdfapi/parser/cpdf_string.h" +#include "testing/gtest/include/gtest/gtest.h" + +TEST(cpdf_nametree, GetUnicodeNameWithBOM) { + // Set up the root dictionary with a Names array. + auto pRootDict = pdfium::MakeUnique<CPDF_Dictionary>(); + CPDF_Array* pNames = pRootDict->SetNewFor<CPDF_Array>("Names"); + + // Add the key "1" (with BOM) and value 100 into the array. + std::ostringstream buf; + buf << static_cast<unsigned char>(254) << static_cast<unsigned char>(255) + << static_cast<unsigned char>(0) << static_cast<unsigned char>(49); + pNames->AddNew<CPDF_String>(CFX_ByteString(buf), true); + pNames->AddNew<CPDF_Number>(100); + + // Check that the key is as expected. + CPDF_NameTree nameTree(pRootDict.get()); + CFX_WideString storedName; + nameTree.LookupValueAndName(0, &storedName); + EXPECT_STREQ(L"1", storedName.c_str()); + + // Check that the correct value object can be obtained by looking up "1". + CFX_WideString matchName = L"1"; + CPDF_Object* pObj = nameTree.LookupValue(matchName); + ASSERT_TRUE(pObj->IsNumber()); + EXPECT_EQ(100, pObj->AsNumber()->GetInteger()); +} |