From 8e753ca397ea332b29d65736b0e0bf9ad466df56 Mon Sep 17 00:00:00 2001 From: dan sinclair Date: Tue, 17 Apr 2018 18:45:50 +0000 Subject: Convert CFX_XMLParser to take an IFX_SeekableStream This CL changes the CFX_XMLParser to accept an IFX_SeekableStream instead of a CFX_SeekableStreamProxy. Change-Id: I3534288a8a00095acd78a60bf6b925c6a0357892 Reviewed-on: https://pdfium-review.googlesource.com/30856 Reviewed-by: Tom Sepez Commit-Queue: dsinclair --- core/fpdfdoc/cpdf_metadata.cpp | 9 ++++----- core/fxcrt/xml/cfx_xmlparser.cpp | 11 +++++++++-- core/fxcrt/xml/cfx_xmlparser.h | 4 ++-- core/fxcrt/xml/cfx_xmlparser_unittest.cpp | 12 ++++++------ testing/libfuzzer/pdf_xml_fuzzer.cc | 8 +++----- xfa/fxfa/parser/cxfa_document_parser.cpp | 17 +++++------------ xfa/fxfa/parser/cxfa_document_parser.h | 3 +-- xfa/fxfa/parser/cxfa_xmllocale.cpp | 9 ++++----- 8 files changed, 34 insertions(+), 39 deletions(-) diff --git a/core/fpdfdoc/cpdf_metadata.cpp b/core/fpdfdoc/cpdf_metadata.cpp index 56f8c64c0e..161fc93edd 100644 --- a/core/fpdfdoc/cpdf_metadata.cpp +++ b/core/fpdfdoc/cpdf_metadata.cpp @@ -8,6 +8,7 @@ #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/cpdf_stream_acc.h" +#include "core/fxcrt/cfx_memorystream.h" #include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/xml/cfx_xmlelement.h" #include "core/fxcrt/xml/cfx_xmlparser.h" @@ -68,11 +69,9 @@ std::vector CPDF_Metadata::CheckForSharedForm() const { pAcc->LoadAllDataFiltered(); auto root = pdfium::MakeUnique(L"root"); - auto proxy = pdfium::MakeRetain(pAcc->GetData(), - pAcc->GetSize()); - proxy->SetCodePage(FX_CODEPAGE_UTF8); - - CFX_XMLParser parser(root.get(), proxy); + auto stream = pdfium::MakeRetain(pAcc->GetData(), + pAcc->GetSize(), false); + CFX_XMLParser parser(root.get(), stream); if (!parser.Parse()) return {}; diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp index 8292b33206..9336c11f87 100644 --- a/core/fxcrt/xml/cfx_xmlparser.cpp +++ 
b/core/fxcrt/xml/cfx_xmlparser.cpp @@ -10,6 +10,7 @@ #include #include +#include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/fx_extension.h" #include "core/fxcrt/fx_safe_types.h" #include "core/fxcrt/xml/cfx_xmlchardata.h" @@ -81,10 +82,10 @@ bool CFX_XMLParser::IsXMLNameChar(wchar_t ch, bool bFirstChar) { } CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent, - const RetainPtr& pStream) + const RetainPtr& pStream) : m_pParent(pParent), m_pChild(nullptr), - m_pStream(pStream), + m_pStream(pdfium::MakeRetain(pStream)), m_iXMLPlaneSize(1024), m_iCurrentPos(0), m_iCurrentNodeNum(-1), @@ -106,6 +107,12 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent, ASSERT(m_pParent); ASSERT(pStream); + uint16_t wCodePage = m_pStream->GetCodePage(); + if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE && + wCodePage != FX_CODEPAGE_UTF8) { + m_pStream->SetCodePage(FX_CODEPAGE_UTF8); + } + m_NodeStack.push(m_pParent); m_CurNode.iNodeNum = -1; diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h index 02a6ac441f..52d86fb82d 100644 --- a/core/fxcrt/xml/cfx_xmlparser.h +++ b/core/fxcrt/xml/cfx_xmlparser.h @@ -19,7 +19,7 @@ class CFX_XMLElement; class CFX_XMLNode; -class CFX_SeekableStreamProxy; +class IFX_SeekableStream; enum class FX_XmlSyntaxResult { None, @@ -44,7 +44,7 @@ class CFX_XMLParser { static bool IsXMLNameChar(wchar_t ch, bool bFirstChar); CFX_XMLParser(CFX_XMLNode* pParent, - const RetainPtr& pStream); + const RetainPtr& pStream); virtual ~CFX_XMLParser(); bool Parse(); diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp index 97a81f7640..4bd953df25 100644 --- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp +++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp @@ -6,7 +6,7 @@ #include -#include "core/fxcrt/cfx_seekablestreamproxy.h" +#include "core/fxcrt/cfx_memorystream.h" #include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/xml/cfx_xmlnode.h" #include 
"testing/gtest/include/gtest/gtest.h" @@ -18,7 +18,7 @@ namespace { class CFX_XMLTestParser : public CFX_XMLParser { public: CFX_XMLTestParser(CFX_XMLNode* pParent, - const RetainPtr& pStream) + const RetainPtr& pStream) : CFX_XMLParser(pParent, pStream) {} ~CFX_XMLTestParser() override = default; @@ -38,10 +38,10 @@ class CFX_XMLTestParser : public CFX_XMLParser { WideString GetTextData() const { return CFX_XMLParser::GetTextData(); } }; -RetainPtr MakeProxy(const char* input) { - auto stream = pdfium::MakeRetain( - reinterpret_cast(const_cast(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); +RetainPtr MakeProxy(const char* input) { + auto stream = pdfium::MakeRetain( + reinterpret_cast(const_cast(input)), strlen(input), + false); return stream; } diff --git a/testing/libfuzzer/pdf_xml_fuzzer.cc b/testing/libfuzzer/pdf_xml_fuzzer.cc index a8af779259..b9cb01580c 100644 --- a/testing/libfuzzer/pdf_xml_fuzzer.cc +++ b/testing/libfuzzer/pdf_xml_fuzzer.cc @@ -6,7 +6,7 @@ #include #include -#include "core/fxcrt/cfx_seekablestreamproxy.h" +#include "core/fxcrt/cfx_memorystream.h" #include "core/fxcrt/fx_safe_types.h" #include "core/fxcrt/fx_system.h" #include "core/fxcrt/xml/cfx_xmlnode.h" @@ -18,10 +18,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { if (!safe_size.IsValid()) return 0; - RetainPtr stream = - pdfium::MakeRetain(const_cast(data), - size); - + auto stream = pdfium::MakeRetain(const_cast(data), + size, false); auto root = pdfium::MakeUnique(); CFX_XMLParser parser(root.get(), stream); if (!parser.Parse()) diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp index f52d52ff3c..62828d71a7 100644 --- a/xfa/fxfa/parser/cxfa_document_parser.cpp +++ b/xfa/fxfa/parser/cxfa_document_parser.cpp @@ -9,7 +9,7 @@ #include #include -#include "core/fxcrt/cfx_seekablestreamproxy.h" +#include "core/fxcrt/cfx_memorystream.h" #include "core/fxcrt/cfx_widetextbuf.h" #include 
"core/fxcrt/fx_codepage.h" #include "core/fxcrt/fx_extension.h" @@ -329,14 +329,7 @@ CXFA_DocumentParser::~CXFA_DocumentParser() {} bool CXFA_DocumentParser::Parse(const RetainPtr& pStream, XFA_PacketType ePacketID) { - auto pStreamProxy = pdfium::MakeRetain(pStream); - uint16_t wCodePage = pStreamProxy->GetCodePage(); - if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE && - wCodePage != FX_CODEPAGE_UTF8) { - pStreamProxy->SetCodePage(FX_CODEPAGE_UTF8); - } - - m_pNodeTree = LoadXML(pStreamProxy); + m_pNodeTree = LoadXML(pStream); if (!m_pNodeTree) return false; @@ -345,14 +338,14 @@ bool CXFA_DocumentParser::Parse(const RetainPtr& pStream, } CFX_XMLNode* CXFA_DocumentParser::ParseXMLData(const ByteString& wsXML) { - auto pStream = pdfium::MakeRetain( - const_cast(wsXML.raw_str()), wsXML.GetLength()); + auto pStream = pdfium::MakeRetain( + const_cast(wsXML.raw_str()), wsXML.GetLength(), false); m_pNodeTree = LoadXML(pStream); return m_pNodeTree ? GetDocumentNode(m_pNodeTree.get()) : nullptr; } std::unique_ptr CXFA_DocumentParser::LoadXML( - const RetainPtr& pStream) { + const RetainPtr& pStream) { ASSERT(pStream); auto root = pdfium::MakeUnique(); diff --git a/xfa/fxfa/parser/cxfa_document_parser.h b/xfa/fxfa/parser/cxfa_document_parser.h index acd27e6238..d76d5953ac 100644 --- a/xfa/fxfa/parser/cxfa_document_parser.h +++ b/xfa/fxfa/parser/cxfa_document_parser.h @@ -18,7 +18,6 @@ class CXFA_Node; class CFX_XMLDoc; class CFX_XMLInstruction; class IFX_SeekableStream; -class CFX_SeekableStreamProxy; class CXFA_DocumentParser { public: @@ -36,7 +35,7 @@ class CXFA_DocumentParser { private: std::unique_ptr LoadXML( - const RetainPtr& pStream); + const RetainPtr& pStream); CXFA_Node* ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode, XFA_PacketType ePacketID); diff --git a/xfa/fxfa/parser/cxfa_xmllocale.cpp b/xfa/fxfa/parser/cxfa_xmllocale.cpp index 4354ed98de..ffb4cd9dde 100644 --- a/xfa/fxfa/parser/cxfa_xmllocale.cpp +++ 
b/xfa/fxfa/parser/cxfa_xmllocale.cpp @@ -8,6 +8,7 @@ #include +#include "core/fxcrt/cfx_memorystream.h" #include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/xml/cfx_xmlelement.h" #include "core/fxcrt/xml/cfx_xmlparser.h" @@ -30,11 +31,9 @@ constexpr wchar_t kCurrencySymbol[] = L"currencySymbol"; std::unique_ptr CXFA_XMLLocale::Create( pdfium::span data) { auto root = pdfium::MakeUnique(L"root"); - auto proxy = - pdfium::MakeRetain(data.data(), data.size()); - proxy->SetCodePage(FX_CODEPAGE_UTF8); - - CFX_XMLParser parser(root.get(), proxy); + auto stream = + pdfium::MakeRetain(data.data(), data.size(), false); + CFX_XMLParser parser(root.get(), stream); if (!parser.Parse()) return nullptr; -- cgit v1.2.3