From e7d5c7f48ead8f5554105da9637fd9de1c093d67 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Thu, 12 Apr 2018 13:41:09 +0000 Subject: Remove CFX_XMLDoc and call the parser directly This CL removes the CFX_XMLDoc and calls the CFX_XMLParser directly from CXFA_DocumentParser. Change-Id: I4d715cca90cd15b5e1d79827e0bb2781e873e371 Reviewed-on: https://pdfium-review.googlesource.com/30251 Reviewed-by: Henrique Nakashima Commit-Queue: dsinclair --- BUILD.gn | 2 -- core/fxcrt/xml/cfx_xmldoc.cpp | 31 ------------------------------- core/fxcrt/xml/cfx_xmldoc.h | 31 ------------------------------- testing/libfuzzer/pdf_xml_fuzzer.cc | 8 ++++---- xfa/fxfa/parser/cxfa_dataexporter.cpp | 1 - xfa/fxfa/parser/cxfa_document_parser.cpp | 22 ++++++++++++++-------- xfa/fxfa/parser/cxfa_document_parser.h | 3 +++ 7 files changed, 21 insertions(+), 77 deletions(-) delete mode 100644 core/fxcrt/xml/cfx_xmldoc.cpp delete mode 100644 core/fxcrt/xml/cfx_xmldoc.h diff --git a/BUILD.gn b/BUILD.gn index 7599b77c6f..87de013c89 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -953,8 +953,6 @@ jumbo_static_library("fxcrt") { "core/fxcrt/xml/cfx_xmlattributenode.h", "core/fxcrt/xml/cfx_xmlchardata.cpp", "core/fxcrt/xml/cfx_xmlchardata.h", - "core/fxcrt/xml/cfx_xmldoc.cpp", - "core/fxcrt/xml/cfx_xmldoc.h", "core/fxcrt/xml/cfx_xmlelement.cpp", "core/fxcrt/xml/cfx_xmlelement.h", "core/fxcrt/xml/cfx_xmlinstruction.cpp", diff --git a/core/fxcrt/xml/cfx_xmldoc.cpp b/core/fxcrt/xml/cfx_xmldoc.cpp deleted file mode 100644 index c14255d432..0000000000 --- a/core/fxcrt/xml/cfx_xmldoc.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/xml/cfx_xmldoc.h" - -#include -#include - -#include "core/fxcrt/fx_codepage.h" -#include "core/fxcrt/xml/cfx_xmlchardata.h" -#include "core/fxcrt/xml/cfx_xmlelement.h" -#include "core/fxcrt/xml/cfx_xmlinstruction.h" -#include "core/fxcrt/xml/cfx_xmlnode.h" -#include "core/fxcrt/xml/cfx_xmltext.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" - -CFX_XMLDoc::CFX_XMLDoc() : m_pRoot(pdfium::MakeUnique()) { - m_pRoot->AppendChild(new CFX_XMLInstruction(L"xml")); -} - -CFX_XMLDoc::~CFX_XMLDoc() {} - -bool CFX_XMLDoc::Load(const RetainPtr& pStream) { - ASSERT(pStream); - CFX_XMLParser parser(m_pRoot.get(), pStream); - return parser.Parse(); -} diff --git a/core/fxcrt/xml/cfx_xmldoc.h b/core/fxcrt/xml/cfx_xmldoc.h deleted file mode 100644 index 915acd325d..0000000000 --- a/core/fxcrt/xml/cfx_xmldoc.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CFX_XMLDOC_H_ -#define CORE_FXCRT_XML_CFX_XMLDOC_H_ - -#include -#include - -#include "core/fxcrt/cfx_seekablestreamproxy.h" -#include "core/fxcrt/retain_ptr.h" -#include "core/fxcrt/xml/cfx_xmlnode.h" -#include "core/fxcrt/xml/cfx_xmlparser.h" - -class CFX_XMLDoc { - public: - CFX_XMLDoc(); - ~CFX_XMLDoc(); - - bool Load(const RetainPtr& pStream); - - std::unique_ptr GetTree() { return std::move(m_pRoot); } - - private: - std::unique_ptr m_pRoot; -}; - -#endif // CORE_FXCRT_XML_CFX_XMLDOC_H_ diff --git a/testing/libfuzzer/pdf_xml_fuzzer.cc b/testing/libfuzzer/pdf_xml_fuzzer.cc index 2ad57cf8bc..a8af779259 100644 --- a/testing/libfuzzer/pdf_xml_fuzzer.cc +++ b/testing/libfuzzer/pdf_xml_fuzzer.cc @@ -9,7 +9,6 @@ #include "core/fxcrt/cfx_seekablestreamproxy.h" #include "core/fxcrt/fx_safe_types.h" #include "core/fxcrt/fx_system.h" -#include "core/fxcrt/xml/cfx_xmldoc.h" #include "core/fxcrt/xml/cfx_xmlnode.h" #include "core/fxcrt/xml/cfx_xmlparser.h" #include "third_party/base/ptr_util.h" @@ -22,11 +21,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { RetainPtr stream = pdfium::MakeRetain(const_cast(data), size); - CFX_XMLDoc doc; - if (!doc.Load(stream)) + + auto root = pdfium::MakeUnique(); + CFX_XMLParser parser(root.get(), stream); + if (!parser.Parse()) return 0; - auto root = doc.GetTree(); for (CFX_XMLNode* pXMLNode = root->GetFirstChild(); pXMLNode; pXMLNode = pXMLNode->GetNextSibling()) { if (pXMLNode->GetType() == FX_XMLNODE_Element) diff --git a/xfa/fxfa/parser/cxfa_dataexporter.cpp b/xfa/fxfa/parser/cxfa_dataexporter.cpp index 7773aeb190..1418d030b5 100644 --- a/xfa/fxfa/parser/cxfa_dataexporter.cpp +++ b/xfa/fxfa/parser/cxfa_dataexporter.cpp @@ -7,7 +7,6 @@ #include "xfa/fxfa/parser/cxfa_dataexporter.h" #include "core/fxcrt/fx_codepage.h" -#include "core/fxcrt/xml/cfx_xmldoc.h" #include "core/fxcrt/xml/cfx_xmlelement.h" #include "core/fxcrt/xml/cfx_xmlnode.h" #include "third_party/base/stl_util.h" diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp index 707dfc11d5..c2cc1deb88 100644 --- a/xfa/fxfa/parser/cxfa_document_parser.cpp +++ b/xfa/fxfa/parser/cxfa_document_parser.cpp @@ -14,7 +14,6 @@ #include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/fx_extension.h" #include "core/fxcrt/xml/cfx_xmlchardata.h" -#include "core/fxcrt/xml/cfx_xmldoc.h" #include "core/fxcrt/xml/cfx_xmlelement.h" #include "core/fxcrt/xml/cfx_xmlinstruction.h" #include "core/fxcrt/xml/cfx_xmlnode.h" @@ -345,11 +344,10 @@ bool CXFA_DocumentParser::Parse(const RetainPtr& pStream, pStreamProxy->SetCodePage(FX_CODEPAGE_UTF8); } - CFX_XMLDoc doc; - if (!doc.Load(pStreamProxy)) + m_pNodeTree = LoadXML(pStreamProxy); + if (!m_pNodeTree) return false; - m_pNodeTree = doc.GetTree(); m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pNodeTree.get()), ePacketID); return !!m_pRootNode; } @@ -357,13 +355,21 @@ bool CXFA_DocumentParser::Parse(const RetainPtr& pStream, CFX_XMLNode* CXFA_DocumentParser::ParseXMLData(const ByteString& wsXML) { auto pStream = pdfium::MakeRetain( const_cast(wsXML.raw_str()), wsXML.GetLength()); - CFX_XMLDoc doc; - if (doc.Load(pStream)) - m_pNodeTree = doc.GetTree(); - + m_pNodeTree = LoadXML(pStream); return m_pNodeTree ? GetDocumentNode(m_pNodeTree.get()) : nullptr; } +std::unique_ptr CXFA_DocumentParser::LoadXML( + const RetainPtr& pStream) { + ASSERT(pStream); + + auto root = pdfium::MakeUnique(); + root->AppendChild(new CFX_XMLInstruction(L"xml")); + + CFX_XMLParser parser(root.get(), pStream); + return parser.Parse() ? std::move(root) : nullptr; +} + void CXFA_DocumentParser::ConstructXFANode(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode) { XFA_PacketType ePacketID = pXFANode->GetPacketType(); diff --git a/xfa/fxfa/parser/cxfa_document_parser.h b/xfa/fxfa/parser/cxfa_document_parser.h index e41bf7bf07..8899494c79 100644 --- a/xfa/fxfa/parser/cxfa_document_parser.h +++ b/xfa/fxfa/parser/cxfa_document_parser.h @@ -39,6 +39,9 @@ class CXFA_DocumentParser { void SetFactory(CXFA_Document* pFactory); private: + std::unique_ptr LoadXML( + const RetainPtr& pStream); + CXFA_Node* ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode, XFA_PacketType ePacketID); CXFA_Node* ParseAsXDPPacket_XDP(CFX_XMLNode* pXMLDocumentNode); -- cgit v1.2.3