summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Sinclair <dsinclair@chromium.org>2018-04-12 13:15:59 +0000
committerChromium commit bot <commit-bot@chromium.org>2018-04-12 13:15:59 +0000
commita995d6fd9b862dbd37aebb9c323766bb5d11d389 (patch)
treedb204479d0c854d1e30840794972d5115cbe80f4
parent332139df2c3c0826069fa61bcd436309fcdf5a6f (diff)
downloadpdfium-a995d6fd9b862dbd37aebb9c323766bb5d11d389.tar.xz
Allow retrieving the XML tree from the CFX_XMLDoc
This CL allows the CXFA_SimpleParser to retrieve the XML tree from the CFX_XMLDoc. This way, we don't have to keep the doc around and can store the pointer to the tree in the CXFA_SimpleParser. Change-Id: I5b478acbe61e6f1ca5fa04d03133a2b327a0cb1c Reviewed-on: https://pdfium-review.googlesource.com/30210 Reviewed-by: Henrique Nakashima <hnakashima@chromium.org> Commit-Queue: dsinclair <dsinclair@chromium.org>
-rw-r--r--core/fxcrt/xml/cfx_xmldoc.cpp16
-rw-r--r--core/fxcrt/xml/cfx_xmldoc.h10
-rw-r--r--core/fxcrt/xml/cfx_xmlparser.cpp28
-rw-r--r--core/fxcrt/xml/cfx_xmlparser.h2
-rw-r--r--testing/libfuzzer/pdf_xml_fuzzer.cc8
-rw-r--r--xfa/fxfa/parser/cxfa_document_parser.cpp15
-rw-r--r--xfa/fxfa/parser/cxfa_simple_parser.cpp60
-rw-r--r--xfa/fxfa/parser/cxfa_simple_parser.h9
8 files changed, 52 insertions, 96 deletions
diff --git a/core/fxcrt/xml/cfx_xmldoc.cpp b/core/fxcrt/xml/cfx_xmldoc.cpp
index 57be180f9a..c14255d432 100644
--- a/core/fxcrt/xml/cfx_xmldoc.cpp
+++ b/core/fxcrt/xml/cfx_xmldoc.cpp
@@ -18,20 +18,14 @@
#include "third_party/base/ptr_util.h"
#include "third_party/base/stl_util.h"
-CFX_XMLDoc::CFX_XMLDoc(const RetainPtr<CFX_SeekableStreamProxy>& pStream)
- : m_pRoot(pdfium::MakeUnique<CFX_XMLNode>()),
- m_pXMLParser(pdfium::MakeUnique<CFX_XMLParser>(m_pRoot.get(), pStream)) {
- ASSERT(pStream);
-
+CFX_XMLDoc::CFX_XMLDoc() : m_pRoot(pdfium::MakeUnique<CFX_XMLNode>()) {
m_pRoot->AppendChild(new CFX_XMLInstruction(L"xml"));
}
CFX_XMLDoc::~CFX_XMLDoc() {}
-int32_t CFX_XMLDoc::Load() {
- return m_pXMLParser->Parse();
-}
-
-void CFX_XMLDoc::CloseXML() {
- m_pXMLParser.reset();
+bool CFX_XMLDoc::Load(const RetainPtr<CFX_SeekableStreamProxy>& pStream) {
+ ASSERT(pStream);
+ CFX_XMLParser parser(m_pRoot.get(), pStream);
+ return parser.Parse();
}
diff --git a/core/fxcrt/xml/cfx_xmldoc.h b/core/fxcrt/xml/cfx_xmldoc.h
index 55976b01b3..915acd325d 100644
--- a/core/fxcrt/xml/cfx_xmldoc.h
+++ b/core/fxcrt/xml/cfx_xmldoc.h
@@ -8,6 +8,7 @@
#define CORE_FXCRT_XML_CFX_XMLDOC_H_
#include <memory>
+#include <utility>
#include "core/fxcrt/cfx_seekablestreamproxy.h"
#include "core/fxcrt/retain_ptr.h"
@@ -16,18 +17,15 @@
class CFX_XMLDoc {
public:
- explicit CFX_XMLDoc(const RetainPtr<CFX_SeekableStreamProxy>& pStream);
+ CFX_XMLDoc();
~CFX_XMLDoc();
- int32_t Load();
- void CloseXML();
+ bool Load(const RetainPtr<CFX_SeekableStreamProxy>& pStream);
- CFX_XMLNode* GetRoot() const { return m_pRoot.get(); }
- CFX_XMLParser* GetParser() const { return m_pXMLParser.get(); }
+ std::unique_ptr<CFX_XMLNode> GetTree() { return std::move(m_pRoot); }
private:
std::unique_ptr<CFX_XMLNode> m_pRoot;
- std::unique_ptr<CFX_XMLParser> m_pXMLParser;
};
#endif // CORE_FXCRT_XML_CFX_XMLDOC_H_
diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp
index 39196d0264..d0acbc7818 100644
--- a/core/fxcrt/xml/cfx_xmlparser.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser.cpp
@@ -132,36 +132,36 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
CFX_XMLParser::~CFX_XMLParser() {}
-int32_t CFX_XMLParser::Parse() {
+bool CFX_XMLParser::Parse() {
int32_t iCount = 0;
while (true) {
FX_XmlSyntaxResult result = DoSyntaxParse();
if (result == FX_XmlSyntaxResult::Error)
- return -1;
+ return false;
if (result == FX_XmlSyntaxResult::EndOfString)
break;
switch (result) {
case FX_XmlSyntaxResult::InstructionClose:
if (m_pChild && m_pChild->GetType() != FX_XMLNODE_Instruction)
- return -1;
+ return false;
m_pChild = m_pParent;
break;
case FX_XmlSyntaxResult::ElementClose:
if (m_pChild->GetType() != FX_XMLNODE_Element)
- return -1;
+ return false;
m_ws1 = GetTagName();
if (m_ws1.GetLength() > 0 &&
m_ws1 != static_cast<CFX_XMLElement*>(m_pChild)->GetName()) {
- return -1;
+ return false;
}
if (!m_NodeStack.empty())
m_NodeStack.pop();
if (m_NodeStack.empty())
- return -1;
+ return false;
m_pParent = m_NodeStack.top();
m_pChild = m_pParent;
@@ -209,7 +209,7 @@ int32_t CFX_XMLParser::Parse() {
case FX_XmlSyntaxResult::TargetData:
if (m_pChild) {
if (m_pChild->GetType() != FX_XMLNODE_Instruction)
- return -1;
+ return false;
auto* instruction = static_cast<CFX_XMLInstruction*>(m_pChild);
if (!m_ws1.IsEmpty())
@@ -226,7 +226,7 @@ int32_t CFX_XMLParser::Parse() {
break;
}
}
- return m_NodeStack.size() != 1 ? -1 : GetStatus();
+ return m_NodeStack.size() != 1 ? false : GetStatus();
}
FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
@@ -698,18 +698,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
int32_t CFX_XMLParser::GetStatus() const {
if (!m_pStream)
- return -1;
+ return false;
int32_t iStreamLength = m_pStream->GetLength();
if (iStreamLength < 1)
- return 100;
-
- if (m_syntaxParserResult == FX_XmlSyntaxResult::Error)
- return -1;
-
- if (m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString)
- return 100;
- return m_iParsedBytes * 100 / iStreamLength;
+ return true;
+ return m_syntaxParserResult != FX_XmlSyntaxResult::Error;
}
FX_FILESIZE CFX_XMLParser::GetCurrentBinaryPos() const {
diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h
index c7587e9151..f4e3c03ee7 100644
--- a/core/fxcrt/xml/cfx_xmlparser.h
+++ b/core/fxcrt/xml/cfx_xmlparser.h
@@ -47,7 +47,7 @@ class CFX_XMLParser {
const RetainPtr<CFX_SeekableStreamProxy>& pStream);
virtual ~CFX_XMLParser();
- int32_t Parse();
+ bool Parse();
protected:
FX_XmlSyntaxResult DoSyntaxParse();
diff --git a/testing/libfuzzer/pdf_xml_fuzzer.cc b/testing/libfuzzer/pdf_xml_fuzzer.cc
index 82627e1496..2ad57cf8bc 100644
--- a/testing/libfuzzer/pdf_xml_fuzzer.cc
+++ b/testing/libfuzzer/pdf_xml_fuzzer.cc
@@ -22,12 +22,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
RetainPtr<CFX_SeekableStreamProxy> stream =
pdfium::MakeRetain<CFX_SeekableStreamProxy>(const_cast<uint8_t*>(data),
size);
- auto doc = pdfium::MakeUnique<CFX_XMLDoc>(stream);
- if (doc->Load() < 100)
+ CFX_XMLDoc doc;
+ if (!doc.Load(stream))
return 0;
- CFX_XMLNode* pXMLFakeRoot = doc->GetRoot();
- for (CFX_XMLNode* pXMLNode = pXMLFakeRoot->GetFirstChild(); pXMLNode;
+ auto root = doc.GetTree();
+ for (CFX_XMLNode* pXMLNode = root->GetFirstChild(); pXMLNode;
pXMLNode = pXMLNode->GetNextSibling()) {
if (pXMLNode->GetType() == FX_XMLNODE_Element)
break;
diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp
index 18f9955210..883f684768 100644
--- a/xfa/fxfa/parser/cxfa_document_parser.cpp
+++ b/xfa/fxfa/parser/cxfa_document_parser.cpp
@@ -20,18 +20,15 @@ CXFA_DocumentParser::~CXFA_DocumentParser() {
int32_t CXFA_DocumentParser::Parse(const RetainPtr<IFX_SeekableStream>& pStream,
XFA_PacketType ePacketID) {
- m_pDocument.reset();
- m_nodeParser.CloseParser();
-
m_pDocument = pdfium::MakeUnique<CXFA_Document>(GetNotify());
m_nodeParser.SetFactory(m_pDocument.get());
- int32_t nRetStatus = m_nodeParser.Parse(pStream, ePacketID);
- if (nRetStatus == XFA_PARSESTATUS_Done) {
- ASSERT(m_pDocument);
- m_pDocument->SetRoot(m_nodeParser.GetRootNode());
- }
- return nRetStatus;
+ if (!m_nodeParser.Parse(pStream, ePacketID))
+ return XFA_PARSESTATUS_StatusErr;
+
+ ASSERT(m_pDocument);
+ m_pDocument->SetRoot(m_nodeParser.GetRootNode());
+ return XFA_PARSESTATUS_Done;
}
CXFA_FFNotify* CXFA_DocumentParser::GetNotify() const {
diff --git a/xfa/fxfa/parser/cxfa_simple_parser.cpp b/xfa/fxfa/parser/cxfa_simple_parser.cpp
index 225271fae8..232f1fbec8 100644
--- a/xfa/fxfa/parser/cxfa_simple_parser.cpp
+++ b/xfa/fxfa/parser/cxfa_simple_parser.cpp
@@ -100,11 +100,8 @@ const PacketInfo* GetPacketByName(const WideStringView& wsName) {
return nullptr;
}
-CFX_XMLNode* GetDocumentNode(CFX_XMLDoc* pXMLDoc) {
- if (!pXMLDoc)
- return nullptr;
-
- for (CFX_XMLNode* pXMLNode = pXMLDoc->GetRoot()->GetFirstChild(); pXMLNode;
+CFX_XMLNode* GetDocumentNode(CFX_XMLNode* pRootNode) {
+ for (CFX_XMLNode* pXMLNode = pRootNode->GetFirstChild(); pXMLNode;
pXMLNode = pXMLNode->GetNextSibling()) {
if (pXMLNode->GetType() != FX_XMLNODE_Element)
continue;
@@ -338,49 +335,33 @@ void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
m_pFactory = pFactory;
}
-int32_t CXFA_SimpleParser::Parse(const RetainPtr<IFX_SeekableStream>& pStream,
- XFA_PacketType ePacketID) {
- CloseParser();
-
- m_pFileRead = pStream;
- m_pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(pStream, false);
- uint16_t wCodePage = m_pStream->GetCodePage();
+bool CXFA_SimpleParser::Parse(const RetainPtr<IFX_SeekableStream>& pStream,
+ XFA_PacketType ePacketID) {
+ auto pStreamProxy =
+ pdfium::MakeRetain<CFX_SeekableStreamProxy>(pStream, false);
+ uint16_t wCodePage = pStreamProxy->GetCodePage();
if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
wCodePage != FX_CODEPAGE_UTF8) {
- m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
+ pStreamProxy->SetCodePage(FX_CODEPAGE_UTF8);
}
- m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>(m_pStream);
- if (!m_pXMLDoc)
- return XFA_PARSESTATUS_StatusErr;
-
- int32_t iRet = m_pXMLDoc->Load();
- if (iRet < 0)
- return XFA_PARSESTATUS_SyntaxErr;
- if (iRet < 100)
- return iRet / 2;
-
- m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), ePacketID);
- m_pXMLDoc->CloseXML();
- m_pStream.Reset();
-
- if (!m_pRootNode)
- return XFA_PARSESTATUS_StatusErr;
+ CFX_XMLDoc doc;
+ if (!doc.Load(pStreamProxy))
+ return false;
- return XFA_PARSESTATUS_Done;
+ m_pNodeTree = doc.GetTree();
+ m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pNodeTree.get()), ePacketID);
+ return !!m_pRootNode;
}
CFX_XMLNode* CXFA_SimpleParser::ParseXMLData(const ByteString& wsXML) {
- CloseParser();
-
auto pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
const_cast<uint8_t*>(wsXML.raw_str()), wsXML.GetLength());
- m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>(pStream);
+ CFX_XMLDoc doc;
+ if (doc.Load(pStream))
+ m_pNodeTree = doc.GetTree();
- int32_t iRet = m_pXMLDoc->Load();
- if (iRet < 0 || iRet >= 100)
- m_pXMLDoc->CloseXML();
- return iRet < 100 ? nullptr : GetDocumentNode(m_pXMLDoc.get());
+ return m_pNodeTree ? GetDocumentNode(m_pNodeTree.get()) : nullptr;
}
void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
@@ -1180,8 +1161,3 @@ void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
}
}
}
-
-void CXFA_SimpleParser::CloseParser() {
- m_pXMLDoc.reset();
- m_pStream.Reset();
-}
diff --git a/xfa/fxfa/parser/cxfa_simple_parser.h b/xfa/fxfa/parser/cxfa_simple_parser.h
index cef9c9af54..61edf8e8e6 100644
--- a/xfa/fxfa/parser/cxfa_simple_parser.h
+++ b/xfa/fxfa/parser/cxfa_simple_parser.h
@@ -25,13 +25,12 @@ class CXFA_SimpleParser {
explicit CXFA_SimpleParser(CXFA_Document* pFactory);
~CXFA_SimpleParser();
- int32_t Parse(const RetainPtr<IFX_SeekableStream>& pStream,
- XFA_PacketType ePacketID);
+ bool Parse(const RetainPtr<IFX_SeekableStream>& pStream,
+ XFA_PacketType ePacketID);
CFX_XMLNode* ParseXMLData(const ByteString& wsXML);
void ConstructXFANode(CXFA_Node* pXFANode, CFX_XMLNode* pXMLNode);
CXFA_Node* GetRootNode() const;
- void CloseParser();
// Called later for the ctor with no parameters.
void SetFactory(CXFA_Document* pFactory);
@@ -71,10 +70,8 @@ class CXFA_SimpleParser {
CFX_XMLInstruction* pXMLInstruction,
XFA_PacketType ePacketID);
- std::unique_ptr<CFX_XMLDoc> m_pXMLDoc;
- RetainPtr<CFX_SeekableStreamProxy> m_pStream;
- RetainPtr<IFX_SeekableStream> m_pFileRead;
UnownedPtr<CXFA_Document> m_pFactory;
+ std::unique_ptr<CFX_XMLNode> m_pNodeTree;
// TODO(dsinclair): Figure out who owns this.
CXFA_Node* m_pRootNode = nullptr;
const bool m_bDocumentParser;