From 6d503b875e6f75f0d8b5f29fcf811a89f12ad12d Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Thu, 12 Apr 2018 13:14:49 +0000 Subject: Cleanup some CFX_XMLParser code This CL cleans up minor nits in the CFX_XMLParser code. Change-Id: Ie19d12d3dcce16c9ce6088160ecdec3d9855c11f Reviewed-on: https://pdfium-review.googlesource.com/30170 Reviewed-by: Ryan Harrison Commit-Queue: dsinclair --- core/fxcrt/xml/cfx_xmldoc.cpp | 10 ++-- core/fxcrt/xml/cfx_xmldoc.h | 3 +- core/fxcrt/xml/cfx_xmlparser.cpp | 87 ++++++++++++++-------------------- core/fxcrt/xml/cfx_xmlparser.h | 5 +- testing/libfuzzer/pdf_xml_fuzzer.cc | 2 +- xfa/fxfa/parser/cxfa_simple_parser.cpp | 5 +- 6 files changed, 44 insertions(+), 68 deletions(-) diff --git a/core/fxcrt/xml/cfx_xmldoc.cpp b/core/fxcrt/xml/cfx_xmldoc.cpp index 64ed5a98d0..57be180f9a 100644 --- a/core/fxcrt/xml/cfx_xmldoc.cpp +++ b/core/fxcrt/xml/cfx_xmldoc.cpp @@ -19,8 +19,7 @@ #include "third_party/base/stl_util.h" CFX_XMLDoc::CFX_XMLDoc(const RetainPtr& pStream) - : m_iStatus(0), - m_pRoot(pdfium::MakeUnique()), + : m_pRoot(pdfium::MakeUnique()), m_pXMLParser(pdfium::MakeUnique(m_pRoot.get(), pStream)) { ASSERT(pStream); @@ -29,11 +28,8 @@ CFX_XMLDoc::CFX_XMLDoc(const RetainPtr& pStream) CFX_XMLDoc::~CFX_XMLDoc() {} -int32_t CFX_XMLDoc::DoLoad() { - if (m_iStatus < 100) - m_iStatus = m_pXMLParser->DoParser(); - - return m_iStatus; +int32_t CFX_XMLDoc::Load() { + return m_pXMLParser->Parse(); } void CFX_XMLDoc::CloseXML() { diff --git a/core/fxcrt/xml/cfx_xmldoc.h b/core/fxcrt/xml/cfx_xmldoc.h index 07be2d4e84..55976b01b3 100644 --- a/core/fxcrt/xml/cfx_xmldoc.h +++ b/core/fxcrt/xml/cfx_xmldoc.h @@ -19,14 +19,13 @@ class CFX_XMLDoc { explicit CFX_XMLDoc(const RetainPtr& pStream); ~CFX_XMLDoc(); - int32_t DoLoad(); + int32_t Load(); void CloseXML(); CFX_XMLNode* GetRoot() const { return m_pRoot.get(); } CFX_XMLParser* GetParser() const { return m_pXMLParser.get(); } private: - int32_t m_iStatus; std::unique_ptr m_pRoot; std::unique_ptr m_pXMLParser; }; diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp index 05e52015bd..76ea32df6c 100644 --- a/core/fxcrt/xml/cfx_xmlparser.cpp +++ b/core/fxcrt/xml/cfx_xmlparser.cpp @@ -15,58 +15,46 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent, const RetainPtr& pStream) - : m_pStream(pStream), - m_pParser(pdfium::MakeUnique(m_pStream)), + : m_pParser(pdfium::MakeUnique(pStream)), m_pParent(pParent), - m_pChild(nullptr), - m_syntaxParserResult(FX_XmlSyntaxResult::None) { - ASSERT(m_pParent && m_pStream); + m_pChild(nullptr) { + ASSERT(m_pParent && pStream); m_NodeStack.push(m_pParent); } CFX_XMLParser::~CFX_XMLParser() {} -int32_t CFX_XMLParser::DoParser() { - if (m_syntaxParserResult == FX_XmlSyntaxResult::Error) - return -1; - if (m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) - return 100; - +int32_t CFX_XMLParser::Parse() { int32_t iCount = 0; while (true) { - m_syntaxParserResult = m_pParser->DoSyntaxParse(); - switch (m_syntaxParserResult) { - case FX_XmlSyntaxResult::InstructionOpen: - break; + FX_XmlSyntaxResult result = m_pParser->DoSyntaxParse(); + if (result == FX_XmlSyntaxResult::Error) + return -1; + if (result == FX_XmlSyntaxResult::EndOfString) + break; + + switch (result) { case FX_XmlSyntaxResult::InstructionClose: - if (m_pChild) { - if (m_pChild->GetType() != FX_XMLNODE_Instruction) { - m_syntaxParserResult = FX_XmlSyntaxResult::Error; - break; - } - } + if (m_pChild && m_pChild->GetType() != FX_XMLNODE_Instruction) + return -1; + m_pChild = m_pParent; break; - case FX_XmlSyntaxResult::ElementOpen: - case FX_XmlSyntaxResult::ElementBreak: - break; case FX_XmlSyntaxResult::ElementClose: - if (m_pChild->GetType() != FX_XMLNODE_Element) { - m_syntaxParserResult = FX_XmlSyntaxResult::Error; - break; - } + if (m_pChild->GetType() != FX_XMLNODE_Element) + return -1; + m_ws1 = m_pParser->GetTagName(); - m_ws2 = static_cast(m_pChild)->GetName(); - if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { - m_syntaxParserResult = FX_XmlSyntaxResult::Error; - break; + if (m_ws1.GetLength() > 0 && + m_ws1 != static_cast(m_pChild)->GetName()) { + return -1; } + if (!m_NodeStack.empty()) m_NodeStack.pop(); - if (m_NodeStack.empty()) { - m_syntaxParserResult = FX_XmlSyntaxResult::Error; - break; - } + if (m_NodeStack.empty()) + return -1; + m_pParent = m_NodeStack.top(); m_pChild = m_pParent; iCount++; @@ -92,10 +80,9 @@ int32_t CFX_XMLParser::DoParser() { m_ws1 = m_pParser->GetAttributeName(); break; case FX_XmlSyntaxResult::AttriValue: - if (m_pChild) { - m_ws2 = m_pParser->GetAttributeName(); - if (m_pChild->GetType() == FX_XMLNODE_Element) - static_cast(m_pChild)->SetString(m_ws1, m_ws2); + if (m_pChild && m_pChild->GetType() == FX_XMLNODE_Element) { + static_cast(m_pChild)->SetString( + m_ws1, m_pParser->GetAttributeName()); } m_ws1.clear(); break; @@ -113,27 +100,23 @@ int32_t CFX_XMLParser::DoParser() { break; case FX_XmlSyntaxResult::TargetData: if (m_pChild) { - if (m_pChild->GetType() != FX_XMLNODE_Instruction) { - m_syntaxParserResult = FX_XmlSyntaxResult::Error; - break; - } + if (m_pChild->GetType() != FX_XMLNODE_Instruction) + return -1; + auto* instruction = static_cast(m_pChild); if (!m_ws1.IsEmpty()) instruction->AppendData(m_ws1); + instruction->AppendData(m_pParser->GetTargetData()); } m_ws1.clear(); break; + case FX_XmlSyntaxResult::ElementOpen: + case FX_XmlSyntaxResult::ElementBreak: + case FX_XmlSyntaxResult::InstructionOpen: default: break; } - if (m_syntaxParserResult == FX_XmlSyntaxResult::Error || - m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) { - break; - } } - return (m_syntaxParserResult == FX_XmlSyntaxResult::Error || - m_NodeStack.size() != 1) - ? -1 - : m_pParser->GetStatus(); + return m_NodeStack.size() != 1 ? -1 : m_pParser->GetStatus(); } diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h index 2998a44ad5..8c4c354699 100644 --- a/core/fxcrt/xml/cfx_xmlparser.h +++ b/core/fxcrt/xml/cfx_xmlparser.h @@ -24,17 +24,14 @@ class CFX_XMLParser { const RetainPtr& pStream); ~CFX_XMLParser(); - int32_t DoParser(); + int32_t Parse(); private: - RetainPtr m_pStream; std::unique_ptr m_pParser; CFX_XMLNode* m_pParent; CFX_XMLNode* m_pChild; std::stack m_NodeStack; WideString m_ws1; - WideString m_ws2; - FX_XmlSyntaxResult m_syntaxParserResult; }; #endif // CORE_FXCRT_XML_CFX_XMLPARSER_H_ diff --git a/testing/libfuzzer/pdf_xml_fuzzer.cc b/testing/libfuzzer/pdf_xml_fuzzer.cc index 072f86c702..82627e1496 100644 --- a/testing/libfuzzer/pdf_xml_fuzzer.cc +++ b/testing/libfuzzer/pdf_xml_fuzzer.cc @@ -23,7 +23,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { pdfium::MakeRetain(const_cast(data), size); auto doc = pdfium::MakeUnique(stream); - if (doc->DoLoad() < 100) + if (doc->Load() < 100) return 0; CFX_XMLNode* pXMLFakeRoot = doc->GetRoot(); diff --git a/xfa/fxfa/parser/cxfa_simple_parser.cpp b/xfa/fxfa/parser/cxfa_simple_parser.cpp index b916b4dfc4..225271fae8 100644 --- a/xfa/fxfa/parser/cxfa_simple_parser.cpp +++ b/xfa/fxfa/parser/cxfa_simple_parser.cpp @@ -341,6 +341,7 @@ void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) { int32_t CXFA_SimpleParser::Parse(const RetainPtr& pStream, XFA_PacketType ePacketID) { CloseParser(); + m_pFileRead = pStream; m_pStream = pdfium::MakeRetain(pStream, false); uint16_t wCodePage = m_pStream->GetCodePage(); @@ -353,7 +354,7 @@ int32_t CXFA_SimpleParser::Parse(const RetainPtr& pStream, if (!m_pXMLDoc) return XFA_PARSESTATUS_StatusErr; - int32_t iRet = m_pXMLDoc->DoLoad(); + int32_t iRet = m_pXMLDoc->Load(); if (iRet < 0) return XFA_PARSESTATUS_SyntaxErr; if (iRet < 100) @@ -376,7 +377,7 @@ CFX_XMLNode* CXFA_SimpleParser::ParseXMLData(const ByteString& wsXML) { const_cast(wsXML.raw_str()), wsXML.GetLength()); m_pXMLDoc = pdfium::MakeUnique(pStream); - int32_t iRet = m_pXMLDoc->DoLoad(); + int32_t iRet = m_pXMLDoc->Load(); if (iRet < 0 || iRet >= 100) m_pXMLDoc->CloseXML(); return iRet < 100 ? nullptr : GetDocumentNode(m_pXMLDoc.get()); -- cgit v1.2.3