diff options
27 files changed, 351 insertions, 309 deletions
@@ -1324,6 +1324,8 @@ if (pdf_enable_xfa) { "xfa/fxfa/parser/cxfa_corner.h", "xfa/fxfa/parser/cxfa_data.cpp", "xfa/fxfa/parser/cxfa_data.h", + "xfa/fxfa/parser/cxfa_document_parser.cpp", + "xfa/fxfa/parser/cxfa_document_parser.h", "xfa/fxfa/parser/cxfa_edge.h", "xfa/fxfa/parser/cxfa_event.cpp", "xfa/fxfa/parser/cxfa_event.h", @@ -1346,6 +1348,8 @@ if (pdf_enable_xfa) { "xfa/fxfa/parser/cxfa_rectangle.h", "xfa/fxfa/parser/cxfa_script.cpp", "xfa/fxfa/parser/cxfa_script.h", + "xfa/fxfa/parser/cxfa_simple_parser.cpp", + "xfa/fxfa/parser/cxfa_simple_parser.h", "xfa/fxfa/parser/cxfa_stroke.cpp", "xfa/fxfa/parser/cxfa_stroke.h", "xfa/fxfa/parser/cxfa_submit.cpp", @@ -1362,6 +1366,8 @@ if (pdf_enable_xfa) { "xfa/fxfa/parser/cxfa_valuearray.h", "xfa/fxfa/parser/cxfa_widgetdata.cpp", "xfa/fxfa/parser/cxfa_widgetdata.h", + "xfa/fxfa/parser/cxfa_xml_parser.cpp", + "xfa/fxfa/parser/cxfa_xml_parser.h", "xfa/fxfa/parser/xfa_basic_data.cpp", "xfa/fxfa/parser/xfa_basic_data.h", "xfa/fxfa/parser/xfa_basic_data_attributes.cpp", @@ -1395,8 +1401,6 @@ if (pdf_enable_xfa) { "xfa/fxfa/parser/xfa_localevalue.h", "xfa/fxfa/parser/xfa_object.h", "xfa/fxfa/parser/xfa_object_imp.cpp", - "xfa/fxfa/parser/xfa_parser_imp.cpp", - "xfa/fxfa/parser/xfa_parser_imp.h", "xfa/fxfa/parser/xfa_script.h", "xfa/fxfa/parser/xfa_script_datawindow.cpp", "xfa/fxfa/parser/xfa_script_datawindow.h", @@ -1535,7 +1539,7 @@ test("pdfium_embeddertests") { "testing/embedder_test.h", "testing/embedder_test_mock_delegate.h", "testing/embedder_test_timer_handling_delegate.h", - "xfa/fxfa/parser/xfa_parser_imp_embeddertest.cpp", + "xfa/fxfa/parser/cxfa_simple_parser_embeddertest.cpp", ] deps = [ ":pdfium", diff --git a/pdfium.gyp b/pdfium.gyp index e54223cc2f..3824c756a9 100644 --- a/pdfium.gyp +++ b/pdfium.gyp @@ -961,7 +961,7 @@ 'conditions': [ ['pdf_enable_xfa==1', { 'sources': [ - 'xfa/fxfa/parser/xfa_parser_imp_embeddertest.cpp', + 'xfa/fxfa/parser/cxfa_simple_parser_embeddertest.cpp', ], }], ['pdf_enable_v8==1', { diff --git a/testing/libfuzzer/pdf_xml_fuzzer.cc b/testing/libfuzzer/pdf_xml_fuzzer.cc index c93a5c293a..0b80b1a04c 100644 --- a/testing/libfuzzer/pdf_xml_fuzzer.cc +++ b/testing/libfuzzer/pdf_xml_fuzzer.cc @@ -9,7 +9,8 @@ #include "core/fxcrt/include/fx_basic.h" #include "core/fxcrt/include/fx_system.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" +#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fxfa/parser/cxfa_xml_parser.h" #include "xfa/fxfa/parser/xfa_utils.h" namespace { @@ -474,6 +474,8 @@ "xfa/fxfa/parser/cxfa_corner.h", "xfa/fxfa/parser/cxfa_data.cpp", "xfa/fxfa/parser/cxfa_data.h", + "xfa/fxfa/parser/cxfa_document_parser.cpp", + "xfa/fxfa/parser/cxfa_document_parser.h", "xfa/fxfa/parser/cxfa_edge.h", "xfa/fxfa/parser/cxfa_event.cpp", "xfa/fxfa/parser/cxfa_event.h", @@ -496,6 +498,8 @@ "xfa/fxfa/parser/cxfa_rectangle.h", "xfa/fxfa/parser/cxfa_script.cpp", "xfa/fxfa/parser/cxfa_script.h", + "xfa/fxfa/parser/cxfa_simple_parser.cpp", + "xfa/fxfa/parser/cxfa_simple_parser.h", "xfa/fxfa/parser/cxfa_stroke.cpp", "xfa/fxfa/parser/cxfa_stroke.h", "xfa/fxfa/parser/cxfa_submit.cpp", @@ -512,6 +516,8 @@ "xfa/fxfa/parser/cxfa_valuearray.h", "xfa/fxfa/parser/cxfa_widgetdata.cpp", "xfa/fxfa/parser/cxfa_widgetdata.h", + "xfa/fxfa/parser/cxfa_xml_parser.cpp", + "xfa/fxfa/parser/cxfa_xml_parser.h", "xfa/fxfa/parser/xfa_basic_data.cpp", "xfa/fxfa/parser/xfa_basic_data_attributes.cpp", "xfa/fxfa/parser/xfa_basic_data_element_attributes.cpp", @@ -545,8 +551,6 @@ "xfa/fxfa/parser/xfa_localevalue.h", "xfa/fxfa/parser/xfa_object.h", "xfa/fxfa/parser/xfa_object_imp.cpp", - "xfa/fxfa/parser/xfa_parser_imp.cpp", - "xfa/fxfa/parser/xfa_parser_imp.h", "xfa/fxfa/parser/xfa_script.h", "xfa/fxfa/parser/xfa_script_datawindow.cpp", "xfa/fxfa/parser/xfa_script_datawindow.h", diff --git a/xfa/fxfa/app/xfa_ffdoc.cpp b/xfa/fxfa/app/xfa_ffdoc.cpp index 6320cb9b2a..294839152e 100644 --- a/xfa/fxfa/app/xfa_ffdoc.cpp +++ b/xfa/fxfa/app/xfa_ffdoc.cpp @@ -23,8 +23,6 @@ #include "xfa/fxfa/include/xfa_fontmgr.h" #include "xfa/fxfa/parser/xfa_document.h" #include "xfa/fxfa/parser/xfa_document_serialize.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" namespace { @@ -168,7 +166,7 @@ uint32_t CXFA_FFDoc::GetDocType() { int32_t CXFA_FFDoc::StartLoad() { m_pNotify.reset(new CXFA_FFNotify(this)); m_pDocumentParser.reset(new CXFA_DocumentParser(m_pNotify.get())); - int32_t iStatus = m_pDocumentParser->StartParse(m_pStream); + int32_t iStatus = m_pDocumentParser->StartParse(m_pStream, XFA_XDPPACKET_XDP); return iStatus; } @@ -275,7 +273,8 @@ int32_t CXFA_FFDoc::DoLoad(IFX_Pause* pPause) { return XFA_PARSESTATUS_SyntaxErr; CXFA_Node* pRootNode = nullptr; - if (pParser->StartParse(m_pStream) == XFA_PARSESTATUS_Ready && + if (pParser->StartParse(m_pStream, XFA_XDPPACKET_XDP) == + XFA_PARSESTATUS_Ready && pParser->DoParse(nullptr) == XFA_PARSESTATUS_Done) { pRootNode = pParser->GetRootNode(); } diff --git a/xfa/fxfa/app/xfa_ffwidgethandler.cpp b/xfa/fxfa/app/xfa_ffwidgethandler.cpp index b60b7fc0a2..8e2b9dfb83 100644 --- a/xfa/fxfa/app/xfa_ffwidgethandler.cpp +++ b/xfa/fxfa/app/xfa_ffwidgethandler.cpp @@ -15,7 +15,6 @@ #include "xfa/fxfa/include/xfa_ffdocview.h" #include "xfa/fxfa/include/xfa_ffwidget.h" #include "xfa/fxfa/parser/xfa_document_layout_imp.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" CXFA_FFWidgetHandler::CXFA_FFWidgetHandler(CXFA_FFDocView* pDocView) : m_pDocView(pDocView) {} diff --git a/xfa/fxfa/fm2js/xfa_fm2jscontext.cpp b/xfa/fxfa/fm2js/xfa_fm2jscontext.cpp index 5813c5204a..6826094b1d 100644 --- a/xfa/fxfa/fm2js/xfa_fm2jscontext.cpp +++ b/xfa/fxfa/fm2js/xfa_fm2jscontext.cpp @@ -17,7 +17,6 @@ #include "xfa/fxfa/fm2js/xfa_program.h" #include "xfa/fxfa/parser/xfa_document.h" #include "xfa/fxfa/parser/xfa_localevalue.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script_imp.h" namespace { diff --git a/xfa/fxfa/include/xfa_ffdoc.h b/xfa/fxfa/include/xfa_ffdoc.h index b8795bc211..0f32c4300c 100644 --- a/xfa/fxfa/include/xfa_ffdoc.h +++ b/xfa/fxfa/include/xfa_ffdoc.h @@ -11,6 +11,7 @@ #include <memory> #include "xfa/fxfa/include/fxfa.h" +#include "xfa/fxfa/parser/cxfa_document_parser.h" #include "xfa/fxfa/parser/xfa_document.h" class CXFA_ChecksumContext; diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp new file mode 100644 index 0000000000..565916ba93 --- /dev/null +++ b/xfa/fxfa/parser/cxfa_document_parser.cpp @@ -0,0 +1,54 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fxfa/parser/cxfa_document_parser.h" + +#include "xfa/fxfa/include/fxfa.h" +#include "xfa/fxfa/parser/xfa_document.h" + +CXFA_DocumentParser::CXFA_DocumentParser(CXFA_FFNotify* pNotify) + : m_nodeParser(nullptr, TRUE), m_pNotify(pNotify) {} + +CXFA_DocumentParser::~CXFA_DocumentParser() { + CloseParser(); +} + +int32_t CXFA_DocumentParser::StartParse(IFX_FileRead* pStream, + XFA_XDPPACKET ePacketID) { + CloseParser(); + int32_t nRetStatus = m_nodeParser.StartParse(pStream, ePacketID); + if (nRetStatus == XFA_PARSESTATUS_Ready) { + m_pDocument.reset(new CXFA_Document(this)); + m_nodeParser.SetFactory(m_pDocument.get()); + } + return nRetStatus; +} + +int32_t CXFA_DocumentParser::DoParse(IFX_Pause* pPause) { + int32_t nRetStatus = m_nodeParser.DoParse(pPause); + if (nRetStatus >= XFA_PARSESTATUS_Done) { + ASSERT(m_pDocument); + m_pDocument->SetRoot(m_nodeParser.GetRootNode()); + } + return nRetStatus; +} + +CFDE_XMLDoc* CXFA_DocumentParser::GetXMLDoc() const { + return m_nodeParser.GetXMLDoc(); +} + +CXFA_FFNotify* CXFA_DocumentParser::GetNotify() const { + return m_pNotify; +} + +CXFA_Document* CXFA_DocumentParser::GetDocument() const { + return m_pDocument.get(); +} + +void CXFA_DocumentParser::CloseParser() { + m_pDocument.reset(); + m_nodeParser.CloseParser(); +} diff --git a/xfa/fxfa/parser/cxfa_document_parser.h b/xfa/fxfa/parser/cxfa_document_parser.h new file mode 100644 index 0000000000..42275df1f1 --- /dev/null +++ b/xfa/fxfa/parser/cxfa_document_parser.h @@ -0,0 +1,40 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FXFA_PARSER_CXFA_DOCUMENT_PARSER_H_ +#define XFA_FXFA_PARSER_CXFA_DOCUMENT_PARSER_H_ + +#include <memory> + +#include "xfa/fxfa/parser/cxfa_simple_parser.h" + +class CFDE_XMLDoc; +class CXFA_Document; +class CXFA_FFNotify; +class CXFA_Notify; +class IFX_FileRead; +class IFX_Pause; + +class CXFA_DocumentParser { + public: + explicit CXFA_DocumentParser(CXFA_FFNotify* pNotify); + ~CXFA_DocumentParser(); + + int32_t StartParse(IFX_FileRead* pStream, XFA_XDPPACKET ePacketID); + int32_t DoParse(IFX_Pause* pPause); + + CFDE_XMLDoc* GetXMLDoc() const; + CXFA_FFNotify* GetNotify() const; + CXFA_Document* GetDocument() const; + void CloseParser(); + + protected: + CXFA_SimpleParser m_nodeParser; + CXFA_FFNotify* m_pNotify; + std::unique_ptr<CXFA_Document> m_pDocument; +}; + +#endif // XFA_FXFA_PARSER_CXFA_DOCUMENT_PARSER_H_ diff --git a/xfa/fxfa/parser/xfa_parser_imp.cpp b/xfa/fxfa/parser/cxfa_simple_parser.cpp index e4387a7838..c9edf2daff 100644 --- a/xfa/fxfa/parser/xfa_parser_imp.cpp +++ b/xfa/fxfa/parser/cxfa_simple_parser.cpp @@ -1,23 +1,16 @@ -// Copyright 2014 PDFium Authors. All rights reserved. +// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com -#include "xfa/fxfa/parser/xfa_parser_imp.h" +#include "xfa/fxfa/parser/cxfa_simple_parser.h" -#include <memory> - -#include "xfa/fde/xml/fde_xml_imp.h" #include "xfa/fgas/crt/fgas_codepage.h" +#include "xfa/fxfa/include/fxfa.h" #include "xfa/fxfa/include/xfa_checksum.h" -#include "xfa/fxfa/parser/xfa_basic_imp.h" -#include "xfa/fxfa/parser/xfa_doclayout.h" +#include "xfa/fxfa/parser/cxfa_xml_parser.h" #include "xfa/fxfa/parser/xfa_document.h" -#include "xfa/fxfa/parser/xfa_localemgr.h" -#include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_script.h" -#include "xfa/fxfa/parser/xfa_utils.h" CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory, bool bDocumentParser) @@ -1323,220 +1316,3 @@ void CXFA_SimpleParser::CloseParser() { m_pStream = nullptr; } } - -CXFA_DocumentParser::CXFA_DocumentParser(CXFA_FFNotify* pNotify) - : m_nodeParser(nullptr, TRUE), m_pNotify(pNotify), m_pDocument(nullptr) {} - -CXFA_DocumentParser::~CXFA_DocumentParser() { - CloseParser(); -} - -int32_t CXFA_DocumentParser::StartParse(IFX_FileRead* pStream, - XFA_XDPPACKET ePacketID) { - CloseParser(); - int32_t nRetStatus = m_nodeParser.StartParse(pStream, ePacketID); - if (nRetStatus == XFA_PARSESTATUS_Ready) { - m_pDocument.reset(new CXFA_Document(this)); - m_nodeParser.SetFactory(m_pDocument.get()); - } - return nRetStatus; -} - -int32_t CXFA_DocumentParser::DoParse(IFX_Pause* pPause) { - int32_t nRetStatus = m_nodeParser.DoParse(pPause); - if (nRetStatus >= XFA_PARSESTATUS_Done) { - ASSERT(m_pDocument); - m_pDocument->SetRoot(m_nodeParser.GetRootNode()); - } - return nRetStatus; -} - -CFDE_XMLDoc* CXFA_DocumentParser::GetXMLDoc() const { - return m_nodeParser.GetXMLDoc(); -} - -CXFA_FFNotify* CXFA_DocumentParser::GetNotify() const { - return m_pNotify; -} - -CXFA_Document* CXFA_DocumentParser::GetDocument() const { - return m_pDocument.get(); -} - -void CXFA_DocumentParser::CloseParser() { - m_pDocument.reset(); - m_nodeParser.CloseParser(); -} - -CXFA_XMLParser::CXFA_XMLParser(CFDE_XMLNode* pRoot, IFX_Stream* pStream) - : m_nElementStart(0), - m_dwCheckStatus(0), - m_dwCurrentCheckStatus(0), - m_pRoot(pRoot), - m_pStream(pStream), - m_pParser(nullptr), - m_pParent(pRoot), - m_pChild(nullptr), - m_NodeStack(16), - m_syntaxParserResult(FDE_XmlSyntaxResult::None) { - ASSERT(m_pParent && m_pStream); - m_NodeStack.Push(m_pParent); - m_pParser = new CFDE_XMLSyntaxParser; - m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024); -} - -CXFA_XMLParser::~CXFA_XMLParser() { - if (m_pParser) { - m_pParser->Release(); - } - m_NodeStack.RemoveAll(); - m_ws1.clear(); - m_ws2.clear(); -} - -void CXFA_XMLParser::Release() { - delete this; -} - -int32_t CXFA_XMLParser::DoParser(IFX_Pause* pPause) { - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) - return -1; - if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) - return 100; - - int32_t iCount = 0; - while (TRUE) { - m_syntaxParserResult = m_pParser->DoSyntaxParse(); - switch (m_syntaxParserResult) { - case FDE_XmlSyntaxResult::InstructionOpen: - break; - case FDE_XmlSyntaxResult::InstructionClose: - if (m_pChild) { - if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - } - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::ElementOpen: - if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 2) { - m_nElementStart = m_pParser->GetCurrentPos() - 1; - } - break; - case FDE_XmlSyntaxResult::ElementBreak: - break; - case FDE_XmlSyntaxResult::ElementClose: - if (m_pChild->GetType() != FDE_XMLNODE_Element) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - m_pParser->GetTagName(m_ws1); - static_cast<CFDE_XMLElement*>(m_pChild)->GetTagName(m_ws2); - if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - m_NodeStack.Pop(); - if (m_NodeStack.GetSize() < 1) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.GetSize() == 2) { - m_nSize[m_dwCurrentCheckStatus - 1] = - m_pParser->GetCurrentBinaryPos() - - m_nStart[m_dwCurrentCheckStatus - 1]; - m_dwCurrentCheckStatus = 0; - } - - m_pParent = static_cast<CFDE_XMLNode*>(*m_NodeStack.GetTopElement()); - m_pChild = m_pParent; - iCount++; - break; - case FDE_XmlSyntaxResult::TargetName: - m_pParser->GetTargetName(m_ws1); - if (m_ws1 == FX_WSTRC(L"originalXFAVersion") || - m_ws1 == FX_WSTRC(L"acrobat")) { - m_pChild = new CFDE_XMLInstruction(m_ws1); - m_pParent->InsertChildNode(m_pChild); - } else { - m_pChild = nullptr; - } - m_ws1.clear(); - break; - case FDE_XmlSyntaxResult::TagName: - m_pParser->GetTagName(m_ws1); - m_pChild = new CFDE_XMLElement(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_NodeStack.Push(m_pChild); - m_pParent = m_pChild; - - if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 3) { - CFX_WideString wsTag; - static_cast<CFDE_XMLElement*>(m_pChild)->GetLocalTagName(wsTag); - if (wsTag == FX_WSTRC(L"template")) { - m_dwCheckStatus |= 0x01; - m_dwCurrentCheckStatus = 0x01; - m_nStart[0] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } else if (wsTag == FX_WSTRC(L"datasets")) { - m_dwCheckStatus |= 0x02; - m_dwCurrentCheckStatus = 0x02; - m_nStart[1] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } - } - break; - case FDE_XmlSyntaxResult::AttriName: - m_pParser->GetAttributeName(m_ws1); - break; - case FDE_XmlSyntaxResult::AttriValue: - if (m_pChild) { - m_pParser->GetAttributeName(m_ws2); - if (m_pChild->GetType() == FDE_XMLNODE_Element) { - static_cast<CFDE_XMLElement*>(m_pChild)->SetString(m_ws1, m_ws2); - } - } - m_ws1.clear(); - break; - case FDE_XmlSyntaxResult::Text: - m_pParser->GetTextData(m_ws1); - m_pChild = new CFDE_XMLText(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::CData: - m_pParser->GetTextData(m_ws1); - m_pChild = new CFDE_XMLCharData(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::TargetData: - if (m_pChild) { - if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - if (!m_ws1.IsEmpty()) { - static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1); - } - m_pParser->GetTargetData(m_ws1); - static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1); - } - m_ws1.clear(); - break; - default: - break; - } - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { - break; - } - if (pPause && iCount > 500 && pPause->NeedToPauseNow()) { - break; - } - } - return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_NodeStack.GetSize() != 1) - ? -1 - : m_pParser->GetStatus(); -} diff --git a/xfa/fxfa/parser/xfa_parser_imp.h b/xfa/fxfa/parser/cxfa_simple_parser.h index c10ab4dfdc..d49008da91 100644 --- a/xfa/fxfa/parser/xfa_parser_imp.h +++ b/xfa/fxfa/parser/cxfa_simple_parser.h @@ -4,24 +4,25 @@ // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com -#ifndef XFA_FXFA_PARSER_XFA_PARSER_IMP_H_ -#define XFA_FXFA_PARSER_XFA_PARSER_IMP_H_ +#ifndef XFA_FXFA_PARSER_CXFA_SIMPLE_PARSER_H_ +#define XFA_FXFA_PARSER_CXFA_SIMPLE_PARSER_H_ #include "xfa/fde/xml/fde_xml_imp.h" #include "xfa/fxfa/include/fxfa_basic.h" class CXFA_Document; -class CXFA_FFNotify; class CXFA_Node; class CXFA_XMLParser; +class IFX_FileRead; +class IFX_Pause; +class IFX_Stream; class CXFA_SimpleParser { public: CXFA_SimpleParser(CXFA_Document* pFactory, bool bDocumentParser); ~CXFA_SimpleParser(); - int32_t StartParse(IFX_FileRead* pStream, - XFA_XDPPACKET ePacketID = XFA_XDPPACKET_XDP); + int32_t StartParse(IFX_FileRead* pStream, XFA_XDPPACKET ePacketID); int32_t DoParse(IFX_Pause* pPause = nullptr); int32_t ParseXMLData(const CFX_WideString& wsXML, CFDE_XMLNode*& pXMLNode, @@ -82,51 +83,4 @@ class CXFA_SimpleParser { friend class CXFA_DocumentParser; }; -class CXFA_DocumentParser { - public: - explicit CXFA_DocumentParser(CXFA_FFNotify* pNotify); - ~CXFA_DocumentParser(); - - int32_t StartParse(IFX_FileRead* pStream, - XFA_XDPPACKET ePacketID = XFA_XDPPACKET_XDP); - int32_t DoParse(IFX_Pause* pPause = nullptr); - - CFDE_XMLDoc* GetXMLDoc() const; - CXFA_FFNotify* GetNotify() const; - CXFA_Document* GetDocument() const; - void CloseParser(); - - protected: - CXFA_SimpleParser m_nodeParser; - CXFA_FFNotify* m_pNotify; - std::unique_ptr<CXFA_Document> m_pDocument; -}; - -class CXFA_XMLParser : public CFDE_XMLParser { - public: - CXFA_XMLParser(CFDE_XMLNode* pRoot, IFX_Stream* pStream); - ~CXFA_XMLParser() override; - - // CFDE_XMLParser - void Release() override; - int32_t DoParser(IFX_Pause* pPause) override; - - FX_FILESIZE m_nStart[2]; - size_t m_nSize[2]; - FX_FILESIZE m_nElementStart; - uint16_t m_dwCheckStatus; - uint16_t m_dwCurrentCheckStatus; - - protected: - CFDE_XMLNode* m_pRoot; - IFX_Stream* m_pStream; - CFDE_XMLSyntaxParser* m_pParser; - CFDE_XMLNode* m_pParent; - CFDE_XMLNode* m_pChild; - CFX_StackTemplate<CFDE_XMLNode*> m_NodeStack; - CFX_WideString m_ws1; - CFX_WideString m_ws2; - FDE_XmlSyntaxResult m_syntaxParserResult; -}; - -#endif // XFA_FXFA_PARSER_XFA_PARSER_IMP_H_ +#endif // XFA_FXFA_PARSER_CXFA_SIMPLE_PARSER_H_ diff --git a/xfa/fxfa/parser/xfa_parser_imp_embeddertest.cpp b/xfa/fxfa/parser/cxfa_simple_parser_embeddertest.cpp index 63fe272872..a33169646d 100644 --- a/xfa/fxfa/parser/xfa_parser_imp_embeddertest.cpp +++ b/xfa/fxfa/parser/cxfa_simple_parser_embeddertest.cpp @@ -1,13 +1,13 @@ -// Copyright 2015 PDFium Authors. All rights reserved. +// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "testing/embedder_test.h" #include "testing/gtest/include/gtest/gtest.h" -class XFAParserImpEmbeddertest : public EmbedderTest {}; +class CXFASimpleParserEmbeddertest : public EmbedderTest {}; -TEST_F(XFAParserImpEmbeddertest, Bug_216) { +TEST_F(CXFASimpleParserEmbeddertest, Bug_216) { EXPECT_TRUE(OpenDocument("bug_216.pdf")); FPDF_PAGE page = LoadPage(0); EXPECT_NE(nullptr, page); diff --git a/xfa/fxfa/parser/cxfa_xml_parser.cpp b/xfa/fxfa/parser/cxfa_xml_parser.cpp new file mode 100644 index 0000000000..268c8b1416 --- /dev/null +++ b/xfa/fxfa/parser/cxfa_xml_parser.cpp @@ -0,0 +1,176 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fxfa/parser/cxfa_xml_parser.h" + +CXFA_XMLParser::CXFA_XMLParser(CFDE_XMLNode* pRoot, IFX_Stream* pStream) + : m_nElementStart(0), + m_dwCheckStatus(0), + m_dwCurrentCheckStatus(0), + m_pRoot(pRoot), + m_pStream(pStream), + m_pParser(new CFDE_XMLSyntaxParser), + m_pParent(pRoot), + m_pChild(nullptr), + m_NodeStack(16), + m_syntaxParserResult(FDE_XmlSyntaxResult::None) { + ASSERT(m_pParent && m_pStream); + m_NodeStack.Push(m_pParent); + m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024); +} + +CXFA_XMLParser::~CXFA_XMLParser() { + m_NodeStack.RemoveAll(); + m_ws1.clear(); + m_ws2.clear(); +} + +void CXFA_XMLParser::Release() { + delete this; +} + +int32_t CXFA_XMLParser::DoParser(IFX_Pause* pPause) { + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) + return -1; + if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) + return 100; + + int32_t iCount = 0; + while (TRUE) { + m_syntaxParserResult = m_pParser->DoSyntaxParse(); + switch (m_syntaxParserResult) { + case FDE_XmlSyntaxResult::InstructionOpen: + break; + case FDE_XmlSyntaxResult::InstructionClose: + if (m_pChild) { + if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + } + m_pChild = m_pParent; + break; + case FDE_XmlSyntaxResult::ElementOpen: + if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 2) { + m_nElementStart = m_pParser->GetCurrentPos() - 1; + } + break; + case FDE_XmlSyntaxResult::ElementBreak: + break; + case FDE_XmlSyntaxResult::ElementClose: + if (m_pChild->GetType() != FDE_XMLNODE_Element) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + m_pParser->GetTagName(m_ws1); + static_cast<CFDE_XMLElement*>(m_pChild)->GetTagName(m_ws2); + if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + m_NodeStack.Pop(); + if (m_NodeStack.GetSize() < 1) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.GetSize() == 2) { + m_nSize[m_dwCurrentCheckStatus - 1] = + m_pParser->GetCurrentBinaryPos() - + m_nStart[m_dwCurrentCheckStatus - 1]; + m_dwCurrentCheckStatus = 0; + } + + m_pParent = static_cast<CFDE_XMLNode*>(*m_NodeStack.GetTopElement()); + m_pChild = m_pParent; + iCount++; + break; + case FDE_XmlSyntaxResult::TargetName: + m_pParser->GetTargetName(m_ws1); + if (m_ws1 == FX_WSTRC(L"originalXFAVersion") || + m_ws1 == FX_WSTRC(L"acrobat")) { + m_pChild = new CFDE_XMLInstruction(m_ws1); + m_pParent->InsertChildNode(m_pChild); + } else { + m_pChild = nullptr; + } + m_ws1.clear(); + break; + case FDE_XmlSyntaxResult::TagName: + m_pParser->GetTagName(m_ws1); + m_pChild = new CFDE_XMLElement(m_ws1); + m_pParent->InsertChildNode(m_pChild); + m_NodeStack.Push(m_pChild); + m_pParent = m_pChild; + + if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 3) { + CFX_WideString wsTag; + static_cast<CFDE_XMLElement*>(m_pChild)->GetLocalTagName(wsTag); + if (wsTag == FX_WSTRC(L"template")) { + m_dwCheckStatus |= 0x01; + m_dwCurrentCheckStatus = 0x01; + m_nStart[0] = m_pParser->GetCurrentBinaryPos() - + (m_pParser->GetCurrentPos() - m_nElementStart); + } else if (wsTag == FX_WSTRC(L"datasets")) { + m_dwCheckStatus |= 0x02; + m_dwCurrentCheckStatus = 0x02; + m_nStart[1] = m_pParser->GetCurrentBinaryPos() - + (m_pParser->GetCurrentPos() - m_nElementStart); + } + } + break; + case FDE_XmlSyntaxResult::AttriName: + m_pParser->GetAttributeName(m_ws1); + break; + case FDE_XmlSyntaxResult::AttriValue: + if (m_pChild) { + m_pParser->GetAttributeName(m_ws2); + if (m_pChild->GetType() == FDE_XMLNODE_Element) { + static_cast<CFDE_XMLElement*>(m_pChild)->SetString(m_ws1, m_ws2); + } + } + m_ws1.clear(); + break; + case FDE_XmlSyntaxResult::Text: + m_pParser->GetTextData(m_ws1); + m_pChild = new CFDE_XMLText(m_ws1); + m_pParent->InsertChildNode(m_pChild); + m_pChild = m_pParent; + break; + case FDE_XmlSyntaxResult::CData: + m_pParser->GetTextData(m_ws1); + m_pChild = new CFDE_XMLCharData(m_ws1); + m_pParent->InsertChildNode(m_pChild); + m_pChild = m_pParent; + break; + case FDE_XmlSyntaxResult::TargetData: + if (m_pChild) { + if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + if (!m_ws1.IsEmpty()) { + static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1); + } + m_pParser->GetTargetData(m_ws1); + static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1); + } + m_ws1.clear(); + break; + default: + break; + } + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || + m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { + break; + } + if (pPause && iCount > 500 && pPause->NeedToPauseNow()) { + break; + } + } + return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || + m_NodeStack.GetSize() != 1) + ? -1 + : m_pParser->GetStatus(); +} diff --git a/xfa/fxfa/parser/cxfa_xml_parser.h b/xfa/fxfa/parser/cxfa_xml_parser.h new file mode 100644 index 0000000000..1fdf06b5a8 --- /dev/null +++ b/xfa/fxfa/parser/cxfa_xml_parser.h @@ -0,0 +1,43 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FXFA_PARSER_CXFA_XML_PARSER_H_ +#define XFA_FXFA_PARSER_CXFA_XML_PARSER_H_ + +#include "xfa/fde/xml/fde_xml_imp.h" + +class IFX_Stream; +class IFX_Pause; + +class CXFA_XMLParser : public CFDE_XMLParser { + public: + CXFA_XMLParser(CFDE_XMLNode* pRoot, IFX_Stream* pStream); + ~CXFA_XMLParser() override; + + // CFDE_XMLParser + void Release() override; + int32_t DoParser(IFX_Pause* pPause) override; + + FX_FILESIZE m_nStart[2]; + size_t m_nSize[2]; + FX_FILESIZE m_nElementStart; + uint16_t m_dwCheckStatus; + uint16_t m_dwCurrentCheckStatus; + + protected: + CFDE_XMLNode* m_pRoot; + IFX_Stream* m_pStream; + std::unique_ptr<CFDE_XMLSyntaxParser, ReleaseDeleter<CFDE_XMLSyntaxParser>> + m_pParser; + CFDE_XMLNode* m_pParent; + CFDE_XMLNode* m_pChild; + CFX_StackTemplate<CFDE_XMLNode*> m_NodeStack; + CFX_WideString m_ws1; + CFX_WideString m_ws2; + FDE_XmlSyntaxResult m_syntaxParserResult; +}; + +#endif // XFA_FXFA_PARSER_CXFA_XML_PARSER_H_ diff --git a/xfa/fxfa/parser/xfa_document.h b/xfa/fxfa/parser/xfa_document.h index 328f1bcdd2..176f0c7b98 100644 --- a/xfa/fxfa/parser/xfa_document.h +++ b/xfa/fxfa/parser/xfa_document.h @@ -10,8 +10,8 @@ #include "xfa/fxfa/include/fxfa.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" +class CFDE_XMLDoc; class CXFA_Document; class CXFA_LayoutItem; class CXFA_LayoutProcessor; diff --git a/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp b/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp index dd1fce6eed..55718e2c88 100644 --- a/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp +++ b/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp @@ -15,7 +15,6 @@ #include "xfa/fxfa/parser/xfa_document_layout_imp.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_script_imp.h" #include "xfa/fxfa/parser/xfa_utils.h" diff --git a/xfa/fxfa/parser/xfa_document_imp.cpp b/xfa/fxfa/parser/xfa_document_imp.cpp index efd48e66a1..4c8b4dba1d 100644 --- a/xfa/fxfa/parser/xfa_document_imp.cpp +++ b/xfa/fxfa/parser/xfa_document_imp.cpp @@ -6,13 +6,13 @@ #include "core/fxcrt/include/fx_ext.h" #include "xfa/fxfa/app/xfa_ffnotify.h" +#include "xfa/fxfa/parser/cxfa_document_parser.h" #include "xfa/fxfa/parser/xfa_basic_imp.h" #include "xfa/fxfa/parser/xfa_doclayout.h" #include "xfa/fxfa/parser/xfa_document.h" #include "xfa/fxfa/parser/xfa_document_layout_imp.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_script_datawindow.h" #include "xfa/fxfa/parser/xfa_script_eventpseudomodel.h" diff --git a/xfa/fxfa/parser/xfa_document_serialize.cpp b/xfa/fxfa/parser/xfa_document_serialize.cpp index 428b470c13..a8c4cde4ac 100644 --- a/xfa/fxfa/parser/xfa_document_serialize.cpp +++ b/xfa/fxfa/parser/xfa_document_serialize.cpp @@ -8,11 +8,11 @@ #include "xfa/fde/xml/fde_xml_imp.h" #include "xfa/fgas/crt/fgas_codepage.h" +#include "xfa/fxfa/parser/cxfa_simple_parser.h" #include "xfa/fxfa/parser/xfa_doclayout.h" #include "xfa/fxfa/parser/xfa_document.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_utils.h" diff --git a/xfa/fxfa/parser/xfa_layout_appadapter.cpp b/xfa/fxfa/parser/xfa_layout_appadapter.cpp index 0b94c644c4..a28987cfb8 100644 --- a/xfa/fxfa/parser/xfa_layout_appadapter.cpp +++ b/xfa/fxfa/parser/xfa_layout_appadapter.cpp @@ -14,7 +14,6 @@ #include "xfa/fxfa/parser/xfa_layout_pagemgr_new.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_utils.h" diff --git a/xfa/fxfa/parser/xfa_layout_itemlayout.cpp b/xfa/fxfa/parser/xfa_layout_itemlayout.cpp index fb024ab8db..b834267f09 100644 --- a/xfa/fxfa/parser/xfa_layout_itemlayout.cpp +++ b/xfa/fxfa/parser/xfa_layout_itemlayout.cpp @@ -18,7 +18,6 @@ #include "xfa/fxfa/parser/xfa_layout_pagemgr_new.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_utils.h" diff --git a/xfa/fxfa/parser/xfa_layout_pagemgr_new.cpp b/xfa/fxfa/parser/xfa_layout_pagemgr_new.cpp index 14329c68fb..5055000a6b 100644 --- a/xfa/fxfa/parser/xfa_layout_pagemgr_new.cpp +++ b/xfa/fxfa/parser/xfa_layout_pagemgr_new.cpp @@ -15,7 +15,6 @@ #include "xfa/fxfa/parser/xfa_layout_itemlayout.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_script_imp.h" #include "xfa/fxfa/parser/xfa_utils.h" diff --git a/xfa/fxfa/parser/xfa_object_imp.cpp b/xfa/fxfa/parser/xfa_object_imp.cpp index 5ada3bd197..871286d24e 100644 --- a/xfa/fxfa/parser/xfa_object_imp.cpp +++ b/xfa/fxfa/parser/xfa_object_imp.cpp @@ -16,12 +16,12 @@ #include "xfa/fgas/crt/fgas_system.h" #include "xfa/fxfa/app/xfa_ffnotify.h" #include "xfa/fxfa/parser/cxfa_occur.h" +#include "xfa/fxfa/parser/cxfa_simple_parser.h" #include "xfa/fxfa/parser/xfa_basic_imp.h" #include "xfa/fxfa/parser/xfa_doclayout.h" #include "xfa/fxfa/parser/xfa_document.h" #include "xfa/fxfa/parser/xfa_document_layout_imp.h" #include "xfa/fxfa/parser/xfa_localemgr.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_script_imp.h" #include "xfa/fxfa/parser/xfa_utils.h" diff --git a/xfa/fxfa/parser/xfa_script_eventpseudomodel.cpp b/xfa/fxfa/parser/xfa_script_eventpseudomodel.cpp index a08b512591..ef724e49f2 100644 --- a/xfa/fxfa/parser/xfa_script_eventpseudomodel.cpp +++ b/xfa/fxfa/parser/xfa_script_eventpseudomodel.cpp @@ -14,7 +14,6 @@ #include "xfa/fxfa/parser/xfa_document.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_script_imp.h" #include "xfa/fxfa/parser/xfa_utils.h" diff --git a/xfa/fxfa/parser/xfa_script_hostpseudomodel.cpp b/xfa/fxfa/parser/xfa_script_hostpseudomodel.cpp index b699c1f3ca..37ed181714 100644 --- a/xfa/fxfa/parser/xfa_script_hostpseudomodel.cpp +++ b/xfa/fxfa/parser/xfa_script_hostpseudomodel.cpp @@ -13,7 +13,6 @@ #include "xfa/fxfa/parser/xfa_document_layout_imp.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_script_imp.h" #include "xfa/fxfa/parser/xfa_utils.h" diff --git a/xfa/fxfa/parser/xfa_script_layoutpseudomodel.cpp b/xfa/fxfa/parser/xfa_script_layoutpseudomodel.cpp index 5b24e3ae2e..3567a5a138 100644 --- a/xfa/fxfa/parser/xfa_script_layoutpseudomodel.cpp +++ b/xfa/fxfa/parser/xfa_script_layoutpseudomodel.cpp @@ -17,7 +17,6 @@ #include "xfa/fxfa/parser/xfa_layout_appadapter.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_script_imp.h" #include "xfa/fxfa/parser/xfa_utils.h" diff --git a/xfa/fxfa/parser/xfa_script_signaturepseudomodel.cpp b/xfa/fxfa/parser/xfa_script_signaturepseudomodel.cpp index c328396e79..45c0f7c0d0 100644 --- a/xfa/fxfa/parser/xfa_script_signaturepseudomodel.cpp +++ b/xfa/fxfa/parser/xfa_script_signaturepseudomodel.cpp @@ -12,7 +12,6 @@ #include "xfa/fxfa/parser/xfa_document.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_parser_imp.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_script_imp.h" #include "xfa/fxfa/parser/xfa_utils.h" |