From bd9237eb346946b0caa291504c3a5f54e9b1bb3f Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Thu, 30 Mar 2017 16:49:42 -0400 Subject: Move CXFA_XMLParser to CFDE_XMLParser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is nothing XFA specific in the CXFA_XMLParser. This Cl moves it to the CFDE_XMLParser and co-locates with the other XML code. Change-Id: I86c12da3f6f5732be54b8019562978f88234e2fc Reviewed-on: https://pdfium-review.googlesource.com/3432 Reviewed-by: Nicolás Peña Commit-Queue: dsinclair --- xfa/fde/xml/cfde_xml_parser.cpp | 167 ++++++++++++++++++++++++++++++++++++++++ xfa/fde/xml/cfde_xml_parser.h | 43 +++++++++++ xfa/fde/xml/fde_xml_imp.cpp | 31 +------- xfa/fde/xml/fde_xml_imp.h | 13 +--- 4 files changed, 215 insertions(+), 39 deletions(-) create mode 100644 xfa/fde/xml/cfde_xml_parser.cpp create mode 100644 xfa/fde/xml/cfde_xml_parser.h (limited to 'xfa/fde/xml') diff --git a/xfa/fde/xml/cfde_xml_parser.cpp b/xfa/fde/xml/cfde_xml_parser.cpp new file mode 100644 index 0000000000..840c34a5c4 --- /dev/null +++ b/xfa/fde/xml/cfde_xml_parser.cpp @@ -0,0 +1,167 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xml_parser.h" + +#include "third_party/base/ptr_util.h" + +CFDE_XMLParser::CFDE_XMLParser(CFDE_XMLNode* pParent, + const CFX_RetainPtr& pStream) + : m_nElementStart(0), + m_dwCheckStatus(0), + m_dwCurrentCheckStatus(0), + m_pStream(pStream), + m_pParser(pdfium::MakeUnique()), + m_pParent(pParent), + m_pChild(nullptr), + m_syntaxParserResult(FDE_XmlSyntaxResult::None) { + ASSERT(m_pParent && m_pStream); + m_NodeStack.push(m_pParent); + m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024); +} + +CFDE_XMLParser::~CFDE_XMLParser() {} + +int32_t CFDE_XMLParser::DoParser(IFX_Pause* pPause) { + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) + return -1; + if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) + return 100; + + int32_t iCount = 0; + while (true) { + m_syntaxParserResult = m_pParser->DoSyntaxParse(); + switch (m_syntaxParserResult) { + case FDE_XmlSyntaxResult::InstructionOpen: + break; + case FDE_XmlSyntaxResult::InstructionClose: + if (m_pChild) { + if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + } + m_pChild = m_pParent; + break; + case FDE_XmlSyntaxResult::ElementOpen: + if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 2) + m_nElementStart = m_pParser->GetCurrentPos() - 1; + break; + case FDE_XmlSyntaxResult::ElementBreak: + break; + case FDE_XmlSyntaxResult::ElementClose: + if (m_pChild->GetType() != FDE_XMLNODE_Element) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + m_pParser->GetTagName(m_ws1); + static_cast(m_pChild)->GetTagName(m_ws2); + if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + if (!m_NodeStack.empty()) + m_NodeStack.pop(); + if (m_NodeStack.empty()) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.size() == 2) { + m_nSize[m_dwCurrentCheckStatus - 1] = + m_pParser->GetCurrentBinaryPos() - + m_nStart[m_dwCurrentCheckStatus - 1]; + m_dwCurrentCheckStatus = 0; + } + m_pParent = m_NodeStack.top(); + m_pChild = m_pParent; + iCount++; + break; + case FDE_XmlSyntaxResult::TargetName: + m_pParser->GetTargetName(m_ws1); + if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") { + m_pChild = new CFDE_XMLInstruction(m_ws1); + m_pParent->InsertChildNode(m_pChild); + } else { + m_pChild = nullptr; + } + m_ws1.clear(); + break; + case FDE_XmlSyntaxResult::TagName: + m_pParser->GetTagName(m_ws1); + m_pChild = new CFDE_XMLElement(m_ws1); + m_pParent->InsertChildNode(m_pChild); + m_NodeStack.push(m_pChild); + m_pParent = m_pChild; + + if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 3) { + CFX_WideString wsTag; + static_cast(m_pChild)->GetLocalTagName(wsTag); + if (wsTag == L"template") { + m_dwCheckStatus |= 0x01; + m_dwCurrentCheckStatus = 0x01; + m_nStart[0] = m_pParser->GetCurrentBinaryPos() - + (m_pParser->GetCurrentPos() - m_nElementStart); + } else if (wsTag == L"datasets") { + m_dwCheckStatus |= 0x02; + m_dwCurrentCheckStatus = 0x02; + m_nStart[1] = m_pParser->GetCurrentBinaryPos() - + (m_pParser->GetCurrentPos() - m_nElementStart); + } + } + break; + case FDE_XmlSyntaxResult::AttriName: + m_pParser->GetAttributeName(m_ws1); + break; + case FDE_XmlSyntaxResult::AttriValue: + if (m_pChild) { + m_pParser->GetAttributeName(m_ws2); + if (m_pChild->GetType() == FDE_XMLNODE_Element) { + static_cast(m_pChild)->SetString(m_ws1, m_ws2); + } + } + m_ws1.clear(); + break; + case FDE_XmlSyntaxResult::Text: + m_pParser->GetTextData(m_ws1); + m_pChild = new CFDE_XMLText(m_ws1); + m_pParent->InsertChildNode(m_pChild); + m_pChild = m_pParent; + break; + case FDE_XmlSyntaxResult::CData: + m_pParser->GetTextData(m_ws1); + m_pChild = new CFDE_XMLCharData(m_ws1); + m_pParent->InsertChildNode(m_pChild); + m_pChild = m_pParent; + break; + case FDE_XmlSyntaxResult::TargetData: + if (m_pChild) { + if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + if (!m_ws1.IsEmpty()) { + static_cast(m_pChild)->AppendData(m_ws1); + } + m_pParser->GetTargetData(m_ws1); + static_cast(m_pChild)->AppendData(m_ws1); + } + m_ws1.clear(); + break; + default: + break; + } + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || + m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { + break; + } + if (pPause && iCount > 500 && pPause->NeedToPauseNow()) { + break; + } + } + return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || + m_NodeStack.size() != 1) + ? -1 + : m_pParser->GetStatus(); +} diff --git a/xfa/fde/xml/cfde_xml_parser.h b/xfa/fde/xml/cfde_xml_parser.h new file mode 100644 index 0000000000..cd8ccbe389 --- /dev/null +++ b/xfa/fde/xml/cfde_xml_parser.h @@ -0,0 +1,43 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XML_PARSER_H_ +#define XFA_FDE_XML_CFDE_XML_PARSER_H_ + +#include +#include + +#include "xfa/fde/xml/fde_xml_imp.h" + +class IFGAS_Stream; +class IFX_Pause; + +class CFDE_XMLParser { + public: + CFDE_XMLParser(CFDE_XMLNode* pParent, + const CFX_RetainPtr& pStream); + ~CFDE_XMLParser(); + + int32_t DoParser(IFX_Pause* pPause); + + FX_FILESIZE m_nStart[2]; + size_t m_nSize[2]; + FX_FILESIZE m_nElementStart; + uint16_t m_dwCheckStatus; + uint16_t m_dwCurrentCheckStatus; + + private: + CFX_RetainPtr m_pStream; + std::unique_ptr m_pParser; + CFDE_XMLNode* m_pParent; + CFDE_XMLNode* m_pChild; + std::stack m_NodeStack; + CFX_WideString m_ws1; + CFX_WideString m_ws2; + FDE_XmlSyntaxResult m_syntaxParserResult; +}; + +#endif // XFA_FDE_XML_CFDE_XML_PARSER_H_ diff --git a/xfa/fde/xml/fde_xml_imp.cpp b/xfa/fde/xml/fde_xml_imp.cpp index ade8fc0557..68a4978209 100644 --- a/xfa/fde/xml/fde_xml_imp.cpp +++ b/xfa/fde/xml/fde_xml_imp.cpp @@ -13,6 +13,7 @@ #include "core/fxcrt/fx_safe_types.h" #include "third_party/base/ptr_util.h" #include "third_party/base/stl_util.h" +#include "xfa/fde/xml/cfde_xml_parser.h" #include "xfa/fgas/crt/fgas_codepage.h" namespace { @@ -917,7 +918,7 @@ CFDE_XMLDoc::CFDE_XMLDoc() CFDE_XMLDoc::~CFDE_XMLDoc() {} -bool CFDE_XMLDoc::LoadXML(std::unique_ptr pXMLParser) { +bool CFDE_XMLDoc::LoadXML(std::unique_ptr pXMLParser) { if (!pXMLParser) return false; @@ -1052,34 +1053,6 @@ void CFDE_XMLDoc::SaveXMLNode(const CFX_RetainPtr& pXMLStream, } } -void CFDE_XMLDoc::SaveXML(CFX_RetainPtr& pXMLStream, - bool bSaveBOM) { - if (!pXMLStream || pXMLStream == m_pStream) { - m_pStream->Seek(FX_STREAMSEEK_Begin, 0); - pXMLStream = m_pStream; - } - ASSERT((pXMLStream->GetAccessModes() & FX_STREAMACCESS_Text) != 0); - ASSERT((pXMLStream->GetAccessModes() & FX_STREAMACCESS_Write) != 0); - uint16_t wCodePage = pXMLStream->GetCodePage(); - if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE && - wCodePage != FX_CODEPAGE_UTF8) { - wCodePage = FX_CODEPAGE_UTF8; - pXMLStream->SetCodePage(wCodePage); - } - if (bSaveBOM) { - pXMLStream->WriteString(L"\xFEFF", 1); - } - CFDE_XMLNode* pNode = m_pRoot->m_pChild; - while (pNode) { - SaveXMLNode(pXMLStream, static_cast(pNode)); - pNode = pNode->m_pNext; - } - if (pXMLStream == m_pStream) { - int32_t iPos = pXMLStream->GetPosition(); - pXMLStream->SetLength(iPos); - } -} - CFDE_BlockBuffer::CFDE_BlockBuffer(int32_t iAllocStep) : m_iDataLength(0), m_iBufferSize(0), diff --git a/xfa/fde/xml/fde_xml_imp.h b/xfa/fde/xml/fde_xml_imp.h index 09cdcbacdf..bd88da06cc 100644 --- a/xfa/fde/xml/fde_xml_imp.h +++ b/xfa/fde/xml/fde_xml_imp.h @@ -22,8 +22,8 @@ class CFDE_XMLElement; class CFDE_XMLText; class CFDE_XMLDoc; class CFDE_XMLDOMParser; +class CFDE_XMLParser; class CFDE_XMLSyntaxParser; -class IFDE_XMLParser; class CFDE_XMLNode { public: @@ -191,27 +191,20 @@ class CFDE_XMLDoc { CFDE_XMLDoc(); ~CFDE_XMLDoc(); - bool LoadXML(std::unique_ptr pXMLParser); + bool LoadXML(std::unique_ptr pXMLParser); int32_t DoLoad(IFX_Pause* pPause = nullptr); void CloseXML(); CFDE_XMLNode* GetRoot() const { return m_pRoot.get(); } - void SaveXML(CFX_RetainPtr& pXMLStream, bool bSaveBOM = true); void SaveXMLNode(const CFX_RetainPtr& pXMLStream, CFDE_XMLNode* pNode); private: int32_t m_iStatus; std::unique_ptr m_pRoot; - std::unique_ptr m_pXMLParser; + std::unique_ptr m_pXMLParser; CFX_RetainPtr m_pStream; }; -class IFDE_XMLParser { - public: - virtual ~IFDE_XMLParser() {} - virtual int32_t DoParser(IFX_Pause* pPause) = 0; -}; - class CFDE_BlockBuffer { public: explicit CFDE_BlockBuffer(int32_t iAllocStep = 1024 * 1024); -- cgit v1.2.3