From ac35589d5d1a593497cf28d91ab6a236f25833c3 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Mon, 3 Apr 2017 16:46:21 -0400 Subject: Split the FDE XML file into individual class files. This CL splits the fde_xml_imp file apart into individual class files. Includes are fixed as needed. fde_xml.h is also removed and the needed defines moved to more appropriate places. Change-Id: I29774dabc4d0fb2d5092fcbbe7853f03401b6ec7 Reviewed-on: https://pdfium-review.googlesource.com/3616 Commit-Queue: dsinclair Reviewed-by: Tom Sepez --- BUILD.gn | 26 +- core/fxcrt/cfx_blockbuffer.cpp | 140 ++ core/fxcrt/cfx_blockbuffer.h | 54 + testing/libfuzzer/pdf_xml_fuzzer.cc | 5 +- xfa/fde/xml/cfde_xml_parser.cpp | 167 --- xfa/fde/xml/cfde_xml_parser.h | 43 - xfa/fde/xml/cfde_xmlchardata.cpp | 21 + xfa/fde/xml/cfde_xmlchardata.h | 29 + xfa/fde/xml/cfde_xmldeclaration.h | 18 + xfa/fde/xml/cfde_xmldoc.cpp | 161 ++ xfa/fde/xml/cfde_xmldoc.h | 36 + xfa/fde/xml/cfde_xmlelement.cpp | 223 +++ xfa/fde/xml/cfde_xmlelement.h | 56 + xfa/fde/xml/cfde_xmlinstruction.cpp | 160 ++ xfa/fde/xml/cfde_xmlinstruction.h | 50 + xfa/fde/xml/cfde_xmlnode.cpp | 458 ++++++ xfa/fde/xml/cfde_xmlnode.h | 74 + xfa/fde/xml/cfde_xmlparser.cpp | 173 +++ xfa/fde/xml/cfde_xmlparser.h | 47 + xfa/fde/xml/cfde_xmlsyntaxparser.cpp | 703 +++++++++ xfa/fde/xml/cfde_xmlsyntaxparser.h | 128 ++ xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp | 632 ++++++++ xfa/fde/xml/cfde_xmltext.cpp | 21 + xfa/fde/xml/cfde_xmltext.h | 28 + xfa/fde/xml/fde_xml.h | 45 - xfa/fde/xml/fde_xml_imp.cpp | 1832 ----------------------- xfa/fde/xml/fde_xml_imp.h | 335 ----- xfa/fde/xml/fde_xml_imp_unittest.cpp | 632 -------- xfa/fxfa/app/cxfa_textlayout.cpp | 4 +- xfa/fxfa/app/cxfa_textparser.cpp | 2 + xfa/fxfa/app/xfa_ffwidgetacc.cpp | 3 +- xfa/fxfa/cxfa_ffdoc.cpp | 3 +- xfa/fxfa/cxfa_widgetacc.cpp | 3 +- xfa/fxfa/parser/cxfa_dataexporter.cpp | 27 +- xfa/fxfa/parser/cxfa_dataimporter.cpp | 2 +- xfa/fxfa/parser/cxfa_document_parser.cpp | 1 + 
xfa/fxfa/parser/cxfa_node.cpp | 4 +- xfa/fxfa/parser/cxfa_simple_parser.cpp | 8 +- xfa/fxfa/parser/cxfa_simple_parser.h | 4 +- xfa/fxfa/parser/xfa_document_datamerger_imp.cpp | 3 +- xfa/fxfa/parser/xfa_utils.cpp | 5 +- xfa/fxfa/parser/xfa_utils.h | 1 - 42 files changed, 3292 insertions(+), 3075 deletions(-) create mode 100644 core/fxcrt/cfx_blockbuffer.cpp create mode 100644 core/fxcrt/cfx_blockbuffer.h delete mode 100644 xfa/fde/xml/cfde_xml_parser.cpp delete mode 100644 xfa/fde/xml/cfde_xml_parser.h create mode 100644 xfa/fde/xml/cfde_xmlchardata.cpp create mode 100644 xfa/fde/xml/cfde_xmlchardata.h create mode 100644 xfa/fde/xml/cfde_xmldeclaration.h create mode 100644 xfa/fde/xml/cfde_xmldoc.cpp create mode 100644 xfa/fde/xml/cfde_xmldoc.h create mode 100644 xfa/fde/xml/cfde_xmlelement.cpp create mode 100644 xfa/fde/xml/cfde_xmlelement.h create mode 100644 xfa/fde/xml/cfde_xmlinstruction.cpp create mode 100644 xfa/fde/xml/cfde_xmlinstruction.h create mode 100644 xfa/fde/xml/cfde_xmlnode.cpp create mode 100644 xfa/fde/xml/cfde_xmlnode.h create mode 100644 xfa/fde/xml/cfde_xmlparser.cpp create mode 100644 xfa/fde/xml/cfde_xmlparser.h create mode 100644 xfa/fde/xml/cfde_xmlsyntaxparser.cpp create mode 100644 xfa/fde/xml/cfde_xmlsyntaxparser.h create mode 100644 xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp create mode 100644 xfa/fde/xml/cfde_xmltext.cpp create mode 100644 xfa/fde/xml/cfde_xmltext.h delete mode 100644 xfa/fde/xml/fde_xml.h delete mode 100644 xfa/fde/xml/fde_xml_imp.cpp delete mode 100644 xfa/fde/xml/fde_xml_imp.h delete mode 100644 xfa/fde/xml/fde_xml_imp_unittest.cpp diff --git a/BUILD.gn b/BUILD.gn index fa879d3e03..542ae1bf71 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -809,6 +809,8 @@ static_library("fxcrt") { if (pdf_enable_xfa) { sources += [ + "core/fxcrt/cfx_blockbuffer.cpp", + "core/fxcrt/cfx_blockbuffer.h", "core/fxcrt/cfx_char.cpp", "core/fxcrt/cfx_char.h", "core/fxcrt/cfx_chariter.cpp", @@ -1402,11 +1404,23 @@ if (pdf_enable_xfa) { 
"xfa/fde/ifde_txtedtdorecord.h", "xfa/fde/ifde_txtedtengine.h", "xfa/fde/ifde_txtedtpage.h", - "xfa/fde/xml/cfde_xml_parser.cpp", - "xfa/fde/xml/cfde_xml_parser.h", - "xfa/fde/xml/fde_xml.h", - "xfa/fde/xml/fde_xml_imp.cpp", - "xfa/fde/xml/fde_xml_imp.h", + "xfa/fde/xml/cfde_xmlchardata.cpp", + "xfa/fde/xml/cfde_xmlchardata.h", + "xfa/fde/xml/cfde_xmldeclaration.h", + "xfa/fde/xml/cfde_xmldoc.cpp", + "xfa/fde/xml/cfde_xmldoc.h", + "xfa/fde/xml/cfde_xmlelement.cpp", + "xfa/fde/xml/cfde_xmlelement.h", + "xfa/fde/xml/cfde_xmlinstruction.cpp", + "xfa/fde/xml/cfde_xmlinstruction.h", + "xfa/fde/xml/cfde_xmlnode.cpp", + "xfa/fde/xml/cfde_xmlnode.h", + "xfa/fde/xml/cfde_xmlparser.cpp", + "xfa/fde/xml/cfde_xmlparser.h", + "xfa/fde/xml/cfde_xmlsyntaxparser.cpp", + "xfa/fde/xml/cfde_xmlsyntaxparser.h", + "xfa/fde/xml/cfde_xmltext.cpp", + "xfa/fde/xml/cfde_xmltext.h", "xfa/fgas/crt/cfgas_formatstring.cpp", "xfa/fgas/crt/cfgas_formatstring.h", "xfa/fgas/crt/fgas_codepage.cpp", @@ -1873,7 +1887,7 @@ test("pdfium_unittests") { "xfa/fde/css/cfde_cssdeclaration_unittest.cpp", "xfa/fde/css/cfde_cssstylesheet_unittest.cpp", "xfa/fde/css/cfde_cssvaluelistparser_unittest.cpp", - "xfa/fde/xml/fde_xml_imp_unittest.cpp", + "xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp", "xfa/fgas/layout/fgas_rtfbreak_unittest.cpp", "xfa/fxfa/app/cxfa_textparser_unittest.cpp", "xfa/fxfa/cxfa_ffapp_unitest.cpp", diff --git a/core/fxcrt/cfx_blockbuffer.cpp b/core/fxcrt/cfx_blockbuffer.cpp new file mode 100644 index 0000000000..354f415282 --- /dev/null +++ b/core/fxcrt/cfx_blockbuffer.cpp @@ -0,0 +1,140 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/fxcrt/cfx_blockbuffer.h" + +#include + +#include "third_party/base/stl_util.h" + +CFX_BlockBuffer::CFX_BlockBuffer(int32_t iAllocStep) + : m_iDataLength(0), + m_iBufferSize(0), + m_iAllocStep(iAllocStep), + m_iStartPosition(0) {} + +CFX_BlockBuffer::~CFX_BlockBuffer() { + ClearBuffer(); +} + +wchar_t* CFX_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) { + iIndexInBlock = 0; + if (m_BlockArray.empty()) + return nullptr; + + int32_t iRealIndex = m_iStartPosition + m_iDataLength; + if (iRealIndex == m_iBufferSize) { + m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep)); + m_iBufferSize += m_iAllocStep; + return m_BlockArray.back().get(); + } + iIndexInBlock = iRealIndex % m_iAllocStep; + return m_BlockArray[iRealIndex / m_iAllocStep].get(); +} + +bool CFX_BlockBuffer::InitBuffer(int32_t iBufferSize) { + ClearBuffer(); + int32_t iNumOfBlock = (iBufferSize - 1) / m_iAllocStep + 1; + for (int32_t i = 0; i < iNumOfBlock; i++) + m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep)); + + m_iBufferSize = iNumOfBlock * m_iAllocStep; + return true; +} + +void CFX_BlockBuffer::SetTextChar(int32_t iIndex, wchar_t ch) { + if (iIndex < 0) { + return; + } + int32_t iRealIndex = m_iStartPosition + iIndex; + int32_t iBlockIndex = iRealIndex / m_iAllocStep; + int32_t iInnerIndex = iRealIndex % m_iAllocStep; + int32_t iBlockSize = pdfium::CollectionSize(m_BlockArray); + if (iBlockIndex >= iBlockSize) { + int32_t iNewBlocks = iBlockIndex - iBlockSize + 1; + do { + m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep)); + m_iBufferSize += m_iAllocStep; + } while (--iNewBlocks); + } + wchar_t* pTextData = m_BlockArray[iBlockIndex].get(); + pTextData[iInnerIndex] = ch; + m_iDataLength = std::max(m_iDataLength, iIndex + 1); +} + +int32_t CFX_BlockBuffer::DeleteTextChars(int32_t iCount, bool bDirection) { + if (iCount <= 0) + return m_iDataLength; + + if (iCount >= m_iDataLength) { + Reset(false); + return 0; + } + if 
(bDirection) { + m_iStartPosition += iCount; + m_iDataLength -= iCount; + } else { + m_iDataLength -= iCount; + } + return m_iDataLength; +} + +void CFX_BlockBuffer::GetTextData(CFX_WideString& wsTextData, + int32_t iStart, + int32_t iLength) const { + wsTextData.clear(); + int32_t iMaybeDataLength = m_iBufferSize - 1 - m_iStartPosition; + if (iStart < 0 || iStart > iMaybeDataLength) { + return; + } + if (iLength == -1 || iLength > iMaybeDataLength) { + iLength = iMaybeDataLength; + } + if (iLength <= 0) { + return; + } + wchar_t* pBuf = wsTextData.GetBuffer(iLength); + if (!pBuf) { + return; + } + int32_t iStartBlockIndex = 0; + int32_t iStartInnerIndex = 0; + TextDataIndex2BufIndex(iStart, iStartBlockIndex, iStartInnerIndex); + int32_t iEndBlockIndex = 0; + int32_t iEndInnerIndex = 0; + TextDataIndex2BufIndex(iStart + iLength, iEndBlockIndex, iEndInnerIndex); + int32_t iPointer = 0; + for (int32_t i = iStartBlockIndex; i <= iEndBlockIndex; i++) { + int32_t iBufferPointer = 0; + int32_t iCopyLength = m_iAllocStep; + if (i == iStartBlockIndex) { + iCopyLength -= iStartInnerIndex; + iBufferPointer = iStartInnerIndex; + } + if (i == iEndBlockIndex) { + iCopyLength -= ((m_iAllocStep - 1) - iEndInnerIndex); + } + wchar_t* pBlockBuf = m_BlockArray[i].get(); + memcpy(pBuf + iPointer, pBlockBuf + iBufferPointer, + iCopyLength * sizeof(wchar_t)); + iPointer += iCopyLength; + } + wsTextData.ReleaseBuffer(iLength); +} + +void CFX_BlockBuffer::TextDataIndex2BufIndex(const int32_t iIndex, + int32_t& iBlockIndex, + int32_t& iInnerIndex) const { + ASSERT(iIndex >= 0); + int32_t iRealIndex = m_iStartPosition + iIndex; + iBlockIndex = iRealIndex / m_iAllocStep; + iInnerIndex = iRealIndex % m_iAllocStep; +} + +void CFX_BlockBuffer::ClearBuffer() { + m_iBufferSize = 0; + m_BlockArray.clear(); +} diff --git a/core/fxcrt/cfx_blockbuffer.h b/core/fxcrt/cfx_blockbuffer.h new file mode 100644 index 0000000000..dbf01a938a --- /dev/null +++ b/core/fxcrt/cfx_blockbuffer.h @@ -0,0 +1,54 @@ 
+// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FXCRT_CFX_BLOCKBUFFER_H_ +#define CORE_FXCRT_CFX_BLOCKBUFFER_H_ + +#include + +#include +#include + +#include "core/fxcrt/fx_string.h" + +class CFX_BlockBuffer { + public: + explicit CFX_BlockBuffer(int32_t iAllocStep = 1024 * 1024); + ~CFX_BlockBuffer(); + + bool InitBuffer(int32_t iBufferSize = 1024 * 1024); + bool IsInitialized() { return m_iBufferSize / m_iAllocStep >= 1; } + + wchar_t* GetAvailableBlock(int32_t& iIndexInBlock); + inline int32_t GetAllocStep() const { return m_iAllocStep; } + inline int32_t& GetDataLengthRef() { return m_iDataLength; } + + inline void Reset(bool bReserveData = true) { + if (!bReserveData) + m_iStartPosition = 0; + m_iDataLength = 0; + } + + void SetTextChar(int32_t iIndex, wchar_t ch); + int32_t DeleteTextChars(int32_t iCount, bool bDirection = true); + void GetTextData(CFX_WideString& wsTextData, + int32_t iStart = 0, + int32_t iLength = -1) const; + + private: + inline void TextDataIndex2BufIndex(const int32_t iIndex, + int32_t& iBlockIndex, + int32_t& iInnerIndex) const; + void ClearBuffer(); + + std::vector> m_BlockArray; + int32_t m_iDataLength; + int32_t m_iBufferSize; + int32_t m_iAllocStep; + int32_t m_iStartPosition; +}; + +#endif // CORE_FXCRT_CFX_BLOCKBUFFER_H_ diff --git a/testing/libfuzzer/pdf_xml_fuzzer.cc b/testing/libfuzzer/pdf_xml_fuzzer.cc index e255f96f6e..13eda60d77 100644 --- a/testing/libfuzzer/pdf_xml_fuzzer.cc +++ b/testing/libfuzzer/pdf_xml_fuzzer.cc @@ -10,8 +10,9 @@ #include "core/fxcrt/fx_safe_types.h" #include "core/fxcrt/fx_system.h" #include "third_party/base/ptr_util.h" -#include "xfa/fde/xml/cfde_xml_parser.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmldoc.h" +#include "xfa/fde/xml/cfde_xmlnode.h" +#include 
"xfa/fde/xml/cfde_xmlparser.h" #include "xfa/fxfa/parser/cxfa_widetextread.h" namespace { diff --git a/xfa/fde/xml/cfde_xml_parser.cpp b/xfa/fde/xml/cfde_xml_parser.cpp deleted file mode 100644 index 840c34a5c4..0000000000 --- a/xfa/fde/xml/cfde_xml_parser.cpp +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright 2016 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xml_parser.h" - -#include "third_party/base/ptr_util.h" - -CFDE_XMLParser::CFDE_XMLParser(CFDE_XMLNode* pParent, - const CFX_RetainPtr& pStream) - : m_nElementStart(0), - m_dwCheckStatus(0), - m_dwCurrentCheckStatus(0), - m_pStream(pStream), - m_pParser(pdfium::MakeUnique()), - m_pParent(pParent), - m_pChild(nullptr), - m_syntaxParserResult(FDE_XmlSyntaxResult::None) { - ASSERT(m_pParent && m_pStream); - m_NodeStack.push(m_pParent); - m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024); -} - -CFDE_XMLParser::~CFDE_XMLParser() {} - -int32_t CFDE_XMLParser::DoParser(IFX_Pause* pPause) { - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) - return -1; - if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) - return 100; - - int32_t iCount = 0; - while (true) { - m_syntaxParserResult = m_pParser->DoSyntaxParse(); - switch (m_syntaxParserResult) { - case FDE_XmlSyntaxResult::InstructionOpen: - break; - case FDE_XmlSyntaxResult::InstructionClose: - if (m_pChild) { - if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - } - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::ElementOpen: - if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 2) - m_nElementStart = m_pParser->GetCurrentPos() - 1; - break; - case FDE_XmlSyntaxResult::ElementBreak: - break; - case FDE_XmlSyntaxResult::ElementClose: - if (m_pChild->GetType() != 
FDE_XMLNODE_Element) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - m_pParser->GetTagName(m_ws1); - static_cast(m_pChild)->GetTagName(m_ws2); - if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - if (!m_NodeStack.empty()) - m_NodeStack.pop(); - if (m_NodeStack.empty()) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.size() == 2) { - m_nSize[m_dwCurrentCheckStatus - 1] = - m_pParser->GetCurrentBinaryPos() - - m_nStart[m_dwCurrentCheckStatus - 1]; - m_dwCurrentCheckStatus = 0; - } - m_pParent = m_NodeStack.top(); - m_pChild = m_pParent; - iCount++; - break; - case FDE_XmlSyntaxResult::TargetName: - m_pParser->GetTargetName(m_ws1); - if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") { - m_pChild = new CFDE_XMLInstruction(m_ws1); - m_pParent->InsertChildNode(m_pChild); - } else { - m_pChild = nullptr; - } - m_ws1.clear(); - break; - case FDE_XmlSyntaxResult::TagName: - m_pParser->GetTagName(m_ws1); - m_pChild = new CFDE_XMLElement(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_NodeStack.push(m_pChild); - m_pParent = m_pChild; - - if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 3) { - CFX_WideString wsTag; - static_cast(m_pChild)->GetLocalTagName(wsTag); - if (wsTag == L"template") { - m_dwCheckStatus |= 0x01; - m_dwCurrentCheckStatus = 0x01; - m_nStart[0] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } else if (wsTag == L"datasets") { - m_dwCheckStatus |= 0x02; - m_dwCurrentCheckStatus = 0x02; - m_nStart[1] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } - } - break; - case FDE_XmlSyntaxResult::AttriName: - m_pParser->GetAttributeName(m_ws1); - break; - case FDE_XmlSyntaxResult::AttriValue: - if (m_pChild) { - m_pParser->GetAttributeName(m_ws2); - if (m_pChild->GetType() == FDE_XMLNODE_Element) { - 
static_cast(m_pChild)->SetString(m_ws1, m_ws2); - } - } - m_ws1.clear(); - break; - case FDE_XmlSyntaxResult::Text: - m_pParser->GetTextData(m_ws1); - m_pChild = new CFDE_XMLText(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::CData: - m_pParser->GetTextData(m_ws1); - m_pChild = new CFDE_XMLCharData(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::TargetData: - if (m_pChild) { - if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - if (!m_ws1.IsEmpty()) { - static_cast(m_pChild)->AppendData(m_ws1); - } - m_pParser->GetTargetData(m_ws1); - static_cast(m_pChild)->AppendData(m_ws1); - } - m_ws1.clear(); - break; - default: - break; - } - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { - break; - } - if (pPause && iCount > 500 && pPause->NeedToPauseNow()) { - break; - } - } - return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_NodeStack.size() != 1) - ? -1 - : m_pParser->GetStatus(); -} diff --git a/xfa/fde/xml/cfde_xml_parser.h b/xfa/fde/xml/cfde_xml_parser.h deleted file mode 100644 index cd8ccbe389..0000000000 --- a/xfa/fde/xml/cfde_xml_parser.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2016 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XML_PARSER_H_ -#define XFA_FDE_XML_CFDE_XML_PARSER_H_ - -#include -#include - -#include "xfa/fde/xml/fde_xml_imp.h" - -class IFGAS_Stream; -class IFX_Pause; - -class CFDE_XMLParser { - public: - CFDE_XMLParser(CFDE_XMLNode* pParent, - const CFX_RetainPtr& pStream); - ~CFDE_XMLParser(); - - int32_t DoParser(IFX_Pause* pPause); - - FX_FILESIZE m_nStart[2]; - size_t m_nSize[2]; - FX_FILESIZE m_nElementStart; - uint16_t m_dwCheckStatus; - uint16_t m_dwCurrentCheckStatus; - - private: - CFX_RetainPtr m_pStream; - std::unique_ptr m_pParser; - CFDE_XMLNode* m_pParent; - CFDE_XMLNode* m_pChild; - std::stack m_NodeStack; - CFX_WideString m_ws1; - CFX_WideString m_ws2; - FDE_XmlSyntaxResult m_syntaxParserResult; -}; - -#endif // XFA_FDE_XML_CFDE_XML_PARSER_H_ diff --git a/xfa/fde/xml/cfde_xmlchardata.cpp b/xfa/fde/xml/cfde_xmlchardata.cpp new file mode 100644 index 0000000000..40e4730dea --- /dev/null +++ b/xfa/fde/xml/cfde_xmlchardata.cpp @@ -0,0 +1,21 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xmlchardata.h" + +CFDE_XMLCharData::CFDE_XMLCharData(const CFX_WideString& wsCData) + : CFDE_XMLDeclaration(), m_wsCharData(wsCData) {} + +CFDE_XMLCharData::~CFDE_XMLCharData() {} + +FDE_XMLNODETYPE CFDE_XMLCharData::GetType() const { + return FDE_XMLNODE_CharData; +} + +CFDE_XMLNode* CFDE_XMLCharData::Clone(bool bRecursive) { + CFDE_XMLCharData* pClone = new CFDE_XMLCharData(m_wsCharData); + return pClone; +} diff --git a/xfa/fde/xml/cfde_xmlchardata.h b/xfa/fde/xml/cfde_xmlchardata.h new file mode 100644 index 0000000000..308ec1721b --- /dev/null +++ b/xfa/fde/xml/cfde_xmlchardata.h @@ -0,0 +1,29 @@ +// Copyright 2017 PDFium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XMLCHARDATA_H_ +#define XFA_FDE_XML_CFDE_XMLCHARDATA_H_ + +#include "core/fxcrt/fx_string.h" +#include "xfa/fde/xml/cfde_xmldeclaration.h" + +class CFDE_XMLCharData : public CFDE_XMLDeclaration { + public: + explicit CFDE_XMLCharData(const CFX_WideString& wsCData); + ~CFDE_XMLCharData() override; + + FDE_XMLNODETYPE GetType() const override; + CFDE_XMLNode* Clone(bool bRecursive) override; + + void GetCharData(CFX_WideString& wsCharData) const { + wsCharData = m_wsCharData; + } + void SetCharData(const CFX_WideString& wsCData) { m_wsCharData = wsCData; } + + CFX_WideString m_wsCharData; +}; + +#endif // XFA_FDE_XML_CFDE_XMLCHARDATA_H_ diff --git a/xfa/fde/xml/cfde_xmldeclaration.h b/xfa/fde/xml/cfde_xmldeclaration.h new file mode 100644 index 0000000000..ade66c470f --- /dev/null +++ b/xfa/fde/xml/cfde_xmldeclaration.h @@ -0,0 +1,18 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XMLDECLARATION_H_ +#define XFA_FDE_XML_CFDE_XMLDECLARATION_H_ + +#include "xfa/fde/xml/cfde_xmlnode.h" + +class CFDE_XMLDeclaration : public CFDE_XMLNode { + public: + CFDE_XMLDeclaration() {} + ~CFDE_XMLDeclaration() override {} +}; + +#endif // XFA_FDE_XML_CFDE_XMLDECLARATION_H_ diff --git a/xfa/fde/xml/cfde_xmldoc.cpp b/xfa/fde/xml/cfde_xmldoc.cpp new file mode 100644 index 0000000000..bc526ae4b3 --- /dev/null +++ b/xfa/fde/xml/cfde_xmldoc.cpp @@ -0,0 +1,161 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xmldoc.h" + +#include +#include + +#include "third_party/base/ptr_util.h" +#include "third_party/base/stl_util.h" +#include "xfa/fde/xml/cfde_xmlchardata.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlinstruction.h" +#include "xfa/fde/xml/cfde_xmlnode.h" +#include "xfa/fde/xml/cfde_xmltext.h" +#include "xfa/fgas/crt/fgas_codepage.h" + +CFDE_XMLDoc::CFDE_XMLDoc() + : m_iStatus(0), m_pRoot(pdfium::MakeUnique()) { + m_pRoot->InsertChildNode(new CFDE_XMLInstruction(L"xml")); +} + +CFDE_XMLDoc::~CFDE_XMLDoc() {} + +bool CFDE_XMLDoc::LoadXML(std::unique_ptr pXMLParser) { + if (!pXMLParser) + return false; + + m_iStatus = 0; + m_pStream.Reset(); + m_pRoot->DeleteChildren(); + m_pXMLParser = std::move(pXMLParser); + return true; +} + +int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) { + if (m_iStatus < 100) + m_iStatus = m_pXMLParser->DoParser(pPause); + + return m_iStatus; +} + +void CFDE_XMLDoc::CloseXML() { + m_pXMLParser.reset(); +} + +void CFDE_XMLDoc::SaveXMLNode(const CFX_RetainPtr& pXMLStream, + CFDE_XMLNode* pINode) { + CFDE_XMLNode* pNode = (CFDE_XMLNode*)pINode; + switch (pNode->GetType()) { + case FDE_XMLNODE_Instruction: { + CFX_WideString ws; + CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; + if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) { + ws = L"GetCodePage(); + if (wCodePage == FX_CODEPAGE_UTF16LE) { + ws += L"UTF-16"; + } else if (wCodePage == FX_CODEPAGE_UTF16BE) { + ws += L"UTF-16be"; + } else { + ws += L"UTF-8"; + } + ws += L"\"?>"; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } else { + ws.Format(L"m_wsTarget.c_str()); + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + std::vector& attributes = pInstruction->m_Attributes; + int32_t i; + int32_t iCount = pdfium::CollectionSize(attributes); + CFX_WideString wsValue; + for (i = 0; i < iCount; i += 2) { + ws = L" "; + 
ws += attributes[i]; + ws += L"=\""; + wsValue = attributes[i + 1]; + wsValue.Replace(L"&", L"&"); + wsValue.Replace(L"<", L"<"); + wsValue.Replace(L">", L">"); + wsValue.Replace(L"\'", L"'"); + wsValue.Replace(L"\"", L"""); + ws += wsValue; + ws += L"\""; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } + std::vector& targetdata = pInstruction->m_TargetData; + iCount = pdfium::CollectionSize(targetdata); + for (i = 0; i < iCount; i++) { + ws = L" \""; + ws += targetdata[i]; + ws += L"\""; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } + ws = L"?>"; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } + } break; + case FDE_XMLNODE_Element: { + CFX_WideString ws; + ws = L"<"; + ws += ((CFDE_XMLElement*)pNode)->m_wsTag; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + std::vector& attributes = + static_cast(pNode)->m_Attributes; + int32_t iCount = pdfium::CollectionSize(attributes); + CFX_WideString wsValue; + for (int32_t i = 0; i < iCount; i += 2) { + ws = L" "; + ws += attributes[i]; + ws += L"=\""; + wsValue = attributes[i + 1]; + wsValue.Replace(L"&", L"&"); + wsValue.Replace(L"<", L"<"); + wsValue.Replace(L">", L">"); + wsValue.Replace(L"\'", L"'"); + wsValue.Replace(L"\"", L"""); + ws += wsValue; + ws += L"\""; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } + if (pNode->m_pChild) { + ws = L"\n>"; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + CFDE_XMLNode* pChild = pNode->m_pChild; + while (pChild) { + SaveXMLNode(pXMLStream, static_cast(pChild)); + pChild = pChild->m_pNext; + } + ws = L"m_wsTag; + ws += L"\n>"; + } else { + ws = L"\n/>"; + } + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } break; + case FDE_XMLNODE_Text: { + CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText; + ws.Replace(L"&", L"&"); + ws.Replace(L"<", L"<"); + ws.Replace(L">", L">"); + ws.Replace(L"\'", L"'"); + ws.Replace(L"\"", L"""); + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } break; + case 
FDE_XMLNODE_CharData: { + CFX_WideString ws = L"m_wsCharData; + ws += L"]]>"; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } break; + case FDE_XMLNODE_Unknown: + break; + default: + break; + } +} diff --git a/xfa/fde/xml/cfde_xmldoc.h b/xfa/fde/xml/cfde_xmldoc.h new file mode 100644 index 0000000000..3eb07a87e1 --- /dev/null +++ b/xfa/fde/xml/cfde_xmldoc.h @@ -0,0 +1,36 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XMLDOC_H_ +#define XFA_FDE_XML_CFDE_XMLDOC_H_ + +#include + +#include "core/fxcrt/cfx_retain_ptr.h" +#include "xfa/fde/xml/cfde_xmlnode.h" +#include "xfa/fde/xml/cfde_xmlparser.h" +#include "xfa/fgas/crt/ifgas_stream.h" + +class CFDE_XMLDoc { + public: + CFDE_XMLDoc(); + ~CFDE_XMLDoc(); + + bool LoadXML(std::unique_ptr pXMLParser); + int32_t DoLoad(IFX_Pause* pPause = nullptr); + void CloseXML(); + CFDE_XMLNode* GetRoot() const { return m_pRoot.get(); } + void SaveXMLNode(const CFX_RetainPtr& pXMLStream, + CFDE_XMLNode* pNode); + + private: + int32_t m_iStatus; + std::unique_ptr m_pRoot; + std::unique_ptr m_pXMLParser; + CFX_RetainPtr m_pStream; +}; + +#endif // XFA_FDE_XML_CFDE_XMLDOC_H_ diff --git a/xfa/fde/xml/cfde_xmlelement.cpp b/xfa/fde/xml/cfde_xmlelement.cpp new file mode 100644 index 0000000000..aca27c50e3 --- /dev/null +++ b/xfa/fde/xml/cfde_xmlelement.cpp @@ -0,0 +1,223 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xmlelement.h" + +#include "core/fxcrt/fx_ext.h" +#include "third_party/base/stl_util.h" +#include "xfa/fde/xml/cfde_xmlchardata.h" +#include "xfa/fde/xml/cfde_xmltext.h" + +CFDE_XMLElement::CFDE_XMLElement(const CFX_WideString& wsTag) + : CFDE_XMLNode(), m_wsTag(wsTag), m_Attributes() { + ASSERT(m_wsTag.GetLength() > 0); +} + +CFDE_XMLElement::~CFDE_XMLElement() {} + +FDE_XMLNODETYPE CFDE_XMLElement::GetType() const { + return FDE_XMLNODE_Element; +} + +CFDE_XMLNode* CFDE_XMLElement::Clone(bool bRecursive) { + CFDE_XMLElement* pClone = new CFDE_XMLElement(m_wsTag); + if (!pClone) + return nullptr; + + pClone->m_Attributes = m_Attributes; + if (bRecursive) { + CloneChildren(pClone); + } else { + CFX_WideString wsText; + CFDE_XMLNode* pChild = m_pChild; + while (pChild) { + switch (pChild->GetType()) { + case FDE_XMLNODE_Text: + wsText += ((CFDE_XMLText*)pChild)->m_wsText; + break; + default: + break; + } + pChild = pChild->m_pNext; + } + pClone->SetTextData(wsText); + } + return pClone; +} + +void CFDE_XMLElement::GetTagName(CFX_WideString& wsTag) const { + wsTag = m_wsTag; +} + +void CFDE_XMLElement::GetLocalTagName(CFX_WideString& wsTag) const { + FX_STRSIZE iFind = m_wsTag.Find(L':', 0); + if (iFind < 0) { + wsTag = m_wsTag; + } else { + wsTag = m_wsTag.Right(m_wsTag.GetLength() - iFind - 1); + } +} + +void CFDE_XMLElement::GetNamespacePrefix(CFX_WideString& wsPrefix) const { + FX_STRSIZE iFind = m_wsTag.Find(L':', 0); + if (iFind < 0) { + wsPrefix.clear(); + } else { + wsPrefix = m_wsTag.Left(iFind); + } +} + +void CFDE_XMLElement::GetNamespaceURI(CFX_WideString& wsNamespace) const { + CFX_WideString wsAttri(L"xmlns"), wsPrefix; + GetNamespacePrefix(wsPrefix); + if (wsPrefix.GetLength() > 0) { + wsAttri += L":"; + wsAttri += wsPrefix; + } + wsNamespace.clear(); + CFDE_XMLNode* pNode = (CFDE_XMLNode*)this; + while (pNode) { + if (pNode->GetType() != FDE_XMLNODE_Element) { + break; + } + CFDE_XMLElement* 
pElement = (CFDE_XMLElement*)pNode; + if (!pElement->HasAttribute(wsAttri.c_str())) { + pNode = pNode->GetNodeItem(CFDE_XMLNode::Parent); + continue; + } + pElement->GetString(wsAttri.c_str(), wsNamespace); + break; + } +} + +int32_t CFDE_XMLElement::CountAttributes() const { + return pdfium::CollectionSize(m_Attributes) / 2; +} + +bool CFDE_XMLElement::GetAttribute(int32_t index, + CFX_WideString& wsAttriName, + CFX_WideString& wsAttriValue) const { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + ASSERT(index > -1 && index < iCount / 2); + for (int32_t i = 0; i < iCount; i += 2) { + if (index == 0) { + wsAttriName = m_Attributes[i]; + wsAttriValue = m_Attributes[i + 1]; + return true; + } + index--; + } + return false; +} + +bool CFDE_XMLElement::HasAttribute(const wchar_t* pwsAttriName) const { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) + return true; + } + return false; +} + +void CFDE_XMLElement::GetString(const wchar_t* pwsAttriName, + CFX_WideString& wsAttriValue, + const wchar_t* pwsDefValue) const { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) { + wsAttriValue = m_Attributes[i + 1]; + return; + } + } + wsAttriValue = pwsDefValue; +} + +void CFDE_XMLElement::SetString(const CFX_WideString& wsAttriName, + const CFX_WideString& wsAttriValue) { + ASSERT(wsAttriName.GetLength() > 0); + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(wsAttriName) == 0) { + m_Attributes[i] = wsAttriName; + m_Attributes[i + 1] = wsAttriValue; + return; + } + } + m_Attributes.push_back(wsAttriName); + m_Attributes.push_back(wsAttriValue); +} + +int32_t CFDE_XMLElement::GetInteger(const wchar_t* pwsAttriName, + int32_t iDefValue) const { + int32_t iCount = 
pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) { + return FXSYS_wtoi(m_Attributes[i + 1].c_str()); + } + } + return iDefValue; +} + +void CFDE_XMLElement::SetInteger(const wchar_t* pwsAttriName, + int32_t iAttriValue) { + CFX_WideString wsValue; + wsValue.Format(L"%d", iAttriValue); + SetString(pwsAttriName, wsValue); +} + +float CFDE_XMLElement::GetFloat(const wchar_t* pwsAttriName, + float fDefValue) const { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) { + return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr); + } + } + return fDefValue; +} + +void CFDE_XMLElement::SetFloat(const wchar_t* pwsAttriName, float fAttriValue) { + CFX_WideString wsValue; + wsValue.Format(L"%f", fAttriValue); + SetString(pwsAttriName, wsValue); +} + +void CFDE_XMLElement::RemoveAttribute(const wchar_t* pwsAttriName) { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) { + m_Attributes.erase(m_Attributes.begin() + i, + m_Attributes.begin() + i + 2); + return; + } + } +} + +void CFDE_XMLElement::GetTextData(CFX_WideString& wsText) const { + CFX_WideTextBuf buffer; + CFDE_XMLNode* pChild = m_pChild; + while (pChild) { + switch (pChild->GetType()) { + case FDE_XMLNODE_Text: + buffer << ((CFDE_XMLText*)pChild)->m_wsText; + break; + case FDE_XMLNODE_CharData: + buffer << ((CFDE_XMLCharData*)pChild)->m_wsCharData; + break; + default: + break; + } + pChild = pChild->m_pNext; + } + wsText = buffer.AsStringC(); +} + +void CFDE_XMLElement::SetTextData(const CFX_WideString& wsText) { + if (wsText.GetLength() < 1) { + return; + } + InsertChildNode(new CFDE_XMLText(wsText)); +} diff --git a/xfa/fde/xml/cfde_xmlelement.h b/xfa/fde/xml/cfde_xmlelement.h new file mode 100644 index 0000000000..8f61035979 --- 
/dev/null +++ b/xfa/fde/xml/cfde_xmlelement.h @@ -0,0 +1,56 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XMLELEMENT_H_ +#define XFA_FDE_XML_CFDE_XMLELEMENT_H_ + +#include + +#include "core/fxcrt/fx_string.h" +#include "xfa/fde/xml/cfde_xmlnode.h" + +class CFDE_XMLElement : public CFDE_XMLNode { + public: + explicit CFDE_XMLElement(const CFX_WideString& wsTag); + ~CFDE_XMLElement() override; + + // CFDE_XMLNode + FDE_XMLNODETYPE GetType() const override; + CFDE_XMLNode* Clone(bool bRecursive) override; + + void GetTagName(CFX_WideString& wsTag) const; + void GetLocalTagName(CFX_WideString& wsTag) const; + + void GetNamespacePrefix(CFX_WideString& wsPrefix) const; + void GetNamespaceURI(CFX_WideString& wsNamespace) const; + + int32_t CountAttributes() const; + bool GetAttribute(int32_t index, + CFX_WideString& wsAttriName, + CFX_WideString& wsAttriValue) const; + bool HasAttribute(const wchar_t* pwsAttriName) const; + void RemoveAttribute(const wchar_t* pwsAttriName); + + void GetString(const wchar_t* pwsAttriName, + CFX_WideString& wsAttriValue, + const wchar_t* pwsDefValue = nullptr) const; + void SetString(const CFX_WideString& wsAttriName, + const CFX_WideString& wsAttriValue); + + int32_t GetInteger(const wchar_t* pwsAttriName, int32_t iDefValue = 0) const; + void SetInteger(const wchar_t* pwsAttriName, int32_t iAttriValue); + + float GetFloat(const wchar_t* pwsAttriName, float fDefValue = 0) const; + void SetFloat(const wchar_t* pwsAttriName, float fAttriValue); + + void GetTextData(CFX_WideString& wsText) const; + void SetTextData(const CFX_WideString& wsText); + + CFX_WideString m_wsTag; + std::vector m_Attributes; +}; + +#endif // XFA_FDE_XML_CFDE_XMLELEMENT_H_ diff --git a/xfa/fde/xml/cfde_xmlinstruction.cpp 
b/xfa/fde/xml/cfde_xmlinstruction.cpp new file mode 100644 index 0000000000..64c980b439 --- /dev/null +++ b/xfa/fde/xml/cfde_xmlinstruction.cpp @@ -0,0 +1,160 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xmlinstruction.h" + +#include "core/fxcrt/fx_ext.h" +#include "third_party/base/stl_util.h" + +CFDE_XMLInstruction::CFDE_XMLInstruction(const CFX_WideString& wsTarget) + : m_wsTarget(wsTarget) { + ASSERT(m_wsTarget.GetLength() > 0); +} + +CFDE_XMLInstruction::~CFDE_XMLInstruction() {} + +FDE_XMLNODETYPE CFDE_XMLInstruction::GetType() const { + return FDE_XMLNODE_Instruction; +} + +CFDE_XMLNode* CFDE_XMLInstruction::Clone(bool bRecursive) { + CFDE_XMLInstruction* pClone = new CFDE_XMLInstruction(m_wsTarget); + if (!pClone) + return nullptr; + + pClone->m_Attributes = m_Attributes; + pClone->m_TargetData = m_TargetData; + if (bRecursive) + CloneChildren(pClone); + + return pClone; +} + +int32_t CFDE_XMLInstruction::CountAttributes() const { + return pdfium::CollectionSize(m_Attributes) / 2; +} + +bool CFDE_XMLInstruction::GetAttribute(int32_t index, + CFX_WideString& wsAttriName, + CFX_WideString& wsAttriValue) const { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + ASSERT(index > -1 && index < iCount / 2); + for (int32_t i = 0; i < iCount; i += 2) { + if (index == 0) { + wsAttriName = m_Attributes[i]; + wsAttriValue = m_Attributes[i + 1]; + return true; + } + index--; + } + return false; +} + +bool CFDE_XMLInstruction::HasAttribute(const wchar_t* pwsAttriName) const { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) { + return true; + } + } + return false; +} + +void CFDE_XMLInstruction::GetString(const wchar_t* pwsAttriName, 
+ CFX_WideString& wsAttriValue, + const wchar_t* pwsDefValue) const { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) { + wsAttriValue = m_Attributes[i + 1]; + return; + } + } + wsAttriValue = pwsDefValue; +} + +void CFDE_XMLInstruction::SetString(const CFX_WideString& wsAttriName, + const CFX_WideString& wsAttriValue) { + ASSERT(wsAttriName.GetLength() > 0); + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(wsAttriName) == 0) { + m_Attributes[i] = wsAttriName; + m_Attributes[i + 1] = wsAttriValue; + return; + } + } + m_Attributes.push_back(wsAttriName); + m_Attributes.push_back(wsAttriValue); +} + +int32_t CFDE_XMLInstruction::GetInteger(const wchar_t* pwsAttriName, + int32_t iDefValue) const { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) { + return FXSYS_wtoi(m_Attributes[i + 1].c_str()); + } + } + return iDefValue; +} + +void CFDE_XMLInstruction::SetInteger(const wchar_t* pwsAttriName, + int32_t iAttriValue) { + CFX_WideString wsValue; + wsValue.Format(L"%d", iAttriValue); + SetString(pwsAttriName, wsValue); +} + +float CFDE_XMLInstruction::GetFloat(const wchar_t* pwsAttriName, + float fDefValue) const { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for (int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) { + return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr); + } + } + return fDefValue; +} + +void CFDE_XMLInstruction::SetFloat(const wchar_t* pwsAttriName, + float fAttriValue) { + CFX_WideString wsValue; + wsValue.Format(L"%f", fAttriValue); + SetString(pwsAttriName, wsValue); +} + +void CFDE_XMLInstruction::RemoveAttribute(const wchar_t* pwsAttriName) { + int32_t iCount = pdfium::CollectionSize(m_Attributes); + for 
(int32_t i = 0; i < iCount; i += 2) { + if (m_Attributes[i].Compare(pwsAttriName) == 0) { + m_Attributes.erase(m_Attributes.begin() + i, + m_Attributes.begin() + i + 2); + return; + } + } +} + +int32_t CFDE_XMLInstruction::CountData() const { + return pdfium::CollectionSize(m_TargetData); +} + +bool CFDE_XMLInstruction::GetData(int32_t index, CFX_WideString& wsData) const { + if (!pdfium::IndexInBounds(m_TargetData, index)) + return false; + + wsData = m_TargetData[index]; + return true; +} + +void CFDE_XMLInstruction::AppendData(const CFX_WideString& wsData) { + m_TargetData.push_back(wsData); +} + +void CFDE_XMLInstruction::RemoveData(int32_t index) { + if (pdfium::IndexInBounds(m_TargetData, index)) + m_TargetData.erase(m_TargetData.begin() + index); +} diff --git a/xfa/fde/xml/cfde_xmlinstruction.h b/xfa/fde/xml/cfde_xmlinstruction.h new file mode 100644 index 0000000000..58dc5f17ac --- /dev/null +++ b/xfa/fde/xml/cfde_xmlinstruction.h @@ -0,0 +1,50 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XMLINSTRUCTION_H_ +#define XFA_FDE_XML_CFDE_XMLINSTRUCTION_H_ + +#include + +#include "core/fxcrt/fx_string.h" +#include "xfa/fde/xml/cfde_xmlnode.h" + +class CFDE_XMLInstruction : public CFDE_XMLNode { + public: + explicit CFDE_XMLInstruction(const CFX_WideString& wsTarget); + ~CFDE_XMLInstruction() override; + + // CFDE_XMLNode + FDE_XMLNODETYPE GetType() const override; + CFDE_XMLNode* Clone(bool bRecursive) override; + + void GetTargetName(CFX_WideString& wsTarget) const { wsTarget = m_wsTarget; } + int32_t CountAttributes() const; + bool GetAttribute(int32_t index, + CFX_WideString& wsAttriName, + CFX_WideString& wsAttriValue) const; + bool HasAttribute(const wchar_t* pwsAttriName) const; + void GetString(const wchar_t* pwsAttriName, + CFX_WideString& wsAttriValue, + const wchar_t* pwsDefValue = nullptr) const; + void SetString(const CFX_WideString& wsAttriName, + const CFX_WideString& wsAttriValue); + int32_t GetInteger(const wchar_t* pwsAttriName, int32_t iDefValue = 0) const; + void SetInteger(const wchar_t* pwsAttriName, int32_t iAttriValue); + float GetFloat(const wchar_t* pwsAttriName, float fDefValue = 0) const; + void SetFloat(const wchar_t* pwsAttriName, float fAttriValue); + void RemoveAttribute(const wchar_t* pwsAttriName); + int32_t CountData() const; + bool GetData(int32_t index, CFX_WideString& wsData) const; + void AppendData(const CFX_WideString& wsData); + void RemoveData(int32_t index); + + CFX_WideString m_wsTarget; + std::vector m_Attributes; + std::vector m_TargetData; +}; + +#endif // XFA_FDE_XML_CFDE_XMLINSTRUCTION_H_ diff --git a/xfa/fde/xml/cfde_xmlnode.cpp b/xfa/fde/xml/cfde_xmlnode.cpp new file mode 100644 index 0000000000..bd86c0b071 --- /dev/null +++ b/xfa/fde/xml/cfde_xmlnode.cpp @@ -0,0 +1,458 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xmlnode.h" + +#include + +#include "third_party/base/stl_util.h" +#include "xfa/fde/xml/cfde_xmlchardata.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlinstruction.h" +#include "xfa/fde/xml/cfde_xmltext.h" +#include "xfa/fgas/crt/fgas_codepage.h" + +CFDE_XMLNode::CFDE_XMLNode() + : m_pParent(nullptr), + m_pChild(nullptr), + m_pPrior(nullptr), + m_pNext(nullptr) {} + +FDE_XMLNODETYPE CFDE_XMLNode::GetType() const { + return FDE_XMLNODE_Unknown; +} + +CFDE_XMLNode::~CFDE_XMLNode() { + DeleteChildren(); +} + +void CFDE_XMLNode::DeleteChildren() { + CFDE_XMLNode* pChild = m_pChild; + while (pChild) { + CFDE_XMLNode* pNext = pChild->m_pNext; + delete pChild; + pChild = pNext; + } + m_pChild = nullptr; +} + +int32_t CFDE_XMLNode::CountChildNodes() const { + int32_t iCount = 0; + CFDE_XMLNode* pChild = m_pChild; + while (pChild) { + iCount++; + pChild = pChild->m_pNext; + } + return iCount; +} + +CFDE_XMLNode* CFDE_XMLNode::GetChildNode(int32_t index) const { + CFDE_XMLNode* pChild = m_pChild; + while (pChild) { + if (index == 0) { + return pChild; + } + index--; + pChild = pChild->m_pNext; + } + return nullptr; +} + +int32_t CFDE_XMLNode::GetChildNodeIndex(CFDE_XMLNode* pNode) const { + int32_t index = 0; + CFDE_XMLNode* pChild = m_pChild; + while (pChild) { + if (pChild == pNode) { + return index; + } + index++; + pChild = pChild->m_pNext; + } + return -1; +} + +CFDE_XMLNode* CFDE_XMLNode::GetPath(const wchar_t* pPath, + int32_t iLength, + bool bQualifiedName) const { + ASSERT(pPath); + if (iLength < 0) { + iLength = FXSYS_wcslen(pPath); + } + if (iLength == 0) { + return nullptr; + } + CFX_WideString csPath; + const wchar_t* pStart = pPath; + const wchar_t* pEnd = pPath + iLength; + wchar_t ch; + while (pStart < pEnd) { + ch = *pStart++; + if (ch == L'/') { + break; + } else { + csPath += ch; + } + } + iLength -= pStart - pPath; 
+ CFDE_XMLNode* pFind = nullptr; + if (csPath.GetLength() < 1) { + pFind = GetNodeItem(CFDE_XMLNode::Root); + } else if (csPath.Compare(L"..") == 0) { + pFind = m_pParent; + } else if (csPath.Compare(L".") == 0) { + pFind = (CFDE_XMLNode*)this; + } else { + CFX_WideString wsTag; + CFDE_XMLNode* pNode = m_pChild; + while (pNode) { + if (pNode->GetType() == FDE_XMLNODE_Element) { + if (bQualifiedName) { + ((CFDE_XMLElement*)pNode)->GetTagName(wsTag); + } else { + ((CFDE_XMLElement*)pNode)->GetLocalTagName(wsTag); + } + if (wsTag.Compare(csPath) == 0) { + if (iLength < 1) { + pFind = pNode; + } else { + pFind = pNode->GetPath(pStart, iLength, bQualifiedName); + } + if (pFind) + return pFind; + } + } + pNode = pNode->m_pNext; + } + } + if (!pFind || iLength < 1) + return pFind; + return pFind->GetPath(pStart, iLength, bQualifiedName); +} + +int32_t CFDE_XMLNode::InsertChildNode(CFDE_XMLNode* pNode, int32_t index) { + pNode->m_pParent = this; + if (!m_pChild) { + m_pChild = pNode; + pNode->m_pPrior = nullptr; + pNode->m_pNext = nullptr; + return 0; + } + if (index == 0) { + pNode->m_pNext = m_pChild; + pNode->m_pPrior = nullptr; + m_pChild->m_pPrior = pNode; + m_pChild = pNode; + return 0; + } + int32_t iCount = 0; + CFDE_XMLNode* pFind = m_pChild; + while (++iCount != index && pFind->m_pNext) { + pFind = pFind->m_pNext; + } + pNode->m_pPrior = pFind; + pNode->m_pNext = pFind->m_pNext; + if (pFind->m_pNext) + pFind->m_pNext->m_pPrior = pNode; + pFind->m_pNext = pNode; + return iCount; +} + +void CFDE_XMLNode::RemoveChildNode(CFDE_XMLNode* pNode) { + ASSERT(m_pChild && pNode); + if (m_pChild == pNode) { + m_pChild = pNode->m_pNext; + } else { + pNode->m_pPrior->m_pNext = pNode->m_pNext; + } + if (pNode->m_pNext) + pNode->m_pNext->m_pPrior = pNode->m_pPrior; + pNode->m_pParent = nullptr; + pNode->m_pNext = nullptr; + pNode->m_pPrior = nullptr; +} + +CFDE_XMLNode* CFDE_XMLNode::GetNodeItem(CFDE_XMLNode::NodeItem eItem) const { + switch (eItem) { + case CFDE_XMLNode::Root: 
{ + CFDE_XMLNode* pParent = (CFDE_XMLNode*)this; + while (pParent->m_pParent) { + pParent = pParent->m_pParent; + } + return pParent; + } + case CFDE_XMLNode::Parent: + return m_pParent; + case CFDE_XMLNode::FirstSibling: { + CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; + while (pItem->m_pPrior) { + pItem = pItem->m_pPrior; + } + return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; + } + case CFDE_XMLNode::PriorSibling: + return m_pPrior; + case CFDE_XMLNode::NextSibling: + return m_pNext; + case CFDE_XMLNode::LastSibling: { + CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; + while (pItem->m_pNext) + pItem = pItem->m_pNext; + return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; + } + case CFDE_XMLNode::FirstNeighbor: { + CFDE_XMLNode* pParent = (CFDE_XMLNode*)this; + while (pParent->m_pParent) + pParent = pParent->m_pParent; + return pParent == (CFDE_XMLNode*)this ? nullptr : pParent; + } + case CFDE_XMLNode::PriorNeighbor: { + if (!m_pPrior) + return m_pParent; + + CFDE_XMLNode* pItem = m_pPrior; + while (pItem->m_pChild) { + pItem = pItem->m_pChild; + while (pItem->m_pNext) + pItem = pItem->m_pNext; + } + return pItem; + } + case CFDE_XMLNode::NextNeighbor: { + if (m_pChild) + return m_pChild; + if (m_pNext) + return m_pNext; + CFDE_XMLNode* pItem = m_pParent; + while (pItem) { + if (pItem->m_pNext) + return pItem->m_pNext; + pItem = pItem->m_pParent; + } + return nullptr; + } + case CFDE_XMLNode::LastNeighbor: { + CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; + while (pItem->m_pParent) { + pItem = pItem->m_pParent; + } + while (true) { + while (pItem->m_pNext) + pItem = pItem->m_pNext; + if (!pItem->m_pChild) + break; + pItem = pItem->m_pChild; + } + return pItem == (CFDE_XMLNode*)this ? 
nullptr : pItem; + } + case CFDE_XMLNode::FirstChild: + return m_pChild; + case CFDE_XMLNode::LastChild: { + if (!m_pChild) + return nullptr; + + CFDE_XMLNode* pChild = m_pChild; + while (pChild->m_pNext) + pChild = pChild->m_pNext; + return pChild; + } + default: + break; + } + return nullptr; +} + +int32_t CFDE_XMLNode::GetNodeLevel() const { + int32_t iLevel = 0; + const CFDE_XMLNode* pItem = m_pParent; + while (pItem) { + iLevel++; + pItem = pItem->m_pParent; + } + return iLevel; +} + +bool CFDE_XMLNode::InsertNodeItem(CFDE_XMLNode::NodeItem eItem, + CFDE_XMLNode* pNode) { + switch (eItem) { + case CFDE_XMLNode::NextSibling: { + pNode->m_pParent = m_pParent; + pNode->m_pNext = m_pNext; + pNode->m_pPrior = this; + if (m_pNext) { + m_pNext->m_pPrior = pNode; + } + m_pNext = pNode; + return true; + } + case CFDE_XMLNode::PriorSibling: { + pNode->m_pParent = m_pParent; + pNode->m_pNext = this; + pNode->m_pPrior = m_pPrior; + if (m_pPrior) { + m_pPrior->m_pNext = pNode; + } else if (m_pParent) { + m_pParent->m_pChild = pNode; + } + m_pPrior = pNode; + return true; + } + default: + return false; + } +} + +CFDE_XMLNode* CFDE_XMLNode::RemoveNodeItem(CFDE_XMLNode::NodeItem eItem) { + CFDE_XMLNode* pNode = nullptr; + switch (eItem) { + case CFDE_XMLNode::NextSibling: + if (m_pNext) { + pNode = m_pNext; + m_pNext = pNode->m_pNext; + if (m_pNext) { + m_pNext->m_pPrior = this; + } + pNode->m_pParent = nullptr; + pNode->m_pNext = nullptr; + pNode->m_pPrior = nullptr; + } + break; + default: + break; + } + return pNode; +} + +CFDE_XMLNode* CFDE_XMLNode::Clone(bool bRecursive) { + return nullptr; +} + +void CFDE_XMLNode::SaveXMLNode(const CFX_RetainPtr& pXMLStream) { + CFDE_XMLNode* pNode = (CFDE_XMLNode*)this; + switch (pNode->GetType()) { + case FDE_XMLNODE_Instruction: { + CFX_WideString ws; + CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; + if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) { + ws = L"GetCodePage(); + if (wCodePage == 
FX_CODEPAGE_UTF16LE) { + ws += L"UTF-16"; + } else if (wCodePage == FX_CODEPAGE_UTF16BE) { + ws += L"UTF-16be"; + } else { + ws += L"UTF-8"; + } + ws += L"\"?>"; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } else { + ws.Format(L"m_wsTarget.c_str()); + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + std::vector& attributes = pInstruction->m_Attributes; + int32_t i; + int32_t iCount = pdfium::CollectionSize(attributes); + CFX_WideString wsValue; + for (i = 0; i < iCount; i += 2) { + ws = L" "; + ws += attributes[i]; + ws += L"=\""; + wsValue = attributes[i + 1]; + wsValue.Replace(L"&", L"&"); + wsValue.Replace(L"<", L"<"); + wsValue.Replace(L">", L">"); + wsValue.Replace(L"\'", L"'"); + wsValue.Replace(L"\"", L"""); + ws += wsValue; + ws += L"\""; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } + std::vector& targetdata = pInstruction->m_TargetData; + iCount = pdfium::CollectionSize(targetdata); + for (i = 0; i < iCount; i++) { + ws = L" \""; + ws += targetdata[i]; + ws += L"\""; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } + ws = L"?>"; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } + } break; + case FDE_XMLNODE_Element: { + CFX_WideString ws; + ws = L"<"; + ws += ((CFDE_XMLElement*)pNode)->m_wsTag; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + std::vector& attributes = + static_cast(pNode)->m_Attributes; + int32_t iCount = pdfium::CollectionSize(attributes); + CFX_WideString wsValue; + for (int32_t i = 0; i < iCount; i += 2) { + ws = L" "; + ws += attributes[i]; + ws += L"=\""; + wsValue = attributes[i + 1]; + wsValue.Replace(L"&", L"&"); + wsValue.Replace(L"<", L"<"); + wsValue.Replace(L">", L">"); + wsValue.Replace(L"\'", L"'"); + wsValue.Replace(L"\"", L"""); + ws += wsValue; + ws += L"\""; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } + if (pNode->m_pChild) { + ws = L"\n>"; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + CFDE_XMLNode* pChild = pNode->m_pChild; + while 
(pChild) { + pChild->SaveXMLNode(pXMLStream); + pChild = pChild->m_pNext; + } + ws = L"m_wsTag; + ws += L"\n>"; + } else { + ws = L"\n/>"; + } + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } break; + case FDE_XMLNODE_Text: { + CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText; + ws.Replace(L"&", L"&"); + ws.Replace(L"<", L"<"); + ws.Replace(L">", L">"); + ws.Replace(L"\'", L"'"); + ws.Replace(L"\"", L"""); + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } break; + case FDE_XMLNODE_CharData: { + CFX_WideString ws = L"m_wsCharData; + ws += L"]]>"; + pXMLStream->WriteString(ws.c_str(), ws.GetLength()); + } break; + case FDE_XMLNODE_Unknown: + break; + default: + break; + } +} + +void CFDE_XMLNode::CloneChildren(CFDE_XMLNode* pClone) { + if (!m_pChild) { + return; + } + CFDE_XMLNode* pNext = m_pChild; + CFDE_XMLNode* pCloneNext = pNext->Clone(true); + pClone->InsertChildNode(pCloneNext); + pNext = pNext->m_pNext; + while (pNext) { + CFDE_XMLNode* pChild = pNext->Clone(true); + pCloneNext->InsertNodeItem(CFDE_XMLNode::NextSibling, pChild); + pCloneNext = pChild; + pNext = pNext->m_pNext; + } +} diff --git a/xfa/fde/xml/cfde_xmlnode.h b/xfa/fde/xml/cfde_xmlnode.h new file mode 100644 index 0000000000..178150fda5 --- /dev/null +++ b/xfa/fde/xml/cfde_xmlnode.h @@ -0,0 +1,74 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XMLNODE_H_ +#define XFA_FDE_XML_CFDE_XMLNODE_H_ + +#include "core/fxcrt/cfx_retain_ptr.h" +#include "xfa/fgas/crt/ifgas_stream.h" + +enum FDE_XMLNODETYPE { + FDE_XMLNODE_Unknown = 0, + FDE_XMLNODE_Instruction, + FDE_XMLNODE_Element, + FDE_XMLNODE_Text, + FDE_XMLNODE_CharData, +}; + +struct FDE_XMLNODE { + int32_t iNodeNum; + FDE_XMLNODETYPE eNodeType; +}; + +class CFDE_XMLNode { + public: + enum NodeItem { + Root = 0, + Parent, + FirstSibling, + PriorSibling, + NextSibling, + LastSibling, + FirstNeighbor, + PriorNeighbor, + NextNeighbor, + LastNeighbor, + FirstChild, + LastChild + }; + + CFDE_XMLNode(); + virtual ~CFDE_XMLNode(); + + virtual FDE_XMLNODETYPE GetType() const; + virtual CFDE_XMLNode* Clone(bool bRecursive); + + int32_t CountChildNodes() const; + CFDE_XMLNode* GetChildNode(int32_t index) const; + int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const; + int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1); + void RemoveChildNode(CFDE_XMLNode* pNode); + void DeleteChildren(); + void CloneChildren(CFDE_XMLNode* pClone); + + CFDE_XMLNode* GetPath(const wchar_t* pPath, + int32_t iLength = -1, + bool bQualifiedName = true) const; + + int32_t GetNodeLevel() const; + CFDE_XMLNode* GetNodeItem(CFDE_XMLNode::NodeItem eItem) const; + bool InsertNodeItem(CFDE_XMLNode::NodeItem eItem, CFDE_XMLNode* pNode); + CFDE_XMLNode* RemoveNodeItem(CFDE_XMLNode::NodeItem eItem); + + void SaveXMLNode(const CFX_RetainPtr& pXMLStream); + + CFDE_XMLNode* m_pParent; + CFDE_XMLNode* m_pChild; + CFDE_XMLNode* m_pPrior; + CFDE_XMLNode* m_pNext; +}; + +#endif // XFA_FDE_XML_CFDE_XMLNODE_H_ diff --git a/xfa/fde/xml/cfde_xmlparser.cpp b/xfa/fde/xml/cfde_xmlparser.cpp new file mode 100644 index 0000000000..db85021693 --- /dev/null +++ b/xfa/fde/xml/cfde_xmlparser.cpp @@ -0,0 +1,173 @@ +// Copyright 2016 PDFium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xmlparser.h" + +#include "core/fxcrt/fx_basic.h" +#include "third_party/base/ptr_util.h" +#include "xfa/fde/xml/cfde_xmlchardata.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlinstruction.h" +#include "xfa/fde/xml/cfde_xmlnode.h" +#include "xfa/fde/xml/cfde_xmltext.h" + +CFDE_XMLParser::CFDE_XMLParser(CFDE_XMLNode* pParent, + const CFX_RetainPtr& pStream) + : m_nElementStart(0), + m_dwCheckStatus(0), + m_dwCurrentCheckStatus(0), + m_pStream(pStream), + m_pParser(pdfium::MakeUnique()), + m_pParent(pParent), + m_pChild(nullptr), + m_syntaxParserResult(FDE_XmlSyntaxResult::None) { + ASSERT(m_pParent && m_pStream); + m_NodeStack.push(m_pParent); + m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024); +} + +CFDE_XMLParser::~CFDE_XMLParser() {} + +int32_t CFDE_XMLParser::DoParser(IFX_Pause* pPause) { + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) + return -1; + if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) + return 100; + + int32_t iCount = 0; + while (true) { + m_syntaxParserResult = m_pParser->DoSyntaxParse(); + switch (m_syntaxParserResult) { + case FDE_XmlSyntaxResult::InstructionOpen: + break; + case FDE_XmlSyntaxResult::InstructionClose: + if (m_pChild) { + if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + } + m_pChild = m_pParent; + break; + case FDE_XmlSyntaxResult::ElementOpen: + if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 2) + m_nElementStart = m_pParser->GetCurrentPos() - 1; + break; + case FDE_XmlSyntaxResult::ElementBreak: + break; + case FDE_XmlSyntaxResult::ElementClose: + if (m_pChild->GetType() != FDE_XMLNODE_Element) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + 
m_pParser->GetTagName(m_ws1); + static_cast(m_pChild)->GetTagName(m_ws2); + if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + if (!m_NodeStack.empty()) + m_NodeStack.pop(); + if (m_NodeStack.empty()) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.size() == 2) { + m_nSize[m_dwCurrentCheckStatus - 1] = + m_pParser->GetCurrentBinaryPos() - + m_nStart[m_dwCurrentCheckStatus - 1]; + m_dwCurrentCheckStatus = 0; + } + m_pParent = m_NodeStack.top(); + m_pChild = m_pParent; + iCount++; + break; + case FDE_XmlSyntaxResult::TargetName: + m_pParser->GetTargetName(m_ws1); + if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") { + m_pChild = new CFDE_XMLInstruction(m_ws1); + m_pParent->InsertChildNode(m_pChild); + } else { + m_pChild = nullptr; + } + m_ws1.clear(); + break; + case FDE_XmlSyntaxResult::TagName: + m_pParser->GetTagName(m_ws1); + m_pChild = new CFDE_XMLElement(m_ws1); + m_pParent->InsertChildNode(m_pChild); + m_NodeStack.push(m_pChild); + m_pParent = m_pChild; + + if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 3) { + CFX_WideString wsTag; + static_cast(m_pChild)->GetLocalTagName(wsTag); + if (wsTag == L"template") { + m_dwCheckStatus |= 0x01; + m_dwCurrentCheckStatus = 0x01; + m_nStart[0] = m_pParser->GetCurrentBinaryPos() - + (m_pParser->GetCurrentPos() - m_nElementStart); + } else if (wsTag == L"datasets") { + m_dwCheckStatus |= 0x02; + m_dwCurrentCheckStatus = 0x02; + m_nStart[1] = m_pParser->GetCurrentBinaryPos() - + (m_pParser->GetCurrentPos() - m_nElementStart); + } + } + break; + case FDE_XmlSyntaxResult::AttriName: + m_pParser->GetAttributeName(m_ws1); + break; + case FDE_XmlSyntaxResult::AttriValue: + if (m_pChild) { + m_pParser->GetAttributeName(m_ws2); + if (m_pChild->GetType() == FDE_XMLNODE_Element) { + static_cast(m_pChild)->SetString(m_ws1, m_ws2); + } + } + m_ws1.clear(); + break; + case 
FDE_XmlSyntaxResult::Text: + m_pParser->GetTextData(m_ws1); + m_pChild = new CFDE_XMLText(m_ws1); + m_pParent->InsertChildNode(m_pChild); + m_pChild = m_pParent; + break; + case FDE_XmlSyntaxResult::CData: + m_pParser->GetTextData(m_ws1); + m_pChild = new CFDE_XMLCharData(m_ws1); + m_pParent->InsertChildNode(m_pChild); + m_pChild = m_pParent; + break; + case FDE_XmlSyntaxResult::TargetData: + if (m_pChild) { + if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + break; + } + if (!m_ws1.IsEmpty()) { + static_cast(m_pChild)->AppendData(m_ws1); + } + m_pParser->GetTargetData(m_ws1); + static_cast(m_pChild)->AppendData(m_ws1); + } + m_ws1.clear(); + break; + default: + break; + } + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || + m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { + break; + } + if (pPause && iCount > 500 && pPause->NeedToPauseNow()) { + break; + } + } + return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || + m_NodeStack.size() != 1) + ? -1 + : m_pParser->GetStatus(); +} diff --git a/xfa/fde/xml/cfde_xmlparser.h b/xfa/fde/xml/cfde_xmlparser.h new file mode 100644 index 0000000000..42f590ce21 --- /dev/null +++ b/xfa/fde/xml/cfde_xmlparser.h @@ -0,0 +1,47 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XMLPARSER_H_ +#define XFA_FDE_XML_CFDE_XMLPARSER_H_ + +#include +#include + +#include "core/fxcrt/cfx_retain_ptr.h" +#include "core/fxcrt/fx_string.h" +#include "xfa/fde/xml/cfde_xmlsyntaxparser.h" + +class CFDE_XMLElement; +class CFDE_XMLNode; +class IFGAS_Stream; +class IFX_Pause; + +class CFDE_XMLParser { + public: + CFDE_XMLParser(CFDE_XMLNode* pParent, + const CFX_RetainPtr& pStream); + ~CFDE_XMLParser(); + + int32_t DoParser(IFX_Pause* pPause); + + FX_FILESIZE m_nStart[2]; + size_t m_nSize[2]; + FX_FILESIZE m_nElementStart; + uint16_t m_dwCheckStatus; + uint16_t m_dwCurrentCheckStatus; + + private: + CFX_RetainPtr m_pStream; + std::unique_ptr m_pParser; + CFDE_XMLNode* m_pParent; + CFDE_XMLNode* m_pChild; + std::stack m_NodeStack; + CFX_WideString m_ws1; + CFX_WideString m_ws2; + FDE_XmlSyntaxResult m_syntaxParserResult; +}; + +#endif // XFA_FDE_XML_CFDE_XMLPARSER_H_ diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.cpp b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp new file mode 100644 index 0000000000..45a1eddb83 --- /dev/null +++ b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp @@ -0,0 +1,703 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xmlsyntaxparser.h" + +#include + +#include "core/fxcrt/fx_ext.h" +#include "core/fxcrt/fx_safe_types.h" + +namespace { + +const uint32_t kMaxCharRange = 0x10ffff; + +bool IsXMLWhiteSpace(wchar_t ch) { + return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; +} + +struct FDE_XMLNAMECHAR { + uint16_t wStart; + uint16_t wEnd; + bool bStartChar; +}; + +const FDE_XMLNAMECHAR g_XMLNameChars[] = { + {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false}, + {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true}, + {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true}, + {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true}, + {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false}, + {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true}, + {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true}, +}; + +bool IsXMLNameChar(wchar_t ch, bool bFirstChar) { + int32_t iStart = 0; + int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1; + while (iStart <= iEnd) { + int32_t iMid = (iStart + iEnd) / 2; + if (ch < g_XMLNameChars[iMid].wStart) { + iEnd = iMid - 1; + } else if (ch > g_XMLNameChars[iMid].wEnd) { + iStart = iMid + 1; + } else { + return bFirstChar ? 
g_XMLNameChars[iMid].bStartChar : true; + } + } + return false; +} + +int32_t GetUTF8EncodeLength(const wchar_t* pSrc, int32_t iSrcLen) { + uint32_t unicode = 0; + int32_t iDstNum = 0; + while (iSrcLen-- > 0) { + unicode = *pSrc++; + int nbytes = 0; + if ((uint32_t)unicode < 0x80) { + nbytes = 1; + } else if ((uint32_t)unicode < 0x800) { + nbytes = 2; + } else if ((uint32_t)unicode < 0x10000) { + nbytes = 3; + } else if ((uint32_t)unicode < 0x200000) { + nbytes = 4; + } else if ((uint32_t)unicode < 0x4000000) { + nbytes = 5; + } else { + nbytes = 6; + } + iDstNum += nbytes; + } + return iDstNum; +} + +} // namespace + +CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser() + : m_pStream(nullptr), + m_iXMLPlaneSize(-1), + m_iCurrentPos(0), + m_iCurrentNodeNum(-1), + m_iLastNodeNum(-1), + m_iParsedChars(0), + m_iParsedBytes(0), + m_pBuffer(nullptr), + m_iBufferChars(0), + m_bEOS(false), + m_pStart(nullptr), + m_pEnd(nullptr), + m_iAllocStep(m_BlockBuffer.GetAllocStep()), + m_iDataLength(m_BlockBuffer.GetDataLengthRef()), + m_pCurrentBlock(nullptr), + m_iIndexInBlock(0), + m_iTextDataLength(0), + m_syntaxParserResult(FDE_XmlSyntaxResult::None), + m_syntaxParserState(FDE_XmlSyntaxState::Text), + m_wQuotationMark(0), + m_iEntityStart(-1) { + m_CurNode.iNodeNum = -1; + m_CurNode.eNodeType = FDE_XMLNODE_Unknown; +} + +void CFDE_XMLSyntaxParser::Init(const CFX_RetainPtr& pStream, + int32_t iXMLPlaneSize, + int32_t iTextDataSize) { + ASSERT(!m_pStream && !m_pBuffer); + ASSERT(pStream && iXMLPlaneSize > 0); + int32_t iStreamLength = pStream->GetLength(); + ASSERT(iStreamLength > 0); + m_pStream = pStream; + m_iXMLPlaneSize = std::min(iXMLPlaneSize, iStreamLength); + uint8_t bom[4]; + m_iCurrentPos = m_pStream->GetBOM(bom); + ASSERT(!m_pBuffer); + + FX_SAFE_INT32 alloc_size_safe = m_iXMLPlaneSize; + alloc_size_safe += 1; // For NUL. 
+ if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return; + } + + m_pBuffer = FX_Alloc( + wchar_t, pdfium::base::ValueOrDieForType(alloc_size_safe)); + m_pStart = m_pEnd = m_pBuffer; + ASSERT(!m_BlockBuffer.IsInitialized()); + m_BlockBuffer.InitBuffer(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_iParsedBytes = m_iParsedChars = 0; + m_iBufferChars = 0; +} + +FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || + m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { + return m_syntaxParserResult; + } + ASSERT(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized()); + int32_t iStreamLength = m_pStream->GetLength(); + int32_t iPos; + + FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None; + while (true) { + if (m_pStart >= m_pEnd) { + if (m_bEOS || m_iCurrentPos >= iStreamLength) { + m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; + return m_syntaxParserResult; + } + m_iParsedChars += (m_pEnd - m_pBuffer); + m_iParsedBytes = m_iCurrentPos; + if (m_pStream->GetPosition() != m_iCurrentPos) { + m_pStream->Seek(FX_STREAMSEEK_Begin, m_iCurrentPos); + } + m_iBufferChars = + m_pStream->ReadString(m_pBuffer, m_iXMLPlaneSize, m_bEOS); + iPos = m_pStream->GetPosition(); + if (m_iBufferChars < 1) { + m_iCurrentPos = iStreamLength; + m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; + return m_syntaxParserResult; + } + m_iCurrentPos = iPos; + m_pStart = m_pBuffer; + m_pEnd = m_pBuffer + m_iBufferChars; + } + + while (m_pStart < m_pEnd) { + wchar_t ch = *m_pStart; + switch (m_syntaxParserState) { + case FDE_XmlSyntaxState::Text: + if (ch == L'<') { + if (m_iDataLength > 0) { + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_iEntityStart = -1; + syntaxParserResult = 
FDE_XmlSyntaxResult::Text; + } else { + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::Node; + } + } else { + ParseTextChar(ch); + } + break; + case FDE_XmlSyntaxState::Node: + if (ch == L'!') { + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl; + } else if (ch == L'/') { + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; + } else if (ch == L'?') { + m_iLastNodeNum++; + m_iCurrentNodeNum = m_iLastNodeNum; + m_CurNode.iNodeNum = m_iLastNodeNum; + m_CurNode.eNodeType = FDE_XMLNODE_Instruction; + m_XMLNodeStack.push(m_CurNode); + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::Target; + syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen; + } else { + m_iLastNodeNum++; + m_iCurrentNodeNum = m_iLastNodeNum; + m_CurNode.iNodeNum = m_iLastNodeNum; + m_CurNode.eNodeType = FDE_XMLNODE_Element; + m_XMLNodeStack.push(m_CurNode); + m_syntaxParserState = FDE_XmlSyntaxState::Tag; + syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen; + } + break; + case FDE_XmlSyntaxState::Target: + case FDE_XmlSyntaxState::Tag: + if (!IsXMLNameChar(ch, m_iDataLength < 1)) { + if (m_iDataLength < 1) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (m_syntaxParserState != FDE_XmlSyntaxState::Target) { + syntaxParserResult = FDE_XmlSyntaxResult::TagName; + } else { + syntaxParserResult = FDE_XmlSyntaxResult::TargetName; + } + m_syntaxParserState = FDE_XmlSyntaxState::AttriName; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + case FDE_XmlSyntaxState::AttriName: + if (m_iDataLength < 1 && 
IsXMLWhiteSpace(ch)) { + m_pStart++; + break; + } + if (!IsXMLNameChar(ch, m_iDataLength < 1)) { + if (m_iDataLength < 1) { + if (m_CurNode.eNodeType == FDE_XMLNODE_Element) { + if (ch == L'>' || ch == L'/') { + m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; + break; + } + } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { + if (ch == L'?') { + m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; + m_pStart++; + } else { + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + } + break; + } + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { + if (ch != '=' && !IsXMLWhiteSpace(ch)) { + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + break; + } + } + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; + syntaxParserResult = FDE_XmlSyntaxResult::AttriName; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + case FDE_XmlSyntaxState::AttriEqualSign: + if (IsXMLWhiteSpace(ch)) { + m_pStart++; + break; + } + if (ch != L'=') { + if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + break; + } + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation; + m_pStart++; + } + break; + case FDE_XmlSyntaxState::AttriQuotation: + if (IsXMLWhiteSpace(ch)) { + m_pStart++; + break; + } + if (ch != L'\"' && ch != L'\'') { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + 
m_wQuotationMark = ch; + m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; + m_pStart++; + } + break; + case FDE_XmlSyntaxState::AttriValue: + if (ch == m_wQuotationMark) { + if (m_iEntityStart > -1) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_iTextDataLength = m_iDataLength; + m_wQuotationMark = 0; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::AttriName; + syntaxParserResult = FDE_XmlSyntaxResult::AttriValue; + } else { + ParseTextChar(ch); + } + break; + case FDE_XmlSyntaxState::CloseInstruction: + if (ch != L'>') { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + } else if (m_iDataLength > 0) { + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + syntaxParserResult = FDE_XmlSyntaxResult::TargetData; + } else { + m_pStart++; + if (m_XMLNodeStack.empty()) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_XMLNodeStack.pop(); + if (!m_XMLNodeStack.empty()) { + m_CurNode = m_XMLNodeStack.top(); + } else { + m_CurNode.iNodeNum = -1; + m_CurNode.eNodeType = FDE_XMLNODE_Unknown; + } + m_iCurrentNodeNum = m_CurNode.iNodeNum; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose; + } + break; + case FDE_XmlSyntaxState::BreakElement: + if (ch == L'>') { + m_syntaxParserState = FDE_XmlSyntaxState::Text; + syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak; + } else if (ch == L'/') { + 
m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; + } else { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_pStart++; + break; + case FDE_XmlSyntaxState::CloseElement: + if (!IsXMLNameChar(ch, m_iDataLength < 1)) { + if (ch == L'>') { + if (m_XMLNodeStack.empty()) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_XMLNodeStack.pop(); + if (!m_XMLNodeStack.empty()) { + m_CurNode = m_XMLNodeStack.top(); + } else { + m_CurNode.iNodeNum = -1; + m_CurNode.eNodeType = FDE_XMLNODE_Unknown; + } + m_iCurrentNodeNum = m_CurNode.iNodeNum; + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + syntaxParserResult = FDE_XmlSyntaxResult::ElementClose; + } else if (!IsXMLWhiteSpace(ch)) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + } + m_pStart++; + break; + case FDE_XmlSyntaxState::SkipCommentOrDecl: + if (FXSYS_wcsnicmp(m_pStart, L"--", 2) == 0) { + m_pStart += 2; + m_syntaxParserState = FDE_XmlSyntaxState::SkipComment; + } else if (FXSYS_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) { + m_pStart += 7; + m_syntaxParserState = FDE_XmlSyntaxState::SkipCData; + } else { + m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode; + m_SkipChar = L'>'; + m_SkipStack.push(L'>'); + } + break; + case FDE_XmlSyntaxState::SkipCData: { + if (FXSYS_wcsnicmp(m_pStart, L"]]>", 3) == 0) { + m_pStart += 3; + syntaxParserResult = FDE_XmlSyntaxResult::CData; + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 
+ m_syntaxParserState = FDE_XmlSyntaxState::Text; + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) + return FDE_XmlSyntaxResult::Error; + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + } + case FDE_XmlSyntaxState::SkipDeclNode: + if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { + m_pStart++; + if (ch != m_SkipChar) + break; + + m_SkipStack.pop(); + if (m_SkipStack.empty()) + m_syntaxParserState = FDE_XmlSyntaxState::Text; + else + m_SkipChar = m_SkipStack.top(); + } else { + switch (ch) { + case L'<': + m_SkipChar = L'>'; + m_SkipStack.push(L'>'); + break; + case L'[': + m_SkipChar = L']'; + m_SkipStack.push(L']'); + break; + case L'(': + m_SkipChar = L')'; + m_SkipStack.push(L')'); + break; + case L'\'': + m_SkipChar = L'\''; + m_SkipStack.push(L'\''); + break; + case L'\"': + m_SkipChar = L'\"'; + m_SkipStack.push(L'\"'); + break; + default: + if (ch == m_SkipChar) { + m_SkipStack.pop(); + if (m_SkipStack.empty()) { + if (m_iDataLength >= 9) { + CFX_WideString wsHeader; + m_BlockBuffer.GetTextData(wsHeader, 0, 7); + } + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + } else { + m_SkipChar = m_SkipStack.top(); + } + } + break; + } + if (!m_SkipStack.empty()) { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + } + m_pStart++; + } + break; + case FDE_XmlSyntaxState::SkipComment: + if (FXSYS_wcsnicmp(m_pStart, L"-->", 3) == 0) { + m_pStart += 2; + m_syntaxParserState = FDE_XmlSyntaxState::Text; + } + + m_pStart++; + break; + case FDE_XmlSyntaxState::TargetData: + if (IsXMLWhiteSpace(ch)) { + if 
(m_iDataLength < 1) { + m_pStart++; + break; + } else if (m_wQuotationMark == 0) { + m_iTextDataLength = m_iDataLength; + m_wQuotationMark = 0; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_pStart++; + syntaxParserResult = FDE_XmlSyntaxResult::TargetData; + break; + } + } + if (ch == '?') { + m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; + m_pStart++; + } else if (ch == '\"') { + if (m_wQuotationMark == 0) { + m_wQuotationMark = ch; + m_pStart++; + } else if (ch == m_wQuotationMark) { + m_iTextDataLength = m_iDataLength; + m_wQuotationMark = 0; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_pStart++; + syntaxParserResult = FDE_XmlSyntaxResult::TargetData; + } else { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + default: + break; + } + if (syntaxParserResult != FDE_XmlSyntaxResult::None) + return syntaxParserResult; + } + } + return FDE_XmlSyntaxResult::Text; +} + +CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() { + m_pCurrentBlock = nullptr; + FX_Free(m_pBuffer); +} + +int32_t CFDE_XMLSyntaxParser::GetStatus() const { + if (!m_pStream) + return -1; + + int32_t iStreamLength = m_pStream->GetLength(); + if (iStreamLength < 1) + return 100; + + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) + return -1; + + if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) + return 100; + return m_iParsedBytes * 100 / iStreamLength; +} + +FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { + if (!m_pStream) + return 0; + + int32_t nSrcLen = m_pStart - m_pBuffer; + int32_t nDstLen = GetUTF8EncodeLength(m_pBuffer, 
nSrcLen); + return m_iParsedBytes + nDstLen; +} + +void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = character; + m_iDataLength++; + if (m_iEntityStart > -1 && character == L';') { + CFX_WideString csEntity; + m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, + (m_iDataLength - 1) - m_iEntityStart - 1); + int32_t iLen = csEntity.GetLength(); + if (iLen > 0) { + if (csEntity[0] == L'#') { + uint32_t ch = 0; + wchar_t w; + if (iLen > 1 && csEntity[1] == L'x') { + for (int32_t i = 2; i < iLen; i++) { + w = csEntity[i]; + if (w >= L'0' && w <= L'9') { + ch = (ch << 4) + w - L'0'; + } else if (w >= L'A' && w <= L'F') { + ch = (ch << 4) + w - 55; + } else if (w >= L'a' && w <= L'f') { + ch = (ch << 4) + w - 87; + } else { + break; + } + } + } else { + for (int32_t i = 1; i < iLen; i++) { + w = csEntity[i]; + if (w < L'0' || w > L'9') + break; + ch = ch * 10 + w - L'0'; + } + } + if (ch > kMaxCharRange) + ch = ' '; + + character = static_cast(ch); + if (character != 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, character); + m_iEntityStart++; + } + } else { + if (csEntity.Compare(L"amp") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); + m_iEntityStart++; + } else if (csEntity.Compare(L"lt") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); + m_iEntityStart++; + } else if (csEntity.Compare(L"gt") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); + m_iEntityStart++; + } else if (csEntity.Compare(L"apos") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); + m_iEntityStart++; + } else if (csEntity.Compare(L"quot") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); + m_iEntityStart++; + } + } + } + m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, false); + m_pCurrentBlock = 
m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_iEntityStart = -1; + } else { + if (m_iEntityStart < 0 && character == L'&') { + m_iEntityStart = m_iDataLength - 1; + } + } + m_pStart++; +} diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.h b/xfa/fde/xml/cfde_xmlsyntaxparser.h new file mode 100644 index 0000000000..9e768d01aa --- /dev/null +++ b/xfa/fde/xml/cfde_xmlsyntaxparser.h @@ -0,0 +1,128 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_ +#define XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_ + +#include + +#include "core/fxcrt/cfx_blockbuffer.h" +#include "core/fxcrt/cfx_retain_ptr.h" +#include "core/fxcrt/fx_string.h" +#include "xfa/fde/xml/cfde_xmlnode.h" +#include "xfa/fgas/crt/ifgas_stream.h" + +enum class FDE_XmlSyntaxResult { + None, + InstructionOpen, + InstructionClose, + ElementOpen, + ElementBreak, + ElementClose, + TargetName, + TagName, + AttriName, + AttriValue, + Text, + CData, + TargetData, + Error, + EndOfString +}; + +class CFDE_XMLSyntaxParser { + public: + CFDE_XMLSyntaxParser(); + ~CFDE_XMLSyntaxParser(); + + void Init(const CFX_RetainPtr& pStream, + int32_t iXMLPlaneSize, + int32_t iTextDataSize = 256); + + FDE_XmlSyntaxResult DoSyntaxParse(); + + int32_t GetStatus() const; + int32_t GetCurrentPos() const { + return m_iParsedChars + (m_pStart - m_pBuffer); + } + FX_FILESIZE GetCurrentBinaryPos() const; + int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; } + int32_t GetLastNodeNumber() const { return m_iLastNodeNum; } + + void GetTargetName(CFX_WideString& wsTarget) const { + m_BlockBuffer.GetTextData(wsTarget, 0, m_iTextDataLength); + } + void GetTagName(CFX_WideString& wsTag) const { + m_BlockBuffer.GetTextData(wsTag, 0, m_iTextDataLength); + } + void GetAttributeName(CFX_WideString& 
wsAttriName) const { + m_BlockBuffer.GetTextData(wsAttriName, 0, m_iTextDataLength); + } + void GetAttributeValue(CFX_WideString& wsAttriValue) const { + m_BlockBuffer.GetTextData(wsAttriValue, 0, m_iTextDataLength); + } + void GetTextData(CFX_WideString& wsText) const { + m_BlockBuffer.GetTextData(wsText, 0, m_iTextDataLength); + } + void GetTargetData(CFX_WideString& wsData) const { + m_BlockBuffer.GetTextData(wsData, 0, m_iTextDataLength); + } + + protected: + enum class FDE_XmlSyntaxState { + Text, + Node, + Target, + Tag, + AttriName, + AttriEqualSign, + AttriQuotation, + AttriValue, + Entity, + EntityDecimal, + EntityHex, + CloseInstruction, + BreakElement, + CloseElement, + SkipDeclNode, + DeclCharData, + SkipComment, + SkipCommentOrDecl, + SkipCData, + TargetData + }; + + void ParseTextChar(wchar_t ch); + + CFX_RetainPtr m_pStream; + int32_t m_iXMLPlaneSize; + int32_t m_iCurrentPos; + int32_t m_iCurrentNodeNum; + int32_t m_iLastNodeNum; + int32_t m_iParsedChars; + int32_t m_iParsedBytes; + wchar_t* m_pBuffer; + int32_t m_iBufferChars; + bool m_bEOS; + wchar_t* m_pStart; + wchar_t* m_pEnd; + FDE_XMLNODE m_CurNode; + std::stack m_XMLNodeStack; + CFX_BlockBuffer m_BlockBuffer; + int32_t m_iAllocStep; + int32_t& m_iDataLength; + wchar_t* m_pCurrentBlock; + int32_t m_iIndexInBlock; + int32_t m_iTextDataLength; + FDE_XmlSyntaxResult m_syntaxParserResult; + FDE_XmlSyntaxState m_syntaxParserState; + wchar_t m_wQuotationMark; + int32_t m_iEntityStart; + std::stack m_SkipStack; + wchar_t m_SkipChar; +}; + +#endif // XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_ diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp b/xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp new file mode 100644 index 0000000000..9b04028123 --- /dev/null +++ b/xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp @@ -0,0 +1,632 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "xfa/fde/xml/cfde_xmlsyntaxparser.h" + +#include + +#include "testing/gtest/include/gtest/gtest.h" +#include "xfa/fgas/crt/ifgas_stream.h" + +TEST(CFDE_XMLSyntaxParser, CData) { + const wchar_t* input = + L""; + + const wchar_t* cdata = + L"\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" "; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. + size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(cdata, data); + + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CDataWithInnerScript) { + const wchar_t* input = + L"\n" + L" ]]>\n" + L""; + + const wchar_t* cdata = + L"\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" \n" + L" "; + + // We * sizeof(wchar_t) 
because we pass in the uint8_t, not the wchar_t. + size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(cdata, data); + + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, ArrowBangArrow) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, ArrowBangBracketArrow) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + // Parser walks to end of input. + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, IncompleteCData) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + // Parser walks to end of input. + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, UnClosedCData) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + // Parser walks to end of input. + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, EmptyCData) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, Comment) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, IncorrectCommentStart) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CommentEmpty) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CommentThreeDash) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, CommentTwoDash) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"\n ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, Entities) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L"BTH\xab48", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, EntityOverflowHex) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L" ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFDE_XMLSyntaxParser, EntityOverflowDecimal) { + const wchar_t* input = + L""; + + // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
+ size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); + CFX_RetainPtr stream = IFGAS_Stream::CreateStream( + reinterpret_cast(const_cast(input)), len, 0); + CFDE_XMLSyntaxParser parser; + parser.Init(stream, 256); + EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + CFX_WideString data; + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + parser.GetAttributeName(data); + EXPECT_EQ(L"contentType", data); + EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + parser.GetAttributeValue(data); + EXPECT_EQ(L"application/x-javascript", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + parser.GetTextData(data); + EXPECT_EQ(L" ", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + parser.GetTagName(data); + EXPECT_EQ(L"script", data); + + EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} diff --git a/xfa/fde/xml/cfde_xmltext.cpp b/xfa/fde/xml/cfde_xmltext.cpp new file mode 100644 index 0000000000..6bc2d64354 --- /dev/null +++ b/xfa/fde/xml/cfde_xmltext.cpp @@ -0,0 +1,21 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xmltext.h" + +CFDE_XMLText::CFDE_XMLText(const CFX_WideString& wsText) + : CFDE_XMLNode(), m_wsText(wsText) {} + +CFDE_XMLText::~CFDE_XMLText() {} + +FDE_XMLNODETYPE CFDE_XMLText::GetType() const { + return FDE_XMLNODE_Text; +} + +CFDE_XMLNode* CFDE_XMLText::Clone(bool bRecursive) { + CFDE_XMLText* pClone = new CFDE_XMLText(m_wsText); + return pClone; +} diff --git a/xfa/fde/xml/cfde_xmltext.h b/xfa/fde/xml/cfde_xmltext.h new file mode 100644 index 0000000000..6f3945be09 --- /dev/null +++ b/xfa/fde/xml/cfde_xmltext.h @@ -0,0 +1,28 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFDE_XMLTEXT_H_ +#define XFA_FDE_XML_CFDE_XMLTEXT_H_ + +#include "core/fxcrt/fx_string.h" +#include "xfa/fde/xml/cfde_xmlnode.h" + +class CFDE_XMLText : public CFDE_XMLNode { + public: + explicit CFDE_XMLText(const CFX_WideString& wsText); + ~CFDE_XMLText() override; + + // CFDE_XMLNode + FDE_XMLNODETYPE GetType() const override; + CFDE_XMLNode* Clone(bool bRecursive) override; + + void GetText(CFX_WideString& wsText) const { wsText = m_wsText; } + void SetText(const CFX_WideString& wsText) { m_wsText = wsText; } + + CFX_WideString m_wsText; +}; + +#endif // XFA_FDE_XML_CFDE_XMLTEXT_H_ diff --git a/xfa/fde/xml/fde_xml.h b/xfa/fde/xml/fde_xml.h deleted file mode 100644 index 399a930615..0000000000 --- a/xfa/fde/xml/fde_xml.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_FDE_XML_H_ -#define XFA_FDE_XML_FDE_XML_H_ - -#include "core/fxcrt/fx_system.h" - -enum class FDE_XmlSyntaxResult { - None, - InstructionOpen, - InstructionClose, - ElementOpen, - ElementBreak, - ElementClose, - TargetName, - TagName, - AttriName, - AttriValue, - Text, - CData, - TargetData, - Error, - EndOfString -}; - -enum FDE_XMLNODETYPE { - FDE_XMLNODE_Unknown = 0, - FDE_XMLNODE_Instruction, - FDE_XMLNODE_Element, - FDE_XMLNODE_Text, - FDE_XMLNODE_CharData, -}; - -struct FDE_XMLNODE { - int32_t iNodeNum; - FDE_XMLNODETYPE eNodeType; -}; - -bool FDE_IsXMLValidChar(wchar_t ch); - -#endif // XFA_FDE_XML_FDE_XML_H_ diff --git a/xfa/fde/xml/fde_xml_imp.cpp b/xfa/fde/xml/fde_xml_imp.cpp deleted file mode 100644 index 2de48ef1f4..0000000000 --- a/xfa/fde/xml/fde_xml_imp.cpp +++ /dev/null @@ -1,1832 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "xfa/fde/xml/fde_xml_imp.h" - -#include -#include - -#include "core/fxcrt/fx_ext.h" -#include "core/fxcrt/fx_safe_types.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" -#include "xfa/fde/xml/cfde_xml_parser.h" -#include "xfa/fgas/crt/fgas_codepage.h" - -namespace { - -const uint32_t kMaxCharRange = 0x10ffff; - -const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09}, - {0x0A, 0x0A}, - {0x0D, 0x0D}, - {0x20, 0xD7FF}, - {0xE000, 0xFFFD}}; - -bool FDE_IsXMLWhiteSpace(wchar_t ch) { - return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; -} - -struct FDE_XMLNAMECHAR { - uint16_t wStart; - uint16_t wEnd; - bool bStartChar; -}; - -const FDE_XMLNAMECHAR g_XMLNameChars[] = { - {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false}, - {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true}, - {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true}, - {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true}, - {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false}, - {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true}, - {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true}, -}; - -bool FDE_IsXMLNameChar(wchar_t ch, bool bFirstChar) { - int32_t iStart = 0; - int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1; - while (iStart <= iEnd) { - int32_t iMid = (iStart + iEnd) / 2; - if (ch < g_XMLNameChars[iMid].wStart) { - iEnd = iMid - 1; - } else if (ch > g_XMLNameChars[iMid].wEnd) { - iStart = iMid + 1; - } else { - return bFirstChar ? 
g_XMLNameChars[iMid].bStartChar : true; - } - } - return false; -} - -} // namespace - -bool FDE_IsXMLValidChar(wchar_t ch) { - int32_t iStart = 0; - int32_t iEnd = FX_ArraySize(g_XMLValidCharRange) - 1; - while (iStart <= iEnd) { - int32_t iMid = (iStart + iEnd) / 2; - if (ch < g_XMLValidCharRange[iMid][0]) { - iEnd = iMid - 1; - } else if (ch > g_XMLValidCharRange[iMid][1]) { - iStart = iMid + 1; - } else { - return true; - } - } - return false; -} - -CFDE_XMLNode::CFDE_XMLNode() - : m_pParent(nullptr), - m_pChild(nullptr), - m_pPrior(nullptr), - m_pNext(nullptr) {} - -FDE_XMLNODETYPE CFDE_XMLNode::GetType() const { - return FDE_XMLNODE_Unknown; -} - -CFDE_XMLNode::~CFDE_XMLNode() { - DeleteChildren(); -} - -void CFDE_XMLNode::DeleteChildren() { - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - CFDE_XMLNode* pNext = pChild->m_pNext; - delete pChild; - pChild = pNext; - } - m_pChild = nullptr; -} - -int32_t CFDE_XMLNode::CountChildNodes() const { - int32_t iCount = 0; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - iCount++; - pChild = pChild->m_pNext; - } - return iCount; -} - -CFDE_XMLNode* CFDE_XMLNode::GetChildNode(int32_t index) const { - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - if (index == 0) { - return pChild; - } - index--; - pChild = pChild->m_pNext; - } - return nullptr; -} - -int32_t CFDE_XMLNode::GetChildNodeIndex(CFDE_XMLNode* pNode) const { - int32_t index = 0; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - if (pChild == pNode) { - return index; - } - index++; - pChild = pChild->m_pNext; - } - return -1; -} - -CFDE_XMLNode* CFDE_XMLNode::GetPath(const wchar_t* pPath, - int32_t iLength, - bool bQualifiedName) const { - ASSERT(pPath); - if (iLength < 0) { - iLength = FXSYS_wcslen(pPath); - } - if (iLength == 0) { - return nullptr; - } - CFX_WideString csPath; - const wchar_t* pStart = pPath; - const wchar_t* pEnd = pPath + iLength; - wchar_t ch; - while (pStart < pEnd) { - ch = *pStart++; - if (ch == L'/') { - 
break; - } else { - csPath += ch; - } - } - iLength -= pStart - pPath; - CFDE_XMLNode* pFind = nullptr; - if (csPath.GetLength() < 1) { - pFind = GetNodeItem(CFDE_XMLNode::Root); - } else if (csPath.Compare(L"..") == 0) { - pFind = m_pParent; - } else if (csPath.Compare(L".") == 0) { - pFind = (CFDE_XMLNode*)this; - } else { - CFX_WideString wsTag; - CFDE_XMLNode* pNode = m_pChild; - while (pNode) { - if (pNode->GetType() == FDE_XMLNODE_Element) { - if (bQualifiedName) { - ((CFDE_XMLElement*)pNode)->GetTagName(wsTag); - } else { - ((CFDE_XMLElement*)pNode)->GetLocalTagName(wsTag); - } - if (wsTag.Compare(csPath) == 0) { - if (iLength < 1) { - pFind = pNode; - } else { - pFind = pNode->GetPath(pStart, iLength, bQualifiedName); - } - if (pFind) - return pFind; - } - } - pNode = pNode->m_pNext; - } - } - if (!pFind || iLength < 1) - return pFind; - return pFind->GetPath(pStart, iLength, bQualifiedName); -} - -int32_t CFDE_XMLNode::InsertChildNode(CFDE_XMLNode* pNode, int32_t index) { - pNode->m_pParent = this; - if (!m_pChild) { - m_pChild = pNode; - pNode->m_pPrior = nullptr; - pNode->m_pNext = nullptr; - return 0; - } - if (index == 0) { - pNode->m_pNext = m_pChild; - pNode->m_pPrior = nullptr; - m_pChild->m_pPrior = pNode; - m_pChild = pNode; - return 0; - } - int32_t iCount = 0; - CFDE_XMLNode* pFind = m_pChild; - while (++iCount != index && pFind->m_pNext) { - pFind = pFind->m_pNext; - } - pNode->m_pPrior = pFind; - pNode->m_pNext = pFind->m_pNext; - if (pFind->m_pNext) - pFind->m_pNext->m_pPrior = pNode; - pFind->m_pNext = pNode; - return iCount; -} - -void CFDE_XMLNode::RemoveChildNode(CFDE_XMLNode* pNode) { - ASSERT(m_pChild && pNode); - if (m_pChild == pNode) { - m_pChild = pNode->m_pNext; - } else { - pNode->m_pPrior->m_pNext = pNode->m_pNext; - } - if (pNode->m_pNext) - pNode->m_pNext->m_pPrior = pNode->m_pPrior; - pNode->m_pParent = nullptr; - pNode->m_pNext = nullptr; - pNode->m_pPrior = nullptr; -} - -CFDE_XMLNode* 
CFDE_XMLNode::GetNodeItem(CFDE_XMLNode::NodeItem eItem) const { - switch (eItem) { - case CFDE_XMLNode::Root: { - CFDE_XMLNode* pParent = (CFDE_XMLNode*)this; - while (pParent->m_pParent) { - pParent = pParent->m_pParent; - } - return pParent; - } - case CFDE_XMLNode::Parent: - return m_pParent; - case CFDE_XMLNode::FirstSibling: { - CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; - while (pItem->m_pPrior) { - pItem = pItem->m_pPrior; - } - return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; - } - case CFDE_XMLNode::PriorSibling: - return m_pPrior; - case CFDE_XMLNode::NextSibling: - return m_pNext; - case CFDE_XMLNode::LastSibling: { - CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; - while (pItem->m_pNext) - pItem = pItem->m_pNext; - return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; - } - case CFDE_XMLNode::FirstNeighbor: { - CFDE_XMLNode* pParent = (CFDE_XMLNode*)this; - while (pParent->m_pParent) - pParent = pParent->m_pParent; - return pParent == (CFDE_XMLNode*)this ? nullptr : pParent; - } - case CFDE_XMLNode::PriorNeighbor: { - if (!m_pPrior) - return m_pParent; - - CFDE_XMLNode* pItem = m_pPrior; - while (pItem->m_pChild) { - pItem = pItem->m_pChild; - while (pItem->m_pNext) - pItem = pItem->m_pNext; - } - return pItem; - } - case CFDE_XMLNode::NextNeighbor: { - if (m_pChild) - return m_pChild; - if (m_pNext) - return m_pNext; - CFDE_XMLNode* pItem = m_pParent; - while (pItem) { - if (pItem->m_pNext) - return pItem->m_pNext; - pItem = pItem->m_pParent; - } - return nullptr; - } - case CFDE_XMLNode::LastNeighbor: { - CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; - while (pItem->m_pParent) { - pItem = pItem->m_pParent; - } - while (true) { - while (pItem->m_pNext) - pItem = pItem->m_pNext; - if (!pItem->m_pChild) - break; - pItem = pItem->m_pChild; - } - return pItem == (CFDE_XMLNode*)this ? 
nullptr : pItem; - } - case CFDE_XMLNode::FirstChild: - return m_pChild; - case CFDE_XMLNode::LastChild: { - if (!m_pChild) - return nullptr; - - CFDE_XMLNode* pChild = m_pChild; - while (pChild->m_pNext) - pChild = pChild->m_pNext; - return pChild; - } - default: - break; - } - return nullptr; -} - -int32_t CFDE_XMLNode::GetNodeLevel() const { - int32_t iLevel = 0; - const CFDE_XMLNode* pItem = m_pParent; - while (pItem) { - iLevel++; - pItem = pItem->m_pParent; - } - return iLevel; -} - -bool CFDE_XMLNode::InsertNodeItem(CFDE_XMLNode::NodeItem eItem, - CFDE_XMLNode* pNode) { - switch (eItem) { - case CFDE_XMLNode::NextSibling: { - pNode->m_pParent = m_pParent; - pNode->m_pNext = m_pNext; - pNode->m_pPrior = this; - if (m_pNext) { - m_pNext->m_pPrior = pNode; - } - m_pNext = pNode; - return true; - } - case CFDE_XMLNode::PriorSibling: { - pNode->m_pParent = m_pParent; - pNode->m_pNext = this; - pNode->m_pPrior = m_pPrior; - if (m_pPrior) { - m_pPrior->m_pNext = pNode; - } else if (m_pParent) { - m_pParent->m_pChild = pNode; - } - m_pPrior = pNode; - return true; - } - default: - return false; - } -} - -CFDE_XMLNode* CFDE_XMLNode::RemoveNodeItem(CFDE_XMLNode::NodeItem eItem) { - CFDE_XMLNode* pNode = nullptr; - switch (eItem) { - case CFDE_XMLNode::NextSibling: - if (m_pNext) { - pNode = m_pNext; - m_pNext = pNode->m_pNext; - if (m_pNext) { - m_pNext->m_pPrior = this; - } - pNode->m_pParent = nullptr; - pNode->m_pNext = nullptr; - pNode->m_pPrior = nullptr; - } - break; - default: - break; - } - return pNode; -} - -CFDE_XMLNode* CFDE_XMLNode::Clone(bool bRecursive) { - return nullptr; -} - -void CFDE_XMLNode::SaveXMLNode(const CFX_RetainPtr& pXMLStream) { - CFDE_XMLNode* pNode = (CFDE_XMLNode*)this; - switch (pNode->GetType()) { - case FDE_XMLNODE_Instruction: { - CFX_WideString ws; - CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; - if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) { - ws = L"GetCodePage(); - if (wCodePage == 
FX_CODEPAGE_UTF16LE) { - ws += L"UTF-16"; - } else if (wCodePage == FX_CODEPAGE_UTF16BE) { - ws += L"UTF-16be"; - } else { - ws += L"UTF-8"; - } - ws += L"\"?>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } else { - ws.Format(L"m_wsTarget.c_str()); - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - std::vector& attributes = pInstruction->m_Attributes; - int32_t i; - int32_t iCount = pdfium::CollectionSize(attributes); - CFX_WideString wsValue; - for (i = 0; i < iCount; i += 2) { - ws = L" "; - ws += attributes[i]; - ws += L"=\""; - wsValue = attributes[i + 1]; - wsValue.Replace(L"&", L"&"); - wsValue.Replace(L"<", L"<"); - wsValue.Replace(L">", L">"); - wsValue.Replace(L"\'", L"'"); - wsValue.Replace(L"\"", L"""); - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - std::vector& targetdata = pInstruction->m_TargetData; - iCount = pdfium::CollectionSize(targetdata); - for (i = 0; i < iCount; i++) { - ws = L" \""; - ws += targetdata[i]; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - ws = L"?>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - } break; - case FDE_XMLNODE_Element: { - CFX_WideString ws; - ws = L"<"; - ws += ((CFDE_XMLElement*)pNode)->m_wsTag; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - std::vector& attributes = - static_cast(pNode)->m_Attributes; - int32_t iCount = pdfium::CollectionSize(attributes); - CFX_WideString wsValue; - for (int32_t i = 0; i < iCount; i += 2) { - ws = L" "; - ws += attributes[i]; - ws += L"=\""; - wsValue = attributes[i + 1]; - wsValue.Replace(L"&", L"&"); - wsValue.Replace(L"<", L"<"); - wsValue.Replace(L">", L">"); - wsValue.Replace(L"\'", L"'"); - wsValue.Replace(L"\"", L"""); - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - if (pNode->m_pChild) { - ws = L"\n>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - CFDE_XMLNode* pChild = pNode->m_pChild; - while 
(pChild) { - pChild->SaveXMLNode(pXMLStream); - pChild = pChild->m_pNext; - } - ws = L"m_wsTag; - ws += L"\n>"; - } else { - ws = L"\n/>"; - } - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_Text: { - CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText; - ws.Replace(L"&", L"&"); - ws.Replace(L"<", L"<"); - ws.Replace(L">", L">"); - ws.Replace(L"\'", L"'"); - ws.Replace(L"\"", L"""); - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_CharData: { - CFX_WideString ws = L"m_wsCharData; - ws += L"]]>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_Unknown: - break; - default: - break; - } -} - -void CFDE_XMLNode::CloneChildren(CFDE_XMLNode* pClone) { - if (!m_pChild) { - return; - } - CFDE_XMLNode* pNext = m_pChild; - CFDE_XMLNode* pCloneNext = pNext->Clone(true); - pClone->InsertChildNode(pCloneNext); - pNext = pNext->m_pNext; - while (pNext) { - CFDE_XMLNode* pChild = pNext->Clone(true); - pCloneNext->InsertNodeItem(CFDE_XMLNode::NextSibling, pChild); - pCloneNext = pChild; - pNext = pNext->m_pNext; - } -} - -CFDE_XMLInstruction::CFDE_XMLInstruction(const CFX_WideString& wsTarget) - : m_wsTarget(wsTarget) { - ASSERT(m_wsTarget.GetLength() > 0); -} - -FDE_XMLNODETYPE CFDE_XMLInstruction::GetType() const { - return FDE_XMLNODE_Instruction; -} - -CFDE_XMLNode* CFDE_XMLInstruction::Clone(bool bRecursive) { - CFDE_XMLInstruction* pClone = new CFDE_XMLInstruction(m_wsTarget); - if (!pClone) - return nullptr; - - pClone->m_Attributes = m_Attributes; - pClone->m_TargetData = m_TargetData; - if (bRecursive) - CloneChildren(pClone); - - return pClone; -} - -int32_t CFDE_XMLInstruction::CountAttributes() const { - return pdfium::CollectionSize(m_Attributes) / 2; -} - -bool CFDE_XMLInstruction::GetAttribute(int32_t index, - CFX_WideString& wsAttriName, - CFX_WideString& wsAttriValue) const { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - ASSERT(index > -1 && 
index < iCount / 2); - for (int32_t i = 0; i < iCount; i += 2) { - if (index == 0) { - wsAttriName = m_Attributes[i]; - wsAttriValue = m_Attributes[i + 1]; - return true; - } - index--; - } - return false; -} - -bool CFDE_XMLInstruction::HasAttribute(const wchar_t* pwsAttriName) const { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return true; - } - } - return false; -} - -void CFDE_XMLInstruction::GetString(const wchar_t* pwsAttriName, - CFX_WideString& wsAttriValue, - const wchar_t* pwsDefValue) const { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - wsAttriValue = m_Attributes[i + 1]; - return; - } - } - wsAttriValue = pwsDefValue; -} - -void CFDE_XMLInstruction::SetString(const CFX_WideString& wsAttriName, - const CFX_WideString& wsAttriValue) { - ASSERT(wsAttriName.GetLength() > 0); - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(wsAttriName) == 0) { - m_Attributes[i] = wsAttriName; - m_Attributes[i + 1] = wsAttriValue; - return; - } - } - m_Attributes.push_back(wsAttriName); - m_Attributes.push_back(wsAttriValue); -} - -int32_t CFDE_XMLInstruction::GetInteger(const wchar_t* pwsAttriName, - int32_t iDefValue) const { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return FXSYS_wtoi(m_Attributes[i + 1].c_str()); - } - } - return iDefValue; -} - -void CFDE_XMLInstruction::SetInteger(const wchar_t* pwsAttriName, - int32_t iAttriValue) { - CFX_WideString wsValue; - wsValue.Format(L"%d", iAttriValue); - SetString(pwsAttriName, wsValue); -} - -float CFDE_XMLInstruction::GetFloat(const wchar_t* pwsAttriName, - float fDefValue) const { - int32_t iCount = 
pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr); - } - } - return fDefValue; -} - -void CFDE_XMLInstruction::SetFloat(const wchar_t* pwsAttriName, - float fAttriValue) { - CFX_WideString wsValue; - wsValue.Format(L"%f", fAttriValue); - SetString(pwsAttriName, wsValue); -} - -void CFDE_XMLInstruction::RemoveAttribute(const wchar_t* pwsAttriName) { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - m_Attributes.erase(m_Attributes.begin() + i, - m_Attributes.begin() + i + 2); - return; - } - } -} - -int32_t CFDE_XMLInstruction::CountData() const { - return pdfium::CollectionSize(m_TargetData); -} - -bool CFDE_XMLInstruction::GetData(int32_t index, CFX_WideString& wsData) const { - if (!pdfium::IndexInBounds(m_TargetData, index)) - return false; - - wsData = m_TargetData[index]; - return true; -} - -void CFDE_XMLInstruction::AppendData(const CFX_WideString& wsData) { - m_TargetData.push_back(wsData); -} - -void CFDE_XMLInstruction::RemoveData(int32_t index) { - if (pdfium::IndexInBounds(m_TargetData, index)) - m_TargetData.erase(m_TargetData.begin() + index); -} - -CFDE_XMLInstruction::~CFDE_XMLInstruction() {} - -CFDE_XMLElement::CFDE_XMLElement(const CFX_WideString& wsTag) - : CFDE_XMLNode(), m_wsTag(wsTag), m_Attributes() { - ASSERT(m_wsTag.GetLength() > 0); -} - -CFDE_XMLElement::~CFDE_XMLElement() {} - -FDE_XMLNODETYPE CFDE_XMLElement::GetType() const { - return FDE_XMLNODE_Element; -} - -CFDE_XMLNode* CFDE_XMLElement::Clone(bool bRecursive) { - CFDE_XMLElement* pClone = new CFDE_XMLElement(m_wsTag); - if (!pClone) - return nullptr; - - pClone->m_Attributes = m_Attributes; - if (bRecursive) { - CloneChildren(pClone); - } else { - CFX_WideString wsText; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - switch 
(pChild->GetType()) { - case FDE_XMLNODE_Text: - wsText += ((CFDE_XMLText*)pChild)->m_wsText; - break; - default: - break; - } - pChild = pChild->m_pNext; - } - pClone->SetTextData(wsText); - } - return pClone; -} - -void CFDE_XMLElement::GetTagName(CFX_WideString& wsTag) const { - wsTag = m_wsTag; -} - -void CFDE_XMLElement::GetLocalTagName(CFX_WideString& wsTag) const { - FX_STRSIZE iFind = m_wsTag.Find(L':', 0); - if (iFind < 0) { - wsTag = m_wsTag; - } else { - wsTag = m_wsTag.Right(m_wsTag.GetLength() - iFind - 1); - } -} - -void CFDE_XMLElement::GetNamespacePrefix(CFX_WideString& wsPrefix) const { - FX_STRSIZE iFind = m_wsTag.Find(L':', 0); - if (iFind < 0) { - wsPrefix.clear(); - } else { - wsPrefix = m_wsTag.Left(iFind); - } -} - -void CFDE_XMLElement::GetNamespaceURI(CFX_WideString& wsNamespace) const { - CFX_WideString wsAttri(L"xmlns"), wsPrefix; - GetNamespacePrefix(wsPrefix); - if (wsPrefix.GetLength() > 0) { - wsAttri += L":"; - wsAttri += wsPrefix; - } - wsNamespace.clear(); - CFDE_XMLNode* pNode = (CFDE_XMLNode*)this; - while (pNode) { - if (pNode->GetType() != FDE_XMLNODE_Element) { - break; - } - CFDE_XMLElement* pElement = (CFDE_XMLElement*)pNode; - if (!pElement->HasAttribute(wsAttri.c_str())) { - pNode = pNode->GetNodeItem(CFDE_XMLNode::Parent); - continue; - } - pElement->GetString(wsAttri.c_str(), wsNamespace); - break; - } -} - -int32_t CFDE_XMLElement::CountAttributes() const { - return pdfium::CollectionSize(m_Attributes) / 2; -} - -bool CFDE_XMLElement::GetAttribute(int32_t index, - CFX_WideString& wsAttriName, - CFX_WideString& wsAttriValue) const { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - ASSERT(index > -1 && index < iCount / 2); - for (int32_t i = 0; i < iCount; i += 2) { - if (index == 0) { - wsAttriName = m_Attributes[i]; - wsAttriValue = m_Attributes[i + 1]; - return true; - } - index--; - } - return false; -} - -bool CFDE_XMLElement::HasAttribute(const wchar_t* pwsAttriName) const { - int32_t iCount = 
pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) - return true; - } - return false; -} - -void CFDE_XMLElement::GetString(const wchar_t* pwsAttriName, - CFX_WideString& wsAttriValue, - const wchar_t* pwsDefValue) const { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - wsAttriValue = m_Attributes[i + 1]; - return; - } - } - wsAttriValue = pwsDefValue; -} - -void CFDE_XMLElement::SetString(const CFX_WideString& wsAttriName, - const CFX_WideString& wsAttriValue) { - ASSERT(wsAttriName.GetLength() > 0); - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(wsAttriName) == 0) { - m_Attributes[i] = wsAttriName; - m_Attributes[i + 1] = wsAttriValue; - return; - } - } - m_Attributes.push_back(wsAttriName); - m_Attributes.push_back(wsAttriValue); -} - -int32_t CFDE_XMLElement::GetInteger(const wchar_t* pwsAttriName, - int32_t iDefValue) const { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return FXSYS_wtoi(m_Attributes[i + 1].c_str()); - } - } - return iDefValue; -} - -void CFDE_XMLElement::SetInteger(const wchar_t* pwsAttriName, - int32_t iAttriValue) { - CFX_WideString wsValue; - wsValue.Format(L"%d", iAttriValue); - SetString(pwsAttriName, wsValue); -} - -float CFDE_XMLElement::GetFloat(const wchar_t* pwsAttriName, - float fDefValue) const { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr); - } - } - return fDefValue; -} - -void CFDE_XMLElement::SetFloat(const wchar_t* pwsAttriName, float fAttriValue) { - CFX_WideString wsValue; - 
wsValue.Format(L"%f", fAttriValue); - SetString(pwsAttriName, wsValue); -} - -void CFDE_XMLElement::RemoveAttribute(const wchar_t* pwsAttriName) { - int32_t iCount = pdfium::CollectionSize(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - m_Attributes.erase(m_Attributes.begin() + i, - m_Attributes.begin() + i + 2); - return; - } - } -} - -void CFDE_XMLElement::GetTextData(CFX_WideString& wsText) const { - CFX_WideTextBuf buffer; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - switch (pChild->GetType()) { - case FDE_XMLNODE_Text: - buffer << ((CFDE_XMLText*)pChild)->m_wsText; - break; - case FDE_XMLNODE_CharData: - buffer << ((CFDE_XMLCharData*)pChild)->m_wsCharData; - break; - default: - break; - } - pChild = pChild->m_pNext; - } - wsText = buffer.AsStringC(); -} - -void CFDE_XMLElement::SetTextData(const CFX_WideString& wsText) { - if (wsText.GetLength() < 1) { - return; - } - InsertChildNode(new CFDE_XMLText(wsText)); -} - -CFDE_XMLText::CFDE_XMLText(const CFX_WideString& wsText) - : CFDE_XMLNode(), m_wsText(wsText) {} - -FDE_XMLNODETYPE CFDE_XMLText::GetType() const { - return FDE_XMLNODE_Text; -} - -CFDE_XMLNode* CFDE_XMLText::Clone(bool bRecursive) { - CFDE_XMLText* pClone = new CFDE_XMLText(m_wsText); - return pClone; -} - -CFDE_XMLText::~CFDE_XMLText() {} - -CFDE_XMLCharData::CFDE_XMLCharData(const CFX_WideString& wsCData) - : CFDE_XMLDeclaration(), m_wsCharData(wsCData) {} - -FDE_XMLNODETYPE CFDE_XMLCharData::GetType() const { - return FDE_XMLNODE_CharData; -} - -CFDE_XMLNode* CFDE_XMLCharData::Clone(bool bRecursive) { - CFDE_XMLCharData* pClone = new CFDE_XMLCharData(m_wsCharData); - return pClone; -} - -CFDE_XMLCharData::~CFDE_XMLCharData() {} - -CFDE_XMLDoc::CFDE_XMLDoc() - : m_iStatus(0), m_pRoot(pdfium::MakeUnique()) { - m_pRoot->InsertChildNode(new CFDE_XMLInstruction(L"xml")); -} - -CFDE_XMLDoc::~CFDE_XMLDoc() {} - -bool CFDE_XMLDoc::LoadXML(std::unique_ptr pXMLParser) { - if 
(!pXMLParser) - return false; - - m_iStatus = 0; - m_pStream.Reset(); - m_pRoot->DeleteChildren(); - m_pXMLParser = std::move(pXMLParser); - return true; -} - -int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) { - if (m_iStatus < 100) - m_iStatus = m_pXMLParser->DoParser(pPause); - - return m_iStatus; -} - -void CFDE_XMLDoc::CloseXML() { - m_pXMLParser.reset(); -} - -void CFDE_XMLDoc::SaveXMLNode(const CFX_RetainPtr& pXMLStream, - CFDE_XMLNode* pINode) { - CFDE_XMLNode* pNode = (CFDE_XMLNode*)pINode; - switch (pNode->GetType()) { - case FDE_XMLNODE_Instruction: { - CFX_WideString ws; - CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; - if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) { - ws = L"GetCodePage(); - if (wCodePage == FX_CODEPAGE_UTF16LE) { - ws += L"UTF-16"; - } else if (wCodePage == FX_CODEPAGE_UTF16BE) { - ws += L"UTF-16be"; - } else { - ws += L"UTF-8"; - } - ws += L"\"?>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } else { - ws.Format(L"m_wsTarget.c_str()); - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - std::vector& attributes = pInstruction->m_Attributes; - int32_t i; - int32_t iCount = pdfium::CollectionSize(attributes); - CFX_WideString wsValue; - for (i = 0; i < iCount; i += 2) { - ws = L" "; - ws += attributes[i]; - ws += L"=\""; - wsValue = attributes[i + 1]; - wsValue.Replace(L"&", L"&"); - wsValue.Replace(L"<", L"<"); - wsValue.Replace(L">", L">"); - wsValue.Replace(L"\'", L"'"); - wsValue.Replace(L"\"", L"""); - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - std::vector& targetdata = pInstruction->m_TargetData; - iCount = pdfium::CollectionSize(targetdata); - for (i = 0; i < iCount; i++) { - ws = L" \""; - ws += targetdata[i]; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - ws = L"?>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - } break; - case FDE_XMLNODE_Element: { - CFX_WideString ws; - ws = L"<"; - ws += 
((CFDE_XMLElement*)pNode)->m_wsTag; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - std::vector& attributes = - static_cast(pNode)->m_Attributes; - int32_t iCount = pdfium::CollectionSize(attributes); - CFX_WideString wsValue; - for (int32_t i = 0; i < iCount; i += 2) { - ws = L" "; - ws += attributes[i]; - ws += L"=\""; - wsValue = attributes[i + 1]; - wsValue.Replace(L"&", L"&"); - wsValue.Replace(L"<", L"<"); - wsValue.Replace(L">", L">"); - wsValue.Replace(L"\'", L"'"); - wsValue.Replace(L"\"", L"""); - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - if (pNode->m_pChild) { - ws = L"\n>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - CFDE_XMLNode* pChild = pNode->m_pChild; - while (pChild) { - SaveXMLNode(pXMLStream, static_cast(pChild)); - pChild = pChild->m_pNext; - } - ws = L"m_wsTag; - ws += L"\n>"; - } else { - ws = L"\n/>"; - } - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_Text: { - CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText; - ws.Replace(L"&", L"&"); - ws.Replace(L"<", L"<"); - ws.Replace(L">", L">"); - ws.Replace(L"\'", L"'"); - ws.Replace(L"\"", L"""); - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_CharData: { - CFX_WideString ws = L"m_wsCharData; - ws += L"]]>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_Unknown: - break; - default: - break; - } -} - -CFDE_BlockBuffer::CFDE_BlockBuffer(int32_t iAllocStep) - : m_iDataLength(0), - m_iBufferSize(0), - m_iAllocStep(iAllocStep), - m_iStartPosition(0) {} - -CFDE_BlockBuffer::~CFDE_BlockBuffer() { - ClearBuffer(); -} - -wchar_t* CFDE_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) { - iIndexInBlock = 0; - if (m_BlockArray.empty()) - return nullptr; - - int32_t iRealIndex = m_iStartPosition + m_iDataLength; - if (iRealIndex == m_iBufferSize) { - m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep)); - 
m_iBufferSize += m_iAllocStep; - return m_BlockArray.back().get(); - } - iIndexInBlock = iRealIndex % m_iAllocStep; - return m_BlockArray[iRealIndex / m_iAllocStep].get(); -} - -bool CFDE_BlockBuffer::InitBuffer(int32_t iBufferSize) { - ClearBuffer(); - int32_t iNumOfBlock = (iBufferSize - 1) / m_iAllocStep + 1; - for (int32_t i = 0; i < iNumOfBlock; i++) - m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep)); - - m_iBufferSize = iNumOfBlock * m_iAllocStep; - return true; -} - -void CFDE_BlockBuffer::SetTextChar(int32_t iIndex, wchar_t ch) { - if (iIndex < 0) { - return; - } - int32_t iRealIndex = m_iStartPosition + iIndex; - int32_t iBlockIndex = iRealIndex / m_iAllocStep; - int32_t iInnerIndex = iRealIndex % m_iAllocStep; - int32_t iBlockSize = pdfium::CollectionSize(m_BlockArray); - if (iBlockIndex >= iBlockSize) { - int32_t iNewBlocks = iBlockIndex - iBlockSize + 1; - do { - m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep)); - m_iBufferSize += m_iAllocStep; - } while (--iNewBlocks); - } - wchar_t* pTextData = m_BlockArray[iBlockIndex].get(); - pTextData[iInnerIndex] = ch; - m_iDataLength = std::max(m_iDataLength, iIndex + 1); -} - -int32_t CFDE_BlockBuffer::DeleteTextChars(int32_t iCount, bool bDirection) { - if (iCount <= 0) - return m_iDataLength; - - if (iCount >= m_iDataLength) { - Reset(false); - return 0; - } - if (bDirection) { - m_iStartPosition += iCount; - m_iDataLength -= iCount; - } else { - m_iDataLength -= iCount; - } - return m_iDataLength; -} - -void CFDE_BlockBuffer::GetTextData(CFX_WideString& wsTextData, - int32_t iStart, - int32_t iLength) const { - wsTextData.clear(); - int32_t iMaybeDataLength = m_iBufferSize - 1 - m_iStartPosition; - if (iStart < 0 || iStart > iMaybeDataLength) { - return; - } - if (iLength == -1 || iLength > iMaybeDataLength) { - iLength = iMaybeDataLength; - } - if (iLength <= 0) { - return; - } - wchar_t* pBuf = wsTextData.GetBuffer(iLength); - if (!pBuf) { - return; - } - int32_t iStartBlockIndex = 0; - 
int32_t iStartInnerIndex = 0; - TextDataIndex2BufIndex(iStart, iStartBlockIndex, iStartInnerIndex); - int32_t iEndBlockIndex = 0; - int32_t iEndInnerIndex = 0; - TextDataIndex2BufIndex(iStart + iLength, iEndBlockIndex, iEndInnerIndex); - int32_t iPointer = 0; - for (int32_t i = iStartBlockIndex; i <= iEndBlockIndex; i++) { - int32_t iBufferPointer = 0; - int32_t iCopyLength = m_iAllocStep; - if (i == iStartBlockIndex) { - iCopyLength -= iStartInnerIndex; - iBufferPointer = iStartInnerIndex; - } - if (i == iEndBlockIndex) { - iCopyLength -= ((m_iAllocStep - 1) - iEndInnerIndex); - } - wchar_t* pBlockBuf = m_BlockArray[i].get(); - memcpy(pBuf + iPointer, pBlockBuf + iBufferPointer, - iCopyLength * sizeof(wchar_t)); - iPointer += iCopyLength; - } - wsTextData.ReleaseBuffer(iLength); -} - -void CFDE_BlockBuffer::TextDataIndex2BufIndex(const int32_t iIndex, - int32_t& iBlockIndex, - int32_t& iInnerIndex) const { - ASSERT(iIndex >= 0); - int32_t iRealIndex = m_iStartPosition + iIndex; - iBlockIndex = iRealIndex / m_iAllocStep; - iInnerIndex = iRealIndex % m_iAllocStep; -} - -void CFDE_BlockBuffer::ClearBuffer() { - m_iBufferSize = 0; - m_BlockArray.clear(); -} - -CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser() - : m_pStream(nullptr), - m_iXMLPlaneSize(-1), - m_iCurrentPos(0), - m_iCurrentNodeNum(-1), - m_iLastNodeNum(-1), - m_iParsedChars(0), - m_iParsedBytes(0), - m_pBuffer(nullptr), - m_iBufferChars(0), - m_bEOS(false), - m_pStart(nullptr), - m_pEnd(nullptr), - m_iAllocStep(m_BlockBuffer.GetAllocStep()), - m_iDataLength(m_BlockBuffer.GetDataLengthRef()), - m_pCurrentBlock(nullptr), - m_iIndexInBlock(0), - m_iTextDataLength(0), - m_syntaxParserResult(FDE_XmlSyntaxResult::None), - m_syntaxParserState(FDE_XmlSyntaxState::Text), - m_wQuotationMark(0), - m_iEntityStart(-1) { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FDE_XMLNODE_Unknown; -} - -void CFDE_XMLSyntaxParser::Init(const CFX_RetainPtr& pStream, - int32_t iXMLPlaneSize, - int32_t iTextDataSize) { - 
ASSERT(!m_pStream && !m_pBuffer); - ASSERT(pStream && iXMLPlaneSize > 0); - int32_t iStreamLength = pStream->GetLength(); - ASSERT(iStreamLength > 0); - m_pStream = pStream; - m_iXMLPlaneSize = std::min(iXMLPlaneSize, iStreamLength); - uint8_t bom[4]; - m_iCurrentPos = m_pStream->GetBOM(bom); - ASSERT(!m_pBuffer); - - FX_SAFE_INT32 alloc_size_safe = m_iXMLPlaneSize; - alloc_size_safe += 1; // For NUL. - if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return; - } - - m_pBuffer = FX_Alloc( - wchar_t, pdfium::base::ValueOrDieForType(alloc_size_safe)); - m_pStart = m_pEnd = m_pBuffer; - ASSERT(!m_BlockBuffer.IsInitialized()); - m_BlockBuffer.InitBuffer(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_iParsedBytes = m_iParsedChars = 0; - m_iBufferChars = 0; -} - -FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { - return m_syntaxParserResult; - } - ASSERT(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized()); - int32_t iStreamLength = m_pStream->GetLength(); - int32_t iPos; - - FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None; - while (true) { - if (m_pStart >= m_pEnd) { - if (m_bEOS || m_iCurrentPos >= iStreamLength) { - m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; - return m_syntaxParserResult; - } - m_iParsedChars += (m_pEnd - m_pBuffer); - m_iParsedBytes = m_iCurrentPos; - if (m_pStream->GetPosition() != m_iCurrentPos) { - m_pStream->Seek(FX_STREAMSEEK_Begin, m_iCurrentPos); - } - m_iBufferChars = - m_pStream->ReadString(m_pBuffer, m_iXMLPlaneSize, m_bEOS); - iPos = m_pStream->GetPosition(); - if (m_iBufferChars < 1) { - m_iCurrentPos = iStreamLength; - m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; - return m_syntaxParserResult; - } - m_iCurrentPos = iPos; - m_pStart = m_pBuffer; - m_pEnd 
= m_pBuffer + m_iBufferChars; - } - - while (m_pStart < m_pEnd) { - wchar_t ch = *m_pStart; - switch (m_syntaxParserState) { - case FDE_XmlSyntaxState::Text: - if (ch == L'<') { - if (m_iDataLength > 0) { - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_iEntityStart = -1; - syntaxParserResult = FDE_XmlSyntaxResult::Text; - } else { - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::Node; - } - } else { - ParseTextChar(ch); - } - break; - case FDE_XmlSyntaxState::Node: - if (ch == L'!') { - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl; - } else if (ch == L'/') { - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; - } else if (ch == L'?') { - m_iLastNodeNum++; - m_iCurrentNodeNum = m_iLastNodeNum; - m_CurNode.iNodeNum = m_iLastNodeNum; - m_CurNode.eNodeType = FDE_XMLNODE_Instruction; - m_XMLNodeStack.push(m_CurNode); - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::Target; - syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen; - } else { - m_iLastNodeNum++; - m_iCurrentNodeNum = m_iLastNodeNum; - m_CurNode.iNodeNum = m_iLastNodeNum; - m_CurNode.eNodeType = FDE_XMLNODE_Element; - m_XMLNodeStack.push(m_CurNode); - m_syntaxParserState = FDE_XmlSyntaxState::Tag; - syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen; - } - break; - case FDE_XmlSyntaxState::Target: - case FDE_XmlSyntaxState::Tag: - if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) { - if (m_iDataLength < 1) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (m_syntaxParserState != FDE_XmlSyntaxState::Target) { - syntaxParserResult = FDE_XmlSyntaxResult::TagName; - } else { - syntaxParserResult = FDE_XmlSyntaxResult::TargetName; - } - m_syntaxParserState = 
FDE_XmlSyntaxState::AttriName; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_pStart++; - } - break; - case FDE_XmlSyntaxState::AttriName: - if (m_iDataLength < 1 && FDE_IsXMLWhiteSpace(ch)) { - m_pStart++; - break; - } - if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) { - if (m_iDataLength < 1) { - if (m_CurNode.eNodeType == FDE_XMLNODE_Element) { - if (ch == L'>' || ch == L'/') { - m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; - break; - } - } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { - if (ch == L'?') { - m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; - m_pStart++; - } else { - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - } - break; - } - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { - if (ch != '=' && !FDE_IsXMLWhiteSpace(ch)) { - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - break; - } - } - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; - syntaxParserResult = FDE_XmlSyntaxResult::AttriName; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_pStart++; - } - break; - case FDE_XmlSyntaxState::AttriEqualSign: - if (FDE_IsXMLWhiteSpace(ch)) { - m_pStart++; - break; - } - if (ch != L'=') { - if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - break; - } - m_syntaxParserResult = 
FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation; - m_pStart++; - } - break; - case FDE_XmlSyntaxState::AttriQuotation: - if (FDE_IsXMLWhiteSpace(ch)) { - m_pStart++; - break; - } - if (ch != L'\"' && ch != L'\'') { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - m_wQuotationMark = ch; - m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; - m_pStart++; - } - break; - case FDE_XmlSyntaxState::AttriValue: - if (ch == m_wQuotationMark) { - if (m_iEntityStart > -1) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_iTextDataLength = m_iDataLength; - m_wQuotationMark = 0; - m_BlockBuffer.Reset(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::AttriName; - syntaxParserResult = FDE_XmlSyntaxResult::AttriValue; - } else { - ParseTextChar(ch); - } - break; - case FDE_XmlSyntaxState::CloseInstruction: - if (ch != L'>') { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - } else if (m_iDataLength > 0) { - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - syntaxParserResult = FDE_XmlSyntaxResult::TargetData; - } else { - m_pStart++; - if (m_XMLNodeStack.empty()) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_XMLNodeStack.pop(); - if (!m_XMLNodeStack.empty()) { - m_CurNode = m_XMLNodeStack.top(); - } else { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FDE_XMLNODE_Unknown; - } - m_iCurrentNodeNum = m_CurNode.iNodeNum; - m_BlockBuffer.Reset(); - 
m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose; - } - break; - case FDE_XmlSyntaxState::BreakElement: - if (ch == L'>') { - m_syntaxParserState = FDE_XmlSyntaxState::Text; - syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak; - } else if (ch == L'/') { - m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; - } else { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_pStart++; - break; - case FDE_XmlSyntaxState::CloseElement: - if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) { - if (ch == L'>') { - if (m_XMLNodeStack.empty()) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_XMLNodeStack.pop(); - if (!m_XMLNodeStack.empty()) { - m_CurNode = m_XMLNodeStack.top(); - } else { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FDE_XMLNODE_Unknown; - } - m_iCurrentNodeNum = m_CurNode.iNodeNum; - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - syntaxParserResult = FDE_XmlSyntaxResult::ElementClose; - } else if (!FDE_IsXMLWhiteSpace(ch)) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - } - m_pStart++; - break; - case FDE_XmlSyntaxState::SkipCommentOrDecl: - if (FXSYS_wcsnicmp(m_pStart, L"--", 2) == 0) { - m_pStart += 2; - m_syntaxParserState = FDE_XmlSyntaxState::SkipComment; - } else if (FXSYS_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) { - m_pStart += 7; - m_syntaxParserState = FDE_XmlSyntaxState::SkipCData; - } else { - 
m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode; - m_SkipChar = L'>'; - m_SkipStack.push(L'>'); - } - break; - case FDE_XmlSyntaxState::SkipCData: { - if (FXSYS_wcsnicmp(m_pStart, L"]]>", 3) == 0) { - m_pStart += 3; - syntaxParserResult = FDE_XmlSyntaxResult::CData; - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) - return FDE_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_pStart++; - } - break; - } - case FDE_XmlSyntaxState::SkipDeclNode: - if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { - m_pStart++; - if (ch != m_SkipChar) - break; - - m_SkipStack.pop(); - if (m_SkipStack.empty()) - m_syntaxParserState = FDE_XmlSyntaxState::Text; - else - m_SkipChar = m_SkipStack.top(); - } else { - switch (ch) { - case L'<': - m_SkipChar = L'>'; - m_SkipStack.push(L'>'); - break; - case L'[': - m_SkipChar = L']'; - m_SkipStack.push(L']'); - break; - case L'(': - m_SkipChar = L')'; - m_SkipStack.push(L')'); - break; - case L'\'': - m_SkipChar = L'\''; - m_SkipStack.push(L'\''); - break; - case L'\"': - m_SkipChar = L'\"'; - m_SkipStack.push(L'\"'); - break; - default: - if (ch == m_SkipChar) { - m_SkipStack.pop(); - if (m_SkipStack.empty()) { - if (m_iDataLength >= 9) { - CFX_WideString wsHeader; - m_BlockBuffer.GetTextData(wsHeader, 0, 7); - } - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - } else { - m_SkipChar = m_SkipStack.top(); - } - } - break; - } - if (!m_SkipStack.empty()) { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if 
(!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - } - m_pStart++; - } - break; - case FDE_XmlSyntaxState::SkipComment: - if (FXSYS_wcsnicmp(m_pStart, L"-->", 3) == 0) { - m_pStart += 2; - m_syntaxParserState = FDE_XmlSyntaxState::Text; - } - - m_pStart++; - break; - case FDE_XmlSyntaxState::TargetData: - if (FDE_IsXMLWhiteSpace(ch)) { - if (m_iDataLength < 1) { - m_pStart++; - break; - } else if (m_wQuotationMark == 0) { - m_iTextDataLength = m_iDataLength; - m_wQuotationMark = 0; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_pStart++; - syntaxParserResult = FDE_XmlSyntaxResult::TargetData; - break; - } - } - if (ch == '?') { - m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; - m_pStart++; - } else if (ch == '\"') { - if (m_wQuotationMark == 0) { - m_wQuotationMark = ch; - m_pStart++; - } else if (ch == m_wQuotationMark) { - m_iTextDataLength = m_iDataLength; - m_wQuotationMark = 0; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_pStart++; - syntaxParserResult = FDE_XmlSyntaxResult::TargetData; - } else { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_pStart++; - } - break; - default: - break; - } - if (syntaxParserResult != FDE_XmlSyntaxResult::None) - return syntaxParserResult; - } - } - return FDE_XmlSyntaxResult::Text; -} - -CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() { - m_pCurrentBlock = nullptr; - FX_Free(m_pBuffer); -} - -int32_t CFDE_XMLSyntaxParser::GetStatus() const { - if (!m_pStream) - return -1; - - int32_t iStreamLength = m_pStream->GetLength(); - if 
(iStreamLength < 1) - return 100; - - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) - return -1; - - if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) - return 100; - return m_iParsedBytes * 100 / iStreamLength; -} - -static int32_t FX_GetUTF8EncodeLength(const wchar_t* pSrc, int32_t iSrcLen) { - uint32_t unicode = 0; - int32_t iDstNum = 0; - while (iSrcLen-- > 0) { - unicode = *pSrc++; - int nbytes = 0; - if ((uint32_t)unicode < 0x80) { - nbytes = 1; - } else if ((uint32_t)unicode < 0x800) { - nbytes = 2; - } else if ((uint32_t)unicode < 0x10000) { - nbytes = 3; - } else if ((uint32_t)unicode < 0x200000) { - nbytes = 4; - } else if ((uint32_t)unicode < 0x4000000) { - nbytes = 5; - } else { - nbytes = 6; - } - iDstNum += nbytes; - } - return iDstNum; -} - -FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { - if (!m_pStream) - return 0; - - int32_t nSrcLen = m_pStart - m_pBuffer; - int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen); - return m_iParsedBytes + nDstLen; -} - -void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = character; - m_iDataLength++; - if (m_iEntityStart > -1 && character == L';') { - CFX_WideString csEntity; - m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, - (m_iDataLength - 1) - m_iEntityStart - 1); - int32_t iLen = csEntity.GetLength(); - if (iLen > 0) { - if (csEntity[0] == L'#') { - uint32_t ch = 0; - wchar_t w; - if (iLen > 1 && csEntity[1] == L'x') { - for (int32_t i = 2; i < iLen; i++) { - w = csEntity[i]; - if (w >= L'0' && w <= L'9') { - ch = (ch << 4) + w - L'0'; - } else if (w >= L'A' && w <= L'F') { - ch = (ch << 4) + w - 55; - } else if (w >= L'a' && w <= L'f') { - ch = (ch << 4) + w - 87; - } else { - break; - } - } - } else { - for (int32_t i = 1; i < iLen; i++) { - w = 
csEntity[i]; - if (w < L'0' || w > L'9') - break; - ch = ch * 10 + w - L'0'; - } - } - if (ch > kMaxCharRange) - ch = ' '; - - character = static_cast(ch); - if (character != 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, character); - m_iEntityStart++; - } - } else { - if (csEntity.Compare(L"amp") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); - m_iEntityStart++; - } else if (csEntity.Compare(L"lt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); - m_iEntityStart++; - } else if (csEntity.Compare(L"gt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); - m_iEntityStart++; - } else if (csEntity.Compare(L"apos") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); - m_iEntityStart++; - } else if (csEntity.Compare(L"quot") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); - m_iEntityStart++; - } - } - } - m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, false); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_iEntityStart = -1; - } else { - if (m_iEntityStart < 0 && character == L'&') { - m_iEntityStart = m_iDataLength - 1; - } - } - m_pStart++; -} diff --git a/xfa/fde/xml/fde_xml_imp.h b/xfa/fde/xml/fde_xml_imp.h deleted file mode 100644 index bd88da06cc..0000000000 --- a/xfa/fde/xml/fde_xml_imp.h +++ /dev/null @@ -1,335 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_FDE_XML_IMP_H_ -#define XFA_FDE_XML_FDE_XML_IMP_H_ - -#include -#include -#include - -#include "core/fxcrt/fx_basic.h" -#include "core/fxcrt/fx_system.h" -#include "xfa/fde/xml/fde_xml.h" -#include "xfa/fgas/crt/ifgas_stream.h" - -class CFDE_BlockBuffer; -class CFDE_XMLInstruction; -class CFDE_XMLElement; -class CFDE_XMLText; -class CFDE_XMLDoc; -class CFDE_XMLDOMParser; -class CFDE_XMLParser; -class CFDE_XMLSyntaxParser; - -class CFDE_XMLNode { - public: - enum NodeItem { - Root = 0, - Parent, - FirstSibling, - PriorSibling, - NextSibling, - LastSibling, - FirstNeighbor, - PriorNeighbor, - NextNeighbor, - LastNeighbor, - FirstChild, - LastChild - }; - - CFDE_XMLNode(); - virtual ~CFDE_XMLNode(); - - virtual FDE_XMLNODETYPE GetType() const; - virtual CFDE_XMLNode* Clone(bool bRecursive); - - int32_t CountChildNodes() const; - CFDE_XMLNode* GetChildNode(int32_t index) const; - int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const; - int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1); - void RemoveChildNode(CFDE_XMLNode* pNode); - void DeleteChildren(); - void CloneChildren(CFDE_XMLNode* pClone); - - CFDE_XMLNode* GetPath(const wchar_t* pPath, - int32_t iLength = -1, - bool bQualifiedName = true) const; - - int32_t GetNodeLevel() const; - CFDE_XMLNode* GetNodeItem(CFDE_XMLNode::NodeItem eItem) const; - bool InsertNodeItem(CFDE_XMLNode::NodeItem eItem, CFDE_XMLNode* pNode); - CFDE_XMLNode* RemoveNodeItem(CFDE_XMLNode::NodeItem eItem); - - void SaveXMLNode(const CFX_RetainPtr& pXMLStream); - - CFDE_XMLNode* m_pParent; - CFDE_XMLNode* m_pChild; - CFDE_XMLNode* m_pPrior; - CFDE_XMLNode* m_pNext; -}; - -class CFDE_XMLInstruction : public CFDE_XMLNode { - public: - explicit CFDE_XMLInstruction(const CFX_WideString& wsTarget); - ~CFDE_XMLInstruction() override; - - // CFDE_XMLNode - FDE_XMLNODETYPE GetType() const override; - CFDE_XMLNode* Clone(bool bRecursive) override; - - void GetTargetName(CFX_WideString& 
wsTarget) const { wsTarget = m_wsTarget; } - int32_t CountAttributes() const; - bool GetAttribute(int32_t index, - CFX_WideString& wsAttriName, - CFX_WideString& wsAttriValue) const; - bool HasAttribute(const wchar_t* pwsAttriName) const; - void GetString(const wchar_t* pwsAttriName, - CFX_WideString& wsAttriValue, - const wchar_t* pwsDefValue = nullptr) const; - void SetString(const CFX_WideString& wsAttriName, - const CFX_WideString& wsAttriValue); - int32_t GetInteger(const wchar_t* pwsAttriName, int32_t iDefValue = 0) const; - void SetInteger(const wchar_t* pwsAttriName, int32_t iAttriValue); - float GetFloat(const wchar_t* pwsAttriName, float fDefValue = 0) const; - void SetFloat(const wchar_t* pwsAttriName, float fAttriValue); - void RemoveAttribute(const wchar_t* pwsAttriName); - int32_t CountData() const; - bool GetData(int32_t index, CFX_WideString& wsData) const; - void AppendData(const CFX_WideString& wsData); - void RemoveData(int32_t index); - - CFX_WideString m_wsTarget; - std::vector m_Attributes; - std::vector m_TargetData; -}; - -class CFDE_XMLElement : public CFDE_XMLNode { - public: - explicit CFDE_XMLElement(const CFX_WideString& wsTag); - ~CFDE_XMLElement() override; - - // CFDE_XMLNode - FDE_XMLNODETYPE GetType() const override; - CFDE_XMLNode* Clone(bool bRecursive) override; - - void GetTagName(CFX_WideString& wsTag) const; - void GetLocalTagName(CFX_WideString& wsTag) const; - - void GetNamespacePrefix(CFX_WideString& wsPrefix) const; - void GetNamespaceURI(CFX_WideString& wsNamespace) const; - - int32_t CountAttributes() const; - bool GetAttribute(int32_t index, - CFX_WideString& wsAttriName, - CFX_WideString& wsAttriValue) const; - bool HasAttribute(const wchar_t* pwsAttriName) const; - void RemoveAttribute(const wchar_t* pwsAttriName); - - void GetString(const wchar_t* pwsAttriName, - CFX_WideString& wsAttriValue, - const wchar_t* pwsDefValue = nullptr) const; - void SetString(const CFX_WideString& wsAttriName, - const CFX_WideString& 
wsAttriValue); - - int32_t GetInteger(const wchar_t* pwsAttriName, int32_t iDefValue = 0) const; - void SetInteger(const wchar_t* pwsAttriName, int32_t iAttriValue); - - float GetFloat(const wchar_t* pwsAttriName, float fDefValue = 0) const; - void SetFloat(const wchar_t* pwsAttriName, float fAttriValue); - - void GetTextData(CFX_WideString& wsText) const; - void SetTextData(const CFX_WideString& wsText); - - CFX_WideString m_wsTag; - std::vector m_Attributes; -}; - -class CFDE_XMLText : public CFDE_XMLNode { - public: - explicit CFDE_XMLText(const CFX_WideString& wsText); - ~CFDE_XMLText() override; - - // CFDE_XMLNode - FDE_XMLNODETYPE GetType() const override; - CFDE_XMLNode* Clone(bool bRecursive) override; - - void GetText(CFX_WideString& wsText) const { wsText = m_wsText; } - void SetText(const CFX_WideString& wsText) { m_wsText = wsText; } - - CFX_WideString m_wsText; -}; - -class CFDE_XMLDeclaration : public CFDE_XMLNode { - public: - CFDE_XMLDeclaration() {} - ~CFDE_XMLDeclaration() override {} -}; - -class CFDE_XMLCharData : public CFDE_XMLDeclaration { - public: - explicit CFDE_XMLCharData(const CFX_WideString& wsCData); - ~CFDE_XMLCharData() override; - - FDE_XMLNODETYPE GetType() const override; - CFDE_XMLNode* Clone(bool bRecursive) override; - - void GetCharData(CFX_WideString& wsCharData) const { - wsCharData = m_wsCharData; - } - void SetCharData(const CFX_WideString& wsCData) { m_wsCharData = wsCData; } - - CFX_WideString m_wsCharData; -}; - -class CFDE_XMLDoc { - public: - CFDE_XMLDoc(); - ~CFDE_XMLDoc(); - - bool LoadXML(std::unique_ptr pXMLParser); - int32_t DoLoad(IFX_Pause* pPause = nullptr); - void CloseXML(); - CFDE_XMLNode* GetRoot() const { return m_pRoot.get(); } - void SaveXMLNode(const CFX_RetainPtr& pXMLStream, - CFDE_XMLNode* pNode); - - private: - int32_t m_iStatus; - std::unique_ptr m_pRoot; - std::unique_ptr m_pXMLParser; - CFX_RetainPtr m_pStream; -}; - -class CFDE_BlockBuffer { - public: - explicit CFDE_BlockBuffer(int32_t 
iAllocStep = 1024 * 1024); - ~CFDE_BlockBuffer(); - - bool InitBuffer(int32_t iBufferSize = 1024 * 1024); - bool IsInitialized() { return m_iBufferSize / m_iAllocStep >= 1; } - wchar_t* GetAvailableBlock(int32_t& iIndexInBlock); - inline int32_t GetAllocStep() const { return m_iAllocStep; } - inline int32_t& GetDataLengthRef() { return m_iDataLength; } - inline void Reset(bool bReserveData = true) { - if (!bReserveData) { - m_iStartPosition = 0; - } - m_iDataLength = 0; - } - void SetTextChar(int32_t iIndex, wchar_t ch); - int32_t DeleteTextChars(int32_t iCount, bool bDirection = true); - void GetTextData(CFX_WideString& wsTextData, - int32_t iStart = 0, - int32_t iLength = -1) const; - - protected: - inline void TextDataIndex2BufIndex(const int32_t iIndex, - int32_t& iBlockIndex, - int32_t& iInnerIndex) const; - void ClearBuffer(); - - std::vector> m_BlockArray; - int32_t m_iDataLength; - int32_t m_iBufferSize; - int32_t m_iAllocStep; - int32_t m_iStartPosition; -}; - -class CFDE_XMLSyntaxParser { - public: - CFDE_XMLSyntaxParser(); - ~CFDE_XMLSyntaxParser(); - - void Init(const CFX_RetainPtr& pStream, - int32_t iXMLPlaneSize, - int32_t iTextDataSize = 256); - - FDE_XmlSyntaxResult DoSyntaxParse(); - - int32_t GetStatus() const; - int32_t GetCurrentPos() const { - return m_iParsedChars + (m_pStart - m_pBuffer); - } - FX_FILESIZE GetCurrentBinaryPos() const; - int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; } - int32_t GetLastNodeNumber() const { return m_iLastNodeNum; } - - void GetTargetName(CFX_WideString& wsTarget) const { - m_BlockBuffer.GetTextData(wsTarget, 0, m_iTextDataLength); - } - void GetTagName(CFX_WideString& wsTag) const { - m_BlockBuffer.GetTextData(wsTag, 0, m_iTextDataLength); - } - void GetAttributeName(CFX_WideString& wsAttriName) const { - m_BlockBuffer.GetTextData(wsAttriName, 0, m_iTextDataLength); - } - void GetAttributeValue(CFX_WideString& wsAttriValue) const { - m_BlockBuffer.GetTextData(wsAttriValue, 0, 
m_iTextDataLength); - } - void GetTextData(CFX_WideString& wsText) const { - m_BlockBuffer.GetTextData(wsText, 0, m_iTextDataLength); - } - void GetTargetData(CFX_WideString& wsData) const { - m_BlockBuffer.GetTextData(wsData, 0, m_iTextDataLength); - } - - protected: - enum class FDE_XmlSyntaxState { - Text, - Node, - Target, - Tag, - AttriName, - AttriEqualSign, - AttriQuotation, - AttriValue, - Entity, - EntityDecimal, - EntityHex, - CloseInstruction, - BreakElement, - CloseElement, - SkipDeclNode, - DeclCharData, - SkipComment, - SkipCommentOrDecl, - SkipCData, - TargetData - }; - - void ParseTextChar(wchar_t ch); - - CFX_RetainPtr m_pStream; - int32_t m_iXMLPlaneSize; - int32_t m_iCurrentPos; - int32_t m_iCurrentNodeNum; - int32_t m_iLastNodeNum; - int32_t m_iParsedChars; - int32_t m_iParsedBytes; - wchar_t* m_pBuffer; - int32_t m_iBufferChars; - bool m_bEOS; - wchar_t* m_pStart; - wchar_t* m_pEnd; - FDE_XMLNODE m_CurNode; - std::stack m_XMLNodeStack; - CFDE_BlockBuffer m_BlockBuffer; - int32_t m_iAllocStep; - int32_t& m_iDataLength; - wchar_t* m_pCurrentBlock; - int32_t m_iIndexInBlock; - int32_t m_iTextDataLength; - FDE_XmlSyntaxResult m_syntaxParserResult; - FDE_XmlSyntaxState m_syntaxParserState; - wchar_t m_wQuotationMark; - int32_t m_iEntityStart; - std::stack m_SkipStack; - wchar_t m_SkipChar; -}; - -#endif // XFA_FDE_XML_FDE_XML_IMP_H_ diff --git a/xfa/fde/xml/fde_xml_imp_unittest.cpp b/xfa/fde/xml/fde_xml_imp_unittest.cpp deleted file mode 100644 index 3bd46ab77c..0000000000 --- a/xfa/fde/xml/fde_xml_imp_unittest.cpp +++ /dev/null @@ -1,632 +0,0 @@ -// Copyright 2016 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "xfa/fde/xml/fde_xml_imp.h" - -#include - -#include "testing/gtest/include/gtest/gtest.h" -#include "xfa/fgas/crt/ifgas_stream.h" - -TEST(CFDE_XMLSyntaxParser, CData) { - const wchar_t* input = - L""; - - const wchar_t* cdata = - L"\n" - L" if (a[1] < 3)\n" - L" app.alert(\"Tclams\");\n" - L" "; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. - size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(cdata, data); - - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, CDataWithInnerScript) { - const wchar_t* input = - L"\n" - L" ]]>\n" - L""; - - const wchar_t* cdata = - L"\n" - L" if (a[1] < 3)\n" - L" app.alert(\"Tclams\");\n" - L" \n" - L" "; - - // We * sizeof(wchar_t) because 
we pass in the uint8_t, not the wchar_t. - size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(cdata, data); - - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, ArrowBangArrow) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, ArrowBangBracketArrow) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - // Parser walks to end of input. - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, IncompleteCData) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - // Parser walks to end of input. - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, UnClosedCData) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - // Parser walks to end of input. - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, EmptyCData) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, Comment) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, IncorrectCommentStart) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, CommentEmpty) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, CommentThreeDash) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, CommentTwoDash) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"\n ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, Entities) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L"BTH\xab48", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, EntityOverflowHex) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L" ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST(CFDE_XMLSyntaxParser, EntityOverflowDecimal) { - const wchar_t* input = - L""; - - // We * sizeof(wchar_t) because we pass in the uint8_t, not the wchar_t. 
- size_t len = FXSYS_wcslen(input) * sizeof(wchar_t); - CFX_RetainPtr stream = IFGAS_Stream::CreateStream( - reinterpret_cast(const_cast(input)), len, 0); - CFDE_XMLSyntaxParser parser; - parser.Init(stream, 256); - EXPECT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - CFX_WideString data; - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - parser.GetAttributeName(data); - EXPECT_EQ(L"contentType", data); - EXPECT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - parser.GetAttributeValue(data); - EXPECT_EQ(L"application/x-javascript", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - EXPECT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - parser.GetTextData(data); - EXPECT_EQ(L" ", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - parser.GetTagName(data); - EXPECT_EQ(L"script", data); - - EXPECT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} diff --git a/xfa/fxfa/app/cxfa_textlayout.cpp b/xfa/fxfa/app/cxfa_textlayout.cpp index 2470459b31..718f90b9ea 100644 --- a/xfa/fxfa/app/cxfa_textlayout.cpp +++ b/xfa/fxfa/app/cxfa_textlayout.cpp @@ -17,7 +17,9 @@ #include "xfa/fde/cfde_renderdevice.h" #include "xfa/fde/css/cfde_csscomputedstyle.h" #include "xfa/fde/css/cfde_cssstyleselector.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlnode.h" +#include "xfa/fde/xml/cfde_xmltext.h" #include "xfa/fxfa/app/cxfa_linkuserdata.h" #include "xfa/fxfa/app/cxfa_loadercontext.h" #include "xfa/fxfa/app/cxfa_pieceline.h" diff --git a/xfa/fxfa/app/cxfa_textparser.cpp b/xfa/fxfa/app/cxfa_textparser.cpp index 65155f989c..b4032fa5c7 100644 --- a/xfa/fxfa/app/cxfa_textparser.cpp +++ b/xfa/fxfa/app/cxfa_textparser.cpp @@ -15,6 +15,8 @@ #include 
"xfa/fde/css/cfde_cssstyleselector.h" #include "xfa/fde/css/cfde_cssstylesheet.h" #include "xfa/fde/css/fde_css.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlnode.h" #include "xfa/fgas/crt/fgas_codepage.h" #include "xfa/fgas/font/cfgas_fontmgr.h" #include "xfa/fxfa/app/cxfa_csstagprovider.h" diff --git a/xfa/fxfa/app/xfa_ffwidgetacc.cpp b/xfa/fxfa/app/xfa_ffwidgetacc.cpp index b6d4decc42..760def2759 100644 --- a/xfa/fxfa/app/xfa_ffwidgetacc.cpp +++ b/xfa/fxfa/app/xfa_ffwidgetacc.cpp @@ -14,7 +14,8 @@ #include "third_party/base/ptr_util.h" #include "third_party/base/stl_util.h" #include "xfa/fde/cfde_textout.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlnode.h" #include "xfa/fxfa/app/xfa_ffcheckbutton.h" #include "xfa/fxfa/app/xfa_ffchoicelist.h" #include "xfa/fxfa/app/xfa_fffield.h" diff --git a/xfa/fxfa/cxfa_ffdoc.cpp b/xfa/fxfa/cxfa_ffdoc.cpp index 0deb864811..f48ae57fb2 100644 --- a/xfa/fxfa/cxfa_ffdoc.cpp +++ b/xfa/fxfa/cxfa_ffdoc.cpp @@ -18,7 +18,8 @@ #include "core/fxcrt/fx_ext.h" #include "core/fxcrt/fx_memory.h" #include "third_party/base/ptr_util.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlnode.h" #include "xfa/fwl/cfwl_notedriver.h" #include "xfa/fxfa/app/xfa_ffnotify.h" #include "xfa/fxfa/cxfa_ffapp.h" diff --git a/xfa/fxfa/cxfa_widgetacc.cpp b/xfa/fxfa/cxfa_widgetacc.cpp index 9fee6bd238..be7556ee24 100644 --- a/xfa/fxfa/cxfa_widgetacc.cpp +++ b/xfa/fxfa/cxfa_widgetacc.cpp @@ -11,7 +11,8 @@ #include "third_party/base/stl_util.h" #include "xfa/fde/cfde_textout.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlnode.h" #include "xfa/fxfa/app/cxfa_textlayout.h" #include "xfa/fxfa/app/xfa_ffwidgetacc.h" #include "xfa/fxfa/cxfa_ffapp.h" diff --git a/xfa/fxfa/parser/cxfa_dataexporter.cpp b/xfa/fxfa/parser/cxfa_dataexporter.cpp index 
f9553c413f..fda29c2201 100644 --- a/xfa/fxfa/parser/cxfa_dataexporter.cpp +++ b/xfa/fxfa/parser/cxfa_dataexporter.cpp @@ -10,7 +10,9 @@ #include "core/fxcrt/fx_basic.h" #include "third_party/base/stl_util.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmldoc.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlnode.h" #include "xfa/fgas/crt/fgas_codepage.h" #include "xfa/fxfa/parser/cxfa_document.h" #include "xfa/fxfa/parser/cxfa_node.h" @@ -46,12 +48,33 @@ CFX_WideString ExportEncodeAttribute(const CFX_WideString& str) { return textBuf.MakeString(); } +const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09}, + {0x0A, 0x0A}, + {0x0D, 0x0D}, + {0x20, 0xD7FF}, + {0xE000, 0xFFFD}}; +bool IsXMLValidChar(wchar_t ch) { + int32_t iStart = 0; + int32_t iEnd = FX_ArraySize(g_XMLValidCharRange) - 1; + while (iStart <= iEnd) { + int32_t iMid = (iStart + iEnd) / 2; + if (ch < g_XMLValidCharRange[iMid][0]) { + iEnd = iMid - 1; + } else if (ch > g_XMLValidCharRange[iMid][1]) { + iStart = iMid + 1; + } else { + return true; + } + } + return false; +} + CFX_WideString ExportEncodeContent(const CFX_WideStringC& str) { CFX_WideTextBuf textBuf; int32_t iLen = str.GetLength(); for (int32_t i = 0; i < iLen; i++) { wchar_t ch = str.GetAt(i); - if (!FDE_IsXMLValidChar(ch)) + if (!IsXMLValidChar(ch)) continue; if (ch == '&') { diff --git a/xfa/fxfa/parser/cxfa_dataimporter.cpp b/xfa/fxfa/parser/cxfa_dataimporter.cpp index 3ba304d550..200841f5cb 100644 --- a/xfa/fxfa/parser/cxfa_dataimporter.cpp +++ b/xfa/fxfa/parser/cxfa_dataimporter.cpp @@ -10,7 +10,7 @@ #include "core/fxcrt/fx_stream.h" #include "third_party/base/ptr_util.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmlnode.h" #include "xfa/fxfa/fxfa.h" #include "xfa/fxfa/fxfa_basic.h" #include "xfa/fxfa/parser/cxfa_document.h" diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp index 90be568785..b855513f52 100644 --- 
a/xfa/fxfa/parser/cxfa_document_parser.cpp +++ b/xfa/fxfa/parser/cxfa_document_parser.cpp @@ -7,6 +7,7 @@ #include "xfa/fxfa/parser/cxfa_document_parser.h" #include "third_party/base/ptr_util.h" +#include "xfa/fde/xml/cfde_xmldoc.h" #include "xfa/fxfa/fxfa.h" #include "xfa/fxfa/parser/cxfa_document.h" diff --git a/xfa/fxfa/parser/cxfa_node.cpp b/xfa/fxfa/parser/cxfa_node.cpp index 2828c98343..61107bf156 100644 --- a/xfa/fxfa/parser/cxfa_node.cpp +++ b/xfa/fxfa/parser/cxfa_node.cpp @@ -17,7 +17,9 @@ #include "fxjs/cfxjse_value.h" #include "third_party/base/ptr_util.h" #include "third_party/base/stl_util.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlnode.h" +#include "xfa/fde/xml/cfde_xmltext.h" #include "xfa/fgas/crt/fgas_codepage.h" #include "xfa/fxfa/app/xfa_ffnotify.h" #include "xfa/fxfa/cxfa_eventparam.h" diff --git a/xfa/fxfa/parser/cxfa_simple_parser.cpp b/xfa/fxfa/parser/cxfa_simple_parser.cpp index 9204660388..06e2e5d704 100644 --- a/xfa/fxfa/parser/cxfa_simple_parser.cpp +++ b/xfa/fxfa/parser/cxfa_simple_parser.cpp @@ -11,7 +11,13 @@ #include "core/fxcrt/cfx_checksumcontext.h" #include "core/fxcrt/fx_ext.h" #include "third_party/base/ptr_util.h" -#include "xfa/fde/xml/cfde_xml_parser.h" +#include "xfa/fde/xml/cfde_xmlchardata.h" +#include "xfa/fde/xml/cfde_xmldoc.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlinstruction.h" +#include "xfa/fde/xml/cfde_xmlnode.h" +#include "xfa/fde/xml/cfde_xmlparser.h" +#include "xfa/fde/xml/cfde_xmltext.h" #include "xfa/fgas/crt/fgas_codepage.h" #include "xfa/fxfa/fxfa.h" #include "xfa/fxfa/parser/cxfa_document.h" diff --git a/xfa/fxfa/parser/cxfa_simple_parser.h b/xfa/fxfa/parser/cxfa_simple_parser.h index 350104161c..a9bcec2139 100644 --- a/xfa/fxfa/parser/cxfa_simple_parser.h +++ b/xfa/fxfa/parser/cxfa_simple_parser.h @@ -9,11 +9,13 @@ #include -#include "xfa/fde/xml/fde_xml_imp.h" #include "xfa/fxfa/fxfa_basic.h" class 
CXFA_Document; class CXFA_Node; +class CFDE_XMLDoc; +class CFDE_XMLInstruction; +class CFDE_XMLNode; class CFDE_XMLParser; class IFX_SeekableReadStream; class IFX_Pause; diff --git a/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp b/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp index e54bf4a05f..eb16628a2e 100644 --- a/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp +++ b/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp @@ -11,7 +11,8 @@ #include "core/fxcrt/fx_ext.h" #include "third_party/base/stl_util.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlnode.h" #include "xfa/fxfa/parser/cxfa_document.h" #include "xfa/fxfa/parser/cxfa_layoutprocessor.h" #include "xfa/fxfa/parser/cxfa_localemgr.h" diff --git a/xfa/fxfa/parser/xfa_utils.cpp b/xfa/fxfa/parser/xfa_utils.cpp index 1323232f9f..df180f2183 100644 --- a/xfa/fxfa/parser/xfa_utils.cpp +++ b/xfa/fxfa/parser/xfa_utils.cpp @@ -7,7 +7,10 @@ #include "xfa/fxfa/parser/xfa_utils.h" #include "core/fxcrt/fx_ext.h" -#include "xfa/fde/xml/fde_xml_imp.h" +#include "xfa/fde/xml/cfde_xmlchardata.h" +#include "xfa/fde/xml/cfde_xmlelement.h" +#include "xfa/fde/xml/cfde_xmlnode.h" +#include "xfa/fde/xml/cfde_xmltext.h" #include "xfa/fxfa/parser/cxfa_document.h" #include "xfa/fxfa/parser/cxfa_localemgr.h" #include "xfa/fxfa/parser/cxfa_localevalue.h" diff --git a/xfa/fxfa/parser/xfa_utils.h b/xfa/fxfa/parser/xfa_utils.h index b428a89b4e..d4461a39a7 100644 --- a/xfa/fxfa/parser/xfa_utils.h +++ b/xfa/fxfa/parser/xfa_utils.h @@ -7,7 +7,6 @@ #ifndef XFA_FXFA_PARSER_XFA_UTILS_H_ #define XFA_FXFA_PARSER_XFA_UTILS_H_ -#include "xfa/fde/xml/fde_xml.h" #include "xfa/fgas/crt/ifgas_stream.h" #include "xfa/fxfa/fxfa_basic.h" -- cgit v1.2.3