summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--BUILD.gn26
-rw-r--r--core/fxcrt/cfx_blockbuffer.cpp140
-rw-r--r--core/fxcrt/cfx_blockbuffer.h54
-rw-r--r--testing/libfuzzer/pdf_xml_fuzzer.cc5
-rw-r--r--xfa/fde/xml/cfde_xmlchardata.cpp21
-rw-r--r--xfa/fde/xml/cfde_xmlchardata.h29
-rw-r--r--xfa/fde/xml/cfde_xmldeclaration.h18
-rw-r--r--xfa/fde/xml/cfde_xmldoc.cpp161
-rw-r--r--xfa/fde/xml/cfde_xmldoc.h36
-rw-r--r--xfa/fde/xml/cfde_xmlelement.cpp223
-rw-r--r--xfa/fde/xml/cfde_xmlelement.h56
-rw-r--r--xfa/fde/xml/cfde_xmlinstruction.cpp160
-rw-r--r--xfa/fde/xml/cfde_xmlinstruction.h50
-rw-r--r--xfa/fde/xml/cfde_xmlnode.cpp458
-rw-r--r--xfa/fde/xml/cfde_xmlnode.h74
-rw-r--r--xfa/fde/xml/cfde_xmlparser.cpp (renamed from xfa/fde/xml/cfde_xml_parser.cpp)8
-rw-r--r--xfa/fde/xml/cfde_xmlparser.h (renamed from xfa/fde/xml/cfde_xml_parser.h)12
-rw-r--r--xfa/fde/xml/cfde_xmlsyntaxparser.cpp703
-rw-r--r--xfa/fde/xml/cfde_xmlsyntaxparser.h128
-rw-r--r--xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp (renamed from xfa/fde/xml/fde_xml_imp_unittest.cpp)2
-rw-r--r--xfa/fde/xml/cfde_xmltext.cpp21
-rw-r--r--xfa/fde/xml/cfde_xmltext.h28
-rw-r--r--xfa/fde/xml/fde_xml.h45
-rw-r--r--xfa/fde/xml/fde_xml_imp.cpp1832
-rw-r--r--xfa/fde/xml/fde_xml_imp.h335
-rw-r--r--xfa/fxfa/app/cxfa_textlayout.cpp4
-rw-r--r--xfa/fxfa/app/cxfa_textparser.cpp2
-rw-r--r--xfa/fxfa/app/xfa_ffwidgetacc.cpp3
-rw-r--r--xfa/fxfa/cxfa_ffdoc.cpp3
-rw-r--r--xfa/fxfa/cxfa_widgetacc.cpp3
-rw-r--r--xfa/fxfa/parser/cxfa_dataexporter.cpp27
-rw-r--r--xfa/fxfa/parser/cxfa_dataimporter.cpp2
-rw-r--r--xfa/fxfa/parser/cxfa_document_parser.cpp1
-rw-r--r--xfa/fxfa/parser/cxfa_node.cpp4
-rw-r--r--xfa/fxfa/parser/cxfa_simple_parser.cpp8
-rw-r--r--xfa/fxfa/parser/cxfa_simple_parser.h4
-rw-r--r--xfa/fxfa/parser/xfa_document_datamerger_imp.cpp3
-rw-r--r--xfa/fxfa/parser/xfa_utils.cpp5
-rw-r--r--xfa/fxfa/parser/xfa_utils.h1
39 files changed, 2456 insertions, 2239 deletions
diff --git a/BUILD.gn b/BUILD.gn
index fa879d3e03..542ae1bf71 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -809,6 +809,8 @@ static_library("fxcrt") {
if (pdf_enable_xfa) {
sources += [
+ "core/fxcrt/cfx_blockbuffer.cpp",
+ "core/fxcrt/cfx_blockbuffer.h",
"core/fxcrt/cfx_char.cpp",
"core/fxcrt/cfx_char.h",
"core/fxcrt/cfx_chariter.cpp",
@@ -1402,11 +1404,23 @@ if (pdf_enable_xfa) {
"xfa/fde/ifde_txtedtdorecord.h",
"xfa/fde/ifde_txtedtengine.h",
"xfa/fde/ifde_txtedtpage.h",
- "xfa/fde/xml/cfde_xml_parser.cpp",
- "xfa/fde/xml/cfde_xml_parser.h",
- "xfa/fde/xml/fde_xml.h",
- "xfa/fde/xml/fde_xml_imp.cpp",
- "xfa/fde/xml/fde_xml_imp.h",
+ "xfa/fde/xml/cfde_xmlchardata.cpp",
+ "xfa/fde/xml/cfde_xmlchardata.h",
+ "xfa/fde/xml/cfde_xmldeclaration.h",
+ "xfa/fde/xml/cfde_xmldoc.cpp",
+ "xfa/fde/xml/cfde_xmldoc.h",
+ "xfa/fde/xml/cfde_xmlelement.cpp",
+ "xfa/fde/xml/cfde_xmlelement.h",
+ "xfa/fde/xml/cfde_xmlinstruction.cpp",
+ "xfa/fde/xml/cfde_xmlinstruction.h",
+ "xfa/fde/xml/cfde_xmlnode.cpp",
+ "xfa/fde/xml/cfde_xmlnode.h",
+ "xfa/fde/xml/cfde_xmlparser.cpp",
+ "xfa/fde/xml/cfde_xmlparser.h",
+ "xfa/fde/xml/cfde_xmlsyntaxparser.cpp",
+ "xfa/fde/xml/cfde_xmlsyntaxparser.h",
+ "xfa/fde/xml/cfde_xmltext.cpp",
+ "xfa/fde/xml/cfde_xmltext.h",
"xfa/fgas/crt/cfgas_formatstring.cpp",
"xfa/fgas/crt/cfgas_formatstring.h",
"xfa/fgas/crt/fgas_codepage.cpp",
@@ -1873,7 +1887,7 @@ test("pdfium_unittests") {
"xfa/fde/css/cfde_cssdeclaration_unittest.cpp",
"xfa/fde/css/cfde_cssstylesheet_unittest.cpp",
"xfa/fde/css/cfde_cssvaluelistparser_unittest.cpp",
- "xfa/fde/xml/fde_xml_imp_unittest.cpp",
+ "xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp",
"xfa/fgas/layout/fgas_rtfbreak_unittest.cpp",
"xfa/fxfa/app/cxfa_textparser_unittest.cpp",
"xfa/fxfa/cxfa_ffapp_unitest.cpp",
diff --git a/core/fxcrt/cfx_blockbuffer.cpp b/core/fxcrt/cfx_blockbuffer.cpp
new file mode 100644
index 0000000000..354f415282
--- /dev/null
+++ b/core/fxcrt/cfx_blockbuffer.cpp
@@ -0,0 +1,140 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fxcrt/cfx_blockbuffer.h"
+
+#include <algorithm>
+
+#include "third_party/base/stl_util.h"
+
+CFX_BlockBuffer::CFX_BlockBuffer(int32_t iAllocStep)
+ : m_iDataLength(0),
+ m_iBufferSize(0),
+ m_iAllocStep(iAllocStep),
+ m_iStartPosition(0) {}
+
+CFX_BlockBuffer::~CFX_BlockBuffer() {
+ ClearBuffer();
+}
+
+wchar_t* CFX_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) {
+ iIndexInBlock = 0;
+ if (m_BlockArray.empty())
+ return nullptr;
+
+ int32_t iRealIndex = m_iStartPosition + m_iDataLength;
+ if (iRealIndex == m_iBufferSize) {
+ m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep));
+ m_iBufferSize += m_iAllocStep;
+ return m_BlockArray.back().get();
+ }
+ iIndexInBlock = iRealIndex % m_iAllocStep;
+ return m_BlockArray[iRealIndex / m_iAllocStep].get();
+}
+
+bool CFX_BlockBuffer::InitBuffer(int32_t iBufferSize) {
+ ClearBuffer();
+ int32_t iNumOfBlock = (iBufferSize - 1) / m_iAllocStep + 1;
+ for (int32_t i = 0; i < iNumOfBlock; i++)
+ m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep));
+
+ m_iBufferSize = iNumOfBlock * m_iAllocStep;
+ return true;
+}
+
+void CFX_BlockBuffer::SetTextChar(int32_t iIndex, wchar_t ch) {
+ if (iIndex < 0) {
+ return;
+ }
+ int32_t iRealIndex = m_iStartPosition + iIndex;
+ int32_t iBlockIndex = iRealIndex / m_iAllocStep;
+ int32_t iInnerIndex = iRealIndex % m_iAllocStep;
+ int32_t iBlockSize = pdfium::CollectionSize<int32_t>(m_BlockArray);
+ if (iBlockIndex >= iBlockSize) {
+ int32_t iNewBlocks = iBlockIndex - iBlockSize + 1;
+ do {
+ m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep));
+ m_iBufferSize += m_iAllocStep;
+ } while (--iNewBlocks);
+ }
+ wchar_t* pTextData = m_BlockArray[iBlockIndex].get();
+ pTextData[iInnerIndex] = ch;
+ m_iDataLength = std::max(m_iDataLength, iIndex + 1);
+}
+
+int32_t CFX_BlockBuffer::DeleteTextChars(int32_t iCount, bool bDirection) {
+ if (iCount <= 0)
+ return m_iDataLength;
+
+ if (iCount >= m_iDataLength) {
+ Reset(false);
+ return 0;
+ }
+ if (bDirection) {
+ m_iStartPosition += iCount;
+ m_iDataLength -= iCount;
+ } else {
+ m_iDataLength -= iCount;
+ }
+ return m_iDataLength;
+}
+
+void CFX_BlockBuffer::GetTextData(CFX_WideString& wsTextData,
+ int32_t iStart,
+ int32_t iLength) const {
+ wsTextData.clear();
+ int32_t iMaybeDataLength = m_iBufferSize - 1 - m_iStartPosition;
+ if (iStart < 0 || iStart > iMaybeDataLength) {
+ return;
+ }
+ if (iLength == -1 || iLength > iMaybeDataLength) {
+ iLength = iMaybeDataLength;
+ }
+ if (iLength <= 0) {
+ return;
+ }
+ wchar_t* pBuf = wsTextData.GetBuffer(iLength);
+ if (!pBuf) {
+ return;
+ }
+ int32_t iStartBlockIndex = 0;
+ int32_t iStartInnerIndex = 0;
+ TextDataIndex2BufIndex(iStart, iStartBlockIndex, iStartInnerIndex);
+ int32_t iEndBlockIndex = 0;
+ int32_t iEndInnerIndex = 0;
+ TextDataIndex2BufIndex(iStart + iLength, iEndBlockIndex, iEndInnerIndex);
+ int32_t iPointer = 0;
+ for (int32_t i = iStartBlockIndex; i <= iEndBlockIndex; i++) {
+ int32_t iBufferPointer = 0;
+ int32_t iCopyLength = m_iAllocStep;
+ if (i == iStartBlockIndex) {
+ iCopyLength -= iStartInnerIndex;
+ iBufferPointer = iStartInnerIndex;
+ }
+ if (i == iEndBlockIndex) {
+ iCopyLength -= ((m_iAllocStep - 1) - iEndInnerIndex);
+ }
+ wchar_t* pBlockBuf = m_BlockArray[i].get();
+ memcpy(pBuf + iPointer, pBlockBuf + iBufferPointer,
+ iCopyLength * sizeof(wchar_t));
+ iPointer += iCopyLength;
+ }
+ wsTextData.ReleaseBuffer(iLength);
+}
+
+void CFX_BlockBuffer::TextDataIndex2BufIndex(const int32_t iIndex,
+ int32_t& iBlockIndex,
+ int32_t& iInnerIndex) const {
+ ASSERT(iIndex >= 0);
+ int32_t iRealIndex = m_iStartPosition + iIndex;
+ iBlockIndex = iRealIndex / m_iAllocStep;
+ iInnerIndex = iRealIndex % m_iAllocStep;
+}
+
+void CFX_BlockBuffer::ClearBuffer() {
+ m_iBufferSize = 0;
+ m_BlockArray.clear();
+}
diff --git a/core/fxcrt/cfx_blockbuffer.h b/core/fxcrt/cfx_blockbuffer.h
new file mode 100644
index 0000000000..dbf01a938a
--- /dev/null
+++ b/core/fxcrt/cfx_blockbuffer.h
@@ -0,0 +1,54 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FXCRT_CFX_BLOCKBUFFER_H_
+#define CORE_FXCRT_CFX_BLOCKBUFFER_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <vector>
+
+#include "core/fxcrt/fx_string.h"
+
+class CFX_BlockBuffer {
+ public:
+ explicit CFX_BlockBuffer(int32_t iAllocStep = 1024 * 1024);
+ ~CFX_BlockBuffer();
+
+ bool InitBuffer(int32_t iBufferSize = 1024 * 1024);
+ bool IsInitialized() { return m_iBufferSize / m_iAllocStep >= 1; }
+
+ wchar_t* GetAvailableBlock(int32_t& iIndexInBlock);
+ inline int32_t GetAllocStep() const { return m_iAllocStep; }
+ inline int32_t& GetDataLengthRef() { return m_iDataLength; }
+
+ inline void Reset(bool bReserveData = true) {
+ if (!bReserveData)
+ m_iStartPosition = 0;
+ m_iDataLength = 0;
+ }
+
+ void SetTextChar(int32_t iIndex, wchar_t ch);
+ int32_t DeleteTextChars(int32_t iCount, bool bDirection = true);
+ void GetTextData(CFX_WideString& wsTextData,
+ int32_t iStart = 0,
+ int32_t iLength = -1) const;
+
+ private:
+ inline void TextDataIndex2BufIndex(const int32_t iIndex,
+ int32_t& iBlockIndex,
+ int32_t& iInnerIndex) const;
+ void ClearBuffer();
+
+ std::vector<std::unique_ptr<wchar_t, FxFreeDeleter>> m_BlockArray;
+ int32_t m_iDataLength;
+ int32_t m_iBufferSize;
+ int32_t m_iAllocStep;
+ int32_t m_iStartPosition;
+};
+
+#endif // CORE_FXCRT_CFX_BLOCKBUFFER_H_
diff --git a/testing/libfuzzer/pdf_xml_fuzzer.cc b/testing/libfuzzer/pdf_xml_fuzzer.cc
index e255f96f6e..13eda60d77 100644
--- a/testing/libfuzzer/pdf_xml_fuzzer.cc
+++ b/testing/libfuzzer/pdf_xml_fuzzer.cc
@@ -10,8 +10,9 @@
#include "core/fxcrt/fx_safe_types.h"
#include "core/fxcrt/fx_system.h"
#include "third_party/base/ptr_util.h"
-#include "xfa/fde/xml/cfde_xml_parser.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmldoc.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+#include "xfa/fde/xml/cfde_xmlparser.h"
#include "xfa/fxfa/parser/cxfa_widetextread.h"
namespace {
diff --git a/xfa/fde/xml/cfde_xmlchardata.cpp b/xfa/fde/xml/cfde_xmlchardata.cpp
new file mode 100644
index 0000000000..40e4730dea
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlchardata.cpp
@@ -0,0 +1,21 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "xfa/fde/xml/cfde_xmlchardata.h"
+
+CFDE_XMLCharData::CFDE_XMLCharData(const CFX_WideString& wsCData)
+ : CFDE_XMLDeclaration(), m_wsCharData(wsCData) {}
+
+CFDE_XMLCharData::~CFDE_XMLCharData() {}
+
+FDE_XMLNODETYPE CFDE_XMLCharData::GetType() const {
+ return FDE_XMLNODE_CharData;
+}
+
+CFDE_XMLNode* CFDE_XMLCharData::Clone(bool bRecursive) {
+ CFDE_XMLCharData* pClone = new CFDE_XMLCharData(m_wsCharData);
+ return pClone;
+}
diff --git a/xfa/fde/xml/cfde_xmlchardata.h b/xfa/fde/xml/cfde_xmlchardata.h
new file mode 100644
index 0000000000..308ec1721b
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlchardata.h
@@ -0,0 +1,29 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef XFA_FDE_XML_CFDE_XMLCHARDATA_H_
+#define XFA_FDE_XML_CFDE_XMLCHARDATA_H_
+
+#include "core/fxcrt/fx_string.h"
+#include "xfa/fde/xml/cfde_xmldeclaration.h"
+
+class CFDE_XMLCharData : public CFDE_XMLDeclaration {
+ public:
+ explicit CFDE_XMLCharData(const CFX_WideString& wsCData);
+ ~CFDE_XMLCharData() override;
+
+ FDE_XMLNODETYPE GetType() const override;
+ CFDE_XMLNode* Clone(bool bRecursive) override;
+
+ void GetCharData(CFX_WideString& wsCharData) const {
+ wsCharData = m_wsCharData;
+ }
+ void SetCharData(const CFX_WideString& wsCData) { m_wsCharData = wsCData; }
+
+ CFX_WideString m_wsCharData;
+};
+
+#endif // XFA_FDE_XML_CFDE_XMLCHARDATA_H_
diff --git a/xfa/fde/xml/cfde_xmldeclaration.h b/xfa/fde/xml/cfde_xmldeclaration.h
new file mode 100644
index 0000000000..ade66c470f
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmldeclaration.h
@@ -0,0 +1,18 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef XFA_FDE_XML_CFDE_XMLDECLARATION_H_
+#define XFA_FDE_XML_CFDE_XMLDECLARATION_H_
+
+#include "xfa/fde/xml/cfde_xmlnode.h"
+
+class CFDE_XMLDeclaration : public CFDE_XMLNode {
+ public:
+ CFDE_XMLDeclaration() {}
+ ~CFDE_XMLDeclaration() override {}
+};
+
+#endif // XFA_FDE_XML_CFDE_XMLDECLARATION_H_
diff --git a/xfa/fde/xml/cfde_xmldoc.cpp b/xfa/fde/xml/cfde_xmldoc.cpp
new file mode 100644
index 0000000000..bc526ae4b3
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmldoc.cpp
@@ -0,0 +1,161 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "xfa/fde/xml/cfde_xmldoc.h"
+
+#include <utility>
+#include <vector>
+
+#include "third_party/base/ptr_util.h"
+#include "third_party/base/stl_util.h"
+#include "xfa/fde/xml/cfde_xmlchardata.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlinstruction.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+#include "xfa/fde/xml/cfde_xmltext.h"
+#include "xfa/fgas/crt/fgas_codepage.h"
+
+CFDE_XMLDoc::CFDE_XMLDoc()
+ : m_iStatus(0), m_pRoot(pdfium::MakeUnique<CFDE_XMLNode>()) {
+ m_pRoot->InsertChildNode(new CFDE_XMLInstruction(L"xml"));
+}
+
+CFDE_XMLDoc::~CFDE_XMLDoc() {}
+
+bool CFDE_XMLDoc::LoadXML(std::unique_ptr<CFDE_XMLParser> pXMLParser) {
+ if (!pXMLParser)
+ return false;
+
+ m_iStatus = 0;
+ m_pStream.Reset();
+ m_pRoot->DeleteChildren();
+ m_pXMLParser = std::move(pXMLParser);
+ return true;
+}
+
+int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) {
+ if (m_iStatus < 100)
+ m_iStatus = m_pXMLParser->DoParser(pPause);
+
+ return m_iStatus;
+}
+
+void CFDE_XMLDoc::CloseXML() {
+ m_pXMLParser.reset();
+}
+
+void CFDE_XMLDoc::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
+ CFDE_XMLNode* pINode) {
+ CFDE_XMLNode* pNode = (CFDE_XMLNode*)pINode;
+ switch (pNode->GetType()) {
+ case FDE_XMLNODE_Instruction: {
+ CFX_WideString ws;
+ CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode;
+ if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) {
+ ws = L"<?xml version=\"1.0\" encoding=\"";
+ uint16_t wCodePage = pXMLStream->GetCodePage();
+ if (wCodePage == FX_CODEPAGE_UTF16LE) {
+ ws += L"UTF-16";
+ } else if (wCodePage == FX_CODEPAGE_UTF16BE) {
+ ws += L"UTF-16be";
+ } else {
+ ws += L"UTF-8";
+ }
+ ws += L"\"?>";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ } else {
+ ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str());
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes;
+ int32_t i;
+ int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
+ CFX_WideString wsValue;
+ for (i = 0; i < iCount; i += 2) {
+ ws = L" ";
+ ws += attributes[i];
+ ws += L"=\"";
+ wsValue = attributes[i + 1];
+ wsValue.Replace(L"&", L"&amp;");
+ wsValue.Replace(L"<", L"&lt;");
+ wsValue.Replace(L">", L"&gt;");
+ wsValue.Replace(L"\'", L"&apos;");
+ wsValue.Replace(L"\"", L"&quot;");
+ ws += wsValue;
+ ws += L"\"";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ }
+ std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData;
+ iCount = pdfium::CollectionSize<int32_t>(targetdata);
+ for (i = 0; i < iCount; i++) {
+ ws = L" \"";
+ ws += targetdata[i];
+ ws += L"\"";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ }
+ ws = L"?>";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ }
+ } break;
+ case FDE_XMLNODE_Element: {
+ CFX_WideString ws;
+ ws = L"<";
+ ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ std::vector<CFX_WideString>& attributes =
+ static_cast<CFDE_XMLElement*>(pNode)->m_Attributes;
+ int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
+ CFX_WideString wsValue;
+ for (int32_t i = 0; i < iCount; i += 2) {
+ ws = L" ";
+ ws += attributes[i];
+ ws += L"=\"";
+ wsValue = attributes[i + 1];
+ wsValue.Replace(L"&", L"&amp;");
+ wsValue.Replace(L"<", L"&lt;");
+ wsValue.Replace(L">", L"&gt;");
+ wsValue.Replace(L"\'", L"&apos;");
+ wsValue.Replace(L"\"", L"&quot;");
+ ws += wsValue;
+ ws += L"\"";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ }
+ if (pNode->m_pChild) {
+ ws = L"\n>";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ CFDE_XMLNode* pChild = pNode->m_pChild;
+ while (pChild) {
+ SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pChild));
+ pChild = pChild->m_pNext;
+ }
+ ws = L"</";
+ ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
+ ws += L"\n>";
+ } else {
+ ws = L"\n/>";
+ }
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ } break;
+ case FDE_XMLNODE_Text: {
+ CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText;
+ ws.Replace(L"&", L"&amp;");
+ ws.Replace(L"<", L"&lt;");
+ ws.Replace(L">", L"&gt;");
+ ws.Replace(L"\'", L"&apos;");
+ ws.Replace(L"\"", L"&quot;");
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ } break;
+ case FDE_XMLNODE_CharData: {
+ CFX_WideString ws = L"<![CDATA[";
+ ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData;
+ ws += L"]]>";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ } break;
+ case FDE_XMLNODE_Unknown:
+ break;
+ default:
+ break;
+ }
+}
diff --git a/xfa/fde/xml/cfde_xmldoc.h b/xfa/fde/xml/cfde_xmldoc.h
new file mode 100644
index 0000000000..3eb07a87e1
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmldoc.h
@@ -0,0 +1,36 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef XFA_FDE_XML_CFDE_XMLDOC_H_
+#define XFA_FDE_XML_CFDE_XMLDOC_H_
+
+#include <memory>
+
+#include "core/fxcrt/cfx_retain_ptr.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+#include "xfa/fde/xml/cfde_xmlparser.h"
+#include "xfa/fgas/crt/ifgas_stream.h"
+
+class CFDE_XMLDoc {
+ public:
+ CFDE_XMLDoc();
+ ~CFDE_XMLDoc();
+
+ bool LoadXML(std::unique_ptr<CFDE_XMLParser> pXMLParser);
+ int32_t DoLoad(IFX_Pause* pPause = nullptr);
+ void CloseXML();
+ CFDE_XMLNode* GetRoot() const { return m_pRoot.get(); }
+ void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
+ CFDE_XMLNode* pNode);
+
+ private:
+ int32_t m_iStatus;
+ std::unique_ptr<CFDE_XMLNode> m_pRoot;
+ std::unique_ptr<CFDE_XMLParser> m_pXMLParser;
+ CFX_RetainPtr<IFGAS_Stream> m_pStream;
+};
+
+#endif // XFA_FDE_XML_CFDE_XMLDOC_H_
diff --git a/xfa/fde/xml/cfde_xmlelement.cpp b/xfa/fde/xml/cfde_xmlelement.cpp
new file mode 100644
index 0000000000..aca27c50e3
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlelement.cpp
@@ -0,0 +1,223 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "xfa/fde/xml/cfde_xmlelement.h"
+
+#include "core/fxcrt/fx_ext.h"
+#include "third_party/base/stl_util.h"
+#include "xfa/fde/xml/cfde_xmlchardata.h"
+#include "xfa/fde/xml/cfde_xmltext.h"
+
+CFDE_XMLElement::CFDE_XMLElement(const CFX_WideString& wsTag)
+ : CFDE_XMLNode(), m_wsTag(wsTag), m_Attributes() {
+ ASSERT(m_wsTag.GetLength() > 0);
+}
+
+CFDE_XMLElement::~CFDE_XMLElement() {}
+
+FDE_XMLNODETYPE CFDE_XMLElement::GetType() const {
+ return FDE_XMLNODE_Element;
+}
+
+CFDE_XMLNode* CFDE_XMLElement::Clone(bool bRecursive) {
+ CFDE_XMLElement* pClone = new CFDE_XMLElement(m_wsTag);
+ if (!pClone)
+ return nullptr;
+
+ pClone->m_Attributes = m_Attributes;
+ if (bRecursive) {
+ CloneChildren(pClone);
+ } else {
+ CFX_WideString wsText;
+ CFDE_XMLNode* pChild = m_pChild;
+ while (pChild) {
+ switch (pChild->GetType()) {
+ case FDE_XMLNODE_Text:
+ wsText += ((CFDE_XMLText*)pChild)->m_wsText;
+ break;
+ default:
+ break;
+ }
+ pChild = pChild->m_pNext;
+ }
+ pClone->SetTextData(wsText);
+ }
+ return pClone;
+}
+
+void CFDE_XMLElement::GetTagName(CFX_WideString& wsTag) const {
+ wsTag = m_wsTag;
+}
+
+void CFDE_XMLElement::GetLocalTagName(CFX_WideString& wsTag) const {
+ FX_STRSIZE iFind = m_wsTag.Find(L':', 0);
+ if (iFind < 0) {
+ wsTag = m_wsTag;
+ } else {
+ wsTag = m_wsTag.Right(m_wsTag.GetLength() - iFind - 1);
+ }
+}
+
+void CFDE_XMLElement::GetNamespacePrefix(CFX_WideString& wsPrefix) const {
+ FX_STRSIZE iFind = m_wsTag.Find(L':', 0);
+ if (iFind < 0) {
+ wsPrefix.clear();
+ } else {
+ wsPrefix = m_wsTag.Left(iFind);
+ }
+}
+
+void CFDE_XMLElement::GetNamespaceURI(CFX_WideString& wsNamespace) const {
+ CFX_WideString wsAttri(L"xmlns"), wsPrefix;
+ GetNamespacePrefix(wsPrefix);
+ if (wsPrefix.GetLength() > 0) {
+ wsAttri += L":";
+ wsAttri += wsPrefix;
+ }
+ wsNamespace.clear();
+ CFDE_XMLNode* pNode = (CFDE_XMLNode*)this;
+ while (pNode) {
+ if (pNode->GetType() != FDE_XMLNODE_Element) {
+ break;
+ }
+ CFDE_XMLElement* pElement = (CFDE_XMLElement*)pNode;
+ if (!pElement->HasAttribute(wsAttri.c_str())) {
+ pNode = pNode->GetNodeItem(CFDE_XMLNode::Parent);
+ continue;
+ }
+ pElement->GetString(wsAttri.c_str(), wsNamespace);
+ break;
+ }
+}
+
+int32_t CFDE_XMLElement::CountAttributes() const {
+ return pdfium::CollectionSize<int32_t>(m_Attributes) / 2;
+}
+
+bool CFDE_XMLElement::GetAttribute(int32_t index,
+ CFX_WideString& wsAttriName,
+ CFX_WideString& wsAttriValue) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ ASSERT(index > -1 && index < iCount / 2);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (index == 0) {
+ wsAttriName = m_Attributes[i];
+ wsAttriValue = m_Attributes[i + 1];
+ return true;
+ }
+ index--;
+ }
+ return false;
+}
+
+bool CFDE_XMLElement::HasAttribute(const wchar_t* pwsAttriName) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0)
+ return true;
+ }
+ return false;
+}
+
+void CFDE_XMLElement::GetString(const wchar_t* pwsAttriName,
+ CFX_WideString& wsAttriValue,
+ const wchar_t* pwsDefValue) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0) {
+ wsAttriValue = m_Attributes[i + 1];
+ return;
+ }
+ }
+ wsAttriValue = pwsDefValue;
+}
+
+void CFDE_XMLElement::SetString(const CFX_WideString& wsAttriName,
+ const CFX_WideString& wsAttriValue) {
+ ASSERT(wsAttriName.GetLength() > 0);
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(wsAttriName) == 0) {
+ m_Attributes[i] = wsAttriName;
+ m_Attributes[i + 1] = wsAttriValue;
+ return;
+ }
+ }
+ m_Attributes.push_back(wsAttriName);
+ m_Attributes.push_back(wsAttriValue);
+}
+
+int32_t CFDE_XMLElement::GetInteger(const wchar_t* pwsAttriName,
+ int32_t iDefValue) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0) {
+ return FXSYS_wtoi(m_Attributes[i + 1].c_str());
+ }
+ }
+ return iDefValue;
+}
+
+void CFDE_XMLElement::SetInteger(const wchar_t* pwsAttriName,
+ int32_t iAttriValue) {
+ CFX_WideString wsValue;
+ wsValue.Format(L"%d", iAttriValue);
+ SetString(pwsAttriName, wsValue);
+}
+
+float CFDE_XMLElement::GetFloat(const wchar_t* pwsAttriName,
+ float fDefValue) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0) {
+ return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr);
+ }
+ }
+ return fDefValue;
+}
+
+void CFDE_XMLElement::SetFloat(const wchar_t* pwsAttriName, float fAttriValue) {
+ CFX_WideString wsValue;
+ wsValue.Format(L"%f", fAttriValue);
+ SetString(pwsAttriName, wsValue);
+}
+
+void CFDE_XMLElement::RemoveAttribute(const wchar_t* pwsAttriName) {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0) {
+ m_Attributes.erase(m_Attributes.begin() + i,
+ m_Attributes.begin() + i + 2);
+ return;
+ }
+ }
+}
+
+void CFDE_XMLElement::GetTextData(CFX_WideString& wsText) const {
+ CFX_WideTextBuf buffer;
+ CFDE_XMLNode* pChild = m_pChild;
+ while (pChild) {
+ switch (pChild->GetType()) {
+ case FDE_XMLNODE_Text:
+ buffer << ((CFDE_XMLText*)pChild)->m_wsText;
+ break;
+ case FDE_XMLNODE_CharData:
+ buffer << ((CFDE_XMLCharData*)pChild)->m_wsCharData;
+ break;
+ default:
+ break;
+ }
+ pChild = pChild->m_pNext;
+ }
+ wsText = buffer.AsStringC();
+}
+
+void CFDE_XMLElement::SetTextData(const CFX_WideString& wsText) {
+ if (wsText.GetLength() < 1) {
+ return;
+ }
+ InsertChildNode(new CFDE_XMLText(wsText));
+}
diff --git a/xfa/fde/xml/cfde_xmlelement.h b/xfa/fde/xml/cfde_xmlelement.h
new file mode 100644
index 0000000000..8f61035979
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlelement.h
@@ -0,0 +1,56 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef XFA_FDE_XML_CFDE_XMLELEMENT_H_
+#define XFA_FDE_XML_CFDE_XMLELEMENT_H_
+
+#include <vector>
+
+#include "core/fxcrt/fx_string.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+
+class CFDE_XMLElement : public CFDE_XMLNode {
+ public:
+ explicit CFDE_XMLElement(const CFX_WideString& wsTag);
+ ~CFDE_XMLElement() override;
+
+ // CFDE_XMLNode
+ FDE_XMLNODETYPE GetType() const override;
+ CFDE_XMLNode* Clone(bool bRecursive) override;
+
+ void GetTagName(CFX_WideString& wsTag) const;
+ void GetLocalTagName(CFX_WideString& wsTag) const;
+
+ void GetNamespacePrefix(CFX_WideString& wsPrefix) const;
+ void GetNamespaceURI(CFX_WideString& wsNamespace) const;
+
+ int32_t CountAttributes() const;
+ bool GetAttribute(int32_t index,
+ CFX_WideString& wsAttriName,
+ CFX_WideString& wsAttriValue) const;
+ bool HasAttribute(const wchar_t* pwsAttriName) const;
+ void RemoveAttribute(const wchar_t* pwsAttriName);
+
+ void GetString(const wchar_t* pwsAttriName,
+ CFX_WideString& wsAttriValue,
+ const wchar_t* pwsDefValue = nullptr) const;
+ void SetString(const CFX_WideString& wsAttriName,
+ const CFX_WideString& wsAttriValue);
+
+ int32_t GetInteger(const wchar_t* pwsAttriName, int32_t iDefValue = 0) const;
+ void SetInteger(const wchar_t* pwsAttriName, int32_t iAttriValue);
+
+ float GetFloat(const wchar_t* pwsAttriName, float fDefValue = 0) const;
+ void SetFloat(const wchar_t* pwsAttriName, float fAttriValue);
+
+ void GetTextData(CFX_WideString& wsText) const;
+ void SetTextData(const CFX_WideString& wsText);
+
+ CFX_WideString m_wsTag;
+ std::vector<CFX_WideString> m_Attributes;
+};
+
+#endif // XFA_FDE_XML_CFDE_XMLELEMENT_H_
diff --git a/xfa/fde/xml/cfde_xmlinstruction.cpp b/xfa/fde/xml/cfde_xmlinstruction.cpp
new file mode 100644
index 0000000000..64c980b439
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlinstruction.cpp
@@ -0,0 +1,160 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "xfa/fde/xml/cfde_xmlinstruction.h"
+
+#include "core/fxcrt/fx_ext.h"
+#include "third_party/base/stl_util.h"
+
+CFDE_XMLInstruction::CFDE_XMLInstruction(const CFX_WideString& wsTarget)
+ : m_wsTarget(wsTarget) {
+ ASSERT(m_wsTarget.GetLength() > 0);
+}
+
+CFDE_XMLInstruction::~CFDE_XMLInstruction() {}
+
+FDE_XMLNODETYPE CFDE_XMLInstruction::GetType() const {
+ return FDE_XMLNODE_Instruction;
+}
+
+CFDE_XMLNode* CFDE_XMLInstruction::Clone(bool bRecursive) {
+ CFDE_XMLInstruction* pClone = new CFDE_XMLInstruction(m_wsTarget);
+ if (!pClone)
+ return nullptr;
+
+ pClone->m_Attributes = m_Attributes;
+ pClone->m_TargetData = m_TargetData;
+ if (bRecursive)
+ CloneChildren(pClone);
+
+ return pClone;
+}
+
+int32_t CFDE_XMLInstruction::CountAttributes() const {
+ return pdfium::CollectionSize<int32_t>(m_Attributes) / 2;
+}
+
+bool CFDE_XMLInstruction::GetAttribute(int32_t index,
+ CFX_WideString& wsAttriName,
+ CFX_WideString& wsAttriValue) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ ASSERT(index > -1 && index < iCount / 2);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (index == 0) {
+ wsAttriName = m_Attributes[i];
+ wsAttriValue = m_Attributes[i + 1];
+ return true;
+ }
+ index--;
+ }
+ return false;
+}
+
+bool CFDE_XMLInstruction::HasAttribute(const wchar_t* pwsAttriName) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void CFDE_XMLInstruction::GetString(const wchar_t* pwsAttriName,
+ CFX_WideString& wsAttriValue,
+ const wchar_t* pwsDefValue) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0) {
+ wsAttriValue = m_Attributes[i + 1];
+ return;
+ }
+ }
+ wsAttriValue = pwsDefValue;
+}
+
+void CFDE_XMLInstruction::SetString(const CFX_WideString& wsAttriName,
+ const CFX_WideString& wsAttriValue) {
+ ASSERT(wsAttriName.GetLength() > 0);
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(wsAttriName) == 0) {
+ m_Attributes[i] = wsAttriName;
+ m_Attributes[i + 1] = wsAttriValue;
+ return;
+ }
+ }
+ m_Attributes.push_back(wsAttriName);
+ m_Attributes.push_back(wsAttriValue);
+}
+
+int32_t CFDE_XMLInstruction::GetInteger(const wchar_t* pwsAttriName,
+ int32_t iDefValue) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0) {
+ return FXSYS_wtoi(m_Attributes[i + 1].c_str());
+ }
+ }
+ return iDefValue;
+}
+
+void CFDE_XMLInstruction::SetInteger(const wchar_t* pwsAttriName,
+ int32_t iAttriValue) {
+ CFX_WideString wsValue;
+ wsValue.Format(L"%d", iAttriValue);
+ SetString(pwsAttriName, wsValue);
+}
+
+float CFDE_XMLInstruction::GetFloat(const wchar_t* pwsAttriName,
+ float fDefValue) const {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0) {
+ return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr);
+ }
+ }
+ return fDefValue;
+}
+
+void CFDE_XMLInstruction::SetFloat(const wchar_t* pwsAttriName,
+ float fAttriValue) {
+ CFX_WideString wsValue;
+ wsValue.Format(L"%f", fAttriValue);
+ SetString(pwsAttriName, wsValue);
+}
+
+void CFDE_XMLInstruction::RemoveAttribute(const wchar_t* pwsAttriName) {
+ int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
+ for (int32_t i = 0; i < iCount; i += 2) {
+ if (m_Attributes[i].Compare(pwsAttriName) == 0) {
+ m_Attributes.erase(m_Attributes.begin() + i,
+ m_Attributes.begin() + i + 2);
+ return;
+ }
+ }
+}
+
+int32_t CFDE_XMLInstruction::CountData() const {
+ return pdfium::CollectionSize<int32_t>(m_TargetData);
+}
+
+bool CFDE_XMLInstruction::GetData(int32_t index, CFX_WideString& wsData) const {
+ if (!pdfium::IndexInBounds(m_TargetData, index))
+ return false;
+
+ wsData = m_TargetData[index];
+ return true;
+}
+
+void CFDE_XMLInstruction::AppendData(const CFX_WideString& wsData) {
+ m_TargetData.push_back(wsData);
+}
+
+void CFDE_XMLInstruction::RemoveData(int32_t index) {
+ if (pdfium::IndexInBounds(m_TargetData, index))
+ m_TargetData.erase(m_TargetData.begin() + index);
+}
diff --git a/xfa/fde/xml/cfde_xmlinstruction.h b/xfa/fde/xml/cfde_xmlinstruction.h
new file mode 100644
index 0000000000..58dc5f17ac
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlinstruction.h
@@ -0,0 +1,50 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef XFA_FDE_XML_CFDE_XMLINSTRUCTION_H_
+#define XFA_FDE_XML_CFDE_XMLINSTRUCTION_H_
+
+#include <vector>
+
+#include "core/fxcrt/fx_string.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+
+class CFDE_XMLInstruction : public CFDE_XMLNode {
+ public:
+ explicit CFDE_XMLInstruction(const CFX_WideString& wsTarget);
+ ~CFDE_XMLInstruction() override;
+
+ // CFDE_XMLNode
+ FDE_XMLNODETYPE GetType() const override;
+ CFDE_XMLNode* Clone(bool bRecursive) override;
+
+ void GetTargetName(CFX_WideString& wsTarget) const { wsTarget = m_wsTarget; }
+ int32_t CountAttributes() const;
+ bool GetAttribute(int32_t index,
+ CFX_WideString& wsAttriName,
+ CFX_WideString& wsAttriValue) const;
+ bool HasAttribute(const wchar_t* pwsAttriName) const;
+ void GetString(const wchar_t* pwsAttriName,
+ CFX_WideString& wsAttriValue,
+ const wchar_t* pwsDefValue = nullptr) const;
+ void SetString(const CFX_WideString& wsAttriName,
+ const CFX_WideString& wsAttriValue);
+ int32_t GetInteger(const wchar_t* pwsAttriName, int32_t iDefValue = 0) const;
+ void SetInteger(const wchar_t* pwsAttriName, int32_t iAttriValue);
+ float GetFloat(const wchar_t* pwsAttriName, float fDefValue = 0) const;
+ void SetFloat(const wchar_t* pwsAttriName, float fAttriValue);
+ void RemoveAttribute(const wchar_t* pwsAttriName);
+ int32_t CountData() const;
+ bool GetData(int32_t index, CFX_WideString& wsData) const;
+ void AppendData(const CFX_WideString& wsData);
+ void RemoveData(int32_t index);
+
+ CFX_WideString m_wsTarget;
+ std::vector<CFX_WideString> m_Attributes;
+ std::vector<CFX_WideString> m_TargetData;
+};
+
+#endif // XFA_FDE_XML_CFDE_XMLINSTRUCTION_H_
diff --git a/xfa/fde/xml/cfde_xmlnode.cpp b/xfa/fde/xml/cfde_xmlnode.cpp
new file mode 100644
index 0000000000..bd86c0b071
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlnode.cpp
@@ -0,0 +1,458 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "xfa/fde/xml/cfde_xmlnode.h"
+
+#include <vector>
+
+#include "third_party/base/stl_util.h"
+#include "xfa/fde/xml/cfde_xmlchardata.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlinstruction.h"
+#include "xfa/fde/xml/cfde_xmltext.h"
+#include "xfa/fgas/crt/fgas_codepage.h"
+
+CFDE_XMLNode::CFDE_XMLNode()
+ : m_pParent(nullptr),
+ m_pChild(nullptr),
+ m_pPrior(nullptr),
+ m_pNext(nullptr) {}
+
+FDE_XMLNODETYPE CFDE_XMLNode::GetType() const {
+ return FDE_XMLNODE_Unknown;
+}
+
+CFDE_XMLNode::~CFDE_XMLNode() {
+ DeleteChildren();
+}
+
+void CFDE_XMLNode::DeleteChildren() {
+ CFDE_XMLNode* pChild = m_pChild;
+ while (pChild) {
+ CFDE_XMLNode* pNext = pChild->m_pNext;
+ delete pChild;
+ pChild = pNext;
+ }
+ m_pChild = nullptr;
+}
+
+int32_t CFDE_XMLNode::CountChildNodes() const {
+ int32_t iCount = 0;
+ CFDE_XMLNode* pChild = m_pChild;
+ while (pChild) {
+ iCount++;
+ pChild = pChild->m_pNext;
+ }
+ return iCount;
+}
+
+CFDE_XMLNode* CFDE_XMLNode::GetChildNode(int32_t index) const {
+ CFDE_XMLNode* pChild = m_pChild;
+ while (pChild) {
+ if (index == 0) {
+ return pChild;
+ }
+ index--;
+ pChild = pChild->m_pNext;
+ }
+ return nullptr;
+}
+
+int32_t CFDE_XMLNode::GetChildNodeIndex(CFDE_XMLNode* pNode) const {
+ int32_t index = 0;
+ CFDE_XMLNode* pChild = m_pChild;
+ while (pChild) {
+ if (pChild == pNode) {
+ return index;
+ }
+ index++;
+ pChild = pChild->m_pNext;
+ }
+ return -1;
+}
+
+CFDE_XMLNode* CFDE_XMLNode::GetPath(const wchar_t* pPath,
+ int32_t iLength,
+ bool bQualifiedName) const {
+ ASSERT(pPath);
+ if (iLength < 0) {
+ iLength = FXSYS_wcslen(pPath);
+ }
+ if (iLength == 0) {
+ return nullptr;
+ }
+ CFX_WideString csPath;
+ const wchar_t* pStart = pPath;
+ const wchar_t* pEnd = pPath + iLength;
+ wchar_t ch;
+ while (pStart < pEnd) {
+ ch = *pStart++;
+ if (ch == L'/') {
+ break;
+ } else {
+ csPath += ch;
+ }
+ }
+ iLength -= pStart - pPath;
+ CFDE_XMLNode* pFind = nullptr;
+ if (csPath.GetLength() < 1) {
+ pFind = GetNodeItem(CFDE_XMLNode::Root);
+ } else if (csPath.Compare(L"..") == 0) {
+ pFind = m_pParent;
+ } else if (csPath.Compare(L".") == 0) {
+ pFind = (CFDE_XMLNode*)this;
+ } else {
+ CFX_WideString wsTag;
+ CFDE_XMLNode* pNode = m_pChild;
+ while (pNode) {
+ if (pNode->GetType() == FDE_XMLNODE_Element) {
+ if (bQualifiedName) {
+ ((CFDE_XMLElement*)pNode)->GetTagName(wsTag);
+ } else {
+ ((CFDE_XMLElement*)pNode)->GetLocalTagName(wsTag);
+ }
+ if (wsTag.Compare(csPath) == 0) {
+ if (iLength < 1) {
+ pFind = pNode;
+ } else {
+ pFind = pNode->GetPath(pStart, iLength, bQualifiedName);
+ }
+ if (pFind)
+ return pFind;
+ }
+ }
+ pNode = pNode->m_pNext;
+ }
+ }
+ if (!pFind || iLength < 1)
+ return pFind;
+ return pFind->GetPath(pStart, iLength, bQualifiedName);
+}
+
+int32_t CFDE_XMLNode::InsertChildNode(CFDE_XMLNode* pNode, int32_t index) {
+ pNode->m_pParent = this;
+ if (!m_pChild) {
+ m_pChild = pNode;
+ pNode->m_pPrior = nullptr;
+ pNode->m_pNext = nullptr;
+ return 0;
+ }
+ if (index == 0) {
+ pNode->m_pNext = m_pChild;
+ pNode->m_pPrior = nullptr;
+ m_pChild->m_pPrior = pNode;
+ m_pChild = pNode;
+ return 0;
+ }
+ int32_t iCount = 0;
+ CFDE_XMLNode* pFind = m_pChild;
+ while (++iCount != index && pFind->m_pNext) {
+ pFind = pFind->m_pNext;
+ }
+ pNode->m_pPrior = pFind;
+ pNode->m_pNext = pFind->m_pNext;
+ if (pFind->m_pNext)
+ pFind->m_pNext->m_pPrior = pNode;
+ pFind->m_pNext = pNode;
+ return iCount;
+}
+
+void CFDE_XMLNode::RemoveChildNode(CFDE_XMLNode* pNode) {
+ ASSERT(m_pChild && pNode);
+ if (m_pChild == pNode) {
+ m_pChild = pNode->m_pNext;
+ } else {
+ pNode->m_pPrior->m_pNext = pNode->m_pNext;
+ }
+ if (pNode->m_pNext)
+ pNode->m_pNext->m_pPrior = pNode->m_pPrior;
+ pNode->m_pParent = nullptr;
+ pNode->m_pNext = nullptr;
+ pNode->m_pPrior = nullptr;
+}
+
+CFDE_XMLNode* CFDE_XMLNode::GetNodeItem(CFDE_XMLNode::NodeItem eItem) const {
+ switch (eItem) {
+ case CFDE_XMLNode::Root: {
+ CFDE_XMLNode* pParent = (CFDE_XMLNode*)this;
+ while (pParent->m_pParent) {
+ pParent = pParent->m_pParent;
+ }
+ return pParent;
+ }
+ case CFDE_XMLNode::Parent:
+ return m_pParent;
+ case CFDE_XMLNode::FirstSibling: {
+ CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
+ while (pItem->m_pPrior) {
+ pItem = pItem->m_pPrior;
+ }
+ return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
+ }
+ case CFDE_XMLNode::PriorSibling:
+ return m_pPrior;
+ case CFDE_XMLNode::NextSibling:
+ return m_pNext;
+ case CFDE_XMLNode::LastSibling: {
+ CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
+ while (pItem->m_pNext)
+ pItem = pItem->m_pNext;
+ return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
+ }
+ case CFDE_XMLNode::FirstNeighbor: {
+ CFDE_XMLNode* pParent = (CFDE_XMLNode*)this;
+ while (pParent->m_pParent)
+ pParent = pParent->m_pParent;
+ return pParent == (CFDE_XMLNode*)this ? nullptr : pParent;
+ }
+ case CFDE_XMLNode::PriorNeighbor: {
+ if (!m_pPrior)
+ return m_pParent;
+
+ CFDE_XMLNode* pItem = m_pPrior;
+ while (pItem->m_pChild) {
+ pItem = pItem->m_pChild;
+ while (pItem->m_pNext)
+ pItem = pItem->m_pNext;
+ }
+ return pItem;
+ }
+ case CFDE_XMLNode::NextNeighbor: {
+ if (m_pChild)
+ return m_pChild;
+ if (m_pNext)
+ return m_pNext;
+ CFDE_XMLNode* pItem = m_pParent;
+ while (pItem) {
+ if (pItem->m_pNext)
+ return pItem->m_pNext;
+ pItem = pItem->m_pParent;
+ }
+ return nullptr;
+ }
+ case CFDE_XMLNode::LastNeighbor: {
+ CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
+ while (pItem->m_pParent) {
+ pItem = pItem->m_pParent;
+ }
+ while (true) {
+ while (pItem->m_pNext)
+ pItem = pItem->m_pNext;
+ if (!pItem->m_pChild)
+ break;
+ pItem = pItem->m_pChild;
+ }
+ return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
+ }
+ case CFDE_XMLNode::FirstChild:
+ return m_pChild;
+ case CFDE_XMLNode::LastChild: {
+ if (!m_pChild)
+ return nullptr;
+
+ CFDE_XMLNode* pChild = m_pChild;
+ while (pChild->m_pNext)
+ pChild = pChild->m_pNext;
+ return pChild;
+ }
+ default:
+ break;
+ }
+ return nullptr;
+}
+
+int32_t CFDE_XMLNode::GetNodeLevel() const {
+ int32_t iLevel = 0;
+ const CFDE_XMLNode* pItem = m_pParent;
+ while (pItem) {
+ iLevel++;
+ pItem = pItem->m_pParent;
+ }
+ return iLevel;
+}
+
+bool CFDE_XMLNode::InsertNodeItem(CFDE_XMLNode::NodeItem eItem,
+ CFDE_XMLNode* pNode) {
+ switch (eItem) {
+ case CFDE_XMLNode::NextSibling: {
+ pNode->m_pParent = m_pParent;
+ pNode->m_pNext = m_pNext;
+ pNode->m_pPrior = this;
+ if (m_pNext) {
+ m_pNext->m_pPrior = pNode;
+ }
+ m_pNext = pNode;
+ return true;
+ }
+ case CFDE_XMLNode::PriorSibling: {
+ pNode->m_pParent = m_pParent;
+ pNode->m_pNext = this;
+ pNode->m_pPrior = m_pPrior;
+ if (m_pPrior) {
+ m_pPrior->m_pNext = pNode;
+ } else if (m_pParent) {
+ m_pParent->m_pChild = pNode;
+ }
+ m_pPrior = pNode;
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+CFDE_XMLNode* CFDE_XMLNode::RemoveNodeItem(CFDE_XMLNode::NodeItem eItem) {
+ CFDE_XMLNode* pNode = nullptr;
+ switch (eItem) {
+ case CFDE_XMLNode::NextSibling:
+ if (m_pNext) {
+ pNode = m_pNext;
+ m_pNext = pNode->m_pNext;
+ if (m_pNext) {
+ m_pNext->m_pPrior = this;
+ }
+ pNode->m_pParent = nullptr;
+ pNode->m_pNext = nullptr;
+ pNode->m_pPrior = nullptr;
+ }
+ break;
+ default:
+ break;
+ }
+ return pNode;
+}
+
+CFDE_XMLNode* CFDE_XMLNode::Clone(bool bRecursive) {
+ return nullptr;
+}
+
+void CFDE_XMLNode::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream) {
+ CFDE_XMLNode* pNode = (CFDE_XMLNode*)this;
+ switch (pNode->GetType()) {
+ case FDE_XMLNODE_Instruction: {
+ CFX_WideString ws;
+ CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode;
+ if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) {
+ ws = L"<?xml version=\"1.0\" encoding=\"";
+ uint16_t wCodePage = pXMLStream->GetCodePage();
+ if (wCodePage == FX_CODEPAGE_UTF16LE) {
+ ws += L"UTF-16";
+ } else if (wCodePage == FX_CODEPAGE_UTF16BE) {
+ ws += L"UTF-16be";
+ } else {
+ ws += L"UTF-8";
+ }
+ ws += L"\"?>";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ } else {
+ ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str());
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes;
+ int32_t i;
+ int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
+ CFX_WideString wsValue;
+ for (i = 0; i < iCount; i += 2) {
+ ws = L" ";
+ ws += attributes[i];
+ ws += L"=\"";
+ wsValue = attributes[i + 1];
+ wsValue.Replace(L"&", L"&amp;");
+ wsValue.Replace(L"<", L"&lt;");
+ wsValue.Replace(L">", L"&gt;");
+ wsValue.Replace(L"\'", L"&apos;");
+ wsValue.Replace(L"\"", L"&quot;");
+ ws += wsValue;
+ ws += L"\"";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ }
+ std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData;
+ iCount = pdfium::CollectionSize<int32_t>(targetdata);
+ for (i = 0; i < iCount; i++) {
+ ws = L" \"";
+ ws += targetdata[i];
+ ws += L"\"";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ }
+ ws = L"?>";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ }
+ } break;
+ case FDE_XMLNODE_Element: {
+ CFX_WideString ws;
+ ws = L"<";
+ ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ std::vector<CFX_WideString>& attributes =
+ static_cast<CFDE_XMLElement*>(pNode)->m_Attributes;
+ int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
+ CFX_WideString wsValue;
+ for (int32_t i = 0; i < iCount; i += 2) {
+ ws = L" ";
+ ws += attributes[i];
+ ws += L"=\"";
+ wsValue = attributes[i + 1];
+ wsValue.Replace(L"&", L"&amp;");
+ wsValue.Replace(L"<", L"&lt;");
+ wsValue.Replace(L">", L"&gt;");
+ wsValue.Replace(L"\'", L"&apos;");
+ wsValue.Replace(L"\"", L"&quot;");
+ ws += wsValue;
+ ws += L"\"";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ }
+ if (pNode->m_pChild) {
+ ws = L"\n>";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ CFDE_XMLNode* pChild = pNode->m_pChild;
+ while (pChild) {
+ pChild->SaveXMLNode(pXMLStream);
+ pChild = pChild->m_pNext;
+ }
+ ws = L"</";
+ ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
+ ws += L"\n>";
+ } else {
+ ws = L"\n/>";
+ }
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ } break;
+ case FDE_XMLNODE_Text: {
+ CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText;
+ ws.Replace(L"&", L"&amp;");
+ ws.Replace(L"<", L"&lt;");
+ ws.Replace(L">", L"&gt;");
+ ws.Replace(L"\'", L"&apos;");
+ ws.Replace(L"\"", L"&quot;");
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ } break;
+ case FDE_XMLNODE_CharData: {
+ CFX_WideString ws = L"<![CDATA[";
+ ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData;
+ ws += L"]]>";
+ pXMLStream->WriteString(ws.c_str(), ws.GetLength());
+ } break;
+ case FDE_XMLNODE_Unknown:
+ break;
+ default:
+ break;
+ }
+}
+
+void CFDE_XMLNode::CloneChildren(CFDE_XMLNode* pClone) {
+ if (!m_pChild) {
+ return;
+ }
+ CFDE_XMLNode* pNext = m_pChild;
+ CFDE_XMLNode* pCloneNext = pNext->Clone(true);
+ pClone->InsertChildNode(pCloneNext);
+ pNext = pNext->m_pNext;
+ while (pNext) {
+ CFDE_XMLNode* pChild = pNext->Clone(true);
+ pCloneNext->InsertNodeItem(CFDE_XMLNode::NextSibling, pChild);
+ pCloneNext = pChild;
+ pNext = pNext->m_pNext;
+ }
+}
diff --git a/xfa/fde/xml/cfde_xmlnode.h b/xfa/fde/xml/cfde_xmlnode.h
new file mode 100644
index 0000000000..178150fda5
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlnode.h
@@ -0,0 +1,74 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef XFA_FDE_XML_CFDE_XMLNODE_H_
+#define XFA_FDE_XML_CFDE_XMLNODE_H_
+
+#include "core/fxcrt/cfx_retain_ptr.h"
+#include "xfa/fgas/crt/ifgas_stream.h"
+
+enum FDE_XMLNODETYPE {
+ FDE_XMLNODE_Unknown = 0,
+ FDE_XMLNODE_Instruction,
+ FDE_XMLNODE_Element,
+ FDE_XMLNODE_Text,
+ FDE_XMLNODE_CharData,
+};
+
+struct FDE_XMLNODE {
+ int32_t iNodeNum;
+ FDE_XMLNODETYPE eNodeType;
+};
+
+class CFDE_XMLNode {
+ public:
+ enum NodeItem {
+ Root = 0,
+ Parent,
+ FirstSibling,
+ PriorSibling,
+ NextSibling,
+ LastSibling,
+ FirstNeighbor,
+ PriorNeighbor,
+ NextNeighbor,
+ LastNeighbor,
+ FirstChild,
+ LastChild
+ };
+
+ CFDE_XMLNode();
+ virtual ~CFDE_XMLNode();
+
+ virtual FDE_XMLNODETYPE GetType() const;
+ virtual CFDE_XMLNode* Clone(bool bRecursive);
+
+ int32_t CountChildNodes() const;
+ CFDE_XMLNode* GetChildNode(int32_t index) const;
+ int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const;
+ int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1);
+ void RemoveChildNode(CFDE_XMLNode* pNode);
+ void DeleteChildren();
+ void CloneChildren(CFDE_XMLNode* pClone);
+
+ CFDE_XMLNode* GetPath(const wchar_t* pPath,
+ int32_t iLength = -1,
+ bool bQualifiedName = true) const;
+
+ int32_t GetNodeLevel() const;
+ CFDE_XMLNode* GetNodeItem(CFDE_XMLNode::NodeItem eItem) const;
+ bool InsertNodeItem(CFDE_XMLNode::NodeItem eItem, CFDE_XMLNode* pNode);
+ CFDE_XMLNode* RemoveNodeItem(CFDE_XMLNode::NodeItem eItem);
+
+ void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream);
+
+ CFDE_XMLNode* m_pParent;
+ CFDE_XMLNode* m_pChild;
+ CFDE_XMLNode* m_pPrior;
+ CFDE_XMLNode* m_pNext;
+};
+
+#endif // XFA_FDE_XML_CFDE_XMLNODE_H_
diff --git a/xfa/fde/xml/cfde_xml_parser.cpp b/xfa/fde/xml/cfde_xmlparser.cpp
index 840c34a5c4..db85021693 100644
--- a/xfa/fde/xml/cfde_xml_parser.cpp
+++ b/xfa/fde/xml/cfde_xmlparser.cpp
@@ -4,9 +4,15 @@
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-#include "xfa/fde/xml/cfde_xml_parser.h"
+#include "xfa/fde/xml/cfde_xmlparser.h"
+#include "core/fxcrt/fx_basic.h"
#include "third_party/base/ptr_util.h"
+#include "xfa/fde/xml/cfde_xmlchardata.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlinstruction.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+#include "xfa/fde/xml/cfde_xmltext.h"
CFDE_XMLParser::CFDE_XMLParser(CFDE_XMLNode* pParent,
const CFX_RetainPtr<IFGAS_Stream>& pStream)
diff --git a/xfa/fde/xml/cfde_xml_parser.h b/xfa/fde/xml/cfde_xmlparser.h
index cd8ccbe389..42f590ce21 100644
--- a/xfa/fde/xml/cfde_xml_parser.h
+++ b/xfa/fde/xml/cfde_xmlparser.h
@@ -4,14 +4,18 @@
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-#ifndef XFA_FDE_XML_CFDE_XML_PARSER_H_
-#define XFA_FDE_XML_CFDE_XML_PARSER_H_
+#ifndef XFA_FDE_XML_CFDE_XMLPARSER_H_
+#define XFA_FDE_XML_CFDE_XMLPARSER_H_
#include <memory>
#include <stack>
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "core/fxcrt/cfx_retain_ptr.h"
+#include "core/fxcrt/fx_string.h"
+#include "xfa/fde/xml/cfde_xmlsyntaxparser.h"
+class CFDE_XMLElement;
+class CFDE_XMLNode;
class IFGAS_Stream;
class IFX_Pause;
@@ -40,4 +44,4 @@ class CFDE_XMLParser {
FDE_XmlSyntaxResult m_syntaxParserResult;
};
-#endif // XFA_FDE_XML_CFDE_XML_PARSER_H_
+#endif // XFA_FDE_XML_CFDE_XMLPARSER_H_
diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.cpp b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp
new file mode 100644
index 0000000000..45a1eddb83
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp
@@ -0,0 +1,703 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "xfa/fde/xml/cfde_xmlsyntaxparser.h"
+
+#include <algorithm>
+
+#include "core/fxcrt/fx_ext.h"
+#include "core/fxcrt/fx_safe_types.h"
+
+namespace {
+
+const uint32_t kMaxCharRange = 0x10ffff;
+
+bool IsXMLWhiteSpace(wchar_t ch) {
+ return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09;
+}
+
+struct FDE_XMLNAMECHAR {
+ uint16_t wStart;
+ uint16_t wEnd;
+ bool bStartChar;
+};
+
+const FDE_XMLNAMECHAR g_XMLNameChars[] = {
+ {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false},
+ {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true},
+ {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true},
+ {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true},
+ {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false},
+ {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true},
+ {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true},
+};
+
+bool IsXMLNameChar(wchar_t ch, bool bFirstChar) {
+ int32_t iStart = 0;
+ int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1;
+ while (iStart <= iEnd) {
+ int32_t iMid = (iStart + iEnd) / 2;
+ if (ch < g_XMLNameChars[iMid].wStart) {
+ iEnd = iMid - 1;
+ } else if (ch > g_XMLNameChars[iMid].wEnd) {
+ iStart = iMid + 1;
+ } else {
+ return bFirstChar ? g_XMLNameChars[iMid].bStartChar : true;
+ }
+ }
+ return false;
+}
+
+int32_t GetUTF8EncodeLength(const wchar_t* pSrc, int32_t iSrcLen) {
+ uint32_t unicode = 0;
+ int32_t iDstNum = 0;
+ while (iSrcLen-- > 0) {
+ unicode = *pSrc++;
+ int nbytes = 0;
+ if ((uint32_t)unicode < 0x80) {
+ nbytes = 1;
+ } else if ((uint32_t)unicode < 0x800) {
+ nbytes = 2;
+ } else if ((uint32_t)unicode < 0x10000) {
+ nbytes = 3;
+ } else if ((uint32_t)unicode < 0x200000) {
+ nbytes = 4;
+ } else if ((uint32_t)unicode < 0x4000000) {
+ nbytes = 5;
+ } else {
+ nbytes = 6;
+ }
+ iDstNum += nbytes;
+ }
+ return iDstNum;
+}
+
+} // namespace
+
+CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser()
+ : m_pStream(nullptr),
+ m_iXMLPlaneSize(-1),
+ m_iCurrentPos(0),
+ m_iCurrentNodeNum(-1),
+ m_iLastNodeNum(-1),
+ m_iParsedChars(0),
+ m_iParsedBytes(0),
+ m_pBuffer(nullptr),
+ m_iBufferChars(0),
+ m_bEOS(false),
+ m_pStart(nullptr),
+ m_pEnd(nullptr),
+ m_iAllocStep(m_BlockBuffer.GetAllocStep()),
+ m_iDataLength(m_BlockBuffer.GetDataLengthRef()),
+ m_pCurrentBlock(nullptr),
+ m_iIndexInBlock(0),
+ m_iTextDataLength(0),
+ m_syntaxParserResult(FDE_XmlSyntaxResult::None),
+ m_syntaxParserState(FDE_XmlSyntaxState::Text),
+ m_wQuotationMark(0),
+ m_iEntityStart(-1) {
+ m_CurNode.iNodeNum = -1;
+ m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
+}
+
+void CFDE_XMLSyntaxParser::Init(const CFX_RetainPtr<IFGAS_Stream>& pStream,
+ int32_t iXMLPlaneSize,
+ int32_t iTextDataSize) {
+ ASSERT(!m_pStream && !m_pBuffer);
+ ASSERT(pStream && iXMLPlaneSize > 0);
+ int32_t iStreamLength = pStream->GetLength();
+ ASSERT(iStreamLength > 0);
+ m_pStream = pStream;
+ m_iXMLPlaneSize = std::min(iXMLPlaneSize, iStreamLength);
+ uint8_t bom[4];
+ m_iCurrentPos = m_pStream->GetBOM(bom);
+ ASSERT(!m_pBuffer);
+
+ FX_SAFE_INT32 alloc_size_safe = m_iXMLPlaneSize;
+ alloc_size_safe += 1; // For NUL.
+ if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return;
+ }
+
+ m_pBuffer = FX_Alloc(
+ wchar_t, pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe));
+ m_pStart = m_pEnd = m_pBuffer;
+ ASSERT(!m_BlockBuffer.IsInitialized());
+ m_BlockBuffer.InitBuffer();
+ m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_iParsedBytes = m_iParsedChars = 0;
+ m_iBufferChars = 0;
+}
+
+FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() {
+ if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error ||
+ m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) {
+ return m_syntaxParserResult;
+ }
+ ASSERT(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized());
+ int32_t iStreamLength = m_pStream->GetLength();
+ int32_t iPos;
+
+ FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None;
+ while (true) {
+ if (m_pStart >= m_pEnd) {
+ if (m_bEOS || m_iCurrentPos >= iStreamLength) {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
+ return m_syntaxParserResult;
+ }
+ m_iParsedChars += (m_pEnd - m_pBuffer);
+ m_iParsedBytes = m_iCurrentPos;
+ if (m_pStream->GetPosition() != m_iCurrentPos) {
+ m_pStream->Seek(FX_STREAMSEEK_Begin, m_iCurrentPos);
+ }
+ m_iBufferChars =
+ m_pStream->ReadString(m_pBuffer, m_iXMLPlaneSize, m_bEOS);
+ iPos = m_pStream->GetPosition();
+ if (m_iBufferChars < 1) {
+ m_iCurrentPos = iStreamLength;
+ m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
+ return m_syntaxParserResult;
+ }
+ m_iCurrentPos = iPos;
+ m_pStart = m_pBuffer;
+ m_pEnd = m_pBuffer + m_iBufferChars;
+ }
+
+ while (m_pStart < m_pEnd) {
+ wchar_t ch = *m_pStart;
+ switch (m_syntaxParserState) {
+ case FDE_XmlSyntaxState::Text:
+ if (ch == L'<') {
+ if (m_iDataLength > 0) {
+ m_iTextDataLength = m_iDataLength;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_iEntityStart = -1;
+ syntaxParserResult = FDE_XmlSyntaxResult::Text;
+ } else {
+ m_pStart++;
+ m_syntaxParserState = FDE_XmlSyntaxState::Node;
+ }
+ } else {
+ ParseTextChar(ch);
+ }
+ break;
+ case FDE_XmlSyntaxState::Node:
+ if (ch == L'!') {
+ m_pStart++;
+ m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl;
+ } else if (ch == L'/') {
+ m_pStart++;
+ m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
+ } else if (ch == L'?') {
+ m_iLastNodeNum++;
+ m_iCurrentNodeNum = m_iLastNodeNum;
+ m_CurNode.iNodeNum = m_iLastNodeNum;
+ m_CurNode.eNodeType = FDE_XMLNODE_Instruction;
+ m_XMLNodeStack.push(m_CurNode);
+ m_pStart++;
+ m_syntaxParserState = FDE_XmlSyntaxState::Target;
+ syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen;
+ } else {
+ m_iLastNodeNum++;
+ m_iCurrentNodeNum = m_iLastNodeNum;
+ m_CurNode.iNodeNum = m_iLastNodeNum;
+ m_CurNode.eNodeType = FDE_XMLNODE_Element;
+ m_XMLNodeStack.push(m_CurNode);
+ m_syntaxParserState = FDE_XmlSyntaxState::Tag;
+ syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen;
+ }
+ break;
+ case FDE_XmlSyntaxState::Target:
+ case FDE_XmlSyntaxState::Tag:
+ if (!IsXMLNameChar(ch, m_iDataLength < 1)) {
+ if (m_iDataLength < 1) {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ } else {
+ m_iTextDataLength = m_iDataLength;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (m_syntaxParserState != FDE_XmlSyntaxState::Target) {
+ syntaxParserResult = FDE_XmlSyntaxResult::TagName;
+ } else {
+ syntaxParserResult = FDE_XmlSyntaxResult::TargetName;
+ }
+ m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
+ }
+ } else {
+ if (m_iIndexInBlock == m_iAllocStep) {
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (!m_pCurrentBlock) {
+ return FDE_XmlSyntaxResult::Error;
+ }
+ }
+ m_pCurrentBlock[m_iIndexInBlock++] = ch;
+ m_iDataLength++;
+ m_pStart++;
+ }
+ break;
+ case FDE_XmlSyntaxState::AttriName:
+ if (m_iDataLength < 1 && IsXMLWhiteSpace(ch)) {
+ m_pStart++;
+ break;
+ }
+ if (!IsXMLNameChar(ch, m_iDataLength < 1)) {
+ if (m_iDataLength < 1) {
+ if (m_CurNode.eNodeType == FDE_XMLNODE_Element) {
+ if (ch == L'>' || ch == L'/') {
+ m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
+ break;
+ }
+ } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
+ if (ch == L'?') {
+ m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
+ m_pStart++;
+ } else {
+ m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
+ }
+ break;
+ }
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ } else {
+ if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
+ if (ch != '=' && !IsXMLWhiteSpace(ch)) {
+ m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
+ break;
+ }
+ }
+ m_iTextDataLength = m_iDataLength;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign;
+ syntaxParserResult = FDE_XmlSyntaxResult::AttriName;
+ }
+ } else {
+ if (m_iIndexInBlock == m_iAllocStep) {
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (!m_pCurrentBlock) {
+ return FDE_XmlSyntaxResult::Error;
+ }
+ }
+ m_pCurrentBlock[m_iIndexInBlock++] = ch;
+ m_iDataLength++;
+ m_pStart++;
+ }
+ break;
+ case FDE_XmlSyntaxState::AttriEqualSign:
+ if (IsXMLWhiteSpace(ch)) {
+ m_pStart++;
+ break;
+ }
+ if (ch != L'=') {
+ if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
+ m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
+ break;
+ }
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ } else {
+ m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation;
+ m_pStart++;
+ }
+ break;
+ case FDE_XmlSyntaxState::AttriQuotation:
+ if (IsXMLWhiteSpace(ch)) {
+ m_pStart++;
+ break;
+ }
+ if (ch != L'\"' && ch != L'\'') {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ } else {
+ m_wQuotationMark = ch;
+ m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
+ m_pStart++;
+ }
+ break;
+ case FDE_XmlSyntaxState::AttriValue:
+ if (ch == m_wQuotationMark) {
+ if (m_iEntityStart > -1) {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ }
+ m_iTextDataLength = m_iDataLength;
+ m_wQuotationMark = 0;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_pStart++;
+ m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
+ syntaxParserResult = FDE_XmlSyntaxResult::AttriValue;
+ } else {
+ ParseTextChar(ch);
+ }
+ break;
+ case FDE_XmlSyntaxState::CloseInstruction:
+ if (ch != L'>') {
+ if (m_iIndexInBlock == m_iAllocStep) {
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (!m_pCurrentBlock) {
+ return FDE_XmlSyntaxResult::Error;
+ }
+ }
+ m_pCurrentBlock[m_iIndexInBlock++] = ch;
+ m_iDataLength++;
+ m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
+ } else if (m_iDataLength > 0) {
+ m_iTextDataLength = m_iDataLength;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
+ } else {
+ m_pStart++;
+ if (m_XMLNodeStack.empty()) {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ }
+ m_XMLNodeStack.pop();
+ if (!m_XMLNodeStack.empty()) {
+ m_CurNode = m_XMLNodeStack.top();
+ } else {
+ m_CurNode.iNodeNum = -1;
+ m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
+ }
+ m_iCurrentNodeNum = m_CurNode.iNodeNum;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_syntaxParserState = FDE_XmlSyntaxState::Text;
+ syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose;
+ }
+ break;
+ case FDE_XmlSyntaxState::BreakElement:
+ if (ch == L'>') {
+ m_syntaxParserState = FDE_XmlSyntaxState::Text;
+ syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak;
+ } else if (ch == L'/') {
+ m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
+ } else {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ }
+ m_pStart++;
+ break;
+ case FDE_XmlSyntaxState::CloseElement:
+ if (!IsXMLNameChar(ch, m_iDataLength < 1)) {
+ if (ch == L'>') {
+ if (m_XMLNodeStack.empty()) {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ }
+ m_XMLNodeStack.pop();
+ if (!m_XMLNodeStack.empty()) {
+ m_CurNode = m_XMLNodeStack.top();
+ } else {
+ m_CurNode.iNodeNum = -1;
+ m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
+ }
+ m_iCurrentNodeNum = m_CurNode.iNodeNum;
+ m_iTextDataLength = m_iDataLength;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_syntaxParserState = FDE_XmlSyntaxState::Text;
+ syntaxParserResult = FDE_XmlSyntaxResult::ElementClose;
+ } else if (!IsXMLWhiteSpace(ch)) {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ }
+ } else {
+ if (m_iIndexInBlock == m_iAllocStep) {
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (!m_pCurrentBlock) {
+ return FDE_XmlSyntaxResult::Error;
+ }
+ }
+ m_pCurrentBlock[m_iIndexInBlock++] = ch;
+ m_iDataLength++;
+ }
+ m_pStart++;
+ break;
+ case FDE_XmlSyntaxState::SkipCommentOrDecl:
+ if (FXSYS_wcsnicmp(m_pStart, L"--", 2) == 0) {
+ m_pStart += 2;
+ m_syntaxParserState = FDE_XmlSyntaxState::SkipComment;
+ } else if (FXSYS_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) {
+ m_pStart += 7;
+ m_syntaxParserState = FDE_XmlSyntaxState::SkipCData;
+ } else {
+ m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode;
+ m_SkipChar = L'>';
+ m_SkipStack.push(L'>');
+ }
+ break;
+ case FDE_XmlSyntaxState::SkipCData: {
+ if (FXSYS_wcsnicmp(m_pStart, L"]]>", 3) == 0) {
+ m_pStart += 3;
+ syntaxParserResult = FDE_XmlSyntaxResult::CData;
+ m_iTextDataLength = m_iDataLength;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_syntaxParserState = FDE_XmlSyntaxState::Text;
+ } else {
+ if (m_iIndexInBlock == m_iAllocStep) {
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (!m_pCurrentBlock)
+ return FDE_XmlSyntaxResult::Error;
+ }
+ m_pCurrentBlock[m_iIndexInBlock++] = ch;
+ m_iDataLength++;
+ m_pStart++;
+ }
+ break;
+ }
+ case FDE_XmlSyntaxState::SkipDeclNode:
+ if (m_SkipChar == L'\'' || m_SkipChar == L'\"') {
+ m_pStart++;
+ if (ch != m_SkipChar)
+ break;
+
+ m_SkipStack.pop();
+ if (m_SkipStack.empty())
+ m_syntaxParserState = FDE_XmlSyntaxState::Text;
+ else
+ m_SkipChar = m_SkipStack.top();
+ } else {
+ switch (ch) {
+ case L'<':
+ m_SkipChar = L'>';
+ m_SkipStack.push(L'>');
+ break;
+ case L'[':
+ m_SkipChar = L']';
+ m_SkipStack.push(L']');
+ break;
+ case L'(':
+ m_SkipChar = L')';
+ m_SkipStack.push(L')');
+ break;
+ case L'\'':
+ m_SkipChar = L'\'';
+ m_SkipStack.push(L'\'');
+ break;
+ case L'\"':
+ m_SkipChar = L'\"';
+ m_SkipStack.push(L'\"');
+ break;
+ default:
+ if (ch == m_SkipChar) {
+ m_SkipStack.pop();
+ if (m_SkipStack.empty()) {
+ if (m_iDataLength >= 9) {
+ CFX_WideString wsHeader;
+ m_BlockBuffer.GetTextData(wsHeader, 0, 7);
+ }
+ m_iTextDataLength = m_iDataLength;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_syntaxParserState = FDE_XmlSyntaxState::Text;
+ } else {
+ m_SkipChar = m_SkipStack.top();
+ }
+ }
+ break;
+ }
+ if (!m_SkipStack.empty()) {
+ if (m_iIndexInBlock == m_iAllocStep) {
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (!m_pCurrentBlock) {
+ return FDE_XmlSyntaxResult::Error;
+ }
+ }
+ m_pCurrentBlock[m_iIndexInBlock++] = ch;
+ m_iDataLength++;
+ }
+ m_pStart++;
+ }
+ break;
+ case FDE_XmlSyntaxState::SkipComment:
+ if (FXSYS_wcsnicmp(m_pStart, L"-->", 3) == 0) {
+ m_pStart += 2;
+ m_syntaxParserState = FDE_XmlSyntaxState::Text;
+ }
+
+ m_pStart++;
+ break;
+ case FDE_XmlSyntaxState::TargetData:
+ if (IsXMLWhiteSpace(ch)) {
+ if (m_iDataLength < 1) {
+ m_pStart++;
+ break;
+ } else if (m_wQuotationMark == 0) {
+ m_iTextDataLength = m_iDataLength;
+ m_wQuotationMark = 0;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_pStart++;
+ syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
+ break;
+ }
+ }
+ if (ch == '?') {
+ m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
+ m_pStart++;
+ } else if (ch == '\"') {
+ if (m_wQuotationMark == 0) {
+ m_wQuotationMark = ch;
+ m_pStart++;
+ } else if (ch == m_wQuotationMark) {
+ m_iTextDataLength = m_iDataLength;
+ m_wQuotationMark = 0;
+ m_BlockBuffer.Reset();
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_pStart++;
+ syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
+ } else {
+ m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
+ return m_syntaxParserResult;
+ }
+ } else {
+ if (m_iIndexInBlock == m_iAllocStep) {
+ m_pCurrentBlock =
+ m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (!m_pCurrentBlock) {
+ return FDE_XmlSyntaxResult::Error;
+ }
+ }
+ m_pCurrentBlock[m_iIndexInBlock++] = ch;
+ m_iDataLength++;
+ m_pStart++;
+ }
+ break;
+ default:
+ break;
+ }
+ if (syntaxParserResult != FDE_XmlSyntaxResult::None)
+ return syntaxParserResult;
+ }
+ }
+ return FDE_XmlSyntaxResult::Text;
+}
+
+CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() {
+ m_pCurrentBlock = nullptr;
+ FX_Free(m_pBuffer);
+}
+
+int32_t CFDE_XMLSyntaxParser::GetStatus() const {
+ if (!m_pStream)
+ return -1;
+
+ int32_t iStreamLength = m_pStream->GetLength();
+ if (iStreamLength < 1)
+ return 100;
+
+ if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error)
+ return -1;
+
+ if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString)
+ return 100;
+ return m_iParsedBytes * 100 / iStreamLength;
+}
+
+FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const {
+ if (!m_pStream)
+ return 0;
+
+ int32_t nSrcLen = m_pStart - m_pBuffer;
+ int32_t nDstLen = GetUTF8EncodeLength(m_pBuffer, nSrcLen);
+ return m_iParsedBytes + nDstLen;
+}
+
+void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) {
+ if (m_iIndexInBlock == m_iAllocStep) {
+ m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ if (!m_pCurrentBlock) {
+ return;
+ }
+ }
+ m_pCurrentBlock[m_iIndexInBlock++] = character;
+ m_iDataLength++;
+ if (m_iEntityStart > -1 && character == L';') {
+ CFX_WideString csEntity;
+ m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1,
+ (m_iDataLength - 1) - m_iEntityStart - 1);
+ int32_t iLen = csEntity.GetLength();
+ if (iLen > 0) {
+ if (csEntity[0] == L'#') {
+ uint32_t ch = 0;
+ wchar_t w;
+ if (iLen > 1 && csEntity[1] == L'x') {
+ for (int32_t i = 2; i < iLen; i++) {
+ w = csEntity[i];
+ if (w >= L'0' && w <= L'9') {
+ ch = (ch << 4) + w - L'0';
+ } else if (w >= L'A' && w <= L'F') {
+ ch = (ch << 4) + w - 55;
+ } else if (w >= L'a' && w <= L'f') {
+ ch = (ch << 4) + w - 87;
+ } else {
+ break;
+ }
+ }
+ } else {
+ for (int32_t i = 1; i < iLen; i++) {
+ w = csEntity[i];
+ if (w < L'0' || w > L'9')
+ break;
+ ch = ch * 10 + w - L'0';
+ }
+ }
+ if (ch > kMaxCharRange)
+ ch = ' ';
+
+ character = static_cast<wchar_t>(ch);
+ if (character != 0) {
+ m_BlockBuffer.SetTextChar(m_iEntityStart, character);
+ m_iEntityStart++;
+ }
+ } else {
+ if (csEntity.Compare(L"amp") == 0) {
+ m_BlockBuffer.SetTextChar(m_iEntityStart, L'&');
+ m_iEntityStart++;
+ } else if (csEntity.Compare(L"lt") == 0) {
+ m_BlockBuffer.SetTextChar(m_iEntityStart, L'<');
+ m_iEntityStart++;
+ } else if (csEntity.Compare(L"gt") == 0) {
+ m_BlockBuffer.SetTextChar(m_iEntityStart, L'>');
+ m_iEntityStart++;
+ } else if (csEntity.Compare(L"apos") == 0) {
+ m_BlockBuffer.SetTextChar(m_iEntityStart, L'\'');
+ m_iEntityStart++;
+ } else if (csEntity.Compare(L"quot") == 0) {
+ m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"');
+ m_iEntityStart++;
+ }
+ }
+ }
+ m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, false);
+ m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
+ m_iEntityStart = -1;
+ } else {
+ if (m_iEntityStart < 0 && character == L'&') {
+ m_iEntityStart = m_iDataLength - 1;
+ }
+ }
+ m_pStart++;
+}
diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.h b/xfa/fde/xml/cfde_xmlsyntaxparser.h
new file mode 100644
index 0000000000..9e768d01aa
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmlsyntaxparser.h
@@ -0,0 +1,128 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_
+#define XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_
+
+#include <stack>
+
+#include "core/fxcrt/cfx_blockbuffer.h"
+#include "core/fxcrt/cfx_retain_ptr.h"
+#include "core/fxcrt/fx_string.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+#include "xfa/fgas/crt/ifgas_stream.h"
+
+enum class FDE_XmlSyntaxResult {
+ None,
+ InstructionOpen,
+ InstructionClose,
+ ElementOpen,
+ ElementBreak,
+ ElementClose,
+ TargetName,
+ TagName,
+ AttriName,
+ AttriValue,
+ Text,
+ CData,
+ TargetData,
+ Error,
+ EndOfString
+};
+
+class CFDE_XMLSyntaxParser {
+ public:
+ CFDE_XMLSyntaxParser();
+ ~CFDE_XMLSyntaxParser();
+
+ void Init(const CFX_RetainPtr<IFGAS_Stream>& pStream,
+ int32_t iXMLPlaneSize,
+ int32_t iTextDataSize = 256);
+
+ FDE_XmlSyntaxResult DoSyntaxParse();
+
+ int32_t GetStatus() const;
+ int32_t GetCurrentPos() const {
+ return m_iParsedChars + (m_pStart - m_pBuffer);
+ }
+ FX_FILESIZE GetCurrentBinaryPos() const;
+ int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
+ int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
+
+ void GetTargetName(CFX_WideString& wsTarget) const {
+ m_BlockBuffer.GetTextData(wsTarget, 0, m_iTextDataLength);
+ }
+ void GetTagName(CFX_WideString& wsTag) const {
+ m_BlockBuffer.GetTextData(wsTag, 0, m_iTextDataLength);
+ }
+ void GetAttributeName(CFX_WideString& wsAttriName) const {
+ m_BlockBuffer.GetTextData(wsAttriName, 0, m_iTextDataLength);
+ }
+ void GetAttributeValue(CFX_WideString& wsAttriValue) const {
+ m_BlockBuffer.GetTextData(wsAttriValue, 0, m_iTextDataLength);
+ }
+ void GetTextData(CFX_WideString& wsText) const {
+ m_BlockBuffer.GetTextData(wsText, 0, m_iTextDataLength);
+ }
+ void GetTargetData(CFX_WideString& wsData) const {
+ m_BlockBuffer.GetTextData(wsData, 0, m_iTextDataLength);
+ }
+
+ protected:
+ enum class FDE_XmlSyntaxState {
+ Text,
+ Node,
+ Target,
+ Tag,
+ AttriName,
+ AttriEqualSign,
+ AttriQuotation,
+ AttriValue,
+ Entity,
+ EntityDecimal,
+ EntityHex,
+ CloseInstruction,
+ BreakElement,
+ CloseElement,
+ SkipDeclNode,
+ DeclCharData,
+ SkipComment,
+ SkipCommentOrDecl,
+ SkipCData,
+ TargetData
+ };
+
+ void ParseTextChar(wchar_t ch);
+
+ CFX_RetainPtr<IFGAS_Stream> m_pStream;
+ int32_t m_iXMLPlaneSize;
+ int32_t m_iCurrentPos;
+ int32_t m_iCurrentNodeNum;
+ int32_t m_iLastNodeNum;
+ int32_t m_iParsedChars;
+ int32_t m_iParsedBytes;
+ wchar_t* m_pBuffer;
+ int32_t m_iBufferChars;
+ bool m_bEOS;
+ wchar_t* m_pStart;
+ wchar_t* m_pEnd;
+ FDE_XMLNODE m_CurNode;
+ std::stack<FDE_XMLNODE> m_XMLNodeStack;
+ CFX_BlockBuffer m_BlockBuffer;
+ int32_t m_iAllocStep;
+ int32_t& m_iDataLength;
+ wchar_t* m_pCurrentBlock;
+ int32_t m_iIndexInBlock;
+ int32_t m_iTextDataLength;
+ FDE_XmlSyntaxResult m_syntaxParserResult;
+ FDE_XmlSyntaxState m_syntaxParserState;
+ wchar_t m_wQuotationMark;
+ int32_t m_iEntityStart;
+ std::stack<wchar_t> m_SkipStack;
+ wchar_t m_SkipChar;
+};
+
+#endif // XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_
diff --git a/xfa/fde/xml/fde_xml_imp_unittest.cpp b/xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp
index 3bd46ab77c..9b04028123 100644
--- a/xfa/fde/xml/fde_xml_imp_unittest.cpp
+++ b/xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmlsyntaxparser.h"
#include <memory>
diff --git a/xfa/fde/xml/cfde_xmltext.cpp b/xfa/fde/xml/cfde_xmltext.cpp
new file mode 100644
index 0000000000..6bc2d64354
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmltext.cpp
@@ -0,0 +1,21 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "xfa/fde/xml/cfde_xmltext.h"
+
+CFDE_XMLText::CFDE_XMLText(const CFX_WideString& wsText)
+ : CFDE_XMLNode(), m_wsText(wsText) {}
+
+CFDE_XMLText::~CFDE_XMLText() {}
+
+FDE_XMLNODETYPE CFDE_XMLText::GetType() const {
+ return FDE_XMLNODE_Text;
+}
+
+CFDE_XMLNode* CFDE_XMLText::Clone(bool bRecursive) {
+ CFDE_XMLText* pClone = new CFDE_XMLText(m_wsText);
+ return pClone;
+}
diff --git a/xfa/fde/xml/cfde_xmltext.h b/xfa/fde/xml/cfde_xmltext.h
new file mode 100644
index 0000000000..6f3945be09
--- /dev/null
+++ b/xfa/fde/xml/cfde_xmltext.h
@@ -0,0 +1,28 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef XFA_FDE_XML_CFDE_XMLTEXT_H_
+#define XFA_FDE_XML_CFDE_XMLTEXT_H_
+
+#include "core/fxcrt/fx_string.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+
+class CFDE_XMLText : public CFDE_XMLNode {
+ public:
+ explicit CFDE_XMLText(const CFX_WideString& wsText);
+ ~CFDE_XMLText() override;
+
+ // CFDE_XMLNode
+ FDE_XMLNODETYPE GetType() const override;
+ CFDE_XMLNode* Clone(bool bRecursive) override;
+
+ void GetText(CFX_WideString& wsText) const { wsText = m_wsText; }
+ void SetText(const CFX_WideString& wsText) { m_wsText = wsText; }
+
+ CFX_WideString m_wsText;
+};
+
+#endif // XFA_FDE_XML_CFDE_XMLTEXT_H_
diff --git a/xfa/fde/xml/fde_xml.h b/xfa/fde/xml/fde_xml.h
deleted file mode 100644
index 399a930615..0000000000
--- a/xfa/fde/xml/fde_xml.h
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#ifndef XFA_FDE_XML_FDE_XML_H_
-#define XFA_FDE_XML_FDE_XML_H_
-
-#include "core/fxcrt/fx_system.h"
-
-enum class FDE_XmlSyntaxResult {
- None,
- InstructionOpen,
- InstructionClose,
- ElementOpen,
- ElementBreak,
- ElementClose,
- TargetName,
- TagName,
- AttriName,
- AttriValue,
- Text,
- CData,
- TargetData,
- Error,
- EndOfString
-};
-
-enum FDE_XMLNODETYPE {
- FDE_XMLNODE_Unknown = 0,
- FDE_XMLNODE_Instruction,
- FDE_XMLNODE_Element,
- FDE_XMLNODE_Text,
- FDE_XMLNODE_CharData,
-};
-
-struct FDE_XMLNODE {
- int32_t iNodeNum;
- FDE_XMLNODETYPE eNodeType;
-};
-
-bool FDE_IsXMLValidChar(wchar_t ch);
-
-#endif // XFA_FDE_XML_FDE_XML_H_
diff --git a/xfa/fde/xml/fde_xml_imp.cpp b/xfa/fde/xml/fde_xml_imp.cpp
deleted file mode 100644
index 2de48ef1f4..0000000000
--- a/xfa/fde/xml/fde_xml_imp.cpp
+++ /dev/null
@@ -1,1832 +0,0 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#include "xfa/fde/xml/fde_xml_imp.h"
-
-#include <algorithm>
-#include <utility>
-
-#include "core/fxcrt/fx_ext.h"
-#include "core/fxcrt/fx_safe_types.h"
-#include "third_party/base/ptr_util.h"
-#include "third_party/base/stl_util.h"
-#include "xfa/fde/xml/cfde_xml_parser.h"
-#include "xfa/fgas/crt/fgas_codepage.h"
-
-namespace {
-
-const uint32_t kMaxCharRange = 0x10ffff;
-
-const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09},
- {0x0A, 0x0A},
- {0x0D, 0x0D},
- {0x20, 0xD7FF},
- {0xE000, 0xFFFD}};
-
-bool FDE_IsXMLWhiteSpace(wchar_t ch) {
- return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09;
-}
-
-struct FDE_XMLNAMECHAR {
- uint16_t wStart;
- uint16_t wEnd;
- bool bStartChar;
-};
-
-const FDE_XMLNAMECHAR g_XMLNameChars[] = {
- {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false},
- {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true},
- {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true},
- {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true},
- {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false},
- {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true},
- {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true},
-};
-
-bool FDE_IsXMLNameChar(wchar_t ch, bool bFirstChar) {
- int32_t iStart = 0;
- int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1;
- while (iStart <= iEnd) {
- int32_t iMid = (iStart + iEnd) / 2;
- if (ch < g_XMLNameChars[iMid].wStart) {
- iEnd = iMid - 1;
- } else if (ch > g_XMLNameChars[iMid].wEnd) {
- iStart = iMid + 1;
- } else {
- return bFirstChar ? g_XMLNameChars[iMid].bStartChar : true;
- }
- }
- return false;
-}
-
-} // namespace
-
-bool FDE_IsXMLValidChar(wchar_t ch) {
- int32_t iStart = 0;
- int32_t iEnd = FX_ArraySize(g_XMLValidCharRange) - 1;
- while (iStart <= iEnd) {
- int32_t iMid = (iStart + iEnd) / 2;
- if (ch < g_XMLValidCharRange[iMid][0]) {
- iEnd = iMid - 1;
- } else if (ch > g_XMLValidCharRange[iMid][1]) {
- iStart = iMid + 1;
- } else {
- return true;
- }
- }
- return false;
-}
-
-CFDE_XMLNode::CFDE_XMLNode()
- : m_pParent(nullptr),
- m_pChild(nullptr),
- m_pPrior(nullptr),
- m_pNext(nullptr) {}
-
-FDE_XMLNODETYPE CFDE_XMLNode::GetType() const {
- return FDE_XMLNODE_Unknown;
-}
-
-CFDE_XMLNode::~CFDE_XMLNode() {
- DeleteChildren();
-}
-
-void CFDE_XMLNode::DeleteChildren() {
- CFDE_XMLNode* pChild = m_pChild;
- while (pChild) {
- CFDE_XMLNode* pNext = pChild->m_pNext;
- delete pChild;
- pChild = pNext;
- }
- m_pChild = nullptr;
-}
-
-int32_t CFDE_XMLNode::CountChildNodes() const {
- int32_t iCount = 0;
- CFDE_XMLNode* pChild = m_pChild;
- while (pChild) {
- iCount++;
- pChild = pChild->m_pNext;
- }
- return iCount;
-}
-
-CFDE_XMLNode* CFDE_XMLNode::GetChildNode(int32_t index) const {
- CFDE_XMLNode* pChild = m_pChild;
- while (pChild) {
- if (index == 0) {
- return pChild;
- }
- index--;
- pChild = pChild->m_pNext;
- }
- return nullptr;
-}
-
-int32_t CFDE_XMLNode::GetChildNodeIndex(CFDE_XMLNode* pNode) const {
- int32_t index = 0;
- CFDE_XMLNode* pChild = m_pChild;
- while (pChild) {
- if (pChild == pNode) {
- return index;
- }
- index++;
- pChild = pChild->m_pNext;
- }
- return -1;
-}
-
-CFDE_XMLNode* CFDE_XMLNode::GetPath(const wchar_t* pPath,
- int32_t iLength,
- bool bQualifiedName) const {
- ASSERT(pPath);
- if (iLength < 0) {
- iLength = FXSYS_wcslen(pPath);
- }
- if (iLength == 0) {
- return nullptr;
- }
- CFX_WideString csPath;
- const wchar_t* pStart = pPath;
- const wchar_t* pEnd = pPath + iLength;
- wchar_t ch;
- while (pStart < pEnd) {
- ch = *pStart++;
- if (ch == L'/') {
- break;
- } else {
- csPath += ch;
- }
- }
- iLength -= pStart - pPath;
- CFDE_XMLNode* pFind = nullptr;
- if (csPath.GetLength() < 1) {
- pFind = GetNodeItem(CFDE_XMLNode::Root);
- } else if (csPath.Compare(L"..") == 0) {
- pFind = m_pParent;
- } else if (csPath.Compare(L".") == 0) {
- pFind = (CFDE_XMLNode*)this;
- } else {
- CFX_WideString wsTag;
- CFDE_XMLNode* pNode = m_pChild;
- while (pNode) {
- if (pNode->GetType() == FDE_XMLNODE_Element) {
- if (bQualifiedName) {
- ((CFDE_XMLElement*)pNode)->GetTagName(wsTag);
- } else {
- ((CFDE_XMLElement*)pNode)->GetLocalTagName(wsTag);
- }
- if (wsTag.Compare(csPath) == 0) {
- if (iLength < 1) {
- pFind = pNode;
- } else {
- pFind = pNode->GetPath(pStart, iLength, bQualifiedName);
- }
- if (pFind)
- return pFind;
- }
- }
- pNode = pNode->m_pNext;
- }
- }
- if (!pFind || iLength < 1)
- return pFind;
- return pFind->GetPath(pStart, iLength, bQualifiedName);
-}
-
-int32_t CFDE_XMLNode::InsertChildNode(CFDE_XMLNode* pNode, int32_t index) {
- pNode->m_pParent = this;
- if (!m_pChild) {
- m_pChild = pNode;
- pNode->m_pPrior = nullptr;
- pNode->m_pNext = nullptr;
- return 0;
- }
- if (index == 0) {
- pNode->m_pNext = m_pChild;
- pNode->m_pPrior = nullptr;
- m_pChild->m_pPrior = pNode;
- m_pChild = pNode;
- return 0;
- }
- int32_t iCount = 0;
- CFDE_XMLNode* pFind = m_pChild;
- while (++iCount != index && pFind->m_pNext) {
- pFind = pFind->m_pNext;
- }
- pNode->m_pPrior = pFind;
- pNode->m_pNext = pFind->m_pNext;
- if (pFind->m_pNext)
- pFind->m_pNext->m_pPrior = pNode;
- pFind->m_pNext = pNode;
- return iCount;
-}
-
-void CFDE_XMLNode::RemoveChildNode(CFDE_XMLNode* pNode) {
- ASSERT(m_pChild && pNode);
- if (m_pChild == pNode) {
- m_pChild = pNode->m_pNext;
- } else {
- pNode->m_pPrior->m_pNext = pNode->m_pNext;
- }
- if (pNode->m_pNext)
- pNode->m_pNext->m_pPrior = pNode->m_pPrior;
- pNode->m_pParent = nullptr;
- pNode->m_pNext = nullptr;
- pNode->m_pPrior = nullptr;
-}
-
-CFDE_XMLNode* CFDE_XMLNode::GetNodeItem(CFDE_XMLNode::NodeItem eItem) const {
- switch (eItem) {
- case CFDE_XMLNode::Root: {
- CFDE_XMLNode* pParent = (CFDE_XMLNode*)this;
- while (pParent->m_pParent) {
- pParent = pParent->m_pParent;
- }
- return pParent;
- }
- case CFDE_XMLNode::Parent:
- return m_pParent;
- case CFDE_XMLNode::FirstSibling: {
- CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
- while (pItem->m_pPrior) {
- pItem = pItem->m_pPrior;
- }
- return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
- }
- case CFDE_XMLNode::PriorSibling:
- return m_pPrior;
- case CFDE_XMLNode::NextSibling:
- return m_pNext;
- case CFDE_XMLNode::LastSibling: {
- CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
- while (pItem->m_pNext)
- pItem = pItem->m_pNext;
- return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
- }
- case CFDE_XMLNode::FirstNeighbor: {
- CFDE_XMLNode* pParent = (CFDE_XMLNode*)this;
- while (pParent->m_pParent)
- pParent = pParent->m_pParent;
- return pParent == (CFDE_XMLNode*)this ? nullptr : pParent;
- }
- case CFDE_XMLNode::PriorNeighbor: {
- if (!m_pPrior)
- return m_pParent;
-
- CFDE_XMLNode* pItem = m_pPrior;
- while (pItem->m_pChild) {
- pItem = pItem->m_pChild;
- while (pItem->m_pNext)
- pItem = pItem->m_pNext;
- }
- return pItem;
- }
- case CFDE_XMLNode::NextNeighbor: {
- if (m_pChild)
- return m_pChild;
- if (m_pNext)
- return m_pNext;
- CFDE_XMLNode* pItem = m_pParent;
- while (pItem) {
- if (pItem->m_pNext)
- return pItem->m_pNext;
- pItem = pItem->m_pParent;
- }
- return nullptr;
- }
- case CFDE_XMLNode::LastNeighbor: {
- CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
- while (pItem->m_pParent) {
- pItem = pItem->m_pParent;
- }
- while (true) {
- while (pItem->m_pNext)
- pItem = pItem->m_pNext;
- if (!pItem->m_pChild)
- break;
- pItem = pItem->m_pChild;
- }
- return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
- }
- case CFDE_XMLNode::FirstChild:
- return m_pChild;
- case CFDE_XMLNode::LastChild: {
- if (!m_pChild)
- return nullptr;
-
- CFDE_XMLNode* pChild = m_pChild;
- while (pChild->m_pNext)
- pChild = pChild->m_pNext;
- return pChild;
- }
- default:
- break;
- }
- return nullptr;
-}
-
-int32_t CFDE_XMLNode::GetNodeLevel() const {
- int32_t iLevel = 0;
- const CFDE_XMLNode* pItem = m_pParent;
- while (pItem) {
- iLevel++;
- pItem = pItem->m_pParent;
- }
- return iLevel;
-}
-
-bool CFDE_XMLNode::InsertNodeItem(CFDE_XMLNode::NodeItem eItem,
- CFDE_XMLNode* pNode) {
- switch (eItem) {
- case CFDE_XMLNode::NextSibling: {
- pNode->m_pParent = m_pParent;
- pNode->m_pNext = m_pNext;
- pNode->m_pPrior = this;
- if (m_pNext) {
- m_pNext->m_pPrior = pNode;
- }
- m_pNext = pNode;
- return true;
- }
- case CFDE_XMLNode::PriorSibling: {
- pNode->m_pParent = m_pParent;
- pNode->m_pNext = this;
- pNode->m_pPrior = m_pPrior;
- if (m_pPrior) {
- m_pPrior->m_pNext = pNode;
- } else if (m_pParent) {
- m_pParent->m_pChild = pNode;
- }
- m_pPrior = pNode;
- return true;
- }
- default:
- return false;
- }
-}
-
-CFDE_XMLNode* CFDE_XMLNode::RemoveNodeItem(CFDE_XMLNode::NodeItem eItem) {
- CFDE_XMLNode* pNode = nullptr;
- switch (eItem) {
- case CFDE_XMLNode::NextSibling:
- if (m_pNext) {
- pNode = m_pNext;
- m_pNext = pNode->m_pNext;
- if (m_pNext) {
- m_pNext->m_pPrior = this;
- }
- pNode->m_pParent = nullptr;
- pNode->m_pNext = nullptr;
- pNode->m_pPrior = nullptr;
- }
- break;
- default:
- break;
- }
- return pNode;
-}
-
-CFDE_XMLNode* CFDE_XMLNode::Clone(bool bRecursive) {
- return nullptr;
-}
-
-void CFDE_XMLNode::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream) {
- CFDE_XMLNode* pNode = (CFDE_XMLNode*)this;
- switch (pNode->GetType()) {
- case FDE_XMLNODE_Instruction: {
- CFX_WideString ws;
- CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode;
- if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) {
- ws = L"<?xml version=\"1.0\" encoding=\"";
- uint16_t wCodePage = pXMLStream->GetCodePage();
- if (wCodePage == FX_CODEPAGE_UTF16LE) {
- ws += L"UTF-16";
- } else if (wCodePage == FX_CODEPAGE_UTF16BE) {
- ws += L"UTF-16be";
- } else {
- ws += L"UTF-8";
- }
- ws += L"\"?>";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- } else {
- ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str());
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes;
- int32_t i;
- int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
- CFX_WideString wsValue;
- for (i = 0; i < iCount; i += 2) {
- ws = L" ";
- ws += attributes[i];
- ws += L"=\"";
- wsValue = attributes[i + 1];
- wsValue.Replace(L"&", L"&amp;");
- wsValue.Replace(L"<", L"&lt;");
- wsValue.Replace(L">", L"&gt;");
- wsValue.Replace(L"\'", L"&apos;");
- wsValue.Replace(L"\"", L"&quot;");
- ws += wsValue;
- ws += L"\"";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- }
- std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData;
- iCount = pdfium::CollectionSize<int32_t>(targetdata);
- for (i = 0; i < iCount; i++) {
- ws = L" \"";
- ws += targetdata[i];
- ws += L"\"";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- }
- ws = L"?>";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- }
- } break;
- case FDE_XMLNODE_Element: {
- CFX_WideString ws;
- ws = L"<";
- ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- std::vector<CFX_WideString>& attributes =
- static_cast<CFDE_XMLElement*>(pNode)->m_Attributes;
- int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
- CFX_WideString wsValue;
- for (int32_t i = 0; i < iCount; i += 2) {
- ws = L" ";
- ws += attributes[i];
- ws += L"=\"";
- wsValue = attributes[i + 1];
- wsValue.Replace(L"&", L"&amp;");
- wsValue.Replace(L"<", L"&lt;");
- wsValue.Replace(L">", L"&gt;");
- wsValue.Replace(L"\'", L"&apos;");
- wsValue.Replace(L"\"", L"&quot;");
- ws += wsValue;
- ws += L"\"";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- }
- if (pNode->m_pChild) {
- ws = L"\n>";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- CFDE_XMLNode* pChild = pNode->m_pChild;
- while (pChild) {
- pChild->SaveXMLNode(pXMLStream);
- pChild = pChild->m_pNext;
- }
- ws = L"</";
- ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
- ws += L"\n>";
- } else {
- ws = L"\n/>";
- }
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- } break;
- case FDE_XMLNODE_Text: {
- CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText;
- ws.Replace(L"&", L"&amp;");
- ws.Replace(L"<", L"&lt;");
- ws.Replace(L">", L"&gt;");
- ws.Replace(L"\'", L"&apos;");
- ws.Replace(L"\"", L"&quot;");
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- } break;
- case FDE_XMLNODE_CharData: {
- CFX_WideString ws = L"<![CDATA[";
- ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData;
- ws += L"]]>";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- } break;
- case FDE_XMLNODE_Unknown:
- break;
- default:
- break;
- }
-}
-
-void CFDE_XMLNode::CloneChildren(CFDE_XMLNode* pClone) {
- if (!m_pChild) {
- return;
- }
- CFDE_XMLNode* pNext = m_pChild;
- CFDE_XMLNode* pCloneNext = pNext->Clone(true);
- pClone->InsertChildNode(pCloneNext);
- pNext = pNext->m_pNext;
- while (pNext) {
- CFDE_XMLNode* pChild = pNext->Clone(true);
- pCloneNext->InsertNodeItem(CFDE_XMLNode::NextSibling, pChild);
- pCloneNext = pChild;
- pNext = pNext->m_pNext;
- }
-}
-
-CFDE_XMLInstruction::CFDE_XMLInstruction(const CFX_WideString& wsTarget)
- : m_wsTarget(wsTarget) {
- ASSERT(m_wsTarget.GetLength() > 0);
-}
-
-FDE_XMLNODETYPE CFDE_XMLInstruction::GetType() const {
- return FDE_XMLNODE_Instruction;
-}
-
-CFDE_XMLNode* CFDE_XMLInstruction::Clone(bool bRecursive) {
- CFDE_XMLInstruction* pClone = new CFDE_XMLInstruction(m_wsTarget);
- if (!pClone)
- return nullptr;
-
- pClone->m_Attributes = m_Attributes;
- pClone->m_TargetData = m_TargetData;
- if (bRecursive)
- CloneChildren(pClone);
-
- return pClone;
-}
-
-int32_t CFDE_XMLInstruction::CountAttributes() const {
- return pdfium::CollectionSize<int32_t>(m_Attributes) / 2;
-}
-
-bool CFDE_XMLInstruction::GetAttribute(int32_t index,
- CFX_WideString& wsAttriName,
- CFX_WideString& wsAttriValue) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- ASSERT(index > -1 && index < iCount / 2);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (index == 0) {
- wsAttriName = m_Attributes[i];
- wsAttriValue = m_Attributes[i + 1];
- return true;
- }
- index--;
- }
- return false;
-}
-
-bool CFDE_XMLInstruction::HasAttribute(const wchar_t* pwsAttriName) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0) {
- return true;
- }
- }
- return false;
-}
-
-void CFDE_XMLInstruction::GetString(const wchar_t* pwsAttriName,
- CFX_WideString& wsAttriValue,
- const wchar_t* pwsDefValue) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0) {
- wsAttriValue = m_Attributes[i + 1];
- return;
- }
- }
- wsAttriValue = pwsDefValue;
-}
-
-void CFDE_XMLInstruction::SetString(const CFX_WideString& wsAttriName,
- const CFX_WideString& wsAttriValue) {
- ASSERT(wsAttriName.GetLength() > 0);
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(wsAttriName) == 0) {
- m_Attributes[i] = wsAttriName;
- m_Attributes[i + 1] = wsAttriValue;
- return;
- }
- }
- m_Attributes.push_back(wsAttriName);
- m_Attributes.push_back(wsAttriValue);
-}
-
-int32_t CFDE_XMLInstruction::GetInteger(const wchar_t* pwsAttriName,
- int32_t iDefValue) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0) {
- return FXSYS_wtoi(m_Attributes[i + 1].c_str());
- }
- }
- return iDefValue;
-}
-
-void CFDE_XMLInstruction::SetInteger(const wchar_t* pwsAttriName,
- int32_t iAttriValue) {
- CFX_WideString wsValue;
- wsValue.Format(L"%d", iAttriValue);
- SetString(pwsAttriName, wsValue);
-}
-
-float CFDE_XMLInstruction::GetFloat(const wchar_t* pwsAttriName,
- float fDefValue) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0) {
- return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr);
- }
- }
- return fDefValue;
-}
-
-void CFDE_XMLInstruction::SetFloat(const wchar_t* pwsAttriName,
- float fAttriValue) {
- CFX_WideString wsValue;
- wsValue.Format(L"%f", fAttriValue);
- SetString(pwsAttriName, wsValue);
-}
-
-void CFDE_XMLInstruction::RemoveAttribute(const wchar_t* pwsAttriName) {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0) {
- m_Attributes.erase(m_Attributes.begin() + i,
- m_Attributes.begin() + i + 2);
- return;
- }
- }
-}
-
-int32_t CFDE_XMLInstruction::CountData() const {
- return pdfium::CollectionSize<int32_t>(m_TargetData);
-}
-
-bool CFDE_XMLInstruction::GetData(int32_t index, CFX_WideString& wsData) const {
- if (!pdfium::IndexInBounds(m_TargetData, index))
- return false;
-
- wsData = m_TargetData[index];
- return true;
-}
-
-void CFDE_XMLInstruction::AppendData(const CFX_WideString& wsData) {
- m_TargetData.push_back(wsData);
-}
-
-void CFDE_XMLInstruction::RemoveData(int32_t index) {
- if (pdfium::IndexInBounds(m_TargetData, index))
- m_TargetData.erase(m_TargetData.begin() + index);
-}
-
-CFDE_XMLInstruction::~CFDE_XMLInstruction() {}
-
-CFDE_XMLElement::CFDE_XMLElement(const CFX_WideString& wsTag)
- : CFDE_XMLNode(), m_wsTag(wsTag), m_Attributes() {
- ASSERT(m_wsTag.GetLength() > 0);
-}
-
-CFDE_XMLElement::~CFDE_XMLElement() {}
-
-FDE_XMLNODETYPE CFDE_XMLElement::GetType() const {
- return FDE_XMLNODE_Element;
-}
-
-CFDE_XMLNode* CFDE_XMLElement::Clone(bool bRecursive) {
- CFDE_XMLElement* pClone = new CFDE_XMLElement(m_wsTag);
- if (!pClone)
- return nullptr;
-
- pClone->m_Attributes = m_Attributes;
- if (bRecursive) {
- CloneChildren(pClone);
- } else {
- CFX_WideString wsText;
- CFDE_XMLNode* pChild = m_pChild;
- while (pChild) {
- switch (pChild->GetType()) {
- case FDE_XMLNODE_Text:
- wsText += ((CFDE_XMLText*)pChild)->m_wsText;
- break;
- default:
- break;
- }
- pChild = pChild->m_pNext;
- }
- pClone->SetTextData(wsText);
- }
- return pClone;
-}
-
-void CFDE_XMLElement::GetTagName(CFX_WideString& wsTag) const {
- wsTag = m_wsTag;
-}
-
-void CFDE_XMLElement::GetLocalTagName(CFX_WideString& wsTag) const {
- FX_STRSIZE iFind = m_wsTag.Find(L':', 0);
- if (iFind < 0) {
- wsTag = m_wsTag;
- } else {
- wsTag = m_wsTag.Right(m_wsTag.GetLength() - iFind - 1);
- }
-}
-
-void CFDE_XMLElement::GetNamespacePrefix(CFX_WideString& wsPrefix) const {
- FX_STRSIZE iFind = m_wsTag.Find(L':', 0);
- if (iFind < 0) {
- wsPrefix.clear();
- } else {
- wsPrefix = m_wsTag.Left(iFind);
- }
-}
-
-void CFDE_XMLElement::GetNamespaceURI(CFX_WideString& wsNamespace) const {
- CFX_WideString wsAttri(L"xmlns"), wsPrefix;
- GetNamespacePrefix(wsPrefix);
- if (wsPrefix.GetLength() > 0) {
- wsAttri += L":";
- wsAttri += wsPrefix;
- }
- wsNamespace.clear();
- CFDE_XMLNode* pNode = (CFDE_XMLNode*)this;
- while (pNode) {
- if (pNode->GetType() != FDE_XMLNODE_Element) {
- break;
- }
- CFDE_XMLElement* pElement = (CFDE_XMLElement*)pNode;
- if (!pElement->HasAttribute(wsAttri.c_str())) {
- pNode = pNode->GetNodeItem(CFDE_XMLNode::Parent);
- continue;
- }
- pElement->GetString(wsAttri.c_str(), wsNamespace);
- break;
- }
-}
-
-int32_t CFDE_XMLElement::CountAttributes() const {
- return pdfium::CollectionSize<int32_t>(m_Attributes) / 2;
-}
-
-bool CFDE_XMLElement::GetAttribute(int32_t index,
- CFX_WideString& wsAttriName,
- CFX_WideString& wsAttriValue) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- ASSERT(index > -1 && index < iCount / 2);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (index == 0) {
- wsAttriName = m_Attributes[i];
- wsAttriValue = m_Attributes[i + 1];
- return true;
- }
- index--;
- }
- return false;
-}
-
-bool CFDE_XMLElement::HasAttribute(const wchar_t* pwsAttriName) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0)
- return true;
- }
- return false;
-}
-
-void CFDE_XMLElement::GetString(const wchar_t* pwsAttriName,
- CFX_WideString& wsAttriValue,
- const wchar_t* pwsDefValue) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0) {
- wsAttriValue = m_Attributes[i + 1];
- return;
- }
- }
- wsAttriValue = pwsDefValue;
-}
-
-void CFDE_XMLElement::SetString(const CFX_WideString& wsAttriName,
- const CFX_WideString& wsAttriValue) {
- ASSERT(wsAttriName.GetLength() > 0);
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(wsAttriName) == 0) {
- m_Attributes[i] = wsAttriName;
- m_Attributes[i + 1] = wsAttriValue;
- return;
- }
- }
- m_Attributes.push_back(wsAttriName);
- m_Attributes.push_back(wsAttriValue);
-}
-
-int32_t CFDE_XMLElement::GetInteger(const wchar_t* pwsAttriName,
- int32_t iDefValue) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0) {
- return FXSYS_wtoi(m_Attributes[i + 1].c_str());
- }
- }
- return iDefValue;
-}
-
-void CFDE_XMLElement::SetInteger(const wchar_t* pwsAttriName,
- int32_t iAttriValue) {
- CFX_WideString wsValue;
- wsValue.Format(L"%d", iAttriValue);
- SetString(pwsAttriName, wsValue);
-}
-
-float CFDE_XMLElement::GetFloat(const wchar_t* pwsAttriName,
- float fDefValue) const {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0) {
- return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr);
- }
- }
- return fDefValue;
-}
-
-void CFDE_XMLElement::SetFloat(const wchar_t* pwsAttriName, float fAttriValue) {
- CFX_WideString wsValue;
- wsValue.Format(L"%f", fAttriValue);
- SetString(pwsAttriName, wsValue);
-}
-
-void CFDE_XMLElement::RemoveAttribute(const wchar_t* pwsAttriName) {
- int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
- for (int32_t i = 0; i < iCount; i += 2) {
- if (m_Attributes[i].Compare(pwsAttriName) == 0) {
- m_Attributes.erase(m_Attributes.begin() + i,
- m_Attributes.begin() + i + 2);
- return;
- }
- }
-}
-
-void CFDE_XMLElement::GetTextData(CFX_WideString& wsText) const {
- CFX_WideTextBuf buffer;
- CFDE_XMLNode* pChild = m_pChild;
- while (pChild) {
- switch (pChild->GetType()) {
- case FDE_XMLNODE_Text:
- buffer << ((CFDE_XMLText*)pChild)->m_wsText;
- break;
- case FDE_XMLNODE_CharData:
- buffer << ((CFDE_XMLCharData*)pChild)->m_wsCharData;
- break;
- default:
- break;
- }
- pChild = pChild->m_pNext;
- }
- wsText = buffer.AsStringC();
-}
-
-void CFDE_XMLElement::SetTextData(const CFX_WideString& wsText) {
- if (wsText.GetLength() < 1) {
- return;
- }
- InsertChildNode(new CFDE_XMLText(wsText));
-}
-
-CFDE_XMLText::CFDE_XMLText(const CFX_WideString& wsText)
- : CFDE_XMLNode(), m_wsText(wsText) {}
-
-FDE_XMLNODETYPE CFDE_XMLText::GetType() const {
- return FDE_XMLNODE_Text;
-}
-
-CFDE_XMLNode* CFDE_XMLText::Clone(bool bRecursive) {
- CFDE_XMLText* pClone = new CFDE_XMLText(m_wsText);
- return pClone;
-}
-
-CFDE_XMLText::~CFDE_XMLText() {}
-
-CFDE_XMLCharData::CFDE_XMLCharData(const CFX_WideString& wsCData)
- : CFDE_XMLDeclaration(), m_wsCharData(wsCData) {}
-
-FDE_XMLNODETYPE CFDE_XMLCharData::GetType() const {
- return FDE_XMLNODE_CharData;
-}
-
-CFDE_XMLNode* CFDE_XMLCharData::Clone(bool bRecursive) {
- CFDE_XMLCharData* pClone = new CFDE_XMLCharData(m_wsCharData);
- return pClone;
-}
-
-CFDE_XMLCharData::~CFDE_XMLCharData() {}
-
-CFDE_XMLDoc::CFDE_XMLDoc()
- : m_iStatus(0), m_pRoot(pdfium::MakeUnique<CFDE_XMLNode>()) {
- m_pRoot->InsertChildNode(new CFDE_XMLInstruction(L"xml"));
-}
-
-CFDE_XMLDoc::~CFDE_XMLDoc() {}
-
-bool CFDE_XMLDoc::LoadXML(std::unique_ptr<CFDE_XMLParser> pXMLParser) {
- if (!pXMLParser)
- return false;
-
- m_iStatus = 0;
- m_pStream.Reset();
- m_pRoot->DeleteChildren();
- m_pXMLParser = std::move(pXMLParser);
- return true;
-}
-
-int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) {
- if (m_iStatus < 100)
- m_iStatus = m_pXMLParser->DoParser(pPause);
-
- return m_iStatus;
-}
-
-void CFDE_XMLDoc::CloseXML() {
- m_pXMLParser.reset();
-}
-
-void CFDE_XMLDoc::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
- CFDE_XMLNode* pINode) {
- CFDE_XMLNode* pNode = (CFDE_XMLNode*)pINode;
- switch (pNode->GetType()) {
- case FDE_XMLNODE_Instruction: {
- CFX_WideString ws;
- CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode;
- if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) {
- ws = L"<?xml version=\"1.0\" encoding=\"";
- uint16_t wCodePage = pXMLStream->GetCodePage();
- if (wCodePage == FX_CODEPAGE_UTF16LE) {
- ws += L"UTF-16";
- } else if (wCodePage == FX_CODEPAGE_UTF16BE) {
- ws += L"UTF-16be";
- } else {
- ws += L"UTF-8";
- }
- ws += L"\"?>";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- } else {
- ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str());
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes;
- int32_t i;
- int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
- CFX_WideString wsValue;
- for (i = 0; i < iCount; i += 2) {
- ws = L" ";
- ws += attributes[i];
- ws += L"=\"";
- wsValue = attributes[i + 1];
- wsValue.Replace(L"&", L"&amp;");
- wsValue.Replace(L"<", L"&lt;");
- wsValue.Replace(L">", L"&gt;");
- wsValue.Replace(L"\'", L"&apos;");
- wsValue.Replace(L"\"", L"&quot;");
- ws += wsValue;
- ws += L"\"";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- }
- std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData;
- iCount = pdfium::CollectionSize<int32_t>(targetdata);
- for (i = 0; i < iCount; i++) {
- ws = L" \"";
- ws += targetdata[i];
- ws += L"\"";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- }
- ws = L"?>";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- }
- } break;
- case FDE_XMLNODE_Element: {
- CFX_WideString ws;
- ws = L"<";
- ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- std::vector<CFX_WideString>& attributes =
- static_cast<CFDE_XMLElement*>(pNode)->m_Attributes;
- int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
- CFX_WideString wsValue;
- for (int32_t i = 0; i < iCount; i += 2) {
- ws = L" ";
- ws += attributes[i];
- ws += L"=\"";
- wsValue = attributes[i + 1];
- wsValue.Replace(L"&", L"&amp;");
- wsValue.Replace(L"<", L"&lt;");
- wsValue.Replace(L">", L"&gt;");
- wsValue.Replace(L"\'", L"&apos;");
- wsValue.Replace(L"\"", L"&quot;");
- ws += wsValue;
- ws += L"\"";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- }
- if (pNode->m_pChild) {
- ws = L"\n>";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- CFDE_XMLNode* pChild = pNode->m_pChild;
- while (pChild) {
- SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pChild));
- pChild = pChild->m_pNext;
- }
- ws = L"</";
- ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
- ws += L"\n>";
- } else {
- ws = L"\n/>";
- }
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- } break;
- case FDE_XMLNODE_Text: {
- CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText;
- ws.Replace(L"&", L"&amp;");
- ws.Replace(L"<", L"&lt;");
- ws.Replace(L">", L"&gt;");
- ws.Replace(L"\'", L"&apos;");
- ws.Replace(L"\"", L"&quot;");
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- } break;
- case FDE_XMLNODE_CharData: {
- CFX_WideString ws = L"<![CDATA[";
- ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData;
- ws += L"]]>";
- pXMLStream->WriteString(ws.c_str(), ws.GetLength());
- } break;
- case FDE_XMLNODE_Unknown:
- break;
- default:
- break;
- }
-}
-
-CFDE_BlockBuffer::CFDE_BlockBuffer(int32_t iAllocStep)
- : m_iDataLength(0),
- m_iBufferSize(0),
- m_iAllocStep(iAllocStep),
- m_iStartPosition(0) {}
-
-CFDE_BlockBuffer::~CFDE_BlockBuffer() {
- ClearBuffer();
-}
-
-wchar_t* CFDE_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) {
- iIndexInBlock = 0;
- if (m_BlockArray.empty())
- return nullptr;
-
- int32_t iRealIndex = m_iStartPosition + m_iDataLength;
- if (iRealIndex == m_iBufferSize) {
- m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep));
- m_iBufferSize += m_iAllocStep;
- return m_BlockArray.back().get();
- }
- iIndexInBlock = iRealIndex % m_iAllocStep;
- return m_BlockArray[iRealIndex / m_iAllocStep].get();
-}
-
-bool CFDE_BlockBuffer::InitBuffer(int32_t iBufferSize) {
- ClearBuffer();
- int32_t iNumOfBlock = (iBufferSize - 1) / m_iAllocStep + 1;
- for (int32_t i = 0; i < iNumOfBlock; i++)
- m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep));
-
- m_iBufferSize = iNumOfBlock * m_iAllocStep;
- return true;
-}
-
-void CFDE_BlockBuffer::SetTextChar(int32_t iIndex, wchar_t ch) {
- if (iIndex < 0) {
- return;
- }
- int32_t iRealIndex = m_iStartPosition + iIndex;
- int32_t iBlockIndex = iRealIndex / m_iAllocStep;
- int32_t iInnerIndex = iRealIndex % m_iAllocStep;
- int32_t iBlockSize = pdfium::CollectionSize<int32_t>(m_BlockArray);
- if (iBlockIndex >= iBlockSize) {
- int32_t iNewBlocks = iBlockIndex - iBlockSize + 1;
- do {
- m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep));
- m_iBufferSize += m_iAllocStep;
- } while (--iNewBlocks);
- }
- wchar_t* pTextData = m_BlockArray[iBlockIndex].get();
- pTextData[iInnerIndex] = ch;
- m_iDataLength = std::max(m_iDataLength, iIndex + 1);
-}
-
-int32_t CFDE_BlockBuffer::DeleteTextChars(int32_t iCount, bool bDirection) {
- if (iCount <= 0)
- return m_iDataLength;
-
- if (iCount >= m_iDataLength) {
- Reset(false);
- return 0;
- }
- if (bDirection) {
- m_iStartPosition += iCount;
- m_iDataLength -= iCount;
- } else {
- m_iDataLength -= iCount;
- }
- return m_iDataLength;
-}
-
-void CFDE_BlockBuffer::GetTextData(CFX_WideString& wsTextData,
- int32_t iStart,
- int32_t iLength) const {
- wsTextData.clear();
- int32_t iMaybeDataLength = m_iBufferSize - 1 - m_iStartPosition;
- if (iStart < 0 || iStart > iMaybeDataLength) {
- return;
- }
- if (iLength == -1 || iLength > iMaybeDataLength) {
- iLength = iMaybeDataLength;
- }
- if (iLength <= 0) {
- return;
- }
- wchar_t* pBuf = wsTextData.GetBuffer(iLength);
- if (!pBuf) {
- return;
- }
- int32_t iStartBlockIndex = 0;
- int32_t iStartInnerIndex = 0;
- TextDataIndex2BufIndex(iStart, iStartBlockIndex, iStartInnerIndex);
- int32_t iEndBlockIndex = 0;
- int32_t iEndInnerIndex = 0;
- TextDataIndex2BufIndex(iStart + iLength, iEndBlockIndex, iEndInnerIndex);
- int32_t iPointer = 0;
- for (int32_t i = iStartBlockIndex; i <= iEndBlockIndex; i++) {
- int32_t iBufferPointer = 0;
- int32_t iCopyLength = m_iAllocStep;
- if (i == iStartBlockIndex) {
- iCopyLength -= iStartInnerIndex;
- iBufferPointer = iStartInnerIndex;
- }
- if (i == iEndBlockIndex) {
- iCopyLength -= ((m_iAllocStep - 1) - iEndInnerIndex);
- }
- wchar_t* pBlockBuf = m_BlockArray[i].get();
- memcpy(pBuf + iPointer, pBlockBuf + iBufferPointer,
- iCopyLength * sizeof(wchar_t));
- iPointer += iCopyLength;
- }
- wsTextData.ReleaseBuffer(iLength);
-}
-
-void CFDE_BlockBuffer::TextDataIndex2BufIndex(const int32_t iIndex,
- int32_t& iBlockIndex,
- int32_t& iInnerIndex) const {
- ASSERT(iIndex >= 0);
- int32_t iRealIndex = m_iStartPosition + iIndex;
- iBlockIndex = iRealIndex / m_iAllocStep;
- iInnerIndex = iRealIndex % m_iAllocStep;
-}
-
-void CFDE_BlockBuffer::ClearBuffer() {
- m_iBufferSize = 0;
- m_BlockArray.clear();
-}
-
-CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser()
- : m_pStream(nullptr),
- m_iXMLPlaneSize(-1),
- m_iCurrentPos(0),
- m_iCurrentNodeNum(-1),
- m_iLastNodeNum(-1),
- m_iParsedChars(0),
- m_iParsedBytes(0),
- m_pBuffer(nullptr),
- m_iBufferChars(0),
- m_bEOS(false),
- m_pStart(nullptr),
- m_pEnd(nullptr),
- m_iAllocStep(m_BlockBuffer.GetAllocStep()),
- m_iDataLength(m_BlockBuffer.GetDataLengthRef()),
- m_pCurrentBlock(nullptr),
- m_iIndexInBlock(0),
- m_iTextDataLength(0),
- m_syntaxParserResult(FDE_XmlSyntaxResult::None),
- m_syntaxParserState(FDE_XmlSyntaxState::Text),
- m_wQuotationMark(0),
- m_iEntityStart(-1) {
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
-}
-
-void CFDE_XMLSyntaxParser::Init(const CFX_RetainPtr<IFGAS_Stream>& pStream,
- int32_t iXMLPlaneSize,
- int32_t iTextDataSize) {
- ASSERT(!m_pStream && !m_pBuffer);
- ASSERT(pStream && iXMLPlaneSize > 0);
- int32_t iStreamLength = pStream->GetLength();
- ASSERT(iStreamLength > 0);
- m_pStream = pStream;
- m_iXMLPlaneSize = std::min(iXMLPlaneSize, iStreamLength);
- uint8_t bom[4];
- m_iCurrentPos = m_pStream->GetBOM(bom);
- ASSERT(!m_pBuffer);
-
- FX_SAFE_INT32 alloc_size_safe = m_iXMLPlaneSize;
- alloc_size_safe += 1; // For NUL.
- if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return;
- }
-
- m_pBuffer = FX_Alloc(
- wchar_t, pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe));
- m_pStart = m_pEnd = m_pBuffer;
- ASSERT(!m_BlockBuffer.IsInitialized());
- m_BlockBuffer.InitBuffer();
- m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_iParsedBytes = m_iParsedChars = 0;
- m_iBufferChars = 0;
-}
-
-FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() {
- if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error ||
- m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) {
- return m_syntaxParserResult;
- }
- ASSERT(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized());
- int32_t iStreamLength = m_pStream->GetLength();
- int32_t iPos;
-
- FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None;
- while (true) {
- if (m_pStart >= m_pEnd) {
- if (m_bEOS || m_iCurrentPos >= iStreamLength) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
- return m_syntaxParserResult;
- }
- m_iParsedChars += (m_pEnd - m_pBuffer);
- m_iParsedBytes = m_iCurrentPos;
- if (m_pStream->GetPosition() != m_iCurrentPos) {
- m_pStream->Seek(FX_STREAMSEEK_Begin, m_iCurrentPos);
- }
- m_iBufferChars =
- m_pStream->ReadString(m_pBuffer, m_iXMLPlaneSize, m_bEOS);
- iPos = m_pStream->GetPosition();
- if (m_iBufferChars < 1) {
- m_iCurrentPos = iStreamLength;
- m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
- return m_syntaxParserResult;
- }
- m_iCurrentPos = iPos;
- m_pStart = m_pBuffer;
- m_pEnd = m_pBuffer + m_iBufferChars;
- }
-
- while (m_pStart < m_pEnd) {
- wchar_t ch = *m_pStart;
- switch (m_syntaxParserState) {
- case FDE_XmlSyntaxState::Text:
- if (ch == L'<') {
- if (m_iDataLength > 0) {
- m_iTextDataLength = m_iDataLength;
- m_BlockBuffer.Reset();
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_iEntityStart = -1;
- syntaxParserResult = FDE_XmlSyntaxResult::Text;
- } else {
- m_pStart++;
- m_syntaxParserState = FDE_XmlSyntaxState::Node;
- }
- } else {
- ParseTextChar(ch);
- }
- break;
- case FDE_XmlSyntaxState::Node:
- if (ch == L'!') {
- m_pStart++;
- m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl;
- } else if (ch == L'/') {
- m_pStart++;
- m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
- } else if (ch == L'?') {
- m_iLastNodeNum++;
- m_iCurrentNodeNum = m_iLastNodeNum;
- m_CurNode.iNodeNum = m_iLastNodeNum;
- m_CurNode.eNodeType = FDE_XMLNODE_Instruction;
- m_XMLNodeStack.push(m_CurNode);
- m_pStart++;
- m_syntaxParserState = FDE_XmlSyntaxState::Target;
- syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen;
- } else {
- m_iLastNodeNum++;
- m_iCurrentNodeNum = m_iLastNodeNum;
- m_CurNode.iNodeNum = m_iLastNodeNum;
- m_CurNode.eNodeType = FDE_XMLNODE_Element;
- m_XMLNodeStack.push(m_CurNode);
- m_syntaxParserState = FDE_XmlSyntaxState::Tag;
- syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen;
- }
- break;
- case FDE_XmlSyntaxState::Target:
- case FDE_XmlSyntaxState::Tag:
- if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) {
- if (m_iDataLength < 1) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- } else {
- m_iTextDataLength = m_iDataLength;
- m_BlockBuffer.Reset();
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (m_syntaxParserState != FDE_XmlSyntaxState::Target) {
- syntaxParserResult = FDE_XmlSyntaxResult::TagName;
- } else {
- syntaxParserResult = FDE_XmlSyntaxResult::TargetName;
- }
- m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
- }
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_iDataLength++;
- m_pStart++;
- }
- break;
- case FDE_XmlSyntaxState::AttriName:
- if (m_iDataLength < 1 && FDE_IsXMLWhiteSpace(ch)) {
- m_pStart++;
- break;
- }
- if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) {
- if (m_iDataLength < 1) {
- if (m_CurNode.eNodeType == FDE_XMLNODE_Element) {
- if (ch == L'>' || ch == L'/') {
- m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
- break;
- }
- } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
- if (ch == L'?') {
- m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
- m_pStart++;
- } else {
- m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
- }
- break;
- }
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- } else {
- if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
- if (ch != '=' && !FDE_IsXMLWhiteSpace(ch)) {
- m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
- break;
- }
- }
- m_iTextDataLength = m_iDataLength;
- m_BlockBuffer.Reset();
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign;
- syntaxParserResult = FDE_XmlSyntaxResult::AttriName;
- }
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_iDataLength++;
- m_pStart++;
- }
- break;
- case FDE_XmlSyntaxState::AttriEqualSign:
- if (FDE_IsXMLWhiteSpace(ch)) {
- m_pStart++;
- break;
- }
- if (ch != L'=') {
- if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
- m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
- break;
- }
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- } else {
- m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation;
- m_pStart++;
- }
- break;
- case FDE_XmlSyntaxState::AttriQuotation:
- if (FDE_IsXMLWhiteSpace(ch)) {
- m_pStart++;
- break;
- }
- if (ch != L'\"' && ch != L'\'') {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- } else {
- m_wQuotationMark = ch;
- m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
- m_pStart++;
- }
- break;
- case FDE_XmlSyntaxState::AttriValue:
- if (ch == m_wQuotationMark) {
- if (m_iEntityStart > -1) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- m_iTextDataLength = m_iDataLength;
- m_wQuotationMark = 0;
- m_BlockBuffer.Reset();
- m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_pStart++;
- m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
- syntaxParserResult = FDE_XmlSyntaxResult::AttriValue;
- } else {
- ParseTextChar(ch);
- }
- break;
- case FDE_XmlSyntaxState::CloseInstruction:
- if (ch != L'>') {
- if (m_iIndexInBlock == m_iAllocStep) {
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_iDataLength++;
- m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
- } else if (m_iDataLength > 0) {
- m_iTextDataLength = m_iDataLength;
- m_BlockBuffer.Reset();
- m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
- } else {
- m_pStart++;
- if (m_XMLNodeStack.empty()) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- m_XMLNodeStack.pop();
- if (!m_XMLNodeStack.empty()) {
- m_CurNode = m_XMLNodeStack.top();
- } else {
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
- }
- m_iCurrentNodeNum = m_CurNode.iNodeNum;
- m_BlockBuffer.Reset();
- m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose;
- }
- break;
- case FDE_XmlSyntaxState::BreakElement:
- if (ch == L'>') {
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak;
- } else if (ch == L'/') {
- m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
- } else {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- m_pStart++;
- break;
- case FDE_XmlSyntaxState::CloseElement:
- if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) {
- if (ch == L'>') {
- if (m_XMLNodeStack.empty()) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- m_XMLNodeStack.pop();
- if (!m_XMLNodeStack.empty()) {
- m_CurNode = m_XMLNodeStack.top();
- } else {
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
- }
- m_iCurrentNodeNum = m_CurNode.iNodeNum;
- m_iTextDataLength = m_iDataLength;
- m_BlockBuffer.Reset();
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- syntaxParserResult = FDE_XmlSyntaxResult::ElementClose;
- } else if (!FDE_IsXMLWhiteSpace(ch)) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_iDataLength++;
- }
- m_pStart++;
- break;
- case FDE_XmlSyntaxState::SkipCommentOrDecl:
- if (FXSYS_wcsnicmp(m_pStart, L"--", 2) == 0) {
- m_pStart += 2;
- m_syntaxParserState = FDE_XmlSyntaxState::SkipComment;
- } else if (FXSYS_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) {
- m_pStart += 7;
- m_syntaxParserState = FDE_XmlSyntaxState::SkipCData;
- } else {
- m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode;
- m_SkipChar = L'>';
- m_SkipStack.push(L'>');
- }
- break;
- case FDE_XmlSyntaxState::SkipCData: {
- if (FXSYS_wcsnicmp(m_pStart, L"]]>", 3) == 0) {
- m_pStart += 3;
- syntaxParserResult = FDE_XmlSyntaxResult::CData;
- m_iTextDataLength = m_iDataLength;
- m_BlockBuffer.Reset();
- m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (!m_pCurrentBlock)
- return FDE_XmlSyntaxResult::Error;
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_iDataLength++;
- m_pStart++;
- }
- break;
- }
- case FDE_XmlSyntaxState::SkipDeclNode:
- if (m_SkipChar == L'\'' || m_SkipChar == L'\"') {
- m_pStart++;
- if (ch != m_SkipChar)
- break;
-
- m_SkipStack.pop();
- if (m_SkipStack.empty())
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- else
- m_SkipChar = m_SkipStack.top();
- } else {
- switch (ch) {
- case L'<':
- m_SkipChar = L'>';
- m_SkipStack.push(L'>');
- break;
- case L'[':
- m_SkipChar = L']';
- m_SkipStack.push(L']');
- break;
- case L'(':
- m_SkipChar = L')';
- m_SkipStack.push(L')');
- break;
- case L'\'':
- m_SkipChar = L'\'';
- m_SkipStack.push(L'\'');
- break;
- case L'\"':
- m_SkipChar = L'\"';
- m_SkipStack.push(L'\"');
- break;
- default:
- if (ch == m_SkipChar) {
- m_SkipStack.pop();
- if (m_SkipStack.empty()) {
- if (m_iDataLength >= 9) {
- CFX_WideString wsHeader;
- m_BlockBuffer.GetTextData(wsHeader, 0, 7);
- }
- m_iTextDataLength = m_iDataLength;
- m_BlockBuffer.Reset();
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- } else {
- m_SkipChar = m_SkipStack.top();
- }
- }
- break;
- }
- if (!m_SkipStack.empty()) {
- if (m_iIndexInBlock == m_iAllocStep) {
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_iDataLength++;
- }
- m_pStart++;
- }
- break;
- case FDE_XmlSyntaxState::SkipComment:
- if (FXSYS_wcsnicmp(m_pStart, L"-->", 3) == 0) {
- m_pStart += 2;
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- }
-
- m_pStart++;
- break;
- case FDE_XmlSyntaxState::TargetData:
- if (FDE_IsXMLWhiteSpace(ch)) {
- if (m_iDataLength < 1) {
- m_pStart++;
- break;
- } else if (m_wQuotationMark == 0) {
- m_iTextDataLength = m_iDataLength;
- m_wQuotationMark = 0;
- m_BlockBuffer.Reset();
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_pStart++;
- syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
- break;
- }
- }
- if (ch == '?') {
- m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
- m_pStart++;
- } else if (ch == '\"') {
- if (m_wQuotationMark == 0) {
- m_wQuotationMark = ch;
- m_pStart++;
- } else if (ch == m_wQuotationMark) {
- m_iTextDataLength = m_iDataLength;
- m_wQuotationMark = 0;
- m_BlockBuffer.Reset();
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_pStart++;
- syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
- } else {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- m_pCurrentBlock =
- m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_iDataLength++;
- m_pStart++;
- }
- break;
- default:
- break;
- }
- if (syntaxParserResult != FDE_XmlSyntaxResult::None)
- return syntaxParserResult;
- }
- }
- return FDE_XmlSyntaxResult::Text;
-}
-
-CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() {
- m_pCurrentBlock = nullptr;
- FX_Free(m_pBuffer);
-}
-
-int32_t CFDE_XMLSyntaxParser::GetStatus() const {
- if (!m_pStream)
- return -1;
-
- int32_t iStreamLength = m_pStream->GetLength();
- if (iStreamLength < 1)
- return 100;
-
- if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error)
- return -1;
-
- if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString)
- return 100;
- return m_iParsedBytes * 100 / iStreamLength;
-}
-
-static int32_t FX_GetUTF8EncodeLength(const wchar_t* pSrc, int32_t iSrcLen) {
- uint32_t unicode = 0;
- int32_t iDstNum = 0;
- while (iSrcLen-- > 0) {
- unicode = *pSrc++;
- int nbytes = 0;
- if ((uint32_t)unicode < 0x80) {
- nbytes = 1;
- } else if ((uint32_t)unicode < 0x800) {
- nbytes = 2;
- } else if ((uint32_t)unicode < 0x10000) {
- nbytes = 3;
- } else if ((uint32_t)unicode < 0x200000) {
- nbytes = 4;
- } else if ((uint32_t)unicode < 0x4000000) {
- nbytes = 5;
- } else {
- nbytes = 6;
- }
- iDstNum += nbytes;
- }
- return iDstNum;
-}
-
-FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const {
- if (!m_pStream)
- return 0;
-
- int32_t nSrcLen = m_pStart - m_pBuffer;
- int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen);
- return m_iParsedBytes + nDstLen;
-}
-
-void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) {
- if (m_iIndexInBlock == m_iAllocStep) {
- m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- if (!m_pCurrentBlock) {
- return;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = character;
- m_iDataLength++;
- if (m_iEntityStart > -1 && character == L';') {
- CFX_WideString csEntity;
- m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1,
- (m_iDataLength - 1) - m_iEntityStart - 1);
- int32_t iLen = csEntity.GetLength();
- if (iLen > 0) {
- if (csEntity[0] == L'#') {
- uint32_t ch = 0;
- wchar_t w;
- if (iLen > 1 && csEntity[1] == L'x') {
- for (int32_t i = 2; i < iLen; i++) {
- w = csEntity[i];
- if (w >= L'0' && w <= L'9') {
- ch = (ch << 4) + w - L'0';
- } else if (w >= L'A' && w <= L'F') {
- ch = (ch << 4) + w - 55;
- } else if (w >= L'a' && w <= L'f') {
- ch = (ch << 4) + w - 87;
- } else {
- break;
- }
- }
- } else {
- for (int32_t i = 1; i < iLen; i++) {
- w = csEntity[i];
- if (w < L'0' || w > L'9')
- break;
- ch = ch * 10 + w - L'0';
- }
- }
- if (ch > kMaxCharRange)
- ch = ' ';
-
- character = static_cast<wchar_t>(ch);
- if (character != 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, character);
- m_iEntityStart++;
- }
- } else {
- if (csEntity.Compare(L"amp") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'&');
- m_iEntityStart++;
- } else if (csEntity.Compare(L"lt") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'<');
- m_iEntityStart++;
- } else if (csEntity.Compare(L"gt") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'>');
- m_iEntityStart++;
- } else if (csEntity.Compare(L"apos") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'\'');
- m_iEntityStart++;
- } else if (csEntity.Compare(L"quot") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"');
- m_iEntityStart++;
- }
- }
- }
- m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, false);
- m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
- m_iEntityStart = -1;
- } else {
- if (m_iEntityStart < 0 && character == L'&') {
- m_iEntityStart = m_iDataLength - 1;
- }
- }
- m_pStart++;
-}
diff --git a/xfa/fde/xml/fde_xml_imp.h b/xfa/fde/xml/fde_xml_imp.h
deleted file mode 100644
index bd88da06cc..0000000000
--- a/xfa/fde/xml/fde_xml_imp.h
+++ /dev/null
@@ -1,335 +0,0 @@
-// Copyright 2014 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#ifndef XFA_FDE_XML_FDE_XML_IMP_H_
-#define XFA_FDE_XML_FDE_XML_IMP_H_
-
-#include <memory>
-#include <stack>
-#include <vector>
-
-#include "core/fxcrt/fx_basic.h"
-#include "core/fxcrt/fx_system.h"
-#include "xfa/fde/xml/fde_xml.h"
-#include "xfa/fgas/crt/ifgas_stream.h"
-
-class CFDE_BlockBuffer;
-class CFDE_XMLInstruction;
-class CFDE_XMLElement;
-class CFDE_XMLText;
-class CFDE_XMLDoc;
-class CFDE_XMLDOMParser;
-class CFDE_XMLParser;
-class CFDE_XMLSyntaxParser;
-
-class CFDE_XMLNode {
- public:
- enum NodeItem {
- Root = 0,
- Parent,
- FirstSibling,
- PriorSibling,
- NextSibling,
- LastSibling,
- FirstNeighbor,
- PriorNeighbor,
- NextNeighbor,
- LastNeighbor,
- FirstChild,
- LastChild
- };
-
- CFDE_XMLNode();
- virtual ~CFDE_XMLNode();
-
- virtual FDE_XMLNODETYPE GetType() const;
- virtual CFDE_XMLNode* Clone(bool bRecursive);
-
- int32_t CountChildNodes() const;
- CFDE_XMLNode* GetChildNode(int32_t index) const;
- int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const;
- int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1);
- void RemoveChildNode(CFDE_XMLNode* pNode);
- void DeleteChildren();
- void CloneChildren(CFDE_XMLNode* pClone);
-
- CFDE_XMLNode* GetPath(const wchar_t* pPath,
- int32_t iLength = -1,
- bool bQualifiedName = true) const;
-
- int32_t GetNodeLevel() const;
- CFDE_XMLNode* GetNodeItem(CFDE_XMLNode::NodeItem eItem) const;
- bool InsertNodeItem(CFDE_XMLNode::NodeItem eItem, CFDE_XMLNode* pNode);
- CFDE_XMLNode* RemoveNodeItem(CFDE_XMLNode::NodeItem eItem);
-
- void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream);
-
- CFDE_XMLNode* m_pParent;
- CFDE_XMLNode* m_pChild;
- CFDE_XMLNode* m_pPrior;
- CFDE_XMLNode* m_pNext;
-};
-
-class CFDE_XMLInstruction : public CFDE_XMLNode {
- public:
- explicit CFDE_XMLInstruction(const CFX_WideString& wsTarget);
- ~CFDE_XMLInstruction() override;
-
- // CFDE_XMLNode
- FDE_XMLNODETYPE GetType() const override;
- CFDE_XMLNode* Clone(bool bRecursive) override;
-
- void GetTargetName(CFX_WideString& wsTarget) const { wsTarget = m_wsTarget; }
- int32_t CountAttributes() const;
- bool GetAttribute(int32_t index,
- CFX_WideString& wsAttriName,
- CFX_WideString& wsAttriValue) const;
- bool HasAttribute(const wchar_t* pwsAttriName) const;
- void GetString(const wchar_t* pwsAttriName,
- CFX_WideString& wsAttriValue,
- const wchar_t* pwsDefValue = nullptr) const;
- void SetString(const CFX_WideString& wsAttriName,
- const CFX_WideString& wsAttriValue);
- int32_t GetInteger(const wchar_t* pwsAttriName, int32_t iDefValue = 0) const;
- void SetInteger(const wchar_t* pwsAttriName, int32_t iAttriValue);
- float GetFloat(const wchar_t* pwsAttriName, float fDefValue = 0) const;
- void SetFloat(const wchar_t* pwsAttriName, float fAttriValue);
- void RemoveAttribute(const wchar_t* pwsAttriName);
- int32_t CountData() const;
- bool GetData(int32_t index, CFX_WideString& wsData) const;
- void AppendData(const CFX_WideString& wsData);
- void RemoveData(int32_t index);
-
- CFX_WideString m_wsTarget;
- std::vector<CFX_WideString> m_Attributes;
- std::vector<CFX_WideString> m_TargetData;
-};
-
-class CFDE_XMLElement : public CFDE_XMLNode {
- public:
- explicit CFDE_XMLElement(const CFX_WideString& wsTag);
- ~CFDE_XMLElement() override;
-
- // CFDE_XMLNode
- FDE_XMLNODETYPE GetType() const override;
- CFDE_XMLNode* Clone(bool bRecursive) override;
-
- void GetTagName(CFX_WideString& wsTag) const;
- void GetLocalTagName(CFX_WideString& wsTag) const;
-
- void GetNamespacePrefix(CFX_WideString& wsPrefix) const;
- void GetNamespaceURI(CFX_WideString& wsNamespace) const;
-
- int32_t CountAttributes() const;
- bool GetAttribute(int32_t index,
- CFX_WideString& wsAttriName,
- CFX_WideString& wsAttriValue) const;
- bool HasAttribute(const wchar_t* pwsAttriName) const;
- void RemoveAttribute(const wchar_t* pwsAttriName);
-
- void GetString(const wchar_t* pwsAttriName,
- CFX_WideString& wsAttriValue,
- const wchar_t* pwsDefValue = nullptr) const;
- void SetString(const CFX_WideString& wsAttriName,
- const CFX_WideString& wsAttriValue);
-
- int32_t GetInteger(const wchar_t* pwsAttriName, int32_t iDefValue = 0) const;
- void SetInteger(const wchar_t* pwsAttriName, int32_t iAttriValue);
-
- float GetFloat(const wchar_t* pwsAttriName, float fDefValue = 0) const;
- void SetFloat(const wchar_t* pwsAttriName, float fAttriValue);
-
- void GetTextData(CFX_WideString& wsText) const;
- void SetTextData(const CFX_WideString& wsText);
-
- CFX_WideString m_wsTag;
- std::vector<CFX_WideString> m_Attributes;
-};
-
-class CFDE_XMLText : public CFDE_XMLNode {
- public:
- explicit CFDE_XMLText(const CFX_WideString& wsText);
- ~CFDE_XMLText() override;
-
- // CFDE_XMLNode
- FDE_XMLNODETYPE GetType() const override;
- CFDE_XMLNode* Clone(bool bRecursive) override;
-
- void GetText(CFX_WideString& wsText) const { wsText = m_wsText; }
- void SetText(const CFX_WideString& wsText) { m_wsText = wsText; }
-
- CFX_WideString m_wsText;
-};
-
-class CFDE_XMLDeclaration : public CFDE_XMLNode {
- public:
- CFDE_XMLDeclaration() {}
- ~CFDE_XMLDeclaration() override {}
-};
-
-class CFDE_XMLCharData : public CFDE_XMLDeclaration {
- public:
- explicit CFDE_XMLCharData(const CFX_WideString& wsCData);
- ~CFDE_XMLCharData() override;
-
- FDE_XMLNODETYPE GetType() const override;
- CFDE_XMLNode* Clone(bool bRecursive) override;
-
- void GetCharData(CFX_WideString& wsCharData) const {
- wsCharData = m_wsCharData;
- }
- void SetCharData(const CFX_WideString& wsCData) { m_wsCharData = wsCData; }
-
- CFX_WideString m_wsCharData;
-};
-
-class CFDE_XMLDoc {
- public:
- CFDE_XMLDoc();
- ~CFDE_XMLDoc();
-
- bool LoadXML(std::unique_ptr<CFDE_XMLParser> pXMLParser);
- int32_t DoLoad(IFX_Pause* pPause = nullptr);
- void CloseXML();
- CFDE_XMLNode* GetRoot() const { return m_pRoot.get(); }
- void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
- CFDE_XMLNode* pNode);
-
- private:
- int32_t m_iStatus;
- std::unique_ptr<CFDE_XMLNode> m_pRoot;
- std::unique_ptr<CFDE_XMLParser> m_pXMLParser;
- CFX_RetainPtr<IFGAS_Stream> m_pStream;
-};
-
-class CFDE_BlockBuffer {
- public:
- explicit CFDE_BlockBuffer(int32_t iAllocStep = 1024 * 1024);
- ~CFDE_BlockBuffer();
-
- bool InitBuffer(int32_t iBufferSize = 1024 * 1024);
- bool IsInitialized() { return m_iBufferSize / m_iAllocStep >= 1; }
- wchar_t* GetAvailableBlock(int32_t& iIndexInBlock);
- inline int32_t GetAllocStep() const { return m_iAllocStep; }
- inline int32_t& GetDataLengthRef() { return m_iDataLength; }
- inline void Reset(bool bReserveData = true) {
- if (!bReserveData) {
- m_iStartPosition = 0;
- }
- m_iDataLength = 0;
- }
- void SetTextChar(int32_t iIndex, wchar_t ch);
- int32_t DeleteTextChars(int32_t iCount, bool bDirection = true);
- void GetTextData(CFX_WideString& wsTextData,
- int32_t iStart = 0,
- int32_t iLength = -1) const;
-
- protected:
- inline void TextDataIndex2BufIndex(const int32_t iIndex,
- int32_t& iBlockIndex,
- int32_t& iInnerIndex) const;
- void ClearBuffer();
-
- std::vector<std::unique_ptr<wchar_t, FxFreeDeleter>> m_BlockArray;
- int32_t m_iDataLength;
- int32_t m_iBufferSize;
- int32_t m_iAllocStep;
- int32_t m_iStartPosition;
-};
-
-class CFDE_XMLSyntaxParser {
- public:
- CFDE_XMLSyntaxParser();
- ~CFDE_XMLSyntaxParser();
-
- void Init(const CFX_RetainPtr<IFGAS_Stream>& pStream,
- int32_t iXMLPlaneSize,
- int32_t iTextDataSize = 256);
-
- FDE_XmlSyntaxResult DoSyntaxParse();
-
- int32_t GetStatus() const;
- int32_t GetCurrentPos() const {
- return m_iParsedChars + (m_pStart - m_pBuffer);
- }
- FX_FILESIZE GetCurrentBinaryPos() const;
- int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
- int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
-
- void GetTargetName(CFX_WideString& wsTarget) const {
- m_BlockBuffer.GetTextData(wsTarget, 0, m_iTextDataLength);
- }
- void GetTagName(CFX_WideString& wsTag) const {
- m_BlockBuffer.GetTextData(wsTag, 0, m_iTextDataLength);
- }
- void GetAttributeName(CFX_WideString& wsAttriName) const {
- m_BlockBuffer.GetTextData(wsAttriName, 0, m_iTextDataLength);
- }
- void GetAttributeValue(CFX_WideString& wsAttriValue) const {
- m_BlockBuffer.GetTextData(wsAttriValue, 0, m_iTextDataLength);
- }
- void GetTextData(CFX_WideString& wsText) const {
- m_BlockBuffer.GetTextData(wsText, 0, m_iTextDataLength);
- }
- void GetTargetData(CFX_WideString& wsData) const {
- m_BlockBuffer.GetTextData(wsData, 0, m_iTextDataLength);
- }
-
- protected:
- enum class FDE_XmlSyntaxState {
- Text,
- Node,
- Target,
- Tag,
- AttriName,
- AttriEqualSign,
- AttriQuotation,
- AttriValue,
- Entity,
- EntityDecimal,
- EntityHex,
- CloseInstruction,
- BreakElement,
- CloseElement,
- SkipDeclNode,
- DeclCharData,
- SkipComment,
- SkipCommentOrDecl,
- SkipCData,
- TargetData
- };
-
- void ParseTextChar(wchar_t ch);
-
- CFX_RetainPtr<IFGAS_Stream> m_pStream;
- int32_t m_iXMLPlaneSize;
- int32_t m_iCurrentPos;
- int32_t m_iCurrentNodeNum;
- int32_t m_iLastNodeNum;
- int32_t m_iParsedChars;
- int32_t m_iParsedBytes;
- wchar_t* m_pBuffer;
- int32_t m_iBufferChars;
- bool m_bEOS;
- wchar_t* m_pStart;
- wchar_t* m_pEnd;
- FDE_XMLNODE m_CurNode;
- std::stack<FDE_XMLNODE> m_XMLNodeStack;
- CFDE_BlockBuffer m_BlockBuffer;
- int32_t m_iAllocStep;
- int32_t& m_iDataLength;
- wchar_t* m_pCurrentBlock;
- int32_t m_iIndexInBlock;
- int32_t m_iTextDataLength;
- FDE_XmlSyntaxResult m_syntaxParserResult;
- FDE_XmlSyntaxState m_syntaxParserState;
- wchar_t m_wQuotationMark;
- int32_t m_iEntityStart;
- std::stack<wchar_t> m_SkipStack;
- wchar_t m_SkipChar;
-};
-
-#endif // XFA_FDE_XML_FDE_XML_IMP_H_
diff --git a/xfa/fxfa/app/cxfa_textlayout.cpp b/xfa/fxfa/app/cxfa_textlayout.cpp
index 2470459b31..718f90b9ea 100644
--- a/xfa/fxfa/app/cxfa_textlayout.cpp
+++ b/xfa/fxfa/app/cxfa_textlayout.cpp
@@ -17,7 +17,9 @@
#include "xfa/fde/cfde_renderdevice.h"
#include "xfa/fde/css/cfde_csscomputedstyle.h"
#include "xfa/fde/css/cfde_cssstyleselector.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+#include "xfa/fde/xml/cfde_xmltext.h"
#include "xfa/fxfa/app/cxfa_linkuserdata.h"
#include "xfa/fxfa/app/cxfa_loadercontext.h"
#include "xfa/fxfa/app/cxfa_pieceline.h"
diff --git a/xfa/fxfa/app/cxfa_textparser.cpp b/xfa/fxfa/app/cxfa_textparser.cpp
index 65155f989c..b4032fa5c7 100644
--- a/xfa/fxfa/app/cxfa_textparser.cpp
+++ b/xfa/fxfa/app/cxfa_textparser.cpp
@@ -15,6 +15,8 @@
#include "xfa/fde/css/cfde_cssstyleselector.h"
#include "xfa/fde/css/cfde_cssstylesheet.h"
#include "xfa/fde/css/fde_css.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
#include "xfa/fgas/crt/fgas_codepage.h"
#include "xfa/fgas/font/cfgas_fontmgr.h"
#include "xfa/fxfa/app/cxfa_csstagprovider.h"
diff --git a/xfa/fxfa/app/xfa_ffwidgetacc.cpp b/xfa/fxfa/app/xfa_ffwidgetacc.cpp
index b6d4decc42..760def2759 100644
--- a/xfa/fxfa/app/xfa_ffwidgetacc.cpp
+++ b/xfa/fxfa/app/xfa_ffwidgetacc.cpp
@@ -14,7 +14,8 @@
#include "third_party/base/ptr_util.h"
#include "third_party/base/stl_util.h"
#include "xfa/fde/cfde_textout.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
#include "xfa/fxfa/app/xfa_ffcheckbutton.h"
#include "xfa/fxfa/app/xfa_ffchoicelist.h"
#include "xfa/fxfa/app/xfa_fffield.h"
diff --git a/xfa/fxfa/cxfa_ffdoc.cpp b/xfa/fxfa/cxfa_ffdoc.cpp
index 0deb864811..f48ae57fb2 100644
--- a/xfa/fxfa/cxfa_ffdoc.cpp
+++ b/xfa/fxfa/cxfa_ffdoc.cpp
@@ -18,7 +18,8 @@
#include "core/fxcrt/fx_ext.h"
#include "core/fxcrt/fx_memory.h"
#include "third_party/base/ptr_util.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
#include "xfa/fwl/cfwl_notedriver.h"
#include "xfa/fxfa/app/xfa_ffnotify.h"
#include "xfa/fxfa/cxfa_ffapp.h"
diff --git a/xfa/fxfa/cxfa_widgetacc.cpp b/xfa/fxfa/cxfa_widgetacc.cpp
index 9fee6bd238..be7556ee24 100644
--- a/xfa/fxfa/cxfa_widgetacc.cpp
+++ b/xfa/fxfa/cxfa_widgetacc.cpp
@@ -11,7 +11,8 @@
#include "third_party/base/stl_util.h"
#include "xfa/fde/cfde_textout.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
#include "xfa/fxfa/app/cxfa_textlayout.h"
#include "xfa/fxfa/app/xfa_ffwidgetacc.h"
#include "xfa/fxfa/cxfa_ffapp.h"
diff --git a/xfa/fxfa/parser/cxfa_dataexporter.cpp b/xfa/fxfa/parser/cxfa_dataexporter.cpp
index f9553c413f..fda29c2201 100644
--- a/xfa/fxfa/parser/cxfa_dataexporter.cpp
+++ b/xfa/fxfa/parser/cxfa_dataexporter.cpp
@@ -10,7 +10,9 @@
#include "core/fxcrt/fx_basic.h"
#include "third_party/base/stl_util.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmldoc.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
#include "xfa/fgas/crt/fgas_codepage.h"
#include "xfa/fxfa/parser/cxfa_document.h"
#include "xfa/fxfa/parser/cxfa_node.h"
@@ -46,12 +48,33 @@ CFX_WideString ExportEncodeAttribute(const CFX_WideString& str) {
return textBuf.MakeString();
}
+const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09},
+ {0x0A, 0x0A},
+ {0x0D, 0x0D},
+ {0x20, 0xD7FF},
+ {0xE000, 0xFFFD}};
+bool IsXMLValidChar(wchar_t ch) {
+ int32_t iStart = 0;
+ int32_t iEnd = FX_ArraySize(g_XMLValidCharRange) - 1;
+ while (iStart <= iEnd) {
+ int32_t iMid = (iStart + iEnd) / 2;
+ if (ch < g_XMLValidCharRange[iMid][0]) {
+ iEnd = iMid - 1;
+ } else if (ch > g_XMLValidCharRange[iMid][1]) {
+ iStart = iMid + 1;
+ } else {
+ return true;
+ }
+ }
+ return false;
+}
+
CFX_WideString ExportEncodeContent(const CFX_WideStringC& str) {
CFX_WideTextBuf textBuf;
int32_t iLen = str.GetLength();
for (int32_t i = 0; i < iLen; i++) {
wchar_t ch = str.GetAt(i);
- if (!FDE_IsXMLValidChar(ch))
+ if (!IsXMLValidChar(ch))
continue;
if (ch == '&') {
diff --git a/xfa/fxfa/parser/cxfa_dataimporter.cpp b/xfa/fxfa/parser/cxfa_dataimporter.cpp
index 3ba304d550..200841f5cb 100644
--- a/xfa/fxfa/parser/cxfa_dataimporter.cpp
+++ b/xfa/fxfa/parser/cxfa_dataimporter.cpp
@@ -10,7 +10,7 @@
#include "core/fxcrt/fx_stream.h"
#include "third_party/base/ptr_util.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
#include "xfa/fxfa/fxfa.h"
#include "xfa/fxfa/fxfa_basic.h"
#include "xfa/fxfa/parser/cxfa_document.h"
diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp
index 90be568785..b855513f52 100644
--- a/xfa/fxfa/parser/cxfa_document_parser.cpp
+++ b/xfa/fxfa/parser/cxfa_document_parser.cpp
@@ -7,6 +7,7 @@
#include "xfa/fxfa/parser/cxfa_document_parser.h"
#include "third_party/base/ptr_util.h"
+#include "xfa/fde/xml/cfde_xmldoc.h"
#include "xfa/fxfa/fxfa.h"
#include "xfa/fxfa/parser/cxfa_document.h"
diff --git a/xfa/fxfa/parser/cxfa_node.cpp b/xfa/fxfa/parser/cxfa_node.cpp
index 2828c98343..61107bf156 100644
--- a/xfa/fxfa/parser/cxfa_node.cpp
+++ b/xfa/fxfa/parser/cxfa_node.cpp
@@ -17,7 +17,9 @@
#include "fxjs/cfxjse_value.h"
#include "third_party/base/ptr_util.h"
#include "third_party/base/stl_util.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+#include "xfa/fde/xml/cfde_xmltext.h"
#include "xfa/fgas/crt/fgas_codepage.h"
#include "xfa/fxfa/app/xfa_ffnotify.h"
#include "xfa/fxfa/cxfa_eventparam.h"
diff --git a/xfa/fxfa/parser/cxfa_simple_parser.cpp b/xfa/fxfa/parser/cxfa_simple_parser.cpp
index 9204660388..06e2e5d704 100644
--- a/xfa/fxfa/parser/cxfa_simple_parser.cpp
+++ b/xfa/fxfa/parser/cxfa_simple_parser.cpp
@@ -11,7 +11,13 @@
#include "core/fxcrt/cfx_checksumcontext.h"
#include "core/fxcrt/fx_ext.h"
#include "third_party/base/ptr_util.h"
-#include "xfa/fde/xml/cfde_xml_parser.h"
+#include "xfa/fde/xml/cfde_xmlchardata.h"
+#include "xfa/fde/xml/cfde_xmldoc.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlinstruction.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+#include "xfa/fde/xml/cfde_xmlparser.h"
+#include "xfa/fde/xml/cfde_xmltext.h"
#include "xfa/fgas/crt/fgas_codepage.h"
#include "xfa/fxfa/fxfa.h"
#include "xfa/fxfa/parser/cxfa_document.h"
diff --git a/xfa/fxfa/parser/cxfa_simple_parser.h b/xfa/fxfa/parser/cxfa_simple_parser.h
index 350104161c..a9bcec2139 100644
--- a/xfa/fxfa/parser/cxfa_simple_parser.h
+++ b/xfa/fxfa/parser/cxfa_simple_parser.h
@@ -9,11 +9,13 @@
#include <memory>
-#include "xfa/fde/xml/fde_xml_imp.h"
#include "xfa/fxfa/fxfa_basic.h"
class CXFA_Document;
class CXFA_Node;
+class CFDE_XMLDoc;
+class CFDE_XMLInstruction;
+class CFDE_XMLNode;
class CFDE_XMLParser;
class IFX_SeekableReadStream;
class IFX_Pause;
diff --git a/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp b/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp
index e54bf4a05f..eb16628a2e 100644
--- a/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp
+++ b/xfa/fxfa/parser/xfa_document_datamerger_imp.cpp
@@ -11,7 +11,8 @@
#include "core/fxcrt/fx_ext.h"
#include "third_party/base/stl_util.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
#include "xfa/fxfa/parser/cxfa_document.h"
#include "xfa/fxfa/parser/cxfa_layoutprocessor.h"
#include "xfa/fxfa/parser/cxfa_localemgr.h"
diff --git a/xfa/fxfa/parser/xfa_utils.cpp b/xfa/fxfa/parser/xfa_utils.cpp
index 1323232f9f..df180f2183 100644
--- a/xfa/fxfa/parser/xfa_utils.cpp
+++ b/xfa/fxfa/parser/xfa_utils.cpp
@@ -7,7 +7,10 @@
#include "xfa/fxfa/parser/xfa_utils.h"
#include "core/fxcrt/fx_ext.h"
-#include "xfa/fde/xml/fde_xml_imp.h"
+#include "xfa/fde/xml/cfde_xmlchardata.h"
+#include "xfa/fde/xml/cfde_xmlelement.h"
+#include "xfa/fde/xml/cfde_xmlnode.h"
+#include "xfa/fde/xml/cfde_xmltext.h"
#include "xfa/fxfa/parser/cxfa_document.h"
#include "xfa/fxfa/parser/cxfa_localemgr.h"
#include "xfa/fxfa/parser/cxfa_localevalue.h"
diff --git a/xfa/fxfa/parser/xfa_utils.h b/xfa/fxfa/parser/xfa_utils.h
index b428a89b4e..d4461a39a7 100644
--- a/xfa/fxfa/parser/xfa_utils.h
+++ b/xfa/fxfa/parser/xfa_utils.h
@@ -7,7 +7,6 @@
#ifndef XFA_FXFA_PARSER_XFA_UTILS_H_
#define XFA_FXFA_PARSER_XFA_UTILS_H_
-#include "xfa/fde/xml/fde_xml.h"
#include "xfa/fgas/crt/ifgas_stream.h"
#include "xfa/fxfa/fxfa_basic.h"