summaryrefslogtreecommitdiff
path: root/core/fxcrt/xml/cxml_parser.cpp
diff options
context:
space:
mode:
author Dan Sinclair <dsinclair@chromium.org> 2017-03-30 14:33:28 -0400
committer Chromium commit bot <commit-bot@chromium.org> 2017-03-30 20:23:52 +0000
commit 908c848202ef137e98d96f82a4eadfae551403b7 (patch)
tree c1b682c4fc8b05c8871cef22d5bdb3af83954691 /core/fxcrt/xml/cxml_parser.cpp
parent a69665d11d4eaf5f2d17a46e6be7b43f6c0dbfbe (diff)
download pdfium-908c848202ef137e98d96f82a4eadfae551403b7.tar.xz
Move core/fxcrt XML files to core/fxcrt/xml
This CL moves the other XML files contained in core/fxcrt into the core/fxcrt/xml directory to contain all the fxcrt XML files in one place.

Change-Id: I9faefb1f311bf167b75dfbb7b9b52f25515e3c31
Reviewed-on: https://pdfium-review.googlesource.com/3378
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Commit-Queue: dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'core/fxcrt/xml/cxml_parser.cpp')
-rw-r--r-- core/fxcrt/xml/cxml_parser.cpp 680
1 files changed, 680 insertions, 0 deletions
diff --git a/core/fxcrt/xml/cxml_parser.cpp b/core/fxcrt/xml/cxml_parser.cpp
new file mode 100644
index 0000000000..be48b7adc2
--- /dev/null
+++ b/core/fxcrt/xml/cxml_parser.cpp
@@ -0,0 +1,680 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "core/fxcrt/fx_ext.h"
+#include "core/fxcrt/xml/cxml_content.h"
+#include "core/fxcrt/xml/cxml_element.h"
+#include "core/fxcrt/xml/cxml_parser.h"
+#include "third_party/base/ptr_util.h"
+#include "third_party/base/stl_util.h"
+
+namespace {
+
+#define FXCRTM_XML_CHARTYPE_Normal 0x00  // Bit flags stored per byte in g_FXCRT_XML_ByteTypes; 0x00 means no flag set.
+#define FXCRTM_XML_CHARTYPE_SpaceChar 0x01  // Tested by g_FXCRT_XML_IsWhiteSpace().
+#define FXCRTM_XML_CHARTYPE_Letter 0x02
+#define FXCRTM_XML_CHARTYPE_Digital 0x04  // Tested by g_FXCRT_XML_IsDigital().
+#define FXCRTM_XML_CHARTYPE_NameIntro 0x08  // Tested by g_FXCRT_XML_IsNameIntro().
+#define FXCRTM_XML_CHARTYPE_NameChar 0x10  // Tested by g_FXCRT_XML_IsNameChar().
+#define FXCRTM_XML_CHARTYPE_HexDigital 0x20  // Hex class: decimal digit.
+#define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40  // Hex class: 'a'-'f'.
+#define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60  // Hex class: 'A'-'F' (both hex bits set).
+#define FXCRTM_XML_CHARTYPE_HexChar 0x60  // Mask isolating the hex class; same value as HexUpperLetter.
+
+const uint8_t g_FXCRT_XML_ByteTypes[256] = {  // FXCRTM_XML_CHARTYPE_* flags for each byte value, indexed by the byte.
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x00-0x0B: space class
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x0C-0x17: space class
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00,  // 0x18-0x20 (through ' '): space class; then '!' '"' '#'
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00,  // '$'..'/': only '-' and '.' are name characters
+ 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00,  // '0'-'9'; ':' is NameIntro only; ';'
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A,  // '<'..'@'; 'A'-'F' (upper hex letters); 'G'
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 'H'-'S'
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18,  // 'T'-'Z'; '['..'^'; '_' (NameIntro | NameChar)
+ 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // '`'; 'a'-'f' (lower hex letters); 'g'-'k'
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 'l'-'w'
+ 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1A, 0x1A, 0x1A,  // 'x'-'z'; '{'..0x7F; 0x80-0x83
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x84-0xFD below: flagged like letters (0x1A)
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
+ 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,
+ 0x1A, 0x1A, 0x01, 0x01,  // 0xFC, 0xFD; 0xFE and 0xFF carry the space flag
+};
+
+// Returns true when |ch| carries the SpaceChar flag in the byte-class table.
+bool g_FXCRT_XML_IsWhiteSpace(uint8_t ch) {
+  const uint8_t flags = g_FXCRT_XML_ByteTypes[ch];
+  return (flags & FXCRTM_XML_CHARTYPE_SpaceChar) != 0;
+}
+
+// Returns true when |ch| carries the Digital flag in the byte-class table.
+bool g_FXCRT_XML_IsDigital(uint8_t ch) {
+  const uint8_t flags = g_FXCRT_XML_ByteTypes[ch];
+  return (flags & FXCRTM_XML_CHARTYPE_Digital) != 0;
+}
+
+// Returns true when |ch| carries the NameIntro flag in the byte-class table.
+bool g_FXCRT_XML_IsNameIntro(uint8_t ch) {
+  const uint8_t flags = g_FXCRT_XML_ByteTypes[ch];
+  return (flags & FXCRTM_XML_CHARTYPE_NameIntro) != 0;
+}
+
+// Returns true when |ch| carries the NameChar flag in the byte-class table.
+bool g_FXCRT_XML_IsNameChar(uint8_t ch) {
+  const uint8_t flags = g_FXCRT_XML_ByteTypes[ch];
+  return (flags & FXCRTM_XML_CHARTYPE_NameChar) != 0;
+}
+
+class CXML_DataBufAcc : public IFX_BufferedReadStream {  // Presents a caller-supplied byte range as a stream with exactly one block.
+ public:
+ template <typename T, typename... Args>
+ friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);  // Lets MakeRetain reach the private constructor.
+
+ // IFX_BufferedReadStream
+ bool IsEOF() override;  // True once the read position has reached the buffer size.
+ FX_FILESIZE GetPosition() override;
+ size_t ReadBlock(void* buffer, size_t size) override;  // Always returns 0 in this class.
+ bool ReadNextBlock(bool bRestart) override;  // Hands out the whole buffer as one block.
+ const uint8_t* GetBlockBuffer() override;
+ size_t GetBlockSize() override;
+ FX_FILESIZE GetBlockOffset() override;  // Always 0: the only block starts the stream.
+
+ private:
+ CXML_DataBufAcc(const uint8_t* pBuffer, size_t size);
+ ~CXML_DataBufAcc() override;
+
+ const uint8_t* m_pBuffer;  // Not freed by this class.
+ size_t m_dwSize;  // Byte length of m_pBuffer.
+ size_t m_dwCurPos;  // 0 until the block is served, m_dwSize afterwards.
+};
+
+// Wraps |size| bytes at |pBuffer|; the read position starts at zero.
+CXML_DataBufAcc::CXML_DataBufAcc(const uint8_t* pBuffer, size_t size)
+    : m_pBuffer(pBuffer),
+      m_dwSize(size),
+      m_dwCurPos(0) {
+}
+
+// Nothing to release: the wrapped buffer is not freed here.
+CXML_DataBufAcc::~CXML_DataBufAcc() = default;
+
+// Reports whether the read position has reached the end of the buffer.
+bool CXML_DataBufAcc::IsEOF() {
+  const bool bBytesLeft = m_dwCurPos < m_dwSize;
+  return !bBytesLeft;
+}
+
+// Returns the current read position converted to a file offset.
+FX_FILESIZE CXML_DataBufAcc::GetPosition() {
+  const FX_FILESIZE nPos = static_cast<FX_FILESIZE>(m_dwCurPos);
+  return nPos;
+}
+
+// Copies nothing and reports zero bytes read; |buffer| and |size| are unused.
+size_t CXML_DataBufAcc::ReadBlock(void* buffer, size_t size) {
+  const size_t nBytesRead = 0;
+  return nBytesRead;
+}
+
+// Serves the whole buffer as a single block. Returns true the first time
+// (or again after |bRestart|), and false once the block has been consumed or
+// when the buffer is empty.
+bool CXML_DataBufAcc::ReadNextBlock(bool bRestart) {
+  const size_t nPos = bRestart ? 0 : m_dwCurPos;
+  if (nPos >= m_dwSize) {
+    m_dwCurPos = nPos;
+    return false;
+  }
+
+  m_dwCurPos = m_dwSize;
+  return true;
+}
+
+// Returns the start of the wrapped buffer, which is also the only block.
+const uint8_t* CXML_DataBufAcc::GetBlockBuffer() {
+  const uint8_t* const pBlock = m_pBuffer;
+  return pBlock;
+}
+
+// Returns the block length, which equals the full buffer size.
+size_t CXML_DataBufAcc::GetBlockSize() {
+  const size_t nBlockSize = m_dwSize;
+  return nBlockSize;
+}
+
+// The single block always begins at stream offset zero.
+FX_FILESIZE CXML_DataBufAcc::GetBlockOffset() {
+  const FX_FILESIZE nOffset = 0;
+  return nOffset;
+}
+
+class CXML_DataStmAcc : public IFX_BufferedReadStream {  // Reads a seekable stream block by block into an internally owned buffer.
+ public:
+ template <typename T, typename... Args>
+ friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args);  // Lets MakeRetain reach the private constructor.
+
+ // IFX_BufferedReadStream
+ bool IsEOF() override;  // True when the current block ends at or past the stream size.
+ FX_FILESIZE GetPosition() override;  // Offset just past the current block.
+ size_t ReadBlock(void* buffer, size_t size) override;  // Always returns 0 in this class.
+ bool ReadNextBlock(bool bRestart) override;  // Loads the block following the current one.
+ const uint8_t* GetBlockBuffer() override;
+ size_t GetBlockSize() override;
+ FX_FILESIZE GetBlockOffset() override;  // Stream offset of the current block.
+
+ private:
+ explicit CXML_DataStmAcc(
+ const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead);
+ ~CXML_DataStmAcc() override;
+
+ CFX_RetainPtr<IFX_SeekableReadStream> m_pFileRead;  // Source stream; asserted non-null by the constructor.
+ uint8_t* m_pBuffer;  // Allocated on the first ReadNextBlock(); released in the destructor.
+ FX_FILESIZE m_nStart;  // Stream offset of the current block.
+ size_t m_dwSize;  // Byte length of the current block.
+};
+
+// Binds the accessor to |pFileRead|, which must not be null. No block is
+// loaded and no buffer is allocated until ReadNextBlock() is called.
+CXML_DataStmAcc::CXML_DataStmAcc(
+    const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead)
+    : m_pFileRead(pFileRead),
+      m_pBuffer(nullptr),
+      m_nStart(0),
+      m_dwSize(0) {
+  ASSERT(m_pFileRead);
+}
+
+// Releases the block buffer obtained in ReadNextBlock().
+CXML_DataStmAcc::~CXML_DataStmAcc() {
+  uint8_t* const pOwned = m_pBuffer;
+  FX_Free(pOwned);
+}
+
+// Reports whether the current block reaches the end of the source stream.
+bool CXML_DataStmAcc::IsEOF() {
+  const FX_FILESIZE nBlockEnd = m_nStart + static_cast<FX_FILESIZE>(m_dwSize);
+  return nBlockEnd >= m_pFileRead->GetSize();
+}
+
+// Returns the stream offset immediately after the current block.
+FX_FILESIZE CXML_DataStmAcc::GetPosition() {
+  const FX_FILESIZE nBlockLen = static_cast<FX_FILESIZE>(m_dwSize);
+  return m_nStart + nBlockLen;
+}
+
+// Copies nothing and reports zero bytes read; |buffer| and |size| are unused.
+size_t CXML_DataStmAcc::ReadBlock(void* buffer, size_t size) {
+  const size_t nBytesRead = 0;
+  return nBytesRead;
+}
+
+// Loads the block that follows the current one (at most 32 KiB) from the
+// source stream into the internal buffer.
+//
+// |bRestart| rewinds to the beginning of the stream first, so the block that
+// gets loaded is the one at offset 0.
+//
+// Returns false when there is no data left past the current block, otherwise
+// the result of the underlying stream's ReadBlock().
+bool CXML_DataStmAcc::ReadNextBlock(bool bRestart) {
+  static const FX_FILESIZE FX_XMLDATASTREAM_BufferSize = 32 * 1024;
+
+  if (bRestart) {
+    // Drop the current block length together with the offset. Otherwise the
+    // advance below would add the stale length back and the restart would
+    // begin at the second block instead of the first.
+    m_nStart = 0;
+    m_dwSize = 0;
+  }
+
+  FX_FILESIZE nLength = m_pFileRead->GetSize();
+  m_nStart += static_cast<FX_FILESIZE>(m_dwSize);
+  if (m_nStart >= nLength)
+    return false;
+
+  m_dwSize = static_cast<size_t>(
+      std::min(FX_XMLDATASTREAM_BufferSize, nLength - m_nStart));
+
+  // Always allocate the maximum block size. Sizing the buffer to the first
+  // block only would let a later, larger block overrun it if the stream
+  // reports a bigger size on a subsequent call.
+  if (!m_pBuffer) {
+    m_pBuffer =
+        FX_Alloc(uint8_t, static_cast<size_t>(FX_XMLDATASTREAM_BufferSize));
+  }
+
+  return m_pFileRead->ReadBlock(m_pBuffer, m_nStart, m_dwSize);
+}
+
+// Returns the internal buffer holding the current block (null before the
+// first ReadNextBlock() call).
+const uint8_t* CXML_DataStmAcc::GetBlockBuffer() {
+  const uint8_t* const pBlock = m_pBuffer;
+  return pBlock;
+}
+
+// Returns the byte length of the current block.
+size_t CXML_DataStmAcc::GetBlockSize() {
+  const size_t nBlockSize = m_dwSize;
+  return nBlockSize;
+}
+
+// Returns the stream offset at which the current block starts.
+FX_FILESIZE CXML_DataStmAcc::GetBlockOffset() {
+  const FX_FILESIZE nOffset = m_nStart;
+  return nOffset;
+}
+
+} // namespace
+
+// Splits "space:name" at the first ':' into |bsSpace| and |bsName|.
+// Without a colon the whole input goes to |bsName| and |bsSpace| is left
+// untouched. An empty |bsFullName| leaves both outputs untouched.
+void FX_XML_SplitQualifiedName(const CFX_ByteStringC& bsFullName,
+                               CFX_ByteStringC& bsSpace,
+                               CFX_ByteStringC& bsName) {
+  if (bsFullName.IsEmpty())
+    return;
+
+  const FX_STRSIZE iColon = bsFullName.Find(':');
+  if (iColon == -1) {
+    bsName = bsFullName;
+    return;
+  }
+
+  bsSpace = bsFullName.Mid(0, iColon);
+  bsName = bsFullName.Mid(iColon + 1);
+}
+
+// Starts with no data source attached: all offsets are zero and there is no
+// current block.
+CXML_Parser::CXML_Parser()
+    : m_nOffset(0),
+      m_pBuffer(nullptr),
+      m_dwBufferSize(0),
+      m_nBufferOffset(0),
+      m_dwIndex(0) {
+}
+
+// No explicit cleanup is needed; members clean up after themselves.
+CXML_Parser::~CXML_Parser() = default;
+
+// Attaches the parser to the |size| bytes at |pBuffer| and loads the first
+// block. Returns false when there is nothing to read.
+bool CXML_Parser::Init(const uint8_t* pBuffer, size_t size) {
+  m_nOffset = 0;
+  m_pDataAcc = pdfium::MakeRetain<CXML_DataBufAcc>(pBuffer, size);
+  return ReadNextBlock();
+}
+
+// Asks the data source for its next block and, on success, points the parser
+// at the start of it. Returns true only when a non-empty block is available;
+// on failure the previous block state is left unchanged.
+bool CXML_Parser::ReadNextBlock() {
+  const bool bHaveBlock = m_pDataAcc->ReadNextBlock();
+  if (bHaveBlock) {
+    m_pBuffer = m_pDataAcc->GetBlockBuffer();
+    m_dwBufferSize = m_pDataAcc->GetBlockSize();
+    m_nBufferOffset = m_pDataAcc->GetBlockOffset();
+    m_dwIndex = 0;
+  }
+  return bHaveBlock && m_dwBufferSize > 0;
+}
+
+// True when the data source has no further blocks and the current block has
+// been fully consumed.
+bool CXML_Parser::IsEOF() {
+  if (!m_pDataAcc->IsEOF())
+    return false;
+  return !(m_dwIndex < m_dwBufferSize);
+}
+
+// Advances past bytes flagged as whitespace, loading further blocks when the
+// current one is exhausted, and keeps m_nOffset in sync with the position.
+void CXML_Parser::SkipWhiteSpaces() {
+  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+  if (IsEOF())
+    return;
+
+  for (;;) {
+    for (; m_dwIndex < m_dwBufferSize; ++m_dwIndex) {
+      if (!g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex]))
+        break;
+    }
+    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+
+    // Stop on a non-whitespace byte, at end of input, or when no further
+    // block can be loaded.
+    if (m_dwIndex < m_dwBufferSize || IsEOF() || !ReadNextBlock())
+      return;
+  }
+}
+
+// Reads a (possibly qualified) name at the current position. Bytes flagged as
+// name characters are collected; each ':' moves what has been collected so far
+// into |*space| and starts over. |*name| receives whatever was collected last.
+// At end of input neither output is touched.
+void CXML_Parser::GetName(CFX_ByteString* space, CFX_ByteString* name) {
+  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+  if (IsEOF())
+    return;
+
+  CFX_ByteTextBuf collected;
+  for (;;) {
+    for (; m_dwIndex < m_dwBufferSize; ++m_dwIndex) {
+      const uint8_t cur = m_pBuffer[m_dwIndex];
+      if (cur == ':') {
+        *space = collected.AsStringC();
+        collected.Clear();
+        continue;
+      }
+      if (!g_FXCRT_XML_IsNameChar(cur))
+        break;
+      collected.AppendChar(cur);
+    }
+    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+
+    // Stop on a non-name byte, at end of input, or when no further block can
+    // be loaded.
+    if (m_dwIndex < m_dwBufferSize || IsEOF() || !ReadNextBlock())
+      break;
+  }
+  *name = collected.AsStringC();
+}
+
+// Advances the read position to just past the next occurrence of |str|,
+// loading further blocks as needed. If |str| never occurs, the remainder of
+// the stream is drained and the position is left at the end of the last
+// block.
+//
+// On a mismatch the matcher falls back to the longest prefix of |str| that is
+// still a suffix of the bytes seen, rather than restarting from scratch. This
+// matters for terminators preceded by a repeat of their own leading bytes:
+// "]]]>" must end a CDATA section whose text ends in ']', and "--->" must
+// match "-->".
+//
+// An empty |str| is treated as already matched and nothing is skipped.
+void CXML_Parser::SkipLiterals(const CFX_ByteStringC& str) {
+  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+  if (IsEOF())
+    return;
+
+  const int32_t iLen = str.GetLength();
+  if (iLen <= 0)
+    return;
+
+  int32_t i = 0;  // Number of leading bytes of |str| matched so far.
+  do {
+    while (m_dwIndex < m_dwBufferSize) {
+      const uint8_t ch = m_pBuffer[m_dwIndex++];
+      if (str.GetAt(i) != ch) {
+        // Find the largest k <= i for which the first k bytes of |str| equal
+        // the last k bytes of (matched prefix + ch). The patterns used by
+        // this parser are at most three bytes, so a direct search is cheap.
+        int32_t k = i;
+        for (; k > 0; --k) {
+          if (str.GetAt(k - 1) != ch)
+            continue;
+          int32_t j = 0;
+          while (j < k - 1 && str.GetAt(j) == str.GetAt(i - k + 1 + j))
+            ++j;
+          if (j == k - 1)
+            break;
+        }
+        i = k;
+        continue;
+      }
+      if (++i == iLen)
+        break;
+    }
+    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+    if (i == iLen)
+      return;
+
+    if (m_dwIndex < m_dwBufferSize || IsEOF())
+      break;
+  } while (ReadNextBlock());
+
+  // |str| was not found: consume whatever is left of the stream.
+  while (!m_pDataAcc->IsEOF()) {
+    ReadNextBlock();
+    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwBufferSize);
+  }
+  m_dwIndex = m_dwBufferSize;
+}
+
+uint32_t CXML_Parser::GetCharRef() {  // Decodes the entity/character reference at the read position (callers in this file have already consumed '&') and returns its code.
+ m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+ if (IsEOF())
+ return 0;  // Nothing to decode at end of input.
+
+ uint8_t ch;
+ int32_t iState = 0;  // 0: start, 1: named entity, 2: just after '#', 3: decimal digits, 4: hex digits, 10: ';' consumed.
+ CFX_ByteTextBuf buf;  // Collects the entity name in state 1.
+ uint32_t code = 0;  // Stays 0 when the entity name is not one of the five handled below.
+ do {
+ while (m_dwIndex < m_dwBufferSize) {
+ ch = m_pBuffer[m_dwIndex];
+ switch (iState) {
+ case 0:
+ if (ch == '#') {
+ m_dwIndex++;
+ iState = 2;
+ break;
+ }
+ iState = 1;  // Not numeric: deliberately falls through so this byte is handled as a name byte.
+ case 1:
+ m_dwIndex++;
+ if (ch == ';') {
+ CFX_ByteStringC ref = buf.AsStringC();
+ if (ref == "gt")
+ code = '>';
+ else if (ref == "lt")
+ code = '<';
+ else if (ref == "amp")
+ code = '&';
+ else if (ref == "apos")
+ code = '\'';
+ else if (ref == "quot")
+ code = '"';
+ iState = 10;
+ break;
+ }
+ buf.AppendByte(ch);  // Any byte other than ';' is taken as part of the name.
+ break;
+ case 2:
+ if (ch == 'x') {
+ m_dwIndex++;
+ iState = 4;
+ break;
+ }
+ iState = 3;  // No 'x': deliberately falls through so this byte is handled as a decimal digit.
+ case 3:
+ m_dwIndex++;
+ if (ch == ';') {
+ iState = 10;
+ break;
+ }
+ if (g_FXCRT_XML_IsDigital(ch))  // Non-digit bytes are consumed and ignored.
+ code = code * 10 + FXSYS_toDecimalDigit(static_cast<wchar_t>(ch));
+ break;
+ case 4:
+ m_dwIndex++;
+ if (ch == ';') {
+ iState = 10;
+ break;
+ }
+ uint8_t nHex =
+ g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar;  // Isolates the hex class of this byte; 0 means not a hex digit.
+ if (nHex) {
+ if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) {
+ code =
+ (code << 4) + FXSYS_toDecimalDigit(static_cast<wchar_t>(ch));
+ } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) {
+ code = (code << 4) + ch - 87;  // 'a' (97) - 87 == 10.
+ } else {
+ code = (code << 4) + ch - 55;  // 'A' (65) - 55 == 10.
+ }
+ }
+ break;
+ }
+ if (iState == 10)
+ break;
+ }
+ m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+ if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) {
+ break;
+ }
+ } while (ReadNextBlock());  // The reference continues into the next block.
+ return code;
+}
+
+// Reads a quoted attribute value at the current position into |value|.
+// The first byte must be ' or "; otherwise the function returns without
+// touching |value|. Bytes up to the matching quote are fed through a UTF-8
+// decoder, with '&' references resolved via GetCharRef(). If input ends
+// inside the value, what was decoded so far is stored.
+void CXML_Parser::GetAttrValue(CFX_WideString& value) {
+  m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+  if (IsEOF())
+    return;
+
+  CFX_UTF8Decoder decoder;
+  uint8_t quote = 0;  // Opening quote byte; 0 until it has been seen.
+  uint8_t cur = 0;
+  for (;;) {
+    while (m_dwIndex < m_dwBufferSize) {
+      cur = m_pBuffer[m_dwIndex];
+      if (quote == 0) {
+        if (cur != '\'' && cur != '"')
+          return;
+
+        quote = cur;
+        cur = 0;
+        m_dwIndex++;
+        continue;
+      }
+
+      m_dwIndex++;
+      if (cur == quote)
+        break;
+
+      if (cur != '&') {
+        decoder.Input(cur);
+        continue;
+      }
+
+      decoder.AppendChar(GetCharRef());
+      if (IsEOF()) {
+        value = decoder.GetResult();
+        return;
+      }
+    }
+    m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+    if (cur == quote || m_dwIndex < m_dwBufferSize || IsEOF() ||
+        !ReadNextBlock()) {
+      break;
+    }
+  }
+  value = decoder.GetResult();
+}
+
+void CXML_Parser::GetTagName(bool bStartTag,  // Finds the next tag and reads its name; pass bStartTag = true when '<' has already been consumed.
+ bool* bEndTag,  // Set to true for a closing tag ("</name"), false otherwise.
+ CFX_ByteString* space,  // Receives the part before ':' via GetName().
+ CFX_ByteString* name) {  // Receives the tag name via GetName().
+ m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+ if (IsEOF())
+ return;  // None of the outputs, including *bEndTag, is written on this path.
+
+ *bEndTag = false;
+ uint8_t ch;
+ int32_t iState = bStartTag ? 1 : 0;  // 0: searching for '<', 1: looking at the byte right after '<'.
+ do {
+ while (m_dwIndex < m_dwBufferSize) {
+ ch = m_pBuffer[m_dwIndex];
+ switch (iState) {
+ case 0:
+ m_dwIndex++;
+ if (ch != '<')
+ break;  // Everything before the next '<' is discarded.
+
+ iState = 1;
+ break;
+ case 1:
+ if (ch == '?') {
+ m_dwIndex++;
+ SkipLiterals("?>");  // "<? ... ?>" is skipped entirely.
+ iState = 0;
+ break;
+ }
+ if (ch == '!') {
+ m_dwIndex++;
+ SkipLiterals("-->");  // NOTE(review): every "<!" construct is skipped up to the next "-->", not only comments - confirm this is intended for e.g. "<!DOCTYPE ...>".
+ iState = 0;
+ break;
+ }
+ if (ch == '/') {
+ m_dwIndex++;
+ GetName(space, name);
+ *bEndTag = true;
+ } else {
+ GetName(space, name);
+ *bEndTag = false;
+ }
+ return;  // A tag name has been read (it may be empty).
+ }
+ }
+ m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+ if (m_dwIndex < m_dwBufferSize || IsEOF())
+ break;
+ } while (ReadNextBlock());  // No tag in this block: continue in the next one.
+}
+
+std::unique_ptr<CXML_Element> CXML_Parser::ParseElement(CXML_Element* pParent,  // Parses one element with its attributes, text and child elements; pParent is passed to the new element's constructor.
+ bool bStartTag) {  // True when '<' has already been consumed (used for the recursive call below).
+ m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+ if (IsEOF())
+ return nullptr;
+
+ CFX_ByteString tag_name;
+ CFX_ByteString tag_space;
+ bool bEndTag;
+ GetTagName(bStartTag, &bEndTag, &tag_space, &tag_name);
+ if (tag_name.IsEmpty() || bEndTag)
+ return nullptr;  // No element starts here: no name was read, or a closing tag was found.
+
+ auto pElement = pdfium::MakeUnique<CXML_Element>(
+ pParent, tag_space.AsStringC(), tag_name.AsStringC());
+ do {  // Attribute list: repeated "name = value" pairs.
+ CFX_ByteString attr_space;
+ CFX_ByteString attr_name;
+ while (m_dwIndex < m_dwBufferSize) {
+ SkipWhiteSpaces();
+ if (IsEOF())
+ break;
+
+ if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex]))
+ break;  // Not the start of an attribute name: the attribute list is over.
+
+ GetName(&attr_space, &attr_name);
+ SkipWhiteSpaces();
+ if (IsEOF())
+ break;
+
+ if (m_pBuffer[m_dwIndex] != '=')
+ break;  // A name without '=' ends the attribute list; that name is dropped.
+
+ m_dwIndex++;
+ SkipWhiteSpaces();
+ if (IsEOF())
+ break;
+
+ CFX_WideString attr_value;
+ GetAttrValue(attr_value);
+ pElement->m_AttrMap.SetAt(attr_space, attr_name, attr_value);
+ }
+ m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+ if (m_dwIndex < m_dwBufferSize || IsEOF())
+ break;
+ } while (ReadNextBlock());
+ SkipWhiteSpaces();
+ if (IsEOF())
+ return pElement;  // Input ended inside the start tag: return what was parsed.
+
+ uint8_t ch = m_pBuffer[m_dwIndex++];
+ if (ch == '/') {  // Self-closing element.
+ m_dwIndex++;  // NOTE(review): steps over the next byte (presumably '>') without checking its value or that it lies in the current block - confirm.
+ m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+ return pElement;
+ }
+ if (ch != '>') {
+ m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+ return nullptr;  // Unexpected byte after the attribute list.
+ }
+ SkipWhiteSpaces();
+ if (IsEOF())
+ return pElement;
+
+ CFX_UTF8Decoder decoder;
+ CFX_WideTextBuf content;
+ bool bCDATA = false;  // Only ever assigned false in this function.
+ int32_t iState = 0;  // 0: text, 1: just after '<', 2: just after "<!", 10: this element's closing tag consumed.
+ do {
+ while (m_dwIndex < m_dwBufferSize) {
+ ch = m_pBuffer[m_dwIndex++];
+ switch (iState) {
+ case 0:
+ if (ch == '<') {
+ iState = 1;
+ } else if (ch == '&') {
+ decoder.ClearStatus();
+ decoder.AppendChar(GetCharRef());
+ } else {
+ decoder.Input(ch);
+ }
+ break;
+ case 1:
+ if (ch == '!') {
+ iState = 2;
+ } else if (ch == '?') {
+ SkipLiterals("?>");
+ SkipWhiteSpaces();
+ iState = 0;
+ } else if (ch == '/') {
+ CFX_ByteString space;
+ CFX_ByteString name;
+ GetName(&space, &name);  // The closing tag's name is read but not compared with this element's name.
+ SkipWhiteSpaces();
+ m_dwIndex++;  // Steps over one byte (presumably '>') without checking it.
+ iState = 10;
+ } else {  // Any other byte begins a child element.
+ content << decoder.GetResult();
+ CFX_WideString dataStr = content.MakeString();
+ if (!bCDATA)
+ dataStr.TrimRight(L" \t\r\n");
+
+ InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get());  // Flush the text gathered before the child.
+ content.Clear();
+ decoder.Clear();
+ bCDATA = false;
+ iState = 0;
+ m_dwIndex--;  // Back up so the recursive call sees the first byte of the tag name.
+ std::unique_ptr<CXML_Element> pSubElement(
+ ParseElement(pElement.get(), true));  // NOTE(review): recursion depth follows element nesting; no limit is visible here.
+ if (!pSubElement)
+ break;  // Leaves the switch only; parsing of this element continues.
+
+ pElement->m_Children.push_back(
+ {CXML_Element::Element, pSubElement.release()});  // m_Children takes over the raw pointer.
+ SkipWhiteSpaces();
+ }
+ break;
+ case 2:
+ if (ch == '[') {
+ SkipLiterals("]]>");  // The section up to "]]>" is skipped, not stored.
+ } else if (ch == '-') {
+ m_dwIndex++;
+ SkipLiterals("-->");
+ } else {
+ SkipLiterals(">");
+ }
+ decoder.Clear();
+ SkipWhiteSpaces();
+ iState = 0;
+ break;
+ }
+ if (iState == 10) {
+ break;
+ }
+ }
+ m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex);
+ if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF())
+ break;
+ } while (ReadNextBlock());
+ content << decoder.GetResult();  // Flush the trailing text of the element.
+ CFX_WideString dataStr = content.MakeString();
+ dataStr.TrimRight(L" \t\r\n");
+
+ InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get());
+ content.Clear();
+ decoder.Clear();
+ bCDATA = false;
+ return pElement;
+}
+
+// Appends |content| to |pElement| as a content child, tagged with |bCDATA|.
+// Empty content is ignored.
+void CXML_Parser::InsertContentSegment(bool bCDATA,
+                                       const CFX_WideStringC& content,
+                                       CXML_Element* pElement) {
+  if (!content.IsEmpty()) {
+    std::unique_ptr<CXML_Content> pSegment(new CXML_Content);
+    pSegment->Set(bCDATA, content);
+    // The child list stores raw pointers, so ownership is handed over here.
+    pElement->m_Children.push_back(
+        {CXML_Element::Content, pSegment.release()});
+  }
+}
+
+// Parses the |size| bytes at |pBuffer| as XML and returns the first element
+// found, or nullptr when the parser cannot be initialized or no element
+// is found.
+std::unique_ptr<CXML_Element> CXML_Element::Parse(const void* pBuffer,
+                                                  size_t size) {
+  const uint8_t* const pBytes = static_cast<const uint8_t*>(pBuffer);
+  CXML_Parser parser;
+  if (parser.Init(pBytes, size))
+    return parser.ParseElement(nullptr, false);
+
+  return nullptr;
+}