summary refs log tree commit diff
path: root/core/fxcrt/xml
diff options
context:
space:
mode:
Diffstat (limited to 'core/fxcrt/xml')
-rw-r--r-- core/fxcrt/xml/cfx_saxcontext.h 23
-rw-r--r-- core/fxcrt/xml/cfx_saxreader.cpp 730
-rw-r--r-- core/fxcrt/xml/cfx_saxreader.h 144
-rw-r--r-- core/fxcrt/xml/cfx_saxreaderhandler.cpp 128
-rw-r--r-- core/fxcrt/xml/cfx_saxreaderhandler.h 49
5 files changed, 1074 insertions, 0 deletions
diff --git a/core/fxcrt/xml/cfx_saxcontext.h b/core/fxcrt/xml/cfx_saxcontext.h
new file mode 100644
index 0000000000..7afebed98d
--- /dev/null
+++ b/core/fxcrt/xml/cfx_saxcontext.h
@@ -0,0 +1,23 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FXCRT_XML_CFX_SAXCONTEXT_H_
+#define CORE_FXCRT_XML_CFX_SAXCONTEXT_H_
+
+#include "core/fxcrt/fx_basic.h"
+#include "core/fxcrt/fx_string.h"
+#include "core/fxcrt/xml/cfx_saxreader.h"
+
+// Per-node scratch state used by the SAX handler: the text accumulated for
+// the node, its tag name, and its node kind (Unknown until a node is entered).
+class CFX_SAXContext {
+ public:
+  CFX_SAXContext() {}
+
+  CFX_ByteTextBuf m_TextBuf;
+  CFX_ByteString m_bsTagName;
+  CFX_SAXItem::Type m_eNode = CFX_SAXItem::Type::Unknown;
+};
+
+#endif // CORE_FXCRT_XML_CFX_SAXCONTEXT_H_
diff --git a/core/fxcrt/xml/cfx_saxreader.cpp b/core/fxcrt/xml/cfx_saxreader.cpp
new file mode 100644
index 0000000000..287eaaa5b8
--- /dev/null
+++ b/core/fxcrt/xml/cfx_saxreader.cpp
@@ -0,0 +1,730 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fxcrt/xml/cfx_saxreader.h"
+
+#include <algorithm>
+#include <utility>
+
+#include "core/fxcrt/xml/cfx_saxreaderhandler.h"
+#include "third_party/base/ptr_util.h"
+#include "third_party/base/stl_util.h"
+
+// Parser states of CFX_SAXReader. Each value is cast to int and used as an
+// index into g_FX_SAXReader_LPFParse, so the enumerator order must match the
+// order of the handlers in that table.
+enum class CFX_SaxMode {
+ Text = 0,
+ NodeStart,
+ DeclOrComment,
+ DeclNode,
+ Comment,
+ CommentContent,
+ TagName,
+ TagAttributeName,
+ TagAttributeEqual,
+ TagAttributeValue,
+ TagMaybeClose,
+ TagClose,
+ TagEnd,
+ TargetData,
+ // Number of states; used as the size of the dispatch table.
+ MAX
+};
+
+// Counters kept while a comment is being scanned: dashes seen in the opening
+// "<!--" and consecutive dashes seen that may belong to the closing "-->".
+class CFX_SAXCommentContext {
+ public:
+  CFX_SAXCommentContext() {}
+
+  int32_t m_iHeaderCount = 0;
+  int32_t m_iTailCount = 0;
+};
+
+namespace {
+
+// Largest number of bytes read from the stream into the buffer at once.
+const uint32_t kSaxFileBufSize = 32768;
+
+// Pointer to a per-state byte handler of CFX_SAXReader.
+using FX_SAXReader_LPFParse = void (CFX_SAXReader::*)();
+
+// Dispatch table indexed by CFX_SaxMode; entries are listed in enumerator
+// order.
+const FX_SAXReader_LPFParse
+    g_FX_SAXReader_LPFParse[static_cast<int>(CFX_SaxMode::MAX)] = {
+        &CFX_SAXReader::ParseText,               // Text
+        &CFX_SAXReader::ParseNodeStart,          // NodeStart
+        &CFX_SAXReader::ParseDeclOrComment,      // DeclOrComment
+        &CFX_SAXReader::ParseDeclNode,           // DeclNode
+        &CFX_SAXReader::ParseComment,            // Comment
+        &CFX_SAXReader::ParseCommentContent,     // CommentContent
+        &CFX_SAXReader::ParseTagName,            // TagName
+        &CFX_SAXReader::ParseTagAttributeName,   // TagAttributeName
+        &CFX_SAXReader::ParseTagAttributeEqual,  // TagAttributeEqual
+        &CFX_SAXReader::ParseTagAttributeValue,  // TagAttributeValue
+        &CFX_SAXReader::ParseMaybeClose,         // TagMaybeClose
+        &CFX_SAXReader::ParseTagClose,           // TagClose
+        &CFX_SAXReader::ParseTagEnd,             // TagEnd
+        &CFX_SAXReader::ParseTargetData,         // TargetData
+};
+
+}  // namespace
+
+// Creates an empty reader with no stream attached and no buffer allocated.
+CFX_SAXFile::CFX_SAXFile()
+    : m_dwStart(0),
+      m_dwEnd(0),
+      m_dwCur(0),
+      m_pBuf(nullptr),
+      m_dwBufSize(0),
+      m_dwBufIndex(0) {}
+
+// Frees the read buffer and drops the stream reference, so that an object
+// destroyed without an explicit Reset() does not leak |m_pBuf|. Reset() is a
+// no-op for the buffer when it has already been released.
+CFX_SAXFile::~CFX_SAXFile() {
+  Reset();
+}
+
+// Attaches |pFile| and reads the first block of the range
+// [dwStart, dwStart + dwLen) into a newly allocated buffer.
+//
+// |dwLen| is clamped to the bytes available after |dwStart|; passing
+// static_cast<uint32_t>(-1) therefore selects everything up to the end of the
+// stream. Returns false, leaving no buffer allocated, when the range is empty
+// or the first read fails.
+bool CFX_SAXFile::StartFile(const CFX_RetainPtr<IFX_SeekableReadStream>& pFile,
+                            uint32_t dwStart,
+                            uint32_t dwLen) {
+  ASSERT(!m_pFile && pFile);
+  uint32_t dwSize = pFile->GetSize();
+  if (dwStart >= dwSize)
+    return false;
+
+  // dwStart < dwSize here, so this subtraction cannot wrap. Comparing against
+  // the remaining byte count avoids the uint32_t overflow that
+  // |dwStart + dwLen| could produce for large |dwLen|.
+  const uint32_t dwAvailable = dwSize - dwStart;
+  if (dwLen > dwAvailable)
+    dwLen = dwAvailable;
+
+  if (dwLen == 0)
+    return false;
+
+  // Release any buffer still held from an earlier attempt before allocating.
+  if (m_pBuf) {
+    FX_Free(m_pBuf);
+    m_pBuf = nullptr;
+  }
+
+  m_dwBufSize = std::min(dwLen, kSaxFileBufSize);
+  m_pBuf = FX_Alloc(uint8_t, m_dwBufSize);
+  if (!pFile->ReadBlock(m_pBuf, dwStart, m_dwBufSize)) {
+    // Do not keep a buffer around for a stream that was never attached.
+    FX_Free(m_pBuf);
+    m_pBuf = nullptr;
+    m_dwBufSize = 0;
+    return false;
+  }
+
+  m_dwStart = dwStart;
+  m_dwEnd = dwStart + dwLen;
+  m_dwCur = dwStart;
+  m_pFile = pFile;
+  m_dwBufIndex = 0;
+  return true;
+}
+
+// Reads the next block, starting at |m_dwCur|, into the existing buffer and
+// rewinds the buffer index. Returns false when nothing is left in the range or
+// the read fails.
+bool CFX_SAXFile::ReadNextBlock() {
+  ASSERT(m_pFile);
+  const uint32_t dwRemaining = m_dwEnd - m_dwCur;
+  if (dwRemaining == 0)
+    return false;
+
+  m_dwBufSize = std::min(dwRemaining, kSaxFileBufSize);
+  if (m_pFile->ReadBlock(m_pBuf, m_dwCur, m_dwBufSize)) {
+    m_dwBufIndex = 0;
+    return true;
+  }
+  return false;
+}
+
+// Drops the stream reference and frees the read buffer, if one is allocated.
+void CFX_SAXFile::Reset() {
+  m_pFile = nullptr;
+  if (!m_pBuf)
+    return;
+
+  FX_Free(m_pBuf);
+  m_pBuf = nullptr;
+}
+
+// Creates an idle reader (state -1) with 256-byte data and name buffers.
+//
+// Every scalar member is given a defined value here, using the same values
+// that Reset() and StartParse() assign. The public Append*/Parse* methods read
+// members such as |m_iDataPos|, so they must not be left indeterminate before
+// the first StartParse(). Initializers follow the declaration order.
+CFX_SAXReader::CFX_SAXReader()
+    : m_File(),
+      m_pHandler(nullptr),
+      m_iState(-1),
+      m_dwItemID(0),
+      m_eMode(CFX_SaxMode::Text),
+      m_ePrevMode(CFX_SaxMode::Text),
+      m_bCharData(false),
+      m_CurByte(0),
+      m_dwDataOffset(0),
+      m_SkipChar(0),
+      m_dwNodePos(0),
+      m_pszData(nullptr),
+      m_iDataSize(256),
+      m_iDataLength(0),
+      m_iEntityStart(-1),
+      m_iDataPos(0),
+      m_pszName(nullptr),
+      m_iNameSize(256),
+      m_iNameLength(0),
+      m_dwParseMode(0),
+      m_pCommentContext(nullptr) {
+  m_pszData = FX_Alloc(uint8_t, m_iDataSize);
+  m_pszName = FX_Alloc(uint8_t, m_iNameSize);
+}
+// Resets the parser state, then frees the name and data buffers.
+CFX_SAXReader::~CFX_SAXReader() {
+  Reset();
+
+  if (m_pszName) {
+    FX_Free(m_pszName);
+    m_pszName = nullptr;
+  }
+  if (m_pszData) {
+    FX_Free(m_pszData);
+    m_pszData = nullptr;
+  }
+}
+
+// Returns the reader to its pre-parse condition: releases the file, empties
+// the item and skip stacks, and zeroes the buffer positions and counters.
+void CFX_SAXReader::Reset() {
+  m_File.Reset();
+
+  // Stacks have no clear(); assign freshly constructed ones instead.
+  m_Stack = std::stack<std::unique_ptr<CFX_SAXItem>>();
+  m_SkipStack = std::stack<char>();
+
+  m_dwItemID = 0;
+  m_SkipChar = 0;
+  m_iDataPos = 0;
+  m_iDataLength = 0;
+  m_iNameLength = 0;
+  m_iEntityStart = -1;
+  m_pCommentContext.reset();
+}
+
+// Pushes a new item with the next ID; it inherits the skip flag of the item
+// currently on top of the stack, if there is one.
+void CFX_SAXReader::Push() {
+  const bool bInheritedSkip = !m_Stack.empty() && m_Stack.top()->m_bSkip;
+  auto pItem = pdfium::MakeUnique<CFX_SAXItem>(++m_dwItemID);
+  pItem->m_bSkip = bInheritedSkip;
+  m_Stack.push(std::move(pItem));
+}
+
+// Removes the top item; does nothing when the stack is already empty.
+void CFX_SAXReader::Pop() {
+  if (m_Stack.empty())
+    return;
+
+  m_Stack.pop();
+}
+
+// Returns the item on top of the stack, or nullptr when the stack is empty.
+CFX_SAXItem* CFX_SAXReader::GetCurrentItem() const {
+  if (m_Stack.empty())
+    return nullptr;
+
+  return m_Stack.top().get();
+}
+
+// Stores |ch| at the current position of the data buffer, growing the buffer
+// first if needed, and advances the position.
+void CFX_SAXReader::AppendData(uint8_t ch) {
+  ReallocDataBuffer();
+  m_pszData[m_iDataPos] = ch;
+  ++m_iDataPos;
+}
+
+// Stores |ch| in the name buffer, growing it first if needed. The write
+// position is |m_iDataPos|, the same counter the data buffer uses.
+void CFX_SAXReader::AppendName(uint8_t ch) {
+  ReallocNameBuffer();
+  m_pszName[m_iDataPos] = ch;
+  ++m_iDataPos;
+}
+
+// Grows the data buffer when the write position has reached its capacity:
+// doubles it up to 1 MiB, then extends it by 1 MiB per step.
+void CFX_SAXReader::ReallocDataBuffer() {
+  if (m_iDataPos < m_iDataSize)
+    return;
+
+  const int32_t kStep = 1024 * 1024;
+  m_iDataSize = m_iDataSize <= kStep ? m_iDataSize * 2 : m_iDataSize + kStep;
+  m_pszData = (uint8_t*)FX_Realloc(uint8_t, m_pszData, m_iDataSize);
+}
+
+// Grows the name buffer when the shared write position |m_iDataPos| has
+// reached its capacity: doubles it up to 1 MiB, then extends it by 1 MiB per
+// step.
+void CFX_SAXReader::ReallocNameBuffer() {
+  if (m_iDataPos < m_iNameSize)
+    return;
+
+  const int32_t kStep = 1024 * 1024;
+  m_iNameSize = m_iNameSize <= kStep ? m_iNameSize * 2 : m_iNameSize + kStep;
+  m_pszName = (uint8_t*)FX_Realloc(uint8_t, m_pszName, m_iNameSize);
+}
+
+// Returns true when |ch| is a byte below 0x21 and the parse mode does not
+// request that such bytes be kept (CFX_SaxParseMode_NotSkipSpace).
+bool CFX_SAXReader::SkipSpace(uint8_t ch) {
+  if (m_dwParseMode & CFX_SaxParseMode_NotSkipSpace)
+    return false;
+
+  return ch < 0x21;
+}
+
+// Prepares the reader to parse |dwLen| bytes of |pFile| starting at |dwStart|.
+//
+// Any previous parse state is discarded. |dwParseMode| is a bit mask of
+// CFX_SaxParseMode flags. Returns 0 on success, or -1 when the file range
+// cannot be opened, in which case the reader state stays at -1.
+int32_t CFX_SAXReader::StartParse(
+    const CFX_RetainPtr<IFX_SeekableReadStream>& pFile,
+    uint32_t dwStart,
+    uint32_t dwLen,
+    uint32_t dwParseMode) {
+  m_iState = -1;
+  Reset();
+  if (!m_File.StartFile(pFile, dwStart, dwLen))
+    return -1;
+
+  m_iState = 0;
+  m_eMode = CFX_SaxMode::Text;
+  m_ePrevMode = CFX_SaxMode::Text;
+  m_bCharData = false;
+  m_dwDataOffset = 0;
+  m_dwParseMode = dwParseMode;
+  // Root item. It is created as a unique_ptr up front, as Push() does, so the
+  // allocation is owned even if growing the stack fails.
+  m_Stack.push(pdfium::MakeUnique<CFX_SAXItem>(++m_dwItemID));
+  return 0;
+}
+
+// Feeds buffered bytes through the state machine, one block at a time.
+//
+// Returns the current state: a progress percentage in [0, 100], -2 when the
+// next block could not be read, or the unchanged state when it is already
+// negative or above 99. If |pPause| is given and asks to pause, the function
+// returns after the block it has just finished.
+int32_t CFX_SAXReader::ContinueParse(IFX_Pause* pPause) {
+  if (m_iState < 0 || m_iState > 99)
+    return m_iState;
+
+  while (m_File.m_dwCur < m_File.m_dwEnd) {
+    uint32_t& index = m_File.m_dwBufIndex;
+    const uint32_t size = m_File.m_dwBufSize;
+    const uint8_t* pBuf = m_File.m_pBuf;
+    while (index < size) {
+      m_CurByte = pBuf[index];
+      (this->*g_FX_SAXReader_LPFParse[static_cast<int>(m_eMode)])();
+      index++;
+    }
+    m_File.m_dwCur += index;
+
+    // Compute the percentage in 64 bits: a 32-bit |done * 100| wraps once more
+    // than about 42 MB has been consumed, which could keep the state below 100
+    // even after the whole range was parsed.
+    const uint64_t qwDone = m_File.m_dwCur - m_File.m_dwStart;
+    const uint64_t qwTotal = m_File.m_dwEnd - m_File.m_dwStart;
+    m_iState = static_cast<int32_t>(qwDone * 100 / qwTotal);
+    if (m_File.m_dwCur >= m_File.m_dwEnd)
+      break;
+
+    if (!m_File.ReadNextBlock()) {
+      m_iState = -2;
+      break;
+    }
+    m_dwDataOffset = 0;
+    if (pPause && pPause->NeedToPauseNow())
+      break;
+  }
+  return m_iState;
+}
+// Appends |ch| to the data buffer and decodes entity references in place.
+//
+// An '&' marks the start of a candidate entity (|m_iEntityStart|). When a ';'
+// arrives while an entity is open, the text between them is examined:
+// "#NNN" / "#xHH" numeric references and the five named entities amp, lt, gt,
+// apos and quot are replaced by a single byte written at the entity start,
+// unless the matching CFX_SaxParseMode_NotConvert_* flag is set. Anything not
+// converted is left in the buffer as written, including the ';'.
+void CFX_SAXReader::ParseChar(uint8_t ch) {
+ ReallocDataBuffer();
+ m_pszData[m_iDataPos] = ch;
+ if (m_iEntityStart > -1 && ch == ';') {
+ // Remember where the entity began so a conversion can be detected below.
+ int32_t iSaveEntityStart = m_iEntityStart;
+ // Entity text without the leading '&' and the trailing ';'.
+ CFX_ByteString csEntity(m_pszData + m_iEntityStart + 1,
+ m_iDataPos - m_iEntityStart - 1);
+ int32_t iLen = csEntity.GetLength();
+ if (iLen > 0) {
+ if (csEntity[0] == '#') {
+ if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_sharp) == 0) {
+ // NOTE(review): |ch| is a uint8_t, so the accumulated value is reduced
+ // modulo 256; references above 255 do not round-trip.
+ ch = 0;
+ uint8_t w;
+ if (iLen > 1 && csEntity[1] == 'x') {
+ // Hexadecimal reference; stops at the first non-hex digit.
+ for (int32_t i = 2; i < iLen; i++) {
+ w = csEntity[i];
+ if (w >= '0' && w <= '9') {
+ ch = (ch << 4) + w - '0';
+ } else if (w >= 'A' && w <= 'F') {
+ ch = (ch << 4) + w - 55;
+ } else if (w >= 'a' && w <= 'f') {
+ ch = (ch << 4) + w - 87;
+ } else {
+ break;
+ }
+ }
+ } else {
+ // Decimal reference; stops at the first non-digit.
+ for (int32_t i = 1; i < iLen; i++) {
+ w = csEntity[i];
+ if (w < '0' || w > '9') {
+ break;
+ }
+ ch = ch * 10 + w - '0';
+ }
+ }
+ // A zero result is treated as "not converted".
+ if (ch != 0) {
+ m_pszData[m_iEntityStart++] = ch;
+ }
+ }
+ } else {
+ if (csEntity.Compare("amp") == 0) {
+ if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_amp) == 0) {
+ m_pszData[m_iEntityStart++] = '&';
+ }
+ } else if (csEntity.Compare("lt") == 0) {
+ if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_lt) == 0) {
+ m_pszData[m_iEntityStart++] = '<';
+ }
+ } else if (csEntity.Compare("gt") == 0) {
+ if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_gt) == 0) {
+ m_pszData[m_iEntityStart++] = '>';
+ }
+ } else if (csEntity.Compare("apos") == 0) {
+ if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_apos) == 0) {
+ m_pszData[m_iEntityStart++] = '\'';
+ }
+ } else if (csEntity.Compare("quot") == 0) {
+ if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_quot) == 0) {
+ m_pszData[m_iEntityStart++] = '\"';
+ }
+ }
+ }
+ }
+ if (iSaveEntityStart != m_iEntityStart) {
+ // Converted: the write position moves back to just after the new byte.
+ m_iDataPos = m_iEntityStart;
+ m_iEntityStart = -1;
+ } else {
+ // Not converted: keep the raw text and the ';' just stored.
+ m_iDataPos++;
+ m_iEntityStart = -1;
+ }
+ } else {
+ // Only the first '&' opens an entity; later ones are ordinary data until
+ // the entity is closed.
+ if (m_iEntityStart < 0 && ch == '&') {
+ m_iEntityStart = m_iDataPos;
+ }
+ m_iDataPos++;
+ }
+}
+
+// Text state: accumulates character data until a '<' opens a node. On '<' any
+// pending text is reported, a new item is pushed and the reader moves to the
+// NodeStart state. Leading skippable bytes are dropped.
+void CFX_SAXReader::ParseText() {
+  if (m_CurByte != '<') {
+    const bool bLeadingSpace = m_iDataPos < 1 && SkipSpace(m_CurByte);
+    if (!bLeadingSpace)
+      ParseChar(m_CurByte);
+    return;
+  }
+
+  if (m_iDataPos > 0) {
+    m_iDataLength = m_iDataPos;
+    m_iDataPos = 0;
+    if (m_pHandler)
+      NotifyData();
+  }
+  Push();
+  m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex;
+  m_eMode = CFX_SaxMode::NodeStart;
+}
+
+// NodeStart state: classifies the node from the byte that follows '<'.
+void CFX_SAXReader::ParseNodeStart() {
+  switch (m_CurByte) {
+    case '?':
+      // Processing instruction.
+      GetCurrentItem()->m_eNode = CFX_SAXItem::Type::Instruction;
+      m_eMode = CFX_SaxMode::TagName;
+      return;
+    case '!':
+      m_eMode = CFX_SaxMode::DeclOrComment;
+      return;
+    case '/':
+      m_eMode = CFX_SaxMode::TagEnd;
+      return;
+    case '>':
+      // "<>": discard the item pushed for this node.
+      Pop();
+      m_eMode = CFX_SaxMode::Text;
+      return;
+    default:
+      break;
+  }
+
+  // Bytes up to 0x20 are ignored; anything else starts a tag name.
+  if (m_CurByte <= 0x20)
+    return;
+
+  m_dwDataOffset = m_File.m_dwBufIndex;
+  GetCurrentItem()->m_eNode = CFX_SAXItem::Type::Tag;
+  m_eMode = CFX_SaxMode::TagName;
+  AppendData(m_CurByte);
+}
+
+// DeclOrComment state (after "<!"): a '-' starts a comment; any other byte
+// starts a declaration-like node that is consumed by SkipNode() until the
+// matching '>'.
+void CFX_SAXReader::ParseDeclOrComment() {
+  if (m_CurByte != '-') {
+    m_eMode = CFX_SaxMode::DeclNode;
+    m_dwDataOffset = m_File.m_dwBufIndex;
+    m_SkipChar = '>';
+    m_SkipStack.push('>');
+    SkipNode();
+    return;
+  }
+
+  m_eMode = CFX_SaxMode::Comment;
+  GetCurrentItem()->m_eNode = CFX_SAXItem::Type::Comment;
+  if (!m_pCommentContext)
+    m_pCommentContext = pdfium::MakeUnique<CFX_SAXCommentContext>();
+  m_pCommentContext->m_iHeaderCount = 1;
+  m_pCommentContext->m_iTailCount = 0;
+}
+
+// Comment state: consumes the byte after "<!-" (without inspecting it),
+// records the node position and switches to reading the comment body.
+void CFX_SAXReader::ParseComment() {
+  m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex;
+  m_eMode = CFX_SaxMode::CommentContent;
+  m_pCommentContext->m_iHeaderCount = 2;
+}
+
+// CommentContent state: collects the comment body. Dashes are counted rather
+// than stored; a '>' after exactly two pending dashes ends the comment. Any
+// other byte first flushes the pending dashes into the data buffer.
+void CFX_SAXReader::ParseCommentContent() {
+  if (m_CurByte == '-') {
+    m_pCommentContext->m_iTailCount++;
+    return;
+  }
+
+  if (m_CurByte == '>' && m_pCommentContext->m_iTailCount == 2) {
+    m_iDataLength = m_iDataPos;
+    m_iDataPos = 0;
+    if (m_pHandler)
+      NotifyTargetData();
+    Pop();
+    m_eMode = CFX_SaxMode::Text;
+    return;
+  }
+
+  for (; m_pCommentContext->m_iTailCount > 0;
+       m_pCommentContext->m_iTailCount--) {
+    AppendData('-');
+  }
+  AppendData(m_CurByte);
+}
+// DeclNode state: every byte is handed to SkipNode(), which tracks nested
+// delimiters until the node is closed.
+void CFX_SAXReader::ParseDeclNode() {
+ SkipNode();
+}
+// TagName state: accumulates the tag name until a byte below 0x21, '/', '>'
+// or '?' ends it, then reports the tag and picks the next state from the
+// terminating byte.
+void CFX_SAXReader::ParseTagName() {
+  const bool bEndsName = m_CurByte < 0x21 || m_CurByte == '/' ||
+                         m_CurByte == '>' || m_CurByte == '?';
+  if (!bEndsName) {
+    AppendData(m_CurByte);
+    return;
+  }
+
+  m_iDataLength = m_iDataPos;
+  m_iDataPos = 0;
+  if (m_pHandler)
+    NotifyEnter();
+
+  if (m_CurByte < 0x21) {
+    m_eMode = CFX_SaxMode::TagAttributeName;
+    return;
+  }
+  if (m_CurByte == '>') {
+    if (m_pHandler)
+      NotifyBreak();
+    m_eMode = CFX_SaxMode::Text;
+    return;
+  }
+
+  // '/' or '?': possibly the start of "/>" or "?>".
+  m_ePrevMode = m_eMode;
+  m_eMode = CFX_SaxMode::TagMaybeClose;
+}
+// TagAttributeName state: accumulates an attribute name in the name buffer.
+// A byte below 0x21 or '=' ends the name; '/', '?' and '>' end the attribute
+// list.
+void CFX_SAXReader::ParseTagAttributeName() {
+  const bool bSpace = m_CurByte < 0x21;
+  if (bSpace || m_CurByte == '=') {
+    // Ignore such bytes while no name byte has been collected yet.
+    if (bSpace && m_iDataPos < 1)
+      return;
+
+    m_iNameLength = m_iDataPos;
+    m_iDataPos = 0;
+    m_SkipChar = 0;
+    if (m_CurByte == '=')
+      m_eMode = CFX_SaxMode::TagAttributeValue;
+    else
+      m_eMode = CFX_SaxMode::TagAttributeEqual;
+    return;
+  }
+
+  switch (m_CurByte) {
+    case '/':
+    case '?':
+      m_ePrevMode = m_eMode;
+      m_eMode = CFX_SaxMode::TagMaybeClose;
+      return;
+    case '>':
+      if (m_pHandler)
+        NotifyBreak();
+      m_eMode = CFX_SaxMode::Text;
+      return;
+    default:
+      break;
+  }
+
+  // The first byte of the name records where the attribute starts.
+  if (m_iDataPos < 1)
+    m_dwDataOffset = m_File.m_dwBufIndex;
+  AppendName(m_CurByte);
+}
+
+// TagAttributeEqual state: waits for the '=' after an attribute name. Inside
+// a processing instruction any other byte instead turns the collected name
+// plus what follows into target data.
+void CFX_SAXReader::ParseTagAttributeEqual() {
+  if (m_CurByte == '=') {
+    m_SkipChar = 0;
+    m_eMode = CFX_SaxMode::TagAttributeValue;
+    return;
+  }
+
+  if (GetCurrentItem()->m_eNode != CFX_SAXItem::Type::Instruction)
+    return;
+
+  // Continue writing right after the name, separated by one space.
+  m_iDataPos = m_iNameLength;
+  AppendName(0x20);
+  m_eMode = CFX_SaxMode::TargetData;
+  ParseTargetData();
+}
+
+// TagAttributeValue state: before the opening quote, waits for ' or ";
+// afterwards collects the value until the same quote byte closes it and then
+// reports the attribute.
+void CFX_SAXReader::ParseTagAttributeValue() {
+  if (!m_SkipChar) {
+    // Not inside quotes yet: only an opening quote is of interest.
+    const bool bQuote = m_CurByte == '\'' || m_CurByte == '\"';
+    if (m_CurByte >= 0x21 && m_iDataPos < 1 && bQuote)
+      m_SkipChar = m_CurByte;
+    return;
+  }
+
+  if (m_SkipChar != m_CurByte) {
+    ParseChar(m_CurByte);
+    return;
+  }
+
+  // Closing quote.
+  m_iDataLength = m_iDataPos;
+  m_iDataPos = 0;
+  if (m_pHandler)
+    NotifyAttribute();
+  m_SkipChar = 0;
+  m_eMode = CFX_SaxMode::TagAttributeName;
+}
+
+// TagMaybeClose state: entered after a '/' or '?' that might begin "/>" or
+// "?>". If the current byte is '>', the node is closed. Otherwise the deferred
+// '/' or '?' is appended to the buffer of the state that was interrupted
+// (|m_ePrevMode|), that state is restored, and the current byte is
+// re-processed by that state's handler.
+void CFX_SAXReader::ParseMaybeClose() {
+ if (m_CurByte == '>') {
+ if (GetCurrentItem()->m_eNode == CFX_SAXItem::Type::Instruction) {
+ // For instructions the collected bytes are reported as target data.
+ m_iNameLength = m_iDataPos;
+ m_iDataPos = 0;
+ if (m_pHandler) {
+ NotifyTargetData();
+ }
+ }
+ ParseTagClose();
+ m_eMode = CFX_SaxMode::Text;
+ } else if (m_ePrevMode == CFX_SaxMode::TagName) {
+ AppendData('/');
+ m_eMode = CFX_SaxMode::TagName;
+ m_ePrevMode = CFX_SaxMode::Text;
+ ParseTagName();
+ } else if (m_ePrevMode == CFX_SaxMode::TagAttributeName) {
+ AppendName('/');
+ m_eMode = CFX_SaxMode::TagAttributeName;
+ m_ePrevMode = CFX_SaxMode::Text;
+ ParseTagAttributeName();
+ } else if (m_ePrevMode == CFX_SaxMode::TargetData) {
+ AppendName('?');
+ m_eMode = CFX_SaxMode::TargetData;
+ m_ePrevMode = CFX_SaxMode::Text;
+ ParseTargetData();
+ }
+}
+// Records the current position as the node position, reports the close to the
+// handler (if any) and pops the closed item.
+void CFX_SAXReader::ParseTagClose() {
+  m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex;
+  if (m_pHandler)
+    NotifyClose();
+
+  Pop();
+}
+// TagEnd state (after "</"): collects the closing tag name. On '>' the item
+// pushed for the "</...>" node itself is popped, the end is reported for the
+// element now on top, and that element is popped as well.
+void CFX_SAXReader::ParseTagEnd() {
+  if (m_CurByte < 0x21)
+    return;
+
+  if (m_CurByte != '>') {
+    ParseChar(m_CurByte);
+    return;
+  }
+
+  Pop();
+  m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex;
+  m_iDataLength = m_iDataPos;
+  m_iDataPos = 0;
+  if (m_pHandler)
+    NotifyEnd();
+  Pop();
+  m_eMode = CFX_SaxMode::Text;
+}
+// TargetData state: appends bytes to the name buffer; a '?' is held back
+// because it may begin the closing "?>".
+void CFX_SAXReader::ParseTargetData() {
+  if (m_CurByte != '?') {
+    AppendName(m_CurByte);
+    return;
+  }
+
+  m_ePrevMode = m_eMode;
+  m_eMode = CFX_SaxMode::TagMaybeClose;
+}
+// Consumes one byte of a "<!...>" node while tracking nested delimiters.
+//
+// |m_SkipStack| holds the closing delimiters still expected and |m_SkipChar|
+// mirrors its top (0 when empty). Bytes are recorded through ParseChar() for
+// as long as the stack is non-empty. When the outermost '>' is reached the
+// node ends: content of the form "[CDATA[" ... "]]" is reported as character
+// data, anything else is dropped.
+void CFX_SAXReader::SkipNode() {
+ // Inside a quoted string only the matching quote is significant.
+ if (m_SkipChar == '\'' || m_SkipChar == '\"') {
+ if (m_CurByte != m_SkipChar)
+ return;
+
+ ASSERT(!m_SkipStack.empty());
+ m_SkipStack.pop();
+ m_SkipChar = !m_SkipStack.empty() ? m_SkipStack.top() : 0;
+ return;
+ }
+ switch (m_CurByte) {
+ // Opening delimiters push the closer that must follow.
+ case '<':
+ m_SkipChar = '>';
+ m_SkipStack.push('>');
+ break;
+ case '[':
+ m_SkipChar = ']';
+ m_SkipStack.push(']');
+ break;
+ case '(':
+ m_SkipChar = ')';
+ m_SkipStack.push(')');
+ break;
+ case '\'':
+ m_SkipChar = '\'';
+ m_SkipStack.push('\'');
+ break;
+ case '\"':
+ m_SkipChar = '\"';
+ m_SkipStack.push('\"');
+ break;
+ default:
+ if (m_CurByte == m_SkipChar) {
+ m_SkipStack.pop();
+ m_SkipChar = !m_SkipStack.empty() ? m_SkipStack.top() : 0;
+ if (m_SkipStack.empty() && m_CurByte == '>') {
+ // Outermost '>' reached: the node is complete.
+ m_iDataLength = m_iDataPos;
+ m_iDataPos = 0;
+ // 9 == strlen("[CDATA[") + strlen("]]").
+ if (m_iDataLength >= 9 &&
+ FXSYS_memcmp(m_pszData, "[CDATA[", 7 * sizeof(uint8_t)) == 0 &&
+ FXSYS_memcmp(m_pszData + m_iDataLength - 2, "]]",
+ 2 * sizeof(uint8_t)) == 0) {
+ // Pop the CDATA item first so the data is reported against the item
+ // beneath it.
+ Pop();
+ // Strip the "[CDATA[" prefix and the "]]" suffix.
+ m_iDataLength -= 9;
+ m_dwDataOffset += 7;
+ FXSYS_memmove(m_pszData, m_pszData + 7,
+ m_iDataLength * sizeof(uint8_t));
+ m_bCharData = true;
+ if (m_pHandler) {
+ NotifyData();
+ }
+ m_bCharData = false;
+ } else {
+ Pop();
+ }
+ m_eMode = CFX_SaxMode::Text;
+ }
+ }
+ break;
+ }
+ // Record the byte only while still inside the node.
+ if (!m_SkipStack.empty())
+ ParseChar(m_CurByte);
+}
+
+// Reports the buffered data to the handler when the current item is a tag.
+// The data is flagged as CharData while |m_bCharData| is set, Text otherwise.
+void CFX_SAXReader::NotifyData() {
+  CFX_SAXItem* pItem = GetCurrentItem();
+  if (!pItem || pItem->m_eNode != CFX_SAXItem::Type::Tag)
+    return;
+
+  const CFX_SAXItem::Type eDataType =
+      m_bCharData ? CFX_SAXItem::Type::CharData : CFX_SAXItem::Type::Text;
+  m_pHandler->OnTagData(pItem->m_pNode, eDataType,
+                        CFX_ByteStringC(m_pszData, m_iDataLength),
+                        m_File.m_dwCur + m_dwDataOffset);
+}
+
+// Reports the start of a tag or processing instruction and stores the context
+// returned by the handler on the current item. Does nothing when there is no
+// current item, matching the guard used by NotifyData() and NotifyEnd().
+void CFX_SAXReader::NotifyEnter() {
+  CFX_SAXItem* pItem = GetCurrentItem();
+  if (!pItem)
+    return;
+
+  if (pItem->m_eNode == CFX_SAXItem::Type::Tag ||
+      pItem->m_eNode == CFX_SAXItem::Type::Instruction) {
+    pItem->m_pNode = m_pHandler->OnTagEnter(
+        CFX_ByteStringC(m_pszData, m_iDataLength), pItem->m_eNode, m_dwNodePos);
+  }
+}
+
+// Reports one attribute (name from the name buffer, value from the data
+// buffer) of the current tag or processing instruction. Does nothing when
+// there is no current item, matching NotifyData() and NotifyEnd().
+void CFX_SAXReader::NotifyAttribute() {
+  CFX_SAXItem* pItem = GetCurrentItem();
+  if (!pItem)
+    return;
+
+  if (pItem->m_eNode == CFX_SAXItem::Type::Tag ||
+      pItem->m_eNode == CFX_SAXItem::Type::Instruction) {
+    m_pHandler->OnTagAttribute(pItem->m_pNode,
+                               CFX_ByteStringC(m_pszName, m_iNameLength),
+                               CFX_ByteStringC(m_pszData, m_iDataLength));
+  }
+}
+
+// Reports that the opening tag of the current item ended with '>'. Does
+// nothing when there is no current item, matching NotifyData() and
+// NotifyEnd().
+void CFX_SAXReader::NotifyBreak() {
+  CFX_SAXItem* pItem = GetCurrentItem();
+  if (!pItem)
+    return;
+
+  if (pItem->m_eNode == CFX_SAXItem::Type::Tag)
+    m_pHandler->OnTagBreak(pItem->m_pNode);
+}
+
+// Reports that the current tag or processing instruction was closed in place
+// ("/>" or "?>"). Does nothing when there is no current item, matching
+// NotifyData() and NotifyEnd().
+void CFX_SAXReader::NotifyClose() {
+  CFX_SAXItem* pItem = GetCurrentItem();
+  if (!pItem)
+    return;
+
+  if (pItem->m_eNode == CFX_SAXItem::Type::Tag ||
+      pItem->m_eNode == CFX_SAXItem::Type::Instruction) {
+    m_pHandler->OnTagClose(pItem->m_pNode, m_dwNodePos);
+  }
+}
+
+// Reports an end tag ("</name>") for the current item when it is a tag; the
+// buffered data holds the name that was read.
+void CFX_SAXReader::NotifyEnd() {
+  CFX_SAXItem* pItem = GetCurrentItem();
+  if (!pItem)
+    return;
+  if (pItem->m_eNode != CFX_SAXItem::Type::Tag)
+    return;
+
+  const CFX_ByteStringC bsName(m_pszData, m_iDataLength);
+  m_pHandler->OnTagEnd(pItem->m_pNode, bsName, m_dwNodePos);
+}
+
+// Reports the payload of a processing instruction (taken from the name
+// buffer) or of a comment (taken from the data buffer). Does nothing when
+// there is no current item, matching NotifyData() and NotifyEnd().
+void CFX_SAXReader::NotifyTargetData() {
+  CFX_SAXItem* pItem = GetCurrentItem();
+  if (!pItem)
+    return;
+
+  if (pItem->m_eNode == CFX_SAXItem::Type::Instruction) {
+    m_pHandler->OnTargetData(pItem->m_pNode, pItem->m_eNode,
+                             CFX_ByteStringC(m_pszName, m_iNameLength),
+                             m_dwNodePos);
+  } else if (pItem->m_eNode == CFX_SAXItem::Type::Comment) {
+    m_pHandler->OnTargetData(pItem->m_pNode, pItem->m_eNode,
+                             CFX_ByteStringC(m_pszData, m_iDataLength),
+                             m_dwNodePos);
+  }
+}
+
+// Sets the skip flag on the current item, if there is one. Items pushed
+// afterwards inherit the flag (see Push()).
+void CFX_SAXReader::SkipCurrentNode() {
+  if (CFX_SAXItem* pCurrent = GetCurrentItem())
+    pCurrent->m_bSkip = true;
+}
diff --git a/core/fxcrt/xml/cfx_saxreader.h b/core/fxcrt/xml/cfx_saxreader.h
new file mode 100644
index 0000000000..af3c26f94a
--- /dev/null
+++ b/core/fxcrt/xml/cfx_saxreader.h
@@ -0,0 +1,144 @@
+// Copyright 2014 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FXCRT_XML_CFX_SAXREADER_H_
+#define CORE_FXCRT_XML_CFX_SAXREADER_H_
+
+#include <memory>
+#include <stack>
+
+#include "core/fxcrt/fx_basic.h"
+
+class CFX_SAXCommentContext;
+class CFX_SAXContext;
+class CFX_SAXReaderHandler;
+enum class CFX_SaxMode;
+
+// One entry of the reader's node stack: the node kind, the handler context
+// attached to it, a sequential ID and the skip flag.
+class CFX_SAXItem {
+ public:
+  enum class Type {
+    Unknown = 0,
+    Instruction,
+    Declaration,
+    Comment,
+    Tag,
+    Text,
+    CharData,
+  };
+
+  explicit CFX_SAXItem(uint32_t id) : m_dwID(id) {}
+
+  CFX_SAXContext* m_pNode = nullptr;
+  Type m_eNode = Type::Unknown;
+  const uint32_t m_dwID;
+  bool m_bSkip = false;
+};
+
+// Buffered, block-wise view over a byte range of a seekable stream.
+class CFX_SAXFile {
+ public:
+ CFX_SAXFile();
+ ~CFX_SAXFile();
+
+ // Attaches |pFile| and loads the first block of [dwStart, dwStart + dwLen).
+ bool StartFile(const CFX_RetainPtr<IFX_SeekableReadStream>& pFile,
+ uint32_t dwStart,
+ uint32_t dwLen);
+ // Loads the block starting at |m_dwCur|; false at end of range or on error.
+ bool ReadNextBlock();
+ // Frees the buffer and drops the stream reference.
+ void Reset();
+
+ CFX_RetainPtr<IFX_SeekableReadStream> m_pFile;
+ uint32_t m_dwStart; // First byte of the range.
+ uint32_t m_dwEnd; // One past the last byte of the range.
+ uint32_t m_dwCur; // Stream offset of the block currently buffered.
+ uint8_t* m_pBuf; // Read buffer, allocated by StartFile().
+ uint32_t m_dwBufSize; // Number of valid bytes in |m_pBuf|.
+ uint32_t m_dwBufIndex; // Index of the next byte to consume in |m_pBuf|.
+};
+
+// Bit flags for the |dwParseMode| argument of CFX_SAXReader::StartParse().
+// Each NotConvert flag leaves the corresponding entity reference undecoded.
+enum CFX_SaxParseMode {
+ CFX_SaxParseMode_NotConvert_amp = 1 << 0, // "&amp;"
+ CFX_SaxParseMode_NotConvert_lt = 1 << 1, // "&lt;"
+ CFX_SaxParseMode_NotConvert_gt = 1 << 2, // "&gt;"
+ CFX_SaxParseMode_NotConvert_apos = 1 << 3, // "&apos;"
+ CFX_SaxParseMode_NotConvert_quot = 1 << 4, // "&quot;"
+ CFX_SaxParseMode_NotConvert_sharp = 1 << 5, // "&#...;" numeric references
+ // Keep bytes below 0x21 at the start of text instead of dropping them.
+ CFX_SaxParseMode_NotSkipSpace = 1 << 6
+};
+
+// Incremental, byte-driven SAX-style XML reader. Bytes are pulled from a
+// CFX_SAXFile and dispatched to one Parse*() method per parser state; events
+// are forwarded to the CFX_SAXReaderHandler set with SetHandler().
+class CFX_SAXReader {
+ public:
+ CFX_SAXReader();
+ ~CFX_SAXReader();
+
+ // Begins parsing a range of |pFile|. Returns 0 on success and -1 on failure.
+ // The default |dwLen| of -1 converts to the largest uint32_t value.
+ int32_t StartParse(const CFX_RetainPtr<IFX_SeekableReadStream>& pFile,
+ uint32_t dwStart = 0,
+ uint32_t dwLen = -1,
+ uint32_t dwParseMode = 0);
+ // Continues parsing; returns a progress percentage or a negative error state.
+ int32_t ContinueParse(IFX_Pause* pPause = nullptr);
+ // Marks the current item as skipped.
+ void SkipCurrentNode();
+ // The handler is stored as a raw pointer; the reader does not delete it.
+ void SetHandler(CFX_SAXReaderHandler* pHandler) { m_pHandler = pHandler; }
+ void AppendData(uint8_t ch);
+ void AppendName(uint8_t ch);
+ // Per-state handlers. They are public because the dispatch table in the
+ // implementation file takes their addresses.
+ void ParseText();
+ void ParseNodeStart();
+ // NOTE(review): no definition of ParseInstruction() appears in
+ // cfx_saxreader.cpp as added by this change — confirm whether this
+ // declaration is still needed.
+ void ParseInstruction();
+ void ParseDeclOrComment();
+ void ParseDeclNode();
+ void ParseComment();
+ void ParseCommentContent();
+ void ParseTagName();
+ void ParseTagAttributeName();
+ void ParseTagAttributeEqual();
+ void ParseTagAttributeValue();
+ void ParseMaybeClose();
+ void ParseTagClose();
+ void ParseTagEnd();
+ void ParseTargetData();
+
+ private:
+ void Reset();
+ void Push();
+ void Pop();
+ CFX_SAXItem* GetCurrentItem() const;
+ bool SkipSpace(uint8_t ch);
+ void SkipNode();
+ void NotifyData();
+ void NotifyEnter();
+ void NotifyAttribute();
+ void NotifyBreak();
+ void NotifyClose();
+ void NotifyEnd();
+ void NotifyTargetData();
+ void ReallocDataBuffer();
+ void ReallocNameBuffer();
+ void ParseChar(uint8_t ch);
+
+ CFX_SAXFile m_File;
+ CFX_SAXReaderHandler* m_pHandler;
+ // -1 before a successful StartParse(), -2 on read failure, else progress %.
+ int32_t m_iState;
+ std::stack<std::unique_ptr<CFX_SAXItem>> m_Stack;
+ uint32_t m_dwItemID;
+ CFX_SaxMode m_eMode;
+ CFX_SaxMode m_ePrevMode;
+ bool m_bCharData;
+ uint8_t m_CurByte;
+ uint32_t m_dwDataOffset;
+ // Closing delimiters still expected by SkipNode(); |m_SkipChar| is its top.
+ std::stack<char> m_SkipStack;
+ uint8_t m_SkipChar;
+ uint32_t m_dwNodePos;
+ uint8_t* m_pszData;
+ int32_t m_iDataSize;
+ int32_t m_iDataLength;
+ // Index of the '&' of an entity being read in |m_pszData|, or -1.
+ int32_t m_iEntityStart;
+ // Write position shared by the data buffer and the name buffer.
+ int32_t m_iDataPos;
+ uint8_t* m_pszName;
+ int32_t m_iNameSize;
+ int32_t m_iNameLength;
+ uint32_t m_dwParseMode;
+ std::unique_ptr<CFX_SAXCommentContext> m_pCommentContext;
+};
+
+#endif // CORE_FXCRT_XML_CFX_SAXREADER_H_
diff --git a/core/fxcrt/xml/cfx_saxreaderhandler.cpp b/core/fxcrt/xml/cfx_saxreaderhandler.cpp
new file mode 100644
index 0000000000..e7b6cd186c
--- /dev/null
+++ b/core/fxcrt/xml/cfx_saxreaderhandler.cpp
@@ -0,0 +1,128 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fxcrt/xml/cfx_saxreaderhandler.h"
+
+#include "core/fxcrt/cfx_checksumcontext.h"
+
+// Binds the handler to the checksum context that receives the serialized
+// output; the context must not be null.
+CFX_SAXReaderHandler::CFX_SAXReaderHandler(CFX_ChecksumContext* pContext)
+    : m_pContext(pContext) {
+  ASSERT(m_pContext);
+}
+
+// Nothing to release: the context pointer is not deleted here.
+CFX_SAXReaderHandler::~CFX_SAXReaderHandler() = default;
+
+// Called when a tag or processing instruction begins. Flushes any pending
+// text into the checksum (ignoring text with no byte above 0x20), then starts
+// serializing "<name" or "<?name". Returns the shared context, or nullptr for
+// any other node type.
+CFX_SAXContext* CFX_SAXReaderHandler::OnTagEnter(
+    const CFX_ByteStringC& bsTagName,
+    CFX_SAXItem::Type eType,
+    uint32_t dwStartPos) {
+  UpdateChecksum(true);
+
+  const bool bInstruction = eType == CFX_SAXItem::Type::Instruction;
+  if (!bInstruction && eType != CFX_SAXItem::Type::Tag)
+    return nullptr;
+
+  m_SAXContext.m_eNode = eType;
+  m_SAXContext.m_TextBuf << "<";
+  if (bInstruction)
+    m_SAXContext.m_TextBuf << "?";
+  m_SAXContext.m_TextBuf << bsTagName;
+
+  m_SAXContext.m_bsTagName = bsTagName;
+  return &m_SAXContext;
+}
+
+// Appends ` name="value"` for one attribute to the tag's text buffer.
+void CFX_SAXReaderHandler::OnTagAttribute(CFX_SAXContext* pTag,
+                                          const CFX_ByteStringC& bsAttri,
+                                          const CFX_ByteStringC& bsValue) {
+  if (!pTag)
+    return;
+
+  CFX_ByteTextBuf& buf = pTag->m_TextBuf;
+  buf << " " << bsAttri;
+  buf << "=\"" << bsValue;
+  buf << "\"";
+}
+
+// Terminates the opening tag with ">" and feeds the serialized text to the
+// checksum.
+void CFX_SAXReaderHandler::OnTagBreak(CFX_SAXContext* pTag) {
+  if (pTag) {
+    pTag->m_TextBuf << ">";
+    UpdateChecksum(false);
+  }
+}
+
+// Appends element content to the tag's text buffer; character data is wrapped
+// in "<![CDATA[" ... "]]>" again.
+void CFX_SAXReaderHandler::OnTagData(CFX_SAXContext* pTag,
+                                     CFX_SAXItem::Type eType,
+                                     const CFX_ByteStringC& bsData,
+                                     uint32_t dwStartPos) {
+  if (!pTag)
+    return;
+
+  if (eType != CFX_SAXItem::Type::CharData) {
+    pTag->m_TextBuf << bsData;
+    return;
+  }
+
+  pTag->m_TextBuf << "<![CDATA[";
+  pTag->m_TextBuf << bsData;
+  pTag->m_TextBuf << "]]>";
+}
+
+// Called for nodes closed in place. An instruction is finished with "?>"; a
+// tag is written as "></name>". The result is fed to the checksum.
+void CFX_SAXReaderHandler::OnTagClose(CFX_SAXContext* pTag, uint32_t dwEndPos) {
+  if (!pTag)
+    return;
+
+  switch (pTag->m_eNode) {
+    case CFX_SAXItem::Type::Instruction:
+      pTag->m_TextBuf << "?>";
+      break;
+    case CFX_SAXItem::Type::Tag:
+      pTag->m_TextBuf << "></" << pTag->m_bsTagName.AsStringC() << ">";
+      break;
+    default:
+      break;
+  }
+  UpdateChecksum(false);
+}
+
+// Appends the end tag "</name>" and feeds the serialized text to the checksum.
+void CFX_SAXReaderHandler::OnTagEnd(CFX_SAXContext* pTag,
+                                    const CFX_ByteStringC& bsTagName,
+                                    uint32_t dwEndPos) {
+  if (pTag) {
+    CFX_ByteTextBuf& buf = pTag->m_TextBuf;
+    buf << "</";
+    buf << bsTagName;
+    buf << ">";
+    UpdateChecksum(false);
+  }
+}
+
+// Handles comment bodies and processing-instruction payloads. Comments are
+// serialized as "<!--data-->" into the handler's own context and checksummed
+// at once, even when |pTag| is null; instruction data is appended to |pTag|
+// after a space.
+void CFX_SAXReaderHandler::OnTargetData(CFX_SAXContext* pTag,
+                                        CFX_SAXItem::Type eType,
+                                        const CFX_ByteStringC& bsData,
+                                        uint32_t dwStartPos) {
+  if (eType == CFX_SAXItem::Type::Comment) {
+    m_SAXContext.m_TextBuf << "<!--" << bsData << "-->";
+    UpdateChecksum(false);
+    return;
+  }
+
+  if (!pTag)
+    return;
+
+  pTag->m_TextBuf << " " << bsData;
+}
+
+// Feeds the accumulated text to the checksum context and clears the buffer.
+// With |bCheckSpace| set, text that contains no byte above 0x20 is discarded
+// without being checksummed. An empty buffer is left untouched.
+void CFX_SAXReaderHandler::UpdateChecksum(bool bCheckSpace) {
+  CFX_ByteTextBuf& buf = m_SAXContext.m_TextBuf;
+  const int32_t iLength = buf.GetLength();
+  if (iLength < 1)
+    return;
+
+  uint8_t* pBuffer = buf.GetBuffer();
+
+  // Without the space check the text always counts; otherwise scan for the
+  // first byte above 0x20.
+  bool bHasContent = !bCheckSpace;
+  for (int32_t i = 0; !bHasContent && i < iLength; ++i)
+    bHasContent = pBuffer[i] > 0x20;
+
+  if (bHasContent)
+    m_pContext->Update(CFX_ByteStringC(pBuffer, iLength));
+
+  buf.Clear();
+}
diff --git a/core/fxcrt/xml/cfx_saxreaderhandler.h b/core/fxcrt/xml/cfx_saxreaderhandler.h
new file mode 100644
index 0000000000..bfedf03846
--- /dev/null
+++ b/core/fxcrt/xml/cfx_saxreaderhandler.h
@@ -0,0 +1,49 @@
+// Copyright 2017 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_FXCRT_XML_CFX_SAXREADERHANDLER_H_
+#define CORE_FXCRT_XML_CFX_SAXREADERHANDLER_H_
+
+#include "core/fxcrt/fx_string.h"
+#include "core/fxcrt/xml/cfx_saxcontext.h"
+#include "core/fxcrt/xml/cfx_saxreader.h"
+
+class CFX_ChecksumContext;
+
+// Receives CFX_SAXReader events, re-serializes each node into a text buffer
+// and feeds that text to a CFX_ChecksumContext.
+class CFX_SAXReaderHandler {
+ public:
+ // |pContext| must be non-null; it is stored as a raw pointer and is not
+ // deleted by this class.
+ explicit CFX_SAXReaderHandler(CFX_ChecksumContext* pContext);
+ ~CFX_SAXReaderHandler();
+
+ // Start of a tag or processing instruction. Returns the context that the
+ // other callbacks receive as |pTag|, or nullptr for other node types.
+ CFX_SAXContext* OnTagEnter(const CFX_ByteStringC& bsTagName,
+ CFX_SAXItem::Type eType,
+ uint32_t dwStartPos);
+ // One attribute of the current tag.
+ void OnTagAttribute(CFX_SAXContext* pTag,
+ const CFX_ByteStringC& bsAttri,
+ const CFX_ByteStringC& bsValue);
+ // The opening tag ended with '>'.
+ void OnTagBreak(CFX_SAXContext* pTag);
+ // Text or CDATA content inside a tag.
+ void OnTagData(CFX_SAXContext* pTag,
+ CFX_SAXItem::Type eType,
+ const CFX_ByteStringC& bsData,
+ uint32_t dwStartPos);
+ // The node was closed in place ("/>" or "?>").
+ void OnTagClose(CFX_SAXContext* pTag, uint32_t dwEndPos);
+ // An explicit end tag ("</name>") was read.
+ void OnTagEnd(CFX_SAXContext* pTag,
+ const CFX_ByteStringC& bsTagName,
+ uint32_t dwEndPos);
+
+ // Comment body or processing-instruction payload.
+ void OnTargetData(CFX_SAXContext* pTag,
+ CFX_SAXItem::Type eType,
+ const CFX_ByteStringC& bsData,
+ uint32_t dwStartPos);
+
+ private:
+ // Feeds the buffered text to the checksum and clears the buffer.
+ void UpdateChecksum(bool bCheckSpace);
+
+ CFX_ChecksumContext* m_pContext;
+ // Single context shared by all nodes; OnTagEnter() returns its address.
+ CFX_SAXContext m_SAXContext;
+};
+
+#endif // CORE_FXCRT_XML_CFX_SAXREADERHANDLER_H_