summaryrefslogtreecommitdiff
path: root/xfa/fde/xml/cfde_xmlsyntaxparser.cpp
diff options
context:
space:
mode:
authorDan Sinclair <dsinclair@chromium.org>2017-04-19 09:19:57 -0400
committerChromium commit bot <commit-bot@chromium.org>2017-04-19 13:33:07 +0000
commit0d86ecb08e1b2c204333b1f1f6b0b014e5b2971c (patch)
treef816429f8581c16a60773eb23385dc8e55729bac /xfa/fde/xml/cfde_xmlsyntaxparser.cpp
parent3b71d26f092ebc86ca9177fbbe89d83caa67ae1b (diff)
downloadpdfium-0d86ecb08e1b2c204333b1f1f6b0b014e5b2971c.tar.xz
Move fde XML parser to core
This CL moves the XML parser from FDE into FXCRT and renames to CFX_ from CFDE_. Change-Id: I21a9590bf74daf5517df630d7e7a5de89da99ea4 Reviewed-on: https://pdfium-review.googlesource.com/4312 Commit-Queue: dsinclair <dsinclair@chromium.org> Reviewed-by: Tom Sepez <tsepez@chromium.org> Reviewed-by: Nicolás Peña <npm@chromium.org>
Diffstat (limited to 'xfa/fde/xml/cfde_xmlsyntaxparser.cpp')
-rw-r--r--xfa/fde/xml/cfde_xmlsyntaxparser.cpp698
1 files changed, 0 insertions, 698 deletions
diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.cpp b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp
deleted file mode 100644
index 5d671bb39a..0000000000
--- a/xfa/fde/xml/cfde_xmlsyntaxparser.cpp
+++ /dev/null
@@ -1,698 +0,0 @@
-// Copyright 2017 PDFium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-
-#include "xfa/fde/xml/cfde_xmlsyntaxparser.h"
-
-#include <algorithm>
-
-#include "core/fxcrt/fx_ext.h"
-#include "core/fxcrt/fx_safe_types.h"
-
-namespace {
-
-const uint32_t kMaxCharRange = 0x10ffff;
-
-bool IsXMLWhiteSpace(wchar_t ch) {
- return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09;
-}
-
-struct FDE_XMLNAMECHAR {
- uint16_t wStart;
- uint16_t wEnd;
- bool bStartChar;
-};
-
-const FDE_XMLNAMECHAR g_XMLNameChars[] = {
- {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false},
- {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true},
- {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true},
- {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true},
- {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false},
- {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true},
- {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true},
-};
-
-bool IsXMLNameChar(wchar_t ch, bool bFirstChar) {
- int32_t iStart = 0;
- int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1;
- while (iStart <= iEnd) {
- int32_t iMid = (iStart + iEnd) / 2;
- if (ch < g_XMLNameChars[iMid].wStart) {
- iEnd = iMid - 1;
- } else if (ch > g_XMLNameChars[iMid].wEnd) {
- iStart = iMid + 1;
- } else {
- return bFirstChar ? g_XMLNameChars[iMid].bStartChar : true;
- }
- }
- return false;
-}
-
-int32_t GetUTF8EncodeLength(const std::vector<wchar_t>& src,
- FX_FILESIZE iSrcLen) {
- uint32_t unicode = 0;
- int32_t iDstNum = 0;
- const wchar_t* pSrc = src.data();
- while (iSrcLen-- > 0) {
- unicode = *pSrc++;
- int nbytes = 0;
- if ((uint32_t)unicode < 0x80) {
- nbytes = 1;
- } else if ((uint32_t)unicode < 0x800) {
- nbytes = 2;
- } else if ((uint32_t)unicode < 0x10000) {
- nbytes = 3;
- } else if ((uint32_t)unicode < 0x200000) {
- nbytes = 4;
- } else if ((uint32_t)unicode < 0x4000000) {
- nbytes = 5;
- } else {
- nbytes = 6;
- }
- iDstNum += nbytes;
- }
- return iDstNum;
-}
-
-} // namespace
-
-CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser(
- const CFX_RetainPtr<CFX_SeekableStreamProxy>& pStream)
- : m_pStream(pStream),
- m_iXMLPlaneSize(32 * 1024),
- m_iCurrentPos(0),
- m_iCurrentNodeNum(-1),
- m_iLastNodeNum(-1),
- m_iParsedBytes(0),
- m_ParsedChars(0),
- m_iBufferChars(0),
- m_bEOS(false),
- m_Start(0),
- m_End(0),
- m_iAllocStep(m_BlockBuffer.GetAllocStep()),
- m_pCurrentBlock(nullptr),
- m_iIndexInBlock(0),
- m_iTextDataLength(0),
- m_syntaxParserResult(FDE_XmlSyntaxResult::None),
- m_syntaxParserState(FDE_XmlSyntaxState::Text),
- m_wQuotationMark(0),
- m_iEntityStart(-1) {
- ASSERT(pStream);
-
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
-
- m_iXMLPlaneSize =
- std::min(m_iXMLPlaneSize,
- pdfium::base::checked_cast<FX_STRSIZE>(m_pStream->GetLength()));
- m_iCurrentPos = m_pStream->GetBOMLength();
-
- FX_SAFE_STRSIZE alloc_size_safe = m_iXMLPlaneSize;
- alloc_size_safe += 1; // For NUL.
- if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return;
- }
-
- m_Buffer.resize(pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe));
-
- m_BlockBuffer.InitBuffer();
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
-}
-
-CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() {}
-
-FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() {
- if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error ||
- m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) {
- return m_syntaxParserResult;
- }
-
- int32_t iStreamLength = m_pStream->GetLength();
- int32_t iPos;
-
- FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None;
- while (true) {
- if (m_Start >= m_End) {
- if (m_bEOS || m_iCurrentPos >= iStreamLength) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
- return m_syntaxParserResult;
- }
- m_ParsedChars += m_End;
- m_iParsedBytes = m_iCurrentPos;
- if (m_pStream->GetPosition() != m_iCurrentPos)
- m_pStream->Seek(CFX_SeekableStreamProxy::Pos::Begin, m_iCurrentPos);
-
- m_iBufferChars =
- m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS);
- iPos = m_pStream->GetPosition();
- if (m_iBufferChars < 1) {
- m_iCurrentPos = iStreamLength;
- m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
- return m_syntaxParserResult;
- }
- m_iCurrentPos = iPos;
- m_Start = 0;
- m_End = m_iBufferChars;
- }
-
- while (m_Start < m_End) {
- wchar_t ch = m_Buffer[m_Start];
- switch (m_syntaxParserState) {
- case FDE_XmlSyntaxState::Text:
- if (ch == L'<') {
- if (!m_BlockBuffer.IsEmpty()) {
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_iEntityStart = -1;
- syntaxParserResult = FDE_XmlSyntaxResult::Text;
- } else {
- m_Start++;
- m_syntaxParserState = FDE_XmlSyntaxState::Node;
- }
- } else {
- ParseTextChar(ch);
- }
- break;
- case FDE_XmlSyntaxState::Node:
- if (ch == L'!') {
- m_Start++;
- m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl;
- } else if (ch == L'/') {
- m_Start++;
- m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
- } else if (ch == L'?') {
- m_iLastNodeNum++;
- m_iCurrentNodeNum = m_iLastNodeNum;
- m_CurNode.iNodeNum = m_iLastNodeNum;
- m_CurNode.eNodeType = FDE_XMLNODE_Instruction;
- m_XMLNodeStack.push(m_CurNode);
- m_Start++;
- m_syntaxParserState = FDE_XmlSyntaxState::Target;
- syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen;
- } else {
- m_iLastNodeNum++;
- m_iCurrentNodeNum = m_iLastNodeNum;
- m_CurNode.iNodeNum = m_iLastNodeNum;
- m_CurNode.eNodeType = FDE_XMLNODE_Element;
- m_XMLNodeStack.push(m_CurNode);
- m_syntaxParserState = FDE_XmlSyntaxState::Tag;
- syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen;
- }
- break;
- case FDE_XmlSyntaxState::Target:
- case FDE_XmlSyntaxState::Tag:
- if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
- if (m_BlockBuffer.IsEmpty()) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
-
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (m_syntaxParserState != FDE_XmlSyntaxState::Target)
- syntaxParserResult = FDE_XmlSyntaxResult::TagName;
- else
- syntaxParserResult = FDE_XmlSyntaxResult::TargetName;
-
- m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
- m_Start++;
- }
- break;
- case FDE_XmlSyntaxState::AttriName:
- if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) {
- m_Start++;
- break;
- }
- if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
- if (m_BlockBuffer.IsEmpty()) {
- if (m_CurNode.eNodeType == FDE_XMLNODE_Element) {
- if (ch == L'>' || ch == L'/') {
- m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
- break;
- }
- } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
- if (ch == L'?') {
- m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
- m_Start++;
- } else {
- m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
- }
- break;
- }
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- } else {
- if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
- if (ch != '=' && !IsXMLWhiteSpace(ch)) {
- m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
- break;
- }
- }
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign;
- syntaxParserResult = FDE_XmlSyntaxResult::AttriName;
- }
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
- m_Start++;
- }
- break;
- case FDE_XmlSyntaxState::AttriEqualSign:
- if (IsXMLWhiteSpace(ch)) {
- m_Start++;
- break;
- }
- if (ch != L'=') {
- if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
- m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
- break;
- }
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- } else {
- m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation;
- m_Start++;
- }
- break;
- case FDE_XmlSyntaxState::AttriQuotation:
- if (IsXMLWhiteSpace(ch)) {
- m_Start++;
- break;
- }
- if (ch != L'\"' && ch != L'\'') {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- } else {
- m_wQuotationMark = ch;
- m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
- m_Start++;
- }
- break;
- case FDE_XmlSyntaxState::AttriValue:
- if (ch == m_wQuotationMark) {
- if (m_iEntityStart > -1) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_wQuotationMark = 0;
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_Start++;
- m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
- syntaxParserResult = FDE_XmlSyntaxResult::AttriValue;
- } else {
- ParseTextChar(ch);
- }
- break;
- case FDE_XmlSyntaxState::CloseInstruction:
- if (ch != L'>') {
- if (m_iIndexInBlock == m_iAllocStep) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
- m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
- } else if (!m_BlockBuffer.IsEmpty()) {
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
- } else {
- m_Start++;
- if (m_XMLNodeStack.empty()) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- m_XMLNodeStack.pop();
- if (!m_XMLNodeStack.empty()) {
- m_CurNode = m_XMLNodeStack.top();
- } else {
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
- }
- m_iCurrentNodeNum = m_CurNode.iNodeNum;
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose;
- }
- break;
- case FDE_XmlSyntaxState::BreakElement:
- if (ch == L'>') {
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak;
- } else if (ch == L'/') {
- m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
- } else {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- m_Start++;
- break;
- case FDE_XmlSyntaxState::CloseElement:
- if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
- if (ch == L'>') {
- if (m_XMLNodeStack.empty()) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- m_XMLNodeStack.pop();
- if (!m_XMLNodeStack.empty()) {
- m_CurNode = m_XMLNodeStack.top();
- } else {
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
- }
- m_iCurrentNodeNum = m_CurNode.iNodeNum;
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- syntaxParserResult = FDE_XmlSyntaxResult::ElementClose;
- } else if (!IsXMLWhiteSpace(ch)) {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
- }
- m_Start++;
- break;
- case FDE_XmlSyntaxState::SkipCommentOrDecl:
- if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"--", 2) == 0) {
- m_Start += 2;
- m_syntaxParserState = FDE_XmlSyntaxState::SkipComment;
- } else if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"[CDATA[", 7) ==
- 0) {
- m_Start += 7;
- m_syntaxParserState = FDE_XmlSyntaxState::SkipCData;
- } else {
- m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode;
- m_SkipChar = L'>';
- m_SkipStack.push(L'>');
- }
- break;
- case FDE_XmlSyntaxState::SkipCData: {
- if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) {
- m_Start += 3;
- syntaxParserResult = FDE_XmlSyntaxResult::CData;
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock)
- return FDE_XmlSyntaxResult::Error;
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
- m_Start++;
- }
- break;
- }
- case FDE_XmlSyntaxState::SkipDeclNode:
- if (m_SkipChar == L'\'' || m_SkipChar == L'\"') {
- m_Start++;
- if (ch != m_SkipChar)
- break;
-
- m_SkipStack.pop();
- if (m_SkipStack.empty())
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- else
- m_SkipChar = m_SkipStack.top();
- } else {
- switch (ch) {
- case L'<':
- m_SkipChar = L'>';
- m_SkipStack.push(L'>');
- break;
- case L'[':
- m_SkipChar = L']';
- m_SkipStack.push(L']');
- break;
- case L'(':
- m_SkipChar = L')';
- m_SkipStack.push(L')');
- break;
- case L'\'':
- m_SkipChar = L'\'';
- m_SkipStack.push(L'\'');
- break;
- case L'\"':
- m_SkipChar = L'\"';
- m_SkipStack.push(L'\"');
- break;
- default:
- if (ch == m_SkipChar) {
- m_SkipStack.pop();
- if (m_SkipStack.empty()) {
- if (m_BlockBuffer.GetDataLength() >= 9)
- (void)m_BlockBuffer.GetTextData(0, 7);
-
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- } else {
- m_SkipChar = m_SkipStack.top();
- }
- }
- break;
- }
- if (!m_SkipStack.empty()) {
- if (m_iIndexInBlock == m_iAllocStep) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
- }
- m_Start++;
- }
- break;
- case FDE_XmlSyntaxState::SkipComment:
- if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"-->", 3) == 0) {
- m_Start += 2;
- m_syntaxParserState = FDE_XmlSyntaxState::Text;
- }
-
- m_Start++;
- break;
- case FDE_XmlSyntaxState::TargetData:
- if (IsXMLWhiteSpace(ch)) {
- if (m_BlockBuffer.IsEmpty()) {
- m_Start++;
- break;
- } else if (m_wQuotationMark == 0) {
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_wQuotationMark = 0;
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_Start++;
- syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
- break;
- }
- }
- if (ch == '?') {
- m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
- m_Start++;
- } else if (ch == '\"') {
- if (m_wQuotationMark == 0) {
- m_wQuotationMark = ch;
- m_Start++;
- } else if (ch == m_wQuotationMark) {
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_wQuotationMark = 0;
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_Start++;
- syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
- } else {
- m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
- return m_syntaxParserResult;
- }
- } else {
- if (m_iIndexInBlock == m_iAllocStep) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
- return FDE_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
- m_Start++;
- }
- break;
- default:
- break;
- }
- if (syntaxParserResult != FDE_XmlSyntaxResult::None)
- return syntaxParserResult;
- }
- }
- return FDE_XmlSyntaxResult::Text;
-}
-
-int32_t CFDE_XMLSyntaxParser::GetStatus() const {
- if (!m_pStream)
- return -1;
-
- int32_t iStreamLength = m_pStream->GetLength();
- if (iStreamLength < 1)
- return 100;
-
- if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error)
- return -1;
-
- if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString)
- return 100;
- return m_iParsedBytes * 100 / iStreamLength;
-}
-
-FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const {
- if (!m_pStream)
- return 0;
-
- int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start);
- return m_iParsedBytes + nDstLen;
-}
-
-void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) {
- if (m_iIndexInBlock == m_iAllocStep) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock)
- return;
- }
-
- m_pCurrentBlock[m_iIndexInBlock++] = character;
- m_BlockBuffer.IncrementDataLength();
- if (m_iEntityStart > -1 && character == L';') {
- CFX_WideString csEntity = m_BlockBuffer.GetTextData(
- m_iEntityStart + 1,
- m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1);
- int32_t iLen = csEntity.GetLength();
- if (iLen > 0) {
- if (csEntity[0] == L'#') {
- uint32_t ch = 0;
- wchar_t w;
- if (iLen > 1 && csEntity[1] == L'x') {
- for (int32_t i = 2; i < iLen; i++) {
- w = csEntity[i];
- if (w >= L'0' && w <= L'9') {
- ch = (ch << 4) + w - L'0';
- } else if (w >= L'A' && w <= L'F') {
- ch = (ch << 4) + w - 55;
- } else if (w >= L'a' && w <= L'f') {
- ch = (ch << 4) + w - 87;
- } else {
- break;
- }
- }
- } else {
- for (int32_t i = 1; i < iLen; i++) {
- w = csEntity[i];
- if (w < L'0' || w > L'9')
- break;
- ch = ch * 10 + w - L'0';
- }
- }
- if (ch > kMaxCharRange)
- ch = ' ';
-
- character = static_cast<wchar_t>(ch);
- if (character != 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, character);
- m_iEntityStart++;
- }
- } else {
- if (csEntity.Compare(L"amp") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'&');
- m_iEntityStart++;
- } else if (csEntity.Compare(L"lt") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'<');
- m_iEntityStart++;
- } else if (csEntity.Compare(L"gt") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'>');
- m_iEntityStart++;
- } else if (csEntity.Compare(L"apos") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'\'');
- m_iEntityStart++;
- } else if (csEntity.Compare(L"quot") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"');
- m_iEntityStart++;
- }
- }
- }
- if (m_iEntityStart >= 0 &&
- m_BlockBuffer.GetDataLength() > static_cast<size_t>(m_iEntityStart)) {
- m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() -
- m_iEntityStart);
- }
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_iEntityStart = -1;
- } else if (m_iEntityStart < 0 && character == L'&') {
- m_iEntityStart = m_BlockBuffer.GetDataLength() - 1;
- }
- m_Start++;
-}