From 0d86ecb08e1b2c204333b1f1f6b0b014e5b2971c Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Wed, 19 Apr 2017 09:19:57 -0400 Subject: Move fde XML parser to core MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL moves the XML parser from FDE into FXCRT and renames to CFX_ from CFDE_. Change-Id: I21a9590bf74daf5517df630d7e7a5de89da99ea4 Reviewed-on: https://pdfium-review.googlesource.com/4312 Commit-Queue: dsinclair Reviewed-by: Tom Sepez Reviewed-by: Nicolás Peña --- core/fxcrt/xml/cfx_xmlsyntaxparser.cpp | 698 +++++++++++++++++++++++++++++++++ 1 file changed, 698 insertions(+) create mode 100644 core/fxcrt/xml/cfx_xmlsyntaxparser.cpp (limited to 'core/fxcrt/xml/cfx_xmlsyntaxparser.cpp') diff --git a/core/fxcrt/xml/cfx_xmlsyntaxparser.cpp b/core/fxcrt/xml/cfx_xmlsyntaxparser.cpp new file mode 100644 index 0000000000..e7bef71085 --- /dev/null +++ b/core/fxcrt/xml/cfx_xmlsyntaxparser.cpp @@ -0,0 +1,698 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/fxcrt/xml/cfx_xmlsyntaxparser.h" + +#include + +#include "core/fxcrt/fx_ext.h" +#include "core/fxcrt/fx_safe_types.h" + +namespace { + +const uint32_t kMaxCharRange = 0x10ffff; + +bool IsXMLWhiteSpace(wchar_t ch) { + return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; +} + +struct FX_XMLNAMECHAR { + uint16_t wStart; + uint16_t wEnd; + bool bStartChar; +}; + +const FX_XMLNAMECHAR g_XMLNameChars[] = { + {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false}, + {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true}, + {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true}, + {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true}, + {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false}, + {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true}, + {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true}, +}; + +bool IsXMLNameChar(wchar_t ch, bool bFirstChar) { + int32_t iStart = 0; + int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1; + while (iStart <= iEnd) { + int32_t iMid = (iStart + iEnd) / 2; + if (ch < g_XMLNameChars[iMid].wStart) { + iEnd = iMid - 1; + } else if (ch > g_XMLNameChars[iMid].wEnd) { + iStart = iMid + 1; + } else { + return bFirstChar ? g_XMLNameChars[iMid].bStartChar : true; + } + } + return false; +} + +int32_t GetUTF8EncodeLength(const std::vector& src, + FX_FILESIZE iSrcLen) { + uint32_t unicode = 0; + int32_t iDstNum = 0; + const wchar_t* pSrc = src.data(); + while (iSrcLen-- > 0) { + unicode = *pSrc++; + int nbytes = 0; + if ((uint32_t)unicode < 0x80) { + nbytes = 1; + } else if ((uint32_t)unicode < 0x800) { + nbytes = 2; + } else if ((uint32_t)unicode < 0x10000) { + nbytes = 3; + } else if ((uint32_t)unicode < 0x200000) { + nbytes = 4; + } else if ((uint32_t)unicode < 0x4000000) { + nbytes = 5; + } else { + nbytes = 6; + } + iDstNum += nbytes; + } + return iDstNum; +} + +} // namespace + +CFX_XMLSyntaxParser::CFX_XMLSyntaxParser( + const CFX_RetainPtr& pStream) + : m_pStream(pStream), + m_iXMLPlaneSize(32 * 1024), + m_iCurrentPos(0), + m_iCurrentNodeNum(-1), + m_iLastNodeNum(-1), + m_iParsedBytes(0), + m_ParsedChars(0), + m_iBufferChars(0), + m_bEOS(false), + m_Start(0), + m_End(0), + m_iAllocStep(m_BlockBuffer.GetAllocStep()), + m_pCurrentBlock(nullptr), + m_iIndexInBlock(0), + m_iTextDataLength(0), + m_syntaxParserResult(FX_XmlSyntaxResult::None), + m_syntaxParserState(FDE_XmlSyntaxState::Text), + m_wQuotationMark(0), + m_iEntityStart(-1) { + ASSERT(pStream); + + m_CurNode.iNodeNum = -1; + m_CurNode.eNodeType = FX_XMLNODE_Unknown; + + m_iXMLPlaneSize = + std::min(m_iXMLPlaneSize, + pdfium::base::checked_cast(m_pStream->GetLength())); + m_iCurrentPos = m_pStream->GetBOMLength(); + + FX_SAFE_STRSIZE alloc_size_safe = m_iXMLPlaneSize; + alloc_size_safe += 1; // For NUL. + if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return; + } + + m_Buffer.resize(pdfium::base::ValueOrDieForType(alloc_size_safe)); + + m_BlockBuffer.InitBuffer(); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); +} + +CFX_XMLSyntaxParser::~CFX_XMLSyntaxParser() {} + +FX_XmlSyntaxResult CFX_XMLSyntaxParser::DoSyntaxParse() { + if (m_syntaxParserResult == FX_XmlSyntaxResult::Error || + m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) { + return m_syntaxParserResult; + } + + int32_t iStreamLength = m_pStream->GetLength(); + int32_t iPos; + + FX_XmlSyntaxResult syntaxParserResult = FX_XmlSyntaxResult::None; + while (true) { + if (m_Start >= m_End) { + if (m_bEOS || m_iCurrentPos >= iStreamLength) { + m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString; + return m_syntaxParserResult; + } + m_ParsedChars += m_End; + m_iParsedBytes = m_iCurrentPos; + if (m_pStream->GetPosition() != m_iCurrentPos) + m_pStream->Seek(CFX_SeekableStreamProxy::Pos::Begin, m_iCurrentPos); + + m_iBufferChars = + m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS); + iPos = m_pStream->GetPosition(); + if (m_iBufferChars < 1) { + m_iCurrentPos = iStreamLength; + m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString; + return m_syntaxParserResult; + } + m_iCurrentPos = iPos; + m_Start = 0; + m_End = m_iBufferChars; + } + + while (m_Start < m_End) { + wchar_t ch = m_Buffer[m_Start]; + switch (m_syntaxParserState) { + case FDE_XmlSyntaxState::Text: + if (ch == L'<') { + if (!m_BlockBuffer.IsEmpty()) { + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_iEntityStart = -1; + syntaxParserResult = FX_XmlSyntaxResult::Text; + } else { + m_Start++; + m_syntaxParserState = FDE_XmlSyntaxState::Node; + } + } else { + ParseTextChar(ch); + } + break; + case FDE_XmlSyntaxState::Node: + if (ch == L'!') { + m_Start++; + m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl; + } else if (ch == L'/') { + m_Start++; + m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; + } else if (ch == L'?') { + m_iLastNodeNum++; + m_iCurrentNodeNum = m_iLastNodeNum; + m_CurNode.iNodeNum = m_iLastNodeNum; + m_CurNode.eNodeType = FX_XMLNODE_Instruction; + m_XMLNodeStack.push(m_CurNode); + m_Start++; + m_syntaxParserState = FDE_XmlSyntaxState::Target; + syntaxParserResult = FX_XmlSyntaxResult::InstructionOpen; + } else { + m_iLastNodeNum++; + m_iCurrentNodeNum = m_iLastNodeNum; + m_CurNode.iNodeNum = m_iLastNodeNum; + m_CurNode.eNodeType = FX_XMLNODE_Element; + m_XMLNodeStack.push(m_CurNode); + m_syntaxParserState = FDE_XmlSyntaxState::Tag; + syntaxParserResult = FX_XmlSyntaxResult::ElementOpen; + } + break; + case FDE_XmlSyntaxState::Target: + case FDE_XmlSyntaxState::Tag: + if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { + if (m_BlockBuffer.IsEmpty()) { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (m_syntaxParserState != FDE_XmlSyntaxState::Target) + syntaxParserResult = FX_XmlSyntaxResult::TagName; + else + syntaxParserResult = FX_XmlSyntaxResult::TargetName; + + m_syntaxParserState = FDE_XmlSyntaxState::AttriName; + } else { + if (m_iIndexInBlock == m_iAllocStep) { + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (!m_pCurrentBlock) { + return FX_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_BlockBuffer.IncrementDataLength(); + m_Start++; + } + break; + case FDE_XmlSyntaxState::AttriName: + if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) { + m_Start++; + break; + } + if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { + if (m_BlockBuffer.IsEmpty()) { + if (m_CurNode.eNodeType == FX_XMLNODE_Element) { + if (ch == L'>' || ch == L'/') { + m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; + break; + } + } else if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) { + if (ch == L'?') { + m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; + m_Start++; + } else { + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + } + break; + } + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) { + if (ch != '=' && !IsXMLWhiteSpace(ch)) { + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + break; + } + } + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; + syntaxParserResult = FX_XmlSyntaxResult::AttriName; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (!m_pCurrentBlock) { + return FX_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_BlockBuffer.IncrementDataLength(); + m_Start++; + } + break; + case FDE_XmlSyntaxState::AttriEqualSign: + if (IsXMLWhiteSpace(ch)) { + m_Start++; + break; + } + if (ch != L'=') { + if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) { + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + break; + } + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation; + m_Start++; + } + break; + case FDE_XmlSyntaxState::AttriQuotation: + if (IsXMLWhiteSpace(ch)) { + m_Start++; + break; + } + if (ch != L'\"' && ch != L'\'') { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + m_wQuotationMark = ch; + m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; + m_Start++; + } + break; + case FDE_XmlSyntaxState::AttriValue: + if (ch == m_wQuotationMark) { + if (m_iEntityStart > -1) { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_wQuotationMark = 0; + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_Start++; + m_syntaxParserState = FDE_XmlSyntaxState::AttriName; + syntaxParserResult = FX_XmlSyntaxResult::AttriValue; + } else { + ParseTextChar(ch); + } + break; + case FDE_XmlSyntaxState::CloseInstruction: + if (ch != L'>') { + if (m_iIndexInBlock == m_iAllocStep) { + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (!m_pCurrentBlock) { + return FX_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_BlockBuffer.IncrementDataLength(); + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + } else if (!m_BlockBuffer.IsEmpty()) { + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + syntaxParserResult = FX_XmlSyntaxResult::TargetData; + } else { + m_Start++; + if (m_XMLNodeStack.empty()) { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_XMLNodeStack.pop(); + if (!m_XMLNodeStack.empty()) { + m_CurNode = m_XMLNodeStack.top(); + } else { + m_CurNode.iNodeNum = -1; + m_CurNode.eNodeType = FX_XMLNODE_Unknown; + } + m_iCurrentNodeNum = m_CurNode.iNodeNum; + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + syntaxParserResult = FX_XmlSyntaxResult::InstructionClose; + } + break; + case FDE_XmlSyntaxState::BreakElement: + if (ch == L'>') { + m_syntaxParserState = FDE_XmlSyntaxState::Text; + syntaxParserResult = FX_XmlSyntaxResult::ElementBreak; + } else if (ch == L'/') { + m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; + } else { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_Start++; + break; + case FDE_XmlSyntaxState::CloseElement: + if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { + if (ch == L'>') { + if (m_XMLNodeStack.empty()) { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_XMLNodeStack.pop(); + if (!m_XMLNodeStack.empty()) { + m_CurNode = m_XMLNodeStack.top(); + } else { + m_CurNode.iNodeNum = -1; + m_CurNode.eNodeType = FX_XMLNODE_Unknown; + } + m_iCurrentNodeNum = m_CurNode.iNodeNum; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + syntaxParserResult = FX_XmlSyntaxResult::ElementClose; + } else if (!IsXMLWhiteSpace(ch)) { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (!m_pCurrentBlock) { + return FX_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_BlockBuffer.IncrementDataLength(); + } + m_Start++; + break; + case FDE_XmlSyntaxState::SkipCommentOrDecl: + if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"--", 2) == 0) { + m_Start += 2; + m_syntaxParserState = FDE_XmlSyntaxState::SkipComment; + } else if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"[CDATA[", 7) == + 0) { + m_Start += 7; + m_syntaxParserState = FDE_XmlSyntaxState::SkipCData; + } else { + m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode; + m_SkipChar = L'>'; + m_SkipStack.push(L'>'); + } + break; + case FDE_XmlSyntaxState::SkipCData: { + if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) { + m_Start += 3; + syntaxParserResult = FX_XmlSyntaxResult::CData; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + } else { + if (m_iIndexInBlock == m_iAllocStep) { + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (!m_pCurrentBlock) + return FX_XmlSyntaxResult::Error; + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_BlockBuffer.IncrementDataLength(); + m_Start++; + } + break; + } + case FDE_XmlSyntaxState::SkipDeclNode: + if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { + m_Start++; + if (ch != m_SkipChar) + break; + + m_SkipStack.pop(); + if (m_SkipStack.empty()) + m_syntaxParserState = FDE_XmlSyntaxState::Text; + else + m_SkipChar = m_SkipStack.top(); + } else { + switch (ch) { + case L'<': + m_SkipChar = L'>'; + m_SkipStack.push(L'>'); + break; + case L'[': + m_SkipChar = L']'; + m_SkipStack.push(L']'); + break; + case L'(': + m_SkipChar = L')'; + m_SkipStack.push(L')'); + break; + case L'\'': + m_SkipChar = L'\''; + m_SkipStack.push(L'\''); + break; + case L'\"': + m_SkipChar = L'\"'; + m_SkipStack.push(L'\"'); + break; + default: + if (ch == m_SkipChar) { + m_SkipStack.pop(); + if (m_SkipStack.empty()) { + if (m_BlockBuffer.GetDataLength() >= 9) + (void)m_BlockBuffer.GetTextData(0, 7); + + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + } else { + m_SkipChar = m_SkipStack.top(); + } + } + break; + } + if (!m_SkipStack.empty()) { + if (m_iIndexInBlock == m_iAllocStep) { + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (!m_pCurrentBlock) { + return FX_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_BlockBuffer.IncrementDataLength(); + } + m_Start++; + } + break; + case FDE_XmlSyntaxState::SkipComment: + if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"-->", 3) == 0) { + m_Start += 2; + m_syntaxParserState = FDE_XmlSyntaxState::Text; + } + + m_Start++; + break; + case FDE_XmlSyntaxState::TargetData: + if (IsXMLWhiteSpace(ch)) { + if (m_BlockBuffer.IsEmpty()) { + m_Start++; + break; + } else if (m_wQuotationMark == 0) { + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_wQuotationMark = 0; + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_Start++; + syntaxParserResult = FX_XmlSyntaxResult::TargetData; + break; + } + } + if (ch == '?') { + m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; + m_Start++; + } else if (ch == '\"') { + if (m_wQuotationMark == 0) { + m_wQuotationMark = ch; + m_Start++; + } else if (ch == m_wQuotationMark) { + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_wQuotationMark = 0; + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_Start++; + syntaxParserResult = FX_XmlSyntaxResult::TargetData; + } else { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (!m_pCurrentBlock) { + return FX_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_BlockBuffer.IncrementDataLength(); + m_Start++; + } + break; + default: + break; + } + if (syntaxParserResult != FX_XmlSyntaxResult::None) + return syntaxParserResult; + } + } + return FX_XmlSyntaxResult::Text; +} + +int32_t CFX_XMLSyntaxParser::GetStatus() const { + if (!m_pStream) + return -1; + + int32_t iStreamLength = m_pStream->GetLength(); + if (iStreamLength < 1) + return 100; + + if (m_syntaxParserResult == FX_XmlSyntaxResult::Error) + return -1; + + if (m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) + return 100; + return m_iParsedBytes * 100 / iStreamLength; +} + +FX_FILESIZE CFX_XMLSyntaxParser::GetCurrentBinaryPos() const { + if (!m_pStream) + return 0; + + int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start); + return m_iParsedBytes + nDstLen; +} + +void CFX_XMLSyntaxParser::ParseTextChar(wchar_t character) { + if (m_iIndexInBlock == m_iAllocStep) { + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (!m_pCurrentBlock) + return; + } + + m_pCurrentBlock[m_iIndexInBlock++] = character; + m_BlockBuffer.IncrementDataLength(); + if (m_iEntityStart > -1 && character == L';') { + CFX_WideString csEntity = m_BlockBuffer.GetTextData( + m_iEntityStart + 1, + m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1); + int32_t iLen = csEntity.GetLength(); + if (iLen > 0) { + if (csEntity[0] == L'#') { + uint32_t ch = 0; + wchar_t w; + if (iLen > 1 && csEntity[1] == L'x') { + for (int32_t i = 2; i < iLen; i++) { + w = csEntity[i]; + if (w >= L'0' && w <= L'9') { + ch = (ch << 4) + w - L'0'; + } else if (w >= L'A' && w <= L'F') { + ch = (ch << 4) + w - 55; + } else if (w >= L'a' && w <= L'f') { + ch = (ch << 4) + w - 87; + } else { + break; + } + } + } else { + for (int32_t i = 1; i < iLen; i++) { + w = csEntity[i]; + if (w < L'0' || w > L'9') + break; + ch = ch * 10 + w - L'0'; + } + } + if (ch > kMaxCharRange) + ch = ' '; + + character = static_cast(ch); + if (character != 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, character); + m_iEntityStart++; + } + } else { + if (csEntity.Compare(L"amp") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); + m_iEntityStart++; + } else if (csEntity.Compare(L"lt") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); + m_iEntityStart++; + } else if (csEntity.Compare(L"gt") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); + m_iEntityStart++; + } else if (csEntity.Compare(L"apos") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); + m_iEntityStart++; + } else if (csEntity.Compare(L"quot") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); + m_iEntityStart++; + } + } + } + if (m_iEntityStart >= 0 && + m_BlockBuffer.GetDataLength() > static_cast(m_iEntityStart)) { + m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() - + m_iEntityStart); + } + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + m_iEntityStart = -1; + } else if (m_iEntityStart < 0 && character == L'&') { + m_iEntityStart = m_BlockBuffer.GetDataLength() - 1; + } + m_Start++; +} -- cgit v1.2.3