From ac35589d5d1a593497cf28d91ab6a236f25833c3 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Mon, 3 Apr 2017 16:46:21 -0400 Subject: Split the FDE XML file into indiviual class files. This Cl splits the fde_xml_int file apart into individual class files. Includes are fixed as needed. fde_xml.h is also removed and the needed defines moved to more appropiate places. Change-Id: I29774dabc4d0fb2d5092fcbbe7853f03401b6ec7 Reviewed-on: https://pdfium-review.googlesource.com/3616 Commit-Queue: dsinclair Reviewed-by: Tom Sepez --- xfa/fde/xml/cfde_xmlsyntaxparser.cpp | 703 +++++++++++++++++++++++++++++++++++ 1 file changed, 703 insertions(+) create mode 100644 xfa/fde/xml/cfde_xmlsyntaxparser.cpp (limited to 'xfa/fde/xml/cfde_xmlsyntaxparser.cpp') diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.cpp b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp new file mode 100644 index 0000000000..45a1eddb83 --- /dev/null +++ b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp @@ -0,0 +1,703 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfde_xmlsyntaxparser.h" + +#include + +#include "core/fxcrt/fx_ext.h" +#include "core/fxcrt/fx_safe_types.h" + +namespace { + +const uint32_t kMaxCharRange = 0x10ffff; + +bool IsXMLWhiteSpace(wchar_t ch) { + return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; +} + +struct FDE_XMLNAMECHAR { + uint16_t wStart; + uint16_t wEnd; + bool bStartChar; +}; + +const FDE_XMLNAMECHAR g_XMLNameChars[] = { + {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false}, + {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true}, + {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true}, + {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true}, + {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false}, + {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true}, + {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true}, +}; + +bool IsXMLNameChar(wchar_t ch, bool bFirstChar) { + int32_t iStart = 0; + int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1; + while (iStart <= iEnd) { + int32_t iMid = (iStart + iEnd) / 2; + if (ch < g_XMLNameChars[iMid].wStart) { + iEnd = iMid - 1; + } else if (ch > g_XMLNameChars[iMid].wEnd) { + iStart = iMid + 1; + } else { + return bFirstChar ? g_XMLNameChars[iMid].bStartChar : true; + } + } + return false; +} + +int32_t GetUTF8EncodeLength(const wchar_t* pSrc, int32_t iSrcLen) { + uint32_t unicode = 0; + int32_t iDstNum = 0; + while (iSrcLen-- > 0) { + unicode = *pSrc++; + int nbytes = 0; + if ((uint32_t)unicode < 0x80) { + nbytes = 1; + } else if ((uint32_t)unicode < 0x800) { + nbytes = 2; + } else if ((uint32_t)unicode < 0x10000) { + nbytes = 3; + } else if ((uint32_t)unicode < 0x200000) { + nbytes = 4; + } else if ((uint32_t)unicode < 0x4000000) { + nbytes = 5; + } else { + nbytes = 6; + } + iDstNum += nbytes; + } + return iDstNum; +} + +} // namespace + +CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser() + : m_pStream(nullptr), + m_iXMLPlaneSize(-1), + m_iCurrentPos(0), + m_iCurrentNodeNum(-1), + m_iLastNodeNum(-1), + m_iParsedChars(0), + m_iParsedBytes(0), + m_pBuffer(nullptr), + m_iBufferChars(0), + m_bEOS(false), + m_pStart(nullptr), + m_pEnd(nullptr), + m_iAllocStep(m_BlockBuffer.GetAllocStep()), + m_iDataLength(m_BlockBuffer.GetDataLengthRef()), + m_pCurrentBlock(nullptr), + m_iIndexInBlock(0), + m_iTextDataLength(0), + m_syntaxParserResult(FDE_XmlSyntaxResult::None), + m_syntaxParserState(FDE_XmlSyntaxState::Text), + m_wQuotationMark(0), + m_iEntityStart(-1) { + m_CurNode.iNodeNum = -1; + m_CurNode.eNodeType = FDE_XMLNODE_Unknown; +} + +void CFDE_XMLSyntaxParser::Init(const CFX_RetainPtr& pStream, + int32_t iXMLPlaneSize, + int32_t iTextDataSize) { + ASSERT(!m_pStream && !m_pBuffer); + ASSERT(pStream && iXMLPlaneSize > 0); + int32_t iStreamLength = pStream->GetLength(); + ASSERT(iStreamLength > 0); + m_pStream = pStream; + m_iXMLPlaneSize = std::min(iXMLPlaneSize, iStreamLength); + uint8_t bom[4]; + m_iCurrentPos = m_pStream->GetBOM(bom); + ASSERT(!m_pBuffer); + + FX_SAFE_INT32 alloc_size_safe = m_iXMLPlaneSize; + alloc_size_safe += 1; // For NUL. + if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return; + } + + m_pBuffer = FX_Alloc( + wchar_t, pdfium::base::ValueOrDieForType(alloc_size_safe)); + m_pStart = m_pEnd = m_pBuffer; + ASSERT(!m_BlockBuffer.IsInitialized()); + m_BlockBuffer.InitBuffer(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_iParsedBytes = m_iParsedChars = 0; + m_iBufferChars = 0; +} + +FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || + m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { + return m_syntaxParserResult; + } + ASSERT(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized()); + int32_t iStreamLength = m_pStream->GetLength(); + int32_t iPos; + + FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None; + while (true) { + if (m_pStart >= m_pEnd) { + if (m_bEOS || m_iCurrentPos >= iStreamLength) { + m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; + return m_syntaxParserResult; + } + m_iParsedChars += (m_pEnd - m_pBuffer); + m_iParsedBytes = m_iCurrentPos; + if (m_pStream->GetPosition() != m_iCurrentPos) { + m_pStream->Seek(FX_STREAMSEEK_Begin, m_iCurrentPos); + } + m_iBufferChars = + m_pStream->ReadString(m_pBuffer, m_iXMLPlaneSize, m_bEOS); + iPos = m_pStream->GetPosition(); + if (m_iBufferChars < 1) { + m_iCurrentPos = iStreamLength; + m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; + return m_syntaxParserResult; + } + m_iCurrentPos = iPos; + m_pStart = m_pBuffer; + m_pEnd = m_pBuffer + m_iBufferChars; + } + + while (m_pStart < m_pEnd) { + wchar_t ch = *m_pStart; + switch (m_syntaxParserState) { + case FDE_XmlSyntaxState::Text: + if (ch == L'<') { + if (m_iDataLength > 0) { + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_iEntityStart = -1; + syntaxParserResult = FDE_XmlSyntaxResult::Text; + } else { + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::Node; + } + } else { + ParseTextChar(ch); + } + break; + case FDE_XmlSyntaxState::Node: + if (ch == L'!') { + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl; + } else if (ch == L'/') { + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; + } else if (ch == L'?') { + m_iLastNodeNum++; + m_iCurrentNodeNum = m_iLastNodeNum; + m_CurNode.iNodeNum = m_iLastNodeNum; + m_CurNode.eNodeType = FDE_XMLNODE_Instruction; + m_XMLNodeStack.push(m_CurNode); + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::Target; + syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen; + } else { + m_iLastNodeNum++; + m_iCurrentNodeNum = m_iLastNodeNum; + m_CurNode.iNodeNum = m_iLastNodeNum; + m_CurNode.eNodeType = FDE_XMLNODE_Element; + m_XMLNodeStack.push(m_CurNode); + m_syntaxParserState = FDE_XmlSyntaxState::Tag; + syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen; + } + break; + case FDE_XmlSyntaxState::Target: + case FDE_XmlSyntaxState::Tag: + if (!IsXMLNameChar(ch, m_iDataLength < 1)) { + if (m_iDataLength < 1) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (m_syntaxParserState != FDE_XmlSyntaxState::Target) { + syntaxParserResult = FDE_XmlSyntaxResult::TagName; + } else { + syntaxParserResult = FDE_XmlSyntaxResult::TargetName; + } + m_syntaxParserState = FDE_XmlSyntaxState::AttriName; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + case FDE_XmlSyntaxState::AttriName: + if (m_iDataLength < 1 && IsXMLWhiteSpace(ch)) { + m_pStart++; + break; + } + if (!IsXMLNameChar(ch, m_iDataLength < 1)) { + if (m_iDataLength < 1) { + if (m_CurNode.eNodeType == FDE_XMLNODE_Element) { + if (ch == L'>' || ch == L'/') { + m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; + break; + } + } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { + if (ch == L'?') { + m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; + m_pStart++; + } else { + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + } + break; + } + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { + if (ch != '=' && !IsXMLWhiteSpace(ch)) { + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + break; + } + } + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; + syntaxParserResult = FDE_XmlSyntaxResult::AttriName; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + case FDE_XmlSyntaxState::AttriEqualSign: + if (IsXMLWhiteSpace(ch)) { + m_pStart++; + break; + } + if (ch != L'=') { + if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + break; + } + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation; + m_pStart++; + } + break; + case FDE_XmlSyntaxState::AttriQuotation: + if (IsXMLWhiteSpace(ch)) { + m_pStart++; + break; + } + if (ch != L'\"' && ch != L'\'') { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } else { + m_wQuotationMark = ch; + m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; + m_pStart++; + } + break; + case FDE_XmlSyntaxState::AttriValue: + if (ch == m_wQuotationMark) { + if (m_iEntityStart > -1) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_iTextDataLength = m_iDataLength; + m_wQuotationMark = 0; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_pStart++; + m_syntaxParserState = FDE_XmlSyntaxState::AttriName; + syntaxParserResult = FDE_XmlSyntaxResult::AttriValue; + } else { + ParseTextChar(ch); + } + break; + case FDE_XmlSyntaxState::CloseInstruction: + if (ch != L'>') { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + } else if (m_iDataLength > 0) { + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + syntaxParserResult = FDE_XmlSyntaxResult::TargetData; + } else { + m_pStart++; + if (m_XMLNodeStack.empty()) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_XMLNodeStack.pop(); + if (!m_XMLNodeStack.empty()) { + m_CurNode = m_XMLNodeStack.top(); + } else { + m_CurNode.iNodeNum = -1; + m_CurNode.eNodeType = FDE_XMLNODE_Unknown; + } + m_iCurrentNodeNum = m_CurNode.iNodeNum; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose; + } + break; + case FDE_XmlSyntaxState::BreakElement: + if (ch == L'>') { + m_syntaxParserState = FDE_XmlSyntaxState::Text; + syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak; + } else if (ch == L'/') { + m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; + } else { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_pStart++; + break; + case FDE_XmlSyntaxState::CloseElement: + if (!IsXMLNameChar(ch, m_iDataLength < 1)) { + if (ch == L'>') { + if (m_XMLNodeStack.empty()) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + m_XMLNodeStack.pop(); + if (!m_XMLNodeStack.empty()) { + m_CurNode = m_XMLNodeStack.top(); + } else { + m_CurNode.iNodeNum = -1; + m_CurNode.eNodeType = FDE_XMLNODE_Unknown; + } + m_iCurrentNodeNum = m_CurNode.iNodeNum; + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + syntaxParserResult = FDE_XmlSyntaxResult::ElementClose; + } else if (!IsXMLWhiteSpace(ch)) { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + } + m_pStart++; + break; + case FDE_XmlSyntaxState::SkipCommentOrDecl: + if (FXSYS_wcsnicmp(m_pStart, L"--", 2) == 0) { + m_pStart += 2; + m_syntaxParserState = FDE_XmlSyntaxState::SkipComment; + } else if (FXSYS_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) { + m_pStart += 7; + m_syntaxParserState = FDE_XmlSyntaxState::SkipCData; + } else { + m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode; + m_SkipChar = L'>'; + m_SkipStack.push(L'>'); + } + break; + case FDE_XmlSyntaxState::SkipCData: { + if (FXSYS_wcsnicmp(m_pStart, L"]]>", 3) == 0) { + m_pStart += 3; + syntaxParserResult = FDE_XmlSyntaxResult::CData; + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) + return FDE_XmlSyntaxResult::Error; + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + } + case FDE_XmlSyntaxState::SkipDeclNode: + if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { + m_pStart++; + if (ch != m_SkipChar) + break; + + m_SkipStack.pop(); + if (m_SkipStack.empty()) + m_syntaxParserState = FDE_XmlSyntaxState::Text; + else + m_SkipChar = m_SkipStack.top(); + } else { + switch (ch) { + case L'<': + m_SkipChar = L'>'; + m_SkipStack.push(L'>'); + break; + case L'[': + m_SkipChar = L']'; + m_SkipStack.push(L']'); + break; + case L'(': + m_SkipChar = L')'; + m_SkipStack.push(L')'); + break; + case L'\'': + m_SkipChar = L'\''; + m_SkipStack.push(L'\''); + break; + case L'\"': + m_SkipChar = L'\"'; + m_SkipStack.push(L'\"'); + break; + default: + if (ch == m_SkipChar) { + m_SkipStack.pop(); + if (m_SkipStack.empty()) { + if (m_iDataLength >= 9) { + CFX_WideString wsHeader; + m_BlockBuffer.GetTextData(wsHeader, 0, 7); + } + m_iTextDataLength = m_iDataLength; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_syntaxParserState = FDE_XmlSyntaxState::Text; + } else { + m_SkipChar = m_SkipStack.top(); + } + } + break; + } + if (!m_SkipStack.empty()) { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + } + m_pStart++; + } + break; + case FDE_XmlSyntaxState::SkipComment: + if (FXSYS_wcsnicmp(m_pStart, L"-->", 3) == 0) { + m_pStart += 2; + m_syntaxParserState = FDE_XmlSyntaxState::Text; + } + + m_pStart++; + break; + case FDE_XmlSyntaxState::TargetData: + if (IsXMLWhiteSpace(ch)) { + if (m_iDataLength < 1) { + m_pStart++; + break; + } else if (m_wQuotationMark == 0) { + m_iTextDataLength = m_iDataLength; + m_wQuotationMark = 0; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_pStart++; + syntaxParserResult = FDE_XmlSyntaxResult::TargetData; + break; + } + } + if (ch == '?') { + m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; + m_pStart++; + } else if (ch == '\"') { + if (m_wQuotationMark == 0) { + m_wQuotationMark = ch; + m_pStart++; + } else if (ch == m_wQuotationMark) { + m_iTextDataLength = m_iDataLength; + m_wQuotationMark = 0; + m_BlockBuffer.Reset(); + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_pStart++; + syntaxParserResult = FDE_XmlSyntaxResult::TargetData; + } else { + m_syntaxParserResult = FDE_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + } else { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = + m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return FDE_XmlSyntaxResult::Error; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = ch; + m_iDataLength++; + m_pStart++; + } + break; + default: + break; + } + if (syntaxParserResult != FDE_XmlSyntaxResult::None) + return syntaxParserResult; + } + } + return FDE_XmlSyntaxResult::Text; +} + +CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() { + m_pCurrentBlock = nullptr; + FX_Free(m_pBuffer); +} + +int32_t CFDE_XMLSyntaxParser::GetStatus() const { + if (!m_pStream) + return -1; + + int32_t iStreamLength = m_pStream->GetLength(); + if (iStreamLength < 1) + return 100; + + if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) + return -1; + + if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) + return 100; + return m_iParsedBytes * 100 / iStreamLength; +} + +FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { + if (!m_pStream) + return 0; + + int32_t nSrcLen = m_pStart - m_pBuffer; + int32_t nDstLen = GetUTF8EncodeLength(m_pBuffer, nSrcLen); + return m_iParsedBytes + nDstLen; +} + +void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) { + if (m_iIndexInBlock == m_iAllocStep) { + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + if (!m_pCurrentBlock) { + return; + } + } + m_pCurrentBlock[m_iIndexInBlock++] = character; + m_iDataLength++; + if (m_iEntityStart > -1 && character == L';') { + CFX_WideString csEntity; + m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, + (m_iDataLength - 1) - m_iEntityStart - 1); + int32_t iLen = csEntity.GetLength(); + if (iLen > 0) { + if (csEntity[0] == L'#') { + uint32_t ch = 0; + wchar_t w; + if (iLen > 1 && csEntity[1] == L'x') { + for (int32_t i = 2; i < iLen; i++) { + w = csEntity[i]; + if (w >= L'0' && w <= L'9') { + ch = (ch << 4) + w - L'0'; + } else if (w >= L'A' && w <= L'F') { + ch = (ch << 4) + w - 55; + } else if (w >= L'a' && w <= L'f') { + ch = (ch << 4) + w - 87; + } else { + break; + } + } + } else { + for (int32_t i = 1; i < iLen; i++) { + w = csEntity[i]; + if (w < L'0' || w > L'9') + break; + ch = ch * 10 + w - L'0'; + } + } + if (ch > kMaxCharRange) + ch = ' '; + + character = static_cast(ch); + if (character != 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, character); + m_iEntityStart++; + } + } else { + if (csEntity.Compare(L"amp") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); + m_iEntityStart++; + } else if (csEntity.Compare(L"lt") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); + m_iEntityStart++; + } else if (csEntity.Compare(L"gt") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); + m_iEntityStart++; + } else if (csEntity.Compare(L"apos") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); + m_iEntityStart++; + } else if (csEntity.Compare(L"quot") == 0) { + m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); + m_iEntityStart++; + } + } + } + m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, false); + m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); + m_iEntityStart = -1; + } else { + if (m_iEntityStart < 0 && character == L'&') { + m_iEntityStart = m_iDataLength - 1; + } + } + m_pStart++; +} -- cgit v1.2.3