diff options
Diffstat (limited to 'xfa/fde/xml/fde_xml_imp.cpp')
-rw-r--r-- | xfa/fde/xml/fde_xml_imp.cpp | 1832 |
1 files changed, 0 insertions, 1832 deletions
diff --git a/xfa/fde/xml/fde_xml_imp.cpp b/xfa/fde/xml/fde_xml_imp.cpp deleted file mode 100644 index 2de48ef1f4..0000000000 --- a/xfa/fde/xml/fde_xml_imp.cpp +++ /dev/null @@ -1,1832 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "xfa/fde/xml/fde_xml_imp.h" - -#include <algorithm> -#include <utility> - -#include "core/fxcrt/fx_ext.h" -#include "core/fxcrt/fx_safe_types.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" -#include "xfa/fde/xml/cfde_xml_parser.h" -#include "xfa/fgas/crt/fgas_codepage.h" - -namespace { - -const uint32_t kMaxCharRange = 0x10ffff; - -const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09}, - {0x0A, 0x0A}, - {0x0D, 0x0D}, - {0x20, 0xD7FF}, - {0xE000, 0xFFFD}}; - -bool FDE_IsXMLWhiteSpace(wchar_t ch) { - return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; -} - -struct FDE_XMLNAMECHAR { - uint16_t wStart; - uint16_t wEnd; - bool bStartChar; -}; - -const FDE_XMLNAMECHAR g_XMLNameChars[] = { - {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false}, - {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true}, - {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true}, - {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true}, - {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false}, - {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true}, - {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true}, -}; - -bool FDE_IsXMLNameChar(wchar_t ch, bool bFirstChar) { - int32_t iStart = 0; - int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1; - while (iStart <= iEnd) { - int32_t iMid = (iStart + iEnd) / 2; - if (ch < g_XMLNameChars[iMid].wStart) { - iEnd = iMid - 1; - } else if (ch > g_XMLNameChars[iMid].wEnd) { - iStart = iMid + 1; - } else { - return bFirstChar ? g_XMLNameChars[iMid].bStartChar : true; - } - } - return false; -} - -} // namespace - -bool FDE_IsXMLValidChar(wchar_t ch) { - int32_t iStart = 0; - int32_t iEnd = FX_ArraySize(g_XMLValidCharRange) - 1; - while (iStart <= iEnd) { - int32_t iMid = (iStart + iEnd) / 2; - if (ch < g_XMLValidCharRange[iMid][0]) { - iEnd = iMid - 1; - } else if (ch > g_XMLValidCharRange[iMid][1]) { - iStart = iMid + 1; - } else { - return true; - } - } - return false; -} - -CFDE_XMLNode::CFDE_XMLNode() - : m_pParent(nullptr), - m_pChild(nullptr), - m_pPrior(nullptr), - m_pNext(nullptr) {} - -FDE_XMLNODETYPE CFDE_XMLNode::GetType() const { - return FDE_XMLNODE_Unknown; -} - -CFDE_XMLNode::~CFDE_XMLNode() { - DeleteChildren(); -} - -void CFDE_XMLNode::DeleteChildren() { - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - CFDE_XMLNode* pNext = pChild->m_pNext; - delete pChild; - pChild = pNext; - } - m_pChild = nullptr; -} - -int32_t CFDE_XMLNode::CountChildNodes() const { - int32_t iCount = 0; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - iCount++; - pChild = pChild->m_pNext; - } - return iCount; -} - -CFDE_XMLNode* CFDE_XMLNode::GetChildNode(int32_t index) const { - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - if (index == 0) { - return pChild; - } - index--; - pChild = pChild->m_pNext; - } - return nullptr; -} - -int32_t CFDE_XMLNode::GetChildNodeIndex(CFDE_XMLNode* pNode) const { - int32_t index = 0; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - if (pChild == pNode) { - return index; - } - index++; - pChild = pChild->m_pNext; - } - return -1; -} - -CFDE_XMLNode* CFDE_XMLNode::GetPath(const wchar_t* pPath, - int32_t iLength, - bool bQualifiedName) const { - ASSERT(pPath); - if (iLength < 0) { - iLength = FXSYS_wcslen(pPath); - } - if (iLength == 0) { - return nullptr; - } - CFX_WideString csPath; - const wchar_t* pStart = pPath; - const wchar_t* pEnd = pPath + iLength; - wchar_t ch; - while (pStart < pEnd) { - ch = *pStart++; - if (ch == L'/') { - break; - } else { - csPath += ch; - } - } - iLength -= pStart - pPath; - CFDE_XMLNode* pFind = nullptr; - if (csPath.GetLength() < 1) { - pFind = GetNodeItem(CFDE_XMLNode::Root); - } else if (csPath.Compare(L"..") == 0) { - pFind = m_pParent; - } else if (csPath.Compare(L".") == 0) { - pFind = (CFDE_XMLNode*)this; - } else { - CFX_WideString wsTag; - CFDE_XMLNode* pNode = m_pChild; - while (pNode) { - if (pNode->GetType() == FDE_XMLNODE_Element) { - if (bQualifiedName) { - ((CFDE_XMLElement*)pNode)->GetTagName(wsTag); - } else { - ((CFDE_XMLElement*)pNode)->GetLocalTagName(wsTag); - } - if (wsTag.Compare(csPath) == 0) { - if (iLength < 1) { - pFind = pNode; - } else { - pFind = pNode->GetPath(pStart, iLength, bQualifiedName); - } - if (pFind) - return pFind; - } - } - pNode = pNode->m_pNext; - } - } - if (!pFind || iLength < 1) - return pFind; - return pFind->GetPath(pStart, iLength, bQualifiedName); -} - -int32_t CFDE_XMLNode::InsertChildNode(CFDE_XMLNode* pNode, int32_t index) { - pNode->m_pParent = this; - if (!m_pChild) { - m_pChild = pNode; - pNode->m_pPrior = nullptr; - pNode->m_pNext = nullptr; - return 0; - } - if (index == 0) { - pNode->m_pNext = m_pChild; - pNode->m_pPrior = nullptr; - m_pChild->m_pPrior = pNode; - m_pChild = pNode; - return 0; - } - int32_t iCount = 0; - CFDE_XMLNode* pFind = m_pChild; - while (++iCount != index && pFind->m_pNext) { - pFind = pFind->m_pNext; - } - pNode->m_pPrior = pFind; - pNode->m_pNext = pFind->m_pNext; - if (pFind->m_pNext) - pFind->m_pNext->m_pPrior = pNode; - pFind->m_pNext = pNode; - return iCount; -} - -void CFDE_XMLNode::RemoveChildNode(CFDE_XMLNode* pNode) { - ASSERT(m_pChild && pNode); - if (m_pChild == pNode) { - m_pChild = pNode->m_pNext; - } else { - pNode->m_pPrior->m_pNext = pNode->m_pNext; - } - if (pNode->m_pNext) - pNode->m_pNext->m_pPrior = pNode->m_pPrior; - pNode->m_pParent = nullptr; - pNode->m_pNext = nullptr; - pNode->m_pPrior = nullptr; -} - -CFDE_XMLNode* CFDE_XMLNode::GetNodeItem(CFDE_XMLNode::NodeItem eItem) const { - switch (eItem) { - case CFDE_XMLNode::Root: { - CFDE_XMLNode* pParent = (CFDE_XMLNode*)this; - while (pParent->m_pParent) { - pParent = pParent->m_pParent; - } - return pParent; - } - case CFDE_XMLNode::Parent: - return m_pParent; - case CFDE_XMLNode::FirstSibling: { - CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; - while (pItem->m_pPrior) { - pItem = pItem->m_pPrior; - } - return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; - } - case CFDE_XMLNode::PriorSibling: - return m_pPrior; - case CFDE_XMLNode::NextSibling: - return m_pNext; - case CFDE_XMLNode::LastSibling: { - CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; - while (pItem->m_pNext) - pItem = pItem->m_pNext; - return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; - } - case CFDE_XMLNode::FirstNeighbor: { - CFDE_XMLNode* pParent = (CFDE_XMLNode*)this; - while (pParent->m_pParent) - pParent = pParent->m_pParent; - return pParent == (CFDE_XMLNode*)this ? nullptr : pParent; - } - case CFDE_XMLNode::PriorNeighbor: { - if (!m_pPrior) - return m_pParent; - - CFDE_XMLNode* pItem = m_pPrior; - while (pItem->m_pChild) { - pItem = pItem->m_pChild; - while (pItem->m_pNext) - pItem = pItem->m_pNext; - } - return pItem; - } - case CFDE_XMLNode::NextNeighbor: { - if (m_pChild) - return m_pChild; - if (m_pNext) - return m_pNext; - CFDE_XMLNode* pItem = m_pParent; - while (pItem) { - if (pItem->m_pNext) - return pItem->m_pNext; - pItem = pItem->m_pParent; - } - return nullptr; - } - case CFDE_XMLNode::LastNeighbor: { - CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; - while (pItem->m_pParent) { - pItem = pItem->m_pParent; - } - while (true) { - while (pItem->m_pNext) - pItem = pItem->m_pNext; - if (!pItem->m_pChild) - break; - pItem = pItem->m_pChild; - } - return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; - } - case CFDE_XMLNode::FirstChild: - return m_pChild; - case CFDE_XMLNode::LastChild: { - if (!m_pChild) - return nullptr; - - CFDE_XMLNode* pChild = m_pChild; - while (pChild->m_pNext) - pChild = pChild->m_pNext; - return pChild; - } - default: - break; - } - return nullptr; -} - -int32_t CFDE_XMLNode::GetNodeLevel() const { - int32_t iLevel = 0; - const CFDE_XMLNode* pItem = m_pParent; - while (pItem) { - iLevel++; - pItem = pItem->m_pParent; - } - return iLevel; -} - -bool CFDE_XMLNode::InsertNodeItem(CFDE_XMLNode::NodeItem eItem, - CFDE_XMLNode* pNode) { - switch (eItem) { - case CFDE_XMLNode::NextSibling: { - pNode->m_pParent = m_pParent; - pNode->m_pNext = m_pNext; - pNode->m_pPrior = this; - if (m_pNext) { - m_pNext->m_pPrior = pNode; - } - m_pNext = pNode; - return true; - } - case CFDE_XMLNode::PriorSibling: { - pNode->m_pParent = m_pParent; - pNode->m_pNext = this; - pNode->m_pPrior = m_pPrior; - if (m_pPrior) { - m_pPrior->m_pNext = pNode; - } else if (m_pParent) { - m_pParent->m_pChild = pNode; - } - m_pPrior = pNode; - return true; - } - default: - return false; - } -} - -CFDE_XMLNode* CFDE_XMLNode::RemoveNodeItem(CFDE_XMLNode::NodeItem eItem) { - CFDE_XMLNode* pNode = nullptr; - switch (eItem) { - case CFDE_XMLNode::NextSibling: - if (m_pNext) { - pNode = m_pNext; - m_pNext = pNode->m_pNext; - if (m_pNext) { - m_pNext->m_pPrior = this; - } - pNode->m_pParent = nullptr; - pNode->m_pNext = nullptr; - pNode->m_pPrior = nullptr; - } - break; - default: - break; - } - return pNode; -} - -CFDE_XMLNode* CFDE_XMLNode::Clone(bool bRecursive) { - return nullptr; -} - -void CFDE_XMLNode::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream) { - CFDE_XMLNode* pNode = (CFDE_XMLNode*)this; - switch (pNode->GetType()) { - case FDE_XMLNODE_Instruction: { - CFX_WideString ws; - CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; - if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) { - ws = L"<?xml version=\"1.0\" encoding=\""; - uint16_t wCodePage = pXMLStream->GetCodePage(); - if (wCodePage == FX_CODEPAGE_UTF16LE) { - ws += L"UTF-16"; - } else if (wCodePage == FX_CODEPAGE_UTF16BE) { - ws += L"UTF-16be"; - } else { - ws += L"UTF-8"; - } - ws += L"\"?>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } else { - ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str()); - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes; - int32_t i; - int32_t iCount = pdfium::CollectionSize<int32_t>(attributes); - CFX_WideString wsValue; - for (i = 0; i < iCount; i += 2) { - ws = L" "; - ws += attributes[i]; - ws += L"=\""; - wsValue = attributes[i + 1]; - wsValue.Replace(L"&", L"&"); - wsValue.Replace(L"<", L"<"); - wsValue.Replace(L">", L">"); - wsValue.Replace(L"\'", L"'"); - wsValue.Replace(L"\"", L"""); - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData; - iCount = pdfium::CollectionSize<int32_t>(targetdata); - for (i = 0; i < iCount; i++) { - ws = L" \""; - ws += targetdata[i]; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - ws = L"?>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - } break; - case FDE_XMLNODE_Element: { - CFX_WideString ws; - ws = L"<"; - ws += ((CFDE_XMLElement*)pNode)->m_wsTag; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - std::vector<CFX_WideString>& attributes = - static_cast<CFDE_XMLElement*>(pNode)->m_Attributes; - int32_t iCount = pdfium::CollectionSize<int32_t>(attributes); - CFX_WideString wsValue; - for (int32_t i = 0; i < iCount; i += 2) { - ws = L" "; - ws += attributes[i]; - ws += L"=\""; - wsValue = attributes[i + 1]; - wsValue.Replace(L"&", L"&"); - wsValue.Replace(L"<", L"<"); - wsValue.Replace(L">", L">"); - wsValue.Replace(L"\'", L"'"); - wsValue.Replace(L"\"", L"""); - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - if (pNode->m_pChild) { - ws = L"\n>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - CFDE_XMLNode* pChild = pNode->m_pChild; - while (pChild) { - pChild->SaveXMLNode(pXMLStream); - pChild = pChild->m_pNext; - } - ws = L"</"; - ws += ((CFDE_XMLElement*)pNode)->m_wsTag; - ws += L"\n>"; - } else { - ws = L"\n/>"; - } - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_Text: { - CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText; - ws.Replace(L"&", L"&"); - ws.Replace(L"<", L"<"); - ws.Replace(L">", L">"); - ws.Replace(L"\'", L"'"); - ws.Replace(L"\"", L"""); - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_CharData: { - CFX_WideString ws = L"<![CDATA["; - ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData; - ws += L"]]>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_Unknown: - break; - default: - break; - } -} - -void CFDE_XMLNode::CloneChildren(CFDE_XMLNode* pClone) { - if (!m_pChild) { - return; - } - CFDE_XMLNode* pNext = m_pChild; - CFDE_XMLNode* pCloneNext = pNext->Clone(true); - pClone->InsertChildNode(pCloneNext); - pNext = pNext->m_pNext; - while (pNext) { - CFDE_XMLNode* pChild = pNext->Clone(true); - pCloneNext->InsertNodeItem(CFDE_XMLNode::NextSibling, pChild); - pCloneNext = pChild; - pNext = pNext->m_pNext; - } -} - -CFDE_XMLInstruction::CFDE_XMLInstruction(const CFX_WideString& wsTarget) - : m_wsTarget(wsTarget) { - ASSERT(m_wsTarget.GetLength() > 0); -} - -FDE_XMLNODETYPE CFDE_XMLInstruction::GetType() const { - return FDE_XMLNODE_Instruction; -} - -CFDE_XMLNode* CFDE_XMLInstruction::Clone(bool bRecursive) { - CFDE_XMLInstruction* pClone = new CFDE_XMLInstruction(m_wsTarget); - if (!pClone) - return nullptr; - - pClone->m_Attributes = m_Attributes; - pClone->m_TargetData = m_TargetData; - if (bRecursive) - CloneChildren(pClone); - - return pClone; -} - -int32_t CFDE_XMLInstruction::CountAttributes() const { - return pdfium::CollectionSize<int32_t>(m_Attributes) / 2; -} - -bool CFDE_XMLInstruction::GetAttribute(int32_t index, - CFX_WideString& wsAttriName, - CFX_WideString& wsAttriValue) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - ASSERT(index > -1 && index < iCount / 2); - for (int32_t i = 0; i < iCount; i += 2) { - if (index == 0) { - wsAttriName = m_Attributes[i]; - wsAttriValue = m_Attributes[i + 1]; - return true; - } - index--; - } - return false; -} - -bool CFDE_XMLInstruction::HasAttribute(const wchar_t* pwsAttriName) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return true; - } - } - return false; -} - -void CFDE_XMLInstruction::GetString(const wchar_t* pwsAttriName, - CFX_WideString& wsAttriValue, - const wchar_t* pwsDefValue) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - wsAttriValue = m_Attributes[i + 1]; - return; - } - } - wsAttriValue = pwsDefValue; -} - -void CFDE_XMLInstruction::SetString(const CFX_WideString& wsAttriName, - const CFX_WideString& wsAttriValue) { - ASSERT(wsAttriName.GetLength() > 0); - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(wsAttriName) == 0) { - m_Attributes[i] = wsAttriName; - m_Attributes[i + 1] = wsAttriValue; - return; - } - } - m_Attributes.push_back(wsAttriName); - m_Attributes.push_back(wsAttriValue); -} - -int32_t CFDE_XMLInstruction::GetInteger(const wchar_t* pwsAttriName, - int32_t iDefValue) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return FXSYS_wtoi(m_Attributes[i + 1].c_str()); - } - } - return iDefValue; -} - -void CFDE_XMLInstruction::SetInteger(const wchar_t* pwsAttriName, - int32_t iAttriValue) { - CFX_WideString wsValue; - wsValue.Format(L"%d", iAttriValue); - SetString(pwsAttriName, wsValue); -} - -float CFDE_XMLInstruction::GetFloat(const wchar_t* pwsAttriName, - float fDefValue) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr); - } - } - return fDefValue; -} - -void CFDE_XMLInstruction::SetFloat(const wchar_t* pwsAttriName, - float fAttriValue) { - CFX_WideString wsValue; - wsValue.Format(L"%f", fAttriValue); - SetString(pwsAttriName, wsValue); -} - -void CFDE_XMLInstruction::RemoveAttribute(const wchar_t* pwsAttriName) { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - m_Attributes.erase(m_Attributes.begin() + i, - m_Attributes.begin() + i + 2); - return; - } - } -} - -int32_t CFDE_XMLInstruction::CountData() const { - return pdfium::CollectionSize<int32_t>(m_TargetData); -} - -bool CFDE_XMLInstruction::GetData(int32_t index, CFX_WideString& wsData) const { - if (!pdfium::IndexInBounds(m_TargetData, index)) - return false; - - wsData = m_TargetData[index]; - return true; -} - -void CFDE_XMLInstruction::AppendData(const CFX_WideString& wsData) { - m_TargetData.push_back(wsData); -} - -void CFDE_XMLInstruction::RemoveData(int32_t index) { - if (pdfium::IndexInBounds(m_TargetData, index)) - m_TargetData.erase(m_TargetData.begin() + index); -} - -CFDE_XMLInstruction::~CFDE_XMLInstruction() {} - -CFDE_XMLElement::CFDE_XMLElement(const CFX_WideString& wsTag) - : CFDE_XMLNode(), m_wsTag(wsTag), m_Attributes() { - ASSERT(m_wsTag.GetLength() > 0); -} - -CFDE_XMLElement::~CFDE_XMLElement() {} - -FDE_XMLNODETYPE CFDE_XMLElement::GetType() const { - return FDE_XMLNODE_Element; -} - -CFDE_XMLNode* CFDE_XMLElement::Clone(bool bRecursive) { - CFDE_XMLElement* pClone = new CFDE_XMLElement(m_wsTag); - if (!pClone) - return nullptr; - - pClone->m_Attributes = m_Attributes; - if (bRecursive) { - CloneChildren(pClone); - } else { - CFX_WideString wsText; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - switch (pChild->GetType()) { - case FDE_XMLNODE_Text: - wsText += ((CFDE_XMLText*)pChild)->m_wsText; - break; - default: - break; - } - pChild = pChild->m_pNext; - } - pClone->SetTextData(wsText); - } - return pClone; -} - -void CFDE_XMLElement::GetTagName(CFX_WideString& wsTag) const { - wsTag = m_wsTag; -} - -void CFDE_XMLElement::GetLocalTagName(CFX_WideString& wsTag) const { - FX_STRSIZE iFind = m_wsTag.Find(L':', 0); - if (iFind < 0) { - wsTag = m_wsTag; - } else { - wsTag = m_wsTag.Right(m_wsTag.GetLength() - iFind - 1); - } -} - -void CFDE_XMLElement::GetNamespacePrefix(CFX_WideString& wsPrefix) const { - FX_STRSIZE iFind = m_wsTag.Find(L':', 0); - if (iFind < 0) { - wsPrefix.clear(); - } else { - wsPrefix = m_wsTag.Left(iFind); - } -} - -void CFDE_XMLElement::GetNamespaceURI(CFX_WideString& wsNamespace) const { - CFX_WideString wsAttri(L"xmlns"), wsPrefix; - GetNamespacePrefix(wsPrefix); - if (wsPrefix.GetLength() > 0) { - wsAttri += L":"; - wsAttri += wsPrefix; - } - wsNamespace.clear(); - CFDE_XMLNode* pNode = (CFDE_XMLNode*)this; - while (pNode) { - if (pNode->GetType() != FDE_XMLNODE_Element) { - break; - } - CFDE_XMLElement* pElement = (CFDE_XMLElement*)pNode; - if (!pElement->HasAttribute(wsAttri.c_str())) { - pNode = pNode->GetNodeItem(CFDE_XMLNode::Parent); - continue; - } - pElement->GetString(wsAttri.c_str(), wsNamespace); - break; - } -} - -int32_t CFDE_XMLElement::CountAttributes() const { - return pdfium::CollectionSize<int32_t>(m_Attributes) / 2; -} - -bool CFDE_XMLElement::GetAttribute(int32_t index, - CFX_WideString& wsAttriName, - CFX_WideString& wsAttriValue) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - ASSERT(index > -1 && index < iCount / 2); - for (int32_t i = 0; i < iCount; i += 2) { - if (index == 0) { - wsAttriName = m_Attributes[i]; - wsAttriValue = m_Attributes[i + 1]; - return true; - } - index--; - } - return false; -} - -bool CFDE_XMLElement::HasAttribute(const wchar_t* pwsAttriName) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) - return true; - } - return false; -} - -void CFDE_XMLElement::GetString(const wchar_t* pwsAttriName, - CFX_WideString& wsAttriValue, - const wchar_t* pwsDefValue) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - wsAttriValue = m_Attributes[i + 1]; - return; - } - } - wsAttriValue = pwsDefValue; -} - -void CFDE_XMLElement::SetString(const CFX_WideString& wsAttriName, - const CFX_WideString& wsAttriValue) { - ASSERT(wsAttriName.GetLength() > 0); - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(wsAttriName) == 0) { - m_Attributes[i] = wsAttriName; - m_Attributes[i + 1] = wsAttriValue; - return; - } - } - m_Attributes.push_back(wsAttriName); - m_Attributes.push_back(wsAttriValue); -} - -int32_t CFDE_XMLElement::GetInteger(const wchar_t* pwsAttriName, - int32_t iDefValue) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return FXSYS_wtoi(m_Attributes[i + 1].c_str()); - } - } - return iDefValue; -} - -void CFDE_XMLElement::SetInteger(const wchar_t* pwsAttriName, - int32_t iAttriValue) { - CFX_WideString wsValue; - wsValue.Format(L"%d", iAttriValue); - SetString(pwsAttriName, wsValue); -} - -float CFDE_XMLElement::GetFloat(const wchar_t* pwsAttriName, - float fDefValue) const { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr); - } - } - return fDefValue; -} - -void CFDE_XMLElement::SetFloat(const wchar_t* pwsAttriName, float fAttriValue) { - CFX_WideString wsValue; - wsValue.Format(L"%f", fAttriValue); - SetString(pwsAttriName, wsValue); -} - -void CFDE_XMLElement::RemoveAttribute(const wchar_t* pwsAttriName) { - int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); - for (int32_t i = 0; i < iCount; i += 2) { - if (m_Attributes[i].Compare(pwsAttriName) == 0) { - m_Attributes.erase(m_Attributes.begin() + i, - m_Attributes.begin() + i + 2); - return; - } - } -} - -void CFDE_XMLElement::GetTextData(CFX_WideString& wsText) const { - CFX_WideTextBuf buffer; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - switch (pChild->GetType()) { - case FDE_XMLNODE_Text: - buffer << ((CFDE_XMLText*)pChild)->m_wsText; - break; - case FDE_XMLNODE_CharData: - buffer << ((CFDE_XMLCharData*)pChild)->m_wsCharData; - break; - default: - break; - } - pChild = pChild->m_pNext; - } - wsText = buffer.AsStringC(); -} - -void CFDE_XMLElement::SetTextData(const CFX_WideString& wsText) { - if (wsText.GetLength() < 1) { - return; - } - InsertChildNode(new CFDE_XMLText(wsText)); -} - -CFDE_XMLText::CFDE_XMLText(const CFX_WideString& wsText) - : CFDE_XMLNode(), m_wsText(wsText) {} - -FDE_XMLNODETYPE CFDE_XMLText::GetType() const { - return FDE_XMLNODE_Text; -} - -CFDE_XMLNode* CFDE_XMLText::Clone(bool bRecursive) { - CFDE_XMLText* pClone = new CFDE_XMLText(m_wsText); - return pClone; -} - -CFDE_XMLText::~CFDE_XMLText() {} - -CFDE_XMLCharData::CFDE_XMLCharData(const CFX_WideString& wsCData) - : CFDE_XMLDeclaration(), m_wsCharData(wsCData) {} - -FDE_XMLNODETYPE CFDE_XMLCharData::GetType() const { - return FDE_XMLNODE_CharData; -} - -CFDE_XMLNode* CFDE_XMLCharData::Clone(bool bRecursive) { - CFDE_XMLCharData* pClone = new CFDE_XMLCharData(m_wsCharData); - return pClone; -} - -CFDE_XMLCharData::~CFDE_XMLCharData() {} - -CFDE_XMLDoc::CFDE_XMLDoc() - : m_iStatus(0), m_pRoot(pdfium::MakeUnique<CFDE_XMLNode>()) { - m_pRoot->InsertChildNode(new CFDE_XMLInstruction(L"xml")); -} - -CFDE_XMLDoc::~CFDE_XMLDoc() {} - -bool CFDE_XMLDoc::LoadXML(std::unique_ptr<CFDE_XMLParser> pXMLParser) { - if (!pXMLParser) - return false; - - m_iStatus = 0; - m_pStream.Reset(); - m_pRoot->DeleteChildren(); - m_pXMLParser = std::move(pXMLParser); - return true; -} - -int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) { - if (m_iStatus < 100) - m_iStatus = m_pXMLParser->DoParser(pPause); - - return m_iStatus; -} - -void CFDE_XMLDoc::CloseXML() { - m_pXMLParser.reset(); -} - -void CFDE_XMLDoc::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream, - CFDE_XMLNode* pINode) { - CFDE_XMLNode* pNode = (CFDE_XMLNode*)pINode; - switch (pNode->GetType()) { - case FDE_XMLNODE_Instruction: { - CFX_WideString ws; - CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; - if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) { - ws = L"<?xml version=\"1.0\" encoding=\""; - uint16_t wCodePage = pXMLStream->GetCodePage(); - if (wCodePage == FX_CODEPAGE_UTF16LE) { - ws += L"UTF-16"; - } else if (wCodePage == FX_CODEPAGE_UTF16BE) { - ws += L"UTF-16be"; - } else { - ws += L"UTF-8"; - } - ws += L"\"?>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } else { - ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str()); - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes; - int32_t i; - int32_t iCount = pdfium::CollectionSize<int32_t>(attributes); - CFX_WideString wsValue; - for (i = 0; i < iCount; i += 2) { - ws = L" "; - ws += attributes[i]; - ws += L"=\""; - wsValue = attributes[i + 1]; - wsValue.Replace(L"&", L"&"); - wsValue.Replace(L"<", L"<"); - wsValue.Replace(L">", L">"); - wsValue.Replace(L"\'", L"'"); - wsValue.Replace(L"\"", L"""); - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData; - iCount = pdfium::CollectionSize<int32_t>(targetdata); - for (i = 0; i < iCount; i++) { - ws = L" \""; - ws += targetdata[i]; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - ws = L"?>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - } break; - case FDE_XMLNODE_Element: { - CFX_WideString ws; - ws = L"<"; - ws += ((CFDE_XMLElement*)pNode)->m_wsTag; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - std::vector<CFX_WideString>& attributes = - static_cast<CFDE_XMLElement*>(pNode)->m_Attributes; - int32_t iCount = pdfium::CollectionSize<int32_t>(attributes); - CFX_WideString wsValue; - for (int32_t i = 0; i < iCount; i += 2) { - ws = L" "; - ws += attributes[i]; - ws += L"=\""; - wsValue = attributes[i + 1]; - wsValue.Replace(L"&", L"&"); - wsValue.Replace(L"<", L"<"); - wsValue.Replace(L">", L">"); - wsValue.Replace(L"\'", L"'"); - wsValue.Replace(L"\"", L"""); - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } - if (pNode->m_pChild) { - ws = L"\n>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - CFDE_XMLNode* pChild = pNode->m_pChild; - while (pChild) { - SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pChild)); - pChild = pChild->m_pNext; - } - ws = L"</"; - ws += ((CFDE_XMLElement*)pNode)->m_wsTag; - ws += L"\n>"; - } else { - ws = L"\n/>"; - } - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_Text: { - CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText; - ws.Replace(L"&", L"&"); - ws.Replace(L"<", L"<"); - ws.Replace(L">", L">"); - ws.Replace(L"\'", L"'"); - ws.Replace(L"\"", L"""); - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_CharData: { - CFX_WideString ws = L"<![CDATA["; - ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData; - ws += L"]]>"; - pXMLStream->WriteString(ws.c_str(), ws.GetLength()); - } break; - case FDE_XMLNODE_Unknown: - break; - default: - break; - } -} - -CFDE_BlockBuffer::CFDE_BlockBuffer(int32_t iAllocStep) - : m_iDataLength(0), - m_iBufferSize(0), - m_iAllocStep(iAllocStep), - m_iStartPosition(0) {} - -CFDE_BlockBuffer::~CFDE_BlockBuffer() { - ClearBuffer(); -} - -wchar_t* CFDE_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) { - iIndexInBlock = 0; - if (m_BlockArray.empty()) - return nullptr; - - int32_t iRealIndex = m_iStartPosition + m_iDataLength; - if (iRealIndex == m_iBufferSize) { - m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep)); - m_iBufferSize += m_iAllocStep; - return m_BlockArray.back().get(); - } - iIndexInBlock = iRealIndex % m_iAllocStep; - return m_BlockArray[iRealIndex / m_iAllocStep].get(); -} - -bool CFDE_BlockBuffer::InitBuffer(int32_t iBufferSize) { - ClearBuffer(); - int32_t iNumOfBlock = (iBufferSize - 1) / m_iAllocStep + 1; - for (int32_t i = 0; i < iNumOfBlock; i++) - m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep)); - - m_iBufferSize = iNumOfBlock * m_iAllocStep; - return true; -} - -void CFDE_BlockBuffer::SetTextChar(int32_t iIndex, wchar_t ch) { - if (iIndex < 0) { - return; - } - int32_t iRealIndex = m_iStartPosition + iIndex; - int32_t iBlockIndex = iRealIndex / m_iAllocStep; - int32_t iInnerIndex = iRealIndex % m_iAllocStep; - int32_t iBlockSize = pdfium::CollectionSize<int32_t>(m_BlockArray); - if (iBlockIndex >= iBlockSize) { - int32_t iNewBlocks = iBlockIndex - iBlockSize + 1; - do { - m_BlockArray.emplace_back(FX_Alloc(wchar_t, m_iAllocStep)); - m_iBufferSize += m_iAllocStep; - } while (--iNewBlocks); - } - wchar_t* pTextData = m_BlockArray[iBlockIndex].get(); - pTextData[iInnerIndex] = ch; - m_iDataLength = std::max(m_iDataLength, iIndex + 1); -} - -int32_t CFDE_BlockBuffer::DeleteTextChars(int32_t iCount, bool bDirection) { - if (iCount <= 0) - return m_iDataLength; - - if (iCount >= m_iDataLength) { - Reset(false); - return 0; - } - if (bDirection) { - m_iStartPosition += iCount; - m_iDataLength -= iCount; - } else { - m_iDataLength -= iCount; - } - return m_iDataLength; -} - -void CFDE_BlockBuffer::GetTextData(CFX_WideString& wsTextData, - int32_t iStart, - int32_t iLength) const { - wsTextData.clear(); - int32_t iMaybeDataLength = m_iBufferSize - 1 - m_iStartPosition; - if (iStart < 0 || iStart > iMaybeDataLength) { - return; - } - if (iLength == -1 || iLength > iMaybeDataLength) { - iLength = iMaybeDataLength; - } - if (iLength <= 0) { - return; - } - wchar_t* pBuf = wsTextData.GetBuffer(iLength); - if (!pBuf) { - return; - } - int32_t iStartBlockIndex = 0; - int32_t iStartInnerIndex = 0; - TextDataIndex2BufIndex(iStart, iStartBlockIndex, iStartInnerIndex); - int32_t iEndBlockIndex = 0; - int32_t iEndInnerIndex = 0; - TextDataIndex2BufIndex(iStart + iLength, iEndBlockIndex, iEndInnerIndex); - int32_t iPointer = 0; - for (int32_t i = iStartBlockIndex; i <= iEndBlockIndex; i++) { - int32_t iBufferPointer = 0; - int32_t iCopyLength = m_iAllocStep; - if (i == iStartBlockIndex) { - iCopyLength -= iStartInnerIndex; - iBufferPointer = iStartInnerIndex; - } - if (i == iEndBlockIndex) { - iCopyLength -= ((m_iAllocStep - 1) - iEndInnerIndex); - } - wchar_t* pBlockBuf = m_BlockArray[i].get(); - memcpy(pBuf + iPointer, pBlockBuf + iBufferPointer, - iCopyLength * sizeof(wchar_t)); - iPointer += iCopyLength; - } - wsTextData.ReleaseBuffer(iLength); -} - -void CFDE_BlockBuffer::TextDataIndex2BufIndex(const int32_t iIndex, - int32_t& iBlockIndex, - int32_t& iInnerIndex) const { - ASSERT(iIndex >= 0); - int32_t iRealIndex = m_iStartPosition + iIndex; - iBlockIndex = iRealIndex / m_iAllocStep; - iInnerIndex = iRealIndex % m_iAllocStep; -} - -void CFDE_BlockBuffer::ClearBuffer() { - m_iBufferSize = 0; - m_BlockArray.clear(); -} - -CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser() - : m_pStream(nullptr), - m_iXMLPlaneSize(-1), - m_iCurrentPos(0), - m_iCurrentNodeNum(-1), - m_iLastNodeNum(-1), - m_iParsedChars(0), - m_iParsedBytes(0), - m_pBuffer(nullptr), - m_iBufferChars(0), - m_bEOS(false), - m_pStart(nullptr), - m_pEnd(nullptr), - m_iAllocStep(m_BlockBuffer.GetAllocStep()), - m_iDataLength(m_BlockBuffer.GetDataLengthRef()), - m_pCurrentBlock(nullptr), - m_iIndexInBlock(0), - m_iTextDataLength(0), - m_syntaxParserResult(FDE_XmlSyntaxResult::None), - m_syntaxParserState(FDE_XmlSyntaxState::Text), - m_wQuotationMark(0), - m_iEntityStart(-1) { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FDE_XMLNODE_Unknown; -} - -void CFDE_XMLSyntaxParser::Init(const CFX_RetainPtr<IFGAS_Stream>& pStream, - int32_t iXMLPlaneSize, - int32_t iTextDataSize) { - ASSERT(!m_pStream && !m_pBuffer); - ASSERT(pStream && iXMLPlaneSize > 0); - int32_t iStreamLength = pStream->GetLength(); - ASSERT(iStreamLength > 0); - m_pStream = pStream; - m_iXMLPlaneSize = std::min(iXMLPlaneSize, iStreamLength); - uint8_t bom[4]; - m_iCurrentPos = m_pStream->GetBOM(bom); - ASSERT(!m_pBuffer); - - FX_SAFE_INT32 alloc_size_safe = m_iXMLPlaneSize; - alloc_size_safe += 1; // For NUL. - if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return; - } - - m_pBuffer = FX_Alloc( - wchar_t, pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe)); - m_pStart = m_pEnd = m_pBuffer; - ASSERT(!m_BlockBuffer.IsInitialized()); - m_BlockBuffer.InitBuffer(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_iParsedBytes = m_iParsedChars = 0; - m_iBufferChars = 0; -} - -FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { - return m_syntaxParserResult; - } - ASSERT(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized()); - int32_t iStreamLength = m_pStream->GetLength(); - int32_t iPos; - - FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None; - while (true) { - if (m_pStart >= m_pEnd) { - if (m_bEOS || m_iCurrentPos >= iStreamLength) { - m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; - return m_syntaxParserResult; - } - m_iParsedChars += (m_pEnd - m_pBuffer); - m_iParsedBytes = m_iCurrentPos; - if (m_pStream->GetPosition() != m_iCurrentPos) { - m_pStream->Seek(FX_STREAMSEEK_Begin, m_iCurrentPos); - } - m_iBufferChars = - m_pStream->ReadString(m_pBuffer, m_iXMLPlaneSize, m_bEOS); - iPos = m_pStream->GetPosition(); - if (m_iBufferChars < 1) { - m_iCurrentPos = iStreamLength; - m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; - return m_syntaxParserResult; - } - m_iCurrentPos = iPos; - m_pStart = m_pBuffer; - m_pEnd = m_pBuffer + m_iBufferChars; - } - - while (m_pStart < m_pEnd) { - wchar_t ch = *m_pStart; - switch (m_syntaxParserState) { - case FDE_XmlSyntaxState::Text: - if (ch == L'<') { - if (m_iDataLength > 0) { - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_iEntityStart = -1; - syntaxParserResult = FDE_XmlSyntaxResult::Text; - } else { - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::Node; - } - } else { - ParseTextChar(ch); - } - break; - case FDE_XmlSyntaxState::Node: - if (ch == L'!') { - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl; - } else if (ch == L'/') { - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; - } else if (ch == L'?') { - m_iLastNodeNum++; - m_iCurrentNodeNum = m_iLastNodeNum; - m_CurNode.iNodeNum = m_iLastNodeNum; - m_CurNode.eNodeType = FDE_XMLNODE_Instruction; - m_XMLNodeStack.push(m_CurNode); - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::Target; - syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen; - } else { - m_iLastNodeNum++; - m_iCurrentNodeNum = m_iLastNodeNum; - m_CurNode.iNodeNum = m_iLastNodeNum; - m_CurNode.eNodeType = FDE_XMLNODE_Element; - m_XMLNodeStack.push(m_CurNode); - m_syntaxParserState = FDE_XmlSyntaxState::Tag; - syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen; - } - break; - case FDE_XmlSyntaxState::Target: - case FDE_XmlSyntaxState::Tag: - if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) { - if (m_iDataLength < 1) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (m_syntaxParserState != FDE_XmlSyntaxState::Target) { - syntaxParserResult = FDE_XmlSyntaxResult::TagName; - } else { - syntaxParserResult = FDE_XmlSyntaxResult::TargetName; - } - m_syntaxParserState = FDE_XmlSyntaxState::AttriName; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_pStart++; - } - break; - case FDE_XmlSyntaxState::AttriName: - if (m_iDataLength < 1 && FDE_IsXMLWhiteSpace(ch)) { - m_pStart++; - break; - } - if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) { - if (m_iDataLength < 1) { - if (m_CurNode.eNodeType == FDE_XMLNODE_Element) { - if (ch == L'>' || ch == L'/') { - m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; - break; - } - } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { - if (ch == L'?') { - m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; - m_pStart++; - } else { - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - } - break; - } - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { - if (ch != '=' && !FDE_IsXMLWhiteSpace(ch)) { - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - break; - } - } - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; - syntaxParserResult = FDE_XmlSyntaxResult::AttriName; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_pStart++; - } - break; - case FDE_XmlSyntaxState::AttriEqualSign: - if (FDE_IsXMLWhiteSpace(ch)) { - m_pStart++; - break; - } - if (ch != L'=') { - if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - break; - } - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation; - m_pStart++; - } - break; - case FDE_XmlSyntaxState::AttriQuotation: - if (FDE_IsXMLWhiteSpace(ch)) { - m_pStart++; - break; - } - if (ch != L'\"' && ch != L'\'') { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - m_wQuotationMark = ch; - m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; - m_pStart++; - } - break; - case FDE_XmlSyntaxState::AttriValue: - if (ch == m_wQuotationMark) { - if (m_iEntityStart > -1) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_iTextDataLength = m_iDataLength; - m_wQuotationMark = 0; - m_BlockBuffer.Reset(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_pStart++; - m_syntaxParserState = FDE_XmlSyntaxState::AttriName; - syntaxParserResult = FDE_XmlSyntaxResult::AttriValue; - } else { - ParseTextChar(ch); - } - break; - case FDE_XmlSyntaxState::CloseInstruction: - if (ch != L'>') { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - } else if (m_iDataLength > 0) { - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - syntaxParserResult = FDE_XmlSyntaxResult::TargetData; - } else { - m_pStart++; - if (m_XMLNodeStack.empty()) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_XMLNodeStack.pop(); - if (!m_XMLNodeStack.empty()) { - m_CurNode = m_XMLNodeStack.top(); - } else { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FDE_XMLNODE_Unknown; - } - m_iCurrentNodeNum = m_CurNode.iNodeNum; - m_BlockBuffer.Reset(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose; - } - break; - case FDE_XmlSyntaxState::BreakElement: - if (ch == L'>') { - m_syntaxParserState = FDE_XmlSyntaxState::Text; - syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak; - } else if (ch == L'/') { - m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; - } else { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_pStart++; - break; - case FDE_XmlSyntaxState::CloseElement: - if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) { - if (ch == L'>') { - if (m_XMLNodeStack.empty()) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_XMLNodeStack.pop(); - if (!m_XMLNodeStack.empty()) { - m_CurNode = m_XMLNodeStack.top(); - } else { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FDE_XMLNODE_Unknown; - } - m_iCurrentNodeNum = m_CurNode.iNodeNum; - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - syntaxParserResult = FDE_XmlSyntaxResult::ElementClose; - } else if (!FDE_IsXMLWhiteSpace(ch)) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - } - m_pStart++; - break; - case FDE_XmlSyntaxState::SkipCommentOrDecl: - if (FXSYS_wcsnicmp(m_pStart, L"--", 2) == 0) { - m_pStart += 2; - m_syntaxParserState = FDE_XmlSyntaxState::SkipComment; - } else if (FXSYS_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) { - m_pStart += 7; - m_syntaxParserState = FDE_XmlSyntaxState::SkipCData; - } else { - m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode; - m_SkipChar = L'>'; - m_SkipStack.push(L'>'); - } - break; - case FDE_XmlSyntaxState::SkipCData: { - if (FXSYS_wcsnicmp(m_pStart, L"]]>", 3) == 0) { - m_pStart += 3; - syntaxParserResult = FDE_XmlSyntaxResult::CData; - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) - return FDE_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_pStart++; - } - break; - } - case FDE_XmlSyntaxState::SkipDeclNode: - if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { - m_pStart++; - if (ch != m_SkipChar) - break; - - m_SkipStack.pop(); - if (m_SkipStack.empty()) - m_syntaxParserState = FDE_XmlSyntaxState::Text; - else - m_SkipChar = m_SkipStack.top(); - } else { - switch (ch) { - case L'<': - m_SkipChar = L'>'; - m_SkipStack.push(L'>'); - break; - case L'[': - m_SkipChar = L']'; - m_SkipStack.push(L']'); - break; - case L'(': - m_SkipChar = L')'; - m_SkipStack.push(L')'); - break; - case L'\'': - m_SkipChar = L'\''; - m_SkipStack.push(L'\''); - break; - case L'\"': - m_SkipChar = L'\"'; - m_SkipStack.push(L'\"'); - break; - default: - if (ch == m_SkipChar) { - m_SkipStack.pop(); - if (m_SkipStack.empty()) { - if (m_iDataLength >= 9) { - CFX_WideString wsHeader; - m_BlockBuffer.GetTextData(wsHeader, 0, 7); - } - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - } else { - m_SkipChar = m_SkipStack.top(); - } - } - break; - } - if (!m_SkipStack.empty()) { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - } - m_pStart++; - } - break; - case FDE_XmlSyntaxState::SkipComment: - if (FXSYS_wcsnicmp(m_pStart, L"-->", 3) == 0) { - m_pStart += 2; - m_syntaxParserState = FDE_XmlSyntaxState::Text; - } - - m_pStart++; - break; - case FDE_XmlSyntaxState::TargetData: - if (FDE_IsXMLWhiteSpace(ch)) { - if (m_iDataLength < 1) { - m_pStart++; - break; - } else if (m_wQuotationMark == 0) { - m_iTextDataLength = m_iDataLength; - m_wQuotationMark = 0; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_pStart++; - syntaxParserResult = FDE_XmlSyntaxResult::TargetData; - break; - } - } - if (ch == '?') { - m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; - m_pStart++; - } else if (ch == '\"') { - if (m_wQuotationMark == 0) { - m_wQuotationMark = ch; - m_pStart++; - } else if (ch == m_wQuotationMark) { - m_iTextDataLength = m_iDataLength; - m_wQuotationMark = 0; - m_BlockBuffer.Reset(); - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_pStart++; - syntaxParserResult = FDE_XmlSyntaxResult::TargetData; - } else { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = - m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; - m_pStart++; - } - break; - default: - break; - } - if (syntaxParserResult != FDE_XmlSyntaxResult::None) - return syntaxParserResult; - } - } - return FDE_XmlSyntaxResult::Text; -} - -CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() { - m_pCurrentBlock = nullptr; - FX_Free(m_pBuffer); -} - -int32_t CFDE_XMLSyntaxParser::GetStatus() const { - if (!m_pStream) - return -1; - - int32_t iStreamLength = m_pStream->GetLength(); - if (iStreamLength < 1) - return 100; - - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) - return -1; - - if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) - return 100; - return m_iParsedBytes * 100 / iStreamLength; -} - -static int32_t FX_GetUTF8EncodeLength(const wchar_t* pSrc, int32_t iSrcLen) { - uint32_t unicode = 0; - int32_t iDstNum = 0; - while (iSrcLen-- > 0) { - unicode = *pSrc++; - int nbytes = 0; - if ((uint32_t)unicode < 0x80) { - nbytes = 1; - } else if ((uint32_t)unicode < 0x800) { - nbytes = 2; - } else if ((uint32_t)unicode < 0x10000) { - nbytes = 3; - } else if ((uint32_t)unicode < 0x200000) { - nbytes = 4; - } else if ((uint32_t)unicode < 0x4000000) { - nbytes = 5; - } else { - nbytes = 6; - } - iDstNum += nbytes; - } - return iDstNum; -} - -FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { - if (!m_pStream) - return 0; - - int32_t nSrcLen = m_pStart - m_pBuffer; - int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen); - return m_iParsedBytes + nDstLen; -} - -void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) { - if (m_iIndexInBlock == m_iAllocStep) { - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - if (!m_pCurrentBlock) { - return; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = character; - m_iDataLength++; - if (m_iEntityStart > -1 && character == L';') { - CFX_WideString csEntity; - m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, - (m_iDataLength - 1) - m_iEntityStart - 1); - int32_t iLen = csEntity.GetLength(); - if (iLen > 0) { - if (csEntity[0] == L'#') { - uint32_t ch = 0; - wchar_t w; - if (iLen > 1 && csEntity[1] == L'x') { - for (int32_t i = 2; i < iLen; i++) { - w = csEntity[i]; - if (w >= L'0' && w <= L'9') { - ch = (ch << 4) + w - L'0'; - } else if (w >= L'A' && w <= L'F') { - ch = (ch << 4) + w - 55; - } else if (w >= L'a' && w <= L'f') { - ch = (ch << 4) + w - 87; - } else { - break; - } - } - } else { - for (int32_t i = 1; i < iLen; i++) { - w = csEntity[i]; - if (w < L'0' || w > L'9') - break; - ch = ch * 10 + w - L'0'; - } - } - if (ch > kMaxCharRange) - ch = ' '; - - character = static_cast<wchar_t>(ch); - if (character != 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, character); - m_iEntityStart++; - } - } else { - if (csEntity.Compare(L"amp") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); - m_iEntityStart++; - } else if (csEntity.Compare(L"lt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); - m_iEntityStart++; - } else if (csEntity.Compare(L"gt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); - m_iEntityStart++; - } else if (csEntity.Compare(L"apos") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); - m_iEntityStart++; - } else if (csEntity.Compare(L"quot") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); - m_iEntityStart++; - } - } - } - m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, false); - m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); - m_iEntityStart = -1; - } else { - if (m_iEntityStart < 0 && character == L'&') { - m_iEntityStart = m_iDataLength - 1; - } - } - m_pStart++; -} |