From 34f86b0c7816289d2a0b402d1909572f4e95613b Mon Sep 17 00:00:00 2001 From: dsinclair Date: Mon, 11 Jul 2016 08:42:33 -0700 Subject: Break xfa_parser_imp apart This CL splits the three parsers into individual files to make working with the code easier. Review-Url: https://codereview.chromium.org/2129963002 --- xfa/fxfa/parser/xfa_parser_imp.cpp | 1542 ------------------------------------ 1 file changed, 1542 deletions(-) delete mode 100644 xfa/fxfa/parser/xfa_parser_imp.cpp (limited to 'xfa/fxfa/parser/xfa_parser_imp.cpp') diff --git a/xfa/fxfa/parser/xfa_parser_imp.cpp b/xfa/fxfa/parser/xfa_parser_imp.cpp deleted file mode 100644 index e4387a7838..0000000000 --- a/xfa/fxfa/parser/xfa_parser_imp.cpp +++ /dev/null @@ -1,1542 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "xfa/fxfa/parser/xfa_parser_imp.h" - -#include - -#include "xfa/fde/xml/fde_xml_imp.h" -#include "xfa/fgas/crt/fgas_codepage.h" -#include "xfa/fxfa/include/xfa_checksum.h" -#include "xfa/fxfa/parser/xfa_basic_imp.h" -#include "xfa/fxfa/parser/xfa_doclayout.h" -#include "xfa/fxfa/parser/xfa_document.h" -#include "xfa/fxfa/parser/xfa_localemgr.h" -#include "xfa/fxfa/parser/xfa_object.h" -#include "xfa/fxfa/parser/xfa_script.h" -#include "xfa/fxfa/parser/xfa_utils.h" - -CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory, - bool bDocumentParser) - : m_pXMLParser(nullptr), - m_pXMLDoc(nullptr), - m_pStream(nullptr), - m_pFileRead(nullptr), - m_pFactory(pFactory), - m_pRootNode(nullptr), - m_ePacketID(XFA_XDPPACKET_UNKNOWN), - m_bDocumentParser(bDocumentParser) {} - -CXFA_SimpleParser::~CXFA_SimpleParser() { - CloseParser(); -} - -void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) { - m_pFactory = pFactory; -} - -static CFDE_XMLNode* XFA_FDEExtension_GetDocumentNode( - CFDE_XMLDoc* pXMLDoc, - FX_BOOL bVerifyWellFormness = FALSE) { - if (!pXMLDoc) { - return nullptr; - } - CFDE_XMLNode* pXMLFakeRoot = pXMLDoc->GetRoot(); - for (CFDE_XMLNode* pXMLNode = - pXMLFakeRoot->GetNodeItem(CFDE_XMLNode::FirstChild); - pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) { - if (pXMLNode->GetType() == FDE_XMLNODE_Element) { - if (bVerifyWellFormness) { - for (CFDE_XMLNode* pNextNode = - pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling); - pNextNode; - pNextNode = pNextNode->GetNodeItem(CFDE_XMLNode::NextSibling)) { - if (pNextNode->GetType() == FDE_XMLNODE_Element) { - return FALSE; - } - } - } - return pXMLNode; - } - } - return nullptr; -} -int32_t CXFA_SimpleParser::StartParse(IFX_FileRead* pStream, - XFA_XDPPACKET ePacketID) { - CloseParser(); - m_pFileRead = pStream; - m_pStream = IFX_Stream::CreateStream( - pStream, FX_STREAMACCESS_Read | FX_STREAMACCESS_Text); - if (!m_pStream) { - return XFA_PARSESTATUS_StreamErr; - } - uint16_t wCodePage = m_pStream->GetCodePage(); - if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE && - wCodePage != FX_CODEPAGE_UTF8) { - m_pStream->SetCodePage(FX_CODEPAGE_UTF8); - } - m_pXMLDoc = new CFDE_XMLDoc; - m_pXMLParser = new CXFA_XMLParser(m_pXMLDoc->GetRoot(), m_pStream); - if (!m_pXMLDoc->LoadXML(m_pXMLParser)) { - return XFA_PARSESTATUS_StatusErr; - } - m_ePacketID = ePacketID; - return XFA_PARSESTATUS_Ready; -} -int32_t CXFA_SimpleParser::DoParse(IFX_Pause* pPause) { - if (!m_pXMLDoc || m_ePacketID == XFA_XDPPACKET_UNKNOWN) { - return XFA_PARSESTATUS_StatusErr; - } - int32_t iRet = m_pXMLDoc->DoLoad(pPause); - if (iRet < 0) { - return XFA_PARSESTATUS_SyntaxErr; - } - if (iRet < 100) { - return iRet / 2; - } - m_pRootNode = ParseAsXDPPacket(XFA_FDEExtension_GetDocumentNode(m_pXMLDoc), - m_ePacketID); - m_pXMLDoc->CloseXML(); - if (m_pStream) { - m_pStream->Release(); - m_pStream = nullptr; - } - if (!m_pRootNode) { - return XFA_PARSESTATUS_StatusErr; - } - return XFA_PARSESTATUS_Done; -} -int32_t CXFA_SimpleParser::ParseXMLData(const CFX_WideString& wsXML, - CFDE_XMLNode*& pXMLNode, - IFX_Pause* pPause) { - CloseParser(); - pXMLNode = nullptr; - IFX_Stream* pStream = XFA_CreateWideTextRead(wsXML); - if (!pStream) { - return XFA_PARSESTATUS_StreamErr; - } - m_pStream = pStream; - m_pXMLDoc = new CFDE_XMLDoc; - CXFA_XMLParser* pParser = new CXFA_XMLParser(m_pXMLDoc->GetRoot(), m_pStream); - pParser->m_dwCheckStatus = 0x03; - if (!m_pXMLDoc->LoadXML(pParser)) { - return XFA_PARSESTATUS_StatusErr; - } - int32_t iRet = m_pXMLDoc->DoLoad(pPause); - if (iRet < 0 || iRet >= 100) { - m_pXMLDoc->CloseXML(); - } - if (iRet < 0) { - return XFA_PARSESTATUS_SyntaxErr; - } - if (iRet < 100) { - return iRet / 2; - } - if (m_pStream) { - m_pStream->Release(); - m_pStream = nullptr; - } - pXMLNode = XFA_FDEExtension_GetDocumentNode(m_pXMLDoc); - return XFA_PARSESTATUS_Done; -} - -void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode, - CFDE_XMLNode* pXMLNode) { - XFA_XDPPACKET ePacketID = (XFA_XDPPACKET)pXFANode->GetPacketID(); - if (ePacketID == XFA_XDPPACKET_Datasets) { - if (pXFANode->GetElementType() == XFA_Element::DataValue) { - for (CFDE_XMLNode* pXMLChild = - pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); - pXMLChild; - pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { - FDE_XMLNODETYPE eNodeType = pXMLChild->GetType(); - if (eNodeType == FDE_XMLNODE_Instruction) - continue; - - if (eNodeType == FDE_XMLNODE_Element) { - CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, - XFA_Element::DataValue); - if (!pXFAChild) - return; - - CFX_WideString wsNodeStr; - CFDE_XMLElement* child = static_cast(pXMLChild); - child->GetLocalTagName(wsNodeStr); - pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr); - CFX_WideString wsChildValue; - XFA_GetPlainTextFromRichText(child, wsChildValue); - if (!wsChildValue.IsEmpty()) - pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsChildValue); - - pXFANode->InsertChild(pXFAChild); - pXFAChild->SetXMLMappingNode(pXMLChild); - pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); - break; - } - } - m_pRootNode = pXFANode; - } else { - m_pRootNode = DataLoader(pXFANode, pXMLNode, TRUE); - } - } else if (pXFANode->IsContentNode()) { - ParseContentNode(pXFANode, pXMLNode, ePacketID); - m_pRootNode = pXFANode; - } else { - m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID); - } -} - -CXFA_Node* CXFA_SimpleParser::GetRootNode() const { - return m_pRootNode; -} - -CFDE_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const { - return m_pXMLDoc; -} - -FX_BOOL XFA_FDEExtension_ResolveNamespaceQualifier( - CFDE_XMLElement* pNode, - const CFX_WideStringC& wsQualifier, - CFX_WideString& wsNamespaceURI) { - if (!pNode) { - return FALSE; - } - CFDE_XMLNode* pFakeRoot = pNode->GetNodeItem(CFDE_XMLNode::Root); - CFX_WideString wsNSAttribute; - FX_BOOL bRet = FALSE; - if (wsQualifier.IsEmpty()) { - wsNSAttribute = FX_WSTRC(L"xmlns"); - bRet = TRUE; - } else { - wsNSAttribute = FX_WSTRC(L"xmlns:") + wsQualifier; - } - for (; pNode != pFakeRoot; pNode = static_cast( - pNode->GetNodeItem(CFDE_XMLNode::Parent))) { - if (pNode->GetType() != FDE_XMLNODE_Element) { - continue; - } - if (pNode->HasAttribute(wsNSAttribute.c_str())) { - pNode->GetString(wsNSAttribute.c_str(), wsNamespaceURI); - return TRUE; - } - } - wsNamespaceURI.clear(); - return bRet; -} -static inline void XFA_FDEExtension_GetElementTagNamespaceURI( - CFDE_XMLElement* pElement, - CFX_WideString& wsNamespaceURI) { - CFX_WideString wsNodeStr; - pElement->GetNamespacePrefix(wsNodeStr); - if (!XFA_FDEExtension_ResolveNamespaceQualifier( - pElement, wsNodeStr.AsStringC(), wsNamespaceURI)) { - wsNamespaceURI.clear(); - } -} -static FX_BOOL XFA_FDEExtension_MatchNodeName( - CFDE_XMLNode* pNode, - const CFX_WideStringC& wsLocalTagName, - const CFX_WideStringC& wsNamespaceURIPrefix, - uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) { - if (!pNode || pNode->GetType() != FDE_XMLNODE_Element) { - return FALSE; - } - CFDE_XMLElement* pElement = reinterpret_cast(pNode); - CFX_WideString wsNodeStr; - pElement->GetLocalTagName(wsNodeStr); - if (wsNodeStr != wsLocalTagName) { - return FALSE; - } - XFA_FDEExtension_GetElementTagNamespaceURI(pElement, wsNodeStr); - if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH) { - return TRUE; - } - if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) { - return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) == - wsNamespaceURIPrefix; - } - return wsNodeStr == wsNamespaceURIPrefix; -} -static FX_BOOL XFA_FDEExtension_GetAttributeLocalName( - const CFX_WideStringC& wsAttributeName, - CFX_WideString& wsLocalAttrName) { - CFX_WideString wsAttrName(wsAttributeName); - FX_STRSIZE iFind = wsAttrName.Find(L':', 0); - if (iFind < 0) { - wsLocalAttrName = wsAttrName; - return FALSE; - } else { - wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - iFind - 1); - return TRUE; - } -} -static FX_BOOL XFA_FDEExtension_ResolveAttribute( - CFDE_XMLElement* pElement, - const CFX_WideStringC& wsAttributeName, - CFX_WideString& wsLocalAttrName, - CFX_WideString& wsNamespaceURI) { - CFX_WideString wsAttrName(wsAttributeName); - CFX_WideString wsNSPrefix; - if (XFA_FDEExtension_GetAttributeLocalName(wsAttributeName, - wsLocalAttrName)) { - wsNSPrefix = wsAttrName.Left(wsAttributeName.GetLength() - - wsLocalAttrName.GetLength() - 1); - } - if (wsLocalAttrName == FX_WSTRC(L"xmlns") || - wsNSPrefix == FX_WSTRC(L"xmlns") || wsNSPrefix == FX_WSTRC(L"xml")) { - return FALSE; - } - if (!XFA_FDEExtension_ResolveNamespaceQualifier( - pElement, wsNSPrefix.AsStringC(), wsNamespaceURI)) { - wsNamespaceURI.clear(); - return FALSE; - } - return TRUE; -} -static FX_BOOL XFA_FDEExtension_FindAttributeWithNS( - CFDE_XMLElement* pElement, - const CFX_WideStringC& wsLocalAttributeName, - const CFX_WideStringC& wsNamespaceURIPrefix, - CFX_WideString& wsValue, - FX_BOOL bMatchNSAsPrefix = FALSE) { - if (!pElement) { - return FALSE; - } - CFX_WideString wsAttrName; - CFX_WideString wsAttrValue; - CFX_WideString wsAttrNS; - for (int32_t iAttrCount = pElement->CountAttributes(), i = 0; i < iAttrCount; - i++) { - pElement->GetAttribute(i, wsAttrName, wsAttrValue); - FX_STRSIZE iFind = wsAttrName.Find(L':', 0); - CFX_WideString wsNSPrefix; - if (iFind < 0) { - if (wsLocalAttributeName != wsAttrName) { - continue; - } - } else { - if (wsLocalAttributeName != - wsAttrName.Right(wsAttrName.GetLength() - iFind - 1)) { - continue; - } - wsNSPrefix = wsAttrName.Left(iFind); - } - if (!XFA_FDEExtension_ResolveNamespaceQualifier( - pElement, wsNSPrefix.AsStringC(), wsAttrNS)) { - continue; - } - if (bMatchNSAsPrefix) { - if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) != - wsNamespaceURIPrefix) { - continue; - } - } else { - if (wsAttrNS != wsNamespaceURIPrefix) { - continue; - } - } - wsValue = wsAttrValue; - return TRUE; - } - return FALSE; -} -CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFDE_XMLNode* pXMLDocumentNode, - XFA_XDPPACKET ePacketID) { - switch (ePacketID) { - case XFA_XDPPACKET_UNKNOWN: - return nullptr; - case XFA_XDPPACKET_XDP: - return ParseAsXDPPacket_XDP(pXMLDocumentNode, ePacketID); - case XFA_XDPPACKET_Config: - return ParseAsXDPPacket_Config(pXMLDocumentNode, ePacketID); - case XFA_XDPPACKET_Template: - case XFA_XDPPACKET_Form: - return ParseAsXDPPacket_TemplateForm(pXMLDocumentNode, ePacketID); - case XFA_XDPPACKET_Datasets: - return ParseAsXDPPacket_Data(pXMLDocumentNode, ePacketID); - case XFA_XDPPACKET_Xdc: - return ParseAsXDPPacket_Xdc(pXMLDocumentNode, ePacketID); - case XFA_XDPPACKET_LocaleSet: - case XFA_XDPPACKET_ConnectionSet: - case XFA_XDPPACKET_SourceSet: - return ParseAsXDPPacket_LocaleConnectionSourceSet(pXMLDocumentNode, - ePacketID); - default: - return ParseAsXDPPacket_User(pXMLDocumentNode, ePacketID); - } -} -CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP( - CFDE_XMLNode* pXMLDocumentNode, - XFA_XDPPACKET ePacketID) { - if (!XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName, - XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) { - return nullptr; - } - CXFA_Node* pXFARootNode = - m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Xfa); - if (!pXFARootNode) { - return nullptr; - } - m_pRootNode = pXFARootNode; - pXFARootNode->SetCData(XFA_ATTRIBUTE_Name, L"xfa"); - { - CFDE_XMLElement* pElement = static_cast(pXMLDocumentNode); - int32_t iAttributeCount = pElement->CountAttributes(); - for (int32_t i = 0; i < iAttributeCount; i++) { - CFX_WideString wsAttriName, wsAttriValue; - pElement->GetAttribute(i, wsAttriName, wsAttriValue); - if (wsAttriName == FX_WSTRC(L"uuid")) { - pXFARootNode->SetCData(XFA_ATTRIBUTE_Uuid, wsAttriValue); - } else if (wsAttriName == FX_WSTRC(L"timeStamp")) { - pXFARootNode->SetCData(XFA_ATTRIBUTE_TimeStamp, wsAttriValue); - } - } - } - CFDE_XMLNode* pXMLConfigDOMRoot = nullptr; - CXFA_Node* pXFAConfigDOMRoot = nullptr; - { - for (CFDE_XMLNode* pChildItem = - pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild); - pChildItem; - pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) { - const XFA_PACKETINFO* pPacketInfo = - XFA_GetPacketByIndex(XFA_PACKET_Config); - if (!XFA_FDEExtension_MatchNodeName(pChildItem, pPacketInfo->pName, - pPacketInfo->pURI, - pPacketInfo->eFlags)) { - continue; - } - if (pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) { - return nullptr; - } - pXMLConfigDOMRoot = pChildItem; - pXFAConfigDOMRoot = - ParseAsXDPPacket_Config(pXMLConfigDOMRoot, XFA_XDPPACKET_Config); - pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr); - } - } - CFDE_XMLNode* pXMLDatasetsDOMRoot = nullptr; - CFDE_XMLNode* pXMLFormDOMRoot = nullptr; - CFDE_XMLNode* pXMLTemplateDOMRoot = nullptr; - { - for (CFDE_XMLNode* pChildItem = - pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild); - pChildItem; - pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) { - if (!pChildItem || pChildItem->GetType() != FDE_XMLNODE_Element) { - continue; - } - if (pChildItem == pXMLConfigDOMRoot) { - continue; - } - CFDE_XMLElement* pElement = - reinterpret_cast(pChildItem); - CFX_WideString wsPacketName; - pElement->GetLocalTagName(wsPacketName); - const XFA_PACKETINFO* pPacketInfo = - XFA_GetPacketByName(wsPacketName.AsStringC()); - if (pPacketInfo && pPacketInfo->pURI) { - if (!XFA_FDEExtension_MatchNodeName(pElement, pPacketInfo->pName, - pPacketInfo->pURI, - pPacketInfo->eFlags)) { - pPacketInfo = nullptr; - } - } - XFA_XDPPACKET ePacket = - pPacketInfo ? pPacketInfo->eName : XFA_XDPPACKET_USER; - if (ePacket == XFA_XDPPACKET_XDP) { - continue; - } - if (ePacket == XFA_XDPPACKET_Datasets) { - if (pXMLDatasetsDOMRoot) { - return nullptr; - } - pXMLDatasetsDOMRoot = pElement; - } else if (ePacket == XFA_XDPPACKET_Form) { - if (pXMLFormDOMRoot) { - return nullptr; - } - pXMLFormDOMRoot = pElement; - } else if (ePacket == XFA_XDPPACKET_Template) { - if (pXMLTemplateDOMRoot) { - // Found a duplicate template packet. - return nullptr; - } - CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket); - if (pPacketNode) { - pXMLTemplateDOMRoot = pElement; - pXFARootNode->InsertChild(pPacketNode); - } - } else { - CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket); - if (pPacketNode) { - if (pPacketInfo && - (pPacketInfo->eFlags & XFA_XDPPACKET_FLAGS_SUPPORTONE) && - pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) { - return nullptr; - } - pXFARootNode->InsertChild(pPacketNode); - } - } - } - } - if (!pXMLTemplateDOMRoot) { - // No template is found. - return nullptr; - } - if (pXMLDatasetsDOMRoot) { - CXFA_Node* pPacketNode = - ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_XDPPACKET_Datasets); - if (pPacketNode) { - pXFARootNode->InsertChild(pPacketNode); - } - } - if (pXMLFormDOMRoot) { - CXFA_Node* pPacketNode = - ParseAsXDPPacket(pXMLFormDOMRoot, XFA_XDPPACKET_Form); - if (pPacketNode) { - pXFARootNode->InsertChild(pPacketNode); - } - } - pXFARootNode->SetXMLMappingNode(pXMLDocumentNode); - return pXFARootNode; -} -CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config( - CFDE_XMLNode* pXMLDocumentNode, - XFA_XDPPACKET ePacketID) { - if (!XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Config)->pName, - XFA_GetPacketByIndex(XFA_PACKET_Config)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_Config)->eFlags)) { - return nullptr; - } - CXFA_Node* pNode = - m_pFactory->CreateNode(XFA_XDPPACKET_Config, XFA_Element::Config); - if (!pNode) { - return nullptr; - } - pNode->SetCData(XFA_ATTRIBUTE_Name, - XFA_GetPacketByIndex(XFA_PACKET_Config)->pName); - if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { - return nullptr; - } - pNode->SetXMLMappingNode(pXMLDocumentNode); - return pNode; -} -CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_TemplateForm( - CFDE_XMLNode* pXMLDocumentNode, - XFA_XDPPACKET ePacketID) { - CXFA_Node* pNode = nullptr; - if (ePacketID == XFA_XDPPACKET_Template) { - if (XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Template)->pName, - XFA_GetPacketByIndex(XFA_PACKET_Template)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_Template)->eFlags)) { - pNode = - m_pFactory->CreateNode(XFA_XDPPACKET_Template, XFA_Element::Template); - if (!pNode) { - return nullptr; - } - pNode->SetCData(XFA_ATTRIBUTE_Name, - XFA_GetPacketByIndex(XFA_PACKET_Template)->pName); - if (m_bDocumentParser) { - CFX_WideString wsNamespaceURI; - CFDE_XMLElement* pXMLDocumentElement = - static_cast(pXMLDocumentNode); - pXMLDocumentElement->GetNamespaceURI(wsNamespaceURI); - if (wsNamespaceURI.IsEmpty()) { - pXMLDocumentElement->GetString(L"xmlns:xfa", wsNamespaceURI); - } - pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI); - } - if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { - return nullptr; - } - } - } else if (ePacketID == XFA_XDPPACKET_Form) { - if (XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Form)->pName, - XFA_GetPacketByIndex(XFA_PACKET_Form)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_Form)->eFlags)) { - CFDE_XMLElement* pXMLDocumentElement = - static_cast(pXMLDocumentNode); - CFX_WideString wsChecksum; - pXMLDocumentElement->GetString(L"checksum", wsChecksum); - if (wsChecksum.GetLength() != 28 || - m_pXMLParser->m_dwCheckStatus != 0x03) { - return nullptr; - } - std::unique_ptr pChecksum(new CXFA_ChecksumContext); - pChecksum->StartChecksum(); - pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0], - m_pXMLParser->m_nSize[0]); - pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1], - m_pXMLParser->m_nSize[1]); - pChecksum->FinishChecksum(); - CFX_ByteString bsCheck = pChecksum->GetChecksum(); - if (bsCheck != wsChecksum.UTF8Encode()) - return nullptr; - - pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Form, XFA_Element::Form); - if (!pNode) - return nullptr; - - pNode->SetCData(XFA_ATTRIBUTE_Name, - XFA_GetPacketByIndex(XFA_PACKET_Form)->pName); - pNode->SetAttribute(XFA_ATTRIBUTE_Checksum, wsChecksum.AsStringC()); - CXFA_Node* pTemplateRoot = - m_pRootNode->GetFirstChildByClass(XFA_Element::Template); - CXFA_Node* pTemplateChosen = - pTemplateRoot - ? pTemplateRoot->GetFirstChildByClass(XFA_Element::Subform) - : nullptr; - FX_BOOL bUseAttribute = TRUE; - if (pTemplateChosen && - pTemplateChosen->GetEnum(XFA_ATTRIBUTE_RestoreState) != - XFA_ATTRIBUTEENUM_Auto) { - bUseAttribute = FALSE; - } - if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, bUseAttribute)) { - return nullptr; - } - } - } - if (pNode) { - pNode->SetXMLMappingNode(pXMLDocumentNode); - } - return pNode; -} -static CFDE_XMLNode* XFA_GetDataSetsFromXDP(CFDE_XMLNode* pXMLDocumentNode) { - if (XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName, - XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) { - return pXMLDocumentNode; - } - if (!XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName, - XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) { - return nullptr; - } - for (CFDE_XMLNode* pDatasetsNode = - pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild); - pDatasetsNode; - pDatasetsNode = pDatasetsNode->GetNodeItem(CFDE_XMLNode::NextSibling)) { - if (!XFA_FDEExtension_MatchNodeName( - pDatasetsNode, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName, - XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) { - continue; - } - return pDatasetsNode; - } - return nullptr; -} -CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data( - CFDE_XMLNode* pXMLDocumentNode, - XFA_XDPPACKET ePacketID) { - CFDE_XMLNode* pDatasetsXMLNode = XFA_GetDataSetsFromXDP(pXMLDocumentNode); - if (pDatasetsXMLNode) { - CXFA_Node* pNode = - m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataModel); - if (!pNode) { - return nullptr; - } - pNode->SetCData(XFA_ATTRIBUTE_Name, - XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName); - if (!DataLoader(pNode, pDatasetsXMLNode, FALSE)) { - return nullptr; - } - pNode->SetXMLMappingNode(pDatasetsXMLNode); - return pNode; - } - CFDE_XMLNode* pDataXMLNode = nullptr; - if (XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, FX_WSTRC(L"data"), - XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) { - static_cast(pXMLDocumentNode) - ->RemoveAttribute(L"xmlns:xfa"); - pDataXMLNode = pXMLDocumentNode; - } else { - CFDE_XMLElement* pDataElement = new CFDE_XMLElement(L"xfa:data"); - CFDE_XMLNode* pParentXMLNode = - pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::Parent); - if (pParentXMLNode) { - pParentXMLNode->RemoveChildNode(pXMLDocumentNode); - } - ASSERT(pXMLDocumentNode->GetType() == FDE_XMLNODE_Element); - if (pXMLDocumentNode->GetType() == FDE_XMLNODE_Element) { - static_cast(pXMLDocumentNode) - ->RemoveAttribute(L"xmlns:xfa"); - } - pDataElement->InsertChildNode(pXMLDocumentNode); - pDataXMLNode = pDataElement; - } - if (pDataXMLNode) { - CXFA_Node* pNode = - m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataGroup); - if (!pNode) { - if (pDataXMLNode != pXMLDocumentNode) { - pDataXMLNode->Release(); - } - return nullptr; - } - CFX_WideString wsLocalName; - static_cast(pDataXMLNode)->GetLocalTagName(wsLocalName); - pNode->SetCData(XFA_ATTRIBUTE_Name, wsLocalName); - if (!DataLoader(pNode, pDataXMLNode, TRUE)) { - return nullptr; - } - pNode->SetXMLMappingNode(pDataXMLNode); - if (pDataXMLNode != pXMLDocumentNode) { - pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false); - } - return pNode; - } - return nullptr; -} -CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet( - CFDE_XMLNode* pXMLDocumentNode, - XFA_XDPPACKET ePacketID) { - CXFA_Node* pNode = nullptr; - if (ePacketID == XFA_XDPPACKET_LocaleSet) { - if (XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName, - XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->eFlags)) { - pNode = m_pFactory->CreateNode(XFA_XDPPACKET_LocaleSet, - XFA_Element::LocaleSet); - if (!pNode) { - return nullptr; - } - pNode->SetCData(XFA_ATTRIBUTE_Name, - XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName); - if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { - return nullptr; - } - } - } else if (ePacketID == XFA_XDPPACKET_ConnectionSet) { - if (XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, - XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName, - XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->eFlags)) { - pNode = m_pFactory->CreateNode(XFA_XDPPACKET_ConnectionSet, - XFA_Element::ConnectionSet); - if (!pNode) { - return nullptr; - } - pNode->SetCData(XFA_ATTRIBUTE_Name, - XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName); - if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { - return nullptr; - } - } - } else if (ePacketID == XFA_XDPPACKET_SourceSet) { - if (XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName, - XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->eFlags)) { - pNode = m_pFactory->CreateNode(XFA_XDPPACKET_SourceSet, - XFA_Element::SourceSet); - if (!pNode) { - return nullptr; - } - pNode->SetCData(XFA_ATTRIBUTE_Name, - XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName); - if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { - return nullptr; - } - } - } - if (pNode) { - pNode->SetXMLMappingNode(pXMLDocumentNode); - } - return pNode; -} -CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc( - CFDE_XMLNode* pXMLDocumentNode, - XFA_XDPPACKET ePacketID) { - if (XFA_FDEExtension_MatchNodeName( - pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName, - XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pURI, - XFA_GetPacketByIndex(XFA_PACKET_Xdc)->eFlags)) { - CXFA_Node* pNode = - m_pFactory->CreateNode(XFA_XDPPACKET_Xdc, XFA_Element::Xdc); - if (!pNode) { - return nullptr; - } - pNode->SetCData(XFA_ATTRIBUTE_Name, - XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName); - pNode->SetXMLMappingNode(pXMLDocumentNode); - return pNode; - } - return nullptr; -} -CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User( - CFDE_XMLNode* pXMLDocumentNode, - XFA_XDPPACKET ePacketID) { - CXFA_Node* pNode = - m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Packet); - if (!pNode) { - return nullptr; - } - CFX_WideString wsName; - static_cast(pXMLDocumentNode)->GetLocalTagName(wsName); - pNode->SetCData(XFA_ATTRIBUTE_Name, wsName); - if (!UserPacketLoader(pNode, pXMLDocumentNode)) { - return nullptr; - } - pNode->SetXMLMappingNode(pXMLDocumentNode); - return pNode; -} -CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode, - CFDE_XMLNode* pXMLDoc) { - return pXFANode; -} -static FX_BOOL XFA_FDEExtension_IsStringAllWhitespace(CFX_WideString wsText) { - wsText.TrimRight(L"\x20\x9\xD\xA"); - return wsText.IsEmpty(); -} -CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode, - CFDE_XMLNode* pXMLDoc, - FX_BOOL bDoTransform) { - ParseDataGroup(pXFANode, pXMLDoc, XFA_XDPPACKET_Datasets); - return pXFANode; -} -CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode, - CFDE_XMLNode* pXMLDoc, - XFA_XDPPACKET ePacketID, - FX_BOOL bUseAttribute) { - FX_BOOL bOneOfPropertyFound = FALSE; - for (CFDE_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFDE_XMLNode::FirstChild); - pXMLChild; - pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { - switch (pXMLChild->GetType()) { - case FDE_XMLNODE_Element: { - CFDE_XMLElement* pXMLElement = static_cast(pXMLChild); - CFX_WideString wsTagName; - pXMLElement->GetLocalTagName(wsTagName); - XFA_Element eType = XFA_GetElementTypeForName(wsTagName.AsStringC()); - if (eType == XFA_Element::Unknown) - continue; - - const XFA_PROPERTY* pPropertyInfo = XFA_GetPropertyOfElement( - pXFANode->GetElementType(), eType, ePacketID); - if (pPropertyInfo && - ((pPropertyInfo->uFlags & - (XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) != 0)) { - if (bOneOfPropertyFound) { - break; - } - bOneOfPropertyFound = TRUE; - } - CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType); - if (!pXFAChild) - return nullptr; - if (ePacketID == XFA_XDPPACKET_Config) - pXFAChild->SetAttribute(XFA_ATTRIBUTE_Name, wsTagName.AsStringC()); - - FX_BOOL IsNeedValue = TRUE; - for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count; - i++) { - CFX_WideString wsAttrQualifiedName; - CFX_WideString wsAttrName; - CFX_WideString wsAttrValue; - pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue); - XFA_FDEExtension_GetAttributeLocalName( - wsAttrQualifiedName.AsStringC(), wsAttrName); - if (wsAttrName == FX_WSTRC(L"nil") && - wsAttrValue == FX_WSTRC(L"true")) { - IsNeedValue = FALSE; - } - const XFA_ATTRIBUTEINFO* lpAttrInfo = - XFA_GetAttributeByName(wsAttrName.AsStringC()); - if (!lpAttrInfo) { - continue; - } - if (!bUseAttribute && lpAttrInfo->eName != XFA_ATTRIBUTE_Name && - lpAttrInfo->eName != XFA_ATTRIBUTE_Save) { - continue; - } - pXFAChild->SetAttribute(lpAttrInfo->eName, wsAttrValue.AsStringC()); - } - pXFANode->InsertChild(pXFAChild); - if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) { - if (ePacketID == XFA_XDPPACKET_Config) { - ParseContentNode(pXFAChild, pXMLElement, ePacketID); - } else { - NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); - } - break; - } - switch (pXFAChild->GetObjectType()) { - case XFA_ObjectType::ContentNode: - case XFA_ObjectType::TextNode: - case XFA_ObjectType::NodeC: - case XFA_ObjectType::NodeV: - if (IsNeedValue) { - ParseContentNode(pXFAChild, pXMLElement, ePacketID); - } - break; - default: - NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); - break; - } - } break; - case FDE_XMLNODE_Instruction: - ParseInstruction(pXFANode, static_cast(pXMLChild), - ePacketID); - break; - default: - break; - } - } - return pXFANode; -} -FX_BOOL XFA_RecognizeRichText(CFDE_XMLElement* pRichTextXMLNode) { - if (pRichTextXMLNode) { - CFX_WideString wsNamespaceURI; - XFA_FDEExtension_GetElementTagNamespaceURI(pRichTextXMLNode, - wsNamespaceURI); - if (wsNamespaceURI == FX_WSTRC(L"http://www.w3.org/1999/xhtml")) { - return TRUE; - } - } - return FALSE; -} -class RichTextNodeVisitor { - public: - static inline CFDE_XMLNode* GetFirstChild(CFDE_XMLNode* pNode) { - return pNode->GetNodeItem(CFDE_XMLNode::FirstChild); - } - static inline CFDE_XMLNode* GetNextSibling(CFDE_XMLNode* pNode) { - return pNode->GetNodeItem(CFDE_XMLNode::NextSibling); - } - static inline CFDE_XMLNode* GetParent(CFDE_XMLNode* pNode) { - return pNode->GetNodeItem(CFDE_XMLNode::Parent); - } -}; - -void XFA_ConvertXMLToPlainText(CFDE_XMLElement* pRootXMLNode, - CFX_WideString& wsOutput) { - for (CFDE_XMLNode* pXMLChild = - pRootXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); - pXMLChild; - pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { - switch (pXMLChild->GetType()) { - case FDE_XMLNODE_Element: { - CFX_WideString wsTextData; - static_cast(pXMLChild)->GetTextData(wsTextData); - wsTextData += FX_WSTRC(L"\n"); - wsOutput += wsTextData; - } break; - case FDE_XMLNODE_Text: { - CFX_WideString wsText; - static_cast(pXMLChild)->GetText(wsText); - if (XFA_FDEExtension_IsStringAllWhitespace(wsText)) { - continue; - } else { - wsOutput = wsText; - } - } break; - case FDE_XMLNODE_CharData: { - CFX_WideString wsCharData; - static_cast(pXMLChild)->GetCharData(wsCharData); - if (XFA_FDEExtension_IsStringAllWhitespace(wsCharData)) { - continue; - } else { - wsOutput = wsCharData; - } - } break; - default: - ASSERT(FALSE); - break; - } - } -} - -void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode, - CFDE_XMLNode* pXMLNode, - XFA_XDPPACKET ePacketID) { - XFA_Element element = XFA_Element::Sharptext; - if (pXFANode->GetElementType() == XFA_Element::ExData) { - CFX_WideStringC wsContentType = - pXFANode->GetCData(XFA_ATTRIBUTE_ContentType); - if (wsContentType == FX_WSTRC(L"text/html")) - element = XFA_Element::SharpxHTML; - else if (wsContentType == FX_WSTRC(L"text/xml")) - element = XFA_Element::Sharpxml; - } - if (element == XFA_Element::SharpxHTML) - pXFANode->SetXMLMappingNode(pXMLNode); - - CFX_WideString wsValue; - for (CFDE_XMLNode* pXMLChild = - pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); - pXMLChild; - pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { - FDE_XMLNODETYPE eNodeType = pXMLChild->GetType(); - if (eNodeType == FDE_XMLNODE_Instruction) - continue; - - if (element == XFA_Element::SharpxHTML) { - if (eNodeType != FDE_XMLNODE_Element) - break; - - if (XFA_RecognizeRichText(static_cast(pXMLChild))) - XFA_GetPlainTextFromRichText(static_cast(pXMLChild), - wsValue); - } else if (element == XFA_Element::Sharpxml) { - if (eNodeType != FDE_XMLNODE_Element) - break; - XFA_ConvertXMLToPlainText(static_cast(pXMLChild), - wsValue); - } else { - if (eNodeType == FDE_XMLNODE_Element) - break; - if (eNodeType == FDE_XMLNODE_Text) - static_cast(pXMLChild)->GetText(wsValue); - else if (eNodeType == FDE_XMLNODE_CharData) - static_cast(pXMLChild)->GetCharData(wsValue); - } - break; - } - if (!wsValue.IsEmpty()) { - if (pXFANode->IsContentNode()) { - CXFA_Node* pContentRawDataNode = - m_pFactory->CreateNode(ePacketID, element); - ASSERT(pContentRawDataNode); - pContentRawDataNode->SetCData(XFA_ATTRIBUTE_Value, wsValue); - pXFANode->InsertChild(pContentRawDataNode); - } else { - pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsValue); - } - } -} - -void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode, - CFDE_XMLNode* pXMLNode, - XFA_XDPPACKET ePacketID) { - for (CFDE_XMLNode* pXMLChild = - pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); - pXMLChild; - pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { - switch (pXMLChild->GetType()) { - case FDE_XMLNODE_Element: { - CFDE_XMLElement* pXMLElement = static_cast(pXMLChild); - { - CFX_WideString wsNamespaceURI; - XFA_FDEExtension_GetElementTagNamespaceURI(pXMLElement, - wsNamespaceURI); - if (wsNamespaceURI == - FX_WSTRC(L"http://www.xfa.com/schema/xfa-package/") || - wsNamespaceURI == - FX_WSTRC(L"http://www.xfa.org/schema/xfa-package/") || - wsNamespaceURI == - FX_WSTRC(L"http://www.w3.org/2001/XMLSchema-instance")) { - continue; - } - } - XFA_Element eNodeType = XFA_Element::DataModel; - if (eNodeType == XFA_Element::DataModel) { - CFX_WideString wsDataNodeAttr; - if (XFA_FDEExtension_FindAttributeWithNS( - pXMLElement, FX_WSTRC(L"dataNode"), - FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/"), - wsDataNodeAttr)) { - if (wsDataNodeAttr == FX_WSTRC(L"dataGroup")) { - eNodeType = XFA_Element::DataGroup; - } else if (wsDataNodeAttr == FX_WSTRC(L"dataValue")) { - eNodeType = XFA_Element::DataValue; - } - } - } - CFX_WideString wsContentType; - if (eNodeType == XFA_Element::DataModel) { - if (XFA_FDEExtension_FindAttributeWithNS( - pXMLElement, FX_WSTRC(L"contentType"), - FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/"), - wsContentType)) { - if (!wsContentType.IsEmpty()) { - eNodeType = XFA_Element::DataValue; - } - } - } - if (eNodeType == XFA_Element::DataModel) { - for (CFDE_XMLNode* pXMLDataChild = - pXMLElement->GetNodeItem(CFDE_XMLNode::FirstChild); - pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem( - CFDE_XMLNode::NextSibling)) { - if (pXMLDataChild->GetType() == FDE_XMLNODE_Element) { - if (!XFA_RecognizeRichText( - static_cast(pXMLDataChild))) { - eNodeType = XFA_Element::DataGroup; - break; - } - } - } - } - if (eNodeType == XFA_Element::DataModel) { - eNodeType = XFA_Element::DataValue; - } - CXFA_Node* pXFAChild = - m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, eNodeType); - if (!pXFAChild) { - return; - } - CFX_WideString wsNodeName; - pXMLElement->GetLocalTagName(wsNodeName); - pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeName); - bool bNeedValue = true; - for (int32_t i = 0; i < pXMLElement->CountAttributes(); ++i) { - CFX_WideString wsQualifiedName; - CFX_WideString wsValue; - CFX_WideString wsName; - CFX_WideString wsNS; - pXMLElement->GetAttribute(i, wsQualifiedName, wsValue); - if (!XFA_FDEExtension_ResolveAttribute( - pXMLElement, wsQualifiedName.AsStringC(), wsName, wsNS)) { - continue; - } - if (wsName == FX_WSTRC(L"nil") && wsValue == FX_WSTRC(L"true")) { - bNeedValue = false; - continue; - } - if (wsNS == FX_WSTRC(L"http://www.xfa.com/schema/xfa-package/") || - wsNS == FX_WSTRC(L"http://www.xfa.org/schema/xfa-package/") || - wsNS == FX_WSTRC(L"http://www.w3.org/2001/XMLSchema-instance") || - wsNS == FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/")) { - continue; - } - CXFA_Node* pXFAMetaData = m_pFactory->CreateNode( - XFA_XDPPACKET_Datasets, XFA_Element::DataValue); - if (!pXFAMetaData) { - return; - } - pXFAMetaData->SetCData(XFA_ATTRIBUTE_Name, wsName); - pXFAMetaData->SetCData(XFA_ATTRIBUTE_QualifiedName, wsQualifiedName); - pXFAMetaData->SetCData(XFA_ATTRIBUTE_Value, wsValue); - pXFAMetaData->SetEnum(XFA_ATTRIBUTE_Contains, - XFA_ATTRIBUTEENUM_MetaData); - pXFAChild->InsertChild(pXFAMetaData); - pXFAMetaData->SetXMLMappingNode(pXMLElement); - pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false); - } - if (!bNeedValue) { - CFX_WideString wsNilName(L"xsi:nil"); - pXMLElement->RemoveAttribute(wsNilName.c_str()); - } - pXFANode->InsertChild(pXFAChild); - if (eNodeType == XFA_Element::DataGroup) { - ParseDataGroup(pXFAChild, pXMLElement, ePacketID); - } else if (bNeedValue) { - ParseDataValue(pXFAChild, pXMLChild, XFA_XDPPACKET_Datasets); - } - pXFAChild->SetXMLMappingNode(pXMLElement); - pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); - continue; - } - case FDE_XMLNODE_CharData: { - CFDE_XMLCharData* pXMLCharData = - static_cast(pXMLChild); - CFX_WideString wsCharData; - pXMLCharData->GetCharData(wsCharData); - if (XFA_FDEExtension_IsStringAllWhitespace(wsCharData)) { - continue; - } - CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, - XFA_Element::DataValue); - if (!pXFAChild) { - return; - } - pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCharData); - pXFANode->InsertChild(pXFAChild); - pXFAChild->SetXMLMappingNode(pXMLCharData); - pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); - continue; - } - case FDE_XMLNODE_Text: { - CFDE_XMLText* pXMLText = static_cast(pXMLChild); - CFX_WideString wsText; - pXMLText->GetText(wsText); - if (XFA_FDEExtension_IsStringAllWhitespace(wsText)) { - continue; - } - CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, - XFA_Element::DataValue); - if (!pXFAChild) { - return; - } - pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsText); - pXFANode->InsertChild(pXFAChild); - pXFAChild->SetXMLMappingNode(pXMLText); - pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); - continue; - } - default: - continue; - } - } -} - -void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode, - CFDE_XMLNode* pXMLNode, - XFA_XDPPACKET ePacketID) { - CFX_WideTextBuf wsValueTextBuf; - CFX_WideTextBuf wsCurValueTextBuf; - FX_BOOL bMarkAsCompound = FALSE; - CFDE_XMLNode* pXMLCurValueNode = nullptr; - for (CFDE_XMLNode* pXMLChild = - pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); - pXMLChild; - pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { - FDE_XMLNODETYPE eNodeType = pXMLChild->GetType(); - if (eNodeType == FDE_XMLNODE_Instruction) - continue; - - CFX_WideString wsText; - if (eNodeType == FDE_XMLNODE_Text) { - static_cast(pXMLChild)->GetText(wsText); - if (!pXMLCurValueNode) - pXMLCurValueNode = pXMLChild; - - wsCurValueTextBuf << wsText; - } else if (eNodeType == FDE_XMLNODE_CharData) { - static_cast(pXMLChild)->GetCharData(wsText); - if (!pXMLCurValueNode) - pXMLCurValueNode = pXMLChild; - - wsCurValueTextBuf << wsText; - } else if (XFA_RecognizeRichText( - static_cast(pXMLChild))) { - XFA_GetPlainTextFromRichText(static_cast(pXMLChild), - wsText); - if (!pXMLCurValueNode) - pXMLCurValueNode = pXMLChild; - - wsCurValueTextBuf << wsText; - } else { - bMarkAsCompound = TRUE; - if (pXMLCurValueNode) { - CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString(); - if (!wsCurValue.IsEmpty()) { - CXFA_Node* pXFAChild = - m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); - if (!pXFAChild) - return; - - pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L""); - pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue); - pXFANode->InsertChild(pXFAChild); - pXFAChild->SetXMLMappingNode(pXMLCurValueNode); - pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); - wsValueTextBuf << wsCurValue; - wsCurValueTextBuf.Clear(); - } - pXMLCurValueNode = nullptr; - } - CXFA_Node* pXFAChild = - m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); - if (!pXFAChild) - return; - - CFX_WideString wsNodeStr; - static_cast(pXMLChild)->GetLocalTagName(wsNodeStr); - pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr); - ParseDataValue(pXFAChild, pXMLChild, ePacketID); - pXFANode->InsertChild(pXFAChild); - pXFAChild->SetXMLMappingNode(pXMLChild); - pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); - CFX_WideStringC wsCurValue = pXFAChild->GetCData(XFA_ATTRIBUTE_Value); - wsValueTextBuf << wsCurValue; - } - } - if (pXMLCurValueNode) { - CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString(); - if (!wsCurValue.IsEmpty()) { - if (bMarkAsCompound) { - CXFA_Node* pXFAChild = - m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); - if (!pXFAChild) - return; - - pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L""); - pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue); - pXFANode->InsertChild(pXFAChild); - pXFAChild->SetXMLMappingNode(pXMLCurValueNode); - pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); - } - wsValueTextBuf << wsCurValue; - wsCurValueTextBuf.Clear(); - } - pXMLCurValueNode = nullptr; - } - CFX_WideString wsNodeValue = wsValueTextBuf.MakeString(); - pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsNodeValue); -} - -void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode, - CFDE_XMLInstruction* pXMLInstruction, - XFA_XDPPACKET ePacketID) { - if (!m_bDocumentParser) { - return; - } - CFX_WideString wsTargetName; - pXMLInstruction->GetTargetName(wsTargetName); - if (wsTargetName == FX_WSTRC(L"originalXFAVersion")) { - CFX_WideString wsData; - if (pXMLInstruction->GetData(0, wsData) && - (pXFANode->GetDocument()->RecognizeXFAVersionNumber(wsData) != - XFA_VERSION_UNKNOWN)) { - wsData.clear(); - if (pXMLInstruction->GetData(1, wsData) && - wsData == FX_WSTRC(L"v2.7-scripting:1")) { - pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, TRUE); - } - } - } else if (wsTargetName == FX_WSTRC(L"acrobat")) { - CFX_WideString wsData; - if (pXMLInstruction->GetData(0, wsData) && - wsData == FX_WSTRC(L"JavaScript")) { - if (pXMLInstruction->GetData(1, wsData) && - wsData == FX_WSTRC(L"strictScoping")) { - pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, TRUE); - } - } - } -} -void CXFA_SimpleParser::CloseParser() { - if (m_pXMLDoc) { - m_pXMLDoc->Release(); - m_pXMLDoc = nullptr; - } - if (m_pStream) { - m_pStream->Release(); - m_pStream = nullptr; - } -} - -CXFA_DocumentParser::CXFA_DocumentParser(CXFA_FFNotify* pNotify) - : m_nodeParser(nullptr, TRUE), m_pNotify(pNotify), m_pDocument(nullptr) {} - -CXFA_DocumentParser::~CXFA_DocumentParser() { - CloseParser(); -} - -int32_t CXFA_DocumentParser::StartParse(IFX_FileRead* pStream, - XFA_XDPPACKET ePacketID) { - CloseParser(); - int32_t nRetStatus = m_nodeParser.StartParse(pStream, ePacketID); - if (nRetStatus == XFA_PARSESTATUS_Ready) { - m_pDocument.reset(new CXFA_Document(this)); - m_nodeParser.SetFactory(m_pDocument.get()); - } - return nRetStatus; -} - -int32_t CXFA_DocumentParser::DoParse(IFX_Pause* pPause) { - int32_t nRetStatus = m_nodeParser.DoParse(pPause); - if (nRetStatus >= XFA_PARSESTATUS_Done) { - ASSERT(m_pDocument); - m_pDocument->SetRoot(m_nodeParser.GetRootNode()); - } - return nRetStatus; -} - -CFDE_XMLDoc* CXFA_DocumentParser::GetXMLDoc() const { - return m_nodeParser.GetXMLDoc(); -} - -CXFA_FFNotify* CXFA_DocumentParser::GetNotify() const { - return m_pNotify; -} - -CXFA_Document* CXFA_DocumentParser::GetDocument() const { - return m_pDocument.get(); -} - -void CXFA_DocumentParser::CloseParser() { - m_pDocument.reset(); - m_nodeParser.CloseParser(); -} - -CXFA_XMLParser::CXFA_XMLParser(CFDE_XMLNode* pRoot, IFX_Stream* pStream) - : m_nElementStart(0), - m_dwCheckStatus(0), - m_dwCurrentCheckStatus(0), - m_pRoot(pRoot), - m_pStream(pStream), - m_pParser(nullptr), - m_pParent(pRoot), - m_pChild(nullptr), - m_NodeStack(16), - m_syntaxParserResult(FDE_XmlSyntaxResult::None) { - ASSERT(m_pParent && m_pStream); - m_NodeStack.Push(m_pParent); - m_pParser = new CFDE_XMLSyntaxParser; - m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024); -} - -CXFA_XMLParser::~CXFA_XMLParser() { - if (m_pParser) { - m_pParser->Release(); - } - m_NodeStack.RemoveAll(); - m_ws1.clear(); - m_ws2.clear(); -} - -void CXFA_XMLParser::Release() { - delete this; -} - -int32_t CXFA_XMLParser::DoParser(IFX_Pause* pPause) { - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) - return -1; - if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) - return 100; - - int32_t iCount = 0; - while (TRUE) { - m_syntaxParserResult = m_pParser->DoSyntaxParse(); - switch (m_syntaxParserResult) { - case FDE_XmlSyntaxResult::InstructionOpen: - break; - case FDE_XmlSyntaxResult::InstructionClose: - if (m_pChild) { - if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - } - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::ElementOpen: - if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 2) { - m_nElementStart = m_pParser->GetCurrentPos() - 1; - } - break; - case FDE_XmlSyntaxResult::ElementBreak: - break; - case FDE_XmlSyntaxResult::ElementClose: - if (m_pChild->GetType() != FDE_XMLNODE_Element) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - m_pParser->GetTagName(m_ws1); - static_cast(m_pChild)->GetTagName(m_ws2); - if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - m_NodeStack.Pop(); - if (m_NodeStack.GetSize() < 1) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.GetSize() == 2) { - m_nSize[m_dwCurrentCheckStatus - 1] = - m_pParser->GetCurrentBinaryPos() - - m_nStart[m_dwCurrentCheckStatus - 1]; - m_dwCurrentCheckStatus = 0; - } - - m_pParent = static_cast(*m_NodeStack.GetTopElement()); - m_pChild = m_pParent; - iCount++; - break; - case FDE_XmlSyntaxResult::TargetName: - m_pParser->GetTargetName(m_ws1); - if (m_ws1 == FX_WSTRC(L"originalXFAVersion") || - m_ws1 == FX_WSTRC(L"acrobat")) { - m_pChild = new CFDE_XMLInstruction(m_ws1); - m_pParent->InsertChildNode(m_pChild); - } else { - m_pChild = nullptr; - } - m_ws1.clear(); - break; - case FDE_XmlSyntaxResult::TagName: - m_pParser->GetTagName(m_ws1); - m_pChild = new CFDE_XMLElement(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_NodeStack.Push(m_pChild); - m_pParent = m_pChild; - - if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 3) { - CFX_WideString wsTag; - static_cast(m_pChild)->GetLocalTagName(wsTag); - if (wsTag == FX_WSTRC(L"template")) { - m_dwCheckStatus |= 0x01; - m_dwCurrentCheckStatus = 0x01; - m_nStart[0] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } else if (wsTag == FX_WSTRC(L"datasets")) { - m_dwCheckStatus |= 0x02; - m_dwCurrentCheckStatus = 0x02; - m_nStart[1] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } - } - break; - case FDE_XmlSyntaxResult::AttriName: - m_pParser->GetAttributeName(m_ws1); - break; - case FDE_XmlSyntaxResult::AttriValue: - if (m_pChild) { - m_pParser->GetAttributeName(m_ws2); - if (m_pChild->GetType() == FDE_XMLNODE_Element) { - static_cast(m_pChild)->SetString(m_ws1, m_ws2); - } - } - m_ws1.clear(); - break; - case FDE_XmlSyntaxResult::Text: - m_pParser->GetTextData(m_ws1); - m_pChild = new CFDE_XMLText(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::CData: - m_pParser->GetTextData(m_ws1); - m_pChild = new CFDE_XMLCharData(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::TargetData: - if (m_pChild) { - if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - if (!m_ws1.IsEmpty()) { - static_cast(m_pChild)->AppendData(m_ws1); - } - m_pParser->GetTargetData(m_ws1); - static_cast(m_pChild)->AppendData(m_ws1); - } - m_ws1.clear(); - break; - default: - break; - } - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { - break; - } - if (pPause && iCount > 500 && pPause->NeedToPauseNow()) { - break; - } - } - return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_NodeStack.GetSize() != 1) - ? -1 - : m_pParser->GetStatus(); -} -- cgit v1.2.3