// Copyright 2014 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "xfa/fxfa/parser/xfa_parser_imp.h" #include #include "xfa/fde/xml/fde_xml_imp.h" #include "xfa/fgas/crt/fgas_codepage.h" #include "xfa/fxfa/include/xfa_checksum.h" #include "xfa/fxfa/parser/xfa_basic_imp.h" #include "xfa/fxfa/parser/xfa_doclayout.h" #include "xfa/fxfa/parser/xfa_document.h" #include "xfa/fxfa/parser/xfa_localemgr.h" #include "xfa/fxfa/parser/xfa_object.h" #include "xfa/fxfa/parser/xfa_parser.h" #include "xfa/fxfa/parser/xfa_script.h" #include "xfa/fxfa/parser/xfa_utils.h" IXFA_Parser* IXFA_Parser::Create(CXFA_Document* pFactory, FX_BOOL bDocumentParser) { return new CXFA_SimpleParser(pFactory, bDocumentParser); } CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory, FX_BOOL bDocumentParser) : m_pXMLParser(nullptr), m_pXMLDoc(nullptr), m_pStream(nullptr), m_pFileRead(nullptr), m_pFactory(pFactory), m_pRootNode(nullptr), m_ePacketID(XFA_XDPPACKET_UNKNOWN), m_bDocumentParser(bDocumentParser) {} CXFA_SimpleParser::~CXFA_SimpleParser() { CloseParser(); } void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) { m_pFactory = pFactory; } static CFDE_XMLNode* XFA_FDEExtension_GetDocumentNode( CFDE_XMLDoc* pXMLDoc, FX_BOOL bVerifyWellFormness = FALSE) { if (!pXMLDoc) { return NULL; } CFDE_XMLNode* pXMLFakeRoot = pXMLDoc->GetRoot(); for (CFDE_XMLNode* pXMLNode = pXMLFakeRoot->GetNodeItem(CFDE_XMLNode::FirstChild); pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) { if (pXMLNode->GetType() == FDE_XMLNODE_Element) { if (bVerifyWellFormness) { for (CFDE_XMLNode* pNextNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling); pNextNode; pNextNode = pNextNode->GetNodeItem(CFDE_XMLNode::NextSibling)) { if (pNextNode->GetType() == FDE_XMLNODE_Element) { return FALSE; } } } return pXMLNode; } } return NULL; } int32_t CXFA_SimpleParser::StartParse(IFX_FileRead* pStream, XFA_XDPPACKET ePacketID) { CloseParser(); m_pFileRead = pStream; m_pStream = IFX_Stream::CreateStream( pStream, FX_STREAMACCESS_Read | FX_STREAMACCESS_Text); if (m_pStream == NULL) { return XFA_PARSESTATUS_StreamErr; } uint16_t wCodePage = m_pStream->GetCodePage(); if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE && wCodePage != FX_CODEPAGE_UTF8) { m_pStream->SetCodePage(FX_CODEPAGE_UTF8); } m_pXMLDoc = new CFDE_XMLDoc; m_pXMLParser = new CXFA_XMLParser(m_pXMLDoc->GetRoot(), m_pStream); if (!m_pXMLDoc->LoadXML(m_pXMLParser)) { return XFA_PARSESTATUS_StatusErr; } m_ePacketID = ePacketID; return XFA_PARSESTATUS_Ready; } int32_t CXFA_SimpleParser::DoParse(IFX_Pause* pPause) { if (m_pXMLDoc == NULL || m_ePacketID == XFA_XDPPACKET_UNKNOWN) { return XFA_PARSESTATUS_StatusErr; } int32_t iRet = m_pXMLDoc->DoLoad(pPause); if (iRet < 0) { return XFA_PARSESTATUS_SyntaxErr; } if (iRet < 100) { return iRet / 2; } m_pRootNode = ParseAsXDPPacket(XFA_FDEExtension_GetDocumentNode(m_pXMLDoc), m_ePacketID); m_pXMLDoc->CloseXML(); if (m_pStream) { m_pStream->Release(); m_pStream = NULL; } if (!m_pRootNode) { return XFA_PARSESTATUS_StatusErr; } return XFA_PARSESTATUS_Done; } int32_t CXFA_SimpleParser::ParseXMLData(const CFX_WideString& wsXML, CFDE_XMLNode*& pXMLNode, IFX_Pause* pPause) { CloseParser(); pXMLNode = NULL; IFX_Stream* pStream = XFA_CreateWideTextRead(wsXML); if (!pStream) { return XFA_PARSESTATUS_StreamErr; } m_pStream = pStream; m_pXMLDoc = new CFDE_XMLDoc; CXFA_XMLParser* pParser = new CXFA_XMLParser(m_pXMLDoc->GetRoot(), m_pStream); pParser->m_dwCheckStatus = 0x03; if (!m_pXMLDoc->LoadXML(pParser)) { return XFA_PARSESTATUS_StatusErr; } int32_t iRet = m_pXMLDoc->DoLoad(pPause); if (iRet < 0 || iRet >= 100) { m_pXMLDoc->CloseXML(); } if (iRet < 0) { return XFA_PARSESTATUS_SyntaxErr; } if (iRet < 100) { return iRet / 2; } if (m_pStream) { m_pStream->Release(); m_pStream = NULL; } pXMLNode = XFA_FDEExtension_GetDocumentNode(m_pXMLDoc); return XFA_PARSESTATUS_Done; } void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode, CFDE_XMLNode* pXMLNode) { XFA_XDPPACKET ePacketID = (XFA_XDPPACKET)pXFANode->GetPacketID(); if (ePacketID == XFA_XDPPACKET_Datasets) { if (pXFANode->GetClassID() == XFA_ELEMENT_DataValue) { for (CFDE_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); pXMLChild; pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { FDE_XMLNODETYPE eNodeType = pXMLChild->GetType(); if (eNodeType == FDE_XMLNODE_Instruction) continue; if (eNodeType == FDE_XMLNODE_Element) { CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_ELEMENT_DataValue); if (!pXFAChild) return; CFX_WideString wsNodeStr; CFDE_XMLElement* child = static_cast(pXMLChild); child->GetLocalTagName(wsNodeStr); pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr); CFX_WideString wsChildValue; XFA_GetPlainTextFromRichText(child, wsChildValue); if (!wsChildValue.IsEmpty()) pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsChildValue); pXFANode->InsertChild(pXFAChild); pXFAChild->SetXMLMappingNode(pXMLChild); pXFAChild->SetFlag(XFA_NODEFLAG_Initialized, false); break; } } m_pRootNode = pXFANode; } else { m_pRootNode = DataLoader(pXFANode, pXMLNode, TRUE); } } else if (pXFANode->GetObjectType() == XFA_OBJECTTYPE_ContentNode) { ParseContentNode(pXFANode, pXMLNode, ePacketID); m_pRootNode = pXFANode; } else { m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID); } } FX_BOOL XFA_FDEExtension_ResolveNamespaceQualifier( CFDE_XMLElement* pNode, const CFX_WideStringC& wsQualifier, CFX_WideString& wsNamespaceURI) { if (!pNode) { return FALSE; } CFDE_XMLNode* pFakeRoot = pNode->GetNodeItem(CFDE_XMLNode::Root); CFX_WideString wsNSAttribute; FX_BOOL bRet = FALSE; if (wsQualifier.IsEmpty()) { wsNSAttribute = FX_WSTRC(L"xmlns"); bRet = TRUE; } else { wsNSAttribute = FX_WSTRC(L"xmlns:") + wsQualifier; } for (; pNode != pFakeRoot; pNode = static_cast( pNode->GetNodeItem(CFDE_XMLNode::Parent))) { if (pNode->GetType() != FDE_XMLNODE_Element) { continue; } if (pNode->HasAttribute(wsNSAttribute.c_str())) { pNode->GetString(wsNSAttribute.c_str(), wsNamespaceURI); return TRUE; } } wsNamespaceURI.clear(); return bRet; } static inline void XFA_FDEExtension_GetElementTagNamespaceURI( CFDE_XMLElement* pElement, CFX_WideString& wsNamespaceURI) { CFX_WideString wsNodeStr; pElement->GetNamespacePrefix(wsNodeStr); if (!XFA_FDEExtension_ResolveNamespaceQualifier( pElement, wsNodeStr.AsStringC(), wsNamespaceURI)) { wsNamespaceURI.clear(); } } static FX_BOOL XFA_FDEExtension_MatchNodeName( CFDE_XMLNode* pNode, const CFX_WideStringC& wsLocalTagName, const CFX_WideStringC& wsNamespaceURIPrefix, uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) { if (!pNode || pNode->GetType() != FDE_XMLNODE_Element) { return FALSE; } CFDE_XMLElement* pElement = reinterpret_cast(pNode); CFX_WideString wsNodeStr; pElement->GetLocalTagName(wsNodeStr); if (wsNodeStr != wsLocalTagName) { return FALSE; } XFA_FDEExtension_GetElementTagNamespaceURI(pElement, wsNodeStr); if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH) { return TRUE; } if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) { return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) == wsNamespaceURIPrefix; } return wsNodeStr == wsNamespaceURIPrefix; } static FX_BOOL XFA_FDEExtension_GetAttributeLocalName( const CFX_WideStringC& wsAttributeName, CFX_WideString& wsLocalAttrName) { CFX_WideString wsAttrName(wsAttributeName); FX_STRSIZE iFind = wsAttrName.Find(L':', 0); if (iFind < 0) { wsLocalAttrName = wsAttrName; return FALSE; } else { wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - iFind - 1); return TRUE; } } static FX_BOOL XFA_FDEExtension_ResolveAttribute( CFDE_XMLElement* pElement, const CFX_WideStringC& wsAttributeName, CFX_WideString& wsLocalAttrName, CFX_WideString& wsNamespaceURI) { CFX_WideString wsAttrName(wsAttributeName); CFX_WideString wsNSPrefix; if (XFA_FDEExtension_GetAttributeLocalName(wsAttributeName, wsLocalAttrName)) { wsNSPrefix = wsAttrName.Left(wsAttributeName.GetLength() - wsLocalAttrName.GetLength() - 1); } if (wsLocalAttrName == FX_WSTRC(L"xmlns") || wsNSPrefix == FX_WSTRC(L"xmlns") || wsNSPrefix == FX_WSTRC(L"xml")) { return FALSE; } if (!XFA_FDEExtension_ResolveNamespaceQualifier( pElement, wsNSPrefix.AsStringC(), wsNamespaceURI)) { wsNamespaceURI.clear(); return FALSE; } return TRUE; } static FX_BOOL XFA_FDEExtension_FindAttributeWithNS( CFDE_XMLElement* pElement, const CFX_WideStringC& wsLocalAttributeName, const CFX_WideStringC& wsNamespaceURIPrefix, CFX_WideString& wsValue, FX_BOOL bMatchNSAsPrefix = FALSE) { if (!pElement) { return FALSE; } CFX_WideString wsAttrName; CFX_WideString wsAttrValue; CFX_WideString wsAttrNS; for (int32_t iAttrCount = pElement->CountAttributes(), i = 0; i < iAttrCount; i++) { pElement->GetAttribute(i, wsAttrName, wsAttrValue); FX_STRSIZE iFind = wsAttrName.Find(L':', 0); CFX_WideString wsNSPrefix; if (iFind < 0) { if (wsLocalAttributeName != wsAttrName) { continue; } } else { if (wsLocalAttributeName != wsAttrName.Right(wsAttrName.GetLength() - iFind - 1)) { continue; } wsNSPrefix = wsAttrName.Left(iFind); } if (!XFA_FDEExtension_ResolveNamespaceQualifier( pElement, wsNSPrefix.AsStringC(), wsAttrNS)) { continue; } if (bMatchNSAsPrefix) { if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) != wsNamespaceURIPrefix) { continue; } } else { if (wsAttrNS != wsNamespaceURIPrefix) { continue; } } wsValue = wsAttrValue; return TRUE; } return FALSE; } CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFDE_XMLNode* pXMLDocumentNode, XFA_XDPPACKET ePacketID) { switch (ePacketID) { case XFA_XDPPACKET_UNKNOWN: return NULL; case XFA_XDPPACKET_XDP: return ParseAsXDPPacket_XDP(pXMLDocumentNode, ePacketID); case XFA_XDPPACKET_Config: return ParseAsXDPPacket_Config(pXMLDocumentNode, ePacketID); case XFA_XDPPACKET_Template: case XFA_XDPPACKET_Form: return ParseAsXDPPacket_TemplateForm(pXMLDocumentNode, ePacketID); case XFA_XDPPACKET_Datasets: return ParseAsXDPPacket_Data(pXMLDocumentNode, ePacketID); case XFA_XDPPACKET_Xdc: return ParseAsXDPPacket_Xdc(pXMLDocumentNode, ePacketID); case XFA_XDPPACKET_LocaleSet: case XFA_XDPPACKET_ConnectionSet: case XFA_XDPPACKET_SourceSet: return ParseAsXDPPacket_LocaleConnectionSourceSet(pXMLDocumentNode, ePacketID); default: return ParseAsXDPPacket_User(pXMLDocumentNode, ePacketID); } } CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP( CFDE_XMLNode* pXMLDocumentNode, XFA_XDPPACKET ePacketID) { if (!XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName, XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI, XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) { return nullptr; } CXFA_Node* pXFARootNode = m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_ELEMENT_Xfa); if (!pXFARootNode) { return nullptr; } m_pRootNode = pXFARootNode; pXFARootNode->SetCData(XFA_ATTRIBUTE_Name, L"xfa"); { CFDE_XMLElement* pElement = static_cast(pXMLDocumentNode); int32_t iAttributeCount = pElement->CountAttributes(); for (int32_t i = 0; i < iAttributeCount; i++) { CFX_WideString wsAttriName, wsAttriValue; pElement->GetAttribute(i, wsAttriName, wsAttriValue); if (wsAttriName == FX_WSTRC(L"uuid")) { pXFARootNode->SetCData(XFA_ATTRIBUTE_Uuid, wsAttriValue); } else if (wsAttriName == FX_WSTRC(L"timeStamp")) { pXFARootNode->SetCData(XFA_ATTRIBUTE_TimeStamp, wsAttriValue); } } } CFDE_XMLNode* pXMLConfigDOMRoot = nullptr; CXFA_Node* pXFAConfigDOMRoot = nullptr; { for (CFDE_XMLNode* pChildItem = pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild); pChildItem; pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) { const XFA_PACKETINFO* pPacketInfo = XFA_GetPacketByIndex(XFA_PACKET_Config); if (!XFA_FDEExtension_MatchNodeName(pChildItem, pPacketInfo->pName, pPacketInfo->pURI, pPacketInfo->eFlags)) { continue; } if (pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) { return nullptr; } pXMLConfigDOMRoot = pChildItem; pXFAConfigDOMRoot = ParseAsXDPPacket_Config(pXMLConfigDOMRoot, XFA_XDPPACKET_Config); pXFARootNode->InsertChild(pXFAConfigDOMRoot, NULL); } } CFDE_XMLNode* pXMLDatasetsDOMRoot = nullptr; CFDE_XMLNode* pXMLFormDOMRoot = nullptr; CFDE_XMLNode* pXMLTemplateDOMRoot = nullptr; { for (CFDE_XMLNode* pChildItem = pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild); pChildItem; pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) { if (!pChildItem || pChildItem->GetType() != FDE_XMLNODE_Element) { continue; } if (pChildItem == pXMLConfigDOMRoot) { continue; } CFDE_XMLElement* pElement = reinterpret_cast(pChildItem); CFX_WideString wsPacketName; pElement->GetLocalTagName(wsPacketName); const XFA_PACKETINFO* pPacketInfo = XFA_GetPacketByName(wsPacketName.AsStringC()); if (pPacketInfo && pPacketInfo->pURI) { if (!XFA_FDEExtension_MatchNodeName(pElement, pPacketInfo->pName, pPacketInfo->pURI, pPacketInfo->eFlags)) { pPacketInfo = nullptr; } } XFA_XDPPACKET ePacket = pPacketInfo ? pPacketInfo->eName : XFA_XDPPACKET_USER; if (ePacket == XFA_XDPPACKET_XDP) { continue; } if (ePacket == XFA_XDPPACKET_Datasets) { if (pXMLDatasetsDOMRoot) { return nullptr; } pXMLDatasetsDOMRoot = pElement; } else if (ePacket == XFA_XDPPACKET_Form) { if (pXMLFormDOMRoot) { return nullptr; } pXMLFormDOMRoot = pElement; } else if (ePacket == XFA_XDPPACKET_Template) { if (pXMLTemplateDOMRoot) { // Found a duplicate template packet. return nullptr; } CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket); if (pPacketNode) { pXMLTemplateDOMRoot = pElement; pXFARootNode->InsertChild(pPacketNode); } } else { CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket); if (pPacketNode) { if (pPacketInfo && (pPacketInfo->eFlags & XFA_XDPPACKET_FLAGS_SUPPORTONE) && pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) { return nullptr; } pXFARootNode->InsertChild(pPacketNode); } } } } if (!pXMLTemplateDOMRoot) { // No template is found. return nullptr; } if (pXMLDatasetsDOMRoot) { CXFA_Node* pPacketNode = ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_XDPPACKET_Datasets); if (pPacketNode) { pXFARootNode->InsertChild(pPacketNode); } } if (pXMLFormDOMRoot) { CXFA_Node* pPacketNode = ParseAsXDPPacket(pXMLFormDOMRoot, XFA_XDPPACKET_Form); if (pPacketNode) { pXFARootNode->InsertChild(pPacketNode); } } pXFARootNode->SetXMLMappingNode(pXMLDocumentNode); return pXFARootNode; } CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config( CFDE_XMLNode* pXMLDocumentNode, XFA_XDPPACKET ePacketID) { if (!XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Config)->pName, XFA_GetPacketByIndex(XFA_PACKET_Config)->pURI, XFA_GetPacketByIndex(XFA_PACKET_Config)->eFlags)) { return NULL; } CXFA_Node* pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Config, XFA_ELEMENT_Config); if (!pNode) { return NULL; } pNode->SetCData(XFA_ATTRIBUTE_Name, XFA_GetPacketByIndex(XFA_PACKET_Config)->pName); if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { return NULL; } pNode->SetXMLMappingNode(pXMLDocumentNode); return pNode; } CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_TemplateForm( CFDE_XMLNode* pXMLDocumentNode, XFA_XDPPACKET ePacketID) { CXFA_Node* pNode = NULL; if (ePacketID == XFA_XDPPACKET_Template) { if (XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Template)->pName, XFA_GetPacketByIndex(XFA_PACKET_Template)->pURI, XFA_GetPacketByIndex(XFA_PACKET_Template)->eFlags)) { pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Template, XFA_ELEMENT_Template); if (!pNode) { return NULL; } pNode->SetCData(XFA_ATTRIBUTE_Name, XFA_GetPacketByIndex(XFA_PACKET_Template)->pName); if (m_bDocumentParser) { CFX_WideString wsNamespaceURI; CFDE_XMLElement* pXMLDocumentElement = static_cast(pXMLDocumentNode); pXMLDocumentElement->GetNamespaceURI(wsNamespaceURI); if (wsNamespaceURI.IsEmpty()) { pXMLDocumentElement->GetString(L"xmlns:xfa", wsNamespaceURI); } pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI); } if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { return NULL; } } } else if (ePacketID == XFA_XDPPACKET_Form) { if (XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Form)->pName, XFA_GetPacketByIndex(XFA_PACKET_Form)->pURI, XFA_GetPacketByIndex(XFA_PACKET_Form)->eFlags)) { CFDE_XMLElement* pXMLDocumentElement = static_cast(pXMLDocumentNode); CFX_WideString wsChecksum; pXMLDocumentElement->GetString(L"checksum", wsChecksum); if (wsChecksum.GetLength() != 28 || m_pXMLParser->m_dwCheckStatus != 0x03) { return NULL; } std::unique_ptr pChecksum(new CXFA_ChecksumContext); pChecksum->StartChecksum(); pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0], m_pXMLParser->m_nSize[0]); pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1], m_pXMLParser->m_nSize[1]); pChecksum->FinishChecksum(); CFX_ByteString bsCheck = pChecksum->GetChecksum(); if (bsCheck != wsChecksum.UTF8Encode()) return nullptr; pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Form, XFA_ELEMENT_Form); if (!pNode) return nullptr; pNode->SetCData(XFA_ATTRIBUTE_Name, XFA_GetPacketByIndex(XFA_PACKET_Form)->pName); pNode->SetAttribute(XFA_ATTRIBUTE_Checksum, wsChecksum.AsStringC()); CXFA_Node* pTemplateRoot = m_pRootNode->GetFirstChildByClass(XFA_ELEMENT_Template); CXFA_Node* pTemplateChosen = pTemplateRoot ? pTemplateRoot->GetFirstChildByClass(XFA_ELEMENT_Subform) : NULL; FX_BOOL bUseAttribute = TRUE; if (pTemplateChosen && pTemplateChosen->GetEnum(XFA_ATTRIBUTE_RestoreState) != XFA_ATTRIBUTEENUM_Auto) { bUseAttribute = FALSE; } if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, bUseAttribute)) { return NULL; } } } if (pNode) { pNode->SetXMLMappingNode(pXMLDocumentNode); } return pNode; } static CFDE_XMLNode* XFA_GetDataSetsFromXDP(CFDE_XMLNode* pXMLDocumentNode) { if (XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) { return pXMLDocumentNode; } if (!XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName, XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI, XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) { return NULL; } for (CFDE_XMLNode* pDatasetsNode = pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild); pDatasetsNode; pDatasetsNode = pDatasetsNode->GetNodeItem(CFDE_XMLNode::NextSibling)) { if (!XFA_FDEExtension_MatchNodeName( pDatasetsNode, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) { continue; } return pDatasetsNode; } return NULL; } CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data( CFDE_XMLNode* pXMLDocumentNode, XFA_XDPPACKET ePacketID) { CFDE_XMLNode* pDatasetsXMLNode = XFA_GetDataSetsFromXDP(pXMLDocumentNode); if (pDatasetsXMLNode) { CXFA_Node* pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_ELEMENT_DataModel); if (!pNode) { return NULL; } pNode->SetCData(XFA_ATTRIBUTE_Name, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName); if (!DataLoader(pNode, pDatasetsXMLNode, FALSE)) { return NULL; } pNode->SetXMLMappingNode(pDatasetsXMLNode); return pNode; } CFDE_XMLNode* pDataXMLNode = NULL; if (XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, FX_WSTRC(L"data"), XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI, XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) { static_cast(pXMLDocumentNode) ->RemoveAttribute(L"xmlns:xfa"); pDataXMLNode = pXMLDocumentNode; } else { CFDE_XMLElement* pDataElement = new CFDE_XMLElement(L"xfa:data"); CFDE_XMLNode* pParentXMLNode = pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::Parent); if (pParentXMLNode) { pParentXMLNode->RemoveChildNode(pXMLDocumentNode); } ASSERT(pXMLDocumentNode->GetType() == FDE_XMLNODE_Element); if (pXMLDocumentNode->GetType() == FDE_XMLNODE_Element) { static_cast(pXMLDocumentNode) ->RemoveAttribute(L"xmlns:xfa"); } pDataElement->InsertChildNode(pXMLDocumentNode); pDataXMLNode = pDataElement; } if (pDataXMLNode) { CXFA_Node* pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_ELEMENT_DataGroup); if (!pNode) { if (pDataXMLNode != pXMLDocumentNode) { pDataXMLNode->Release(); } return NULL; } CFX_WideString wsLocalName; static_cast(pDataXMLNode)->GetLocalTagName(wsLocalName); pNode->SetCData(XFA_ATTRIBUTE_Name, wsLocalName); if (!DataLoader(pNode, pDataXMLNode, TRUE)) { return NULL; } pNode->SetXMLMappingNode(pDataXMLNode); if (pDataXMLNode != pXMLDocumentNode) { pNode->SetFlag(XFA_NODEFLAG_OwnXMLNode, false); } return pNode; } return NULL; } CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet( CFDE_XMLNode* pXMLDocumentNode, XFA_XDPPACKET ePacketID) { CXFA_Node* pNode = NULL; if (ePacketID == XFA_XDPPACKET_LocaleSet) { if (XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName, XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pURI, XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->eFlags)) { pNode = m_pFactory->CreateNode(XFA_XDPPACKET_LocaleSet, XFA_ELEMENT_LocaleSet); if (!pNode) { return NULL; } pNode->SetCData(XFA_ATTRIBUTE_Name, XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName); if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { return NULL; } } } else if (ePacketID == XFA_XDPPACKET_ConnectionSet) { if (XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName, XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pURI, XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->eFlags)) { pNode = m_pFactory->CreateNode(XFA_XDPPACKET_ConnectionSet, XFA_ELEMENT_ConnectionSet); if (!pNode) { return NULL; } pNode->SetCData(XFA_ATTRIBUTE_Name, XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName); if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { return NULL; } } } else if (ePacketID == XFA_XDPPACKET_SourceSet) { if (XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName, XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pURI, XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->eFlags)) { pNode = m_pFactory->CreateNode(XFA_XDPPACKET_SourceSet, XFA_ELEMENT_SourceSet); if (!pNode) { return NULL; } pNode->SetCData(XFA_ATTRIBUTE_Name, XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName); if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID)) { return NULL; } } } if (pNode) { pNode->SetXMLMappingNode(pXMLDocumentNode); } return pNode; } CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc( CFDE_XMLNode* pXMLDocumentNode, XFA_XDPPACKET ePacketID) { if (XFA_FDEExtension_MatchNodeName( pXMLDocumentNode, XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName, XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pURI, XFA_GetPacketByIndex(XFA_PACKET_Xdc)->eFlags)) { CXFA_Node* pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Xdc, XFA_ELEMENT_Xdc); if (!pNode) { return NULL; } pNode->SetCData(XFA_ATTRIBUTE_Name, XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName); pNode->SetXMLMappingNode(pXMLDocumentNode); return pNode; } return NULL; } CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User( CFDE_XMLNode* pXMLDocumentNode, XFA_XDPPACKET ePacketID) { CXFA_Node* pNode = m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_ELEMENT_Packet); if (!pNode) { return NULL; } CFX_WideString wsName; static_cast(pXMLDocumentNode)->GetLocalTagName(wsName); pNode->SetCData(XFA_ATTRIBUTE_Name, wsName); if (!UserPacketLoader(pNode, pXMLDocumentNode)) { return NULL; } pNode->SetXMLMappingNode(pXMLDocumentNode); return pNode; } CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode, CFDE_XMLNode* pXMLDoc) { return pXFANode; } static FX_BOOL XFA_FDEExtension_IsStringAllWhitespace(CFX_WideString wsText) { wsText.TrimRight(L"\x20\x9\xD\xA"); return wsText.IsEmpty(); } CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode, CFDE_XMLNode* pXMLDoc, FX_BOOL bDoTransform) { ParseDataGroup(pXFANode, pXMLDoc, XFA_XDPPACKET_Datasets); return pXFANode; } CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode, CFDE_XMLNode* pXMLDoc, XFA_XDPPACKET ePacketID, FX_BOOL bUseAttribute) { FX_BOOL bOneOfPropertyFound = FALSE; for (CFDE_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFDE_XMLNode::FirstChild); pXMLChild; pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { switch (pXMLChild->GetType()) { case FDE_XMLNODE_Element: { CFDE_XMLElement* pXMLElement = static_cast(pXMLChild); CFX_WideString wsTagName; pXMLElement->GetLocalTagName(wsTagName); const XFA_ELEMENTINFO* pElemInfo = XFA_GetElementByName(wsTagName.AsStringC()); if (!pElemInfo) { continue; } const XFA_PROPERTY* pPropertyInfo = XFA_GetPropertyOfElement( pXFANode->GetClassID(), pElemInfo->eName, ePacketID); if (pPropertyInfo && ((pPropertyInfo->uFlags & (XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) != 0)) { if (bOneOfPropertyFound) { break; } bOneOfPropertyFound = TRUE; } CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, pElemInfo->eName); if (pXFAChild == NULL) { return NULL; } if (ePacketID == XFA_XDPPACKET_Config) { pXFAChild->SetAttribute(XFA_ATTRIBUTE_Name, wsTagName.AsStringC()); } FX_BOOL IsNeedValue = TRUE; for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count; i++) { CFX_WideString wsAttrQualifiedName; CFX_WideString wsAttrName; CFX_WideString wsAttrValue; pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue); XFA_FDEExtension_GetAttributeLocalName( wsAttrQualifiedName.AsStringC(), wsAttrName); if (wsAttrName == FX_WSTRC(L"nil") && wsAttrValue == FX_WSTRC(L"true")) { IsNeedValue = FALSE; } const XFA_ATTRIBUTEINFO* lpAttrInfo = XFA_GetAttributeByName(wsAttrName.AsStringC()); if (!lpAttrInfo) { continue; } if (!bUseAttribute && lpAttrInfo->eName != XFA_ATTRIBUTE_Name && lpAttrInfo->eName != XFA_ATTRIBUTE_Save) { continue; } pXFAChild->SetAttribute(lpAttrInfo->eName, wsAttrValue.AsStringC()); } pXFANode->InsertChild(pXFAChild); if (pElemInfo->eName == XFA_ELEMENT_Validate || pElemInfo->eName == XFA_ELEMENT_Locale) { if (ePacketID == XFA_XDPPACKET_Config) { ParseContentNode(pXFAChild, pXMLElement, ePacketID); } else { NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); } break; } switch (pXFAChild->GetObjectType()) { case XFA_OBJECTTYPE_ContentNode: case XFA_OBJECTTYPE_TextNode: case XFA_OBJECTTYPE_NodeC: case XFA_OBJECTTYPE_NodeV: if (IsNeedValue) { ParseContentNode(pXFAChild, pXMLElement, ePacketID); } break; default: NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); break; } } break; case FDE_XMLNODE_Instruction: ParseInstruction(pXFANode, static_cast(pXMLChild), ePacketID); break; default: break; } } return pXFANode; } FX_BOOL XFA_RecognizeRichText(CFDE_XMLElement* pRichTextXMLNode) { if (pRichTextXMLNode) { CFX_WideString wsNamespaceURI; XFA_FDEExtension_GetElementTagNamespaceURI(pRichTextXMLNode, wsNamespaceURI); if (wsNamespaceURI == FX_WSTRC(L"http://www.w3.org/1999/xhtml")) { return TRUE; } } return FALSE; } class RichTextNodeVisitor { public: static inline CFDE_XMLNode* GetFirstChild(CFDE_XMLNode* pNode) { return pNode->GetNodeItem(CFDE_XMLNode::FirstChild); } static inline CFDE_XMLNode* GetNextSibling(CFDE_XMLNode* pNode) { return pNode->GetNodeItem(CFDE_XMLNode::NextSibling); } static inline CFDE_XMLNode* GetParent(CFDE_XMLNode* pNode) { return pNode->GetNodeItem(CFDE_XMLNode::Parent); } }; void XFA_ConvertXMLToPlainText(CFDE_XMLElement* pRootXMLNode, CFX_WideString& wsOutput) { for (CFDE_XMLNode* pXMLChild = pRootXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); pXMLChild; pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { switch (pXMLChild->GetType()) { case FDE_XMLNODE_Element: { CFX_WideString wsTextData; static_cast(pXMLChild)->GetTextData(wsTextData); wsTextData += FX_WSTRC(L"\n"); wsOutput += wsTextData; } break; case FDE_XMLNODE_Text: { CFX_WideString wsText; static_cast(pXMLChild)->GetText(wsText); if (XFA_FDEExtension_IsStringAllWhitespace(wsText)) { continue; } else { wsOutput = wsText; } } break; case FDE_XMLNODE_CharData: { CFX_WideString wsCharData; static_cast(pXMLChild)->GetCharData(wsCharData); if (XFA_FDEExtension_IsStringAllWhitespace(wsCharData)) { continue; } else { wsOutput = wsCharData; } } break; default: ASSERT(FALSE); break; } } } void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode, CFDE_XMLNode* pXMLNode, XFA_XDPPACKET ePacketID) { XFA_ELEMENT element = XFA_ELEMENT_Sharptext; if (pXFANode->GetClassID() == XFA_ELEMENT_ExData) { CFX_WideStringC wsContentType = pXFANode->GetCData(XFA_ATTRIBUTE_ContentType); if (wsContentType == FX_WSTRC(L"text/html")) element = XFA_ELEMENT_SharpxHTML; else if (wsContentType == FX_WSTRC(L"text/xml")) element = XFA_ELEMENT_Sharpxml; } if (element == XFA_ELEMENT_SharpxHTML) pXFANode->SetXMLMappingNode(pXMLNode); CFX_WideString wsValue; for (CFDE_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); pXMLChild; pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { FDE_XMLNODETYPE eNodeType = pXMLChild->GetType(); if (eNodeType == FDE_XMLNODE_Instruction) continue; if (element == XFA_ELEMENT_SharpxHTML) { if (eNodeType != FDE_XMLNODE_Element) break; if (XFA_RecognizeRichText(static_cast(pXMLChild))) XFA_GetPlainTextFromRichText(static_cast(pXMLChild), wsValue); } else if (element == XFA_ELEMENT_Sharpxml) { if (eNodeType != FDE_XMLNODE_Element) break; XFA_ConvertXMLToPlainText(static_cast(pXMLChild), wsValue); } else { if (eNodeType == FDE_XMLNODE_Element) break; if (eNodeType == FDE_XMLNODE_Text) static_cast(pXMLChild)->GetText(wsValue); else if (eNodeType == FDE_XMLNODE_CharData) static_cast(pXMLChild)->GetCharData(wsValue); } break; } if (!wsValue.IsEmpty()) { if (pXFANode->GetObjectType() == XFA_OBJECTTYPE_ContentNode) { CXFA_Node* pContentRawDataNode = m_pFactory->CreateNode(ePacketID, element); ASSERT(pContentRawDataNode); pContentRawDataNode->SetCData(XFA_ATTRIBUTE_Value, wsValue); pXFANode->InsertChild(pContentRawDataNode); } else { pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsValue); } } } void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode, CFDE_XMLNode* pXMLNode, XFA_XDPPACKET ePacketID) { for (CFDE_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); pXMLChild; pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { switch (pXMLChild->GetType()) { case FDE_XMLNODE_Element: { CFDE_XMLElement* pXMLElement = static_cast(pXMLChild); { CFX_WideString wsNamespaceURI; XFA_FDEExtension_GetElementTagNamespaceURI(pXMLElement, wsNamespaceURI); if (wsNamespaceURI == FX_WSTRC(L"http://www.xfa.com/schema/xfa-package/") || wsNamespaceURI == FX_WSTRC(L"http://www.xfa.org/schema/xfa-package/") || wsNamespaceURI == FX_WSTRC(L"http://www.w3.org/2001/XMLSchema-instance")) { continue; } if (0) { continue; } } XFA_ELEMENT eNodeType = XFA_ELEMENT_DataModel; if (eNodeType == XFA_ELEMENT_DataModel) { CFX_WideString wsDataNodeAttr; if (XFA_FDEExtension_FindAttributeWithNS( pXMLElement, FX_WSTRC(L"dataNode"), FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/"), wsDataNodeAttr)) { if (wsDataNodeAttr == FX_WSTRC(L"dataGroup")) { eNodeType = XFA_ELEMENT_DataGroup; } else if (wsDataNodeAttr == FX_WSTRC(L"dataValue")) { eNodeType = XFA_ELEMENT_DataValue; } } } CFX_WideString wsContentType; if (eNodeType == XFA_ELEMENT_DataModel) { if (XFA_FDEExtension_FindAttributeWithNS( pXMLElement, FX_WSTRC(L"contentType"), FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/"), wsContentType)) { if (!wsContentType.IsEmpty()) { eNodeType = XFA_ELEMENT_DataValue; } } } if (eNodeType == XFA_ELEMENT_DataModel) { for (CFDE_XMLNode* pXMLDataChild = pXMLElement->GetNodeItem(CFDE_XMLNode::FirstChild); pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem( CFDE_XMLNode::NextSibling)) { if (pXMLDataChild->GetType() == FDE_XMLNODE_Element) { if (!XFA_RecognizeRichText( static_cast(pXMLDataChild))) { eNodeType = XFA_ELEMENT_DataGroup; break; } } } } if (eNodeType == XFA_ELEMENT_DataModel) { eNodeType = XFA_ELEMENT_DataValue; } CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, eNodeType); if (pXFAChild == NULL) { return; } CFX_WideString wsNodeName; pXMLElement->GetLocalTagName(wsNodeName); pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeName); FX_BOOL bNeedValue = TRUE; if (1) { for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count; i++) { CFX_WideString wsAttrQualifiedName; CFX_WideString wsAttrValue; CFX_WideString wsAttrName; CFX_WideString wsAttrNamespaceURI; pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue); if (!XFA_FDEExtension_ResolveAttribute( pXMLElement, wsAttrQualifiedName.AsStringC(), wsAttrName, wsAttrNamespaceURI)) { continue; } if (wsAttrName == FX_WSTRC(L"nil") && wsAttrValue == FX_WSTRC(L"true")) { bNeedValue = FALSE; continue; } if (wsAttrNamespaceURI == FX_WSTRC(L"http://www.xfa.com/schema/xfa-package/") || wsAttrNamespaceURI == FX_WSTRC(L"http://www.xfa.org/schema/xfa-package/") || wsAttrNamespaceURI == FX_WSTRC(L"http://www.w3.org/2001/XMLSchema-instance") || wsAttrNamespaceURI == FX_WSTRC(L"http://www.xfa.org/schema/xfa-data/1.0/")) { continue; } if (0) { continue; } CXFA_Node* pXFAMetaData = m_pFactory->CreateNode( XFA_XDPPACKET_Datasets, XFA_ELEMENT_DataValue); if (pXFAMetaData == NULL) { return; } pXFAMetaData->SetCData(XFA_ATTRIBUTE_Name, wsAttrName); pXFAMetaData->SetCData(XFA_ATTRIBUTE_QualifiedName, wsAttrQualifiedName); pXFAMetaData->SetCData(XFA_ATTRIBUTE_Value, wsAttrValue); pXFAMetaData->SetEnum(XFA_ATTRIBUTE_Contains, XFA_ATTRIBUTEENUM_MetaData); pXFAChild->InsertChild(pXFAMetaData); pXFAMetaData->SetXMLMappingNode(pXMLElement); pXFAMetaData->SetFlag(XFA_NODEFLAG_Initialized, false); } if (!bNeedValue) { CFX_WideString wsNilName(L"xsi:nil"); pXMLElement->RemoveAttribute(wsNilName.c_str()); } } pXFANode->InsertChild(pXFAChild); if (eNodeType == XFA_ELEMENT_DataGroup) { ParseDataGroup(pXFAChild, pXMLElement, ePacketID); } else { if (bNeedValue) { ParseDataValue(pXFAChild, pXMLChild, XFA_XDPPACKET_Datasets); } } pXFAChild->SetXMLMappingNode(pXMLElement); pXFAChild->SetFlag(XFA_NODEFLAG_Initialized, false); } continue; case FDE_XMLNODE_CharData: { CFDE_XMLCharData* pXMLCharData = static_cast(pXMLChild); CFX_WideString wsCharData; pXMLCharData->GetCharData(wsCharData); if (XFA_FDEExtension_IsStringAllWhitespace(wsCharData)) { continue; } CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_ELEMENT_DataValue); if (pXFAChild == NULL) { return; } pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCharData); pXFANode->InsertChild(pXFAChild); pXFAChild->SetXMLMappingNode(pXMLCharData); pXFAChild->SetFlag(XFA_NODEFLAG_Initialized, false); } continue; case FDE_XMLNODE_Text: { CFDE_XMLText* pXMLText = static_cast(pXMLChild); CFX_WideString wsText; pXMLText->GetText(wsText); if (XFA_FDEExtension_IsStringAllWhitespace(wsText)) { continue; } CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_ELEMENT_DataValue); if (pXFAChild == NULL) { return; } pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsText); pXFANode->InsertChild(pXFAChild); pXFAChild->SetXMLMappingNode(pXMLText); pXFAChild->SetFlag(XFA_NODEFLAG_Initialized, false); } continue; case FDE_XMLNODE_Instruction: continue; default: continue; } } } void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode, CFDE_XMLNode* pXMLNode, XFA_XDPPACKET ePacketID) { CFX_WideTextBuf wsValueTextBuf; CFX_WideTextBuf wsCurValueTextBuf; FX_BOOL bMarkAsCompound = FALSE; CFDE_XMLNode* pXMLCurValueNode = nullptr; for (CFDE_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild); pXMLChild; pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) { FDE_XMLNODETYPE eNodeType = pXMLChild->GetType(); if (eNodeType == FDE_XMLNODE_Instruction) continue; CFX_WideString wsText; if (eNodeType == FDE_XMLNODE_Text) { static_cast(pXMLChild)->GetText(wsText); if (!pXMLCurValueNode) pXMLCurValueNode = pXMLChild; wsCurValueTextBuf << wsText; } else if (eNodeType == FDE_XMLNODE_CharData) { static_cast(pXMLChild)->GetCharData(wsText); if (!pXMLCurValueNode) pXMLCurValueNode = pXMLChild; wsCurValueTextBuf << wsText; } else if (XFA_RecognizeRichText( static_cast(pXMLChild))) { XFA_GetPlainTextFromRichText(static_cast(pXMLChild), wsText); if (!pXMLCurValueNode) pXMLCurValueNode = pXMLChild; wsCurValueTextBuf << wsText; } else { bMarkAsCompound = TRUE; if (pXMLCurValueNode) { CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString(); if (!wsCurValue.IsEmpty()) { CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, XFA_ELEMENT_DataValue); if (!pXFAChild) return; pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L""); pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue); pXFANode->InsertChild(pXFAChild); pXFAChild->SetXMLMappingNode(pXMLCurValueNode); pXFAChild->SetFlag(XFA_NODEFLAG_Initialized, false); wsValueTextBuf << wsCurValue; wsCurValueTextBuf.Clear(); } pXMLCurValueNode = nullptr; } CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, XFA_ELEMENT_DataValue); if (!pXFAChild) return; CFX_WideString wsNodeStr; static_cast(pXMLChild)->GetLocalTagName(wsNodeStr); pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr); ParseDataValue(pXFAChild, pXMLChild, ePacketID); pXFANode->InsertChild(pXFAChild); pXFAChild->SetXMLMappingNode(pXMLChild); pXFAChild->SetFlag(XFA_NODEFLAG_Initialized, false); CFX_WideStringC wsCurValue = pXFAChild->GetCData(XFA_ATTRIBUTE_Value); wsValueTextBuf << wsCurValue; } } if (pXMLCurValueNode) { CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString(); if (!wsCurValue.IsEmpty()) { if (bMarkAsCompound) { CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, XFA_ELEMENT_DataValue); if (!pXFAChild) return; pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L""); pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue); pXFANode->InsertChild(pXFAChild); pXFAChild->SetXMLMappingNode(pXMLCurValueNode); pXFAChild->SetFlag(XFA_NODEFLAG_Initialized, false); } wsValueTextBuf << wsCurValue; wsCurValueTextBuf.Clear(); } pXMLCurValueNode = nullptr; } CFX_WideString wsNodeValue = wsValueTextBuf.MakeString(); pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsNodeValue); } void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode, CFDE_XMLInstruction* pXMLInstruction, XFA_XDPPACKET ePacketID) { if (!m_bDocumentParser) { return; } CFX_WideString wsTargetName; pXMLInstruction->GetTargetName(wsTargetName); if (wsTargetName == FX_WSTRC(L"originalXFAVersion")) { CFX_WideString wsData; if (pXMLInstruction->GetData(0, wsData) && (pXFANode->GetDocument()->RecognizeXFAVersionNumber(wsData) != XFA_VERSION_UNKNOWN)) { wsData.clear(); if (pXMLInstruction->GetData(1, wsData) && wsData == FX_WSTRC(L"v2.7-scripting:1")) { pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, TRUE); } } } else if (wsTargetName == FX_WSTRC(L"acrobat")) { CFX_WideString wsData; if (pXMLInstruction->GetData(0, wsData) && wsData == FX_WSTRC(L"JavaScript")) { if (pXMLInstruction->GetData(1, wsData) && wsData == FX_WSTRC(L"strictScoping")) { pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, TRUE); } } } } void CXFA_SimpleParser::CloseParser() { if (m_pXMLDoc) { m_pXMLDoc->Release(); m_pXMLDoc = NULL; } if (m_pStream) { m_pStream->Release(); m_pStream = NULL; } } CXFA_DocumentParser::CXFA_DocumentParser(CXFA_FFNotify* pNotify) : m_nodeParser(NULL, TRUE), m_pNotify(pNotify), m_pDocument(NULL) {} CXFA_DocumentParser::~CXFA_DocumentParser() { CloseParser(); } int32_t CXFA_DocumentParser::StartParse(IFX_FileRead* pStream, XFA_XDPPACKET ePacketID) { CloseParser(); int32_t nRetStatus = m_nodeParser.StartParse(pStream, ePacketID); if (nRetStatus == XFA_PARSESTATUS_Ready) { m_pDocument = new CXFA_Document(this); m_nodeParser.SetFactory(m_pDocument); } return nRetStatus; } int32_t CXFA_DocumentParser::DoParse(IFX_Pause* pPause) { int32_t nRetStatus = m_nodeParser.DoParse(pPause); if (nRetStatus >= XFA_PARSESTATUS_Done) { ASSERT(m_pDocument); m_pDocument->SetRoot(m_nodeParser.GetRootNode()); } return nRetStatus; } int32_t CXFA_DocumentParser::ParseXMLData(const CFX_WideString& wsXML, CFDE_XMLNode*& pXMLNode, IFX_Pause* pPause) { CloseParser(); int32_t nRetStatus = m_nodeParser.ParseXMLData(wsXML, pXMLNode, NULL); if (nRetStatus == XFA_PARSESTATUS_Done && pXMLNode) { m_pDocument = new CXFA_Document(this); m_nodeParser.SetFactory(m_pDocument); } return nRetStatus; } void CXFA_DocumentParser::ConstructXFANode(CXFA_Node* pXFANode, CFDE_XMLNode* pXMLNode) { if (!pXFANode || !pXMLNode) { return; } m_nodeParser.ConstructXFANode(pXFANode, pXMLNode); CXFA_Node* pRootNode = m_nodeParser.GetRootNode(); if (m_pDocument && pRootNode) { m_pDocument->SetRoot(pRootNode); } } void CXFA_DocumentParser::CloseParser() { delete m_pDocument; m_pDocument = nullptr; m_nodeParser.CloseParser(); } CXFA_XMLParser::CXFA_XMLParser(CFDE_XMLNode* pRoot, IFX_Stream* pStream) : m_nElementStart(0), m_dwCheckStatus(0), m_dwCurrentCheckStatus(0), m_pRoot(pRoot), m_pStream(pStream), m_pParser(nullptr), m_pParent(pRoot), m_pChild(nullptr), m_NodeStack(16), m_syntaxParserResult(FDE_XmlSyntaxResult::None) { ASSERT(m_pParent && m_pStream); m_NodeStack.Push(m_pParent); m_pParser = new CFDE_XMLSyntaxParser; m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024); } CXFA_XMLParser::~CXFA_XMLParser() { if (m_pParser) { m_pParser->Release(); } m_NodeStack.RemoveAll(); m_ws1.clear(); m_ws2.clear(); } int32_t CXFA_XMLParser::DoParser(IFX_Pause* pPause) { if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) return -1; if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) return 100; int32_t iCount = 0; while (TRUE) { m_syntaxParserResult = m_pParser->DoSyntaxParse(); switch (m_syntaxParserResult) { case FDE_XmlSyntaxResult::InstructionOpen: break; case FDE_XmlSyntaxResult::InstructionClose: if (m_pChild) { if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { m_syntaxParserResult = FDE_XmlSyntaxResult::Error; break; } } m_pChild = m_pParent; break; case FDE_XmlSyntaxResult::ElementOpen: if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 2) { m_nElementStart = m_pParser->GetCurrentPos() - 1; } break; case FDE_XmlSyntaxResult::ElementBreak: break; case FDE_XmlSyntaxResult::ElementClose: if (m_pChild->GetType() != FDE_XMLNODE_Element) { m_syntaxParserResult = FDE_XmlSyntaxResult::Error; break; } m_pParser->GetTagName(m_ws1); static_cast(m_pChild)->GetTagName(m_ws2); if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { m_syntaxParserResult = FDE_XmlSyntaxResult::Error; break; } m_NodeStack.Pop(); if (m_NodeStack.GetSize() < 1) { m_syntaxParserResult = FDE_XmlSyntaxResult::Error; break; } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.GetSize() == 2) { m_nSize[m_dwCurrentCheckStatus - 1] = m_pParser->GetCurrentBinaryPos() - m_nStart[m_dwCurrentCheckStatus - 1]; m_dwCurrentCheckStatus = 0; } m_pParent = static_cast(*m_NodeStack.GetTopElement()); m_pChild = m_pParent; iCount++; break; case FDE_XmlSyntaxResult::TargetName: m_pParser->GetTargetName(m_ws1); if (m_ws1 == FX_WSTRC(L"originalXFAVersion") || m_ws1 == FX_WSTRC(L"acrobat")) { m_pChild = new CFDE_XMLInstruction(m_ws1); m_pParent->InsertChildNode(m_pChild); } else { m_pChild = NULL; } m_ws1.clear(); break; case FDE_XmlSyntaxResult::TagName: m_pParser->GetTagName(m_ws1); m_pChild = new CFDE_XMLElement(m_ws1); m_pParent->InsertChildNode(m_pChild); m_NodeStack.Push(m_pChild); m_pParent = m_pChild; if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 3) { CFX_WideString wsTag; static_cast(m_pChild)->GetLocalTagName(wsTag); if (wsTag == FX_WSTRC(L"template")) { m_dwCheckStatus |= 0x01; m_dwCurrentCheckStatus = 0x01; m_nStart[0] = m_pParser->GetCurrentBinaryPos() - (m_pParser->GetCurrentPos() - m_nElementStart); } else if (wsTag == FX_WSTRC(L"datasets")) { m_dwCheckStatus |= 0x02; m_dwCurrentCheckStatus = 0x02; m_nStart[1] = m_pParser->GetCurrentBinaryPos() - (m_pParser->GetCurrentPos() - m_nElementStart); } } break; case FDE_XmlSyntaxResult::AttriName: m_pParser->GetAttributeName(m_ws1); break; case FDE_XmlSyntaxResult::AttriValue: if (m_pChild) { m_pParser->GetAttributeName(m_ws2); if (m_pChild->GetType() == FDE_XMLNODE_Element) { static_cast(m_pChild)->SetString(m_ws1, m_ws2); } } m_ws1.clear(); break; case FDE_XmlSyntaxResult::Text: m_pParser->GetTextData(m_ws1); m_pChild = new CFDE_XMLText(m_ws1); m_pParent->InsertChildNode(m_pChild); m_pChild = m_pParent; break; case FDE_XmlSyntaxResult::CData: m_pParser->GetTextData(m_ws1); m_pChild = new CFDE_XMLCharData(m_ws1); m_pParent->InsertChildNode(m_pChild); m_pChild = m_pParent; break; case FDE_XmlSyntaxResult::TargetData: if (m_pChild) { if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { m_syntaxParserResult = FDE_XmlSyntaxResult::Error; break; } if (!m_ws1.IsEmpty()) { static_cast(m_pChild)->AppendData(m_ws1); } m_pParser->GetTargetData(m_ws1); static_cast(m_pChild)->AppendData(m_ws1); } m_ws1.clear(); break; default: break; } if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { break; } if (pPause && iCount > 500 && pPause->NeedToPauseNow()) { break; } } return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || m_NodeStack.GetSize() != 1) ? -1 : m_pParser->GetStatus(); }