diff options
author | dan sinclair <dsinclair@chromium.org> | 2017-12-04 15:44:46 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-12-04 15:44:46 +0000 |
commit | a827770faf5c3cfb157d59d401134e986dc4a18d (patch) | |
tree | 9c28f5cece852f498e74c945508d1456ad775a4b /xfa/fxfa/parser | |
parent | f2d7656f335cf38dfafb97370ffac38b9d4e3fc0 (diff) | |
download | pdfium-a827770faf5c3cfb157d59d401134e986dc4a18d.tar.xz |
Shuffle XFA code around
This CL moves some XFA code around to better match the files in which it
is either used or defined.
Change-Id: Iec3df187326e48189a048355a8bef76cccb4cdb4
Reviewed-on: https://pdfium-review.googlesource.com/20250
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Diffstat (limited to 'xfa/fxfa/parser')
-rw-r--r-- | xfa/fxfa/parser/cxfa_dataexporter.cpp | 444 | ||||
-rw-r--r-- | xfa/fxfa/parser/cxfa_simple_parser.cpp | 86 | ||||
-rw-r--r-- | xfa/fxfa/parser/xfa_utils.cpp | 495 | ||||
-rw-r--r-- | xfa/fxfa/parser/xfa_utils.h | 6 |
4 files changed, 513 insertions, 518 deletions
diff --git a/xfa/fxfa/parser/cxfa_dataexporter.cpp b/xfa/fxfa/parser/cxfa_dataexporter.cpp index 02c316d920..2b5f3e6755 100644 --- a/xfa/fxfa/parser/cxfa_dataexporter.cpp +++ b/xfa/fxfa/parser/cxfa_dataexporter.cpp @@ -6,10 +6,6 @@ #include "xfa/fxfa/parser/cxfa_dataexporter.h" -#include <vector> - -#include "core/fxcrt/cfx_memorystream.h" -#include "core/fxcrt/cfx_widetextbuf.h" #include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/xml/cfx_xmldoc.h" #include "core/fxcrt/xml/cfx_xmlelement.h" @@ -20,444 +16,6 @@ #include "xfa/fxfa/parser/cxfa_widgetdata.h" #include "xfa/fxfa/parser/xfa_utils.h" -namespace { - -constexpr const wchar_t kFormNS[] = L"http://www.xfa.org/schema/xfa-form/"; - -WideString ExportEncodeAttribute(const WideString& str) { - CFX_WideTextBuf textBuf; - int32_t iLen = str.GetLength(); - for (int32_t i = 0; i < iLen; i++) { - switch (str[i]) { - case '&': - textBuf << L"&amp;"; - break; - case '<': - textBuf << L"&lt;"; - break; - case '>': - textBuf << L"&gt;"; - break; - case '\'': - textBuf << L"&apos;"; - break; - case '\"': - textBuf << L"&quot;"; - break; - default: - textBuf.AppendChar(str[i]); - } - } - return textBuf.MakeString(); -} - -bool IsXMLValidChar(wchar_t ch) { - return ch == 0x09 || ch == 0x0A || ch == 0x0D || - (ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD); -} - -WideString ExportEncodeContent(const WideString& str) { - CFX_WideTextBuf textBuf; - int32_t iLen = str.GetLength(); - for (int32_t i = 0; i < iLen; i++) { - wchar_t ch = str[i]; - if (!IsXMLValidChar(ch)) - continue; - - if (ch == '&') { - textBuf << L"&amp;"; - } else if (ch == '<') { - textBuf << L"&lt;"; - } else if (ch == '>') { - textBuf << L"&gt;"; - } else if (ch == '\'') { - textBuf << L"&apos;"; - } else if (ch == '\"') { - textBuf << L"&quot;"; - } else if (ch == ' ') { - if (i && str[i - 1] != ' ') { - textBuf.AppendChar(' '); - } else { - textBuf << L"&#x20;"; - } - } else { - textBuf.AppendChar(str[i]); - } - } - return textBuf.MakeString(); -} - -void SaveAttribute(CXFA_Node* 
pNode, - XFA_Attribute eName, - const WideString& wsName, - bool bProto, - WideString& wsOutput) { - if (!bProto && !pNode->JSNode()->HasAttribute(eName)) - return; - - pdfium::Optional<WideString> value = - pNode->JSNode()->TryAttribute(eName, false); - if (!value) - return; - - wsOutput += L" "; - wsOutput += wsName; - wsOutput += L"=\""; - wsOutput += ExportEncodeAttribute(*value); - wsOutput += L"\""; -} - -bool AttributeSaveInDataModel(CXFA_Node* pNode, XFA_Attribute eAttribute) { - bool bSaveInDataModel = false; - if (pNode->GetElementType() != XFA_Element::Image) - return bSaveInDataModel; - - CXFA_Node* pValueNode = pNode->GetNodeItem(XFA_NODEITEM_Parent); - if (!pValueNode || pValueNode->GetElementType() != XFA_Element::Value) - return bSaveInDataModel; - - CXFA_Node* pFieldNode = pValueNode->GetNodeItem(XFA_NODEITEM_Parent); - if (pFieldNode && pFieldNode->GetBindData() && - eAttribute == XFA_Attribute::Href) { - bSaveInDataModel = true; - } - return bSaveInDataModel; -} - -bool ContentNodeNeedtoExport(CXFA_Node* pContentNode) { - pdfium::Optional<WideString> wsContent = - pContentNode->JSNode()->TryContent(false, false); - if (!wsContent) - return false; - - ASSERT(pContentNode->IsContentNode()); - CXFA_Node* pParentNode = pContentNode->GetNodeItem(XFA_NODEITEM_Parent); - if (!pParentNode || pParentNode->GetElementType() != XFA_Element::Value) - return true; - - CXFA_Node* pGrandParentNode = pParentNode->GetNodeItem(XFA_NODEITEM_Parent); - if (!pGrandParentNode || !pGrandParentNode->IsContainerNode()) - return true; - if (pGrandParentNode->GetBindData()) - return false; - - CXFA_WidgetData* pWidgetData = pGrandParentNode->GetWidgetData(); - XFA_Element eUIType = pWidgetData->GetUIType(); - if (eUIType == XFA_Element::PasswordEdit) - return false; - return true; -} - -void RecognizeXFAVersionNumber(CXFA_Node* pTemplateRoot, - WideString& wsVersionNumber) { - wsVersionNumber.clear(); - if (!pTemplateRoot) - return; - - pdfium::Optional<WideString> 
templateNS = - pTemplateRoot->JSNode()->TryNamespace(); - if (!templateNS) - return; - - XFA_VERSION eVersion = - pTemplateRoot->GetDocument()->RecognizeXFAVersionNumber(*templateNS); - if (eVersion == XFA_VERSION_UNKNOWN) - eVersion = XFA_VERSION_DEFAULT; - - wsVersionNumber = - WideString::Format(L"%i.%i", eVersion / 100, eVersion % 100); -} - -void RegenerateFormFile_Changed(CXFA_Node* pNode, - CFX_WideTextBuf& buf, - bool bSaveXML) { - WideString wsAttrs; - for (size_t i = 0;; ++i) { - XFA_Attribute attr = pNode->GetAttribute(i); - if (attr == XFA_Attribute::Unknown) - break; - - if (attr == XFA_Attribute::Name || - (AttributeSaveInDataModel(pNode, attr) && !bSaveXML)) { - continue; - } - WideString wsAttr; - SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), bSaveXML, - wsAttr); - wsAttrs += wsAttr; - } - - WideString wsChildren; - switch (pNode->GetObjectType()) { - case XFA_ObjectType::ContentNode: { - if (!bSaveXML && !ContentNodeNeedtoExport(pNode)) - break; - - CXFA_Node* pRawValueNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild); - while (pRawValueNode && - pRawValueNode->GetElementType() != XFA_Element::SharpxHTML && - pRawValueNode->GetElementType() != XFA_Element::Sharptext && - pRawValueNode->GetElementType() != XFA_Element::Sharpxml) { - pRawValueNode = pRawValueNode->GetNodeItem(XFA_NODEITEM_NextSibling); - } - if (!pRawValueNode) - break; - - pdfium::Optional<WideString> contentType = - pNode->JSNode()->TryAttribute(XFA_Attribute::ContentType, false); - if (pRawValueNode->GetElementType() == XFA_Element::SharpxHTML && - (contentType && *contentType == L"text/html")) { - CFX_XMLNode* pExDataXML = pNode->GetXMLMappingNode(); - if (!pExDataXML) - break; - - CFX_XMLNode* pRichTextXML = - pExDataXML->GetNodeItem(CFX_XMLNode::FirstChild); - if (!pRichTextXML) - break; - - auto pMemStream = pdfium::MakeRetain<CFX_MemoryStream>(true); - auto pTempStream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>(pMemStream, true); - - 
pTempStream->SetCodePage(FX_CODEPAGE_UTF8); - pRichTextXML->SaveXMLNode(pTempStream); - wsChildren += WideString::FromUTF8( - ByteStringView(pMemStream->GetBuffer(), pMemStream->GetSize())); - } else if (pRawValueNode->GetElementType() == XFA_Element::Sharpxml && - (contentType && *contentType == L"text/xml")) { - pdfium::Optional<WideString> rawValue = - pRawValueNode->JSNode()->TryAttribute(XFA_Attribute::Value, false); - if (!rawValue || rawValue->IsEmpty()) - break; - - std::vector<WideString> wsSelTextArray; - size_t iStart = 0; - auto iEnd = rawValue->Find(L'\n', iStart); - iEnd = !iEnd.has_value() ? rawValue->GetLength() : iEnd; - while (iEnd.has_value() && iEnd >= iStart) { - wsSelTextArray.push_back( - rawValue->Mid(iStart, iEnd.value() - iStart)); - iStart = iEnd.value() + 1; - if (iStart >= rawValue->GetLength()) - break; - iEnd = rawValue->Find(L'\n', iStart); - } - - CXFA_Node* pParentNode = pNode->GetNodeItem(XFA_NODEITEM_Parent); - ASSERT(pParentNode); - CXFA_Node* pGrandparentNode = - pParentNode->GetNodeItem(XFA_NODEITEM_Parent); - ASSERT(pGrandparentNode); - WideString bodyTagName; - bodyTagName = pGrandparentNode->JSNode()->GetCData(XFA_Attribute::Name); - if (bodyTagName.IsEmpty()) - bodyTagName = L"ListBox1"; - - buf << L"<"; - buf << bodyTagName; - buf << L" xmlns=\"\"\n>"; - for (int32_t i = 0; i < pdfium::CollectionSize<int32_t>(wsSelTextArray); - i++) { - buf << L"<value\n>"; - buf << ExportEncodeContent(wsSelTextArray[i]); - buf << L"</value\n>"; - } - buf << L"</"; - buf << bodyTagName; - buf << L"\n>"; - wsChildren += buf.AsStringView(); - buf.Clear(); - } else { - WideString wsValue = - pRawValueNode->JSNode()->GetCData(XFA_Attribute::Value); - wsChildren += ExportEncodeContent(wsValue); - } - break; - } - case XFA_ObjectType::TextNode: - case XFA_ObjectType::NodeC: - case XFA_ObjectType::NodeV: { - WideString wsValue = pNode->JSNode()->GetCData(XFA_Attribute::Value); - wsChildren += ExportEncodeContent(wsValue); - break; - } - default: 
- if (pNode->GetElementType() == XFA_Element::Items) { - CXFA_Node* pTemplateNode = pNode->GetTemplateNode(); - if (!pTemplateNode || - pTemplateNode->CountChildren(XFA_Element::Unknown, false) != - pNode->CountChildren(XFA_Element::Unknown, false)) { - bSaveXML = true; - } - } - CFX_WideTextBuf newBuf; - CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild); - while (pChildNode) { - RegenerateFormFile_Changed(pChildNode, newBuf, bSaveXML); - wsChildren += newBuf.AsStringView(); - newBuf.Clear(); - pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling); - } - if (!bSaveXML && !wsChildren.IsEmpty() && - pNode->GetElementType() == XFA_Element::Items) { - wsChildren.clear(); - bSaveXML = true; - CXFA_Node* pChild = pNode->GetNodeItem(XFA_NODEITEM_FirstChild); - while (pChild) { - RegenerateFormFile_Changed(pChild, newBuf, bSaveXML); - wsChildren += newBuf.AsStringView(); - newBuf.Clear(); - pChild = pChild->GetNodeItem(XFA_NODEITEM_NextSibling); - } - } - break; - } - - if (!wsChildren.IsEmpty() || !wsAttrs.IsEmpty() || - pNode->JSNode()->HasAttribute(XFA_Attribute::Name)) { - WideStringView wsElement = pNode->GetClassName(); - WideString wsName; - SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsName); - buf << L"<"; - buf << wsElement; - buf << wsName; - buf << wsAttrs; - if (wsChildren.IsEmpty()) { - buf << L"\n/>"; - } else { - buf << L"\n>"; - buf << wsChildren; - buf << L"</"; - buf << wsElement; - buf << L"\n>"; - } - } -} - -void RegenerateFormFile_Container( - CXFA_Node* pNode, - const RetainPtr<CFX_SeekableStreamProxy>& pStream, - bool bSaveXML) { - XFA_Element eType = pNode->GetElementType(); - if (eType == XFA_Element::Field || eType == XFA_Element::Draw || - !pNode->IsContainerNode()) { - CFX_WideTextBuf buf; - RegenerateFormFile_Changed(pNode, buf, bSaveXML); - size_t nLen = buf.GetLength(); - if (nLen > 0) - pStream->WriteString(buf.AsStringView()); - return; - } - - WideStringView wsElement(pNode->GetClassName()); - 
pStream->WriteString(L"<"); - pStream->WriteString(wsElement); - - WideString wsOutput; - SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsOutput); - - WideString wsAttrs; - for (size_t i = 0;; ++i) { - XFA_Attribute attr = pNode->GetAttribute(i); - if (attr == XFA_Attribute::Unknown) - break; - if (attr == XFA_Attribute::Name) - continue; - - WideString wsAttr; - SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), false, wsAttr); - wsOutput += wsAttr; - } - - if (!wsOutput.IsEmpty()) - pStream->WriteString(wsOutput.AsStringView()); - - CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild); - if (pChildNode) { - pStream->WriteString(L"\n>"); - while (pChildNode) { - RegenerateFormFile_Container(pChildNode, pStream, bSaveXML); - pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling); - } - pStream->WriteString(L"</"); - pStream->WriteString(wsElement); - pStream->WriteString(L"\n>"); - } else { - pStream->WriteString(L"\n/>"); - } -} - -} // namespace - -void XFA_DataExporter_RegenerateFormFile( - CXFA_Node* pNode, - const RetainPtr<CFX_SeekableStreamProxy>& pStream, - const char* pChecksum, - bool bSaveXML) { - if (pNode->IsModelNode()) { - pStream->WriteString(L"<form"); - if (pChecksum) { - WideString wsChecksum = WideString::FromUTF8(pChecksum); - pStream->WriteString(L" checksum=\""); - pStream->WriteString(wsChecksum.AsStringView()); - pStream->WriteString(L"\""); - } - pStream->WriteString(L" xmlns=\""); - pStream->WriteString(WideStringView(kFormNS)); - - WideString wsVersionNumber; - RecognizeXFAVersionNumber( - ToNode(pNode->GetDocument()->GetXFAObject(XFA_HASHCODE_Template)), - wsVersionNumber); - if (wsVersionNumber.IsEmpty()) - wsVersionNumber = L"2.8"; - - wsVersionNumber += L"/\"\n>"; - pStream->WriteString(wsVersionNumber.AsStringView()); - - CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild); - while (pChildNode) { - RegenerateFormFile_Container(pChildNode, pStream, false); - pChildNode = 
pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling); - } - pStream->WriteString(L"</form\n>"); - } else { - RegenerateFormFile_Container(pNode, pStream, bSaveXML); - } -} - -void XFA_DataExporter_DealWithDataGroupNode(CXFA_Node* pDataNode) { - if (!pDataNode || pDataNode->GetElementType() == XFA_Element::DataValue) - return; - - int32_t iChildNum = 0; - for (CXFA_Node* pChildNode = pDataNode->GetNodeItem(XFA_NODEITEM_FirstChild); - pChildNode; - pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling)) { - iChildNum++; - XFA_DataExporter_DealWithDataGroupNode(pChildNode); - } - - if (pDataNode->GetElementType() != XFA_Element::DataGroup) - return; - - if (iChildNum > 0) { - CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode(); - ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element); - CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode); - if (pXMLElement->HasAttribute(L"xfa:dataNode")) - pXMLElement->RemoveAttribute(L"xfa:dataNode"); - - return; - } - - CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode(); - ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element); - static_cast<CFX_XMLElement*>(pXMLNode)->SetString(L"xfa:dataNode", - L"dataGroup"); -} - CXFA_DataExporter::CXFA_DataExporter(CXFA_Document* pDocument) : m_pDocument(pDocument) { ASSERT(m_pDocument); @@ -513,7 +71,7 @@ bool CXFA_DataExporter::Export( break; } case XFA_PacketType::Form: { - XFA_DataExporter_RegenerateFormFile(pNode, pStream, pChecksum); + XFA_DataExporter_RegenerateFormFile(pNode, pStream, pChecksum, false); break; } case XFA_PacketType::Template: diff --git a/xfa/fxfa/parser/cxfa_simple_parser.cpp b/xfa/fxfa/parser/cxfa_simple_parser.cpp index b3965ed6a2..bd3fdf7af1 100644 --- a/xfa/fxfa/parser/cxfa_simple_parser.cpp +++ b/xfa/fxfa/parser/cxfa_simple_parser.cpp @@ -292,6 +292,48 @@ void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) { } } +WideString GetPlainTextFromRichText(CFX_XMLNode* pXMLNode) { + if (!pXMLNode) + return L""; + + WideString 
wsPlainText; + switch (pXMLNode->GetType()) { + case FX_XMLNODE_Element: { + CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode); + WideString wsTag = pXMLElement->GetLocalTagName(); + uint32_t uTag = FX_HashCode_GetW(wsTag.AsStringView(), true); + if (uTag == 0x0001f714) { + wsPlainText += L"\n"; + } else if (uTag == 0x00000070) { + if (!wsPlainText.IsEmpty()) { + wsPlainText += L"\n"; + } + } else if (uTag == 0xa48ac63) { + if (!wsPlainText.IsEmpty() && + wsPlainText[wsPlainText.GetLength() - 1] != '\n') { + wsPlainText += L"\n"; + } + } + break; + } + case FX_XMLNODE_Text: + case FX_XMLNODE_CharData: { + WideString wsContent = static_cast<CFX_XMLText*>(pXMLNode)->GetText(); + wsPlainText += wsContent; + break; + } + default: + break; + } + for (CFX_XMLNode* pChildXML = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild); + pChildXML; + pChildXML = pChildXML->GetNodeItem(CFX_XMLNode::NextSibling)) { + wsPlainText += GetPlainTextFromRichText(pChildXML); + } + + return wsPlainText; +} + } // namespace bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) { @@ -396,8 +438,7 @@ void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode, WideString wsNodeStr = child->GetLocalTagName(); pXFAChild->JSNode()->SetCData(XFA_Attribute::Name, wsNodeStr, false, false); - WideString wsChildValue; - XFA_GetPlainTextFromRichText(child, wsChildValue); + WideString wsChildValue = GetPlainTextFromRichText(child); if (!wsChildValue.IsEmpty()) pXFAChild->JSNode()->SetCData(XFA_Attribute::Value, wsChildValue, false, false); @@ -428,36 +469,6 @@ CFX_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const { return m_pXMLDoc.get(); } -bool XFA_FDEExtension_ResolveNamespaceQualifier(CFX_XMLElement* pNode, - const WideString& wsQualifier, - WideString* wsNamespaceURI) { - if (!pNode) - return false; - - CFX_XMLNode* pFakeRoot = pNode->GetNodeItem(CFX_XMLNode::Root); - WideString wsNSAttribute; - bool bRet = false; - if (wsQualifier.IsEmpty()) { - wsNSAttribute = L"xmlns"; - bRet 
= true; - } else { - wsNSAttribute = L"xmlns:" + wsQualifier; - } - for (CFX_XMLNode* pParent = pNode; pParent != pFakeRoot; - pParent = pParent->GetNodeItem(CFX_XMLNode::Parent)) { - if (pParent->GetType() != FX_XMLNODE_Element) - continue; - - auto* pElement = static_cast<CFX_XMLElement*>(pParent); - if (pElement->HasAttribute(wsNSAttribute.c_str())) { - *wsNamespaceURI = pElement->GetString(wsNSAttribute.c_str()); - return true; - } - } - wsNamespaceURI->clear(); - return bRet; -} - CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode, XFA_PacketType ePacketID) { switch (ePacketID) { @@ -958,8 +969,8 @@ void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode, break; if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild))) - XFA_GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild), - wsValue); + wsValue += + GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild)); } else if (element == XFA_Element::Sharpxml) { if (eNodeType != FX_XMLNODE_Element) break; @@ -1139,16 +1150,15 @@ void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode, if (eNodeType == FX_XMLNODE_Instruction) continue; - WideString wsText; if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData) { - wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText(); + WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText(); if (!pXMLCurValueNode) pXMLCurValueNode = pXMLChild; wsCurValueTextBuf << wsText; } else if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild))) { - XFA_GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild), - wsText); + WideString wsText = + GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild)); if (!pXMLCurValueNode) pXMLCurValueNode = pXMLChild; diff --git a/xfa/fxfa/parser/xfa_utils.cpp b/xfa/fxfa/parser/xfa_utils.cpp index dbb9748fbf..4dbed96f25 100644 --- a/xfa/fxfa/parser/xfa_utils.cpp +++ b/xfa/fxfa/parser/xfa_utils.cpp @@ -7,7 +7,11 @@ #include 
"xfa/fxfa/parser/xfa_utils.h" #include <algorithm> +#include <vector> +#include "core/fxcrt/cfx_memorystream.h" +#include "core/fxcrt/cfx_widetextbuf.h" +#include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/fx_extension.h" #include "core/fxcrt/xml/cfx_xmlchardata.h" #include "core/fxcrt/xml/cfx_xmlelement.h" @@ -22,6 +26,8 @@ namespace { +constexpr const wchar_t kFormNS[] = L"http://www.xfa.org/schema/xfa-form/"; + const double fraction_scales[] = {0.1, 0.01, 0.001, @@ -39,6 +45,370 @@ const double fraction_scales[] = {0.1, 0.000000000000001, 0.0000000000000001}; +WideString ExportEncodeAttribute(const WideString& str) { + CFX_WideTextBuf textBuf; + int32_t iLen = str.GetLength(); + for (int32_t i = 0; i < iLen; i++) { + switch (str[i]) { + case '&': + textBuf << L"&"; + break; + case '<': + textBuf << L"<"; + break; + case '>': + textBuf << L">"; + break; + case '\'': + textBuf << L"'"; + break; + case '\"': + textBuf << L"""; + break; + default: + textBuf.AppendChar(str[i]); + } + } + return textBuf.MakeString(); +} + +bool IsXMLValidChar(wchar_t ch) { + return ch == 0x09 || ch == 0x0A || ch == 0x0D || + (ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD); +} + +WideString ExportEncodeContent(const WideString& str) { + CFX_WideTextBuf textBuf; + int32_t iLen = str.GetLength(); + for (int32_t i = 0; i < iLen; i++) { + wchar_t ch = str[i]; + if (!IsXMLValidChar(ch)) + continue; + + if (ch == '&') { + textBuf << L"&"; + } else if (ch == '<') { + textBuf << L"<"; + } else if (ch == '>') { + textBuf << L">"; + } else if (ch == '\'') { + textBuf << L"'"; + } else if (ch == '\"') { + textBuf << L"""; + } else if (ch == ' ') { + if (i && str[i - 1] != ' ') { + textBuf.AppendChar(' '); + } else { + textBuf << L" "; + } + } else { + textBuf.AppendChar(str[i]); + } + } + return textBuf.MakeString(); +} + +bool AttributeSaveInDataModel(CXFA_Node* pNode, XFA_Attribute eAttribute) { + bool bSaveInDataModel = false; + if (pNode->GetElementType() != 
XFA_Element::Image) + return bSaveInDataModel; + + CXFA_Node* pValueNode = pNode->GetNodeItem(XFA_NODEITEM_Parent); + if (!pValueNode || pValueNode->GetElementType() != XFA_Element::Value) + return bSaveInDataModel; + + CXFA_Node* pFieldNode = pValueNode->GetNodeItem(XFA_NODEITEM_Parent); + if (pFieldNode && pFieldNode->GetBindData() && + eAttribute == XFA_Attribute::Href) { + bSaveInDataModel = true; + } + return bSaveInDataModel; +} + +bool ContentNodeNeedtoExport(CXFA_Node* pContentNode) { + pdfium::Optional<WideString> wsContent = + pContentNode->JSNode()->TryContent(false, false); + if (!wsContent) + return false; + + ASSERT(pContentNode->IsContentNode()); + CXFA_Node* pParentNode = pContentNode->GetNodeItem(XFA_NODEITEM_Parent); + if (!pParentNode || pParentNode->GetElementType() != XFA_Element::Value) + return true; + + CXFA_Node* pGrandParentNode = pParentNode->GetNodeItem(XFA_NODEITEM_Parent); + if (!pGrandParentNode || !pGrandParentNode->IsContainerNode()) + return true; + if (pGrandParentNode->GetBindData()) + return false; + + CXFA_WidgetData* pWidgetData = pGrandParentNode->GetWidgetData(); + XFA_Element eUIType = pWidgetData->GetUIType(); + if (eUIType == XFA_Element::PasswordEdit) + return false; + return true; +} + +void SaveAttribute(CXFA_Node* pNode, + XFA_Attribute eName, + const WideString& wsName, + bool bProto, + WideString& wsOutput) { + if (!bProto && !pNode->JSNode()->HasAttribute(eName)) + return; + + pdfium::Optional<WideString> value = + pNode->JSNode()->TryAttribute(eName, false); + if (!value) + return; + + wsOutput += L" "; + wsOutput += wsName; + wsOutput += L"=\""; + wsOutput += ExportEncodeAttribute(*value); + wsOutput += L"\""; +} + +void RegenerateFormFile_Changed(CXFA_Node* pNode, + CFX_WideTextBuf& buf, + bool bSaveXML) { + WideString wsAttrs; + for (size_t i = 0;; ++i) { + XFA_Attribute attr = pNode->GetAttribute(i); + if (attr == XFA_Attribute::Unknown) + break; + + if (attr == XFA_Attribute::Name || + 
(AttributeSaveInDataModel(pNode, attr) && !bSaveXML)) { + continue; + } + WideString wsAttr; + SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), bSaveXML, + wsAttr); + wsAttrs += wsAttr; + } + + WideString wsChildren; + switch (pNode->GetObjectType()) { + case XFA_ObjectType::ContentNode: { + if (!bSaveXML && !ContentNodeNeedtoExport(pNode)) + break; + + CXFA_Node* pRawValueNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild); + while (pRawValueNode && + pRawValueNode->GetElementType() != XFA_Element::SharpxHTML && + pRawValueNode->GetElementType() != XFA_Element::Sharptext && + pRawValueNode->GetElementType() != XFA_Element::Sharpxml) { + pRawValueNode = pRawValueNode->GetNodeItem(XFA_NODEITEM_NextSibling); + } + if (!pRawValueNode) + break; + + pdfium::Optional<WideString> contentType = + pNode->JSNode()->TryAttribute(XFA_Attribute::ContentType, false); + if (pRawValueNode->GetElementType() == XFA_Element::SharpxHTML && + (contentType && *contentType == L"text/html")) { + CFX_XMLNode* pExDataXML = pNode->GetXMLMappingNode(); + if (!pExDataXML) + break; + + CFX_XMLNode* pRichTextXML = + pExDataXML->GetNodeItem(CFX_XMLNode::FirstChild); + if (!pRichTextXML) + break; + + auto pMemStream = pdfium::MakeRetain<CFX_MemoryStream>(true); + auto pTempStream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>(pMemStream, true); + + pTempStream->SetCodePage(FX_CODEPAGE_UTF8); + pRichTextXML->SaveXMLNode(pTempStream); + wsChildren += WideString::FromUTF8( + ByteStringView(pMemStream->GetBuffer(), pMemStream->GetSize())); + } else if (pRawValueNode->GetElementType() == XFA_Element::Sharpxml && + (contentType && *contentType == L"text/xml")) { + pdfium::Optional<WideString> rawValue = + pRawValueNode->JSNode()->TryAttribute(XFA_Attribute::Value, false); + if (!rawValue || rawValue->IsEmpty()) + break; + + std::vector<WideString> wsSelTextArray; + size_t iStart = 0; + auto iEnd = rawValue->Find(L'\n', iStart); + iEnd = !iEnd.has_value() ? 
rawValue->GetLength() : iEnd; + while (iEnd.has_value() && iEnd >= iStart) { + wsSelTextArray.push_back( + rawValue->Mid(iStart, iEnd.value() - iStart)); + iStart = iEnd.value() + 1; + if (iStart >= rawValue->GetLength()) + break; + iEnd = rawValue->Find(L'\n', iStart); + } + + CXFA_Node* pParentNode = pNode->GetNodeItem(XFA_NODEITEM_Parent); + ASSERT(pParentNode); + CXFA_Node* pGrandparentNode = + pParentNode->GetNodeItem(XFA_NODEITEM_Parent); + ASSERT(pGrandparentNode); + WideString bodyTagName; + bodyTagName = pGrandparentNode->JSNode()->GetCData(XFA_Attribute::Name); + if (bodyTagName.IsEmpty()) + bodyTagName = L"ListBox1"; + + buf << L"<"; + buf << bodyTagName; + buf << L" xmlns=\"\"\n>"; + for (int32_t i = 0; i < pdfium::CollectionSize<int32_t>(wsSelTextArray); + i++) { + buf << L"<value\n>"; + buf << ExportEncodeContent(wsSelTextArray[i]); + buf << L"</value\n>"; + } + buf << L"</"; + buf << bodyTagName; + buf << L"\n>"; + wsChildren += buf.AsStringView(); + buf.Clear(); + } else { + WideString wsValue = + pRawValueNode->JSNode()->GetCData(XFA_Attribute::Value); + wsChildren += ExportEncodeContent(wsValue); + } + break; + } + case XFA_ObjectType::TextNode: + case XFA_ObjectType::NodeC: + case XFA_ObjectType::NodeV: { + WideString wsValue = pNode->JSNode()->GetCData(XFA_Attribute::Value); + wsChildren += ExportEncodeContent(wsValue); + break; + } + default: + if (pNode->GetElementType() == XFA_Element::Items) { + CXFA_Node* pTemplateNode = pNode->GetTemplateNode(); + if (!pTemplateNode || + pTemplateNode->CountChildren(XFA_Element::Unknown, false) != + pNode->CountChildren(XFA_Element::Unknown, false)) { + bSaveXML = true; + } + } + CFX_WideTextBuf newBuf; + CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild); + while (pChildNode) { + RegenerateFormFile_Changed(pChildNode, newBuf, bSaveXML); + wsChildren += newBuf.AsStringView(); + newBuf.Clear(); + pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling); + } + if (!bSaveXML && 
!wsChildren.IsEmpty() && + pNode->GetElementType() == XFA_Element::Items) { + wsChildren.clear(); + bSaveXML = true; + CXFA_Node* pChild = pNode->GetNodeItem(XFA_NODEITEM_FirstChild); + while (pChild) { + RegenerateFormFile_Changed(pChild, newBuf, bSaveXML); + wsChildren += newBuf.AsStringView(); + newBuf.Clear(); + pChild = pChild->GetNodeItem(XFA_NODEITEM_NextSibling); + } + } + break; + } + + if (!wsChildren.IsEmpty() || !wsAttrs.IsEmpty() || + pNode->JSNode()->HasAttribute(XFA_Attribute::Name)) { + WideStringView wsElement = pNode->GetClassName(); + WideString wsName; + SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsName); + buf << L"<"; + buf << wsElement; + buf << wsName; + buf << wsAttrs; + if (wsChildren.IsEmpty()) { + buf << L"\n/>"; + } else { + buf << L"\n>"; + buf << wsChildren; + buf << L"</"; + buf << wsElement; + buf << L"\n>"; + } + } +} + +void RegenerateFormFile_Container( + CXFA_Node* pNode, + const RetainPtr<CFX_SeekableStreamProxy>& pStream, + bool bSaveXML) { + XFA_Element eType = pNode->GetElementType(); + if (eType == XFA_Element::Field || eType == XFA_Element::Draw || + !pNode->IsContainerNode()) { + CFX_WideTextBuf buf; + RegenerateFormFile_Changed(pNode, buf, bSaveXML); + size_t nLen = buf.GetLength(); + if (nLen > 0) + pStream->WriteString(buf.AsStringView()); + return; + } + + WideStringView wsElement(pNode->GetClassName()); + pStream->WriteString(L"<"); + pStream->WriteString(wsElement); + + WideString wsOutput; + SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsOutput); + + WideString wsAttrs; + for (size_t i = 0;; ++i) { + XFA_Attribute attr = pNode->GetAttribute(i); + if (attr == XFA_Attribute::Unknown) + break; + if (attr == XFA_Attribute::Name) + continue; + + WideString wsAttr; + SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), false, wsAttr); + wsOutput += wsAttr; + } + + if (!wsOutput.IsEmpty()) + pStream->WriteString(wsOutput.AsStringView()); + + CXFA_Node* pChildNode = 
pNode->GetNodeItem(XFA_NODEITEM_FirstChild); + if (pChildNode) { + pStream->WriteString(L"\n>"); + while (pChildNode) { + RegenerateFormFile_Container(pChildNode, pStream, bSaveXML); + pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling); + } + pStream->WriteString(L"</"); + pStream->WriteString(wsElement); + pStream->WriteString(L"\n>"); + } else { + pStream->WriteString(L"\n/>"); + } +} + +void RecognizeXFAVersionNumber(CXFA_Node* pTemplateRoot, + WideString& wsVersionNumber) { + wsVersionNumber.clear(); + if (!pTemplateRoot) + return; + + pdfium::Optional<WideString> templateNS = + pTemplateRoot->JSNode()->TryNamespace(); + if (!templateNS) + return; + + XFA_VERSION eVersion = + pTemplateRoot->GetDocument()->RecognizeXFAVersionNumber(*templateNS); + if (eVersion == XFA_VERSION_UNKNOWN) + eVersion = XFA_VERSION_DEFAULT; + + wsVersionNumber = + WideString::Format(L"%i.%i", eVersion / 100, eVersion % 100); +} + } // namespace double XFA_GetFractionalScale(uint32_t idx) { @@ -92,43 +462,102 @@ CXFA_LocaleValue XFA_GetLocaleValue(CXFA_WidgetData* pWidgetData) { return CXFA_LocaleValue(iVTType, pWidgetData->GetRawValue(), pWidgetData->GetNode()->GetDocument()->GetLocalMgr()); } -void XFA_GetPlainTextFromRichText(CFX_XMLNode* pXMLNode, - WideString& wsPlainText) { - if (!pXMLNode) { + +bool XFA_FDEExtension_ResolveNamespaceQualifier(CFX_XMLElement* pNode, + const WideString& wsQualifier, + WideString* wsNamespaceURI) { + if (!pNode) + return false; + + CFX_XMLNode* pFakeRoot = pNode->GetNodeItem(CFX_XMLNode::Root); + WideString wsNSAttribute; + bool bRet = false; + if (wsQualifier.IsEmpty()) { + wsNSAttribute = L"xmlns"; + bRet = true; + } else { + wsNSAttribute = L"xmlns:" + wsQualifier; + } + for (CFX_XMLNode* pParent = pNode; pParent != pFakeRoot; + pParent = pParent->GetNodeItem(CFX_XMLNode::Parent)) { + if (pParent->GetType() != FX_XMLNODE_Element) + continue; + + auto* pElement = static_cast<CFX_XMLElement*>(pParent); + if 
(pElement->HasAttribute(wsNSAttribute.c_str())) { + *wsNamespaceURI = pElement->GetString(wsNSAttribute.c_str()); + return true; + } + } + wsNamespaceURI->clear(); + return bRet; +} + +void XFA_DataExporter_DealWithDataGroupNode(CXFA_Node* pDataNode) { + if (!pDataNode || pDataNode->GetElementType() == XFA_Element::DataValue) return; + + int32_t iChildNum = 0; + for (CXFA_Node* pChildNode = pDataNode->GetNodeItem(XFA_NODEITEM_FirstChild); + pChildNode; + pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling)) { + iChildNum++; + XFA_DataExporter_DealWithDataGroupNode(pChildNode); } - switch (pXMLNode->GetType()) { - case FX_XMLNODE_Element: { - CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode); - WideString wsTag = pXMLElement->GetLocalTagName(); - uint32_t uTag = FX_HashCode_GetW(wsTag.AsStringView(), true); - if (uTag == 0x0001f714) { - wsPlainText += L"\n"; - } else if (uTag == 0x00000070) { - if (!wsPlainText.IsEmpty()) { - wsPlainText += L"\n"; - } - } else if (uTag == 0xa48ac63) { - if (!wsPlainText.IsEmpty() && - wsPlainText[wsPlainText.GetLength() - 1] != '\n') { - wsPlainText += L"\n"; - } - } - break; + + if (pDataNode->GetElementType() != XFA_Element::DataGroup) + return; + + if (iChildNum > 0) { + CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode(); + ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element); + CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode); + if (pXMLElement->HasAttribute(L"xfa:dataNode")) + pXMLElement->RemoveAttribute(L"xfa:dataNode"); + + return; + } + + CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode(); + ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element); + static_cast<CFX_XMLElement*>(pXMLNode)->SetString(L"xfa:dataNode", + L"dataGroup"); +} + +void XFA_DataExporter_RegenerateFormFile( + CXFA_Node* pNode, + const RetainPtr<CFX_SeekableStreamProxy>& pStream, + const char* pChecksum, + bool bSaveXML) { + if (pNode->IsModelNode()) { + pStream->WriteString(L"<form"); + if (pChecksum) { + 
WideString wsChecksum = WideString::FromUTF8(pChecksum); + pStream->WriteString(L" checksum=\""); + pStream->WriteString(wsChecksum.AsStringView()); + pStream->WriteString(L"\""); } - case FX_XMLNODE_Text: - case FX_XMLNODE_CharData: { - WideString wsContent = static_cast<CFX_XMLText*>(pXMLNode)->GetText(); - wsPlainText += wsContent; - break; + pStream->WriteString(L" xmlns=\""); + pStream->WriteString(WideStringView(kFormNS)); + + WideString wsVersionNumber; + RecognizeXFAVersionNumber( + ToNode(pNode->GetDocument()->GetXFAObject(XFA_HASHCODE_Template)), + wsVersionNumber); + if (wsVersionNumber.IsEmpty()) + wsVersionNumber = L"2.8"; + + wsVersionNumber += L"/\"\n>"; + pStream->WriteString(wsVersionNumber.AsStringView()); + + CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild); + while (pChildNode) { + RegenerateFormFile_Container(pChildNode, pStream, false); + pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling); } - default: - break; - } - for (CFX_XMLNode* pChildXML = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild); - pChildXML; - pChildXML = pChildXML->GetNodeItem(CFX_XMLNode::NextSibling)) { - XFA_GetPlainTextFromRichText(pChildXML, wsPlainText); + pStream->WriteString(L"</form\n>"); + } else { + RegenerateFormFile_Container(pNode, pStream, bSaveXML); } } diff --git a/xfa/fxfa/parser/xfa_utils.h b/xfa/fxfa/parser/xfa_utils.h index 15b6983f9a..27e6534aab 100644 --- a/xfa/fxfa/parser/xfa_utils.h +++ b/xfa/fxfa/parser/xfa_utils.h @@ -27,16 +27,14 @@ CXFA_LocaleValue XFA_GetLocaleValue(CXFA_WidgetData* pWidgetData); int32_t XFA_MapRotation(int32_t nRotation); bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode); -void XFA_GetPlainTextFromRichText(CFX_XMLNode* pXMLNode, - WideString& wsPlainText); bool XFA_FieldIsMultiListBox(CXFA_Node* pFieldNode); void XFA_DataExporter_DealWithDataGroupNode(CXFA_Node* pDataNode); void XFA_DataExporter_RegenerateFormFile( CXFA_Node* pNode, const RetainPtr<CFX_SeekableStreamProxy>& pStream, - 
const char* pChecksum = nullptr, - bool bSaveXML = false); + const char* pChecksum, + bool bSaveXML); const XFA_SCRIPTATTRIBUTEINFO* XFA_GetScriptAttributeByName( XFA_Element eElement, |