summary | refs | log | tree | commit | diff
path: root/xfa/fxfa/parser
diff options
context:
space:
mode:
author dan sinclair <dsinclair@chromium.org> 2017-12-04 15:44:46 +0000
committer Chromium commit bot <commit-bot@chromium.org> 2017-12-04 15:44:46 +0000
commit a827770faf5c3cfb157d59d401134e986dc4a18d (patch)
tree 9c28f5cece852f498e74c945508d1456ad775a4b /xfa/fxfa/parser
parent f2d7656f335cf38dfafb97370ffac38b9d4e3fc0 (diff)
download pdfium-a827770faf5c3cfb157d59d401134e986dc4a18d.tar.xz
Shuffle XFA code around
This CL moves some XFA code around to better match the files in which it is either used or defined.

Change-Id: Iec3df187326e48189a048355a8bef76cccb4cdb4
Reviewed-on: https://pdfium-review.googlesource.com/20250
Commit-Queue: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Diffstat (limited to 'xfa/fxfa/parser')
-rw-r--r-- xfa/fxfa/parser/cxfa_dataexporter.cpp 444
-rw-r--r-- xfa/fxfa/parser/cxfa_simple_parser.cpp 86
-rw-r--r-- xfa/fxfa/parser/xfa_utils.cpp 495
-rw-r--r-- xfa/fxfa/parser/xfa_utils.h 6
4 files changed, 513 insertions, 518 deletions
diff --git a/xfa/fxfa/parser/cxfa_dataexporter.cpp b/xfa/fxfa/parser/cxfa_dataexporter.cpp
index 02c316d920..2b5f3e6755 100644
--- a/xfa/fxfa/parser/cxfa_dataexporter.cpp
+++ b/xfa/fxfa/parser/cxfa_dataexporter.cpp
@@ -6,10 +6,6 @@
#include "xfa/fxfa/parser/cxfa_dataexporter.h"
-#include <vector>
-
-#include "core/fxcrt/cfx_memorystream.h"
-#include "core/fxcrt/cfx_widetextbuf.h"
#include "core/fxcrt/fx_codepage.h"
#include "core/fxcrt/xml/cfx_xmldoc.h"
#include "core/fxcrt/xml/cfx_xmlelement.h"
@@ -20,444 +16,6 @@
#include "xfa/fxfa/parser/cxfa_widgetdata.h"
#include "xfa/fxfa/parser/xfa_utils.h"
-namespace {
-
-constexpr const wchar_t kFormNS[] = L"http://www.xfa.org/schema/xfa-form/";
-
-WideString ExportEncodeAttribute(const WideString& str) {
- CFX_WideTextBuf textBuf;
- int32_t iLen = str.GetLength();
- for (int32_t i = 0; i < iLen; i++) {
- switch (str[i]) {
- case '&':
- textBuf << L"&amp;";
- break;
- case '<':
- textBuf << L"&lt;";
- break;
- case '>':
- textBuf << L"&gt;";
- break;
- case '\'':
- textBuf << L"&apos;";
- break;
- case '\"':
- textBuf << L"&quot;";
- break;
- default:
- textBuf.AppendChar(str[i]);
- }
- }
- return textBuf.MakeString();
-}
-
-bool IsXMLValidChar(wchar_t ch) {
- return ch == 0x09 || ch == 0x0A || ch == 0x0D ||
- (ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD);
-}
-
-WideString ExportEncodeContent(const WideString& str) {
- CFX_WideTextBuf textBuf;
- int32_t iLen = str.GetLength();
- for (int32_t i = 0; i < iLen; i++) {
- wchar_t ch = str[i];
- if (!IsXMLValidChar(ch))
- continue;
-
- if (ch == '&') {
- textBuf << L"&amp;";
- } else if (ch == '<') {
- textBuf << L"&lt;";
- } else if (ch == '>') {
- textBuf << L"&gt;";
- } else if (ch == '\'') {
- textBuf << L"&apos;";
- } else if (ch == '\"') {
- textBuf << L"&quot;";
- } else if (ch == ' ') {
- if (i && str[i - 1] != ' ') {
- textBuf.AppendChar(' ');
- } else {
- textBuf << L"&#x20;";
- }
- } else {
- textBuf.AppendChar(str[i]);
- }
- }
- return textBuf.MakeString();
-}
-
-void SaveAttribute(CXFA_Node* pNode,
- XFA_Attribute eName,
- const WideString& wsName,
- bool bProto,
- WideString& wsOutput) {
- if (!bProto && !pNode->JSNode()->HasAttribute(eName))
- return;
-
- pdfium::Optional<WideString> value =
- pNode->JSNode()->TryAttribute(eName, false);
- if (!value)
- return;
-
- wsOutput += L" ";
- wsOutput += wsName;
- wsOutput += L"=\"";
- wsOutput += ExportEncodeAttribute(*value);
- wsOutput += L"\"";
-}
-
-bool AttributeSaveInDataModel(CXFA_Node* pNode, XFA_Attribute eAttribute) {
- bool bSaveInDataModel = false;
- if (pNode->GetElementType() != XFA_Element::Image)
- return bSaveInDataModel;
-
- CXFA_Node* pValueNode = pNode->GetNodeItem(XFA_NODEITEM_Parent);
- if (!pValueNode || pValueNode->GetElementType() != XFA_Element::Value)
- return bSaveInDataModel;
-
- CXFA_Node* pFieldNode = pValueNode->GetNodeItem(XFA_NODEITEM_Parent);
- if (pFieldNode && pFieldNode->GetBindData() &&
- eAttribute == XFA_Attribute::Href) {
- bSaveInDataModel = true;
- }
- return bSaveInDataModel;
-}
-
-bool ContentNodeNeedtoExport(CXFA_Node* pContentNode) {
- pdfium::Optional<WideString> wsContent =
- pContentNode->JSNode()->TryContent(false, false);
- if (!wsContent)
- return false;
-
- ASSERT(pContentNode->IsContentNode());
- CXFA_Node* pParentNode = pContentNode->GetNodeItem(XFA_NODEITEM_Parent);
- if (!pParentNode || pParentNode->GetElementType() != XFA_Element::Value)
- return true;
-
- CXFA_Node* pGrandParentNode = pParentNode->GetNodeItem(XFA_NODEITEM_Parent);
- if (!pGrandParentNode || !pGrandParentNode->IsContainerNode())
- return true;
- if (pGrandParentNode->GetBindData())
- return false;
-
- CXFA_WidgetData* pWidgetData = pGrandParentNode->GetWidgetData();
- XFA_Element eUIType = pWidgetData->GetUIType();
- if (eUIType == XFA_Element::PasswordEdit)
- return false;
- return true;
-}
-
-void RecognizeXFAVersionNumber(CXFA_Node* pTemplateRoot,
- WideString& wsVersionNumber) {
- wsVersionNumber.clear();
- if (!pTemplateRoot)
- return;
-
- pdfium::Optional<WideString> templateNS =
- pTemplateRoot->JSNode()->TryNamespace();
- if (!templateNS)
- return;
-
- XFA_VERSION eVersion =
- pTemplateRoot->GetDocument()->RecognizeXFAVersionNumber(*templateNS);
- if (eVersion == XFA_VERSION_UNKNOWN)
- eVersion = XFA_VERSION_DEFAULT;
-
- wsVersionNumber =
- WideString::Format(L"%i.%i", eVersion / 100, eVersion % 100);
-}
-
-void RegenerateFormFile_Changed(CXFA_Node* pNode,
- CFX_WideTextBuf& buf,
- bool bSaveXML) {
- WideString wsAttrs;
- for (size_t i = 0;; ++i) {
- XFA_Attribute attr = pNode->GetAttribute(i);
- if (attr == XFA_Attribute::Unknown)
- break;
-
- if (attr == XFA_Attribute::Name ||
- (AttributeSaveInDataModel(pNode, attr) && !bSaveXML)) {
- continue;
- }
- WideString wsAttr;
- SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), bSaveXML,
- wsAttr);
- wsAttrs += wsAttr;
- }
-
- WideString wsChildren;
- switch (pNode->GetObjectType()) {
- case XFA_ObjectType::ContentNode: {
- if (!bSaveXML && !ContentNodeNeedtoExport(pNode))
- break;
-
- CXFA_Node* pRawValueNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
- while (pRawValueNode &&
- pRawValueNode->GetElementType() != XFA_Element::SharpxHTML &&
- pRawValueNode->GetElementType() != XFA_Element::Sharptext &&
- pRawValueNode->GetElementType() != XFA_Element::Sharpxml) {
- pRawValueNode = pRawValueNode->GetNodeItem(XFA_NODEITEM_NextSibling);
- }
- if (!pRawValueNode)
- break;
-
- pdfium::Optional<WideString> contentType =
- pNode->JSNode()->TryAttribute(XFA_Attribute::ContentType, false);
- if (pRawValueNode->GetElementType() == XFA_Element::SharpxHTML &&
- (contentType && *contentType == L"text/html")) {
- CFX_XMLNode* pExDataXML = pNode->GetXMLMappingNode();
- if (!pExDataXML)
- break;
-
- CFX_XMLNode* pRichTextXML =
- pExDataXML->GetNodeItem(CFX_XMLNode::FirstChild);
- if (!pRichTextXML)
- break;
-
- auto pMemStream = pdfium::MakeRetain<CFX_MemoryStream>(true);
- auto pTempStream =
- pdfium::MakeRetain<CFX_SeekableStreamProxy>(pMemStream, true);
-
- pTempStream->SetCodePage(FX_CODEPAGE_UTF8);
- pRichTextXML->SaveXMLNode(pTempStream);
- wsChildren += WideString::FromUTF8(
- ByteStringView(pMemStream->GetBuffer(), pMemStream->GetSize()));
- } else if (pRawValueNode->GetElementType() == XFA_Element::Sharpxml &&
- (contentType && *contentType == L"text/xml")) {
- pdfium::Optional<WideString> rawValue =
- pRawValueNode->JSNode()->TryAttribute(XFA_Attribute::Value, false);
- if (!rawValue || rawValue->IsEmpty())
- break;
-
- std::vector<WideString> wsSelTextArray;
- size_t iStart = 0;
- auto iEnd = rawValue->Find(L'\n', iStart);
- iEnd = !iEnd.has_value() ? rawValue->GetLength() : iEnd;
- while (iEnd.has_value() && iEnd >= iStart) {
- wsSelTextArray.push_back(
- rawValue->Mid(iStart, iEnd.value() - iStart));
- iStart = iEnd.value() + 1;
- if (iStart >= rawValue->GetLength())
- break;
- iEnd = rawValue->Find(L'\n', iStart);
- }
-
- CXFA_Node* pParentNode = pNode->GetNodeItem(XFA_NODEITEM_Parent);
- ASSERT(pParentNode);
- CXFA_Node* pGrandparentNode =
- pParentNode->GetNodeItem(XFA_NODEITEM_Parent);
- ASSERT(pGrandparentNode);
- WideString bodyTagName;
- bodyTagName = pGrandparentNode->JSNode()->GetCData(XFA_Attribute::Name);
- if (bodyTagName.IsEmpty())
- bodyTagName = L"ListBox1";
-
- buf << L"<";
- buf << bodyTagName;
- buf << L" xmlns=\"\"\n>";
- for (int32_t i = 0; i < pdfium::CollectionSize<int32_t>(wsSelTextArray);
- i++) {
- buf << L"<value\n>";
- buf << ExportEncodeContent(wsSelTextArray[i]);
- buf << L"</value\n>";
- }
- buf << L"</";
- buf << bodyTagName;
- buf << L"\n>";
- wsChildren += buf.AsStringView();
- buf.Clear();
- } else {
- WideString wsValue =
- pRawValueNode->JSNode()->GetCData(XFA_Attribute::Value);
- wsChildren += ExportEncodeContent(wsValue);
- }
- break;
- }
- case XFA_ObjectType::TextNode:
- case XFA_ObjectType::NodeC:
- case XFA_ObjectType::NodeV: {
- WideString wsValue = pNode->JSNode()->GetCData(XFA_Attribute::Value);
- wsChildren += ExportEncodeContent(wsValue);
- break;
- }
- default:
- if (pNode->GetElementType() == XFA_Element::Items) {
- CXFA_Node* pTemplateNode = pNode->GetTemplateNode();
- if (!pTemplateNode ||
- pTemplateNode->CountChildren(XFA_Element::Unknown, false) !=
- pNode->CountChildren(XFA_Element::Unknown, false)) {
- bSaveXML = true;
- }
- }
- CFX_WideTextBuf newBuf;
- CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
- while (pChildNode) {
- RegenerateFormFile_Changed(pChildNode, newBuf, bSaveXML);
- wsChildren += newBuf.AsStringView();
- newBuf.Clear();
- pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling);
- }
- if (!bSaveXML && !wsChildren.IsEmpty() &&
- pNode->GetElementType() == XFA_Element::Items) {
- wsChildren.clear();
- bSaveXML = true;
- CXFA_Node* pChild = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
- while (pChild) {
- RegenerateFormFile_Changed(pChild, newBuf, bSaveXML);
- wsChildren += newBuf.AsStringView();
- newBuf.Clear();
- pChild = pChild->GetNodeItem(XFA_NODEITEM_NextSibling);
- }
- }
- break;
- }
-
- if (!wsChildren.IsEmpty() || !wsAttrs.IsEmpty() ||
- pNode->JSNode()->HasAttribute(XFA_Attribute::Name)) {
- WideStringView wsElement = pNode->GetClassName();
- WideString wsName;
- SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsName);
- buf << L"<";
- buf << wsElement;
- buf << wsName;
- buf << wsAttrs;
- if (wsChildren.IsEmpty()) {
- buf << L"\n/>";
- } else {
- buf << L"\n>";
- buf << wsChildren;
- buf << L"</";
- buf << wsElement;
- buf << L"\n>";
- }
- }
-}
-
-void RegenerateFormFile_Container(
- CXFA_Node* pNode,
- const RetainPtr<CFX_SeekableStreamProxy>& pStream,
- bool bSaveXML) {
- XFA_Element eType = pNode->GetElementType();
- if (eType == XFA_Element::Field || eType == XFA_Element::Draw ||
- !pNode->IsContainerNode()) {
- CFX_WideTextBuf buf;
- RegenerateFormFile_Changed(pNode, buf, bSaveXML);
- size_t nLen = buf.GetLength();
- if (nLen > 0)
- pStream->WriteString(buf.AsStringView());
- return;
- }
-
- WideStringView wsElement(pNode->GetClassName());
- pStream->WriteString(L"<");
- pStream->WriteString(wsElement);
-
- WideString wsOutput;
- SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsOutput);
-
- WideString wsAttrs;
- for (size_t i = 0;; ++i) {
- XFA_Attribute attr = pNode->GetAttribute(i);
- if (attr == XFA_Attribute::Unknown)
- break;
- if (attr == XFA_Attribute::Name)
- continue;
-
- WideString wsAttr;
- SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), false, wsAttr);
- wsOutput += wsAttr;
- }
-
- if (!wsOutput.IsEmpty())
- pStream->WriteString(wsOutput.AsStringView());
-
- CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
- if (pChildNode) {
- pStream->WriteString(L"\n>");
- while (pChildNode) {
- RegenerateFormFile_Container(pChildNode, pStream, bSaveXML);
- pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling);
- }
- pStream->WriteString(L"</");
- pStream->WriteString(wsElement);
- pStream->WriteString(L"\n>");
- } else {
- pStream->WriteString(L"\n/>");
- }
-}
-
-} // namespace
-
-void XFA_DataExporter_RegenerateFormFile(
- CXFA_Node* pNode,
- const RetainPtr<CFX_SeekableStreamProxy>& pStream,
- const char* pChecksum,
- bool bSaveXML) {
- if (pNode->IsModelNode()) {
- pStream->WriteString(L"<form");
- if (pChecksum) {
- WideString wsChecksum = WideString::FromUTF8(pChecksum);
- pStream->WriteString(L" checksum=\"");
- pStream->WriteString(wsChecksum.AsStringView());
- pStream->WriteString(L"\"");
- }
- pStream->WriteString(L" xmlns=\"");
- pStream->WriteString(WideStringView(kFormNS));
-
- WideString wsVersionNumber;
- RecognizeXFAVersionNumber(
- ToNode(pNode->GetDocument()->GetXFAObject(XFA_HASHCODE_Template)),
- wsVersionNumber);
- if (wsVersionNumber.IsEmpty())
- wsVersionNumber = L"2.8";
-
- wsVersionNumber += L"/\"\n>";
- pStream->WriteString(wsVersionNumber.AsStringView());
-
- CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
- while (pChildNode) {
- RegenerateFormFile_Container(pChildNode, pStream, false);
- pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling);
- }
- pStream->WriteString(L"</form\n>");
- } else {
- RegenerateFormFile_Container(pNode, pStream, bSaveXML);
- }
-}
-
-void XFA_DataExporter_DealWithDataGroupNode(CXFA_Node* pDataNode) {
- if (!pDataNode || pDataNode->GetElementType() == XFA_Element::DataValue)
- return;
-
- int32_t iChildNum = 0;
- for (CXFA_Node* pChildNode = pDataNode->GetNodeItem(XFA_NODEITEM_FirstChild);
- pChildNode;
- pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling)) {
- iChildNum++;
- XFA_DataExporter_DealWithDataGroupNode(pChildNode);
- }
-
- if (pDataNode->GetElementType() != XFA_Element::DataGroup)
- return;
-
- if (iChildNum > 0) {
- CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode();
- ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element);
- CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
- if (pXMLElement->HasAttribute(L"xfa:dataNode"))
- pXMLElement->RemoveAttribute(L"xfa:dataNode");
-
- return;
- }
-
- CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode();
- ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element);
- static_cast<CFX_XMLElement*>(pXMLNode)->SetString(L"xfa:dataNode",
- L"dataGroup");
-}
-
CXFA_DataExporter::CXFA_DataExporter(CXFA_Document* pDocument)
: m_pDocument(pDocument) {
ASSERT(m_pDocument);
@@ -513,7 +71,7 @@ bool CXFA_DataExporter::Export(
break;
}
case XFA_PacketType::Form: {
- XFA_DataExporter_RegenerateFormFile(pNode, pStream, pChecksum);
+ XFA_DataExporter_RegenerateFormFile(pNode, pStream, pChecksum, false);
break;
}
case XFA_PacketType::Template:
diff --git a/xfa/fxfa/parser/cxfa_simple_parser.cpp b/xfa/fxfa/parser/cxfa_simple_parser.cpp
index b3965ed6a2..bd3fdf7af1 100644
--- a/xfa/fxfa/parser/cxfa_simple_parser.cpp
+++ b/xfa/fxfa/parser/cxfa_simple_parser.cpp
@@ -292,6 +292,48 @@ void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) {
}
}
+WideString GetPlainTextFromRichText(CFX_XMLNode* pXMLNode) {
+ if (!pXMLNode)
+ return L"";
+
+ WideString wsPlainText;
+ switch (pXMLNode->GetType()) {
+ case FX_XMLNODE_Element: {
+ CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
+ WideString wsTag = pXMLElement->GetLocalTagName();
+ uint32_t uTag = FX_HashCode_GetW(wsTag.AsStringView(), true);
+ if (uTag == 0x0001f714) {
+ wsPlainText += L"\n";
+ } else if (uTag == 0x00000070) {
+ if (!wsPlainText.IsEmpty()) {
+ wsPlainText += L"\n";
+ }
+ } else if (uTag == 0xa48ac63) {
+ if (!wsPlainText.IsEmpty() &&
+ wsPlainText[wsPlainText.GetLength() - 1] != '\n') {
+ wsPlainText += L"\n";
+ }
+ }
+ break;
+ }
+ case FX_XMLNODE_Text:
+ case FX_XMLNODE_CharData: {
+ WideString wsContent = static_cast<CFX_XMLText*>(pXMLNode)->GetText();
+ wsPlainText += wsContent;
+ break;
+ }
+ default:
+ break;
+ }
+ for (CFX_XMLNode* pChildXML = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
+ pChildXML;
+ pChildXML = pChildXML->GetNodeItem(CFX_XMLNode::NextSibling)) {
+ wsPlainText += GetPlainTextFromRichText(pChildXML);
+ }
+
+ return wsPlainText;
+}
+
} // namespace
bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) {
@@ -396,8 +438,7 @@ void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
WideString wsNodeStr = child->GetLocalTagName();
pXFAChild->JSNode()->SetCData(XFA_Attribute::Name, wsNodeStr, false,
false);
- WideString wsChildValue;
- XFA_GetPlainTextFromRichText(child, wsChildValue);
+ WideString wsChildValue = GetPlainTextFromRichText(child);
if (!wsChildValue.IsEmpty())
pXFAChild->JSNode()->SetCData(XFA_Attribute::Value, wsChildValue,
false, false);
@@ -428,36 +469,6 @@ CFX_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
return m_pXMLDoc.get();
}
-bool XFA_FDEExtension_ResolveNamespaceQualifier(CFX_XMLElement* pNode,
- const WideString& wsQualifier,
- WideString* wsNamespaceURI) {
- if (!pNode)
- return false;
-
- CFX_XMLNode* pFakeRoot = pNode->GetNodeItem(CFX_XMLNode::Root);
- WideString wsNSAttribute;
- bool bRet = false;
- if (wsQualifier.IsEmpty()) {
- wsNSAttribute = L"xmlns";
- bRet = true;
- } else {
- wsNSAttribute = L"xmlns:" + wsQualifier;
- }
- for (CFX_XMLNode* pParent = pNode; pParent != pFakeRoot;
- pParent = pParent->GetNodeItem(CFX_XMLNode::Parent)) {
- if (pParent->GetType() != FX_XMLNODE_Element)
- continue;
-
- auto* pElement = static_cast<CFX_XMLElement*>(pParent);
- if (pElement->HasAttribute(wsNSAttribute.c_str())) {
- *wsNamespaceURI = pElement->GetString(wsNSAttribute.c_str());
- return true;
- }
- }
- wsNamespaceURI->clear();
- return bRet;
-}
-
CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode,
XFA_PacketType ePacketID) {
switch (ePacketID) {
@@ -958,8 +969,8 @@ void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
break;
if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild)))
- XFA_GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild),
- wsValue);
+ wsValue +=
+ GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild));
} else if (element == XFA_Element::Sharpxml) {
if (eNodeType != FX_XMLNODE_Element)
break;
@@ -1139,16 +1150,15 @@ void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
if (eNodeType == FX_XMLNODE_Instruction)
continue;
- WideString wsText;
if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData) {
- wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
+ WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
if (!pXMLCurValueNode)
pXMLCurValueNode = pXMLChild;
wsCurValueTextBuf << wsText;
} else if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild))) {
- XFA_GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild),
- wsText);
+ WideString wsText =
+ GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild));
if (!pXMLCurValueNode)
pXMLCurValueNode = pXMLChild;
diff --git a/xfa/fxfa/parser/xfa_utils.cpp b/xfa/fxfa/parser/xfa_utils.cpp
index dbb9748fbf..4dbed96f25 100644
--- a/xfa/fxfa/parser/xfa_utils.cpp
+++ b/xfa/fxfa/parser/xfa_utils.cpp
@@ -7,7 +7,11 @@
#include "xfa/fxfa/parser/xfa_utils.h"
#include <algorithm>
+#include <vector>
+#include "core/fxcrt/cfx_memorystream.h"
+#include "core/fxcrt/cfx_widetextbuf.h"
+#include "core/fxcrt/fx_codepage.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/xml/cfx_xmlchardata.h"
#include "core/fxcrt/xml/cfx_xmlelement.h"
@@ -22,6 +26,8 @@
namespace {
+constexpr const wchar_t kFormNS[] = L"http://www.xfa.org/schema/xfa-form/";
+
const double fraction_scales[] = {0.1,
0.01,
0.001,
@@ -39,6 +45,370 @@ const double fraction_scales[] = {0.1,
0.000000000000001,
0.0000000000000001};
+WideString ExportEncodeAttribute(const WideString& str) {
+ CFX_WideTextBuf textBuf;
+ int32_t iLen = str.GetLength();
+ for (int32_t i = 0; i < iLen; i++) {
+ switch (str[i]) {
+ case '&':
+ textBuf << L"&amp;";
+ break;
+ case '<':
+ textBuf << L"&lt;";
+ break;
+ case '>':
+ textBuf << L"&gt;";
+ break;
+ case '\'':
+ textBuf << L"&apos;";
+ break;
+ case '\"':
+ textBuf << L"&quot;";
+ break;
+ default:
+ textBuf.AppendChar(str[i]);
+ }
+ }
+ return textBuf.MakeString();
+}
+
+bool IsXMLValidChar(wchar_t ch) {
+ return ch == 0x09 || ch == 0x0A || ch == 0x0D ||
+ (ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD);
+}
+
+WideString ExportEncodeContent(const WideString& str) {
+ CFX_WideTextBuf textBuf;
+ int32_t iLen = str.GetLength();
+ for (int32_t i = 0; i < iLen; i++) {
+ wchar_t ch = str[i];
+ if (!IsXMLValidChar(ch))
+ continue;
+
+ if (ch == '&') {
+ textBuf << L"&amp;";
+ } else if (ch == '<') {
+ textBuf << L"&lt;";
+ } else if (ch == '>') {
+ textBuf << L"&gt;";
+ } else if (ch == '\'') {
+ textBuf << L"&apos;";
+ } else if (ch == '\"') {
+ textBuf << L"&quot;";
+ } else if (ch == ' ') {
+ if (i && str[i - 1] != ' ') {
+ textBuf.AppendChar(' ');
+ } else {
+ textBuf << L"&#x20;";
+ }
+ } else {
+ textBuf.AppendChar(str[i]);
+ }
+ }
+ return textBuf.MakeString();
+}
+
+bool AttributeSaveInDataModel(CXFA_Node* pNode, XFA_Attribute eAttribute) {
+ bool bSaveInDataModel = false;
+ if (pNode->GetElementType() != XFA_Element::Image)
+ return bSaveInDataModel;
+
+ CXFA_Node* pValueNode = pNode->GetNodeItem(XFA_NODEITEM_Parent);
+ if (!pValueNode || pValueNode->GetElementType() != XFA_Element::Value)
+ return bSaveInDataModel;
+
+ CXFA_Node* pFieldNode = pValueNode->GetNodeItem(XFA_NODEITEM_Parent);
+ if (pFieldNode && pFieldNode->GetBindData() &&
+ eAttribute == XFA_Attribute::Href) {
+ bSaveInDataModel = true;
+ }
+ return bSaveInDataModel;
+}
+
+bool ContentNodeNeedtoExport(CXFA_Node* pContentNode) {
+ pdfium::Optional<WideString> wsContent =
+ pContentNode->JSNode()->TryContent(false, false);
+ if (!wsContent)
+ return false;
+
+ ASSERT(pContentNode->IsContentNode());
+ CXFA_Node* pParentNode = pContentNode->GetNodeItem(XFA_NODEITEM_Parent);
+ if (!pParentNode || pParentNode->GetElementType() != XFA_Element::Value)
+ return true;
+
+ CXFA_Node* pGrandParentNode = pParentNode->GetNodeItem(XFA_NODEITEM_Parent);
+ if (!pGrandParentNode || !pGrandParentNode->IsContainerNode())
+ return true;
+ if (pGrandParentNode->GetBindData())
+ return false;
+
+ CXFA_WidgetData* pWidgetData = pGrandParentNode->GetWidgetData();
+ XFA_Element eUIType = pWidgetData->GetUIType();
+ if (eUIType == XFA_Element::PasswordEdit)
+ return false;
+ return true;
+}
+
+void SaveAttribute(CXFA_Node* pNode,
+ XFA_Attribute eName,
+ const WideString& wsName,
+ bool bProto,
+ WideString& wsOutput) {
+ if (!bProto && !pNode->JSNode()->HasAttribute(eName))
+ return;
+
+ pdfium::Optional<WideString> value =
+ pNode->JSNode()->TryAttribute(eName, false);
+ if (!value)
+ return;
+
+ wsOutput += L" ";
+ wsOutput += wsName;
+ wsOutput += L"=\"";
+ wsOutput += ExportEncodeAttribute(*value);
+ wsOutput += L"\"";
+}
+
+void RegenerateFormFile_Changed(CXFA_Node* pNode,
+ CFX_WideTextBuf& buf,
+ bool bSaveXML) {
+ WideString wsAttrs;
+ for (size_t i = 0;; ++i) {
+ XFA_Attribute attr = pNode->GetAttribute(i);
+ if (attr == XFA_Attribute::Unknown)
+ break;
+
+ if (attr == XFA_Attribute::Name ||
+ (AttributeSaveInDataModel(pNode, attr) && !bSaveXML)) {
+ continue;
+ }
+ WideString wsAttr;
+ SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), bSaveXML,
+ wsAttr);
+ wsAttrs += wsAttr;
+ }
+
+ WideString wsChildren;
+ switch (pNode->GetObjectType()) {
+ case XFA_ObjectType::ContentNode: {
+ if (!bSaveXML && !ContentNodeNeedtoExport(pNode))
+ break;
+
+ CXFA_Node* pRawValueNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
+ while (pRawValueNode &&
+ pRawValueNode->GetElementType() != XFA_Element::SharpxHTML &&
+ pRawValueNode->GetElementType() != XFA_Element::Sharptext &&
+ pRawValueNode->GetElementType() != XFA_Element::Sharpxml) {
+ pRawValueNode = pRawValueNode->GetNodeItem(XFA_NODEITEM_NextSibling);
+ }
+ if (!pRawValueNode)
+ break;
+
+ pdfium::Optional<WideString> contentType =
+ pNode->JSNode()->TryAttribute(XFA_Attribute::ContentType, false);
+ if (pRawValueNode->GetElementType() == XFA_Element::SharpxHTML &&
+ (contentType && *contentType == L"text/html")) {
+ CFX_XMLNode* pExDataXML = pNode->GetXMLMappingNode();
+ if (!pExDataXML)
+ break;
+
+ CFX_XMLNode* pRichTextXML =
+ pExDataXML->GetNodeItem(CFX_XMLNode::FirstChild);
+ if (!pRichTextXML)
+ break;
+
+ auto pMemStream = pdfium::MakeRetain<CFX_MemoryStream>(true);
+ auto pTempStream =
+ pdfium::MakeRetain<CFX_SeekableStreamProxy>(pMemStream, true);
+
+ pTempStream->SetCodePage(FX_CODEPAGE_UTF8);
+ pRichTextXML->SaveXMLNode(pTempStream);
+ wsChildren += WideString::FromUTF8(
+ ByteStringView(pMemStream->GetBuffer(), pMemStream->GetSize()));
+ } else if (pRawValueNode->GetElementType() == XFA_Element::Sharpxml &&
+ (contentType && *contentType == L"text/xml")) {
+ pdfium::Optional<WideString> rawValue =
+ pRawValueNode->JSNode()->TryAttribute(XFA_Attribute::Value, false);
+ if (!rawValue || rawValue->IsEmpty())
+ break;
+
+ std::vector<WideString> wsSelTextArray;
+ size_t iStart = 0;
+ auto iEnd = rawValue->Find(L'\n', iStart);
+ iEnd = !iEnd.has_value() ? rawValue->GetLength() : iEnd;
+ while (iEnd.has_value() && iEnd >= iStart) {
+ wsSelTextArray.push_back(
+ rawValue->Mid(iStart, iEnd.value() - iStart));
+ iStart = iEnd.value() + 1;
+ if (iStart >= rawValue->GetLength())
+ break;
+ iEnd = rawValue->Find(L'\n', iStart);
+ }
+
+ CXFA_Node* pParentNode = pNode->GetNodeItem(XFA_NODEITEM_Parent);
+ ASSERT(pParentNode);
+ CXFA_Node* pGrandparentNode =
+ pParentNode->GetNodeItem(XFA_NODEITEM_Parent);
+ ASSERT(pGrandparentNode);
+ WideString bodyTagName;
+ bodyTagName = pGrandparentNode->JSNode()->GetCData(XFA_Attribute::Name);
+ if (bodyTagName.IsEmpty())
+ bodyTagName = L"ListBox1";
+
+ buf << L"<";
+ buf << bodyTagName;
+ buf << L" xmlns=\"\"\n>";
+ for (int32_t i = 0; i < pdfium::CollectionSize<int32_t>(wsSelTextArray);
+ i++) {
+ buf << L"<value\n>";
+ buf << ExportEncodeContent(wsSelTextArray[i]);
+ buf << L"</value\n>";
+ }
+ buf << L"</";
+ buf << bodyTagName;
+ buf << L"\n>";
+ wsChildren += buf.AsStringView();
+ buf.Clear();
+ } else {
+ WideString wsValue =
+ pRawValueNode->JSNode()->GetCData(XFA_Attribute::Value);
+ wsChildren += ExportEncodeContent(wsValue);
+ }
+ break;
+ }
+ case XFA_ObjectType::TextNode:
+ case XFA_ObjectType::NodeC:
+ case XFA_ObjectType::NodeV: {
+ WideString wsValue = pNode->JSNode()->GetCData(XFA_Attribute::Value);
+ wsChildren += ExportEncodeContent(wsValue);
+ break;
+ }
+ default:
+ if (pNode->GetElementType() == XFA_Element::Items) {
+ CXFA_Node* pTemplateNode = pNode->GetTemplateNode();
+ if (!pTemplateNode ||
+ pTemplateNode->CountChildren(XFA_Element::Unknown, false) !=
+ pNode->CountChildren(XFA_Element::Unknown, false)) {
+ bSaveXML = true;
+ }
+ }
+ CFX_WideTextBuf newBuf;
+ CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
+ while (pChildNode) {
+ RegenerateFormFile_Changed(pChildNode, newBuf, bSaveXML);
+ wsChildren += newBuf.AsStringView();
+ newBuf.Clear();
+ pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling);
+ }
+ if (!bSaveXML && !wsChildren.IsEmpty() &&
+ pNode->GetElementType() == XFA_Element::Items) {
+ wsChildren.clear();
+ bSaveXML = true;
+ CXFA_Node* pChild = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
+ while (pChild) {
+ RegenerateFormFile_Changed(pChild, newBuf, bSaveXML);
+ wsChildren += newBuf.AsStringView();
+ newBuf.Clear();
+ pChild = pChild->GetNodeItem(XFA_NODEITEM_NextSibling);
+ }
+ }
+ break;
+ }
+
+ if (!wsChildren.IsEmpty() || !wsAttrs.IsEmpty() ||
+ pNode->JSNode()->HasAttribute(XFA_Attribute::Name)) {
+ WideStringView wsElement = pNode->GetClassName();
+ WideString wsName;
+ SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsName);
+ buf << L"<";
+ buf << wsElement;
+ buf << wsName;
+ buf << wsAttrs;
+ if (wsChildren.IsEmpty()) {
+ buf << L"\n/>";
+ } else {
+ buf << L"\n>";
+ buf << wsChildren;
+ buf << L"</";
+ buf << wsElement;
+ buf << L"\n>";
+ }
+ }
+}
+
+void RegenerateFormFile_Container(
+ CXFA_Node* pNode,
+ const RetainPtr<CFX_SeekableStreamProxy>& pStream,
+ bool bSaveXML) {
+ XFA_Element eType = pNode->GetElementType();
+ if (eType == XFA_Element::Field || eType == XFA_Element::Draw ||
+ !pNode->IsContainerNode()) {
+ CFX_WideTextBuf buf;
+ RegenerateFormFile_Changed(pNode, buf, bSaveXML);
+ size_t nLen = buf.GetLength();
+ if (nLen > 0)
+ pStream->WriteString(buf.AsStringView());
+ return;
+ }
+
+ WideStringView wsElement(pNode->GetClassName());
+ pStream->WriteString(L"<");
+ pStream->WriteString(wsElement);
+
+ WideString wsOutput;
+ SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsOutput);
+
+ WideString wsAttrs;
+ for (size_t i = 0;; ++i) {
+ XFA_Attribute attr = pNode->GetAttribute(i);
+ if (attr == XFA_Attribute::Unknown)
+ break;
+ if (attr == XFA_Attribute::Name)
+ continue;
+
+ WideString wsAttr;
+ SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), false, wsAttr);
+ wsOutput += wsAttr;
+ }
+
+ if (!wsOutput.IsEmpty())
+ pStream->WriteString(wsOutput.AsStringView());
+
+ CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
+ if (pChildNode) {
+ pStream->WriteString(L"\n>");
+ while (pChildNode) {
+ RegenerateFormFile_Container(pChildNode, pStream, bSaveXML);
+ pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling);
+ }
+ pStream->WriteString(L"</");
+ pStream->WriteString(wsElement);
+ pStream->WriteString(L"\n>");
+ } else {
+ pStream->WriteString(L"\n/>");
+ }
+}
+
+void RecognizeXFAVersionNumber(CXFA_Node* pTemplateRoot,
+ WideString& wsVersionNumber) {
+ wsVersionNumber.clear();
+ if (!pTemplateRoot)
+ return;
+
+ pdfium::Optional<WideString> templateNS =
+ pTemplateRoot->JSNode()->TryNamespace();
+ if (!templateNS)
+ return;
+
+ XFA_VERSION eVersion =
+ pTemplateRoot->GetDocument()->RecognizeXFAVersionNumber(*templateNS);
+ if (eVersion == XFA_VERSION_UNKNOWN)
+ eVersion = XFA_VERSION_DEFAULT;
+
+ wsVersionNumber =
+ WideString::Format(L"%i.%i", eVersion / 100, eVersion % 100);
+}
+
} // namespace
double XFA_GetFractionalScale(uint32_t idx) {
@@ -92,43 +462,102 @@ CXFA_LocaleValue XFA_GetLocaleValue(CXFA_WidgetData* pWidgetData) {
return CXFA_LocaleValue(iVTType, pWidgetData->GetRawValue(),
pWidgetData->GetNode()->GetDocument()->GetLocalMgr());
}
-void XFA_GetPlainTextFromRichText(CFX_XMLNode* pXMLNode,
- WideString& wsPlainText) {
- if (!pXMLNode) {
+
+bool XFA_FDEExtension_ResolveNamespaceQualifier(CFX_XMLElement* pNode,
+ const WideString& wsQualifier,
+ WideString* wsNamespaceURI) {
+ if (!pNode)
+ return false;
+
+ CFX_XMLNode* pFakeRoot = pNode->GetNodeItem(CFX_XMLNode::Root);
+ WideString wsNSAttribute;
+ bool bRet = false;
+ if (wsQualifier.IsEmpty()) {
+ wsNSAttribute = L"xmlns";
+ bRet = true;
+ } else {
+ wsNSAttribute = L"xmlns:" + wsQualifier;
+ }
+ for (CFX_XMLNode* pParent = pNode; pParent != pFakeRoot;
+ pParent = pParent->GetNodeItem(CFX_XMLNode::Parent)) {
+ if (pParent->GetType() != FX_XMLNODE_Element)
+ continue;
+
+ auto* pElement = static_cast<CFX_XMLElement*>(pParent);
+ if (pElement->HasAttribute(wsNSAttribute.c_str())) {
+ *wsNamespaceURI = pElement->GetString(wsNSAttribute.c_str());
+ return true;
+ }
+ }
+ wsNamespaceURI->clear();
+ return bRet;
+}
+
+void XFA_DataExporter_DealWithDataGroupNode(CXFA_Node* pDataNode) {
+ if (!pDataNode || pDataNode->GetElementType() == XFA_Element::DataValue)
return;
+
+ int32_t iChildNum = 0;
+ for (CXFA_Node* pChildNode = pDataNode->GetNodeItem(XFA_NODEITEM_FirstChild);
+ pChildNode;
+ pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling)) {
+ iChildNum++;
+ XFA_DataExporter_DealWithDataGroupNode(pChildNode);
}
- switch (pXMLNode->GetType()) {
- case FX_XMLNODE_Element: {
- CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
- WideString wsTag = pXMLElement->GetLocalTagName();
- uint32_t uTag = FX_HashCode_GetW(wsTag.AsStringView(), true);
- if (uTag == 0x0001f714) {
- wsPlainText += L"\n";
- } else if (uTag == 0x00000070) {
- if (!wsPlainText.IsEmpty()) {
- wsPlainText += L"\n";
- }
- } else if (uTag == 0xa48ac63) {
- if (!wsPlainText.IsEmpty() &&
- wsPlainText[wsPlainText.GetLength() - 1] != '\n') {
- wsPlainText += L"\n";
- }
- }
- break;
+
+ if (pDataNode->GetElementType() != XFA_Element::DataGroup)
+ return;
+
+ if (iChildNum > 0) {
+ CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode();
+ ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element);
+ CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
+ if (pXMLElement->HasAttribute(L"xfa:dataNode"))
+ pXMLElement->RemoveAttribute(L"xfa:dataNode");
+
+ return;
+ }
+
+ CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode();
+ ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element);
+ static_cast<CFX_XMLElement*>(pXMLNode)->SetString(L"xfa:dataNode",
+ L"dataGroup");
+}
+
+void XFA_DataExporter_RegenerateFormFile(
+ CXFA_Node* pNode,
+ const RetainPtr<CFX_SeekableStreamProxy>& pStream,
+ const char* pChecksum,
+ bool bSaveXML) {
+ if (pNode->IsModelNode()) {
+ pStream->WriteString(L"<form");
+ if (pChecksum) {
+ WideString wsChecksum = WideString::FromUTF8(pChecksum);
+ pStream->WriteString(L" checksum=\"");
+ pStream->WriteString(wsChecksum.AsStringView());
+ pStream->WriteString(L"\"");
}
- case FX_XMLNODE_Text:
- case FX_XMLNODE_CharData: {
- WideString wsContent = static_cast<CFX_XMLText*>(pXMLNode)->GetText();
- wsPlainText += wsContent;
- break;
+ pStream->WriteString(L" xmlns=\"");
+ pStream->WriteString(WideStringView(kFormNS));
+
+ WideString wsVersionNumber;
+ RecognizeXFAVersionNumber(
+ ToNode(pNode->GetDocument()->GetXFAObject(XFA_HASHCODE_Template)),
+ wsVersionNumber);
+ if (wsVersionNumber.IsEmpty())
+ wsVersionNumber = L"2.8";
+
+ wsVersionNumber += L"/\"\n>";
+ pStream->WriteString(wsVersionNumber.AsStringView());
+
+ CXFA_Node* pChildNode = pNode->GetNodeItem(XFA_NODEITEM_FirstChild);
+ while (pChildNode) {
+ RegenerateFormFile_Container(pChildNode, pStream, false);
+ pChildNode = pChildNode->GetNodeItem(XFA_NODEITEM_NextSibling);
}
- default:
- break;
- }
- for (CFX_XMLNode* pChildXML = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
- pChildXML;
- pChildXML = pChildXML->GetNodeItem(CFX_XMLNode::NextSibling)) {
- XFA_GetPlainTextFromRichText(pChildXML, wsPlainText);
+ pStream->WriteString(L"</form\n>");
+ } else {
+ RegenerateFormFile_Container(pNode, pStream, bSaveXML);
}
}
diff --git a/xfa/fxfa/parser/xfa_utils.h b/xfa/fxfa/parser/xfa_utils.h
index 15b6983f9a..27e6534aab 100644
--- a/xfa/fxfa/parser/xfa_utils.h
+++ b/xfa/fxfa/parser/xfa_utils.h
@@ -27,16 +27,14 @@ CXFA_LocaleValue XFA_GetLocaleValue(CXFA_WidgetData* pWidgetData);
int32_t XFA_MapRotation(int32_t nRotation);
bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode);
-void XFA_GetPlainTextFromRichText(CFX_XMLNode* pXMLNode,
- WideString& wsPlainText);
bool XFA_FieldIsMultiListBox(CXFA_Node* pFieldNode);
void XFA_DataExporter_DealWithDataGroupNode(CXFA_Node* pDataNode);
void XFA_DataExporter_RegenerateFormFile(
CXFA_Node* pNode,
const RetainPtr<CFX_SeekableStreamProxy>& pStream,
- const char* pChecksum = nullptr,
- bool bSaveXML = false);
+ const char* pChecksum,
+ bool bSaveXML);
const XFA_SCRIPTATTRIBUTEINFO* XFA_GetScriptAttributeByName(
XFA_Element eElement,