From ba367068887aca9f700289aa1b8c198920ca39a2 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Tue, 1 May 2018 17:02:54 +0000 Subject: Fixes XML Instruction handling in CXFA_DocumentParser. This CL fixes issues with handling XML instructions in CXFA_DocumentParser. Unittests were added to verify the behaviour. Change-Id: Iff8d51d0e6d411419473c9b2c32c700d4bbf86f5 Reviewed-on: https://pdfium-review.googlesource.com/31810 Commit-Queue: dsinclair Reviewed-by: Ryan Harrison Reviewed-by: Henrique Nakashima --- BUILD.gn | 1 + core/fxcrt/xml/cfx_xmlparser.cpp | 22 ++-- xfa/fxfa/parser/cxfa_document.cpp | 14 ++- xfa/fxfa/parser/cxfa_document_parser.cpp | 3 +- xfa/fxfa/parser/cxfa_document_parser_unittest.cpp | 127 ++++++++++++++++++++++ 5 files changed, 152 insertions(+), 15 deletions(-) create mode 100644 xfa/fxfa/parser/cxfa_document_parser_unittest.cpp diff --git a/BUILD.gn b/BUILD.gn index bd6bf43c25..61ffe92195 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -2932,6 +2932,7 @@ test("pdfium_unittests") { "xfa/fxfa/fm2js/cxfa_fmlexer_unittest.cpp", "xfa/fxfa/fm2js/cxfa_fmparser_unittest.cpp", "xfa/fxfa/fm2js/cxfa_fmsimpleexpression_unittest.cpp", + "xfa/fxfa/parser/cxfa_document_parser_unittest.cpp", "xfa/fxfa/parser/cxfa_localevalue_unittest.cpp", "xfa/fxfa/parser/cxfa_node_unittest.cpp", "xfa/fxfa/parser/cxfa_nodeiteratortemplate_unittest.cpp", diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp index cbfb949705..c7a81afc16 100644 --- a/core/fxcrt/xml/cfx_xmlparser.cpp +++ b/core/fxcrt/xml/cfx_xmlparser.cpp @@ -175,8 +175,6 @@ bool CFX_XMLParser::Parse() { auto* instruction = static_cast(m_pChild); if (!target_data.IsEmpty()) instruction->AppendData(target_data); - if (!GetTextData().IsEmpty()) - instruction->AppendData(GetTextData()); } break; } @@ -248,18 +246,27 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { } break; case FDE_XmlSyntaxState::Target: - case FDE_XmlSyntaxState::Tag: if (!IsXMLNameChar(ch, current_text_.empty())) { if (current_text_.empty()) { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; } - if (m_syntaxParserState != FDE_XmlSyntaxState::Target) - syntaxParserResult = FX_XmlSyntaxResult::TagName; - else - syntaxParserResult = FX_XmlSyntaxResult::TargetName; + syntaxParserResult = FX_XmlSyntaxResult::TargetName; + m_syntaxParserState = FDE_XmlSyntaxState::TargetData; + } else { + current_text_.push_back(ch); + m_Start++; + } + break; + case FDE_XmlSyntaxState::Tag: + if (!IsXMLNameChar(ch, current_text_.empty())) { + if (current_text_.empty()) { + m_syntaxParserResult = FX_XmlSyntaxResult::Error; + return m_syntaxParserResult; + } + syntaxParserResult = FX_XmlSyntaxResult::TagName; m_syntaxParserState = FDE_XmlSyntaxState::AttriName; } else { current_text_.push_back(ch); @@ -486,7 +493,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; } if (m_wQuotationMark == 0) { - m_wQuotationMark = 0; m_Start++; syntaxParserResult = FX_XmlSyntaxResult::TargetData; break; diff --git a/xfa/fxfa/parser/cxfa_document.cpp b/xfa/fxfa/parser/cxfa_document.cpp index 6860c83376..9e5143c5a4 100644 --- a/xfa/fxfa/parser/cxfa_document.cpp +++ b/xfa/fxfa/parser/cxfa_document.cpp @@ -1447,17 +1447,19 @@ CFXJSE_Engine* CXFA_Document::GetScriptContext() const { XFA_VERSION CXFA_Document::RecognizeXFAVersionNumber( const WideString& wsTemplateNS) { WideStringView wsTemplateURIPrefix(kTemplateNS); - size_t nPrefixLength = wsTemplateURIPrefix.GetLength(); - if (WideStringView(wsTemplateNS.c_str(), wsTemplateNS.GetLength()) != - wsTemplateURIPrefix) { + if (wsTemplateNS.GetLength() <= wsTemplateURIPrefix.GetLength()) return XFA_VERSION_UNKNOWN; - } - auto nDotPos = wsTemplateNS.Find('.', nPrefixLength); + + size_t prefixLength = wsTemplateURIPrefix.GetLength(); + if (WideStringView(wsTemplateNS.c_str(), prefixLength) != wsTemplateURIPrefix) + return XFA_VERSION_UNKNOWN; + + auto nDotPos = wsTemplateNS.Find('.', prefixLength); if (!nDotPos.has_value()) return XFA_VERSION_UNKNOWN; int8_t iMajor = FXSYS_wtoi( - wsTemplateNS.Mid(nPrefixLength, nDotPos.value() - nPrefixLength).c_str()); + wsTemplateNS.Mid(prefixLength, nDotPos.value() - prefixLength).c_str()); int8_t iMinor = FXSYS_wtoi(wsTemplateNS .Mid(nDotPos.value() + 1, diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp index f773a36897..fe246d2300 100644 --- a/xfa/fxfa/parser/cxfa_document_parser.cpp +++ b/xfa/fxfa/parser/cxfa_document_parser.cpp @@ -325,7 +325,7 @@ bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) { CXFA_DocumentParser::CXFA_DocumentParser(CXFA_Document* pFactory) : m_pFactory(pFactory) {} -CXFA_DocumentParser::~CXFA_DocumentParser() {} +CXFA_DocumentParser::~CXFA_DocumentParser() = default; bool CXFA_DocumentParser::Parse(const RetainPtr& pStream, XFA_PacketType ePacketID) { @@ -1137,6 +1137,7 @@ void CXFA_DocumentParser::ParseInstruction(CXFA_Node* pXFANode, CFX_XMLInstruction* pXMLInstruction, XFA_PacketType ePacketID) { const std::vector& target_data = pXMLInstruction->GetTargetData(); + if (pXMLInstruction->IsOriginalXFAVersion()) { if (target_data.size() > 1 && (pXFANode->GetDocument()->RecognizeXFAVersionNumber(target_data[0]) != diff --git a/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp b/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp new file mode 100644 index 0000000000..9f68fc143b --- /dev/null +++ b/xfa/fxfa/parser/cxfa_document_parser_unittest.cpp @@ -0,0 +1,127 @@ +// Copyright 2018 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "xfa/fxfa/parser/cxfa_document_parser.h" +#include "core/fxcrt/cfx_memorystream.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" +#include "xfa/fxfa/parser/cxfa_document.h" + +class CXFA_DocumentParserTest : public testing::Test { + public: + void SetUp() override { + doc_ = pdfium::MakeUnique(nullptr); + parser_ = pdfium::MakeUnique(doc_.get()); + } + + void TearDown() override { + // Hold the XML tree until we cleanup the document. + std::unique_ptr root = parser_->GetXMLRoot(); + parser_ = nullptr; + doc_ = nullptr; + } + + CXFA_Document* GetDoc() const { return doc_.get(); } + CXFA_DocumentParser* GetParser() const { return parser_.get(); } + + private: + std::unique_ptr doc_; + std::unique_ptr parser_; +}; + +TEST_F(CXFA_DocumentParserTest, XMLInstructionScriptOff) { + const char* input = + "\n" + "\n" + ""; + EXPECT_FALSE(GetDoc()->HasFlag(XFA_DOCFLAG_Scripting)); + + auto stream = pdfium::MakeRetain( + reinterpret_cast(const_cast(input)), strlen(input), + false); + ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config)); + + CXFA_Node* root = GetParser()->GetRootNode(); + ASSERT_TRUE(root != nullptr); + EXPECT_FALSE(GetDoc()->HasFlag(XFA_DOCFLAG_Scripting)); +} + +TEST_F(CXFA_DocumentParserTest, XMLInstructionsScriptOn) { + const char* input = + "\n" + "\n" + ""; + + EXPECT_FALSE(GetDoc()->HasFlag(XFA_DOCFLAG_Scripting)); + + auto stream = pdfium::MakeRetain( + reinterpret_cast(const_cast(input)), strlen(input), + false); + ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config)); + + CXFA_Node* root = GetParser()->GetRootNode(); + ASSERT_TRUE(root != nullptr); + EXPECT_TRUE(GetDoc()->HasFlag(XFA_DOCFLAG_Scripting)); +} + +TEST_F(CXFA_DocumentParserTest, XMLInstructionsStrictScope) { + const char* input = + "" + "\n" + ""; + + EXPECT_FALSE(GetDoc()->HasFlag(XFA_DOCFLAG_StrictScoping)); + + auto stream = pdfium::MakeRetain( + reinterpret_cast(const_cast(input)), strlen(input), + false); + ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config)); + + CXFA_Node* root = GetParser()->GetRootNode(); + ASSERT_TRUE(root != nullptr); + EXPECT_TRUE(GetDoc()->HasFlag(XFA_DOCFLAG_StrictScoping)); +} + +TEST_F(CXFA_DocumentParserTest, XMLInstructionsStrictScopeBad) { + const char* input = + "" + "\n" + ""; + + EXPECT_FALSE(GetDoc()->HasFlag(XFA_DOCFLAG_StrictScoping)); + + auto stream = pdfium::MakeRetain( + reinterpret_cast(const_cast(input)), strlen(input), + false); + ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config)); + + CXFA_Node* root = GetParser()->GetRootNode(); + ASSERT_TRUE(root != nullptr); + EXPECT_FALSE(GetDoc()->HasFlag(XFA_DOCFLAG_StrictScoping)); +} + +TEST_F(CXFA_DocumentParserTest, MultipleXMLInstructions) { + const char* input = + "" + "\n" + "\n" + ""; + + EXPECT_FALSE(GetDoc()->HasFlag(XFA_DOCFLAG_Scripting)); + EXPECT_FALSE(GetDoc()->HasFlag(XFA_DOCFLAG_StrictScoping)); + + auto stream = pdfium::MakeRetain( + reinterpret_cast(const_cast(input)), strlen(input), + false); + ASSERT_TRUE(GetParser()->Parse(stream, XFA_PacketType::Config)); + + CXFA_Node* root = GetParser()->GetRootNode(); + ASSERT_TRUE(root != nullptr); + + EXPECT_TRUE(GetDoc()->HasFlag(XFA_DOCFLAG_Scripting)); + EXPECT_TRUE(GetDoc()->HasFlag(XFA_DOCFLAG_StrictScoping)); +} -- cgit v1.2.3