// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "core/fxcrt/xml/cfx_xmlparser.h" #include #include "core/fxcrt/cfx_readonlymemorystream.h" #include "core/fxcrt/fx_codepage.h" #include "core/fxcrt/xml/cfx_xmldocument.h" #include "core/fxcrt/xml/cfx_xmlelement.h" #include "core/fxcrt/xml/cfx_xmlinstruction.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/test_support.h" #include "third_party/base/ptr_util.h" class CFX_XMLParserTest : public testing::Test { public: std::unique_ptr Parse(pdfium::span input) { CFX_XMLParser parser( pdfium::MakeRetain(pdfium::as_bytes(input))); return parser.Parse(); } }; TEST_F(CFX_XMLParserTest, AttributesMustBeQuoted) { static const char input[] = ""; ASSERT_TRUE(Parse(input) == nullptr); } TEST_F(CFX_XMLParserTest, Attributes) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"application/x-javascript", script->GetAttribute(L"contentType")); EXPECT_EQ(L"1", script->GetAttribute(L"display")); } TEST_F(CFX_XMLParserTest, CData) { static const char input[] = ""; static const wchar_t cdata[] = L"\n \n" L" if (a[1] < 3)\n" L" app.alert(\"Tclams\");\n" L" \n"; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(cdata, script->GetTextData()); } TEST_F(CFX_XMLParserTest, CDataWithInnerScript) { static const char input[] = "\n" " ]]>\n" ""; static const wchar_t cdata[] = L"\n \n" L" if (a[1] < 3)\n" L" app.alert(\"Tclams\");\n" L" \n" L" \n"; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(cdata, script->GetTextData()); } TEST_F(CFX_XMLParserTest, ArrowBangArrow) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"\n \n", script->GetTextData()); } TEST_F(CFX_XMLParserTest, ArrowBangBracketArrow) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"\n ", script->GetTextData()); } TEST_F(CFX_XMLParserTest, IncompleteCData) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"\n ", script->GetTextData()); } TEST_F(CFX_XMLParserTest, UnClosedCData) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"\n ", script->GetTextData()); } TEST_F(CFX_XMLParserTest, EmptyCData) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"\n \n", script->GetTextData()); } TEST_F(CFX_XMLParserTest, Comment) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"\n \n", script->GetTextData()); } TEST_F(CFX_XMLParserTest, IncorrectCommentStart) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"\n \n", script->GetTextData()); } TEST_F(CFX_XMLParserTest, CommentEmpty) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"\n \n", script->GetTextData()); } TEST_F(CFX_XMLParserTest, CommentThreeDash) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"\n ", script->GetTextData()); } TEST_F(CFX_XMLParserTest, CommentTwoDash) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); EXPECT_EQ(L"\n ", script->GetTextData()); } TEST_F(CFX_XMLParserTest, Entities) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L"BTjH\xab48&<>'\"", script->GetTextData()); } TEST_F(CFX_XMLParserTest, EntityOverflowHex) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L" ", script->GetTextData()); } TEST_F(CFX_XMLParserTest, EntityOverflowDecimal) { static const char input[] = ""; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* script = doc->GetRoot()->GetFirstChildNamed(L"script"); ASSERT_TRUE(script != nullptr); EXPECT_EQ(L" ", script->GetTextData()); } TEST_F(CFX_XMLParserTest, IsXMLNameChar) { EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(L'-', true)); EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(L'-', false)); EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(0x2069, true)); EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0x2070, true)); EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0x2073, true)); EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0x218F, true)); EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(0x2190, true)); EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(0xFDEF, true)); EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0xFDF0, true)); EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0xFDF1, true)); EXPECT_TRUE(CFX_XMLParser::IsXMLNameChar(0xFFFD, true)); EXPECT_FALSE(CFX_XMLParser::IsXMLNameChar(0xFFFE, true)); } TEST_F(CFX_XMLParserTest, BadElementClose) { ASSERT_TRUE(Parse("") == nullptr); } TEST_F(CFX_XMLParserTest, DoubleElementClose) { ASSERT_TRUE(Parse("

") == nullptr); } TEST_F(CFX_XMLParserTest, ParseInstruction) { static const char input[] = "" "
"; std::unique_ptr doc = Parse(input); ASSERT_TRUE(doc != nullptr); CFX_XMLElement* root = doc->GetRoot(); ASSERT_TRUE(root->GetFirstChild() != nullptr); ASSERT_EQ(FX_XMLNODE_Instruction, root->GetFirstChild()->GetType()); CFX_XMLInstruction* instruction = ToXMLInstruction(root->GetFirstChild()); EXPECT_TRUE(instruction->IsOriginalXFAVersion()); } TEST_F(CFX_XMLParserTest, BadEntity) { static const char input[] = ""; ASSERT_TRUE(Parse(input) == nullptr); }