// Copyright 2016 PDFium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "xfa/fde/xml/cfde_xmlsyntaxparser.h" #include #include "testing/gtest/include/gtest/gtest.h" #include "testing/test_support.h" #include "xfa/fgas/crt/cfgas_stream.h" #include "xfa/fgas/crt/fgas_codepage.h" class CFDE_XMLSyntaxParserTest : public pdfium::FPDF_Test {}; TEST_F(CFDE_XMLSyntaxParserTest, CData) { const char* input = ""; const wchar_t* cdata = L"\n" L" if (a[1] < 3)\n" L" app.alert(\"Tclams\");\n" L" "; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); ASSERT_EQ(cdata, parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, CDataWithInnerScript) { const char* input = "\n" " ]]>\n" ""; const wchar_t* cdata = L"\n" L" if (a[1] < 3)\n" L" app.alert(\"Tclams\");\n" L" \n" L" "; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); ASSERT_EQ(cdata, parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, ArrowBangArrow) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, ArrowBangBracketArrow) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); // Parser walks to end of input. ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, IncompleteCData) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); // Parser walks to end of input. ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, UnClosedCData) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); // Parser walks to end of input. ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, EmptyCData) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); ASSERT_EQ(L"", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, Comment) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, IncorrectCommentStart) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, CommentEmpty) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, CommentThreeDash) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, CommentTwoDash) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"\n ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, Entities) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L"BTH\xab48", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, EntityOverflowHex) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L" ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); } TEST_F(CFDE_XMLSyntaxParserTest, EntityOverflowDecimal) { const char* input = ""; CFX_RetainPtr stream = pdfium::MakeRetain( reinterpret_cast(const_cast(input)), strlen(input)); stream->SetCodePage(FX_CODEPAGE_UTF8); CFDE_XMLSyntaxParser parser(stream); ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); ASSERT_EQ(L"contentType", parser.GetAttributeName()); ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); ASSERT_EQ(L" ", parser.GetTextData()); ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); ASSERT_EQ(L"script", parser.GetTagName()); ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); }