diff options
author | Dan Sinclair <dsinclair@chromium.org> | 2018-04-12 13:15:39 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2018-04-12 13:15:39 +0000 |
commit | 332139df2c3c0826069fa61bcd436309fcdf5a6f (patch) | |
tree | 404ec9e20810029fc132ea0c7cdcbd2eb39aa558 /core/fxcrt/xml/cfx_xmlparser_unittest.cpp | |
parent | 6d503b875e6f75f0d8b5f29fcf811a89f12ad12d (diff) | |
download | pdfium-332139df2c3c0826069fa61bcd436309fcdf5a6f.tar.xz |
Merge CFX_XMLParser and CFX_XMLSyntaxParser
The CFX_XMLParser was a wrapper around the CFX_XMLSyntaxParser. This CL
merges the SyntaxParser into protected/private methods if the XMLParser.
Change-Id: If1519b5de55866ed14359dffd64dc12c36ee0244
Reviewed-on: https://pdfium-review.googlesource.com/30171
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Commit-Queue: dsinclair <dsinclair@chromium.org>
Diffstat (limited to 'core/fxcrt/xml/cfx_xmlparser_unittest.cpp')
-rw-r--r-- | core/fxcrt/xml/cfx_xmlparser_unittest.cpp | 580 |
1 files changed, 580 insertions, 0 deletions
diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp new file mode 100644 index 0000000000..d22925f797 --- /dev/null +++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp @@ -0,0 +1,580 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fxcrt/xml/cfx_xmlparser.h" + +#include <memory> + +#include "core/fxcrt/cfx_seekablestreamproxy.h" +#include "core/fxcrt/fx_codepage.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "testing/test_support.h" + +class CFX_XMLTestParser : public CFX_XMLParser { + public: + CFX_XMLTestParser(CFX_XMLNode* pParent, + const RetainPtr<CFX_SeekableStreamProxy>& pStream) + : CFX_XMLParser(pParent, pStream) {} + + ~CFX_XMLTestParser() override = default; + + FX_XmlSyntaxResult DoSyntaxParse() { return CFX_XMLParser::DoSyntaxParse(); } + + WideString GetTagName() const { return CFX_XMLParser::GetTagName(); } + + WideString GetAttributeName() const { + return CFX_XMLParser::GetAttributeName(); + } + + WideString GetAttributeValue() const { + return CFX_XMLParser::GetAttributeValue(); + } + + WideString GetTextData() const { return CFX_XMLParser::GetTextData(); } +}; + +TEST(CFX_XMLParserTest, CData) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <![CDATA[\n" + " if (a[1] < 3)\n" + " app.alert(\"Tclams\");\n" + " ]]>\n" + "</script>"; + + const wchar_t* cdata = + L"\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" "; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse()); + ASSERT_EQ(cdata, parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, CDataWithInnerScript) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <![CDATA[\n" + " if (a[1] < 3)\n" + " app.alert(\"Tclams\");\n" + " </script>\n" + " ]]>\n" + "</script>"; + + const wchar_t* cdata = + L"\n" + L" if (a[1] < 3)\n" + L" app.alert(\"Tclams\");\n" + L" </script>\n" + L" "; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse()); + ASSERT_EQ(cdata, parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, ArrowBangArrow) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <!>\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, ArrowBangBracketArrow) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <![>\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + // Parser walks to end of input. + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, IncompleteCData) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <![CDATA>\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + // Parser walks to end of input. + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, UnClosedCData) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <![CDATA[\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + // Parser walks to end of input. + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, EmptyCData) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <![CDATA[]]>\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::CData, parser.DoSyntaxParse()); + ASSERT_EQ(L"", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, Comment) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <!-- A Comment -->\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, IncorrectCommentStart) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <!- A Comment -->\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, CommentEmpty) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <!---->\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, CommentThreeDash) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <!--->\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, CommentTwoDash) { + const char* input = + "<script contentType=\"application/x-javascript\">\n" + " <!-->\n" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"\n ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, Entities) { + const char* input = + "<script contentType=\"application/x-javascript\">" + "B" + "T" + "H" + "ꭈ" + "�" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"BTH\xab48", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, EntityOverflowHex) { + const char* input = + "<script contentType=\"application/x-javascript\">" + "�" + "�" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L" ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, EntityOverflowDecimal) { + const char* input = + "<script contentType=\"application/x-javascript\">" + "�" + "�" + "</script>"; + + RetainPtr<CFX_SeekableStreamProxy> stream = + pdfium::MakeRetain<CFX_SeekableStreamProxy>( + reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); + stream->SetCodePage(FX_CODEPAGE_UTF8); + + auto root = pdfium::MakeUnique<CFX_XMLNode>(); + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); + ASSERT_EQ(L"contentType", parser.GetAttributeName()); + ASSERT_EQ(FX_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); + ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L" ", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTagName()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, IsXMLNameChar) { + EXPECT_FALSE(CFX_XMLTestParser::IsXMLNameChar(L'-', true)); + EXPECT_TRUE(CFX_XMLTestParser::IsXMLNameChar(L'-', false)); + + EXPECT_FALSE(CFX_XMLTestParser::IsXMLNameChar(0x2069, true)); + EXPECT_TRUE(CFX_XMLTestParser::IsXMLNameChar(0x2070, true)); + EXPECT_TRUE(CFX_XMLTestParser::IsXMLNameChar(0x2073, true)); + EXPECT_TRUE(CFX_XMLTestParser::IsXMLNameChar(0x218F, true)); + EXPECT_FALSE(CFX_XMLTestParser::IsXMLNameChar(0x2190, true)); + + EXPECT_FALSE(CFX_XMLTestParser::IsXMLNameChar(0xFDEF, true)); + EXPECT_TRUE(CFX_XMLTestParser::IsXMLNameChar(0xFDF0, true)); + EXPECT_TRUE(CFX_XMLTestParser::IsXMLNameChar(0xFDF1, true)); + EXPECT_TRUE(CFX_XMLTestParser::IsXMLNameChar(0xFFFD, true)); + EXPECT_FALSE(CFX_XMLTestParser::IsXMLNameChar(0xFFFE, true)); +} |