From 52ab95aa3edbabbe90dcefcc54f3b6dace7ac53d Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Thu, 26 Apr 2018 20:20:37 +0000 Subject: Reset entity start when clearing text When we extract the text data we need to make sure we clear any entity start positions, otherwise our entity extraction will go badly. Bug: chromium:836661 Change-Id: Icbafdef912b1f5b495eafef426961c5df66cd3fd Reviewed-on: https://pdfium-review.googlesource.com/31450 Commit-Queue: dsinclair Reviewed-by: Ryan Harrison Reviewed-by: Henrique Nakashima --- core/fxcrt/xml/cfx_xmlparser.cpp | 1 + core/fxcrt/xml/cfx_xmlparser_unittest.cpp | 32 +++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp index 685655e815..eb79637095 100644 --- a/core/fxcrt/xml/cfx_xmlparser.cpp +++ b/core/fxcrt/xml/cfx_xmlparser.cpp @@ -586,6 +586,7 @@ void CFX_XMLParser::ParseTextChar(wchar_t character) { WideString CFX_XMLParser::GetTextData() { WideString ret(current_text_.data(), current_text_.size()); current_text_.clear(); + m_iEntityStart = -1; current_text_.reserve(kCurrentTextReserve); return ret; } diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp index b5c9be57cb..73d6685dad 100644 --- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp +++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp @@ -569,3 +569,35 @@ TEST(CFX_XMLParserTest, DoubleElementClose) { ASSERT_EQ(L"p", parser.GetTextData()); ASSERT_EQ(FX_XmlSyntaxResult::Error, parser.DoSyntaxParse()); } + +TEST(CFX_XMLParserTest, BadEntity) { + const char* input = + "<script>Test &<p>; thing</script>"; + + auto stream = MakeProxy(input); + auto root = pdfium::MakeUnique(L"ROOT"); + + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, 
parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"Test &", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"p", parser.GetTextData()); + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + + ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse()); + ASSERT_EQ(L"; thing", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"script", parser.GetTextData()); + + ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); +} -- cgit v1.2.3