summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dan Sinclair <dsinclair@chromium.org> 2018-04-26 20:20:37 +0000
committer: Chromium commit bot <commit-bot@chromium.org> 2018-04-26 20:20:37 +0000
commit: 52ab95aa3edbabbe90dcefcc54f3b6dace7ac53d (patch)
tree: 6198d797b594341f5b40cf5c6934f2488297bef7
parent: 051e837e08f52251de5932d90268d504060d12f1 (diff)
download: pdfium-52ab95aa3edbabbe90dcefcc54f3b6dace7ac53d.tar.xz
Reset entity start when clearing text (chromium/3410)
When we extract the text data we need to make sure we clear any entity start positions, otherwise our entity extraction will go badly.

Bug: chromium:836661
Change-Id: Icbafdef912b1f5b495eafef426961c5df66cd3fd
Reviewed-on: https://pdfium-review.googlesource.com/31450
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Ryan Harrison <rharrison@chromium.org>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
-rw-r--r-- core/fxcrt/xml/cfx_xmlparser.cpp 1
-rw-r--r-- core/fxcrt/xml/cfx_xmlparser_unittest.cpp 32
2 files changed, 33 insertions, 0 deletions
diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp
index 685655e815..eb79637095 100644
--- a/core/fxcrt/xml/cfx_xmlparser.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser.cpp
@@ -586,6 +586,7 @@ void CFX_XMLParser::ParseTextChar(wchar_t character) {
WideString CFX_XMLParser::GetTextData() {
WideString ret(current_text_.data(), current_text_.size());
current_text_.clear();
+ m_iEntityStart = -1;
current_text_.reserve(kCurrentTextReserve);
return ret;
}
diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
index b5c9be57cb..73d6685dad 100644
--- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
@@ -569,3 +569,35 @@ TEST(CFX_XMLParserTest, DoubleElementClose) {
ASSERT_EQ(L"p", parser.GetTextData());
ASSERT_EQ(FX_XmlSyntaxResult::Error, parser.DoSyntaxParse());
}
+
+TEST(CFX_XMLParserTest, BadEntity) {
+ const char* input =
+ "<script>"
+ "Test &<p>; thing"
+ "</script>";
+
+ auto stream = MakeProxy(input);
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
+ CFX_XMLTestParser parser(root.get(), stream);
+ ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
+ ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
+ ASSERT_EQ(L"script", parser.GetTextData());
+
+ ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
+ ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
+ ASSERT_EQ(L"Test &", parser.GetTextData());
+
+ ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
+ ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
+ ASSERT_EQ(L"p", parser.GetTextData());
+ ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse());
+
+ ASSERT_EQ(FX_XmlSyntaxResult::Text, parser.DoSyntaxParse());
+ ASSERT_EQ(L"; thing", parser.GetTextData());
+
+ ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse());
+ ASSERT_EQ(L"script", parser.GetTextData());
+
+ ASSERT_EQ(FX_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse());
+}