From b242943f5e949ce3d92dcb62c7497815ccd231d1 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Tue, 24 Apr 2018 19:28:50 +0000 Subject: Remove m_CurNodeType from CFX_XMLParser This CL removes the m_CurNodeType member from CFX_XMLParser. This was, essentially, duplicating the m_XMLNodeTypeStack. We now use the top() of the stack element to find the same information. Change-Id: I26507d2eee954837210aa42919ff061cfc8e85d8 Reviewed-on: https://pdfium-review.googlesource.com/31277 Commit-Queue: dsinclair Reviewed-by: Ryan Harrison --- core/fxcrt/xml/cfx_xmlnode.h | 3 +-- core/fxcrt/xml/cfx_xmlparser.cpp | 24 ++++++------------------ core/fxcrt/xml/cfx_xmlparser.h | 1 - core/fxcrt/xml/cfx_xmlparser_unittest.cpp | 27 +++++++++++++++++++++++++++ 4 files changed, 34 insertions(+), 21 deletions(-) diff --git a/core/fxcrt/xml/cfx_xmlnode.h b/core/fxcrt/xml/cfx_xmlnode.h index c7dfbc8071..ec731eb62e 100644 --- a/core/fxcrt/xml/cfx_xmlnode.h +++ b/core/fxcrt/xml/cfx_xmlnode.h @@ -13,8 +13,7 @@ #include "core/fxcrt/retain_ptr.h" enum FX_XMLNODETYPE { - FX_XMLNODE_Unknown = 0, - FX_XMLNODE_Instruction, + FX_XMLNODE_Instruction = 0, FX_XMLNODE_Element, FX_XMLNODE_Text, FX_XMLNODE_CharData, diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp index 55778d3204..651ebb41eb 100644 --- a/core/fxcrt/xml/cfx_xmlparser.cpp +++ b/core/fxcrt/xml/cfx_xmlparser.cpp @@ -242,14 +242,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_Start++; m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; } else if (ch == L'?') { - m_CurNodeType = FX_XMLNODE_Instruction; - m_XMLNodeTypeStack.push(m_CurNodeType); + m_XMLNodeTypeStack.push(FX_XMLNODE_Instruction); m_Start++; m_syntaxParserState = FDE_XmlSyntaxState::Target; syntaxParserResult = FX_XmlSyntaxResult::InstructionOpen; } else { - m_CurNodeType = FX_XMLNODE_Element; - m_XMLNodeTypeStack.push(m_CurNodeType); + m_XMLNodeTypeStack.push(FX_XMLNODE_Element); m_syntaxParserState = FDE_XmlSyntaxState::Tag; syntaxParserResult = FX_XmlSyntaxResult::ElementOpen; } @@ -280,12 +278,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { } if (!IsXMLNameChar(ch, current_text_.empty())) { if (current_text_.empty()) { - if (m_CurNodeType == FX_XMLNODE_Element) { + if (m_XMLNodeTypeStack.top() == FX_XMLNODE_Element) { if (ch == L'>' || ch == L'/') { m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; break; } - } else if (m_CurNodeType == FX_XMLNODE_Instruction) { + } else if (m_XMLNodeTypeStack.top() == FX_XMLNODE_Instruction) { if (ch == L'?') { m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; m_Start++; @@ -297,7 +295,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; } else { - if (m_CurNodeType == FX_XMLNODE_Instruction) { + if (m_XMLNodeTypeStack.top() == FX_XMLNODE_Instruction) { if (ch != '=' && !IsXMLWhiteSpace(ch)) { m_syntaxParserState = FDE_XmlSyntaxState::TargetData; break; @@ -317,7 +315,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; } if (ch != L'=') { - if (m_CurNodeType == FX_XMLNODE_Instruction) { + if (m_XMLNodeTypeStack.top() == FX_XMLNODE_Instruction) { m_syntaxParserState = FDE_XmlSyntaxState::TargetData; break; } @@ -370,11 +368,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { } m_XMLNodeTypeStack.pop(); - if (!m_XMLNodeTypeStack.empty()) - m_CurNodeType = m_XMLNodeTypeStack.top(); - else - m_CurNodeType = FX_XMLNODE_Unknown; - m_syntaxParserState = FDE_XmlSyntaxState::Text; syntaxParserResult = FX_XmlSyntaxResult::InstructionClose; } @@ -400,11 +393,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { } m_XMLNodeTypeStack.pop(); - if (!m_XMLNodeTypeStack.empty()) - m_CurNodeType = m_XMLNodeTypeStack.top(); - else - m_CurNodeType = FX_XMLNODE_Unknown; - m_syntaxParserState = FDE_XmlSyntaxState::Text; syntaxParserResult = FX_XmlSyntaxResult::ElementClose; } else if (!IsXMLWhiteSpace(ch)) { diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h index 6121f0c1dc..6db24e5040 100644 --- a/core/fxcrt/xml/cfx_xmlparser.h +++ b/core/fxcrt/xml/cfx_xmlparser.h @@ -84,7 +84,6 @@ class CFX_XMLParser { RetainPtr m_pStream; FX_FILESIZE m_Start = 0; // Start position in m_Buffer FX_FILESIZE m_End = 0; // End position in m_Buffer - FX_XMLNODETYPE m_CurNodeType = FX_XMLNODE_Unknown; FX_XmlSyntaxResult m_syntaxParserResult = FX_XmlSyntaxResult::None; FDE_XmlSyntaxState m_syntaxParserState = FDE_XmlSyntaxState::Text; std::stack m_NodeStack; diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp index 790001cc27..b5c9be57cb 100644 --- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp +++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp @@ -542,3 +542,30 @@ TEST(CFX_XMLParserTest, IsXMLNameChar) { EXPECT_TRUE(CFX_XMLTestParser::IsXMLNameChar(0xFFFD, true)); EXPECT_FALSE(CFX_XMLTestParser::IsXMLNameChar(0xFFFE, true)); } + +TEST(CFX_XMLParserTest, BadElementClose) { + const char* input = ""; + + auto stream = MakeProxy(input); + auto root = pdfium::MakeUnique(L"ROOT"); + + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::Error, parser.DoSyntaxParse()); +} + +TEST(CFX_XMLParserTest, DoubleElementClose) { + const char* input = "

"; + + auto stream = MakeProxy(input); + auto root = pdfium::MakeUnique(L"ROOT"); + + CFX_XMLTestParser parser(root.get(), stream); + ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); + ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); + ASSERT_EQ(L"p", parser.GetTextData()); + ASSERT_EQ(FX_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); + ASSERT_EQ(L"", parser.GetTextData()); + ASSERT_EQ(FX_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); + ASSERT_EQ(L"p", parser.GetTextData()); + ASSERT_EQ(FX_XmlSyntaxResult::Error, parser.DoSyntaxParse()); +} -- cgit v1.2.3