diff options
author | dan sinclair <dsinclair@chromium.org> | 2018-04-17 21:55:18 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2018-04-17 21:55:18 +0000 |
commit | 384e6fefb42ab5d3d71dd60f7f8d5f5155228804 (patch) | |
tree | 8fe0d065f51f6a459bd2be6adf625c3dd7a716f7 | |
parent | 13aa65a71294cac6e4bdaab73ddd6f4b9fcd8676 (diff) | |
download | pdfium-384e6fefb42ab5d3d71dd60f7f8d5f5155228804.tar.xz |
CFX_XML Cleanup
Cleanup formatting and unused variables in the CFX_XML classes.
Change-Id: I1aff6317a3da38a141a071ba69c7893d0f669732
Reviewed-on: https://pdfium-review.googlesource.com/30730
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
-rw-r--r-- | core/fxcrt/xml/cfx_xmlchardata.cpp | 2 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlelement.cpp | 23 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlnode.cpp | 10 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlnode.h | 13 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlparser.cpp | 156 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlparser.h | 25 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlparser_unittest.cpp | 47 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmltext.cpp | 2 | ||||
-rw-r--r-- | testing/libfuzzer/pdf_xml_fuzzer.cc | 4 | ||||
-rw-r--r-- | xfa/fxfa/parser/cxfa_document_parser.cpp | 2 |
10 files changed, 108 insertions, 176 deletions
diff --git a/core/fxcrt/xml/cfx_xmlchardata.cpp b/core/fxcrt/xml/cfx_xmlchardata.cpp index b613b15d00..dec2e4618f 100644 --- a/core/fxcrt/xml/cfx_xmlchardata.cpp +++ b/core/fxcrt/xml/cfx_xmlchardata.cpp @@ -11,7 +11,7 @@ CFX_XMLCharData::CFX_XMLCharData(const WideString& wsCData) : CFX_XMLText(wsCData) {} -CFX_XMLCharData::~CFX_XMLCharData() {} +CFX_XMLCharData::~CFX_XMLCharData() = default; FX_XMLNODETYPE CFX_XMLCharData::GetType() const { return FX_XMLNODE_CharData; diff --git a/core/fxcrt/xml/cfx_xmlelement.cpp b/core/fxcrt/xml/cfx_xmlelement.cpp index d37c55c443..cab15f24c4 100644 --- a/core/fxcrt/xml/cfx_xmlelement.cpp +++ b/core/fxcrt/xml/cfx_xmlelement.cpp @@ -30,6 +30,8 @@ std::unique_ptr<CFX_XMLNode> CFX_XMLElement::Clone() { auto pClone = pdfium::MakeUnique<CFX_XMLElement>(name_); pClone->attrs_ = attrs_; + // TODO(dsinclair): This clone is wrong. It doesn't clone child nodes just + // copies text nodes? WideString wsText; for (CFX_XMLNode* pChild = GetFirstChild(); pChild; pChild = pChild->GetNextSibling()) { @@ -52,26 +54,26 @@ WideString CFX_XMLElement::GetNamespacePrefix() const { } WideString CFX_XMLElement::GetNamespaceURI() const { - WideString wsAttri(L"xmlns"); + WideString attr(L"xmlns"); WideString wsPrefix = GetNamespacePrefix(); - if (wsPrefix.GetLength() > 0) { - wsAttri += L":"; - wsAttri += wsPrefix; + if (!wsPrefix.IsEmpty()) { + attr += L":"; + attr += wsPrefix; } - auto* pNode = static_cast<const CFX_XMLNode*>(this); + const CFX_XMLNode* pNode = this; while (pNode) { if (pNode->GetType() != FX_XMLNODE_Element) break; auto* pElement = static_cast<const CFX_XMLElement*>(pNode); - if (!pElement->HasAttribute(wsAttri)) { + if (!pElement->HasAttribute(attr)) { pNode = pNode->GetParent(); continue; } - return pElement->GetAttribute(wsAttri); + return pElement->GetAttribute(attr); } - return WideString(); + return L""; } WideString CFX_XMLElement::GetTextData() const { @@ -88,7 +90,8 @@ WideString CFX_XMLElement::GetTextData() const { } void CFX_XMLElement::SetTextData(const WideString& wsText) { - if (wsText.GetLength() < 1) + // TODO(dsinclair): Shouldn't this remove the children if you set blank text? + if (wsText.IsEmpty()) return; AppendChild(pdfium::MakeUnique<CFX_XMLText>(wsText)); } @@ -100,6 +103,8 @@ void CFX_XMLElement::Save(const RetainPtr<IFX_SeekableStream>& pXMLStream) { pXMLStream->WriteString(name_encoded); for (auto it : attrs_) { + // Note, the space between attributes is added by AttributeToString which + // writes a blank as the first character. pXMLStream->WriteString( AttributeToString(it.first, it.second).UTF8Encode().AsStringView()); } diff --git a/core/fxcrt/xml/cfx_xmlnode.cpp b/core/fxcrt/xml/cfx_xmlnode.cpp index a8be133171..4f811e608e 100644 --- a/core/fxcrt/xml/cfx_xmlnode.cpp +++ b/core/fxcrt/xml/cfx_xmlnode.cpp @@ -21,10 +21,6 @@ CFX_XMLNode::~CFX_XMLNode() { DeleteChildren(); } -FX_XMLNODETYPE CFX_XMLNode::GetType() const { - return FX_XMLNODE_Unknown; -} - void CFX_XMLNode::DeleteChildren() { CFX_XMLNode* child = last_child_.Get(); // Clear last child early as it will have been deleted already. @@ -113,12 +109,6 @@ CFX_XMLNode* CFX_XMLNode::GetRoot() { return pParent; } -std::unique_ptr<CFX_XMLNode> CFX_XMLNode::Clone() { - return nullptr; -} - -void CFX_XMLNode::Save(const RetainPtr<IFX_SeekableStream>& pXMLStream) {} - WideString CFX_XMLNode::EncodeEntities(const WideString& value) { WideString ret = value; ret.Replace(L"&", L"&"); diff --git a/core/fxcrt/xml/cfx_xmlnode.h b/core/fxcrt/xml/cfx_xmlnode.h index d46ab9b29d..288095403c 100644 --- a/core/fxcrt/xml/cfx_xmlnode.h +++ b/core/fxcrt/xml/cfx_xmlnode.h @@ -20,19 +20,14 @@ enum FX_XMLNODETYPE { FX_XMLNODE_CharData, }; -struct FX_XMLNODE { - int32_t iNodeNum; - FX_XMLNODETYPE eNodeType; -}; - class CFX_XMLNode { public: CFX_XMLNode(); virtual ~CFX_XMLNode(); - virtual FX_XMLNODETYPE GetType() const; - virtual std::unique_ptr<CFX_XMLNode> Clone(); - virtual void Save(const RetainPtr<IFX_SeekableStream>& pXMLStream); + virtual FX_XMLNODETYPE GetType() const = 0; + virtual std::unique_ptr<CFX_XMLNode> Clone() = 0; + virtual void Save(const RetainPtr<IFX_SeekableStream>& pXMLStream) = 0; CFX_XMLNode* GetRoot(); CFX_XMLNode* GetParent() const { return parent_.Get(); } @@ -48,7 +43,7 @@ class CFX_XMLNode { WideString EncodeEntities(const WideString& value); private: - // A node owns it's first child and it owns it's next sibling. The rest + // A node owns its first child and it owns its next sibling. The rest // are unowned pointers. UnownedPtr<CFX_XMLNode> parent_; UnownedPtr<CFX_XMLNode> last_child_; diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp index 9be8da76b4..7788a42907 100644 --- a/core/fxcrt/xml/cfx_xmlparser.cpp +++ b/core/fxcrt/xml/cfx_xmlparser.cpp @@ -45,32 +45,6 @@ const FX_XMLNAMECHAR g_XMLNameChars[] = { {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true}, }; -int32_t GetUTF8EncodeLength(const std::vector<wchar_t>& src, - FX_FILESIZE iSrcLen) { - uint32_t unicode = 0; - int32_t iDstNum = 0; - const wchar_t* pSrc = src.data(); - while (iSrcLen-- > 0) { - unicode = *pSrc++; - int nbytes = 0; - if ((uint32_t)unicode < 0x80) { - nbytes = 1; - } else if ((uint32_t)unicode < 0x800) { - nbytes = 2; - } else if ((uint32_t)unicode < 0x10000) { - nbytes = 3; - } else if ((uint32_t)unicode < 0x200000) { - nbytes = 4; - } else if ((uint32_t)unicode < 0x4000000) { - nbytes = 5; - } else { - nbytes = 6; - } - iDstNum += nbytes; - } - return iDstNum; -} - } // namespace // static @@ -89,15 +63,10 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent, m_pStream(pdfium::MakeRetain<CFX_SeekableStreamProxy>(pStream)), m_iXMLPlaneSize(1024), m_iCurrentPos(0), - m_iCurrentNodeNum(-1), - m_iLastNodeNum(-1), - m_iParsedBytes(0), - m_ParsedChars(0), - m_iBufferChars(0), m_bEOS(false), m_Start(0), m_End(0), - m_iAllocStep(m_BlockBuffer.GetAllocStep()), + m_CurNodeType(FX_XMLNODE_Unknown), m_pCurrentBlock(nullptr), m_iIndexInBlock(0), m_iTextDataLength(0), @@ -116,9 +85,6 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent, m_NodeStack.push(m_pParent); - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FX_XMLNODE_Unknown; - m_iXMLPlaneSize = std::min(m_iXMLPlaneSize, pdfium::base::checked_cast<size_t>(m_pStream->GetLength())); @@ -138,7 +104,7 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent, m_BlockBuffer.GetAvailableBlock(); } -CFX_XMLParser::~CFX_XMLParser() {} +CFX_XMLParser::~CFX_XMLParser() = default; bool CFX_XMLParser::Parse() { int32_t iCount = 0; @@ -176,7 +142,7 @@ bool CFX_XMLParser::Parse() { iCount++; break; case FX_XmlSyntaxResult::TargetName: - m_ws1 = GetTargetName(); + m_ws1 = m_BlockBuffer.GetTextData(0, m_iTextDataLength); if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") { auto child = pdfium::MakeUnique<CFX_XMLInstruction>(m_ws1); m_pChild = child.get(); @@ -230,7 +196,8 @@ bool CFX_XMLParser::Parse() { if (!m_ws1.IsEmpty()) instruction->AppendData(m_ws1); - instruction->AppendData(GetTargetData()); + instruction->AppendData( + m_BlockBuffer.GetTextData(0, m_iTextDataLength)); } m_ws1.clear(); break; @@ -260,22 +227,20 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString; return m_syntaxParserResult; } - m_ParsedChars += m_End; - m_iParsedBytes = m_iCurrentPos; if (m_pStream->GetPosition() != m_iCurrentPos) m_pStream->Seek(CFX_SeekableStreamProxy::From::Begin, m_iCurrentPos); - m_iBufferChars = + size_t buffer_chars = m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS); iPos = m_pStream->GetPosition(); - if (m_iBufferChars < 1) { + if (buffer_chars == 0) { m_iCurrentPos = iStreamLength; m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString; return m_syntaxParserResult; } m_iCurrentPos = iPos; m_Start = 0; - m_End = m_iBufferChars; + m_End = buffer_chars; } while (m_Start < m_End) { @@ -306,20 +271,14 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_Start++; m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; } else if (ch == L'?') { - m_iLastNodeNum++; - m_iCurrentNodeNum = m_iLastNodeNum; - m_CurNode.iNodeNum = m_iLastNodeNum; - m_CurNode.eNodeType = FX_XMLNODE_Instruction; - m_XMLNodeStack.push(m_CurNode); + m_CurNodeType = FX_XMLNODE_Instruction; + m_XMLNodeTypeStack.push(m_CurNodeType); m_Start++; m_syntaxParserState = FDE_XmlSyntaxState::Target; syntaxParserResult = FX_XmlSyntaxResult::InstructionOpen; } else { - m_iLastNodeNum++; - m_iCurrentNodeNum = m_iLastNodeNum; - m_CurNode.iNodeNum = m_iLastNodeNum; - m_CurNode.eNodeType = FX_XMLNODE_Element; - m_XMLNodeStack.push(m_CurNode); + m_CurNodeType = FX_XMLNODE_Element; + m_XMLNodeTypeStack.push(m_CurNodeType); m_syntaxParserState = FDE_XmlSyntaxState::Tag; syntaxParserResult = FX_XmlSyntaxResult::ElementOpen; } @@ -343,12 +302,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_syntaxParserState = FDE_XmlSyntaxState::AttriName; } else { - if (m_iIndexInBlock == m_iAllocStep) { + if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { + if (!m_pCurrentBlock) return FX_XmlSyntaxResult::Error; - } } m_pCurrentBlock[m_iIndexInBlock++] = ch; m_BlockBuffer.IncrementDataLength(); @@ -362,12 +320,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { } if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { if (m_BlockBuffer.IsEmpty()) { - if (m_CurNode.eNodeType == FX_XMLNODE_Element) { + if (m_CurNodeType == FX_XMLNODE_Element) { if (ch == L'>' || ch == L'/') { m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; break; } - } else if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) { + } else if (m_CurNodeType == FX_XMLNODE_Instruction) { if (ch == L'?') { m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; m_Start++; @@ -379,7 +337,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; } else { - if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) { + if (m_CurNodeType == FX_XMLNODE_Instruction) { if (ch != '=' && !IsXMLWhiteSpace(ch)) { m_syntaxParserState = FDE_XmlSyntaxState::TargetData; break; @@ -393,12 +351,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { syntaxParserResult = FX_XmlSyntaxResult::AttriName; } } else { - if (m_iIndexInBlock == m_iAllocStep) { + if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { + if (!m_pCurrentBlock) return FX_XmlSyntaxResult::Error; - } } m_pCurrentBlock[m_iIndexInBlock++] = ch; m_BlockBuffer.IncrementDataLength(); @@ -411,7 +368,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; } if (ch != L'=') { - if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) { + if (m_CurNodeType == FX_XMLNODE_Instruction) { m_syntaxParserState = FDE_XmlSyntaxState::TargetData; break; } @@ -430,11 +387,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { if (ch != L'\"' && ch != L'\'') { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; - } else { - m_wQuotationMark = ch; - m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; - m_Start++; } + + m_wQuotationMark = ch; + m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; + m_Start++; break; case FDE_XmlSyntaxState::AttriValue: if (ch == m_wQuotationMark) { @@ -456,13 +413,13 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; case FDE_XmlSyntaxState::CloseInstruction: if (ch != L'>') { - if (m_iIndexInBlock == m_iAllocStep) { + if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { + if (!m_pCurrentBlock) return FX_XmlSyntaxResult::Error; - } } + m_pCurrentBlock[m_iIndexInBlock++] = ch; m_BlockBuffer.IncrementDataLength(); m_syntaxParserState = FDE_XmlSyntaxState::TargetData; @@ -474,18 +431,17 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { syntaxParserResult = FX_XmlSyntaxResult::TargetData; } else { m_Start++; - if (m_XMLNodeStack.empty()) { + if (m_XMLNodeTypeStack.empty()) { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; } - m_XMLNodeStack.pop(); - if (!m_XMLNodeStack.empty()) { - m_CurNode = m_XMLNodeStack.top(); - } else { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FX_XMLNODE_Unknown; - } - m_iCurrentNodeNum = m_CurNode.iNodeNum; + m_XMLNodeTypeStack.pop(); + + if (!m_XMLNodeTypeStack.empty()) + m_CurNodeType = m_XMLNodeTypeStack.top(); + else + m_CurNodeType = FX_XMLNODE_Unknown; + m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); @@ -508,18 +464,17 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { case FDE_XmlSyntaxState::CloseElement: if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { if (ch == L'>') { - if (m_XMLNodeStack.empty()) { + if (m_XMLNodeTypeStack.empty()) { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; } - m_XMLNodeStack.pop(); - if (!m_XMLNodeStack.empty()) { - m_CurNode = m_XMLNodeStack.top(); - } else { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FX_XMLNODE_Unknown; - } - m_iCurrentNodeNum = m_CurNode.iNodeNum; + m_XMLNodeTypeStack.pop(); + + if (!m_XMLNodeTypeStack.empty()) + m_CurNodeType = m_XMLNodeTypeStack.top(); + else + m_CurNodeType = FX_XMLNODE_Unknown; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = @@ -531,12 +486,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { return m_syntaxParserResult; } } else { - if (m_iIndexInBlock == m_iAllocStep) { + if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { + if (!m_pCurrentBlock) return FX_XmlSyntaxResult::Error; - } } m_pCurrentBlock[m_iIndexInBlock++] = ch; m_BlockBuffer.IncrementDataLength(); @@ -567,7 +521,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::Text; } else { - if (m_iIndexInBlock == m_iAllocStep) { + if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); if (!m_pCurrentBlock) @@ -631,7 +585,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; } if (!m_SkipStack.empty()) { - if (m_iIndexInBlock == m_iAllocStep) { + if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); if (!m_pCurrentBlock) { @@ -689,12 +643,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { return m_syntaxParserResult; } } else { - if (m_iIndexInBlock == m_iAllocStep) { + if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { + if (!m_pCurrentBlock) return FX_XmlSyntaxResult::Error; - } } m_pCurrentBlock[m_iIndexInBlock++] = ch; m_BlockBuffer.IncrementDataLength(); @@ -721,16 +674,8 @@ bool CFX_XMLParser::GetStatus() const { return m_syntaxParserResult != FX_XmlSyntaxResult::Error; } -FX_FILESIZE CFX_XMLParser::GetCurrentBinaryPos() const { - if (!m_pStream) - return 0; - - int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start); - return m_iParsedBytes + nDstLen; -} - void CFX_XMLParser::ParseTextChar(wchar_t character) { - if (m_iIndexInBlock == m_iAllocStep) { + if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); if (!m_pCurrentBlock) @@ -765,6 +710,7 @@ void CFX_XMLParser::ParseTextChar(wchar_t character) { w = csEntity[i]; if (!std::iswdigit(w)) break; + ch = ch * 10 + w - L'0'; } } diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h index 52d86fb82d..f369453de1 100644 --- a/core/fxcrt/xml/cfx_xmlparser.h +++ b/core/fxcrt/xml/cfx_xmlparser.h @@ -93,20 +93,7 @@ class CFX_XMLParser { }; void ParseTextChar(wchar_t ch); - bool GetStatus() const; - FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; } - FX_FILESIZE GetCurrentBinaryPos() const; - int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; } - int32_t GetLastNodeNumber() const { return m_iLastNodeNum; } - - WideString GetTargetName() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); - } - - WideString GetTargetData() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); - } CFX_XMLNode* m_pParent; CFX_XMLNode* m_pChild; @@ -116,21 +103,15 @@ class CFX_XMLParser { RetainPtr<CFX_SeekableStreamProxy> m_pStream; size_t m_iXMLPlaneSize; FX_FILESIZE m_iCurrentPos; - int32_t m_iCurrentNodeNum; - int32_t m_iLastNodeNum; - int32_t m_iParsedBytes; - FX_FILESIZE m_ParsedChars; std::vector<wchar_t> m_Buffer; - size_t m_iBufferChars; bool m_bEOS; FX_FILESIZE m_Start; // Start position in m_Buffer FX_FILESIZE m_End; // End position in m_Buffer - FX_XMLNODE m_CurNode; - std::stack<FX_XMLNODE> m_XMLNodeStack; + FX_XMLNODETYPE m_CurNodeType; + std::stack<FX_XMLNODETYPE> m_XMLNodeTypeStack; CFX_BlockBuffer m_BlockBuffer; - int32_t m_iAllocStep; wchar_t* m_pCurrentBlock; // Pointer into CFX_BlockBuffer - int32_t m_iIndexInBlock; + size_t m_iIndexInBlock; int32_t m_iTextDataLength; FX_XmlSyntaxResult m_syntaxParserResult; FDE_XmlSyntaxState m_syntaxParserState; diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp index 4bd953df25..02addc70eb 100644 --- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp +++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp @@ -8,7 +8,7 @@ #include "core/fxcrt/cfx_memorystream.h" #include "core/fxcrt/fx_codepage.h" -#include "core/fxcrt/xml/cfx_xmlnode.h" +#include "core/fxcrt/xml/cfx_xmlelement.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/test_support.h" #include "third_party/base/ptr_util.h" @@ -63,7 +63,8 @@ TEST(CFX_XMLParserTest, CData) { L" "; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -108,7 +109,8 @@ TEST(CFX_XMLParserTest, CDataWithInnerScript) { L" "; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -142,7 +144,8 @@ TEST(CFX_XMLParserTest, ArrowBangArrow) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -174,7 +177,8 @@ TEST(CFX_XMLParserTest, ArrowBangBracketArrow) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -201,7 +205,8 @@ TEST(CFX_XMLParserTest, IncompleteCData) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -228,7 +233,8 @@ TEST(CFX_XMLParserTest, UnClosedCData) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -255,7 +261,8 @@ TEST(CFX_XMLParserTest, EmptyCData) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -289,7 +296,8 @@ TEST(CFX_XMLParserTest, Comment) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -320,7 +328,8 @@ TEST(CFX_XMLParserTest, IncorrectCommentStart) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -351,7 +360,8 @@ TEST(CFX_XMLParserTest, CommentEmpty) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -382,7 +392,8 @@ TEST(CFX_XMLParserTest, CommentThreeDash) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -407,7 +418,8 @@ TEST(CFX_XMLParserTest, CommentTwoDash) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -436,7 +448,8 @@ TEST(CFX_XMLParserTest, Entities) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -465,7 +478,8 @@ TEST(CFX_XMLParserTest, EntityOverflowHex) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); @@ -494,7 +508,8 @@ TEST(CFX_XMLParserTest, EntityOverflowDecimal) { "</script>"; auto stream = MakeProxy(input); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); + CFX_XMLTestParser parser(root.get(), stream); ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); diff --git a/core/fxcrt/xml/cfx_xmltext.cpp b/core/fxcrt/xml/cfx_xmltext.cpp index 74d0a501eb..9bed941dd4 100644 --- a/core/fxcrt/xml/cfx_xmltext.cpp +++ b/core/fxcrt/xml/cfx_xmltext.cpp @@ -11,7 +11,7 @@ CFX_XMLText::CFX_XMLText(const WideString& wsText) : CFX_XMLNode(), m_wsText(wsText) {} -CFX_XMLText::~CFX_XMLText() {} +CFX_XMLText::~CFX_XMLText() = default; FX_XMLNODETYPE CFX_XMLText::GetType() const { return FX_XMLNODE_Text; diff --git a/testing/libfuzzer/pdf_xml_fuzzer.cc b/testing/libfuzzer/pdf_xml_fuzzer.cc index b9cb01580c..97b9d3c7cf 100644 --- a/testing/libfuzzer/pdf_xml_fuzzer.cc +++ b/testing/libfuzzer/pdf_xml_fuzzer.cc @@ -9,7 +9,7 @@ #include "core/fxcrt/cfx_memorystream.h" #include "core/fxcrt/fx_safe_types.h" #include "core/fxcrt/fx_system.h" -#include "core/fxcrt/xml/cfx_xmlnode.h" +#include "core/fxcrt/xml/cfx_xmlelement.h" #include "core/fxcrt/xml/cfx_xmlparser.h" #include "third_party/base/ptr_util.h" @@ -20,7 +20,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { auto stream = pdfium::MakeRetain<CFX_MemoryStream>(const_cast<uint8_t*>(data), size, false); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); CFX_XMLParser parser(root.get(), stream); if (!parser.Parse()) return 0; diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp index 4770d849b1..e4f85f2bac 100644 --- a/xfa/fxfa/parser/cxfa_document_parser.cpp +++ b/xfa/fxfa/parser/cxfa_document_parser.cpp @@ -348,7 +348,7 @@ std::unique_ptr<CFX_XMLNode> CXFA_DocumentParser::LoadXML( const RetainPtr<IFX_SeekableStream>& pStream) { ASSERT(pStream); - auto root = pdfium::MakeUnique<CFX_XMLNode>(); + auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT"); root->AppendChild(pdfium::MakeUnique<CFX_XMLInstruction>(L"xml")); CFX_XMLParser parser(root.get(), pStream); |