summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordan sinclair <dsinclair@chromium.org>2018-04-17 21:55:18 +0000
committerChromium commit bot <commit-bot@chromium.org>2018-04-17 21:55:18 +0000
commit384e6fefb42ab5d3d71dd60f7f8d5f5155228804 (patch)
tree8fe0d065f51f6a459bd2be6adf625c3dd7a716f7
parent13aa65a71294cac6e4bdaab73ddd6f4b9fcd8676 (diff)
downloadpdfium-384e6fefb42ab5d3d71dd60f7f8d5f5155228804.tar.xz
CFX_XML Cleanup
Cleanup formatting and unused variables in the CFX_XML classes. Change-Id: I1aff6317a3da38a141a071ba69c7893d0f669732 Reviewed-on: https://pdfium-review.googlesource.com/30730 Commit-Queue: dsinclair <dsinclair@chromium.org> Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
-rw-r--r--core/fxcrt/xml/cfx_xmlchardata.cpp2
-rw-r--r--core/fxcrt/xml/cfx_xmlelement.cpp23
-rw-r--r--core/fxcrt/xml/cfx_xmlnode.cpp10
-rw-r--r--core/fxcrt/xml/cfx_xmlnode.h13
-rw-r--r--core/fxcrt/xml/cfx_xmlparser.cpp156
-rw-r--r--core/fxcrt/xml/cfx_xmlparser.h25
-rw-r--r--core/fxcrt/xml/cfx_xmlparser_unittest.cpp47
-rw-r--r--core/fxcrt/xml/cfx_xmltext.cpp2
-rw-r--r--testing/libfuzzer/pdf_xml_fuzzer.cc4
-rw-r--r--xfa/fxfa/parser/cxfa_document_parser.cpp2
10 files changed, 108 insertions, 176 deletions
diff --git a/core/fxcrt/xml/cfx_xmlchardata.cpp b/core/fxcrt/xml/cfx_xmlchardata.cpp
index b613b15d00..dec2e4618f 100644
--- a/core/fxcrt/xml/cfx_xmlchardata.cpp
+++ b/core/fxcrt/xml/cfx_xmlchardata.cpp
@@ -11,7 +11,7 @@
CFX_XMLCharData::CFX_XMLCharData(const WideString& wsCData)
: CFX_XMLText(wsCData) {}
-CFX_XMLCharData::~CFX_XMLCharData() {}
+CFX_XMLCharData::~CFX_XMLCharData() = default;
FX_XMLNODETYPE CFX_XMLCharData::GetType() const {
return FX_XMLNODE_CharData;
diff --git a/core/fxcrt/xml/cfx_xmlelement.cpp b/core/fxcrt/xml/cfx_xmlelement.cpp
index d37c55c443..cab15f24c4 100644
--- a/core/fxcrt/xml/cfx_xmlelement.cpp
+++ b/core/fxcrt/xml/cfx_xmlelement.cpp
@@ -30,6 +30,8 @@ std::unique_ptr<CFX_XMLNode> CFX_XMLElement::Clone() {
auto pClone = pdfium::MakeUnique<CFX_XMLElement>(name_);
pClone->attrs_ = attrs_;
+ // TODO(dsinclair): This clone is wrong. It doesn't clone child nodes just
+ // copies text nodes?
WideString wsText;
for (CFX_XMLNode* pChild = GetFirstChild(); pChild;
pChild = pChild->GetNextSibling()) {
@@ -52,26 +54,26 @@ WideString CFX_XMLElement::GetNamespacePrefix() const {
}
WideString CFX_XMLElement::GetNamespaceURI() const {
- WideString wsAttri(L"xmlns");
+ WideString attr(L"xmlns");
WideString wsPrefix = GetNamespacePrefix();
- if (wsPrefix.GetLength() > 0) {
- wsAttri += L":";
- wsAttri += wsPrefix;
+ if (!wsPrefix.IsEmpty()) {
+ attr += L":";
+ attr += wsPrefix;
}
- auto* pNode = static_cast<const CFX_XMLNode*>(this);
+ const CFX_XMLNode* pNode = this;
while (pNode) {
if (pNode->GetType() != FX_XMLNODE_Element)
break;
auto* pElement = static_cast<const CFX_XMLElement*>(pNode);
- if (!pElement->HasAttribute(wsAttri)) {
+ if (!pElement->HasAttribute(attr)) {
pNode = pNode->GetParent();
continue;
}
- return pElement->GetAttribute(wsAttri);
+ return pElement->GetAttribute(attr);
}
- return WideString();
+ return L"";
}
WideString CFX_XMLElement::GetTextData() const {
@@ -88,7 +90,8 @@ WideString CFX_XMLElement::GetTextData() const {
}
void CFX_XMLElement::SetTextData(const WideString& wsText) {
- if (wsText.GetLength() < 1)
+ // TODO(dsinclair): Shouldn't this remove the children if you set blank text?
+ if (wsText.IsEmpty())
return;
AppendChild(pdfium::MakeUnique<CFX_XMLText>(wsText));
}
@@ -100,6 +103,8 @@ void CFX_XMLElement::Save(const RetainPtr<IFX_SeekableStream>& pXMLStream) {
pXMLStream->WriteString(name_encoded);
for (auto it : attrs_) {
+ // Note, the space between attributes is added by AttributeToString which
+ // writes a blank as the first character.
pXMLStream->WriteString(
AttributeToString(it.first, it.second).UTF8Encode().AsStringView());
}
diff --git a/core/fxcrt/xml/cfx_xmlnode.cpp b/core/fxcrt/xml/cfx_xmlnode.cpp
index a8be133171..4f811e608e 100644
--- a/core/fxcrt/xml/cfx_xmlnode.cpp
+++ b/core/fxcrt/xml/cfx_xmlnode.cpp
@@ -21,10 +21,6 @@ CFX_XMLNode::~CFX_XMLNode() {
DeleteChildren();
}
-FX_XMLNODETYPE CFX_XMLNode::GetType() const {
- return FX_XMLNODE_Unknown;
-}
-
void CFX_XMLNode::DeleteChildren() {
CFX_XMLNode* child = last_child_.Get();
// Clear last child early as it will have been deleted already.
@@ -113,12 +109,6 @@ CFX_XMLNode* CFX_XMLNode::GetRoot() {
return pParent;
}
-std::unique_ptr<CFX_XMLNode> CFX_XMLNode::Clone() {
- return nullptr;
-}
-
-void CFX_XMLNode::Save(const RetainPtr<IFX_SeekableStream>& pXMLStream) {}
-
WideString CFX_XMLNode::EncodeEntities(const WideString& value) {
WideString ret = value;
ret.Replace(L"&", L"&amp;");
diff --git a/core/fxcrt/xml/cfx_xmlnode.h b/core/fxcrt/xml/cfx_xmlnode.h
index d46ab9b29d..288095403c 100644
--- a/core/fxcrt/xml/cfx_xmlnode.h
+++ b/core/fxcrt/xml/cfx_xmlnode.h
@@ -20,19 +20,14 @@ enum FX_XMLNODETYPE {
FX_XMLNODE_CharData,
};
-struct FX_XMLNODE {
- int32_t iNodeNum;
- FX_XMLNODETYPE eNodeType;
-};
-
class CFX_XMLNode {
public:
CFX_XMLNode();
virtual ~CFX_XMLNode();
- virtual FX_XMLNODETYPE GetType() const;
- virtual std::unique_ptr<CFX_XMLNode> Clone();
- virtual void Save(const RetainPtr<IFX_SeekableStream>& pXMLStream);
+ virtual FX_XMLNODETYPE GetType() const = 0;
+ virtual std::unique_ptr<CFX_XMLNode> Clone() = 0;
+ virtual void Save(const RetainPtr<IFX_SeekableStream>& pXMLStream) = 0;
CFX_XMLNode* GetRoot();
CFX_XMLNode* GetParent() const { return parent_.Get(); }
@@ -48,7 +43,7 @@ class CFX_XMLNode {
WideString EncodeEntities(const WideString& value);
private:
- // A node owns it's first child and it owns it's next sibling. The rest
+ // A node owns its first child and it owns its next sibling. The rest
// are unowned pointers.
UnownedPtr<CFX_XMLNode> parent_;
UnownedPtr<CFX_XMLNode> last_child_;
diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp
index 9be8da76b4..7788a42907 100644
--- a/core/fxcrt/xml/cfx_xmlparser.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser.cpp
@@ -45,32 +45,6 @@ const FX_XMLNAMECHAR g_XMLNameChars[] = {
{0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true},
};
-int32_t GetUTF8EncodeLength(const std::vector<wchar_t>& src,
- FX_FILESIZE iSrcLen) {
- uint32_t unicode = 0;
- int32_t iDstNum = 0;
- const wchar_t* pSrc = src.data();
- while (iSrcLen-- > 0) {
- unicode = *pSrc++;
- int nbytes = 0;
- if ((uint32_t)unicode < 0x80) {
- nbytes = 1;
- } else if ((uint32_t)unicode < 0x800) {
- nbytes = 2;
- } else if ((uint32_t)unicode < 0x10000) {
- nbytes = 3;
- } else if ((uint32_t)unicode < 0x200000) {
- nbytes = 4;
- } else if ((uint32_t)unicode < 0x4000000) {
- nbytes = 5;
- } else {
- nbytes = 6;
- }
- iDstNum += nbytes;
- }
- return iDstNum;
-}
-
} // namespace
// static
@@ -89,15 +63,10 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
m_pStream(pdfium::MakeRetain<CFX_SeekableStreamProxy>(pStream)),
m_iXMLPlaneSize(1024),
m_iCurrentPos(0),
- m_iCurrentNodeNum(-1),
- m_iLastNodeNum(-1),
- m_iParsedBytes(0),
- m_ParsedChars(0),
- m_iBufferChars(0),
m_bEOS(false),
m_Start(0),
m_End(0),
- m_iAllocStep(m_BlockBuffer.GetAllocStep()),
+ m_CurNodeType(FX_XMLNODE_Unknown),
m_pCurrentBlock(nullptr),
m_iIndexInBlock(0),
m_iTextDataLength(0),
@@ -116,9 +85,6 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
m_NodeStack.push(m_pParent);
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FX_XMLNODE_Unknown;
-
m_iXMLPlaneSize =
std::min(m_iXMLPlaneSize,
pdfium::base::checked_cast<size_t>(m_pStream->GetLength()));
@@ -138,7 +104,7 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
m_BlockBuffer.GetAvailableBlock();
}
-CFX_XMLParser::~CFX_XMLParser() {}
+CFX_XMLParser::~CFX_XMLParser() = default;
bool CFX_XMLParser::Parse() {
int32_t iCount = 0;
@@ -176,7 +142,7 @@ bool CFX_XMLParser::Parse() {
iCount++;
break;
case FX_XmlSyntaxResult::TargetName:
- m_ws1 = GetTargetName();
+ m_ws1 = m_BlockBuffer.GetTextData(0, m_iTextDataLength);
if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") {
auto child = pdfium::MakeUnique<CFX_XMLInstruction>(m_ws1);
m_pChild = child.get();
@@ -230,7 +196,8 @@ bool CFX_XMLParser::Parse() {
if (!m_ws1.IsEmpty())
instruction->AppendData(m_ws1);
- instruction->AppendData(GetTargetData());
+ instruction->AppendData(
+ m_BlockBuffer.GetTextData(0, m_iTextDataLength));
}
m_ws1.clear();
break;
@@ -260,22 +227,20 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
return m_syntaxParserResult;
}
- m_ParsedChars += m_End;
- m_iParsedBytes = m_iCurrentPos;
if (m_pStream->GetPosition() != m_iCurrentPos)
m_pStream->Seek(CFX_SeekableStreamProxy::From::Begin, m_iCurrentPos);
- m_iBufferChars =
+ size_t buffer_chars =
m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS);
iPos = m_pStream->GetPosition();
- if (m_iBufferChars < 1) {
+ if (buffer_chars == 0) {
m_iCurrentPos = iStreamLength;
m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
return m_syntaxParserResult;
}
m_iCurrentPos = iPos;
m_Start = 0;
- m_End = m_iBufferChars;
+ m_End = buffer_chars;
}
while (m_Start < m_End) {
@@ -306,20 +271,14 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_Start++;
m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
} else if (ch == L'?') {
- m_iLastNodeNum++;
- m_iCurrentNodeNum = m_iLastNodeNum;
- m_CurNode.iNodeNum = m_iLastNodeNum;
- m_CurNode.eNodeType = FX_XMLNODE_Instruction;
- m_XMLNodeStack.push(m_CurNode);
+ m_CurNodeType = FX_XMLNODE_Instruction;
+ m_XMLNodeTypeStack.push(m_CurNodeType);
m_Start++;
m_syntaxParserState = FDE_XmlSyntaxState::Target;
syntaxParserResult = FX_XmlSyntaxResult::InstructionOpen;
} else {
- m_iLastNodeNum++;
- m_iCurrentNodeNum = m_iLastNodeNum;
- m_CurNode.iNodeNum = m_iLastNodeNum;
- m_CurNode.eNodeType = FX_XMLNODE_Element;
- m_XMLNodeStack.push(m_CurNode);
+ m_CurNodeType = FX_XMLNODE_Element;
+ m_XMLNodeTypeStack.push(m_CurNodeType);
m_syntaxParserState = FDE_XmlSyntaxState::Tag;
syntaxParserResult = FX_XmlSyntaxResult::ElementOpen;
}
@@ -343,12 +302,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
@@ -362,12 +320,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
}
if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
if (m_BlockBuffer.IsEmpty()) {
- if (m_CurNode.eNodeType == FX_XMLNODE_Element) {
+ if (m_CurNodeType == FX_XMLNODE_Element) {
if (ch == L'>' || ch == L'/') {
m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
break;
}
- } else if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
+ } else if (m_CurNodeType == FX_XMLNODE_Instruction) {
if (ch == L'?') {
m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
m_Start++;
@@ -379,7 +337,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
} else {
- if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
+ if (m_CurNodeType == FX_XMLNODE_Instruction) {
if (ch != '=' && !IsXMLWhiteSpace(ch)) {
m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
break;
@@ -393,12 +351,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
syntaxParserResult = FX_XmlSyntaxResult::AttriName;
}
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
@@ -411,7 +368,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
}
if (ch != L'=') {
- if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
+ if (m_CurNodeType == FX_XMLNODE_Instruction) {
m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
break;
}
@@ -430,11 +387,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
if (ch != L'\"' && ch != L'\'') {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
- } else {
- m_wQuotationMark = ch;
- m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
- m_Start++;
}
+
+ m_wQuotationMark = ch;
+ m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
+ m_Start++;
break;
case FDE_XmlSyntaxState::AttriValue:
if (ch == m_wQuotationMark) {
@@ -456,13 +413,13 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
case FDE_XmlSyntaxState::CloseInstruction:
if (ch != L'>') {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
+
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
@@ -474,18 +431,17 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
syntaxParserResult = FX_XmlSyntaxResult::TargetData;
} else {
m_Start++;
- if (m_XMLNodeStack.empty()) {
+ if (m_XMLNodeTypeStack.empty()) {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
}
- m_XMLNodeStack.pop();
- if (!m_XMLNodeStack.empty()) {
- m_CurNode = m_XMLNodeStack.top();
- } else {
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FX_XMLNODE_Unknown;
- }
- m_iCurrentNodeNum = m_CurNode.iNodeNum;
+ m_XMLNodeTypeStack.pop();
+
+ if (!m_XMLNodeTypeStack.empty())
+ m_CurNodeType = m_XMLNodeTypeStack.top();
+ else
+ m_CurNodeType = FX_XMLNODE_Unknown;
+
m_BlockBuffer.Reset(true);
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
@@ -508,18 +464,17 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
case FDE_XmlSyntaxState::CloseElement:
if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
if (ch == L'>') {
- if (m_XMLNodeStack.empty()) {
+ if (m_XMLNodeTypeStack.empty()) {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
}
- m_XMLNodeStack.pop();
- if (!m_XMLNodeStack.empty()) {
- m_CurNode = m_XMLNodeStack.top();
- } else {
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FX_XMLNODE_Unknown;
- }
- m_iCurrentNodeNum = m_CurNode.iNodeNum;
+ m_XMLNodeTypeStack.pop();
+
+ if (!m_XMLNodeTypeStack.empty())
+ m_CurNodeType = m_XMLNodeTypeStack.top();
+ else
+ m_CurNodeType = FX_XMLNODE_Unknown;
+
m_iTextDataLength = m_BlockBuffer.GetDataLength();
m_BlockBuffer.Reset(true);
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
@@ -531,12 +486,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
return m_syntaxParserResult;
}
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
@@ -567,7 +521,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_BlockBuffer.GetAvailableBlock();
m_syntaxParserState = FDE_XmlSyntaxState::Text;
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
if (!m_pCurrentBlock)
@@ -631,7 +585,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
}
if (!m_SkipStack.empty()) {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
if (!m_pCurrentBlock) {
@@ -689,12 +643,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
return m_syntaxParserResult;
}
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
@@ -721,16 +674,8 @@ bool CFX_XMLParser::GetStatus() const {
return m_syntaxParserResult != FX_XmlSyntaxResult::Error;
}
-FX_FILESIZE CFX_XMLParser::GetCurrentBinaryPos() const {
- if (!m_pStream)
- return 0;
-
- int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start);
- return m_iParsedBytes + nDstLen;
-}
-
void CFX_XMLParser::ParseTextChar(wchar_t character) {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
if (!m_pCurrentBlock)
@@ -765,6 +710,7 @@ void CFX_XMLParser::ParseTextChar(wchar_t character) {
w = csEntity[i];
if (!std::iswdigit(w))
break;
+
ch = ch * 10 + w - L'0';
}
}
diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h
index 52d86fb82d..f369453de1 100644
--- a/core/fxcrt/xml/cfx_xmlparser.h
+++ b/core/fxcrt/xml/cfx_xmlparser.h
@@ -93,20 +93,7 @@ class CFX_XMLParser {
};
void ParseTextChar(wchar_t ch);
-
bool GetStatus() const;
- FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; }
- FX_FILESIZE GetCurrentBinaryPos() const;
- int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
- int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
-
- WideString GetTargetName() const {
- return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
- }
-
- WideString GetTargetData() const {
- return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
- }
CFX_XMLNode* m_pParent;
CFX_XMLNode* m_pChild;
@@ -116,21 +103,15 @@ class CFX_XMLParser {
RetainPtr<CFX_SeekableStreamProxy> m_pStream;
size_t m_iXMLPlaneSize;
FX_FILESIZE m_iCurrentPos;
- int32_t m_iCurrentNodeNum;
- int32_t m_iLastNodeNum;
- int32_t m_iParsedBytes;
- FX_FILESIZE m_ParsedChars;
std::vector<wchar_t> m_Buffer;
- size_t m_iBufferChars;
bool m_bEOS;
FX_FILESIZE m_Start; // Start position in m_Buffer
FX_FILESIZE m_End; // End position in m_Buffer
- FX_XMLNODE m_CurNode;
- std::stack<FX_XMLNODE> m_XMLNodeStack;
+ FX_XMLNODETYPE m_CurNodeType;
+ std::stack<FX_XMLNODETYPE> m_XMLNodeTypeStack;
CFX_BlockBuffer m_BlockBuffer;
- int32_t m_iAllocStep;
wchar_t* m_pCurrentBlock; // Pointer into CFX_BlockBuffer
- int32_t m_iIndexInBlock;
+ size_t m_iIndexInBlock;
int32_t m_iTextDataLength;
FX_XmlSyntaxResult m_syntaxParserResult;
FDE_XmlSyntaxState m_syntaxParserState;
diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
index 4bd953df25..02addc70eb 100644
--- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp
@@ -8,7 +8,7 @@
#include "core/fxcrt/cfx_memorystream.h"
#include "core/fxcrt/fx_codepage.h"
-#include "core/fxcrt/xml/cfx_xmlnode.h"
+#include "core/fxcrt/xml/cfx_xmlelement.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/test_support.h"
#include "third_party/base/ptr_util.h"
@@ -63,7 +63,8 @@ TEST(CFX_XMLParserTest, CData) {
L" ";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -108,7 +109,8 @@ TEST(CFX_XMLParserTest, CDataWithInnerScript) {
L" ";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -142,7 +144,8 @@ TEST(CFX_XMLParserTest, ArrowBangArrow) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -174,7 +177,8 @@ TEST(CFX_XMLParserTest, ArrowBangBracketArrow) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -201,7 +205,8 @@ TEST(CFX_XMLParserTest, IncompleteCData) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -228,7 +233,8 @@ TEST(CFX_XMLParserTest, UnClosedCData) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -255,7 +261,8 @@ TEST(CFX_XMLParserTest, EmptyCData) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -289,7 +296,8 @@ TEST(CFX_XMLParserTest, Comment) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -320,7 +328,8 @@ TEST(CFX_XMLParserTest, IncorrectCommentStart) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -351,7 +360,8 @@ TEST(CFX_XMLParserTest, CommentEmpty) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -382,7 +392,8 @@ TEST(CFX_XMLParserTest, CommentThreeDash) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -407,7 +418,8 @@ TEST(CFX_XMLParserTest, CommentTwoDash) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -436,7 +448,8 @@ TEST(CFX_XMLParserTest, Entities) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -465,7 +478,8 @@ TEST(CFX_XMLParserTest, EntityOverflowHex) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
@@ -494,7 +508,8 @@ TEST(CFX_XMLParserTest, EntityOverflowDecimal) {
"</script>";
auto stream = MakeProxy(input);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
+
CFX_XMLTestParser parser(root.get(), stream);
ASSERT_EQ(FX_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse());
ASSERT_EQ(FX_XmlSyntaxResult::TagName, parser.DoSyntaxParse());
diff --git a/core/fxcrt/xml/cfx_xmltext.cpp b/core/fxcrt/xml/cfx_xmltext.cpp
index 74d0a501eb..9bed941dd4 100644
--- a/core/fxcrt/xml/cfx_xmltext.cpp
+++ b/core/fxcrt/xml/cfx_xmltext.cpp
@@ -11,7 +11,7 @@
CFX_XMLText::CFX_XMLText(const WideString& wsText)
: CFX_XMLNode(), m_wsText(wsText) {}
-CFX_XMLText::~CFX_XMLText() {}
+CFX_XMLText::~CFX_XMLText() = default;
FX_XMLNODETYPE CFX_XMLText::GetType() const {
return FX_XMLNODE_Text;
diff --git a/testing/libfuzzer/pdf_xml_fuzzer.cc b/testing/libfuzzer/pdf_xml_fuzzer.cc
index b9cb01580c..97b9d3c7cf 100644
--- a/testing/libfuzzer/pdf_xml_fuzzer.cc
+++ b/testing/libfuzzer/pdf_xml_fuzzer.cc
@@ -9,7 +9,7 @@
#include "core/fxcrt/cfx_memorystream.h"
#include "core/fxcrt/fx_safe_types.h"
#include "core/fxcrt/fx_system.h"
-#include "core/fxcrt/xml/cfx_xmlnode.h"
+#include "core/fxcrt/xml/cfx_xmlelement.h"
#include "core/fxcrt/xml/cfx_xmlparser.h"
#include "third_party/base/ptr_util.h"
@@ -20,7 +20,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
auto stream = pdfium::MakeRetain<CFX_MemoryStream>(const_cast<uint8_t*>(data),
size, false);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
CFX_XMLParser parser(root.get(), stream);
if (!parser.Parse())
return 0;
diff --git a/xfa/fxfa/parser/cxfa_document_parser.cpp b/xfa/fxfa/parser/cxfa_document_parser.cpp
index 4770d849b1..e4f85f2bac 100644
--- a/xfa/fxfa/parser/cxfa_document_parser.cpp
+++ b/xfa/fxfa/parser/cxfa_document_parser.cpp
@@ -348,7 +348,7 @@ std::unique_ptr<CFX_XMLNode> CXFA_DocumentParser::LoadXML(
const RetainPtr<IFX_SeekableStream>& pStream) {
ASSERT(pStream);
- auto root = pdfium::MakeUnique<CFX_XMLNode>();
+ auto root = pdfium::MakeUnique<CFX_XMLElement>(L"ROOT");
root->AppendChild(pdfium::MakeUnique<CFX_XMLInstruction>(L"xml"));
CFX_XMLParser parser(root.get(), pStream);