summaryrefslogtreecommitdiff
path: root/core/fxcrt/xml/cfx_xmlparser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/fxcrt/xml/cfx_xmlparser.cpp')
-rw-r--r--core/fxcrt/xml/cfx_xmlparser.cpp156
1 files changed, 51 insertions, 105 deletions
diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp
index 9be8da76b4..7788a42907 100644
--- a/core/fxcrt/xml/cfx_xmlparser.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser.cpp
@@ -45,32 +45,6 @@ const FX_XMLNAMECHAR g_XMLNameChars[] = {
{0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true},
};
-int32_t GetUTF8EncodeLength(const std::vector<wchar_t>& src,
- FX_FILESIZE iSrcLen) {
- uint32_t unicode = 0;
- int32_t iDstNum = 0;
- const wchar_t* pSrc = src.data();
- while (iSrcLen-- > 0) {
- unicode = *pSrc++;
- int nbytes = 0;
- if ((uint32_t)unicode < 0x80) {
- nbytes = 1;
- } else if ((uint32_t)unicode < 0x800) {
- nbytes = 2;
- } else if ((uint32_t)unicode < 0x10000) {
- nbytes = 3;
- } else if ((uint32_t)unicode < 0x200000) {
- nbytes = 4;
- } else if ((uint32_t)unicode < 0x4000000) {
- nbytes = 5;
- } else {
- nbytes = 6;
- }
- iDstNum += nbytes;
- }
- return iDstNum;
-}
-
} // namespace
// static
@@ -89,15 +63,10 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
m_pStream(pdfium::MakeRetain<CFX_SeekableStreamProxy>(pStream)),
m_iXMLPlaneSize(1024),
m_iCurrentPos(0),
- m_iCurrentNodeNum(-1),
- m_iLastNodeNum(-1),
- m_iParsedBytes(0),
- m_ParsedChars(0),
- m_iBufferChars(0),
m_bEOS(false),
m_Start(0),
m_End(0),
- m_iAllocStep(m_BlockBuffer.GetAllocStep()),
+ m_CurNodeType(FX_XMLNODE_Unknown),
m_pCurrentBlock(nullptr),
m_iIndexInBlock(0),
m_iTextDataLength(0),
@@ -116,9 +85,6 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
m_NodeStack.push(m_pParent);
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FX_XMLNODE_Unknown;
-
m_iXMLPlaneSize =
std::min(m_iXMLPlaneSize,
pdfium::base::checked_cast<size_t>(m_pStream->GetLength()));
@@ -138,7 +104,7 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
m_BlockBuffer.GetAvailableBlock();
}
-CFX_XMLParser::~CFX_XMLParser() {}
+CFX_XMLParser::~CFX_XMLParser() = default;
bool CFX_XMLParser::Parse() {
int32_t iCount = 0;
@@ -176,7 +142,7 @@ bool CFX_XMLParser::Parse() {
iCount++;
break;
case FX_XmlSyntaxResult::TargetName:
- m_ws1 = GetTargetName();
+ m_ws1 = m_BlockBuffer.GetTextData(0, m_iTextDataLength);
if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") {
auto child = pdfium::MakeUnique<CFX_XMLInstruction>(m_ws1);
m_pChild = child.get();
@@ -230,7 +196,8 @@ bool CFX_XMLParser::Parse() {
if (!m_ws1.IsEmpty())
instruction->AppendData(m_ws1);
- instruction->AppendData(GetTargetData());
+ instruction->AppendData(
+ m_BlockBuffer.GetTextData(0, m_iTextDataLength));
}
m_ws1.clear();
break;
@@ -260,22 +227,20 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
return m_syntaxParserResult;
}
- m_ParsedChars += m_End;
- m_iParsedBytes = m_iCurrentPos;
if (m_pStream->GetPosition() != m_iCurrentPos)
m_pStream->Seek(CFX_SeekableStreamProxy::From::Begin, m_iCurrentPos);
- m_iBufferChars =
+ size_t buffer_chars =
m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS);
iPos = m_pStream->GetPosition();
- if (m_iBufferChars < 1) {
+ if (buffer_chars == 0) {
m_iCurrentPos = iStreamLength;
m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString;
return m_syntaxParserResult;
}
m_iCurrentPos = iPos;
m_Start = 0;
- m_End = m_iBufferChars;
+ m_End = buffer_chars;
}
while (m_Start < m_End) {
@@ -306,20 +271,14 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_Start++;
m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
} else if (ch == L'?') {
- m_iLastNodeNum++;
- m_iCurrentNodeNum = m_iLastNodeNum;
- m_CurNode.iNodeNum = m_iLastNodeNum;
- m_CurNode.eNodeType = FX_XMLNODE_Instruction;
- m_XMLNodeStack.push(m_CurNode);
+ m_CurNodeType = FX_XMLNODE_Instruction;
+ m_XMLNodeTypeStack.push(m_CurNodeType);
m_Start++;
m_syntaxParserState = FDE_XmlSyntaxState::Target;
syntaxParserResult = FX_XmlSyntaxResult::InstructionOpen;
} else {
- m_iLastNodeNum++;
- m_iCurrentNodeNum = m_iLastNodeNum;
- m_CurNode.iNodeNum = m_iLastNodeNum;
- m_CurNode.eNodeType = FX_XMLNODE_Element;
- m_XMLNodeStack.push(m_CurNode);
+ m_CurNodeType = FX_XMLNODE_Element;
+ m_XMLNodeTypeStack.push(m_CurNodeType);
m_syntaxParserState = FDE_XmlSyntaxState::Tag;
syntaxParserResult = FX_XmlSyntaxResult::ElementOpen;
}
@@ -343,12 +302,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
@@ -362,12 +320,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
}
if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
if (m_BlockBuffer.IsEmpty()) {
- if (m_CurNode.eNodeType == FX_XMLNODE_Element) {
+ if (m_CurNodeType == FX_XMLNODE_Element) {
if (ch == L'>' || ch == L'/') {
m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
break;
}
- } else if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
+ } else if (m_CurNodeType == FX_XMLNODE_Instruction) {
if (ch == L'?') {
m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
m_Start++;
@@ -379,7 +337,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
} else {
- if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
+ if (m_CurNodeType == FX_XMLNODE_Instruction) {
if (ch != '=' && !IsXMLWhiteSpace(ch)) {
m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
break;
@@ -393,12 +351,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
syntaxParserResult = FX_XmlSyntaxResult::AttriName;
}
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
@@ -411,7 +368,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
}
if (ch != L'=') {
- if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) {
+ if (m_CurNodeType == FX_XMLNODE_Instruction) {
m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
break;
}
@@ -430,11 +387,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
if (ch != L'\"' && ch != L'\'') {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
- } else {
- m_wQuotationMark = ch;
- m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
- m_Start++;
}
+
+ m_wQuotationMark = ch;
+ m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
+ m_Start++;
break;
case FDE_XmlSyntaxState::AttriValue:
if (ch == m_wQuotationMark) {
@@ -456,13 +413,13 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
case FDE_XmlSyntaxState::CloseInstruction:
if (ch != L'>') {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
+
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
@@ -474,18 +431,17 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
syntaxParserResult = FX_XmlSyntaxResult::TargetData;
} else {
m_Start++;
- if (m_XMLNodeStack.empty()) {
+ if (m_XMLNodeTypeStack.empty()) {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
}
- m_XMLNodeStack.pop();
- if (!m_XMLNodeStack.empty()) {
- m_CurNode = m_XMLNodeStack.top();
- } else {
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FX_XMLNODE_Unknown;
- }
- m_iCurrentNodeNum = m_CurNode.iNodeNum;
+ m_XMLNodeTypeStack.pop();
+
+ if (!m_XMLNodeTypeStack.empty())
+ m_CurNodeType = m_XMLNodeTypeStack.top();
+ else
+ m_CurNodeType = FX_XMLNODE_Unknown;
+
m_BlockBuffer.Reset(true);
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
@@ -508,18 +464,17 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
case FDE_XmlSyntaxState::CloseElement:
if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
if (ch == L'>') {
- if (m_XMLNodeStack.empty()) {
+ if (m_XMLNodeTypeStack.empty()) {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
}
- m_XMLNodeStack.pop();
- if (!m_XMLNodeStack.empty()) {
- m_CurNode = m_XMLNodeStack.top();
- } else {
- m_CurNode.iNodeNum = -1;
- m_CurNode.eNodeType = FX_XMLNODE_Unknown;
- }
- m_iCurrentNodeNum = m_CurNode.iNodeNum;
+ m_XMLNodeTypeStack.pop();
+
+ if (!m_XMLNodeTypeStack.empty())
+ m_CurNodeType = m_XMLNodeTypeStack.top();
+ else
+ m_CurNodeType = FX_XMLNODE_Unknown;
+
m_iTextDataLength = m_BlockBuffer.GetDataLength();
m_BlockBuffer.Reset(true);
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
@@ -531,12 +486,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
return m_syntaxParserResult;
}
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
@@ -567,7 +521,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_BlockBuffer.GetAvailableBlock();
m_syntaxParserState = FDE_XmlSyntaxState::Text;
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
if (!m_pCurrentBlock)
@@ -631,7 +585,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
}
if (!m_SkipStack.empty()) {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
if (!m_pCurrentBlock) {
@@ -689,12 +643,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
return m_syntaxParserResult;
}
} else {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
+ if (!m_pCurrentBlock)
return FX_XmlSyntaxResult::Error;
- }
}
m_pCurrentBlock[m_iIndexInBlock++] = ch;
m_BlockBuffer.IncrementDataLength();
@@ -721,16 +674,8 @@ bool CFX_XMLParser::GetStatus() const {
return m_syntaxParserResult != FX_XmlSyntaxResult::Error;
}
-FX_FILESIZE CFX_XMLParser::GetCurrentBinaryPos() const {
- if (!m_pStream)
- return 0;
-
- int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start);
- return m_iParsedBytes + nDstLen;
-}
-
void CFX_XMLParser::ParseTextChar(wchar_t character) {
- if (m_iIndexInBlock == m_iAllocStep) {
+ if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
std::tie(m_pCurrentBlock, m_iIndexInBlock) =
m_BlockBuffer.GetAvailableBlock();
if (!m_pCurrentBlock)
@@ -765,6 +710,7 @@ void CFX_XMLParser::ParseTextChar(wchar_t character) {
w = csEntity[i];
if (!std::iswdigit(w))
break;
+
ch = ch * 10 + w - L'0';
}
}