summaryrefslogtreecommitdiff
path: root/core/fxcrt/xml/cfx_xmlparser.cpp
diff options
context:
space:
mode:
authorDan Sinclair <dsinclair@chromium.org>2018-04-24 18:03:27 +0000
committerChromium commit bot <commit-bot@chromium.org>2018-04-24 18:03:27 +0000
commit6453a67d84dc321a5f28728e04929dc2ff35ff88 (patch)
treee2bc524cf9cea16aff853e4ed7afed11efa8fd20 /core/fxcrt/xml/cfx_xmlparser.cpp
parentdf96bf69f22d63a0ab6c5e48556682b0532c3079 (diff)
downloadpdfium-6453a67d84dc321a5f28728e04929dc2ff35ff88.tar.xz
Remove CFX_BlockBufferchromium/3406
This CL removes the usage of CFX_BlockBuffer from CFX_XMLParser. The block buffer has been replaced by a vector which is emptied out after the characters are removed. This should use less memory when parsing XML as the block buffer was previously storing all text characters seen in the file. Change-Id: I89568c664c762bb9feb034348524e5e86c2d9078 Reviewed-on: https://pdfium-review.googlesource.com/31275 Commit-Queue: dsinclair <dsinclair@chromium.org> Reviewed-by: Henrique Nakashima <hnakashima@chromium.org>
Diffstat (limited to 'core/fxcrt/xml/cfx_xmlparser.cpp')
-rw-r--r--core/fxcrt/xml/cfx_xmlparser.cpp242
1 files changed, 65 insertions, 177 deletions
diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp
index 21bbbbe9d6..55778d3204 100644
--- a/core/fxcrt/xml/cfx_xmlparser.cpp
+++ b/core/fxcrt/xml/cfx_xmlparser.cpp
@@ -24,7 +24,8 @@
namespace {
-const uint32_t kMaxCharRange = 0x10ffff;
+constexpr size_t kCurrentTextReserve = 128;
+constexpr uint32_t kMaxCharRange = 0x10ffff;
bool IsXMLWhiteSpace(wchar_t ch) {
return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09;
@@ -85,10 +86,7 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
}
m_Buffer.resize(pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe));
-
- m_BlockBuffer.InitBuffer();
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
+ current_text_.reserve(kCurrentTextReserve);
}
CFX_XMLParser::~CFX_XMLParser() = default;
@@ -109,13 +107,13 @@ bool CFX_XMLParser::Parse() {
m_pChild = m_pParent;
break;
- case FX_XmlSyntaxResult::ElementClose:
+ case FX_XmlSyntaxResult::ElementClose: {
if (m_pChild->GetType() != FX_XMLNODE_Element)
return false;
- m_ws1 = GetTextData();
- if (m_ws1.GetLength() > 0 &&
- m_ws1 != static_cast<CFX_XMLElement*>(m_pChild)->GetName()) {
+ WideString element_name = GetTextData();
+ if (element_name.GetLength() > 0 &&
+ element_name != static_cast<CFX_XMLElement*>(m_pChild)->GetName()) {
return false;
}
@@ -128,65 +126,65 @@ bool CFX_XMLParser::Parse() {
m_pChild = m_pParent;
iCount++;
break;
- case FX_XmlSyntaxResult::TargetName:
- m_ws1 = GetTextData();
- if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") {
- auto child = pdfium::MakeUnique<CFX_XMLInstruction>(m_ws1);
+ }
+ case FX_XmlSyntaxResult::TargetName: {
+ WideString target_name = GetTextData();
+ if (target_name == L"originalXFAVersion" || target_name == L"acrobat") {
+ auto child = pdfium::MakeUnique<CFX_XMLInstruction>(target_name);
m_pChild = child.get();
m_pParent->AppendChild(std::move(child));
} else {
m_pChild = nullptr;
}
- m_ws1.clear();
break;
+ }
case FX_XmlSyntaxResult::TagName: {
- m_ws1 = GetTextData();
- auto child = pdfium::MakeUnique<CFX_XMLElement>(m_ws1);
+ auto child = pdfium::MakeUnique<CFX_XMLElement>(GetTextData());
m_pChild = child.get();
m_pParent->AppendChild(std::move(child));
m_NodeStack.push(m_pChild);
m_pParent = m_pChild;
break;
}
- case FX_XmlSyntaxResult::AttriName:
- m_ws1 = GetTextData();
+ case FX_XmlSyntaxResult::AttriName: {
+ current_attribute_name_ = GetTextData();
break;
+ }
case FX_XmlSyntaxResult::AttriValue:
if (m_pChild && m_pChild->GetType() == FX_XMLNODE_Element) {
- static_cast<CFX_XMLElement*>(m_pChild)->SetAttribute(m_ws1,
- GetTextData());
+ static_cast<CFX_XMLElement*>(m_pChild)->SetAttribute(
+ current_attribute_name_, GetTextData());
}
- m_ws1.clear();
+ current_attribute_name_.clear();
break;
case FX_XmlSyntaxResult::Text: {
- m_ws1 = GetTextData();
- auto child = pdfium::MakeUnique<CFX_XMLText>(m_ws1);
+ auto child = pdfium::MakeUnique<CFX_XMLText>(GetTextData());
m_pChild = child.get();
m_pParent->AppendChild(std::move(child));
m_pChild = m_pParent;
break;
}
case FX_XmlSyntaxResult::CData: {
- m_ws1 = GetTextData();
- auto child = pdfium::MakeUnique<CFX_XMLCharData>(m_ws1);
+ auto child = pdfium::MakeUnique<CFX_XMLCharData>(GetTextData());
m_pChild = child.get();
m_pParent->AppendChild(std::move(child));
m_pChild = m_pParent;
break;
}
- case FX_XmlSyntaxResult::TargetData:
+ case FX_XmlSyntaxResult::TargetData: {
+ WideString target_data = GetTextData();
if (m_pChild) {
if (m_pChild->GetType() != FX_XMLNODE_Instruction)
return false;
auto* instruction = static_cast<CFX_XMLInstruction*>(m_pChild);
- if (!m_ws1.IsEmpty())
- instruction->AppendData(m_ws1);
+ if (!target_data.IsEmpty())
+ instruction->AppendData(target_data);
instruction->AppendData(GetTextData());
}
- m_ws1.clear();
break;
+ }
case FX_XmlSyntaxResult::ElementOpen:
case FX_XmlSyntaxResult::ElementBreak:
case FX_XmlSyntaxResult::InstructionOpen:
@@ -226,12 +224,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
switch (m_syntaxParserState) {
case FDE_XmlSyntaxState::Text:
if (ch == L'<') {
- if (!m_BlockBuffer.IsEmpty()) {
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- m_iEntityStart = -1;
+ if (!current_text_.empty()) {
syntaxParserResult = FX_XmlSyntaxResult::Text;
} else {
m_Start++;
@@ -263,16 +256,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
case FDE_XmlSyntaxState::Target:
case FDE_XmlSyntaxState::Tag:
- if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
- if (m_BlockBuffer.IsEmpty()) {
+ if (!IsXMLNameChar(ch, current_text_.empty())) {
+ if (current_text_.empty()) {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
}
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
if (m_syntaxParserState != FDE_XmlSyntaxState::Target)
syntaxParserResult = FX_XmlSyntaxResult::TagName;
else
@@ -280,24 +269,17 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
} else {
- if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock)
- return FX_XmlSyntaxResult::Error;
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
+ current_text_.push_back(ch);
m_Start++;
}
break;
case FDE_XmlSyntaxState::AttriName:
- if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) {
+ if (current_text_.empty() && IsXMLWhiteSpace(ch)) {
m_Start++;
break;
}
- if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
- if (m_BlockBuffer.IsEmpty()) {
+ if (!IsXMLNameChar(ch, current_text_.empty())) {
+ if (current_text_.empty()) {
if (m_CurNodeType == FX_XMLNODE_Element) {
if (ch == L'>' || ch == L'/') {
m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
@@ -321,22 +303,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
}
}
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign;
syntaxParserResult = FX_XmlSyntaxResult::AttriName;
}
} else {
- if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock)
- return FX_XmlSyntaxResult::Error;
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
+ current_text_.push_back(ch);
m_Start++;
}
break;
@@ -377,11 +348,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
return m_syntaxParserResult;
}
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
m_wQuotationMark = 0;
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
m_Start++;
m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
syntaxParserResult = FX_XmlSyntaxResult::AttriValue;
@@ -391,21 +358,9 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
case FDE_XmlSyntaxState::CloseInstruction:
if (ch != L'>') {
- if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock)
- return FX_XmlSyntaxResult::Error;
- }
-
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
+ current_text_.push_back(ch);
m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
- } else if (!m_BlockBuffer.IsEmpty()) {
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
+ } else if (!current_text_.empty()) {
syntaxParserResult = FX_XmlSyntaxResult::TargetData;
} else {
m_Start++;
@@ -420,9 +375,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
else
m_CurNodeType = FX_XMLNODE_Unknown;
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
m_syntaxParserState = FDE_XmlSyntaxState::Text;
syntaxParserResult = FX_XmlSyntaxResult::InstructionClose;
}
@@ -440,7 +392,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_Start++;
break;
case FDE_XmlSyntaxState::CloseElement:
- if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) {
+ if (!IsXMLNameChar(ch, current_text_.empty())) {
if (ch == L'>') {
if (m_XMLNodeTypeStack.empty()) {
m_syntaxParserResult = FX_XmlSyntaxResult::Error;
@@ -453,10 +405,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
else
m_CurNodeType = FX_XMLNODE_Unknown;
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
m_syntaxParserState = FDE_XmlSyntaxState::Text;
syntaxParserResult = FX_XmlSyntaxResult::ElementClose;
} else if (!IsXMLWhiteSpace(ch)) {
@@ -464,14 +412,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
return m_syntaxParserResult;
}
} else {
- if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock)
- return FX_XmlSyntaxResult::Error;
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
+ current_text_.push_back(ch);
}
m_Start++;
break;
@@ -493,20 +434,9 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) {
m_Start += 3;
syntaxParserResult = FX_XmlSyntaxResult::CData;
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
m_syntaxParserState = FDE_XmlSyntaxState::Text;
} else {
- if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock)
- return FX_XmlSyntaxResult::Error;
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
+ current_text_.push_back(ch);
m_Start++;
}
break;
@@ -548,10 +478,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
if (ch == m_SkipChar) {
m_SkipStack.pop();
if (m_SkipStack.empty()) {
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
m_syntaxParserState = FDE_XmlSyntaxState::Text;
} else {
m_SkipChar = m_SkipStack.top();
@@ -559,17 +485,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
}
break;
}
- if (!m_SkipStack.empty()) {
- if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock) {
- return FX_XmlSyntaxResult::Error;
- }
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
- }
m_Start++;
}
break;
@@ -583,16 +498,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
break;
case FDE_XmlSyntaxState::TargetData:
if (IsXMLWhiteSpace(ch)) {
- if (m_BlockBuffer.IsEmpty()) {
+ if (current_text_.empty()) {
m_Start++;
break;
}
if (m_wQuotationMark == 0) {
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
m_wQuotationMark = 0;
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
m_Start++;
syntaxParserResult = FX_XmlSyntaxResult::TargetData;
break;
@@ -606,11 +517,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
m_wQuotationMark = ch;
m_Start++;
} else if (ch == m_wQuotationMark) {
- m_iTextDataLength = m_BlockBuffer.GetDataLength();
m_wQuotationMark = 0;
- m_BlockBuffer.Reset(true);
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
m_Start++;
syntaxParserResult = FX_XmlSyntaxResult::TargetData;
} else {
@@ -618,14 +525,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() {
return m_syntaxParserResult;
}
} else {
- if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock)
- return FX_XmlSyntaxResult::Error;
- }
- m_pCurrentBlock[m_iIndexInBlock++] = ch;
- m_BlockBuffer.IncrementDataLength();
+ current_text_.push_back(ch);
m_Start++;
}
break;
@@ -644,19 +544,17 @@ bool CFX_XMLParser::GetStatus() const {
}
void CFX_XMLParser::ParseTextChar(wchar_t character) {
- if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) {
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
- if (!m_pCurrentBlock)
- return;
- }
+ current_text_.push_back(character);
- m_pCurrentBlock[m_iIndexInBlock++] = character;
- m_BlockBuffer.IncrementDataLength();
if (m_iEntityStart > -1 && character == L';') {
- WideString csEntity = m_BlockBuffer.GetTextData(
- m_iEntityStart + 1,
- m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1);
+ // Copy the entity out into a string and remove from the vector. When we
+ // copy the entity we don't want to copy out the & or the ; so we start
+ // shifted by one and want to copy 2 less characters in total.
+ WideString csEntity(current_text_.data() + m_iEntityStart + 1,
+ current_text_.size() - m_iEntityStart - 2);
+ current_text_.erase(current_text_.begin() + m_iEntityStart,
+ current_text_.end());
+
int32_t iLen = csEntity.GetLength();
if (iLen > 0) {
if (csEntity[0] == L'#') {
@@ -678,43 +576,33 @@ void CFX_XMLParser::ParseTextChar(wchar_t character) {
ch = ' ';
character = static_cast<wchar_t>(ch);
- if (character != 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, character);
- m_iEntityStart++;
- }
+ if (character != 0)
+ current_text_.push_back(character);
} else {
if (csEntity.Compare(L"amp") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'&');
- m_iEntityStart++;
+ current_text_.push_back(L'&');
} else if (csEntity.Compare(L"lt") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'<');
- m_iEntityStart++;
+ current_text_.push_back(L'<');
} else if (csEntity.Compare(L"gt") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'>');
- m_iEntityStart++;
+ current_text_.push_back(L'>');
} else if (csEntity.Compare(L"apos") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'\'');
- m_iEntityStart++;
+ current_text_.push_back(L'\'');
} else if (csEntity.Compare(L"quot") == 0) {
- m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"');
- m_iEntityStart++;
+ current_text_.push_back(L'"');
}
}
}
- if (m_iEntityStart >= 0 &&
- m_BlockBuffer.GetDataLength() > static_cast<size_t>(m_iEntityStart)) {
- m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() -
- m_iEntityStart);
- }
- std::tie(m_pCurrentBlock, m_iIndexInBlock) =
- m_BlockBuffer.GetAvailableBlock();
+
m_iEntityStart = -1;
} else if (m_iEntityStart < 0 && character == L'&') {
- m_iEntityStart = m_BlockBuffer.GetDataLength() - 1;
+ m_iEntityStart = current_text_.size() - 1;
}
m_Start++;
}
-WideString CFX_XMLParser::GetTextData() const {
- return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
+WideString CFX_XMLParser::GetTextData() {
+ WideString ret(current_text_.data(), current_text_.size());
+ current_text_.clear();
+ current_text_.reserve(kCurrentTextReserve);
+ return ret;
}