From 6453a67d84dc321a5f28728e04929dc2ff35ff88 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Tue, 24 Apr 2018 18:03:27 +0000 Subject: Remove CFX_BlockBuffer This CL removes the usage of CFX_BlockBuffer from CFX_XMLParser. The block buffer has been replaced by a vector which is emptied out after the characters are removed. This should use less memory when parsing XML as the block buffer was previously storing all text characters seen in the file. Change-Id: I89568c664c762bb9feb034348524e5e86c2d9078 Reviewed-on: https://pdfium-review.googlesource.com/31275 Commit-Queue: dsinclair Reviewed-by: Henrique Nakashima --- core/fxcrt/cfx_blockbuffer.cpp | 124 --------------- core/fxcrt/cfx_blockbuffer.h | 50 ------ core/fxcrt/xml/cfx_xmlparser.cpp | 242 ++++++++---------------------- core/fxcrt/xml/cfx_xmlparser.h | 10 +- core/fxcrt/xml/cfx_xmlparser_unittest.cpp | 2 +- 5 files changed, 69 insertions(+), 359 deletions(-) delete mode 100644 core/fxcrt/cfx_blockbuffer.cpp delete mode 100644 core/fxcrt/cfx_blockbuffer.h (limited to 'core') diff --git a/core/fxcrt/cfx_blockbuffer.cpp b/core/fxcrt/cfx_blockbuffer.cpp deleted file mode 100644 index 6a7d98aa18..0000000000 --- a/core/fxcrt/cfx_blockbuffer.cpp +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/cfx_blockbuffer.h" - -#include -#include - -namespace { - -const size_t kAllocStep = 1024 * 1024; - -} // namespace - -CFX_BlockBuffer::CFX_BlockBuffer() - : m_DataLength(0), m_BufferSize(0), m_StartPosition(0) {} - -CFX_BlockBuffer::~CFX_BlockBuffer() {} - -size_t CFX_BlockBuffer::GetAllocStep() const { - return kAllocStep; -} - -std::pair CFX_BlockBuffer::GetAvailableBlock() { - if (m_BlockArray.empty()) - return {nullptr, 0}; - - size_t realIndex = m_StartPosition + m_DataLength; - if (realIndex == m_BufferSize) { - m_BlockArray.emplace_back(FX_Alloc(wchar_t, kAllocStep)); - m_BufferSize += kAllocStep; - return {m_BlockArray.back().get(), 0}; - } - return {m_BlockArray[realIndex / kAllocStep].get(), realIndex % kAllocStep}; -} - -bool CFX_BlockBuffer::InitBuffer() { - m_BlockArray.clear(); - m_BlockArray.emplace_back(FX_Alloc(wchar_t, kAllocStep)); - m_BufferSize = kAllocStep; - return true; -} - -void CFX_BlockBuffer::SetTextChar(size_t index, wchar_t ch) { - size_t realIndex = m_StartPosition + index; - size_t blockIndex = realIndex / kAllocStep; - if (blockIndex >= m_BlockArray.size()) { - size_t newBlocks = blockIndex - m_BlockArray.size() + 1; - do { - m_BlockArray.emplace_back(FX_Alloc(wchar_t, kAllocStep)); - m_BufferSize += kAllocStep; - } while (--newBlocks); - } - wchar_t* pTextData = m_BlockArray[blockIndex].get(); - pTextData[realIndex % kAllocStep] = ch; - m_DataLength = std::max(m_DataLength, index + 1); -} - -void CFX_BlockBuffer::DeleteTextChars(size_t count) { - if (count == 0) - return; - - if (count >= m_DataLength) { - Reset(false); - return; - } - m_DataLength -= count; -} - -WideString CFX_BlockBuffer::GetTextData(size_t start, size_t length) const { - if (m_BufferSize <= m_StartPosition + 1 || length == 0) - return WideString(); - - size_t maybeDataLength = m_BufferSize - 1 - m_StartPosition; - if (start > maybeDataLength) - return WideString(); - - length = std::min(length, maybeDataLength); - if (!length) - return WideString(); - - WideString wsTextData; - { - // Span's lifetime must end before ReleaseBuffer() below. - pdfium::span pBuf = wsTextData.GetBuffer(length); - size_t startBlock = 0; - size_t startInner = 0; - std::tie(startBlock, startInner) = TextDataIndex2BufIndex(start); - - size_t endBlock = 0; - size_t endInner = 0; - std::tie(endBlock, endInner) = TextDataIndex2BufIndex(start + length); - - size_t pointer = 0; - for (size_t i = startBlock; i <= endBlock; ++i) { - size_t bufferPointer = 0; - size_t copyLength = kAllocStep; - if (i == startBlock) { - copyLength -= startInner; - bufferPointer = startInner; - } - if (i == endBlock) - copyLength -= ((kAllocStep - 1) - endInner); - - wchar_t* pBlockBuf = m_BlockArray[i].get(); - memcpy(&pBuf[pointer], pBlockBuf + bufferPointer, - copyLength * sizeof(wchar_t)); - pointer += copyLength; - } - } - wsTextData.ReleaseBuffer(length); - return wsTextData; -} - -std::pair CFX_BlockBuffer::TextDataIndex2BufIndex( - const size_t iIndex) const { - ASSERT(iIndex >= 0); - - size_t realIndex = m_StartPosition + iIndex; - return {realIndex / kAllocStep, realIndex % kAllocStep}; -} diff --git a/core/fxcrt/cfx_blockbuffer.h b/core/fxcrt/cfx_blockbuffer.h deleted file mode 100644 index 1673136643..0000000000 --- a/core/fxcrt/cfx_blockbuffer.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_CFX_BLOCKBUFFER_H_ -#define CORE_FXCRT_CFX_BLOCKBUFFER_H_ - -#include - -#include -#include -#include - -#include "core/fxcrt/fx_string.h" - -class CFX_BlockBuffer { - public: - CFX_BlockBuffer(); - ~CFX_BlockBuffer(); - - bool InitBuffer(); - - std::pair GetAvailableBlock(); - size_t GetAllocStep() const; - size_t GetDataLength() const { return m_DataLength; } - void IncrementDataLength() { m_DataLength++; } - bool IsEmpty() const { return m_DataLength == 0; } - - void Reset(bool bReserveData) { - if (!bReserveData) - m_StartPosition = 0; - m_DataLength = 0; - } - - void SetTextChar(size_t iIndex, wchar_t ch); - void DeleteTextChars(size_t iCount); - WideString GetTextData(size_t iStart, size_t iLength) const; - - private: - std::pair TextDataIndex2BufIndex(const size_t iIndex) const; - - std::vector> m_BlockArray; - size_t m_DataLength; - size_t m_BufferSize; - size_t m_StartPosition; -}; - -#endif // CORE_FXCRT_CFX_BLOCKBUFFER_H_ diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp index 21bbbbe9d6..55778d3204 100644 --- a/core/fxcrt/xml/cfx_xmlparser.cpp +++ b/core/fxcrt/xml/cfx_xmlparser.cpp @@ -24,7 +24,8 @@ namespace { -const uint32_t kMaxCharRange = 0x10ffff; +constexpr size_t kCurrentTextReserve = 128; +constexpr uint32_t kMaxCharRange = 0x10ffff; bool IsXMLWhiteSpace(wchar_t ch) { return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; @@ -85,10 +86,7 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent, } m_Buffer.resize(pdfium::base::ValueOrDieForType(alloc_size_safe)); - - m_BlockBuffer.InitBuffer(); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); + current_text_.reserve(kCurrentTextReserve); } CFX_XMLParser::~CFX_XMLParser() = default; @@ -109,13 +107,13 @@ bool CFX_XMLParser::Parse() { m_pChild = m_pParent; break; - case FX_XmlSyntaxResult::ElementClose: + case FX_XmlSyntaxResult::ElementClose: { if (m_pChild->GetType() != FX_XMLNODE_Element) return false; - m_ws1 = GetTextData(); - if (m_ws1.GetLength() > 0 && - m_ws1 != static_cast(m_pChild)->GetName()) { + WideString element_name = GetTextData(); + if (element_name.GetLength() > 0 && + element_name != static_cast(m_pChild)->GetName()) { return false; } @@ -128,65 +126,65 @@ bool CFX_XMLParser::Parse() { m_pChild = m_pParent; iCount++; break; - case FX_XmlSyntaxResult::TargetName: - m_ws1 = GetTextData(); - if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") { - auto child = pdfium::MakeUnique(m_ws1); + } + case FX_XmlSyntaxResult::TargetName: { + WideString target_name = GetTextData(); + if (target_name == L"originalXFAVersion" || target_name == L"acrobat") { + auto child = pdfium::MakeUnique(target_name); m_pChild = child.get(); m_pParent->AppendChild(std::move(child)); } else { m_pChild = nullptr; } - m_ws1.clear(); break; + } case FX_XmlSyntaxResult::TagName: { - m_ws1 = GetTextData(); - auto child = pdfium::MakeUnique(m_ws1); + auto child = pdfium::MakeUnique(GetTextData()); m_pChild = child.get(); m_pParent->AppendChild(std::move(child)); m_NodeStack.push(m_pChild); m_pParent = m_pChild; break; } - case FX_XmlSyntaxResult::AttriName: - m_ws1 = GetTextData(); + case FX_XmlSyntaxResult::AttriName: { + current_attribute_name_ = GetTextData(); break; + } case FX_XmlSyntaxResult::AttriValue: if (m_pChild && m_pChild->GetType() == FX_XMLNODE_Element) { - static_cast(m_pChild)->SetAttribute(m_ws1, - GetTextData()); + static_cast(m_pChild)->SetAttribute( + current_attribute_name_, GetTextData()); } - m_ws1.clear(); + current_attribute_name_.clear(); break; case FX_XmlSyntaxResult::Text: { - m_ws1 = GetTextData(); - auto child = pdfium::MakeUnique(m_ws1); + auto child = pdfium::MakeUnique(GetTextData()); m_pChild = child.get(); m_pParent->AppendChild(std::move(child)); m_pChild = m_pParent; break; } case FX_XmlSyntaxResult::CData: { - m_ws1 = GetTextData(); - auto child = pdfium::MakeUnique(m_ws1); + auto child = pdfium::MakeUnique(GetTextData()); m_pChild = child.get(); m_pParent->AppendChild(std::move(child)); m_pChild = m_pParent; break; } - case FX_XmlSyntaxResult::TargetData: + case FX_XmlSyntaxResult::TargetData: { + WideString target_data = GetTextData(); if (m_pChild) { if (m_pChild->GetType() != FX_XMLNODE_Instruction) return false; auto* instruction = static_cast(m_pChild); - if (!m_ws1.IsEmpty()) - instruction->AppendData(m_ws1); + if (!target_data.IsEmpty()) + instruction->AppendData(target_data); instruction->AppendData(GetTextData()); } - m_ws1.clear(); break; + } case FX_XmlSyntaxResult::ElementOpen: case FX_XmlSyntaxResult::ElementBreak: case FX_XmlSyntaxResult::InstructionOpen: @@ -226,12 +224,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { switch (m_syntaxParserState) { case FDE_XmlSyntaxState::Text: if (ch == L'<') { - if (!m_BlockBuffer.IsEmpty()) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_iEntityStart = -1; + if (!current_text_.empty()) { syntaxParserResult = FX_XmlSyntaxResult::Text; } else { m_Start++; @@ -263,16 +256,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; case FDE_XmlSyntaxState::Target: case FDE_XmlSyntaxState::Tag: - if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { - if (m_BlockBuffer.IsEmpty()) { + if (!IsXMLNameChar(ch, current_text_.empty())) { + if (current_text_.empty()) { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; } - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); if (m_syntaxParserState != FDE_XmlSyntaxState::Target) syntaxParserResult = FX_XmlSyntaxResult::TagName; else @@ -280,24 +269,17 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_syntaxParserState = FDE_XmlSyntaxState::AttriName; } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_Start++; } break; case FDE_XmlSyntaxState::AttriName: - if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) { + if (current_text_.empty() && IsXMLWhiteSpace(ch)) { m_Start++; break; } - if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { - if (m_BlockBuffer.IsEmpty()) { + if (!IsXMLNameChar(ch, current_text_.empty())) { + if (current_text_.empty()) { if (m_CurNodeType == FX_XMLNODE_Element) { if (ch == L'>' || ch == L'/') { m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; @@ -321,22 +303,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; } } - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; syntaxParserResult = FX_XmlSyntaxResult::AttriName; } } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_Start++; } break; @@ -377,11 +348,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; } - m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_wQuotationMark = 0; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_Start++; m_syntaxParserState = FDE_XmlSyntaxState::AttriName; syntaxParserResult = FX_XmlSyntaxResult::AttriValue; @@ -391,21 +358,9 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; case FDE_XmlSyntaxState::CloseInstruction: if (ch != L'>') { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - } else if (!m_BlockBuffer.IsEmpty()) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); + } else if (!current_text_.empty()) { syntaxParserResult = FX_XmlSyntaxResult::TargetData; } else { m_Start++; @@ -420,9 +375,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { else m_CurNodeType = FX_XMLNODE_Unknown; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::Text; syntaxParserResult = FX_XmlSyntaxResult::InstructionClose; } @@ -440,7 +392,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_Start++; break; case FDE_XmlSyntaxState::CloseElement: - if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { + if (!IsXMLNameChar(ch, current_text_.empty())) { if (ch == L'>') { if (m_XMLNodeTypeStack.empty()) { m_syntaxParserResult = FX_XmlSyntaxResult::Error; @@ -453,10 +405,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { else m_CurNodeType = FX_XMLNODE_Unknown; - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::Text; syntaxParserResult = FX_XmlSyntaxResult::ElementClose; } else if (!IsXMLWhiteSpace(ch)) { @@ -464,14 +412,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { return m_syntaxParserResult; } } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); } m_Start++; break; @@ -493,20 +434,9 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) { m_Start += 3; syntaxParserResult = FX_XmlSyntaxResult::CData; - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::Text; } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_Start++; } break; @@ -548,10 +478,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { if (ch == m_SkipChar) { m_SkipStack.pop(); if (m_SkipStack.empty()) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::Text; } else { m_SkipChar = m_SkipStack.top(); @@ -559,17 +485,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { } break; } - if (!m_SkipStack.empty()) { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { - return FX_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); - } m_Start++; } break; @@ -583,16 +498,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; case FDE_XmlSyntaxState::TargetData: if (IsXMLWhiteSpace(ch)) { - if (m_BlockBuffer.IsEmpty()) { + if (current_text_.empty()) { m_Start++; break; } if (m_wQuotationMark == 0) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_wQuotationMark = 0; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_Start++; syntaxParserResult = FX_XmlSyntaxResult::TargetData; break; @@ -606,11 +517,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_wQuotationMark = ch; m_Start++; } else if (ch == m_wQuotationMark) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_wQuotationMark = 0; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_Start++; syntaxParserResult = FX_XmlSyntaxResult::TargetData; } else { @@ -618,14 +525,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { return m_syntaxParserResult; } } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_Start++; } break; @@ -644,19 +544,17 @@ bool CFX_XMLParser::GetStatus() const { } void CFX_XMLParser::ParseTextChar(wchar_t character) { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return; - } + current_text_.push_back(character); - m_pCurrentBlock[m_iIndexInBlock++] = character; - m_BlockBuffer.IncrementDataLength(); if (m_iEntityStart > -1 && character == L';') { - WideString csEntity = m_BlockBuffer.GetTextData( - m_iEntityStart + 1, - m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1); + // Copy the entity out into a string and remove from the vector. When we + // copy the entity we don't want to copy out the & or the ; so we start + // shifted by one and want to copy 2 less characters in total. + WideString csEntity(current_text_.data() + m_iEntityStart + 1, + current_text_.size() - m_iEntityStart - 2); + current_text_.erase(current_text_.begin() + m_iEntityStart, + current_text_.end()); + int32_t iLen = csEntity.GetLength(); if (iLen > 0) { if (csEntity[0] == L'#') { @@ -678,43 +576,33 @@ void CFX_XMLParser::ParseTextChar(wchar_t character) { ch = ' '; character = static_cast(ch); - if (character != 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, character); - m_iEntityStart++; - } + if (character != 0) + current_text_.push_back(character); } else { if (csEntity.Compare(L"amp") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); - m_iEntityStart++; + current_text_.push_back(L'&'); } else if (csEntity.Compare(L"lt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); - m_iEntityStart++; + current_text_.push_back(L'<'); } else if (csEntity.Compare(L"gt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); - m_iEntityStart++; + current_text_.push_back(L'>'); } else if (csEntity.Compare(L"apos") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); - m_iEntityStart++; + current_text_.push_back(L'\''); } else if (csEntity.Compare(L"quot") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); - m_iEntityStart++; + current_text_.push_back(L'"'); } } } - if (m_iEntityStart >= 0 && - m_BlockBuffer.GetDataLength() > static_cast(m_iEntityStart)) { - m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() - - m_iEntityStart); - } - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); + m_iEntityStart = -1; } else if (m_iEntityStart < 0 && character == L'&') { - m_iEntityStart = m_BlockBuffer.GetDataLength() - 1; + m_iEntityStart = current_text_.size() - 1; } m_Start++; } -WideString CFX_XMLParser::GetTextData() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); +WideString CFX_XMLParser::GetTextData() { + WideString ret(current_text_.data(), current_text_.size()); + current_text_.clear(); + current_text_.reserve(kCurrentTextReserve); + return ret; } diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h index 503852753f..6121f0c1dc 100644 --- a/core/fxcrt/xml/cfx_xmlparser.h +++ b/core/fxcrt/xml/cfx_xmlparser.h @@ -11,7 +11,6 @@ #include #include -#include "core/fxcrt/cfx_blockbuffer.h" #include "core/fxcrt/fx_string.h" #include "core/fxcrt/retain_ptr.h" #include "core/fxcrt/xml/cfx_xmlnode.h" @@ -50,7 +49,7 @@ class CFX_XMLParser { protected: FX_XmlSyntaxResult DoSyntaxParse(); - WideString GetTextData() const; + WideString GetTextData(); private: enum class FDE_XmlSyntaxState { @@ -81,7 +80,7 @@ class CFX_XMLParser { CFX_XMLNode* m_pParent; CFX_XMLNode* m_pChild = nullptr; - WideString m_ws1; + WideString current_attribute_name_; RetainPtr m_pStream; FX_FILESIZE m_Start = 0; // Start position in m_Buffer FX_FILESIZE m_End = 0; // End position in m_Buffer @@ -92,11 +91,8 @@ class CFX_XMLParser { std::stack m_XMLNodeTypeStack; std::stack m_SkipStack; std::vector m_Buffer; - CFX_BlockBuffer m_BlockBuffer; - wchar_t* m_pCurrentBlock = nullptr; // Pointer into CFX_BlockBuffer - size_t m_iIndexInBlock = 0; + std::vector current_text_; size_t m_iXMLPlaneSize = 1024; - int32_t m_iTextDataLength = 0; int32_t m_iEntityStart = -1; wchar_t m_wQuotationMark = 0; wchar_t m_SkipChar = 0; diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp index 0b51c6b88c..790001cc27 100644 --- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp +++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp @@ -24,7 +24,7 @@ class CFX_XMLTestParser : public CFX_XMLParser { ~CFX_XMLTestParser() override = default; FX_XmlSyntaxResult DoSyntaxParse() { return CFX_XMLParser::DoSyntaxParse(); } - WideString GetTextData() const { return CFX_XMLParser::GetTextData(); } + WideString GetTextData() { return CFX_XMLParser::GetTextData(); } }; RetainPtr MakeProxy(const char* input) { -- cgit v1.2.3