diff options
Diffstat (limited to 'core/fxcrt')
-rw-r--r-- | core/fxcrt/cfx_blockbuffer.cpp | 124 | ||||
-rw-r--r-- | core/fxcrt/cfx_blockbuffer.h | 50 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlparser.cpp | 242 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlparser.h | 10 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlparser_unittest.cpp | 2 |
5 files changed, 69 insertions, 359 deletions
diff --git a/core/fxcrt/cfx_blockbuffer.cpp b/core/fxcrt/cfx_blockbuffer.cpp deleted file mode 100644 index 6a7d98aa18..0000000000 --- a/core/fxcrt/cfx_blockbuffer.cpp +++ /dev/null @@ -1,124 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/cfx_blockbuffer.h" - -#include <algorithm> -#include <utility> - -namespace { - -const size_t kAllocStep = 1024 * 1024; - -} // namespace - -CFX_BlockBuffer::CFX_BlockBuffer() - : m_DataLength(0), m_BufferSize(0), m_StartPosition(0) {} - -CFX_BlockBuffer::~CFX_BlockBuffer() {} - -size_t CFX_BlockBuffer::GetAllocStep() const { - return kAllocStep; -} - -std::pair<wchar_t*, size_t> CFX_BlockBuffer::GetAvailableBlock() { - if (m_BlockArray.empty()) - return {nullptr, 0}; - - size_t realIndex = m_StartPosition + m_DataLength; - if (realIndex == m_BufferSize) { - m_BlockArray.emplace_back(FX_Alloc(wchar_t, kAllocStep)); - m_BufferSize += kAllocStep; - return {m_BlockArray.back().get(), 0}; - } - return {m_BlockArray[realIndex / kAllocStep].get(), realIndex % kAllocStep}; -} - -bool CFX_BlockBuffer::InitBuffer() { - m_BlockArray.clear(); - m_BlockArray.emplace_back(FX_Alloc(wchar_t, kAllocStep)); - m_BufferSize = kAllocStep; - return true; -} - -void CFX_BlockBuffer::SetTextChar(size_t index, wchar_t ch) { - size_t realIndex = m_StartPosition + index; - size_t blockIndex = realIndex / kAllocStep; - if (blockIndex >= m_BlockArray.size()) { - size_t newBlocks = blockIndex - m_BlockArray.size() + 1; - do { - m_BlockArray.emplace_back(FX_Alloc(wchar_t, kAllocStep)); - m_BufferSize += kAllocStep; - } while (--newBlocks); - } - wchar_t* pTextData = m_BlockArray[blockIndex].get(); - pTextData[realIndex % kAllocStep] = ch; - m_DataLength = std::max(m_DataLength, index + 1); -} - -void CFX_BlockBuffer::DeleteTextChars(size_t count) { - if (count == 0) - return; - - if (count >= m_DataLength) { - Reset(false); - return; - } - m_DataLength -= count; -} - -WideString CFX_BlockBuffer::GetTextData(size_t start, size_t length) const { - if (m_BufferSize <= m_StartPosition + 1 || length == 0) - return WideString(); - - size_t maybeDataLength = m_BufferSize - 1 - m_StartPosition; - if (start > maybeDataLength) - return WideString(); - - length = std::min(length, maybeDataLength); - if (!length) - return WideString(); - - WideString wsTextData; - { - // Span's lifetime must end before ReleaseBuffer() below. - pdfium::span<wchar_t> pBuf = wsTextData.GetBuffer(length); - size_t startBlock = 0; - size_t startInner = 0; - std::tie(startBlock, startInner) = TextDataIndex2BufIndex(start); - - size_t endBlock = 0; - size_t endInner = 0; - std::tie(endBlock, endInner) = TextDataIndex2BufIndex(start + length); - - size_t pointer = 0; - for (size_t i = startBlock; i <= endBlock; ++i) { - size_t bufferPointer = 0; - size_t copyLength = kAllocStep; - if (i == startBlock) { - copyLength -= startInner; - bufferPointer = startInner; - } - if (i == endBlock) - copyLength -= ((kAllocStep - 1) - endInner); - - wchar_t* pBlockBuf = m_BlockArray[i].get(); - memcpy(&pBuf[pointer], pBlockBuf + bufferPointer, - copyLength * sizeof(wchar_t)); - pointer += copyLength; - } - } - wsTextData.ReleaseBuffer(length); - return wsTextData; -} - -std::pair<size_t, size_t> CFX_BlockBuffer::TextDataIndex2BufIndex( - const size_t iIndex) const { - ASSERT(iIndex >= 0); - - size_t realIndex = m_StartPosition + iIndex; - return {realIndex / kAllocStep, realIndex % kAllocStep}; -} diff --git a/core/fxcrt/cfx_blockbuffer.h b/core/fxcrt/cfx_blockbuffer.h deleted file mode 100644 index 1673136643..0000000000 --- a/core/fxcrt/cfx_blockbuffer.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_CFX_BLOCKBUFFER_H_ -#define CORE_FXCRT_CFX_BLOCKBUFFER_H_ - -#include <stdint.h> - -#include <memory> -#include <utility> -#include <vector> - -#include "core/fxcrt/fx_string.h" - -class CFX_BlockBuffer { - public: - CFX_BlockBuffer(); - ~CFX_BlockBuffer(); - - bool InitBuffer(); - - std::pair<wchar_t*, size_t> GetAvailableBlock(); - size_t GetAllocStep() const; - size_t GetDataLength() const { return m_DataLength; } - void IncrementDataLength() { m_DataLength++; } - bool IsEmpty() const { return m_DataLength == 0; } - - void Reset(bool bReserveData) { - if (!bReserveData) - m_StartPosition = 0; - m_DataLength = 0; - } - - void SetTextChar(size_t iIndex, wchar_t ch); - void DeleteTextChars(size_t iCount); - WideString GetTextData(size_t iStart, size_t iLength) const; - - private: - std::pair<size_t, size_t> TextDataIndex2BufIndex(const size_t iIndex) const; - - std::vector<std::unique_ptr<wchar_t, FxFreeDeleter>> m_BlockArray; - size_t m_DataLength; - size_t m_BufferSize; - size_t m_StartPosition; -}; - -#endif // CORE_FXCRT_CFX_BLOCKBUFFER_H_ diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp index 21bbbbe9d6..55778d3204 100644 --- a/core/fxcrt/xml/cfx_xmlparser.cpp +++ b/core/fxcrt/xml/cfx_xmlparser.cpp @@ -24,7 +24,8 @@ namespace { -const uint32_t kMaxCharRange = 0x10ffff; +constexpr size_t kCurrentTextReserve = 128; +constexpr uint32_t kMaxCharRange = 0x10ffff; bool IsXMLWhiteSpace(wchar_t ch) { return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; @@ -85,10 +86,7 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent, } m_Buffer.resize(pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe)); - - m_BlockBuffer.InitBuffer(); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); + current_text_.reserve(kCurrentTextReserve); } CFX_XMLParser::~CFX_XMLParser() = default; @@ -109,13 +107,13 @@ bool CFX_XMLParser::Parse() { m_pChild = m_pParent; break; - case FX_XmlSyntaxResult::ElementClose: + case FX_XmlSyntaxResult::ElementClose: { if (m_pChild->GetType() != FX_XMLNODE_Element) return false; - m_ws1 = GetTextData(); - if (m_ws1.GetLength() > 0 && - m_ws1 != static_cast<CFX_XMLElement*>(m_pChild)->GetName()) { + WideString element_name = GetTextData(); + if (element_name.GetLength() > 0 && + element_name != static_cast<CFX_XMLElement*>(m_pChild)->GetName()) { return false; } @@ -128,65 +126,65 @@ bool CFX_XMLParser::Parse() { m_pChild = m_pParent; iCount++; break; - case FX_XmlSyntaxResult::TargetName: - m_ws1 = GetTextData(); - if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") { - auto child = pdfium::MakeUnique<CFX_XMLInstruction>(m_ws1); + } + case FX_XmlSyntaxResult::TargetName: { + WideString target_name = GetTextData(); + if (target_name == L"originalXFAVersion" || target_name == L"acrobat") { + auto child = pdfium::MakeUnique<CFX_XMLInstruction>(target_name); m_pChild = child.get(); m_pParent->AppendChild(std::move(child)); } else { m_pChild = nullptr; } - m_ws1.clear(); break; + } case FX_XmlSyntaxResult::TagName: { - m_ws1 = GetTextData(); - auto child = pdfium::MakeUnique<CFX_XMLElement>(m_ws1); + auto child = pdfium::MakeUnique<CFX_XMLElement>(GetTextData()); m_pChild = child.get(); m_pParent->AppendChild(std::move(child)); m_NodeStack.push(m_pChild); m_pParent = m_pChild; break; } - case FX_XmlSyntaxResult::AttriName: - m_ws1 = GetTextData(); + case FX_XmlSyntaxResult::AttriName: { + current_attribute_name_ = GetTextData(); break; + } case FX_XmlSyntaxResult::AttriValue: if (m_pChild && m_pChild->GetType() == FX_XMLNODE_Element) { - static_cast<CFX_XMLElement*>(m_pChild)->SetAttribute(m_ws1, - GetTextData()); + static_cast<CFX_XMLElement*>(m_pChild)->SetAttribute( + current_attribute_name_, GetTextData()); } - m_ws1.clear(); + current_attribute_name_.clear(); break; case FX_XmlSyntaxResult::Text: { - m_ws1 = GetTextData(); - auto child = pdfium::MakeUnique<CFX_XMLText>(m_ws1); + auto child = pdfium::MakeUnique<CFX_XMLText>(GetTextData()); m_pChild = child.get(); m_pParent->AppendChild(std::move(child)); m_pChild = m_pParent; break; } case FX_XmlSyntaxResult::CData: { - m_ws1 = GetTextData(); - auto child = pdfium::MakeUnique<CFX_XMLCharData>(m_ws1); + auto child = pdfium::MakeUnique<CFX_XMLCharData>(GetTextData()); m_pChild = child.get(); m_pParent->AppendChild(std::move(child)); m_pChild = m_pParent; break; } - case FX_XmlSyntaxResult::TargetData: + case FX_XmlSyntaxResult::TargetData: { + WideString target_data = GetTextData(); if (m_pChild) { if (m_pChild->GetType() != FX_XMLNODE_Instruction) return false; auto* instruction = static_cast<CFX_XMLInstruction*>(m_pChild); - if (!m_ws1.IsEmpty()) - instruction->AppendData(m_ws1); + if (!target_data.IsEmpty()) + instruction->AppendData(target_data); instruction->AppendData(GetTextData()); } - m_ws1.clear(); break; + } case FX_XmlSyntaxResult::ElementOpen: case FX_XmlSyntaxResult::ElementBreak: case FX_XmlSyntaxResult::InstructionOpen: @@ -226,12 +224,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { switch (m_syntaxParserState) { case FDE_XmlSyntaxState::Text: if (ch == L'<') { - if (!m_BlockBuffer.IsEmpty()) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_iEntityStart = -1; + if (!current_text_.empty()) { syntaxParserResult = FX_XmlSyntaxResult::Text; } else { m_Start++; @@ -263,16 +256,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; case FDE_XmlSyntaxState::Target: case FDE_XmlSyntaxState::Tag: - if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { - if (m_BlockBuffer.IsEmpty()) { + if (!IsXMLNameChar(ch, current_text_.empty())) { + if (current_text_.empty()) { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; } - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); if (m_syntaxParserState != FDE_XmlSyntaxState::Target) syntaxParserResult = FX_XmlSyntaxResult::TagName; else @@ -280,24 +269,17 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_syntaxParserState = FDE_XmlSyntaxState::AttriName; } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_Start++; } break; case FDE_XmlSyntaxState::AttriName: - if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) { + if (current_text_.empty() && IsXMLWhiteSpace(ch)) { m_Start++; break; } - if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { - if (m_BlockBuffer.IsEmpty()) { + if (!IsXMLNameChar(ch, current_text_.empty())) { + if (current_text_.empty()) { if (m_CurNodeType == FX_XMLNODE_Element) { if (ch == L'>' || ch == L'/') { m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; @@ -321,22 +303,11 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; } } - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; syntaxParserResult = FX_XmlSyntaxResult::AttriName; } } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_Start++; } break; @@ -377,11 +348,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_syntaxParserResult = FX_XmlSyntaxResult::Error; return m_syntaxParserResult; } - m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_wQuotationMark = 0; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_Start++; m_syntaxParserState = FDE_XmlSyntaxState::AttriName; syntaxParserResult = FX_XmlSyntaxResult::AttriValue; @@ -391,21 +358,9 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; case FDE_XmlSyntaxState::CloseInstruction: if (ch != L'>') { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - } else if (!m_BlockBuffer.IsEmpty()) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); + } else if (!current_text_.empty()) { syntaxParserResult = FX_XmlSyntaxResult::TargetData; } else { m_Start++; @@ -420,9 +375,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { else m_CurNodeType = FX_XMLNODE_Unknown; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::Text; syntaxParserResult = FX_XmlSyntaxResult::InstructionClose; } @@ -440,7 +392,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_Start++; break; case FDE_XmlSyntaxState::CloseElement: - if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { + if (!IsXMLNameChar(ch, current_text_.empty())) { if (ch == L'>') { if (m_XMLNodeTypeStack.empty()) { m_syntaxParserResult = FX_XmlSyntaxResult::Error; @@ -453,10 +405,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { else m_CurNodeType = FX_XMLNODE_Unknown; - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::Text; syntaxParserResult = FX_XmlSyntaxResult::ElementClose; } else if (!IsXMLWhiteSpace(ch)) { @@ -464,14 +412,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { return m_syntaxParserResult; } } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); } m_Start++; break; @@ -493,20 +434,9 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) { m_Start += 3; syntaxParserResult = FX_XmlSyntaxResult::CData; - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::Text; } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_Start++; } break; @@ -548,10 +478,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { if (ch == m_SkipChar) { m_SkipStack.pop(); if (m_SkipStack.empty()) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_syntaxParserState = FDE_XmlSyntaxState::Text; } else { m_SkipChar = m_SkipStack.top(); @@ -559,17 +485,6 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { } break; } - if (!m_SkipStack.empty()) { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { - return FX_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); - } m_Start++; } break; @@ -583,16 +498,12 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { break; case FDE_XmlSyntaxState::TargetData: if (IsXMLWhiteSpace(ch)) { - if (m_BlockBuffer.IsEmpty()) { + if (current_text_.empty()) { m_Start++; break; } if (m_wQuotationMark == 0) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_wQuotationMark = 0; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_Start++; syntaxParserResult = FX_XmlSyntaxResult::TargetData; break; @@ -606,11 +517,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { m_wQuotationMark = ch; m_Start++; } else if (ch == m_wQuotationMark) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_wQuotationMark = 0; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); m_Start++; syntaxParserResult = FX_XmlSyntaxResult::TargetData; } else { @@ -618,14 +525,7 @@ FX_XmlSyntaxResult CFX_XMLParser::DoSyntaxParse() { return m_syntaxParserResult; } } else { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FX_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); + current_text_.push_back(ch); m_Start++; } break; @@ -644,19 +544,17 @@ bool CFX_XMLParser::GetStatus() const { } void CFX_XMLParser::ParseTextChar(wchar_t character) { - if (m_iIndexInBlock == m_BlockBuffer.GetAllocStep()) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return; - } + current_text_.push_back(character); - m_pCurrentBlock[m_iIndexInBlock++] = character; - m_BlockBuffer.IncrementDataLength(); if (m_iEntityStart > -1 && character == L';') { - WideString csEntity = m_BlockBuffer.GetTextData( - m_iEntityStart + 1, - m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1); + // Copy the entity out into a string and remove from the vector. When we + // copy the entity we don't want to copy out the & or the ; so we start + // shifted by one and want to copy 2 less characters in total. + WideString csEntity(current_text_.data() + m_iEntityStart + 1, + current_text_.size() - m_iEntityStart - 2); + current_text_.erase(current_text_.begin() + m_iEntityStart, + current_text_.end()); + int32_t iLen = csEntity.GetLength(); if (iLen > 0) { if (csEntity[0] == L'#') { @@ -678,43 +576,33 @@ void CFX_XMLParser::ParseTextChar(wchar_t character) { ch = ' '; character = static_cast<wchar_t>(ch); - if (character != 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, character); - m_iEntityStart++; - } + if (character != 0) + current_text_.push_back(character); } else { if (csEntity.Compare(L"amp") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); - m_iEntityStart++; + current_text_.push_back(L'&'); } else if (csEntity.Compare(L"lt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); - m_iEntityStart++; + current_text_.push_back(L'<'); } else if (csEntity.Compare(L"gt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); - m_iEntityStart++; + current_text_.push_back(L'>'); } else if (csEntity.Compare(L"apos") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); - m_iEntityStart++; + current_text_.push_back(L'\''); } else if (csEntity.Compare(L"quot") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); - m_iEntityStart++; + current_text_.push_back(L'"'); } } } - if (m_iEntityStart >= 0 && - m_BlockBuffer.GetDataLength() > static_cast<size_t>(m_iEntityStart)) { - m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() - - m_iEntityStart); - } - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); + m_iEntityStart = -1; } else if (m_iEntityStart < 0 && character == L'&') { - m_iEntityStart = m_BlockBuffer.GetDataLength() - 1; + m_iEntityStart = current_text_.size() - 1; } m_Start++; } -WideString CFX_XMLParser::GetTextData() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); +WideString CFX_XMLParser::GetTextData() { + WideString ret(current_text_.data(), current_text_.size()); + current_text_.clear(); + current_text_.reserve(kCurrentTextReserve); + return ret; } diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h index 503852753f..6121f0c1dc 100644 --- a/core/fxcrt/xml/cfx_xmlparser.h +++ b/core/fxcrt/xml/cfx_xmlparser.h @@ -11,7 +11,6 @@ #include <stack> #include <vector> -#include "core/fxcrt/cfx_blockbuffer.h" #include "core/fxcrt/fx_string.h" #include "core/fxcrt/retain_ptr.h" #include "core/fxcrt/xml/cfx_xmlnode.h" @@ -50,7 +49,7 @@ class CFX_XMLParser { protected: FX_XmlSyntaxResult DoSyntaxParse(); - WideString GetTextData() const; + WideString GetTextData(); private: enum class FDE_XmlSyntaxState { @@ -81,7 +80,7 @@ class CFX_XMLParser { CFX_XMLNode* m_pParent; CFX_XMLNode* m_pChild = nullptr; - WideString m_ws1; + WideString current_attribute_name_; RetainPtr<IFX_SeekableReadStream> m_pStream; FX_FILESIZE m_Start = 0; // Start position in m_Buffer FX_FILESIZE m_End = 0; // End position in m_Buffer @@ -92,11 +91,8 @@ class CFX_XMLParser { std::stack<FX_XMLNODETYPE> m_XMLNodeTypeStack; std::stack<wchar_t> m_SkipStack; std::vector<wchar_t> m_Buffer; - CFX_BlockBuffer m_BlockBuffer; - wchar_t* m_pCurrentBlock = nullptr; // Pointer into CFX_BlockBuffer - size_t m_iIndexInBlock = 0; + std::vector<wchar_t> current_text_; size_t m_iXMLPlaneSize = 1024; - int32_t m_iTextDataLength = 0; int32_t m_iEntityStart = -1; wchar_t m_wQuotationMark = 0; wchar_t m_SkipChar = 0; diff --git a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp index 0b51c6b88c..790001cc27 100644 --- a/core/fxcrt/xml/cfx_xmlparser_unittest.cpp +++ b/core/fxcrt/xml/cfx_xmlparser_unittest.cpp @@ -24,7 +24,7 @@ class CFX_XMLTestParser : public CFX_XMLParser { ~CFX_XMLTestParser() override = default; FX_XmlSyntaxResult DoSyntaxParse() { return CFX_XMLParser::DoSyntaxParse(); } - WideString GetTextData() const { return CFX_XMLParser::GetTextData(); } + WideString GetTextData() { return CFX_XMLParser::GetTextData(); } }; RetainPtr<CFX_MemoryStream> MakeProxy(const char* input) { |