From 25553aa8f74ddccbfcb3bb7b8f55d3f2ac00338f Mon Sep 17 00:00:00 2001 From: dan sinclair Date: Thu, 6 Apr 2017 13:27:22 -0400 Subject: Remove the length reference from XMLSyntaxParser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL removes the reference to the length of the CFX_BlockBuffer that was stored and manipulated in CFDE_XMLSyntaxParser. Methods have been added to BlockBuffer to satisfy the usages in the syntax parser. Change-Id: I1107c343ce267283c4c45aa3ae1bbfa93c24079f Reviewed-on: https://pdfium-review.googlesource.com/3816 Commit-Queue: dsinclair Reviewed-by: Nicolás Peña --- core/fxcrt/cfx_blockbuffer.cpp | 122 ++++++++++++++++------------------- core/fxcrt/cfx_blockbuffer.h | 32 +++++---- xfa/fde/xml/cfde_xmlsyntaxparser.cpp | 88 +++++++++++++------------ xfa/fde/xml/cfde_xmlsyntaxparser.h | 1 - 4 files changed, 118 insertions(+), 125 deletions(-) diff --git a/core/fxcrt/cfx_blockbuffer.cpp b/core/fxcrt/cfx_blockbuffer.cpp index efae014982..7609fb9a25 100644 --- a/core/fxcrt/cfx_blockbuffer.cpp +++ b/core/fxcrt/cfx_blockbuffer.cpp @@ -9,8 +9,6 @@ #include #include -#include "third_party/base/stl_util.h" - namespace { const int kAllocStep = 1024 * 1024; @@ -18,113 +16,107 @@ const int kAllocStep = 1024 * 1024; } // namespace CFX_BlockBuffer::CFX_BlockBuffer() - : m_iDataLength(0), m_iBufferSize(0), m_iStartPosition(0) {} + : m_DataLength(0), m_BufferSize(0), m_StartPosition(0) {} CFX_BlockBuffer::~CFX_BlockBuffer() {} -int32_t CFX_BlockBuffer::GetAllocStep() const { +size_t CFX_BlockBuffer::GetAllocStep() const { return kAllocStep; } -std::pair CFX_BlockBuffer::GetAvailableBlock() { +std::pair CFX_BlockBuffer::GetAvailableBlock() { if (m_BlockArray.empty()) return {nullptr, 0}; - int32_t iRealIndex = m_iStartPosition + m_iDataLength; - if (iRealIndex == m_iBufferSize) { m_BlockArray.emplace_back(FX_Alloc(wchar_t, 
kAllocStep)); - m_iBufferSize += kAllocStep; + m_BufferSize += kAllocStep; return {m_BlockArray.back().get(), 0}; } - return {m_BlockArray[iRealIndex / kAllocStep].get(), iRealIndex % kAllocStep}; + return {m_BlockArray[realIndex / kAllocStep].get(), realIndex % kAllocStep}; } bool CFX_BlockBuffer::InitBuffer() { m_BlockArray.clear(); m_BlockArray.emplace_back(FX_Alloc(wchar_t, kAllocStep)); - m_iBufferSize = kAllocStep; + m_BufferSize = kAllocStep; return true; } -void CFX_BlockBuffer::SetTextChar(int32_t iIndex, wchar_t ch) { - if (iIndex < 0) - return; - - int32_t iRealIndex = m_iStartPosition + iIndex; - int32_t iBlockIndex = iRealIndex / kAllocStep; - int32_t iInnerIndex = iRealIndex % kAllocStep; - int32_t iBlockSize = pdfium::CollectionSize(m_BlockArray); - if (iBlockIndex >= iBlockSize) { - int32_t iNewBlocks = iBlockIndex - iBlockSize + 1; +void CFX_BlockBuffer::SetTextChar(size_t index, wchar_t ch) { + size_t realIndex = m_StartPosition + index; + size_t blockIndex = realIndex / kAllocStep; + if (blockIndex >= m_BlockArray.size()) { + size_t newBlocks = blockIndex - m_BlockArray.size() + 1; do { m_BlockArray.emplace_back(FX_Alloc(wchar_t, kAllocStep)); - m_iBufferSize += kAllocStep; - } while (--iNewBlocks); + m_BufferSize += kAllocStep; + } while (--newBlocks); } - wchar_t* pTextData = m_BlockArray[iBlockIndex].get(); - pTextData[iInnerIndex] = ch; - m_iDataLength = std::max(m_iDataLength, iIndex + 1); + wchar_t* pTextData = m_BlockArray[blockIndex].get(); + pTextData[realIndex % kAllocStep] = ch; + m_DataLength = std::max(m_DataLength, index + 1); } -int32_t CFX_BlockBuffer::DeleteTextChars(int32_t iCount) { - if (iCount <= 0) - return m_iDataLength; +void CFX_BlockBuffer::DeleteTextChars(size_t count) { + if (count == 0) + return; - if (iCount >= m_iDataLength) { + if (count >= m_DataLength) { Reset(false); - return 0; + return; } - m_iDataLength -= iCount; - return m_iDataLength; + m_DataLength -= count; } -CFX_WideString 
CFX_BlockBuffer::GetTextData(int32_t iStart, - int32_t iLength) const { - int32_t iMaybeDataLength = m_iBufferSize - 1 - m_iStartPosition; - if (iStart < 0 || iStart > iMaybeDataLength) +CFX_WideString CFX_BlockBuffer::GetTextData(size_t start, size_t length) const { + if (m_BufferSize <= m_StartPosition + 1 || length == 0) return CFX_WideString(); - if (iLength == -1 || iLength > iMaybeDataLength) - iLength = iMaybeDataLength; - if (iLength <= 0) + + size_t maybeDataLength = m_BufferSize - 1 - m_StartPosition; + if (start > maybeDataLength) return CFX_WideString(); + if (length > maybeDataLength) + length = maybeDataLength; CFX_WideString wsTextData; - wchar_t* pBuf = wsTextData.GetBuffer(iLength); + wchar_t* pBuf = wsTextData.GetBuffer(length); if (!pBuf) return CFX_WideString(); - int32_t iStartBlock = 0; - int32_t iStartInner = 0; - std::tie(iStartBlock, iStartInner) = TextDataIndex2BufIndex(iStart); - - int32_t iEndBlock = 0; - int32_t iEndInner = 0; - std::tie(iEndBlock, iEndInner) = TextDataIndex2BufIndex(iStart + iLength); - - int32_t iPointer = 0; - for (int32_t i = iStartBlock; i <= iEndBlock; i++) { - int32_t iBufferPointer = 0; - int32_t iCopyLength = kAllocStep; - if (i == iStartBlock) { - iCopyLength -= iStartInner; - iBufferPointer = iStartInner; + size_t startBlock = 0; + size_t startInner = 0; + std::tie(startBlock, startInner) = TextDataIndex2BufIndex(start); + + size_t endBlock = 0; + size_t endInner = 0; + std::tie(endBlock, endInner) = TextDataIndex2BufIndex(start + length); + + size_t pointer = 0; + for (size_t i = startBlock; i <= endBlock; ++i) { + size_t bufferPointer = 0; + size_t copyLength = kAllocStep; + if (i == startBlock) { + copyLength -= startInner; + bufferPointer = startInner; } - if (i == iEndBlock) - iCopyLength -= ((kAllocStep - 1) - iEndInner); + if (i == endBlock) + copyLength -= ((kAllocStep - 1) - endInner); wchar_t* pBlockBuf = m_BlockArray[i].get(); - memcpy(pBuf + iPointer, pBlockBuf + iBufferPointer, - iCopyLength * 
sizeof(wchar_t)); - iPointer += iCopyLength; + memcpy(pBuf + pointer, pBlockBuf + bufferPointer, + copyLength * sizeof(wchar_t)); + pointer += copyLength; } - wsTextData.ReleaseBuffer(iLength); + wsTextData.ReleaseBuffer(length); return wsTextData; } -std::pair CFX_BlockBuffer::TextDataIndex2BufIndex( - const int32_t iIndex) const { +std::pair CFX_BlockBuffer::TextDataIndex2BufIndex( + const size_t iIndex) const { ASSERT(iIndex >= 0); - int32_t iRealIndex = m_iStartPosition + iIndex; - return {iRealIndex / kAllocStep, iRealIndex % kAllocStep}; + size_t realIndex = m_StartPosition + iIndex; + return {realIndex / kAllocStep, realIndex % kAllocStep}; } diff --git a/core/fxcrt/cfx_blockbuffer.h b/core/fxcrt/cfx_blockbuffer.h index e7e493ca86..867449074d 100644 --- a/core/fxcrt/cfx_blockbuffer.h +++ b/core/fxcrt/cfx_blockbuffer.h @@ -21,33 +21,31 @@ class CFX_BlockBuffer { ~CFX_BlockBuffer(); bool InitBuffer(); - bool IsInitialized() { return m_iBufferSize / GetAllocStep() >= 1; } + bool IsInitialized() { return m_BufferSize / GetAllocStep() >= 1; } - std::pair GetAvailableBlock(); - int32_t GetAllocStep() const; - - // This is ... scary. This returns a ref, which the XMLSyntaxParser stores - // and modifies. 
- int32_t& GetDataLengthRef() { return m_iDataLength; } + std::pair GetAvailableBlock(); + size_t GetAllocStep() const; + size_t GetDataLength() const { return m_DataLength; } + void IncrementDataLength() { m_DataLength++; } + bool IsEmpty() const { return m_DataLength == 0; } void Reset(bool bReserveData) { if (!bReserveData) - m_iStartPosition = 0; - m_iDataLength = 0; + m_StartPosition = 0; + m_DataLength = 0; } - void SetTextChar(int32_t iIndex, wchar_t ch); - int32_t DeleteTextChars(int32_t iCount); - CFX_WideString GetTextData(int32_t iStart, int32_t iLength) const; + void SetTextChar(size_t iIndex, wchar_t ch); + void DeleteTextChars(size_t iCount); + CFX_WideString GetTextData(size_t iStart, size_t iLength) const; private: - std::pair TextDataIndex2BufIndex( - const int32_t iIndex) const; + std::pair TextDataIndex2BufIndex(const size_t iIndex) const; std::vector> m_BlockArray; - int32_t m_iDataLength; - int32_t m_iBufferSize; - int32_t m_iStartPosition; + size_t m_DataLength; + size_t m_BufferSize; + size_t m_StartPosition; }; #endif // CORE_FXCRT_CFX_BLOCKBUFFER_H_ diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.cpp b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp index 116b2e5a67..9adf11be0f 100644 --- a/xfa/fde/xml/cfde_xmlsyntaxparser.cpp +++ b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp @@ -93,7 +93,6 @@ CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser( m_Start(0), m_End(0), m_iAllocStep(m_BlockBuffer.GetAllocStep()), - m_iDataLength(m_BlockBuffer.GetDataLengthRef()), m_pCurrentBlock(nullptr), m_iIndexInBlock(0), m_iTextDataLength(0), @@ -166,8 +165,8 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { switch (m_syntaxParserState) { case FDE_XmlSyntaxState::Text: if (ch == L'<') { - if (m_iDataLength > 0) { - m_iTextDataLength = m_iDataLength; + if (!m_BlockBuffer.IsEmpty()) { + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); @@ -209,22 +208,22 @@ 
FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { break; case FDE_XmlSyntaxState::Target: case FDE_XmlSyntaxState::Tag: - if (!IsXMLNameChar(ch, m_iDataLength < 1)) { - if (m_iDataLength < 1) { + if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { + if (m_BlockBuffer.IsEmpty()) { m_syntaxParserResult = FDE_XmlSyntaxResult::Error; return m_syntaxParserResult; - } else { - m_iTextDataLength = m_iDataLength; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (m_syntaxParserState != FDE_XmlSyntaxState::Target) { - syntaxParserResult = FDE_XmlSyntaxResult::TagName; - } else { - syntaxParserResult = FDE_XmlSyntaxResult::TargetName; - } - m_syntaxParserState = FDE_XmlSyntaxState::AttriName; } + + m_iTextDataLength = m_BlockBuffer.GetDataLength(); + m_BlockBuffer.Reset(true); + std::tie(m_pCurrentBlock, m_iIndexInBlock) = + m_BlockBuffer.GetAvailableBlock(); + if (m_syntaxParserState != FDE_XmlSyntaxState::Target) + syntaxParserResult = FDE_XmlSyntaxResult::TagName; + else + syntaxParserResult = FDE_XmlSyntaxResult::TargetName; + + m_syntaxParserState = FDE_XmlSyntaxState::AttriName; } else { if (m_iIndexInBlock == m_iAllocStep) { std::tie(m_pCurrentBlock, m_iIndexInBlock) = @@ -234,17 +233,17 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { } } m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; + m_BlockBuffer.IncrementDataLength(); m_Start++; } break; case FDE_XmlSyntaxState::AttriName: - if (m_iDataLength < 1 && IsXMLWhiteSpace(ch)) { + if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) { m_Start++; break; } - if (!IsXMLNameChar(ch, m_iDataLength < 1)) { - if (m_iDataLength < 1) { + if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { + if (m_BlockBuffer.IsEmpty()) { if (m_CurNode.eNodeType == FDE_XMLNODE_Element) { if (ch == L'>' || ch == L'/') { m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; @@ -268,7 +267,7 @@ FDE_XmlSyntaxResult 
CFDE_XMLSyntaxParser::DoSyntaxParse() { break; } } - m_iTextDataLength = m_iDataLength; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); @@ -284,7 +283,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { } } m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; + m_BlockBuffer.IncrementDataLength(); m_Start++; } break; @@ -325,7 +324,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { m_syntaxParserResult = FDE_XmlSyntaxResult::Error; return m_syntaxParserResult; } - m_iTextDataLength = m_iDataLength; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_wQuotationMark = 0; m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = @@ -347,10 +346,10 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { } } m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; + m_BlockBuffer.IncrementDataLength(); m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - } else if (m_iDataLength > 0) { - m_iTextDataLength = m_iDataLength; + } else if (!m_BlockBuffer.IsEmpty()) { + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); @@ -389,7 +388,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { m_Start++; break; case FDE_XmlSyntaxState::CloseElement: - if (!IsXMLNameChar(ch, m_iDataLength < 1)) { + if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { if (ch == L'>') { if (m_XMLNodeStack.empty()) { m_syntaxParserResult = FDE_XmlSyntaxResult::Error; @@ -403,7 +402,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { m_CurNode.eNodeType = FDE_XMLNODE_Unknown; } m_iCurrentNodeNum = m_CurNode.iNodeNum; - m_iTextDataLength = m_iDataLength; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = 
m_BlockBuffer.GetAvailableBlock(); @@ -422,7 +421,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { } } m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; + m_BlockBuffer.IncrementDataLength(); } m_Start++; break; @@ -444,7 +443,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) { m_Start += 3; syntaxParserResult = FDE_XmlSyntaxResult::CData; - m_iTextDataLength = m_iDataLength; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); @@ -457,7 +456,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { return FDE_XmlSyntaxResult::Error; } m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; + m_BlockBuffer.IncrementDataLength(); m_Start++; } break; @@ -499,10 +498,10 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { if (ch == m_SkipChar) { m_SkipStack.pop(); if (m_SkipStack.empty()) { - if (m_iDataLength >= 9) + if (m_BlockBuffer.GetDataLength() >= 9) (void)m_BlockBuffer.GetTextData(0, 7); - m_iTextDataLength = m_iDataLength; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); @@ -522,7 +521,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { } } m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; + m_BlockBuffer.IncrementDataLength(); } m_Start++; } @@ -537,11 +536,11 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { break; case FDE_XmlSyntaxState::TargetData: if (IsXMLWhiteSpace(ch)) { - if (m_iDataLength < 1) { + if (m_BlockBuffer.IsEmpty()) { m_Start++; break; } else if (m_wQuotationMark == 0) { - m_iTextDataLength = m_iDataLength; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_wQuotationMark = 0; m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = @@ -559,7 
+558,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { m_wQuotationMark = ch; m_Start++; } else if (ch == m_wQuotationMark) { - m_iTextDataLength = m_iDataLength; + m_iTextDataLength = m_BlockBuffer.GetDataLength(); m_wQuotationMark = 0; m_BlockBuffer.Reset(true); std::tie(m_pCurrentBlock, m_iIndexInBlock) = @@ -579,7 +578,7 @@ FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { } } m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_iDataLength++; + m_BlockBuffer.IncrementDataLength(); m_Start++; } break; @@ -626,10 +625,11 @@ void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) { } m_pCurrentBlock[m_iIndexInBlock++] = character; - m_iDataLength++; + m_BlockBuffer.IncrementDataLength(); if (m_iEntityStart > -1 && character == L';') { CFX_WideString csEntity = m_BlockBuffer.GetTextData( - m_iEntityStart + 1, (m_iDataLength - 1) - m_iEntityStart - 1); + m_iEntityStart + 1, + m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1); int32_t iLen = csEntity.GetLength(); if (iLen > 0) { if (csEntity[0] == L'#') { @@ -683,12 +683,16 @@ void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) { } } } - m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart); + if (m_iEntityStart > 0 && + m_BlockBuffer.GetDataLength() > static_cast(m_iEntityStart)) { + m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() - + m_iEntityStart); + } std::tie(m_pCurrentBlock, m_iIndexInBlock) = m_BlockBuffer.GetAvailableBlock(); m_iEntityStart = -1; } else if (m_iEntityStart < 0 && character == L'&') { - m_iEntityStart = m_iDataLength - 1; + m_iEntityStart = m_BlockBuffer.GetDataLength() - 1; } m_Start++; } diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.h b/xfa/fde/xml/cfde_xmlsyntaxparser.h index 6fa310476e..9f1274fa83 100644 --- a/xfa/fde/xml/cfde_xmlsyntaxparser.h +++ b/xfa/fde/xml/cfde_xmlsyntaxparser.h @@ -113,7 +113,6 @@ class CFDE_XMLSyntaxParser { std::stack m_XMLNodeStack; CFX_BlockBuffer m_BlockBuffer; int32_t m_iAllocStep; - int32_t& 
m_iDataLength; wchar_t* m_pCurrentBlock; // Pointer into CFX_BlockBuffer int32_t m_iIndexInBlock; int32_t m_iTextDataLength; -- cgit v1.2.3