diff options
Diffstat (limited to 'core/fxcrt/xml/cfx_saxreader.cpp')
-rw-r--r-- | core/fxcrt/xml/cfx_saxreader.cpp | 744 |
1 files changed, 0 insertions, 744 deletions
diff --git a/core/fxcrt/xml/cfx_saxreader.cpp b/core/fxcrt/xml/cfx_saxreader.cpp deleted file mode 100644 index 762144f4bb..0000000000 --- a/core/fxcrt/xml/cfx_saxreader.cpp +++ /dev/null @@ -1,744 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/xml/cfx_saxreader.h" - -#include <algorithm> -#include <utility> - -#include "core/fxcrt/fx_stream.h" -#include "core/fxcrt/xml/cfx_saxreaderhandler.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" - -enum class CFX_SaxMode { - Text = 0, - NodeStart, - DeclOrComment, - DeclNode, - Comment, - CommentContent, - TagName, - TagAttributeName, - TagAttributeEqual, - TagAttributeValue, - TagMaybeClose, - TagClose, - TagEnd, - TargetData, -}; - -class CFX_SAXCommentContext { - public: - CFX_SAXCommentContext() : m_iHeaderCount(0), m_iTailCount(0) {} - int32_t m_iHeaderCount; - int32_t m_iTailCount; -}; - -namespace { - -const uint32_t kSaxFileBufSize = 32768; - -} // namespace - -CFX_SAXFile::CFX_SAXFile() - : m_dwStart(0), - m_dwEnd(0), - m_dwCur(0), - m_pBuf(nullptr), - m_dwBufSize(0), - m_dwBufIndex(0) {} - -CFX_SAXFile::~CFX_SAXFile() {} - -bool CFX_SAXFile::StartFile(const RetainPtr<IFX_SeekableReadStream>& pFile, - uint32_t dwStart, - uint32_t dwLen) { - ASSERT(!m_pFile && pFile); - uint32_t dwSize = pFile->GetSize(); - if (dwStart >= dwSize) - return false; - - if (dwLen == static_cast<uint32_t>(-1) || dwStart + dwLen > dwSize) - dwLen = dwSize - dwStart; - - if (dwLen == 0) - return false; - - m_dwBufSize = std::min(dwLen, kSaxFileBufSize); - m_pBuf = FX_Alloc(uint8_t, m_dwBufSize); - if (!pFile->ReadBlock(m_pBuf, dwStart, m_dwBufSize)) - return false; - - m_dwStart = dwStart; - m_dwEnd = dwStart + dwLen; - m_dwCur = dwStart; - m_pFile = pFile; - m_dwBufIndex = 0; - return true; -} - -bool CFX_SAXFile::ReadNextBlock() { - ASSERT(m_pFile); - uint32_t dwSize = m_dwEnd - m_dwCur; - if (dwSize == 0) { - return false; - } - m_dwBufSize = std::min(dwSize, kSaxFileBufSize); - if (!m_pFile->ReadBlock(m_pBuf, m_dwCur, m_dwBufSize)) { - return false; - } - m_dwBufIndex = 0; - return true; -} - -void CFX_SAXFile::Reset() { - if (m_pBuf) { - FX_Free(m_pBuf); - m_pBuf = nullptr; - } - m_pFile = nullptr; -} - -CFX_SAXReader::CFX_SAXReader() - : m_File(), - m_pHandler(nullptr), - m_iState(-1), - m_dwItemID(0), - m_dwParseMode(0) { - m_Data.reserve(256); - m_Name.reserve(256); -} - -CFX_SAXReader::~CFX_SAXReader() { - Reset(); -} - -void CFX_SAXReader::Reset() { - m_File.Reset(); - m_iState = -1; - m_Stack = std::stack<std::unique_ptr<CFX_SAXItem>>(); - m_dwItemID = 0; - m_SkipStack = std::stack<char>(); - m_SkipChar = 0; - m_pCommentContext.reset(); - ClearData(); - ClearName(); -} - -void CFX_SAXReader::Push() { - std::unique_ptr<CFX_SAXItem> pNew = - pdfium::MakeUnique<CFX_SAXItem>(++m_dwItemID); - if (!m_Stack.empty()) - pNew->m_bSkip = m_Stack.top()->m_bSkip; - m_Stack.push(std::move(pNew)); -} - -void CFX_SAXReader::Pop() { - if (!m_Stack.empty()) - m_Stack.pop(); -} - -CFX_SAXItem* CFX_SAXReader::GetCurrentItem() const { - return m_Stack.empty() ? nullptr : m_Stack.top().get(); -} - -void CFX_SAXReader::ClearData() { - m_Data.clear(); - m_iEntityStart = -1; -} - -void CFX_SAXReader::ClearName() { - m_Name.clear(); -} - -void CFX_SAXReader::AppendToData(uint8_t ch) { - m_Data.push_back(ch); -} - -void CFX_SAXReader::AppendToName(uint8_t ch) { - m_Name.push_back(ch); -} - -void CFX_SAXReader::BackUpAndReplaceDataAt(int32_t index, uint8_t ch) { - ASSERT(index > -1); - m_Data.erase(m_Data.begin() + index, m_Data.end()); - AppendToData(ch); -} - -int32_t CFX_SAXReader::CurrentDataIndex() const { - return pdfium::CollectionSize<int32_t>(m_Data) - 1; -} - -bool CFX_SAXReader::IsEntityStart(uint8_t ch) const { - return m_iEntityStart == -1 && ch == '&'; -} - -bool CFX_SAXReader::IsEntityEnd(uint8_t ch) const { - return m_iEntityStart != -1 && ch == ';'; -} - -bool CFX_SAXReader::SkipSpace(uint8_t ch) { - return (m_dwParseMode & CFX_SaxParseMode_NotSkipSpace) == 0 && ch < 0x21; -} - -int32_t CFX_SAXReader::StartParse( - const RetainPtr<IFX_SeekableReadStream>& pFile, - uint32_t dwStart, - uint32_t dwLen, - uint32_t dwParseMode) { - Reset(); - if (!m_File.StartFile(pFile, dwStart, dwLen)) - return -1; - - m_iState = 0; - m_eMode = CFX_SaxMode::Text; - m_ePrevMode = CFX_SaxMode::Text; - m_bCharData = false; - m_dwDataOffset = 0; - m_dwParseMode = dwParseMode; - m_Stack.push(pdfium::MakeUnique<CFX_SAXItem>(++m_dwItemID)); - return 0; -} - -int32_t CFX_SAXReader::ContinueParse() { - if (m_iState < 0 || m_iState > 99) - return m_iState; - - while (m_File.m_dwCur < m_File.m_dwEnd) { - uint32_t& index = m_File.m_dwBufIndex; - uint32_t size = m_File.m_dwBufSize; - const uint8_t* pBuf = m_File.m_pBuf; - while (index < size) { - m_CurByte = pBuf[index]; - ParseInternal(); - index++; - } - m_File.m_dwCur += index; - m_iState = (m_File.m_dwCur - m_File.m_dwStart) * 100 / - (m_File.m_dwEnd - m_File.m_dwStart); - if (m_File.m_dwCur >= m_File.m_dwEnd) - break; - if (!m_File.ReadNextBlock()) { - m_iState = -2; - break; - } - m_dwDataOffset = 0; - } - return m_iState; -} - -void CFX_SAXReader::ParseInternal() { - switch (m_eMode) { - case CFX_SaxMode::Text: - ParseText(); - break; - case CFX_SaxMode::NodeStart: - ParseNodeStart(); - break; - case CFX_SaxMode::DeclOrComment: - ParseDeclOrComment(); - break; - case CFX_SaxMode::DeclNode: - ParseDeclNode(); - break; - case CFX_SaxMode::Comment: - ParseComment(); - break; - case CFX_SaxMode::CommentContent: - ParseCommentContent(); - break; - case CFX_SaxMode::TagName: - ParseTagName(); - break; - case CFX_SaxMode::TagAttributeName: - ParseTagAttributeName(); - break; - case CFX_SaxMode::TagAttributeEqual: - ParseTagAttributeEqual(); - break; - case CFX_SaxMode::TagAttributeValue: - ParseTagAttributeValue(); - break; - case CFX_SaxMode::TagMaybeClose: - ParseMaybeClose(); - break; - case CFX_SaxMode::TagClose: - ParseTagClose(); - break; - case CFX_SaxMode::TagEnd: - ParseTagEnd(); - break; - case CFX_SaxMode::TargetData: - ParseTargetData(); - break; - } -} - -void CFX_SAXReader::ParseChar(uint8_t ch) { - AppendToData(ch); - if (IsEntityStart(ch)) { - m_iEntityStart = CurrentDataIndex(); - return; - } - if (!IsEntityEnd(ch)) - return; - - // No matter what, we're no longer in an entity. - ASSERT(m_iEntityStart > -1); - int32_t iSaveStart = m_iEntityStart; - m_iEntityStart = -1; - - // NOTE: Relies on negative lengths being treated as empty strings. - ByteString csEntity(m_Data.data() + iSaveStart + 1, - CurrentDataIndex() - iSaveStart - 1); - int32_t iLen = csEntity.GetLength(); - if (iLen == 0) - return; - - if (csEntity[0] == '#') { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_sharp) == 0) { - ch = 0; - uint8_t w; - if (iLen > 1 && csEntity[1] == 'x') { - for (int32_t i = 2; i < iLen; i++) { - w = csEntity[i]; - if (w >= '0' && w <= '9') - ch = (ch << 4) + w - '0'; - else if (w >= 'A' && w <= 'F') - ch = (ch << 4) + w - 55; - else if (w >= 'a' && w <= 'f') - ch = (ch << 4) + w - 87; - else - break; - } - } else { - for (int32_t i = 1; i < iLen; i++) { - w = csEntity[i]; - if (w < '0' || w > '9') - break; - ch = ch * 10 + w - '0'; - } - } - if (ch != 0) - BackUpAndReplaceDataAt(iSaveStart, ch); - } - return; - } - if (csEntity == "amp") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_amp) == 0) - BackUpAndReplaceDataAt(iSaveStart, '&'); - return; - } - if (csEntity == "lt") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_lt) == 0) - BackUpAndReplaceDataAt(iSaveStart, '<'); - return; - } - if (csEntity == "gt") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_gt) == 0) - BackUpAndReplaceDataAt(iSaveStart, '>'); - return; - } - if (csEntity == "apos") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_apos) == 0) - BackUpAndReplaceDataAt(iSaveStart, '\''); - return; - } - if (csEntity == "quot") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_quot) == 0) - BackUpAndReplaceDataAt(iSaveStart, '\"'); - return; - } -} - -void CFX_SAXReader::ParseText() { - if (m_CurByte == '<') { - if (!m_Data.empty()) { - NotifyData(); - ClearData(); - } - Push(); - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - m_eMode = CFX_SaxMode::NodeStart; - return; - } - if (m_Data.empty() && SkipSpace(m_CurByte)) - return; - - ParseChar(m_CurByte); -} - -void CFX_SAXReader::ParseNodeStart() { - if (m_CurByte == '?') { - GetCurrentItem()->m_eNode = CFX_SAXItem::Type::Instruction; - m_eMode = CFX_SaxMode::TagName; - return; - } - if (m_CurByte == '!') { - m_eMode = CFX_SaxMode::DeclOrComment; - return; - } - if (m_CurByte == '/') { - m_eMode = CFX_SaxMode::TagEnd; - return; - } - if (m_CurByte == '>') { - Pop(); - m_eMode = CFX_SaxMode::Text; - return; - } - if (m_CurByte > 0x20) { - m_dwDataOffset = m_File.m_dwBufIndex; - GetCurrentItem()->m_eNode = CFX_SAXItem::Type::Tag; - m_eMode = CFX_SaxMode::TagName; - AppendToData(m_CurByte); - } -} - -void CFX_SAXReader::ParseDeclOrComment() { - if (m_CurByte == '-') { - m_eMode = CFX_SaxMode::Comment; - GetCurrentItem()->m_eNode = CFX_SAXItem::Type::Comment; - if (!m_pCommentContext) - m_pCommentContext = pdfium::MakeUnique<CFX_SAXCommentContext>(); - m_pCommentContext->m_iHeaderCount = 1; - m_pCommentContext->m_iTailCount = 0; - return; - } - m_eMode = CFX_SaxMode::DeclNode; - m_dwDataOffset = m_File.m_dwBufIndex; - m_SkipChar = '>'; - m_SkipStack.push('>'); - SkipNode(); -} - -void CFX_SAXReader::ParseComment() { - m_pCommentContext->m_iHeaderCount = 2; - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - m_eMode = CFX_SaxMode::CommentContent; -} - -void CFX_SAXReader::ParseCommentContent() { - if (m_CurByte == '-') { - m_pCommentContext->m_iTailCount++; - return; - } - if (m_CurByte == '>' && m_pCommentContext->m_iTailCount == 2) { - NotifyTargetData(); - ClearData(); - Pop(); - m_eMode = CFX_SaxMode::Text; - return; - } - while (m_pCommentContext->m_iTailCount > 0) { - AppendToData('-'); - m_pCommentContext->m_iTailCount--; - } - AppendToData(m_CurByte); -} - -void CFX_SAXReader::ParseDeclNode() { - SkipNode(); -} - -void CFX_SAXReader::ParseTagName() { - if (m_CurByte < 0x21 || m_CurByte == '/' || m_CurByte == '>' || - m_CurByte == '?') { - NotifyEnter(); - ClearData(); - if (m_CurByte < 0x21) { - ClearName(); - m_eMode = CFX_SaxMode::TagAttributeName; - } else if (m_CurByte == '/' || m_CurByte == '?') { - m_ePrevMode = m_eMode; - m_eMode = CFX_SaxMode::TagMaybeClose; - } else { - NotifyBreak(); - m_eMode = CFX_SaxMode::Text; - } - } else { - AppendToData(m_CurByte); - } -} - -void CFX_SAXReader::ParseTagAttributeName() { - if (m_CurByte < 0x21 || m_CurByte == '=') { - if (m_Name.empty() && m_CurByte < 0x21) - return; - - m_SkipChar = 0; - m_eMode = m_CurByte == '=' ? CFX_SaxMode::TagAttributeValue - : CFX_SaxMode::TagAttributeEqual; - ClearData(); - return; - } - if (m_CurByte == '/' || m_CurByte == '>' || m_CurByte == '?') { - if (m_CurByte == '/' || m_CurByte == '?') { - m_ePrevMode = m_eMode; - m_eMode = CFX_SaxMode::TagMaybeClose; - } else { - NotifyBreak(); - m_eMode = CFX_SaxMode::Text; - } - return; - } - if (m_Name.empty()) - m_dwDataOffset = m_File.m_dwBufIndex; - AppendToName(m_CurByte); -} - -void CFX_SAXReader::ParseTagAttributeEqual() { - if (m_CurByte == '=') { - m_SkipChar = 0; - m_eMode = CFX_SaxMode::TagAttributeValue; - return; - } - if (GetCurrentItem()->m_eNode == CFX_SAXItem::Type::Instruction) { - AppendToName(0x20); - m_eMode = CFX_SaxMode::TargetData; - ParseTargetData(); - } -} - -void CFX_SAXReader::ParseTagAttributeValue() { - if (m_SkipChar) { - if (m_SkipChar == m_CurByte) { - NotifyAttribute(); - ClearData(); - ClearName(); - m_SkipChar = 0; - m_eMode = CFX_SaxMode::TagAttributeName; - return; - } - ParseChar(m_CurByte); - return; - } - if (m_CurByte < 0x21) { - return; - } - if (m_Data.empty()) { - if (m_CurByte == '\'' || m_CurByte == '\"') - m_SkipChar = m_CurByte; - } -} - -void CFX_SAXReader::ParseMaybeClose() { - if (m_CurByte == '>') { - if (GetCurrentItem()->m_eNode == CFX_SAXItem::Type::Instruction) { - NotifyTargetData(); - ClearData(); - ClearName(); - } - ParseTagClose(); - m_eMode = CFX_SaxMode::Text; - } else if (m_ePrevMode == CFX_SaxMode::TagName) { - AppendToData('/'); - m_eMode = CFX_SaxMode::TagName; - m_ePrevMode = CFX_SaxMode::Text; - ParseTagName(); - } else if (m_ePrevMode == CFX_SaxMode::TagAttributeName) { - AppendToName('/'); - m_eMode = CFX_SaxMode::TagAttributeName; - m_ePrevMode = CFX_SaxMode::Text; - ParseTagAttributeName(); - } else if (m_ePrevMode == CFX_SaxMode::TargetData) { - AppendToName('?'); - m_eMode = CFX_SaxMode::TargetData; - m_ePrevMode = CFX_SaxMode::Text; - ParseTargetData(); - } -} -void CFX_SAXReader::ParseTagClose() { - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - NotifyClose(); - Pop(); -} -void CFX_SAXReader::ParseTagEnd() { - if (m_CurByte < 0x21) { - return; - } - if (m_CurByte == '>') { - Pop(); - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - NotifyEnd(); - ClearData(); - Pop(); - m_eMode = CFX_SaxMode::Text; - } else { - ParseChar(m_CurByte); - } -} -void CFX_SAXReader::ParseTargetData() { - if (m_CurByte == '?') { - m_ePrevMode = m_eMode; - m_eMode = CFX_SaxMode::TagMaybeClose; - } else { - AppendToName(m_CurByte); - } -} -void CFX_SAXReader::SkipNode() { - if (m_SkipChar == '\'' || m_SkipChar == '\"') { - if (m_CurByte != m_SkipChar) - return; - - ASSERT(!m_SkipStack.empty()); - m_SkipStack.pop(); - m_SkipChar = !m_SkipStack.empty() ? m_SkipStack.top() : 0; - return; - } - switch (m_CurByte) { - case '<': - m_SkipChar = '>'; - m_SkipStack.push('>'); - break; - case '[': - m_SkipChar = ']'; - m_SkipStack.push(']'); - break; - case '(': - m_SkipChar = ')'; - m_SkipStack.push(')'); - break; - case '\'': - m_SkipChar = '\''; - m_SkipStack.push('\''); - break; - case '\"': - m_SkipChar = '\"'; - m_SkipStack.push('\"'); - break; - default: - if (m_CurByte == m_SkipChar) { - m_SkipStack.pop(); - m_SkipChar = !m_SkipStack.empty() ? m_SkipStack.top() : 0; - if (m_SkipStack.empty() && m_CurByte == '>') { - if (m_Data.size() >= 9 && memcmp(m_Data.data(), "[CDATA[", 7) == 0 && - memcmp(m_Data.data() + m_Data.size() - 2, "]]", 2) == 0) { - Pop(); - m_Data.erase(m_Data.begin(), m_Data.begin() + 7); - m_Data.erase(m_Data.end() - 2, m_Data.end()); - m_bCharData = true; - NotifyData(); - m_bCharData = false; - } else { - Pop(); - } - ClearData(); - m_eMode = CFX_SaxMode::Text; - } - } - break; - } - if (!m_SkipStack.empty()) - ParseChar(m_CurByte); -} - -void CFX_SAXReader::NotifyData() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag) - m_pHandler->OnTagData( - pItem->m_pNode, - m_bCharData ? CFX_SAXItem::Type::CharData : CFX_SAXItem::Type::Text, - ByteStringView(m_Data), m_File.m_dwCur + m_dwDataOffset); -} - -void CFX_SAXReader::NotifyEnter() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag || - pItem->m_eNode == CFX_SAXItem::Type::Instruction) { - pItem->m_pNode = m_pHandler->OnTagEnter(ByteStringView(m_Data), - pItem->m_eNode, m_dwNodePos); - } -} - -void CFX_SAXReader::NotifyAttribute() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag || - pItem->m_eNode == CFX_SAXItem::Type::Instruction) { - m_pHandler->OnTagAttribute(pItem->m_pNode, ByteStringView(m_Name), - ByteStringView(m_Data)); - } -} - -void CFX_SAXReader::NotifyBreak() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag) - m_pHandler->OnTagBreak(pItem->m_pNode); -} - -void CFX_SAXReader::NotifyClose() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag || - pItem->m_eNode == CFX_SAXItem::Type::Instruction) { - m_pHandler->OnTagClose(pItem->m_pNode, m_dwNodePos); - } -} - -void CFX_SAXReader::NotifyEnd() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag) - m_pHandler->OnTagEnd(pItem->m_pNode, ByteStringView(m_Data), m_dwNodePos); -} - -void CFX_SAXReader::NotifyTargetData() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Instruction) { - m_pHandler->OnTargetData(pItem->m_pNode, pItem->m_eNode, - ByteStringView(m_Name), m_dwNodePos); - } else if (pItem->m_eNode == CFX_SAXItem::Type::Comment) { - m_pHandler->OnTargetData(pItem->m_pNode, pItem->m_eNode, - ByteStringView(m_Data), m_dwNodePos); - } -} - -void CFX_SAXReader::SkipCurrentNode() { - CFX_SAXItem* pItem = GetCurrentItem(); - if (pItem) - pItem->m_bSkip = true; -} |