diff options
Diffstat (limited to 'core')
-rw-r--r-- | core/fxcrt/cfx_checksumcontext.cpp | 152 | ||||
-rw-r--r-- | core/fxcrt/cfx_checksumcontext.h | 34 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_saxcontext.cpp | 9 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_saxcontext.h | 25 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_saxreader.cpp | 744 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_saxreader.h | 174 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_saxreader_unittest.cpp | 152 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_saxreaderhandler.cpp | 128 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_saxreaderhandler.h | 48 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlparser.cpp | 30 | ||||
-rw-r--r-- | core/fxcrt/xml/cfx_xmlparser.h | 6 |
11 files changed, 1 insertions, 1501 deletions
diff --git a/core/fxcrt/cfx_checksumcontext.cpp b/core/fxcrt/cfx_checksumcontext.cpp deleted file mode 100644 index 3d3409dceb..0000000000 --- a/core/fxcrt/cfx_checksumcontext.cpp +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/cfx_checksumcontext.h" - -#include "core/fdrm/crypto/fx_crypt.h" -#include "core/fxcrt/fx_stream.h" -#include "core/fxcrt/xml/cfx_saxreaderhandler.h" -#include "third_party/base/ptr_util.h" - -namespace { - -struct FX_BASE64DATA { - uint32_t data1 : 2; - uint32_t data2 : 6; - uint32_t data3 : 4; - uint32_t data4 : 4; - uint32_t data5 : 6; - uint32_t data6 : 2; - uint32_t data7 : 8; -}; - -const char g_FXBase64EncoderMap[64] = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', - 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', -}; - -void Base64EncodePiece(const FX_BASE64DATA& src, int32_t iBytes, char dst[4]) { - dst[0] = g_FXBase64EncoderMap[src.data2]; - uint32_t b = src.data1 << 4; - if (iBytes > 1) { - b |= src.data4; - } - dst[1] = g_FXBase64EncoderMap[b]; - if (iBytes > 1) { - b = src.data3 << 2; - if (iBytes > 2) { - b |= src.data6; - } - dst[2] = g_FXBase64EncoderMap[b]; - if (iBytes > 2) { - dst[3] = g_FXBase64EncoderMap[src.data5]; - } else { - dst[3] = '='; - } - } else { - dst[2] = dst[3] = '='; - } -} - -int32_t Base64EncodeA(const uint8_t* pSrc, int32_t iSrcLen, char* pDst) { - ASSERT(pSrc); - if (iSrcLen < 1) { - return 0; - } - if (!pDst) { - int32_t iDstLen = iSrcLen / 3 * 4; - if ((iSrcLen % 3) != 0) { - iDstLen += 4; - } - return iDstLen; - } - FX_BASE64DATA srcData; - int32_t iBytes = 3; - char* pDstEnd = pDst; - while (iSrcLen > 0) { - if (iSrcLen > 2) { - ((uint8_t*)&srcData)[0] = *pSrc++; - ((uint8_t*)&srcData)[1] = *pSrc++; - ((uint8_t*)&srcData)[2] = *pSrc++; - iSrcLen -= 3; - } else { - *((uint32_t*)&srcData) = 0; - ((uint8_t*)&srcData)[0] = *pSrc++; - if (iSrcLen > 1) { - ((uint8_t*)&srcData)[1] = *pSrc++; - } - iBytes = iSrcLen; - iSrcLen = 0; - } - Base64EncodePiece(srcData, iBytes, pDstEnd); - pDstEnd += 4; - } - return pDstEnd - pDst; -} - -} // namespace - -CFX_ChecksumContext::CFX_ChecksumContext() {} - -CFX_ChecksumContext::~CFX_ChecksumContext() {} - -void CFX_ChecksumContext::StartChecksum() { - FinishChecksum(); - m_pByteContext = pdfium::MakeUnique<CRYPT_sha1_context>(); - CRYPT_SHA1Start(m_pByteContext.get()); - m_bsChecksum.clear(); - m_pSAXReader = pdfium::MakeUnique<CFX_SAXReader>(); -} - -bool CFX_ChecksumContext::UpdateChecksum( - const RetainPtr<IFX_SeekableReadStream>& pSrcFile, - FX_FILESIZE offset, - size_t size) { - if (!m_pSAXReader || !pSrcFile) - return false; - - if (size < 1) - size = pSrcFile->GetSize(); - - CFX_SAXReaderHandler handler(this); - m_pSAXReader->SetHandler(&handler); - if (m_pSAXReader->StartParse( - pSrcFile, (uint32_t)offset, (uint32_t)size, - CFX_SaxParseMode_NotSkipSpace | CFX_SaxParseMode_NotConvert_amp | - CFX_SaxParseMode_NotConvert_lt | CFX_SaxParseMode_NotConvert_gt | - CFX_SaxParseMode_NotConvert_sharp) < 0) { - return false; - } - return m_pSAXReader->ContinueParse() > 99; -} - -void CFX_ChecksumContext::FinishChecksum() { - m_pSAXReader.reset(); - if (m_pByteContext) { - uint8_t digest[20]; - memset(digest, 0, 20); - CRYPT_SHA1Finish(m_pByteContext.get(), digest); - int32_t nLen = Base64EncodeA(digest, 20, nullptr); - char* pBuffer = m_bsChecksum.GetBuffer(nLen); - Base64EncodeA(digest, 20, pBuffer); - m_bsChecksum.ReleaseBuffer(nLen); - m_pByteContext.reset(); - } -} - -ByteString CFX_ChecksumContext::GetChecksum() const { - return m_bsChecksum; -} - -void CFX_ChecksumContext::Update(const ByteStringView& bsText) { - if (!m_pByteContext) - return; - - CRYPT_SHA1Update(m_pByteContext.get(), bsText.raw_str(), bsText.GetLength()); -} diff --git a/core/fxcrt/cfx_checksumcontext.h b/core/fxcrt/cfx_checksumcontext.h deleted file mode 100644 index 6d7963ad93..0000000000 --- a/core/fxcrt/cfx_checksumcontext.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_CFX_CHECKSUMCONTEXT_H_ -#define CORE_FXCRT_CFX_CHECKSUMCONTEXT_H_ - -#include <memory> - -#include "core/fdrm/crypto/fx_crypt.h" -#include "core/fxcrt/xml/cfx_saxreader.h" - -class CFX_ChecksumContext { - public: - CFX_ChecksumContext(); - ~CFX_ChecksumContext(); - - void StartChecksum(); - void Update(const ByteStringView& bsText); - bool UpdateChecksum(const RetainPtr<IFX_SeekableReadStream>& pSrcFile, - FX_FILESIZE offset = 0, - size_t size = 0); - void FinishChecksum(); - ByteString GetChecksum() const; - - private: - std::unique_ptr<CFX_SAXReader> m_pSAXReader; - std::unique_ptr<CRYPT_sha1_context> m_pByteContext; - ByteString m_bsChecksum; -}; - -#endif // CORE_FXCRT_CFX_CHECKSUMCONTEXT_H_ diff --git a/core/fxcrt/xml/cfx_saxcontext.cpp b/core/fxcrt/xml/cfx_saxcontext.cpp deleted file mode 100644 index 4e2f0c58c9..0000000000 --- a/core/fxcrt/xml/cfx_saxcontext.cpp +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "core/fxcrt/xml/cfx_saxcontext.h" - -CFX_SAXContext::CFX_SAXContext() : m_eNode(CFX_SAXItem::Type::Unknown) {} - -CFX_SAXContext::~CFX_SAXContext() {} diff --git a/core/fxcrt/xml/cfx_saxcontext.h b/core/fxcrt/xml/cfx_saxcontext.h deleted file mode 100644 index d4d74a385b..0000000000 --- a/core/fxcrt/xml/cfx_saxcontext.h +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CFX_SAXCONTEXT_H_ -#define CORE_FXCRT_XML_CFX_SAXCONTEXT_H_ - -#include <sstream> - -#include "core/fxcrt/fx_string.h" -#include "core/fxcrt/xml/cfx_saxreader.h" - -class CFX_SAXContext { - public: - CFX_SAXContext(); - ~CFX_SAXContext(); - - std::ostringstream m_TextBuf; - ByteString m_bsTagName; - CFX_SAXItem::Type m_eNode; -}; - -#endif // CORE_FXCRT_XML_CFX_SAXCONTEXT_H_ diff --git a/core/fxcrt/xml/cfx_saxreader.cpp b/core/fxcrt/xml/cfx_saxreader.cpp deleted file mode 100644 index 762144f4bb..0000000000 --- a/core/fxcrt/xml/cfx_saxreader.cpp +++ /dev/null @@ -1,744 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/xml/cfx_saxreader.h" - -#include <algorithm> -#include <utility> - -#include "core/fxcrt/fx_stream.h" -#include "core/fxcrt/xml/cfx_saxreaderhandler.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" - -enum class CFX_SaxMode { - Text = 0, - NodeStart, - DeclOrComment, - DeclNode, - Comment, - CommentContent, - TagName, - TagAttributeName, - TagAttributeEqual, - TagAttributeValue, - TagMaybeClose, - TagClose, - TagEnd, - TargetData, -}; - -class CFX_SAXCommentContext { - public: - CFX_SAXCommentContext() : m_iHeaderCount(0), m_iTailCount(0) {} - int32_t m_iHeaderCount; - int32_t m_iTailCount; -}; - -namespace { - -const uint32_t kSaxFileBufSize = 32768; - -} // namespace - -CFX_SAXFile::CFX_SAXFile() - : m_dwStart(0), - m_dwEnd(0), - m_dwCur(0), - m_pBuf(nullptr), - m_dwBufSize(0), - m_dwBufIndex(0) {} - -CFX_SAXFile::~CFX_SAXFile() {} - -bool CFX_SAXFile::StartFile(const RetainPtr<IFX_SeekableReadStream>& pFile, - uint32_t dwStart, - uint32_t dwLen) { - ASSERT(!m_pFile && pFile); - uint32_t dwSize = pFile->GetSize(); - if (dwStart >= dwSize) - return false; - - if (dwLen == static_cast<uint32_t>(-1) || dwStart + dwLen > dwSize) - dwLen = dwSize - dwStart; - - if (dwLen == 0) - return false; - - m_dwBufSize = std::min(dwLen, kSaxFileBufSize); - m_pBuf = FX_Alloc(uint8_t, m_dwBufSize); - if (!pFile->ReadBlock(m_pBuf, dwStart, m_dwBufSize)) - return false; - - m_dwStart = dwStart; - m_dwEnd = dwStart + dwLen; - m_dwCur = dwStart; - m_pFile = pFile; - m_dwBufIndex = 0; - return true; -} - -bool CFX_SAXFile::ReadNextBlock() { - ASSERT(m_pFile); - uint32_t dwSize = m_dwEnd - m_dwCur; - if (dwSize == 0) { - return false; - } - m_dwBufSize = std::min(dwSize, kSaxFileBufSize); - if (!m_pFile->ReadBlock(m_pBuf, m_dwCur, m_dwBufSize)) { - return false; - } - m_dwBufIndex = 0; - return true; -} - -void CFX_SAXFile::Reset() { - if (m_pBuf) { - FX_Free(m_pBuf); - m_pBuf = nullptr; - } - m_pFile = nullptr; -} - -CFX_SAXReader::CFX_SAXReader() - : m_File(), - m_pHandler(nullptr), - m_iState(-1), - m_dwItemID(0), - m_dwParseMode(0) { - m_Data.reserve(256); - m_Name.reserve(256); -} - -CFX_SAXReader::~CFX_SAXReader() { - Reset(); -} - -void CFX_SAXReader::Reset() { - m_File.Reset(); - m_iState = -1; - m_Stack = std::stack<std::unique_ptr<CFX_SAXItem>>(); - m_dwItemID = 0; - m_SkipStack = std::stack<char>(); - m_SkipChar = 0; - m_pCommentContext.reset(); - ClearData(); - ClearName(); -} - -void CFX_SAXReader::Push() { - std::unique_ptr<CFX_SAXItem> pNew = - pdfium::MakeUnique<CFX_SAXItem>(++m_dwItemID); - if (!m_Stack.empty()) - pNew->m_bSkip = m_Stack.top()->m_bSkip; - m_Stack.push(std::move(pNew)); -} - -void CFX_SAXReader::Pop() { - if (!m_Stack.empty()) - m_Stack.pop(); -} - -CFX_SAXItem* CFX_SAXReader::GetCurrentItem() const { - return m_Stack.empty() ? nullptr : m_Stack.top().get(); -} - -void CFX_SAXReader::ClearData() { - m_Data.clear(); - m_iEntityStart = -1; -} - -void CFX_SAXReader::ClearName() { - m_Name.clear(); -} - -void CFX_SAXReader::AppendToData(uint8_t ch) { - m_Data.push_back(ch); -} - -void CFX_SAXReader::AppendToName(uint8_t ch) { - m_Name.push_back(ch); -} - -void CFX_SAXReader::BackUpAndReplaceDataAt(int32_t index, uint8_t ch) { - ASSERT(index > -1); - m_Data.erase(m_Data.begin() + index, m_Data.end()); - AppendToData(ch); -} - -int32_t CFX_SAXReader::CurrentDataIndex() const { - return pdfium::CollectionSize<int32_t>(m_Data) - 1; -} - -bool CFX_SAXReader::IsEntityStart(uint8_t ch) const { - return m_iEntityStart == -1 && ch == '&'; -} - -bool CFX_SAXReader::IsEntityEnd(uint8_t ch) const { - return m_iEntityStart != -1 && ch == ';'; -} - -bool CFX_SAXReader::SkipSpace(uint8_t ch) { - return (m_dwParseMode & CFX_SaxParseMode_NotSkipSpace) == 0 && ch < 0x21; -} - -int32_t CFX_SAXReader::StartParse( - const RetainPtr<IFX_SeekableReadStream>& pFile, - uint32_t dwStart, - uint32_t dwLen, - uint32_t dwParseMode) { - Reset(); - if (!m_File.StartFile(pFile, dwStart, dwLen)) - return -1; - - m_iState = 0; - m_eMode = CFX_SaxMode::Text; - m_ePrevMode = CFX_SaxMode::Text; - m_bCharData = false; - m_dwDataOffset = 0; - m_dwParseMode = dwParseMode; - m_Stack.push(pdfium::MakeUnique<CFX_SAXItem>(++m_dwItemID)); - return 0; -} - -int32_t CFX_SAXReader::ContinueParse() { - if (m_iState < 0 || m_iState > 99) - return m_iState; - - while (m_File.m_dwCur < m_File.m_dwEnd) { - uint32_t& index = m_File.m_dwBufIndex; - uint32_t size = m_File.m_dwBufSize; - const uint8_t* pBuf = m_File.m_pBuf; - while (index < size) { - m_CurByte = pBuf[index]; - ParseInternal(); - index++; - } - m_File.m_dwCur += index; - m_iState = (m_File.m_dwCur - m_File.m_dwStart) * 100 / - (m_File.m_dwEnd - m_File.m_dwStart); - if (m_File.m_dwCur >= m_File.m_dwEnd) - break; - if (!m_File.ReadNextBlock()) { - m_iState = -2; - break; - } - m_dwDataOffset = 0; - } - return m_iState; -} - -void CFX_SAXReader::ParseInternal() { - switch (m_eMode) { - case CFX_SaxMode::Text: - ParseText(); - break; - case CFX_SaxMode::NodeStart: - ParseNodeStart(); - break; - case CFX_SaxMode::DeclOrComment: - ParseDeclOrComment(); - break; - case CFX_SaxMode::DeclNode: - ParseDeclNode(); - break; - case CFX_SaxMode::Comment: - ParseComment(); - break; - case CFX_SaxMode::CommentContent: - ParseCommentContent(); - break; - case CFX_SaxMode::TagName: - ParseTagName(); - break; - case CFX_SaxMode::TagAttributeName: - ParseTagAttributeName(); - break; - case CFX_SaxMode::TagAttributeEqual: - ParseTagAttributeEqual(); - break; - case CFX_SaxMode::TagAttributeValue: - ParseTagAttributeValue(); - break; - case CFX_SaxMode::TagMaybeClose: - ParseMaybeClose(); - break; - case CFX_SaxMode::TagClose: - ParseTagClose(); - break; - case CFX_SaxMode::TagEnd: - ParseTagEnd(); - break; - case CFX_SaxMode::TargetData: - ParseTargetData(); - break; - } -} - -void CFX_SAXReader::ParseChar(uint8_t ch) { - AppendToData(ch); - if (IsEntityStart(ch)) { - m_iEntityStart = CurrentDataIndex(); - return; - } - if (!IsEntityEnd(ch)) - return; - - // No matter what, we're no longer in an entity. - ASSERT(m_iEntityStart > -1); - int32_t iSaveStart = m_iEntityStart; - m_iEntityStart = -1; - - // NOTE: Relies on negative lengths being treated as empty strings. - ByteString csEntity(m_Data.data() + iSaveStart + 1, - CurrentDataIndex() - iSaveStart - 1); - int32_t iLen = csEntity.GetLength(); - if (iLen == 0) - return; - - if (csEntity[0] == '#') { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_sharp) == 0) { - ch = 0; - uint8_t w; - if (iLen > 1 && csEntity[1] == 'x') { - for (int32_t i = 2; i < iLen; i++) { - w = csEntity[i]; - if (w >= '0' && w <= '9') - ch = (ch << 4) + w - '0'; - else if (w >= 'A' && w <= 'F') - ch = (ch << 4) + w - 55; - else if (w >= 'a' && w <= 'f') - ch = (ch << 4) + w - 87; - else - break; - } - } else { - for (int32_t i = 1; i < iLen; i++) { - w = csEntity[i]; - if (w < '0' || w > '9') - break; - ch = ch * 10 + w - '0'; - } - } - if (ch != 0) - BackUpAndReplaceDataAt(iSaveStart, ch); - } - return; - } - if (csEntity == "amp") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_amp) == 0) - BackUpAndReplaceDataAt(iSaveStart, '&'); - return; - } - if (csEntity == "lt") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_lt) == 0) - BackUpAndReplaceDataAt(iSaveStart, '<'); - return; - } - if (csEntity == "gt") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_gt) == 0) - BackUpAndReplaceDataAt(iSaveStart, '>'); - return; - } - if (csEntity == "apos") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_apos) == 0) - BackUpAndReplaceDataAt(iSaveStart, '\''); - return; - } - if (csEntity == "quot") { - if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_quot) == 0) - BackUpAndReplaceDataAt(iSaveStart, '\"'); - return; - } -} - -void CFX_SAXReader::ParseText() { - if (m_CurByte == '<') { - if (!m_Data.empty()) { - NotifyData(); - ClearData(); - } - Push(); - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - m_eMode = CFX_SaxMode::NodeStart; - return; - } - if (m_Data.empty() && SkipSpace(m_CurByte)) - return; - - ParseChar(m_CurByte); -} - -void CFX_SAXReader::ParseNodeStart() { - if (m_CurByte == '?') { - GetCurrentItem()->m_eNode = CFX_SAXItem::Type::Instruction; - m_eMode = CFX_SaxMode::TagName; - return; - } - if (m_CurByte == '!') { - m_eMode = CFX_SaxMode::DeclOrComment; - return; - } - if (m_CurByte == '/') { - m_eMode = CFX_SaxMode::TagEnd; - return; - } - if (m_CurByte == '>') { - Pop(); - m_eMode = CFX_SaxMode::Text; - return; - } - if (m_CurByte > 0x20) { - m_dwDataOffset = m_File.m_dwBufIndex; - GetCurrentItem()->m_eNode = CFX_SAXItem::Type::Tag; - m_eMode = CFX_SaxMode::TagName; - AppendToData(m_CurByte); - } -} - -void CFX_SAXReader::ParseDeclOrComment() { - if (m_CurByte == '-') { - m_eMode = CFX_SaxMode::Comment; - GetCurrentItem()->m_eNode = CFX_SAXItem::Type::Comment; - if (!m_pCommentContext) - m_pCommentContext = pdfium::MakeUnique<CFX_SAXCommentContext>(); - m_pCommentContext->m_iHeaderCount = 1; - m_pCommentContext->m_iTailCount = 0; - return; - } - m_eMode = CFX_SaxMode::DeclNode; - m_dwDataOffset = m_File.m_dwBufIndex; - m_SkipChar = '>'; - m_SkipStack.push('>'); - SkipNode(); -} - -void CFX_SAXReader::ParseComment() { - m_pCommentContext->m_iHeaderCount = 2; - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - m_eMode = CFX_SaxMode::CommentContent; -} - -void CFX_SAXReader::ParseCommentContent() { - if (m_CurByte == '-') { - m_pCommentContext->m_iTailCount++; - return; - } - if (m_CurByte == '>' && m_pCommentContext->m_iTailCount == 2) { - NotifyTargetData(); - ClearData(); - Pop(); - m_eMode = CFX_SaxMode::Text; - return; - } - while (m_pCommentContext->m_iTailCount > 0) { - AppendToData('-'); - m_pCommentContext->m_iTailCount--; - } - AppendToData(m_CurByte); -} - -void CFX_SAXReader::ParseDeclNode() { - SkipNode(); -} - -void CFX_SAXReader::ParseTagName() { - if (m_CurByte < 0x21 || m_CurByte == '/' || m_CurByte == '>' || - m_CurByte == '?') { - NotifyEnter(); - ClearData(); - if (m_CurByte < 0x21) { - ClearName(); - m_eMode = CFX_SaxMode::TagAttributeName; - } else if (m_CurByte == '/' || m_CurByte == '?') { - m_ePrevMode = m_eMode; - m_eMode = CFX_SaxMode::TagMaybeClose; - } else { - NotifyBreak(); - m_eMode = CFX_SaxMode::Text; - } - } else { - AppendToData(m_CurByte); - } -} - -void CFX_SAXReader::ParseTagAttributeName() { - if (m_CurByte < 0x21 || m_CurByte == '=') { - if (m_Name.empty() && m_CurByte < 0x21) - return; - - m_SkipChar = 0; - m_eMode = m_CurByte == '=' ? CFX_SaxMode::TagAttributeValue - : CFX_SaxMode::TagAttributeEqual; - ClearData(); - return; - } - if (m_CurByte == '/' || m_CurByte == '>' || m_CurByte == '?') { - if (m_CurByte == '/' || m_CurByte == '?') { - m_ePrevMode = m_eMode; - m_eMode = CFX_SaxMode::TagMaybeClose; - } else { - NotifyBreak(); - m_eMode = CFX_SaxMode::Text; - } - return; - } - if (m_Name.empty()) - m_dwDataOffset = m_File.m_dwBufIndex; - AppendToName(m_CurByte); -} - -void CFX_SAXReader::ParseTagAttributeEqual() { - if (m_CurByte == '=') { - m_SkipChar = 0; - m_eMode = CFX_SaxMode::TagAttributeValue; - return; - } - if (GetCurrentItem()->m_eNode == CFX_SAXItem::Type::Instruction) { - AppendToName(0x20); - m_eMode = CFX_SaxMode::TargetData; - ParseTargetData(); - } -} - -void CFX_SAXReader::ParseTagAttributeValue() { - if (m_SkipChar) { - if (m_SkipChar == m_CurByte) { - NotifyAttribute(); - ClearData(); - ClearName(); - m_SkipChar = 0; - m_eMode = CFX_SaxMode::TagAttributeName; - return; - } - ParseChar(m_CurByte); - return; - } - if (m_CurByte < 0x21) { - return; - } - if (m_Data.empty()) { - if (m_CurByte == '\'' || m_CurByte == '\"') - m_SkipChar = m_CurByte; - } -} - -void CFX_SAXReader::ParseMaybeClose() { - if (m_CurByte == '>') { - if (GetCurrentItem()->m_eNode == CFX_SAXItem::Type::Instruction) { - NotifyTargetData(); - ClearData(); - ClearName(); - } - ParseTagClose(); - m_eMode = CFX_SaxMode::Text; - } else if (m_ePrevMode == CFX_SaxMode::TagName) { - AppendToData('/'); - m_eMode = CFX_SaxMode::TagName; - m_ePrevMode = CFX_SaxMode::Text; - ParseTagName(); - } else if (m_ePrevMode == CFX_SaxMode::TagAttributeName) { - AppendToName('/'); - m_eMode = CFX_SaxMode::TagAttributeName; - m_ePrevMode = CFX_SaxMode::Text; - ParseTagAttributeName(); - } else if (m_ePrevMode == CFX_SaxMode::TargetData) { - AppendToName('?'); - m_eMode = CFX_SaxMode::TargetData; - m_ePrevMode = CFX_SaxMode::Text; - ParseTargetData(); - } -} -void CFX_SAXReader::ParseTagClose() { - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - NotifyClose(); - Pop(); -} -void CFX_SAXReader::ParseTagEnd() { - if (m_CurByte < 0x21) { - return; - } - if (m_CurByte == '>') { - Pop(); - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - NotifyEnd(); - ClearData(); - Pop(); - m_eMode = CFX_SaxMode::Text; - } else { - ParseChar(m_CurByte); - } -} -void CFX_SAXReader::ParseTargetData() { - if (m_CurByte == '?') { - m_ePrevMode = m_eMode; - m_eMode = CFX_SaxMode::TagMaybeClose; - } else { - AppendToName(m_CurByte); - } -} -void CFX_SAXReader::SkipNode() { - if (m_SkipChar == '\'' || m_SkipChar == '\"') { - if (m_CurByte != m_SkipChar) - return; - - ASSERT(!m_SkipStack.empty()); - m_SkipStack.pop(); - m_SkipChar = !m_SkipStack.empty() ? m_SkipStack.top() : 0; - return; - } - switch (m_CurByte) { - case '<': - m_SkipChar = '>'; - m_SkipStack.push('>'); - break; - case '[': - m_SkipChar = ']'; - m_SkipStack.push(']'); - break; - case '(': - m_SkipChar = ')'; - m_SkipStack.push(')'); - break; - case '\'': - m_SkipChar = '\''; - m_SkipStack.push('\''); - break; - case '\"': - m_SkipChar = '\"'; - m_SkipStack.push('\"'); - break; - default: - if (m_CurByte == m_SkipChar) { - m_SkipStack.pop(); - m_SkipChar = !m_SkipStack.empty() ? m_SkipStack.top() : 0; - if (m_SkipStack.empty() && m_CurByte == '>') { - if (m_Data.size() >= 9 && memcmp(m_Data.data(), "[CDATA[", 7) == 0 && - memcmp(m_Data.data() + m_Data.size() - 2, "]]", 2) == 0) { - Pop(); - m_Data.erase(m_Data.begin(), m_Data.begin() + 7); - m_Data.erase(m_Data.end() - 2, m_Data.end()); - m_bCharData = true; - NotifyData(); - m_bCharData = false; - } else { - Pop(); - } - ClearData(); - m_eMode = CFX_SaxMode::Text; - } - } - break; - } - if (!m_SkipStack.empty()) - ParseChar(m_CurByte); -} - -void CFX_SAXReader::NotifyData() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag) - m_pHandler->OnTagData( - pItem->m_pNode, - m_bCharData ? CFX_SAXItem::Type::CharData : CFX_SAXItem::Type::Text, - ByteStringView(m_Data), m_File.m_dwCur + m_dwDataOffset); -} - -void CFX_SAXReader::NotifyEnter() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag || - pItem->m_eNode == CFX_SAXItem::Type::Instruction) { - pItem->m_pNode = m_pHandler->OnTagEnter(ByteStringView(m_Data), - pItem->m_eNode, m_dwNodePos); - } -} - -void CFX_SAXReader::NotifyAttribute() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag || - pItem->m_eNode == CFX_SAXItem::Type::Instruction) { - m_pHandler->OnTagAttribute(pItem->m_pNode, ByteStringView(m_Name), - ByteStringView(m_Data)); - } -} - -void CFX_SAXReader::NotifyBreak() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag) - m_pHandler->OnTagBreak(pItem->m_pNode); -} - -void CFX_SAXReader::NotifyClose() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag || - pItem->m_eNode == CFX_SAXItem::Type::Instruction) { - m_pHandler->OnTagClose(pItem->m_pNode, m_dwNodePos); - } -} - -void CFX_SAXReader::NotifyEnd() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Tag) - m_pHandler->OnTagEnd(pItem->m_pNode, ByteStringView(m_Data), m_dwNodePos); -} - -void CFX_SAXReader::NotifyTargetData() { - if (!m_pHandler) - return; - - CFX_SAXItem* pItem = GetCurrentItem(); - if (!pItem) - return; - - if (pItem->m_eNode == CFX_SAXItem::Type::Instruction) { - m_pHandler->OnTargetData(pItem->m_pNode, pItem->m_eNode, - ByteStringView(m_Name), m_dwNodePos); - } else if (pItem->m_eNode == CFX_SAXItem::Type::Comment) { - m_pHandler->OnTargetData(pItem->m_pNode, pItem->m_eNode, - ByteStringView(m_Data), m_dwNodePos); - } -} - -void CFX_SAXReader::SkipCurrentNode() { - CFX_SAXItem* pItem = GetCurrentItem(); - if (pItem) - pItem->m_bSkip = true; -} diff --git a/core/fxcrt/xml/cfx_saxreader.h b/core/fxcrt/xml/cfx_saxreader.h deleted file mode 100644 index 9ff755d053..0000000000 --- a/core/fxcrt/xml/cfx_saxreader.h +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CFX_SAXREADER_H_ -#define CORE_FXCRT_XML_CFX_SAXREADER_H_ - -#include <memory> -#include <stack> -#include <vector> - -#include "core/fxcrt/fx_string.h" -#include "core/fxcrt/retain_ptr.h" - -class CFX_SAXCommentContext; -class CFX_SAXContext; -class IFX_SeekableReadStream; -enum class CFX_SaxMode; - -class CFX_SAXItem { - public: - enum class Type { - Unknown = 0, - Instruction, - Declaration, - Comment, - Tag, - Text, - CharData, - }; - - explicit CFX_SAXItem(uint32_t id) - : m_pNode(nullptr), m_eNode(Type::Unknown), m_dwID(id), m_bSkip(false) {} - - CFX_SAXContext* m_pNode; - Type m_eNode; - const uint32_t m_dwID; - bool m_bSkip; -}; - -class CFX_SAXFile { - public: - CFX_SAXFile(); - ~CFX_SAXFile(); - - bool StartFile(const RetainPtr<IFX_SeekableReadStream>& pFile, - uint32_t dwStart, - uint32_t dwLen); - bool ReadNextBlock(); - void Reset(); - - RetainPtr<IFX_SeekableReadStream> m_pFile; - uint32_t m_dwStart; - uint32_t m_dwEnd; - uint32_t m_dwCur; - uint8_t* m_pBuf; - uint32_t m_dwBufSize; - uint32_t m_dwBufIndex; -}; - -enum CFX_SaxParseMode { - CFX_SaxParseMode_NotConvert_amp = 1 << 0, - CFX_SaxParseMode_NotConvert_lt = 1 << 1, - CFX_SaxParseMode_NotConvert_gt = 1 << 2, - CFX_SaxParseMode_NotConvert_apos = 1 << 3, - CFX_SaxParseMode_NotConvert_quot = 1 << 4, - CFX_SaxParseMode_NotConvert_sharp = 1 << 5, - CFX_SaxParseMode_NotSkipSpace = 1 << 6 -}; - -class CFX_SAXReader { - public: - class HandlerIface { - public: - virtual ~HandlerIface() {} - virtual CFX_SAXContext* OnTagEnter(const ByteStringView& bsTagName, - CFX_SAXItem::Type eType, - uint32_t dwStartPos) = 0; - virtual void OnTagAttribute(CFX_SAXContext* pTag, - const ByteStringView& bsAttri, - const ByteStringView& bsValue) = 0; - virtual void OnTagBreak(CFX_SAXContext* pTag) = 0; - virtual void OnTagData(CFX_SAXContext* pTag, - CFX_SAXItem::Type eType, - const ByteStringView& bsData, - uint32_t dwStartPos) = 0; - virtual void OnTagClose(CFX_SAXContext* pTag, uint32_t dwEndPos) = 0; - virtual void OnTagEnd(CFX_SAXContext* pTag, - const ByteStringView& bsTagName, - uint32_t dwEndPos) = 0; - virtual void OnTargetData(CFX_SAXContext* pTag, - CFX_SAXItem::Type eType, - const ByteStringView& bsData, - uint32_t dwStartPos) = 0; - }; - - CFX_SAXReader(); - ~CFX_SAXReader(); - - int32_t StartParse(const RetainPtr<IFX_SeekableReadStream>& pFile, - uint32_t dwStart = 0, - uint32_t dwLen = -1, - uint32_t dwParseMode = 0); - int32_t ContinueParse(); - void SetHandler(HandlerIface* pHandler) { m_pHandler = pHandler; } - - private: - void ParseInternal(); - void SkipCurrentNode(); - void AppendData(uint8_t ch); - void AppendName(uint8_t ch); - void ParseText(); - void ParseNodeStart(); - void ParseInstruction(); - void ParseDeclOrComment(); - void ParseDeclNode(); - void ParseComment(); - void ParseCommentContent(); - void ParseTagName(); - void ParseTagAttributeName(); - void ParseTagAttributeEqual(); - void ParseTagAttributeValue(); - void ParseMaybeClose(); - void ParseTagClose(); - void ParseTagEnd(); - void ParseTargetData(); - void Reset(); - void ClearData(); - void ClearName(); - void AppendToData(uint8_t ch); - void AppendToName(uint8_t ch); - void BackUpAndReplaceDataAt(int32_t index, uint8_t ch); - bool IsEntityStart(uint8_t ch) const; - bool IsEntityEnd(uint8_t ch) const; - int32_t CurrentDataIndex() const; - void Push(); - void Pop(); - CFX_SAXItem* GetCurrentItem() const; - bool SkipSpace(uint8_t ch); - void SkipNode(); - void NotifyData(); - void NotifyEnter(); - void NotifyAttribute(); - void NotifyBreak(); - void NotifyClose(); - void NotifyEnd(); - void NotifyTargetData(); - void ReallocDataBuffer(); - void ReallocNameBuffer(); - void ParseChar(uint8_t ch); - - CFX_SAXFile m_File; - HandlerIface* m_pHandler; - int32_t m_iState; - std::stack<std::unique_ptr<CFX_SAXItem>> m_Stack; - uint32_t m_dwItemID; - CFX_SaxMode m_eMode; - CFX_SaxMode m_ePrevMode; - bool m_bCharData; - uint8_t m_CurByte; - uint32_t m_dwDataOffset; - std::stack<char> m_SkipStack; - uint8_t m_SkipChar; - uint32_t m_dwNodePos; - std::vector<uint8_t> m_Data; - int32_t m_iEntityStart; // Index into m_Data. - std::vector<uint8_t> m_Name; - uint32_t m_dwParseMode; - std::unique_ptr<CFX_SAXCommentContext> m_pCommentContext; -}; - -#endif // CORE_FXCRT_XML_CFX_SAXREADER_H_ diff --git a/core/fxcrt/xml/cfx_saxreader_unittest.cpp b/core/fxcrt/xml/cfx_saxreader_unittest.cpp deleted file mode 100644 index 13d06325a7..0000000000 --- a/core/fxcrt/xml/cfx_saxreader_unittest.cpp +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright 2017 The PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "core/fxcrt/xml/cfx_saxreader.h" -#include "core/fxcrt/cfx_memorystream.h" -#include "testing/gmock/include/gmock/gmock.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "testing/test_support.h" - -using testing::_; -using testing::Eq; -using testing::Return; - -namespace { - -class MockHandler : public CFX_SAXReader::HandlerIface { - public: - MOCK_METHOD3(OnTagEnter, - CFX_SAXContext*(const ByteStringView& bsTagName, - CFX_SAXItem::Type eType, - uint32_t dwStartPos)); - MOCK_METHOD3(OnTagAttribute, - void(CFX_SAXContext* pTag, - const ByteStringView& bsAttri, - const ByteStringView& bsValue)); - MOCK_METHOD1(OnTagBreak, void(CFX_SAXContext* pTag)); - MOCK_METHOD4(OnTagData, - void(CFX_SAXContext* pTag, - CFX_SAXItem::Type eType, - const ByteStringView& bsData, - uint32_t dwStartPos)); - MOCK_METHOD2(OnTagClose, void(CFX_SAXContext* pTag, uint32_t dwEndPos)); - MOCK_METHOD3(OnTagEnd, - void(CFX_SAXContext* pTag, - const ByteStringView& bsTagName, - uint32_t dwEndPos)); - MOCK_METHOD4(OnTargetData, - void(CFX_SAXContext* pTag, - CFX_SAXItem::Type eType, - const ByteStringView& bsData, - uint32_t dwStartPos)); -}; - -} // namespace - -class CFX_SAXReaderTest : public testing::Test { - public: - void SetHandler(CFX_SAXReader::HandlerIface* handler) { - reader_.SetHandler(handler); - } - - bool StartParse(char* str) { - return reader_.StartParse( - pdfium::MakeRetain<CFX_MemoryStream>( - reinterpret_cast<uint8_t*>(str), strlen(str), false), - 0, static_cast<uint32_t>(-1), - CFX_SaxParseMode_NotSkipSpace) >= 0; - } - - int32_t ContinueParse() { - int32_t ret; - do { - ret = reader_.ContinueParse(); - } while (ret >= 0 && ret < 100); - return ret; - } - - private: - CFX_SAXReader reader_; -}; - -TEST_F(CFX_SAXReaderTest, Null) { - char data[] = ""; - ASSERT_FALSE(StartParse(data)); -} - -TEST_F(CFX_SAXReaderTest, SimpleText) { - MockHandler mock; - SetHandler(&mock); - - char data[] = "clams"; - ASSERT_TRUE(StartParse(data)); - EXPECT_EQ(100, ContinueParse()); -} - -TEST_F(CFX_SAXReaderTest, SimpleTag) { - MockHandler mock; - EXPECT_CALL(mock, OnTagEnter(Eq("clams"), _, _)); - EXPECT_CALL(mock, OnTagBreak(_)); - SetHandler(&mock); - - char data[] = "<clams>"; - ASSERT_TRUE(StartParse(data)); - EXPECT_EQ(100, ContinueParse()); -} - -TEST_F(CFX_SAXReaderTest, AttributeTag) { - MockHandler mock; - EXPECT_CALL(mock, OnTagEnter(Eq("clams"), _, _)); - EXPECT_CALL(mock, OnTagAttribute(_, Eq("size"), Eq("small"))); - EXPECT_CALL(mock, OnTagAttribute(_, Eq("color"), Eq("red"))); - EXPECT_CALL(mock, OnTagBreak(_)); - SetHandler(&mock); - - char data[] = "<clams size='small' color='red'>"; - ASSERT_TRUE(StartParse(data)); - EXPECT_EQ(100, ContinueParse()); -} - -TEST_F(CFX_SAXReaderTest, AttributeEntityTag) { - MockHandler mock; - EXPECT_CALL(mock, OnTagEnter(Eq("clams"), _, _)); - EXPECT_CALL(mock, OnTagAttribute(_, Eq("predicate"), Eq("1 < 2"))); - EXPECT_CALL(mock, OnTagBreak(_)); - SetHandler(&mock); - - char data[] = "<clams predicate='1 < 2'>"; - ASSERT_TRUE(StartParse(data)); - EXPECT_EQ(100, ContinueParse()); -} - -TEST_F(CFX_SAXReaderTest, TextWithinTag) { - MockHandler mock; - EXPECT_CALL(mock, OnTagEnter(Eq("b"), _, _)); - EXPECT_CALL(mock, OnTagBreak(_)); - EXPECT_CALL(mock, OnTagData(_, _, Eq("biff"), _)); - EXPECT_CALL(mock, OnTagEnd(_, Eq("b"), _)); - SetHandler(&mock); - - char data[] = "<b>biff</b>"; - ASSERT_TRUE(StartParse(data)); - EXPECT_EQ(100, ContinueParse()); -} - -TEST_F(CFX_SAXReaderTest, bug_711459) { - char data[] = - "&a<tag " - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - ">x;"; - ASSERT_TRUE(StartParse(data)); - EXPECT_EQ(100, ContinueParse()); -} diff --git a/core/fxcrt/xml/cfx_saxreaderhandler.cpp b/core/fxcrt/xml/cfx_saxreaderhandler.cpp deleted file mode 100644 index d255ce924d..0000000000 --- a/core/fxcrt/xml/cfx_saxreaderhandler.cpp +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/xml/cfx_saxreaderhandler.h" - -#include <string> - -#include "core/fxcrt/cfx_checksumcontext.h" - -CFX_SAXReaderHandler::CFX_SAXReaderHandler(CFX_ChecksumContext* pContext) - : m_pContext(pContext) { - ASSERT(m_pContext); -} - -CFX_SAXReaderHandler::~CFX_SAXReaderHandler() {} - -CFX_SAXContext* CFX_SAXReaderHandler::OnTagEnter( - const ByteStringView& bsTagName, - CFX_SAXItem::Type eType, - uint32_t dwStartPos) { - UpdateChecksum(true); - if (eType != CFX_SAXItem::Type::Tag && - eType != CFX_SAXItem::Type::Instruction) { - return nullptr; - } - - m_SAXContext.m_eNode = eType; - m_SAXContext.m_TextBuf << "<"; - if (eType == CFX_SAXItem::Type::Instruction) - m_SAXContext.m_TextBuf << "?"; - - m_SAXContext.m_TextBuf << bsTagName; - m_SAXContext.m_bsTagName = bsTagName; - return &m_SAXContext; -} - -void CFX_SAXReaderHandler::OnTagAttribute(CFX_SAXContext* pTag, - const ByteStringView& bsAttri, - const ByteStringView& bsValue) { - if (!pTag) - return; - pTag->m_TextBuf << " " << bsAttri << "=\"" << bsValue << "\""; -} - -void CFX_SAXReaderHandler::OnTagBreak(CFX_SAXContext* pTag) { - if (!pTag) - return; - - pTag->m_TextBuf << ">"; - UpdateChecksum(false); -} - -void CFX_SAXReaderHandler::OnTagData(CFX_SAXContext* pTag, - CFX_SAXItem::Type eType, - const ByteStringView& bsData, - uint32_t dwStartPos) { - if (!pTag) - return; - - if (eType == CFX_SAXItem::Type::CharData) - pTag->m_TextBuf << "<![CDATA["; - - pTag->m_TextBuf << bsData; - if (eType == CFX_SAXItem::Type::CharData) - pTag->m_TextBuf << "]]>"; -} - -void CFX_SAXReaderHandler::OnTagClose(CFX_SAXContext* pTag, uint32_t dwEndPos) { - if (!pTag) - return; - - if (pTag->m_eNode == CFX_SAXItem::Type::Instruction) - pTag->m_TextBuf << "?>"; - else if (pTag->m_eNode == CFX_SAXItem::Type::Tag) - pTag->m_TextBuf << "></" << pTag->m_bsTagName.AsStringView() << ">"; - - UpdateChecksum(false); -} - -void CFX_SAXReaderHandler::OnTagEnd(CFX_SAXContext* pTag, - const ByteStringView& bsTagName, - uint32_t dwEndPos) { - if (!pTag) - return; - - pTag->m_TextBuf << "</" << bsTagName << ">"; - UpdateChecksum(false); -} - -void CFX_SAXReaderHandler::OnTargetData(CFX_SAXContext* pTag, - CFX_SAXItem::Type eType, - const ByteStringView& bsData, - uint32_t dwStartPos) { - if (!pTag && eType != CFX_SAXItem::Type::Comment) - return; - - if (eType == CFX_SAXItem::Type::Comment) { - m_SAXContext.m_TextBuf << "<!--" << bsData << "-->"; - UpdateChecksum(false); - } else { - pTag->m_TextBuf << " " << bsData; - } -} - -void CFX_SAXReaderHandler::UpdateChecksum(bool bCheckSpace) { - int32_t iLength = m_SAXContext.m_TextBuf.tellp(); - if (iLength < 1) - return; - - std::string sBuffer = m_SAXContext.m_TextBuf.str(); - const uint8_t* pBuffer = reinterpret_cast<const uint8_t*>(sBuffer.c_str()); - bool bUpdata = true; - if (bCheckSpace) { - bUpdata = false; - for (int32_t i = 0; i < iLength; i++) { - bUpdata = (pBuffer[i] > 0x20); - if (bUpdata) - break; - } - } - if (bUpdata) - m_pContext->Update(ByteStringView(pBuffer, iLength)); - - m_SAXContext.m_TextBuf.str(""); -} diff --git a/core/fxcrt/xml/cfx_saxreaderhandler.h b/core/fxcrt/xml/cfx_saxreaderhandler.h deleted file mode 100644 index 263008f1ff..0000000000 --- a/core/fxcrt/xml/cfx_saxreaderhandler.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CFX_SAXREADERHANDLER_H_ -#define CORE_FXCRT_XML_CFX_SAXREADERHANDLER_H_ - -#include "core/fxcrt/fx_string.h" -#include "core/fxcrt/xml/cfx_saxcontext.h" -#include "core/fxcrt/xml/cfx_saxreader.h" - -class CFX_ChecksumContext; - -class CFX_SAXReaderHandler : public CFX_SAXReader::HandlerIface { - public: - explicit CFX_SAXReaderHandler(CFX_ChecksumContext* pContext); - ~CFX_SAXReaderHandler() override; - - CFX_SAXContext* OnTagEnter(const ByteStringView& bsTagName, - CFX_SAXItem::Type eType, - uint32_t dwStartPos) override; - void OnTagAttribute(CFX_SAXContext* pTag, - const ByteStringView& bsAttri, - const ByteStringView& bsValue) override; - void OnTagBreak(CFX_SAXContext* pTag) override; - void OnTagData(CFX_SAXContext* pTag, - CFX_SAXItem::Type eType, - const ByteStringView& bsData, - uint32_t dwStartPos) override; - void OnTagClose(CFX_SAXContext* pTag, uint32_t dwEndPos) override; - void OnTagEnd(CFX_SAXContext* pTag, - const ByteStringView& bsTagName, - uint32_t dwEndPos) override; - void OnTargetData(CFX_SAXContext* pTag, - CFX_SAXItem::Type eType, - const ByteStringView& bsData, - uint32_t dwStartPos) override; - - private: - void UpdateChecksum(bool bCheckSpace); - - CFX_ChecksumContext* m_pContext; - CFX_SAXContext m_SAXContext; -}; - -#endif // CORE_FXCRT_XML_CFX_SAXREADERHANDLER_H_ diff --git a/core/fxcrt/xml/cfx_xmlparser.cpp b/core/fxcrt/xml/cfx_xmlparser.cpp index c81c4082eb..05e52015bd 100644 --- a/core/fxcrt/xml/cfx_xmlparser.cpp +++ b/core/fxcrt/xml/cfx_xmlparser.cpp @@ -15,10 +15,7 @@ CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent, const RetainPtr<CFX_SeekableStreamProxy>& pStream) - : m_nElementStart(0), - m_dwCheckStatus(0), - m_dwCurrentCheckStatus(0), - m_pStream(pStream), + : m_pStream(pStream), m_pParser(pdfium::MakeUnique<CFX_XMLSyntaxParser>(m_pStream)), m_pParent(pParent), m_pChild(nullptr), @@ -51,9 +48,6 @@ int32_t CFX_XMLParser::DoParser() { m_pChild = m_pParent; break; case FX_XmlSyntaxResult::ElementOpen: - if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 2) - m_nElementStart = m_pParser->GetCurrentPos() - 1; - break; case FX_XmlSyntaxResult::ElementBreak: break; case FX_XmlSyntaxResult::ElementClose: @@ -73,12 +67,6 @@ int32_t CFX_XMLParser::DoParser() { m_syntaxParserResult = FX_XmlSyntaxResult::Error; break; } - if (m_dwCurrentCheckStatus != 0 && m_NodeStack.size() == 2) { - m_nSize[m_dwCurrentCheckStatus - 1] = - m_pParser->GetCurrentBinaryPos() - - m_nStart[m_dwCurrentCheckStatus - 1]; - m_dwCurrentCheckStatus = 0; - } m_pParent = m_NodeStack.top(); m_pChild = m_pParent; iCount++; @@ -99,22 +87,6 @@ int32_t CFX_XMLParser::DoParser() { m_pParent->AppendChild(m_pChild); m_NodeStack.push(m_pChild); m_pParent = m_pChild; - - if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 3) { - WideString wsTag = - static_cast<CFX_XMLElement*>(m_pChild)->GetLocalTagName(); - if (wsTag == L"template") { - m_dwCheckStatus |= 0x01; - m_dwCurrentCheckStatus = 0x01; - m_nStart[0] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } else if (wsTag == L"datasets") { - m_dwCheckStatus |= 0x02; - m_dwCurrentCheckStatus = 0x02; - m_nStart[1] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } - } break; case FX_XmlSyntaxResult::AttriName: m_ws1 = m_pParser->GetAttributeName(); diff --git a/core/fxcrt/xml/cfx_xmlparser.h b/core/fxcrt/xml/cfx_xmlparser.h index 0038f6d9b2..2998a44ad5 100644 --- a/core/fxcrt/xml/cfx_xmlparser.h +++ b/core/fxcrt/xml/cfx_xmlparser.h @@ -26,12 +26,6 @@ class CFX_XMLParser { int32_t DoParser(); - FX_FILESIZE m_nStart[2]; - size_t m_nSize[2]; - FX_FILESIZE m_nElementStart; - uint16_t m_dwCheckStatus; - uint16_t m_dwCurrentCheckStatus; - private: RetainPtr<CFX_SeekableStreamProxy> m_pStream; std::unique_ptr<CFX_XMLSyntaxParser> m_pParser; |