From 6c93df843774354fb7c34cb887583b5312c04415 Mon Sep 17 00:00:00 2001 From: dsinclair Date: Thu, 19 May 2016 18:13:53 -0700 Subject: Move fgas_sax into individual files in fde. This CL moves the fgas Sax parser into the fde/xml directory. This places the parse with the other XML parser in the system. Review-Url: https://codereview.chromium.org/1990003002 --- BUILD.gn | 4 +- xfa.gyp | 4 +- xfa/fde/xml/cfx_saxreader.cpp | 717 ++++++++++++++++++++++++++++++++++++++++ xfa/fde/xml/cfx_saxreader.h | 143 ++++++++ xfa/fgas/xml/fgas_sax.cpp | 688 -------------------------------------- xfa/fgas/xml/fgas_sax.h | 162 --------- xfa/fxfa/app/xfa_checksum.cpp | 29 +- xfa/fxfa/include/xfa_checksum.h | 12 +- 8 files changed, 885 insertions(+), 874 deletions(-) create mode 100644 xfa/fde/xml/cfx_saxreader.cpp create mode 100644 xfa/fde/xml/cfx_saxreader.h delete mode 100644 xfa/fgas/xml/fgas_sax.cpp delete mode 100644 xfa/fgas/xml/fgas_sax.h diff --git a/BUILD.gn b/BUILD.gn index c8f1c2d2d5..dbd827a7af 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -945,6 +945,8 @@ if (pdf_enable_xfa) { "xfa/fde/ifx_chariter.h", "xfa/fde/tto/fde_textout.cpp", "xfa/fde/tto/fde_textout.h", + "xfa/fde/xml/cfx_saxreader.cpp", + "xfa/fde/xml/cfx_saxreader.h", "xfa/fde/xml/fde_xml.h", "xfa/fde/xml/fde_xml_imp.cpp", "xfa/fde/xml/fde_xml_imp.h", @@ -980,8 +982,6 @@ if (pdf_enable_xfa) { "xfa/fgas/localization/fgas_locale.cpp", "xfa/fgas/localization/fgas_locale.h", "xfa/fgas/localization/fgas_localeimp.h", - "xfa/fgas/xml/fgas_sax.cpp", - "xfa/fgas/xml/fgas_sax.h", "xfa/fwl/basewidget/fwl_barcodeimp.cpp", "xfa/fwl/basewidget/fwl_barcodeimp.h", "xfa/fwl/basewidget/fwl_caretimp.cpp", diff --git a/xfa.gyp b/xfa.gyp index 186f68094e..7ef5791188 100644 --- a/xfa.gyp +++ b/xfa.gyp @@ -88,6 +88,8 @@ "xfa/fde/ifx_chariter.h", "xfa/fde/tto/fde_textout.cpp", "xfa/fde/tto/fde_textout.h", + "xfa/fde/xml/cfx_saxreader.cpp", + "xfa/fde/xml/cfx_saxreader.h", "xfa/fde/xml/fde_xml.h", "xfa/fde/xml/fde_xml_imp.cpp", "xfa/fde/xml/fde_xml_imp.h", @@ -123,8 +125,6 @@ "xfa/fgas/localization/fgas_locale.cpp", "xfa/fgas/localization/fgas_locale.h", "xfa/fgas/localization/fgas_localeimp.h", - "xfa/fgas/xml/fgas_sax.cpp", - "xfa/fgas/xml/fgas_sax.h", "xfa/fwl/basewidget/fwl_barcodeimp.cpp", "xfa/fwl/basewidget/fwl_barcodeimp.h", "xfa/fwl/basewidget/fwl_caretimp.cpp", diff --git a/xfa/fde/xml/cfx_saxreader.cpp b/xfa/fde/xml/cfx_saxreader.cpp new file mode 100644 index 0000000000..ec22c73541 --- /dev/null +++ b/xfa/fde/xml/cfx_saxreader.cpp @@ -0,0 +1,717 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "xfa/fde/xml/cfx_saxreader.h" + +#include + +#include "xfa/fxfa/include/xfa_checksum.h" + +enum class CFX_SaxMode { + Text = 0, + NodeStart, + DeclOrComment, + DeclNode, + Comment, + CommentContent, + TagName, + TagAttributeName, + TagAttributeEqual, + TagAttributeValue, + TagMaybeClose, + TagClose, + TagEnd, + TargetData, + MAX +}; + +class CFX_SAXCommentContext { + public: + CFX_SAXCommentContext() : m_iHeaderCount(0), m_iTailCount(0) {} + int32_t m_iHeaderCount; + int32_t m_iTailCount; +}; + +namespace { + +const uint32_t kSaxFileBufSize = 32768; + +typedef void (CFX_SAXReader::*FX_SAXReader_LPFParse)(); +static const FX_SAXReader_LPFParse + g_FX_SAXReader_LPFParse[static_cast(CFX_SaxMode::MAX)] = { + &CFX_SAXReader::ParseText, + &CFX_SAXReader::ParseNodeStart, + &CFX_SAXReader::ParseDeclOrComment, + &CFX_SAXReader::ParseDeclNode, + &CFX_SAXReader::ParseComment, + &CFX_SAXReader::ParseCommentContent, + &CFX_SAXReader::ParseTagName, + &CFX_SAXReader::ParseTagAttributeName, + &CFX_SAXReader::ParseTagAttributeEqual, + &CFX_SAXReader::ParseTagAttributeValue, + &CFX_SAXReader::ParseMaybeClose, + &CFX_SAXReader::ParseTagClose, + &CFX_SAXReader::ParseTagEnd, + &CFX_SAXReader::ParseTargetData, +}; + +} // namespace + +CFX_SAXFile::CFX_SAXFile() + : m_pFile(nullptr), + m_dwStart(0), + m_dwEnd(0), + m_dwCur(0), + m_pBuf(nullptr), + m_dwBufSize(0), + m_dwBufIndex(0) {} +FX_BOOL CFX_SAXFile::StartFile(IFX_FileRead* pFile, + uint32_t dwStart, + uint32_t dwLen) { + ASSERT(!m_pFile && pFile); + uint32_t dwSize = pFile->GetSize(); + if (dwStart >= dwSize) { + return FALSE; + } + if (dwLen == static_cast(-1) || dwStart + dwLen > dwSize) { + dwLen = dwSize - dwStart; + } + if (dwLen == 0) { + return FALSE; + } + m_dwBufSize = std::min(dwLen, kSaxFileBufSize); + m_pBuf = FX_Alloc(uint8_t, m_dwBufSize); + if (!pFile->ReadBlock(m_pBuf, dwStart, m_dwBufSize)) { + return FALSE; + } + m_dwStart = dwStart; + m_dwEnd = dwStart + dwLen; + m_dwCur = dwStart; + m_pFile = pFile; + m_dwBufIndex = 0; + return TRUE; +} +FX_BOOL CFX_SAXFile::ReadNextBlock() { + ASSERT(m_pFile); + uint32_t dwSize = m_dwEnd - m_dwCur; + if (dwSize == 0) { + return FALSE; + } + m_dwBufSize = std::min(dwSize, kSaxFileBufSize); + if (!m_pFile->ReadBlock(m_pBuf, m_dwCur, m_dwBufSize)) { + return FALSE; + } + m_dwBufIndex = 0; + return TRUE; +} +void CFX_SAXFile::Reset() { + if (m_pBuf) { + FX_Free(m_pBuf); + m_pBuf = nullptr; + } + m_pFile = nullptr; +} +CFX_SAXReader::CFX_SAXReader() + : m_File(), + m_pHandler(nullptr), + m_iState(-1), + m_pRoot(nullptr), + m_pCurItem(nullptr), + m_dwItemID(0), + m_iDataSize(256), + m_iNameSize(256), + m_dwParseMode(0), + m_pCommentContext(nullptr) { + m_pszData = FX_Alloc(uint8_t, m_iDataSize); + m_pszName = FX_Alloc(uint8_t, m_iNameSize); +} +CFX_SAXReader::~CFX_SAXReader() { + Reset(); + if (m_pszData) { + FX_Free(m_pszData); + m_pszData = nullptr; + } + if (m_pszName) { + FX_Free(m_pszName); + m_pszName = nullptr; + } +} +void CFX_SAXReader::Reset() { + m_File.Reset(); + CFX_SAXItem* pItem = m_pRoot; + while (pItem) { + CFX_SAXItem* pNext = pItem->m_pNext; + delete pItem; + pItem = pNext; + } + m_pRoot = nullptr; + m_pCurItem = nullptr; + m_dwItemID = 0; + m_SkipStack.RemoveAll(); + m_SkipChar = 0; + m_iDataLength = 0; + m_iEntityStart = -1; + m_iNameLength = 0; + m_iDataPos = 0; + if (m_pCommentContext) { + delete m_pCommentContext; + m_pCommentContext = nullptr; + } +} +inline void CFX_SAXReader::Push() { + CFX_SAXItem* pNew = new CFX_SAXItem; + pNew->m_dwID = ++m_dwItemID; + pNew->m_bSkip = m_pCurItem->m_bSkip; + pNew->m_pPrev = m_pCurItem; + m_pCurItem->m_pNext = pNew; + m_pCurItem = pNew; +} +inline void CFX_SAXReader::Pop() { + if (!m_pCurItem) { + return; + } + CFX_SAXItem* pPrev = m_pCurItem->m_pPrev; + pPrev->m_pNext = nullptr; + delete m_pCurItem; + m_pCurItem = pPrev; +} +inline void CFX_SAXReader::AppendData(uint8_t ch) { + ReallocDataBuffer(); + m_pszData[m_iDataPos++] = ch; +} +inline void CFX_SAXReader::AppendName(uint8_t ch) { + ReallocNameBuffer(); + m_pszName[m_iDataPos++] = ch; +} +void CFX_SAXReader::ReallocDataBuffer() { + if (m_iDataPos < m_iDataSize) { + return; + } + if (m_iDataSize <= 1024 * 1024) { + m_iDataSize *= 2; + } else { + m_iDataSize += 1024 * 1024; + } + m_pszData = (uint8_t*)FX_Realloc(uint8_t, m_pszData, m_iDataSize); +} +void CFX_SAXReader::ReallocNameBuffer() { + if (m_iDataPos < m_iNameSize) { + return; + } + if (m_iNameSize <= 1024 * 1024) { + m_iNameSize *= 2; + } else { + m_iNameSize += 1024 * 1024; + } + m_pszName = (uint8_t*)FX_Realloc(uint8_t, m_pszName, m_iNameSize); +} +inline FX_BOOL CFX_SAXReader::SkipSpace(uint8_t ch) { + return (m_dwParseMode & CFX_SaxParseMode_NotSkipSpace) == 0 && ch < 0x21; +} +int32_t CFX_SAXReader::StartParse(IFX_FileRead* pFile, + uint32_t dwStart, + uint32_t dwLen, + uint32_t dwParseMode) { + m_iState = -1; + Reset(); + if (!m_File.StartFile(pFile, dwStart, dwLen)) { + return -1; + } + m_iState = 0; + m_eMode = CFX_SaxMode::Text; + m_ePrevMode = CFX_SaxMode::Text; + m_bCharData = FALSE; + m_dwDataOffset = 0; + m_pRoot = m_pCurItem = new CFX_SAXItem; + m_pCurItem->m_dwID = ++m_dwItemID; + m_dwParseMode = dwParseMode; + return 0; +} + +int32_t CFX_SAXReader::ContinueParse(IFX_Pause* pPause) { + if (m_iState < 0 || m_iState > 99) { + return m_iState; + } + while (m_File.m_dwCur < m_File.m_dwEnd) { + uint32_t& index = m_File.m_dwBufIndex; + uint32_t size = m_File.m_dwBufSize; + const uint8_t* pBuf = m_File.m_pBuf; + while (index < size) { + m_CurByte = pBuf[index]; + (this->*g_FX_SAXReader_LPFParse[static_cast(m_eMode)])(); + index++; + } + m_File.m_dwCur += index; + m_iState = (m_File.m_dwCur - m_File.m_dwStart) * 100 / + (m_File.m_dwEnd - m_File.m_dwStart); + if (m_File.m_dwCur >= m_File.m_dwEnd) { + break; + } + if (!m_File.ReadNextBlock()) { + m_iState = -2; + break; + } + m_dwDataOffset = 0; + if (pPause && pPause->NeedToPauseNow()) { + break; + } + } + return m_iState; +} +void CFX_SAXReader::ParseChar(uint8_t ch) { + ReallocDataBuffer(); + m_pszData[m_iDataPos] = ch; + if (m_iEntityStart > -1 && ch == ';') { + int32_t iSaveEntityStart = m_iEntityStart; + CFX_ByteString csEntity(m_pszData + m_iEntityStart + 1, + m_iDataPos - m_iEntityStart - 1); + int32_t iLen = csEntity.GetLength(); + if (iLen > 0) { + if (csEntity[0] == '#') { + if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_sharp) == 0) { + ch = 0; + uint8_t w; + if (iLen > 1 && csEntity[1] == 'x') { + for (int32_t i = 2; i < iLen; i++) { + w = csEntity[i]; + if (w >= '0' && w <= '9') { + ch = (ch << 4) + w - '0'; + } else if (w >= 'A' && w <= 'F') { + ch = (ch << 4) + w - 55; + } else if (w >= 'a' && w <= 'f') { + ch = (ch << 4) + w - 87; + } else { + break; + } + } + } else { + for (int32_t i = 1; i < iLen; i++) { + w = csEntity[i]; + if (w < '0' || w > '9') { + break; + } + ch = ch * 10 + w - '0'; + } + } + if (ch != 0) { + m_pszData[m_iEntityStart++] = ch; + } + } + } else { + if (csEntity.Compare("amp") == 0) { + if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_amp) == 0) { + m_pszData[m_iEntityStart++] = '&'; + } + } else if (csEntity.Compare("lt") == 0) { + if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_lt) == 0) { + m_pszData[m_iEntityStart++] = '<'; + } + } else if (csEntity.Compare("gt") == 0) { + if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_gt) == 0) { + m_pszData[m_iEntityStart++] = '>'; + } + } else if (csEntity.Compare("apos") == 0) { + if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_apos) == 0) { + m_pszData[m_iEntityStart++] = '\''; + } + } else if (csEntity.Compare("quot") == 0) { + if ((m_dwParseMode & CFX_SaxParseMode_NotConvert_quot) == 0) { + m_pszData[m_iEntityStart++] = '\"'; + } + } + } + } + if (iSaveEntityStart != m_iEntityStart) { + m_iDataPos = m_iEntityStart; + m_iEntityStart = -1; + } else { + m_iDataPos++; + m_iEntityStart = -1; + } + } else { + if (m_iEntityStart < 0 && ch == '&') { + m_iEntityStart = m_iDataPos; + } + m_iDataPos++; + } +} +void CFX_SAXReader::ParseText() { + if (m_CurByte == '<') { + if (m_iDataPos > 0) { + m_iDataLength = m_iDataPos; + m_iDataPos = 0; + if (m_pHandler) { + NotifyData(); + } + } + Push(); + m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; + m_eMode = CFX_SaxMode::NodeStart; + return; + } + if (m_iDataPos < 1 && SkipSpace(m_CurByte)) { + return; + } + ParseChar(m_CurByte); +} +void CFX_SAXReader::ParseNodeStart() { + if (m_CurByte == '?') { + m_pCurItem->m_eNode = CFX_SAXItem::Type::Instruction; + m_eMode = CFX_SaxMode::TagName; + return; + } + if (m_CurByte == '!') { + m_eMode = CFX_SaxMode::DeclOrComment; + return; + } + if (m_CurByte == '/') { + m_eMode = CFX_SaxMode::TagEnd; + return; + } + if (m_CurByte == '>') { + Pop(); + m_eMode = CFX_SaxMode::Text; + return; + } + if (m_CurByte > 0x20) { + m_dwDataOffset = m_File.m_dwBufIndex; + m_pCurItem->m_eNode = CFX_SAXItem::Type::Tag; + m_eMode = CFX_SaxMode::TagName; + AppendData(m_CurByte); + } +} +void CFX_SAXReader::ParseDeclOrComment() { + if (m_CurByte == '-') { + m_eMode = CFX_SaxMode::Comment; + m_pCurItem->m_eNode = CFX_SAXItem::Type::Comment; + if (!m_pCommentContext) + m_pCommentContext = new CFX_SAXCommentContext; + + m_pCommentContext->m_iHeaderCount = 1; + m_pCommentContext->m_iTailCount = 0; + } else { + m_eMode = CFX_SaxMode::DeclNode; + m_dwDataOffset = m_File.m_dwBufIndex; + m_SkipChar = '>'; + m_SkipStack.Add('>'); + SkipNode(); + } +} +void CFX_SAXReader::ParseComment() { + m_pCommentContext->m_iHeaderCount = 2; + m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; + m_eMode = CFX_SaxMode::CommentContent; +} +void CFX_SAXReader::ParseCommentContent() { + if (m_CurByte == '-') { + m_pCommentContext->m_iTailCount++; + } else if (m_CurByte == '>' && m_pCommentContext->m_iTailCount == 2) { + m_iDataLength = m_iDataPos; + m_iDataPos = 0; + if (m_pHandler) { + NotifyTargetData(); + } + Pop(); + m_eMode = CFX_SaxMode::Text; + } else { + while (m_pCommentContext->m_iTailCount > 0) { + AppendData('-'); + m_pCommentContext->m_iTailCount--; + } + AppendData(m_CurByte); + } +} +void CFX_SAXReader::ParseDeclNode() { + SkipNode(); +} +void CFX_SAXReader::ParseTagName() { + if (m_CurByte < 0x21 || m_CurByte == '/' || m_CurByte == '>' || + m_CurByte == '?') { + m_iDataLength = m_iDataPos; + m_iDataPos = 0; + if (m_pHandler) { + NotifyEnter(); + } + if (m_CurByte < 0x21) { + m_eMode = CFX_SaxMode::TagAttributeName; + } else if (m_CurByte == '/' || m_CurByte == '?') { + m_ePrevMode = m_eMode; + m_eMode = CFX_SaxMode::TagMaybeClose; + } else { + if (m_pHandler) { + NotifyBreak(); + } + m_eMode = CFX_SaxMode::Text; + } + } else { + AppendData(m_CurByte); + } +} +void CFX_SAXReader::ParseTagAttributeName() { + if (m_CurByte < 0x21 || m_CurByte == '=') { + if (m_iDataPos < 1 && m_CurByte < 0x21) { + return; + } + m_iNameLength = m_iDataPos; + m_iDataPos = 0; + m_SkipChar = 0; + m_eMode = m_CurByte == '=' ? CFX_SaxMode::TagAttributeValue + : CFX_SaxMode::TagAttributeEqual; + return; + } + if (m_CurByte == '/' || m_CurByte == '>' || m_CurByte == '?') { + if (m_CurByte == '/' || m_CurByte == '?') { + m_ePrevMode = m_eMode; + m_eMode = CFX_SaxMode::TagMaybeClose; + } else { + if (m_pHandler) { + NotifyBreak(); + } + m_eMode = CFX_SaxMode::Text; + } + return; + } + if (m_iDataPos < 1) { + m_dwDataOffset = m_File.m_dwBufIndex; + } + AppendName(m_CurByte); +} +void CFX_SAXReader::ParseTagAttributeEqual() { + if (m_CurByte == '=') { + m_SkipChar = 0; + m_eMode = CFX_SaxMode::TagAttributeValue; + return; + } else if (m_pCurItem->m_eNode == CFX_SAXItem::Type::Instruction) { + m_iDataPos = m_iNameLength; + AppendName(0x20); + m_eMode = CFX_SaxMode::TargetData; + ParseTargetData(); + } +} +void CFX_SAXReader::ParseTagAttributeValue() { + if (m_SkipChar) { + if (m_SkipChar == m_CurByte) { + { + m_iDataLength = m_iDataPos; + m_iDataPos = 0; + if (m_pHandler) { + NotifyAttribute(); + } + } + m_SkipChar = 0; + m_eMode = CFX_SaxMode::TagAttributeName; + return; + } + ParseChar(m_CurByte); + return; + } + if (m_CurByte < 0x21) { + return; + } + if (m_iDataPos < 1) { + if (m_CurByte == '\'' || m_CurByte == '\"') { + m_SkipChar = m_CurByte; + } + } +} +void CFX_SAXReader::ParseMaybeClose() { + if (m_CurByte == '>') { + if (m_pCurItem->m_eNode == CFX_SAXItem::Type::Instruction) { + m_iNameLength = m_iDataPos; + m_iDataPos = 0; + if (m_pHandler) { + NotifyTargetData(); + } + } + ParseTagClose(); + m_eMode = CFX_SaxMode::Text; + } else if (m_ePrevMode == CFX_SaxMode::TagName) { + AppendData('/'); + m_eMode = CFX_SaxMode::TagName; + m_ePrevMode = CFX_SaxMode::Text; + ParseTagName(); + } else if (m_ePrevMode == CFX_SaxMode::TagAttributeName) { + AppendName('/'); + m_eMode = CFX_SaxMode::TagAttributeName; + m_ePrevMode = CFX_SaxMode::Text; + ParseTagAttributeName(); + } else if (m_ePrevMode == CFX_SaxMode::TargetData) { + AppendName('?'); + m_eMode = CFX_SaxMode::TargetData; + m_ePrevMode = CFX_SaxMode::Text; + ParseTargetData(); + } +} +void CFX_SAXReader::ParseTagClose() { + m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; + if (m_pHandler) { + NotifyClose(); + } + Pop(); +} +void CFX_SAXReader::ParseTagEnd() { + if (m_CurByte < 0x21) { + return; + } + if (m_CurByte == '>') { + Pop(); + m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; + m_iDataLength = m_iDataPos; + m_iDataPos = 0; + if (m_pHandler) { + NotifyEnd(); + } + Pop(); + m_eMode = CFX_SaxMode::Text; + } else { + ParseChar(m_CurByte); + } +} +void CFX_SAXReader::ParseTargetData() { + if (m_CurByte == '?') { + m_ePrevMode = m_eMode; + m_eMode = CFX_SaxMode::TagMaybeClose; + } else { + AppendName(m_CurByte); + } +} +void CFX_SAXReader::SkipNode() { + int32_t iLen = m_SkipStack.GetSize(); + if (m_SkipChar == '\'' || m_SkipChar == '\"') { + if (m_CurByte != m_SkipChar) { + return; + } + iLen--; + ASSERT(iLen > -1); + m_SkipStack.RemoveAt(iLen, 1); + m_SkipChar = iLen ? m_SkipStack[iLen - 1] : 0; + return; + } + switch (m_CurByte) { + case '<': + m_SkipChar = '>'; + m_SkipStack.Add('>'); + break; + case '[': + m_SkipChar = ']'; + m_SkipStack.Add(']'); + break; + case '(': + m_SkipChar = ')'; + m_SkipStack.Add(')'); + break; + case '\'': + m_SkipChar = '\''; + m_SkipStack.Add('\''); + break; + case '\"': + m_SkipChar = '\"'; + m_SkipStack.Add('\"'); + break; + default: + if (m_CurByte == m_SkipChar) { + iLen--; + m_SkipStack.RemoveAt(iLen, 1); + m_SkipChar = iLen ? m_SkipStack[iLen - 1] : 0; + if (iLen == 0 && m_CurByte == '>') { + m_iDataLength = m_iDataPos; + m_iDataPos = 0; + if (m_iDataLength >= 9 && + FXSYS_memcmp(m_pszData, "[CDATA[", 7 * sizeof(uint8_t)) == 0 && + FXSYS_memcmp(m_pszData + m_iDataLength - 2, "]]", + 2 * sizeof(uint8_t)) == 0) { + Pop(); + m_iDataLength -= 9; + m_dwDataOffset += 7; + FXSYS_memmove(m_pszData, m_pszData + 7, + m_iDataLength * sizeof(uint8_t)); + m_bCharData = TRUE; + if (m_pHandler) { + NotifyData(); + } + m_bCharData = FALSE; + } else { + Pop(); + } + m_eMode = CFX_SaxMode::Text; + } + } + break; + } + if (iLen > 0) { + ParseChar(m_CurByte); + } +} + +void CFX_SAXReader::NotifyData() { + if (m_pCurItem->m_eNode == CFX_SAXItem::Type::Tag) + m_pHandler->OnTagData( + m_pCurItem->m_pNode, + m_bCharData ? CFX_SAXItem::Type::CharData : CFX_SAXItem::Type::Text, + CFX_ByteStringC(m_pszData, m_iDataLength), + m_File.m_dwCur + m_dwDataOffset); +} + +void CFX_SAXReader::NotifyEnter() { + if (m_pCurItem->m_eNode == CFX_SAXItem::Type::Tag || + m_pCurItem->m_eNode == CFX_SAXItem::Type::Instruction) { + m_pCurItem->m_pNode = + m_pHandler->OnTagEnter(CFX_ByteStringC(m_pszData, m_iDataLength), + m_pCurItem->m_eNode, m_dwNodePos); + } +} + +void CFX_SAXReader::NotifyAttribute() { + if (m_pCurItem->m_eNode == CFX_SAXItem::Type::Tag || + m_pCurItem->m_eNode == CFX_SAXItem::Type::Instruction) { + m_pHandler->OnTagAttribute(m_pCurItem->m_pNode, + CFX_ByteStringC(m_pszName, m_iNameLength), + CFX_ByteStringC(m_pszData, m_iDataLength)); + } +} + +void CFX_SAXReader::NotifyBreak() { + if (m_pCurItem->m_eNode == CFX_SAXItem::Type::Tag) + m_pHandler->OnTagBreak(m_pCurItem->m_pNode); +} + +void CFX_SAXReader::NotifyClose() { + if (m_pCurItem->m_eNode == CFX_SAXItem::Type::Tag || + m_pCurItem->m_eNode == CFX_SAXItem::Type::Instruction) { + m_pHandler->OnTagClose(m_pCurItem->m_pNode, m_dwNodePos); + } +} + +void CFX_SAXReader::NotifyEnd() { + if (m_pCurItem->m_eNode != CFX_SAXItem::Type::Tag) + return; + + m_pHandler->OnTagEnd(m_pCurItem->m_pNode, + CFX_ByteStringC(m_pszData, m_iDataLength), m_dwNodePos); +} + +void CFX_SAXReader::NotifyTargetData() { + if (m_pCurItem->m_eNode == CFX_SAXItem::Type::Instruction) { + m_pHandler->OnTargetData(m_pCurItem->m_pNode, m_pCurItem->m_eNode, + CFX_ByteStringC(m_pszName, m_iNameLength), + m_dwNodePos); + } else if (m_pCurItem->m_eNode == CFX_SAXItem::Type::Comment) { + m_pHandler->OnTargetData(m_pCurItem->m_pNode, m_pCurItem->m_eNode, + CFX_ByteStringC(m_pszData, m_iDataLength), + m_dwNodePos); + } +} + +void CFX_SAXReader::SkipCurrentNode() { + if (!m_pCurItem) + return; + + m_pCurItem->m_bSkip = TRUE; +} + +void CFX_SAXReader::SetHandler(CXFA_SAXReaderHandler* pHandler) { + m_pHandler = pHandler; +} diff --git a/xfa/fde/xml/cfx_saxreader.h b/xfa/fde/xml/cfx_saxreader.h new file mode 100644 index 0000000000..18aa039ccc --- /dev/null +++ b/xfa/fde/xml/cfx_saxreader.h @@ -0,0 +1,143 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef XFA_FDE_XML_CFX_SAXREADER_H_ +#define XFA_FDE_XML_CFX_SAXREADER_H_ + +#include "core/fxcrt/include/fx_basic.h" + +class CFX_SAXItem { + public: + enum class Type { + Unknown = 0, + Instruction, + Declaration, + Comment, + Tag, + Text, + CharData, + }; + + CFX_SAXItem() + : m_pNode(nullptr), + m_eNode(Type::Unknown), + m_dwID(0), + m_bSkip(FALSE), + m_pPrev(nullptr), + m_pNext(nullptr) {} + + void* m_pNode; + Type m_eNode; + uint32_t m_dwID; + FX_BOOL m_bSkip; + CFX_SAXItem* m_pPrev; + CFX_SAXItem* m_pNext; +}; + +class CFX_SAXFile { + public: + CFX_SAXFile(); + FX_BOOL StartFile(IFX_FileRead* pFile, uint32_t dwStart, uint32_t dwLen); + FX_BOOL ReadNextBlock(); + void Reset(); + IFX_FileRead* m_pFile; + uint32_t m_dwStart; + uint32_t m_dwEnd; + uint32_t m_dwCur; + uint8_t* m_pBuf; + uint32_t m_dwBufSize; + uint32_t m_dwBufIndex; +}; + +class CFX_SAXCommentContext; +enum class CFX_SaxMode; + +enum CFX_SaxParseMode { + CFX_SaxParseMode_NotConvert_amp = 1 << 0, + CFX_SaxParseMode_NotConvert_lt = 1 << 1, + CFX_SaxParseMode_NotConvert_gt = 1 << 2, + CFX_SaxParseMode_NotConvert_apos = 1 << 3, + CFX_SaxParseMode_NotConvert_quot = 1 << 4, + CFX_SaxParseMode_NotConvert_sharp = 1 << 5, + CFX_SaxParseMode_NotSkipSpace = 1 << 6 +}; + +class CXFA_SAXReaderHandler; + +class CFX_SAXReader { + public: + CFX_SAXReader(); + ~CFX_SAXReader(); + + int32_t StartParse(IFX_FileRead* pFile, + uint32_t dwStart = 0, + uint32_t dwLen = -1, + uint32_t dwParseMode = 0); + int32_t ContinueParse(IFX_Pause* pPause = nullptr); + void SkipCurrentNode(); + void SetHandler(CXFA_SAXReaderHandler* pHandler); + void AppendData(uint8_t ch); + void AppendName(uint8_t ch); + void ParseText(); + void ParseNodeStart(); + void ParseInstruction(); + void ParseDeclOrComment(); + void ParseDeclNode(); + void ParseComment(); + void ParseCommentContent(); + void ParseTagName(); + void ParseTagAttributeName(); + void ParseTagAttributeEqual(); + void ParseTagAttributeValue(); + void ParseMaybeClose(); + void ParseTagClose(); + void ParseTagEnd(); + void ParseTargetData(); + + private: + void Reset(); + void Push(); + void Pop(); + FX_BOOL SkipSpace(uint8_t ch); + void SkipNode(); + void NotifyData(); + void NotifyEnter(); + void NotifyAttribute(); + void NotifyBreak(); + void NotifyClose(); + void NotifyEnd(); + void NotifyTargetData(); + void ReallocDataBuffer(); + void ReallocNameBuffer(); + void ParseChar(uint8_t ch); + + CFX_SAXFile m_File; + CXFA_SAXReaderHandler* m_pHandler; + int32_t m_iState; + CFX_SAXItem* m_pRoot; + CFX_SAXItem* m_pCurItem; + uint32_t m_dwItemID; + CFX_SaxMode m_eMode; + CFX_SaxMode m_ePrevMode; + FX_BOOL m_bCharData; + uint8_t m_CurByte; + uint32_t m_dwDataOffset; + CFX_ByteArray m_SkipStack; + uint8_t m_SkipChar; + uint32_t m_dwNodePos; + uint8_t* m_pszData; + int32_t m_iDataSize; + int32_t m_iDataLength; + int32_t m_iEntityStart; + int32_t m_iDataPos; + uint8_t* m_pszName; + int32_t m_iNameSize; + int32_t m_iNameLength; + uint32_t m_dwParseMode; + CFX_SAXCommentContext* m_pCommentContext; +}; + +#endif // XFA_FDE_XML_CFX_SAXREADER_H_ diff --git a/xfa/fgas/xml/fgas_sax.cpp b/xfa/fgas/xml/fgas_sax.cpp deleted file mode 100644 index 1128e1184f..0000000000 --- a/xfa/fgas/xml/fgas_sax.cpp +++ /dev/null @@ -1,688 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "xfa/fgas/xml/fgas_sax.h" - -#include - -#include "xfa/fxfa/include/xfa_checksum.h" - -namespace { - -const uint32_t kSaxFileBufSize = 32768; - -} // namespace - -CFX_SAXFile::CFX_SAXFile() - : m_pFile(NULL), - m_dwStart(0), - m_dwEnd(0), - m_dwCur(0), - m_pBuf(NULL), - m_dwBufSize(0), - m_dwBufIndex(0) {} -FX_BOOL CFX_SAXFile::StartFile(IFX_FileRead* pFile, - uint32_t dwStart, - uint32_t dwLen) { - ASSERT(m_pFile == NULL && pFile != NULL); - uint32_t dwSize = pFile->GetSize(); - if (dwStart >= dwSize) { - return FALSE; - } - if (dwLen == static_cast(-1) || dwStart + dwLen > dwSize) { - dwLen = dwSize - dwStart; - } - if (dwLen == 0) { - return FALSE; - } - m_dwBufSize = std::min(dwLen, kSaxFileBufSize); - m_pBuf = FX_Alloc(uint8_t, m_dwBufSize); - if (!pFile->ReadBlock(m_pBuf, dwStart, m_dwBufSize)) { - return FALSE; - } - m_dwStart = dwStart; - m_dwEnd = dwStart + dwLen; - m_dwCur = dwStart; - m_pFile = pFile; - m_dwBufIndex = 0; - return TRUE; -} -FX_BOOL CFX_SAXFile::ReadNextBlock() { - ASSERT(m_pFile != NULL); - uint32_t dwSize = m_dwEnd - m_dwCur; - if (dwSize == 0) { - return FALSE; - } - m_dwBufSize = std::min(dwSize, kSaxFileBufSize); - if (!m_pFile->ReadBlock(m_pBuf, m_dwCur, m_dwBufSize)) { - return FALSE; - } - m_dwBufIndex = 0; - return TRUE; -} -void CFX_SAXFile::Reset() { - if (m_pBuf) { - FX_Free(m_pBuf); - m_pBuf = NULL; - } - m_pFile = NULL; -} -CFX_SAXReader::CFX_SAXReader() - : m_File(), - m_pHandler(nullptr), - m_iState(-1), - m_pRoot(nullptr), - m_pCurItem(nullptr), - m_dwItemID(0), - m_iDataSize(256), - m_iNameSize(256), - m_dwParseMode(0), - m_pCommentContext(nullptr) { - m_pszData = FX_Alloc(uint8_t, m_iDataSize); - m_pszName = FX_Alloc(uint8_t, m_iNameSize); -} -CFX_SAXReader::~CFX_SAXReader() { - Reset(); - if (m_pszData) { - FX_Free(m_pszData); - m_pszData = NULL; - } - if (m_pszName) { - FX_Free(m_pszName); - m_pszName = NULL; - } -} -void CFX_SAXReader::Reset() { - m_File.Reset(); - CFX_SAXItem* pItem = m_pRoot; - while (pItem) { - CFX_SAXItem* pNext = pItem->m_pNext; - delete pItem; - pItem = pNext; - } - m_pRoot = NULL; - m_pCurItem = NULL; - m_dwItemID = 0; - m_SkipStack.RemoveAll(); - m_SkipChar = 0; - m_iDataLength = 0; - m_iEntityStart = -1; - m_iNameLength = 0; - m_iDataPos = 0; - if (m_pCommentContext) { - delete m_pCommentContext; - m_pCommentContext = NULL; - } -} -inline void CFX_SAXReader::Push() { - CFX_SAXItem* pNew = new CFX_SAXItem; - pNew->m_dwID = ++m_dwItemID; - pNew->m_bSkip = m_pCurItem->m_bSkip; - pNew->m_pPrev = m_pCurItem; - m_pCurItem->m_pNext = pNew; - m_pCurItem = pNew; -} -inline void CFX_SAXReader::Pop() { - if (!m_pCurItem) { - return; - } - CFX_SAXItem* pPrev = m_pCurItem->m_pPrev; - pPrev->m_pNext = NULL; - delete m_pCurItem; - m_pCurItem = pPrev; -} -inline void CFX_SAXReader::AppendData(uint8_t ch) { - ReallocDataBuffer(); - m_pszData[m_iDataPos++] = ch; -} -inline void CFX_SAXReader::AppendName(uint8_t ch) { - ReallocNameBuffer(); - m_pszName[m_iDataPos++] = ch; -} -void CFX_SAXReader::ReallocDataBuffer() { - if (m_iDataPos < m_iDataSize) { - return; - } - if (m_iDataSize <= 1024 * 1024) { - m_iDataSize *= 2; - } else { - m_iDataSize += 1024 * 1024; - } - m_pszData = (uint8_t*)FX_Realloc(uint8_t, m_pszData, m_iDataSize); -} -void CFX_SAXReader::ReallocNameBuffer() { - if (m_iDataPos < m_iNameSize) { - return; - } - if (m_iNameSize <= 1024 * 1024) { - m_iNameSize *= 2; - } else { - m_iNameSize += 1024 * 1024; - } - m_pszName = (uint8_t*)FX_Realloc(uint8_t, m_pszName, m_iNameSize); -} -inline FX_BOOL CFX_SAXReader::SkipSpace(uint8_t ch) { - return (m_dwParseMode & FX_SAXPARSEMODE_NotSkipSpace) == 0 && ch < 0x21; -} -int32_t CFX_SAXReader::StartParse(IFX_FileRead* pFile, - uint32_t dwStart, - uint32_t dwLen, - uint32_t dwParseMode) { - m_iState = -1; - Reset(); - if (!m_File.StartFile(pFile, dwStart, dwLen)) { - return -1; - } - m_iState = 0; - m_eMode = FX_SAXMODE_Text; - m_ePrevMode = FX_SAXMODE_Text; - m_bCharData = FALSE; - m_dwDataOffset = 0; - m_pRoot = m_pCurItem = new CFX_SAXItem; - m_pCurItem->m_dwID = ++m_dwItemID; - m_dwParseMode = dwParseMode; - return 0; -} -typedef void (CFX_SAXReader::*FX_SAXReader_LPFParse)(); -static const FX_SAXReader_LPFParse g_FX_SAXReader_LPFParse[FX_SAXMODE_MAX] = { - &CFX_SAXReader::ParseText, - &CFX_SAXReader::ParseNodeStart, - &CFX_SAXReader::ParseDeclOrComment, - &CFX_SAXReader::ParseDeclNode, - &CFX_SAXReader::ParseComment, - &CFX_SAXReader::ParseCommentContent, - &CFX_SAXReader::ParseTagName, - &CFX_SAXReader::ParseTagAttributeName, - &CFX_SAXReader::ParseTagAttributeEqual, - &CFX_SAXReader::ParseTagAttributeValue, - &CFX_SAXReader::ParseMaybeClose, - &CFX_SAXReader::ParseTagClose, - &CFX_SAXReader::ParseTagEnd, - &CFX_SAXReader::ParseTargetData, -}; -int32_t CFX_SAXReader::ContinueParse(IFX_Pause* pPause) { - if (m_iState < 0 || m_iState > 99) { - return m_iState; - } - while (m_File.m_dwCur < m_File.m_dwEnd) { - uint32_t& index = m_File.m_dwBufIndex; - uint32_t size = m_File.m_dwBufSize; - const uint8_t* pBuf = m_File.m_pBuf; - while (index < size) { - m_CurByte = pBuf[index]; - (this->*g_FX_SAXReader_LPFParse[m_eMode])(); - index++; - } - m_File.m_dwCur += index; - m_iState = (m_File.m_dwCur - m_File.m_dwStart) * 100 / - (m_File.m_dwEnd - m_File.m_dwStart); - if (m_File.m_dwCur >= m_File.m_dwEnd) { - break; - } - if (!m_File.ReadNextBlock()) { - m_iState = -2; - break; - } - m_dwDataOffset = 0; - if (pPause && pPause->NeedToPauseNow()) { - break; - } - } - return m_iState; -} -void CFX_SAXReader::ParseChar(uint8_t ch) { - ReallocDataBuffer(); - m_pszData[m_iDataPos] = ch; - if (m_iEntityStart > -1 && ch == ';') { - int32_t iSaveEntityStart = m_iEntityStart; - CFX_ByteString csEntity(m_pszData + m_iEntityStart + 1, - m_iDataPos - m_iEntityStart - 1); - int32_t iLen = csEntity.GetLength(); - if (iLen > 0) { - if (csEntity[0] == '#') { - if ((m_dwParseMode & FX_SAXPARSEMODE_NotConvert_sharp) == 0) { - ch = 0; - uint8_t w; - if (iLen > 1 && csEntity[1] == 'x') { - for (int32_t i = 2; i < iLen; i++) { - w = csEntity[i]; - if (w >= '0' && w <= '9') { - ch = (ch << 4) + w - '0'; - } else if (w >= 'A' && w <= 'F') { - ch = (ch << 4) + w - 55; - } else if (w >= 'a' && w <= 'f') { - ch = (ch << 4) + w - 87; - } else { - break; - } - } - } else { - for (int32_t i = 1; i < iLen; i++) { - w = csEntity[i]; - if (w < '0' || w > '9') { - break; - } - ch = ch * 10 + w - '0'; - } - } - if (ch != 0) { - m_pszData[m_iEntityStart++] = ch; - } - } - } else { - if (csEntity.Compare("amp") == 0) { - if ((m_dwParseMode & FX_SAXPARSEMODE_NotConvert_amp) == 0) { - m_pszData[m_iEntityStart++] = '&'; - } - } else if (csEntity.Compare("lt") == 0) { - if ((m_dwParseMode & FX_SAXPARSEMODE_NotConvert_lt) == 0) { - m_pszData[m_iEntityStart++] = '<'; - } - } else if (csEntity.Compare("gt") == 0) { - if ((m_dwParseMode & FX_SAXPARSEMODE_NotConvert_gt) == 0) { - m_pszData[m_iEntityStart++] = '>'; - } - } else if (csEntity.Compare("apos") == 0) { - if ((m_dwParseMode & FX_SAXPARSEMODE_NotConvert_apos) == 0) { - m_pszData[m_iEntityStart++] = '\''; - } - } else if (csEntity.Compare("quot") == 0) { - if ((m_dwParseMode & FX_SAXPARSEMODE_NotConvert_quot) == 0) { - m_pszData[m_iEntityStart++] = '\"'; - } - } - } - } - if (iSaveEntityStart != m_iEntityStart) { - m_iDataPos = m_iEntityStart; - m_iEntityStart = -1; - } else { - m_iDataPos++; - m_iEntityStart = -1; - } - } else { - if (m_iEntityStart < 0 && ch == '&') { - m_iEntityStart = m_iDataPos; - } - m_iDataPos++; - } -} -void CFX_SAXReader::ParseText() { - if (m_CurByte == '<') { - if (m_iDataPos > 0) { - m_iDataLength = m_iDataPos; - m_iDataPos = 0; - if (m_pHandler) { - NotifyData(); - } - } - Push(); - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - m_eMode = FX_SAXMODE_NodeStart; - return; - } - if (m_iDataPos < 1 && SkipSpace(m_CurByte)) { - return; - } - ParseChar(m_CurByte); -} -void CFX_SAXReader::ParseNodeStart() { - if (m_CurByte == '?') { - m_pCurItem->m_eNode = FX_SAXNODE_Instruction; - m_eMode = FX_SAXMODE_TagName; - return; - } - if (m_CurByte == '!') { - m_eMode = FX_SAXMODE_DeclOrComment; - return; - } - if (m_CurByte == '/') { - m_eMode = FX_SAXMODE_TagEnd; - return; - } - if (m_CurByte == '>') { - Pop(); - m_eMode = FX_SAXMODE_Text; - return; - } - if (m_CurByte > 0x20) { - m_dwDataOffset = m_File.m_dwBufIndex; - m_pCurItem->m_eNode = FX_SAXNODE_Tag; - m_eMode = FX_SAXMODE_TagName; - AppendData(m_CurByte); - } -} -void CFX_SAXReader::ParseDeclOrComment() { - if (m_CurByte == '-') { - m_eMode = FX_SAXMODE_Comment; - m_pCurItem->m_eNode = FX_SAXNODE_Comment; - if (m_pCommentContext == NULL) { - m_pCommentContext = new CFX_SAXCommentContext; - } - m_pCommentContext->m_iHeaderCount = 1; - m_pCommentContext->m_iTailCount = 0; - } else { - m_eMode = FX_SAXMODE_DeclNode; - m_dwDataOffset = m_File.m_dwBufIndex; - m_SkipChar = '>'; - m_SkipStack.Add('>'); - SkipNode(); - } -} -void CFX_SAXReader::ParseComment() { - m_pCommentContext->m_iHeaderCount = 2; - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - m_eMode = FX_SAXMODE_CommentContent; -} -void CFX_SAXReader::ParseCommentContent() { - if (m_CurByte == '-') { - m_pCommentContext->m_iTailCount++; - } else if (m_CurByte == '>' && m_pCommentContext->m_iTailCount == 2) { - m_iDataLength = m_iDataPos; - m_iDataPos = 0; - if (m_pHandler) { - NotifyTargetData(); - } - Pop(); - m_eMode = FX_SAXMODE_Text; - } else { - while (m_pCommentContext->m_iTailCount > 0) { - AppendData('-'); - m_pCommentContext->m_iTailCount--; - } - AppendData(m_CurByte); - } -} -void CFX_SAXReader::ParseDeclNode() { - SkipNode(); -} -void CFX_SAXReader::ParseTagName() { - if (m_CurByte < 0x21 || m_CurByte == '/' || m_CurByte == '>' || - m_CurByte == '?') { - m_iDataLength = m_iDataPos; - m_iDataPos = 0; - if (m_pHandler) { - NotifyEnter(); - } - if (m_CurByte < 0x21) { - m_eMode = FX_SAXMODE_TagAttributeName; - } else if (m_CurByte == '/' || m_CurByte == '?') { - m_ePrevMode = m_eMode; - m_eMode = FX_SAXMODE_TagMaybeClose; - } else { - if (m_pHandler) { - NotifyBreak(); - } - m_eMode = FX_SAXMODE_Text; - } - } else { - AppendData(m_CurByte); - } -} -void CFX_SAXReader::ParseTagAttributeName() { - if (m_CurByte < 0x21 || m_CurByte == '=') { - if (m_iDataPos < 1 && m_CurByte < 0x21) { - return; - } - m_iNameLength = m_iDataPos; - m_iDataPos = 0; - m_SkipChar = 0; - m_eMode = m_CurByte == '=' ? FX_SAXMODE_TagAttributeValue - : FX_SAXMODE_TagAttributeEqual; - return; - } - if (m_CurByte == '/' || m_CurByte == '>' || m_CurByte == '?') { - if (m_CurByte == '/' || m_CurByte == '?') { - m_ePrevMode = m_eMode; - m_eMode = FX_SAXMODE_TagMaybeClose; - } else { - if (m_pHandler) { - NotifyBreak(); - } - m_eMode = FX_SAXMODE_Text; - } - return; - } - if (m_iDataPos < 1) { - m_dwDataOffset = m_File.m_dwBufIndex; - } - AppendName(m_CurByte); -} -void CFX_SAXReader::ParseTagAttributeEqual() { - if (m_CurByte == '=') { - m_SkipChar = 0; - m_eMode = FX_SAXMODE_TagAttributeValue; - return; - } else if (m_pCurItem->m_eNode == FX_SAXNODE_Instruction) { - m_iDataPos = m_iNameLength; - AppendName(0x20); - m_eMode = FX_SAXMODE_TargetData; - ParseTargetData(); - } -} -void CFX_SAXReader::ParseTagAttributeValue() { - if (m_SkipChar) { - if (m_SkipChar == m_CurByte) { - { - m_iDataLength = m_iDataPos; - m_iDataPos = 0; - if (m_pHandler) { - NotifyAttribute(); - } - } - m_SkipChar = 0; - m_eMode = FX_SAXMODE_TagAttributeName; - return; - } - ParseChar(m_CurByte); - return; - } - if (m_CurByte < 0x21) { - return; - } - if (m_iDataPos < 1) { - if (m_CurByte == '\'' || m_CurByte == '\"') { - m_SkipChar = m_CurByte; - } - } -} -void CFX_SAXReader::ParseMaybeClose() { - if (m_CurByte == '>') { - if (m_pCurItem->m_eNode == FX_SAXNODE_Instruction) { - m_iNameLength = m_iDataPos; - m_iDataPos = 0; - if (m_pHandler) { - NotifyTargetData(); - } - } - ParseTagClose(); - m_eMode = FX_SAXMODE_Text; - } else if (m_ePrevMode == FX_SAXMODE_TagName) { - AppendData('/'); - m_eMode = FX_SAXMODE_TagName; - m_ePrevMode = FX_SAXMODE_Text; - ParseTagName(); - } else if (m_ePrevMode == FX_SAXMODE_TagAttributeName) { - AppendName('/'); - m_eMode = FX_SAXMODE_TagAttributeName; - m_ePrevMode = FX_SAXMODE_Text; - ParseTagAttributeName(); - } else if (m_ePrevMode == FX_SAXMODE_TargetData) { - AppendName('?'); - m_eMode = FX_SAXMODE_TargetData; - m_ePrevMode = FX_SAXMODE_Text; - ParseTargetData(); - } -} -void CFX_SAXReader::ParseTagClose() { - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - if (m_pHandler) { - NotifyClose(); - } - Pop(); -} -void CFX_SAXReader::ParseTagEnd() { - if (m_CurByte < 0x21) { - return; - } - if (m_CurByte == '>') { - Pop(); - m_dwNodePos = m_File.m_dwCur + m_File.m_dwBufIndex; - m_iDataLength = m_iDataPos; - m_iDataPos = 0; - if (m_pHandler) { - NotifyEnd(); - } - Pop(); - m_eMode = FX_SAXMODE_Text; - } else { - ParseChar(m_CurByte); - } -} -void CFX_SAXReader::ParseTargetData() { - if (m_CurByte == '?') { - m_ePrevMode = m_eMode; - m_eMode = FX_SAXMODE_TagMaybeClose; - } else { - AppendName(m_CurByte); - } -} -void CFX_SAXReader::SkipNode() { - int32_t iLen = m_SkipStack.GetSize(); - if (m_SkipChar == '\'' || m_SkipChar == '\"') { - if (m_CurByte != m_SkipChar) { - return; - } - iLen--; - ASSERT(iLen > -1); - m_SkipStack.RemoveAt(iLen, 1); - m_SkipChar = iLen ? m_SkipStack[iLen - 1] : 0; - return; - } - switch (m_CurByte) { - case '<': - m_SkipChar = '>'; - m_SkipStack.Add('>'); - break; - case '[': - m_SkipChar = ']'; - m_SkipStack.Add(']'); - break; - case '(': - m_SkipChar = ')'; - m_SkipStack.Add(')'); - break; - case '\'': - m_SkipChar = '\''; - m_SkipStack.Add('\''); - break; - case '\"': - m_SkipChar = '\"'; - m_SkipStack.Add('\"'); - break; - default: - if (m_CurByte == m_SkipChar) { - iLen--; - m_SkipStack.RemoveAt(iLen, 1); - m_SkipChar = iLen ? m_SkipStack[iLen - 1] : 0; - if (iLen == 0 && m_CurByte == '>') { - m_iDataLength = m_iDataPos; - m_iDataPos = 0; - if (m_iDataLength >= 9 && - FXSYS_memcmp(m_pszData, "[CDATA[", 7 * sizeof(uint8_t)) == 0 && - FXSYS_memcmp(m_pszData + m_iDataLength - 2, "]]", - 2 * sizeof(uint8_t)) == 0) { - Pop(); - m_iDataLength -= 9; - m_dwDataOffset += 7; - FXSYS_memmove(m_pszData, m_pszData + 7, - m_iDataLength * sizeof(uint8_t)); - m_bCharData = TRUE; - if (m_pHandler) { - NotifyData(); - } - m_bCharData = FALSE; - } else { - Pop(); - } - m_eMode = FX_SAXMODE_Text; - } - } - break; - } - if (iLen > 0) { - ParseChar(m_CurByte); - } -} - -void CFX_SAXReader::NotifyData() { - if (m_pCurItem->m_eNode == FX_SAXNODE_Tag) - m_pHandler->OnTagData(m_pCurItem->m_pNode, - m_bCharData ? FX_SAXNODE_CharData : FX_SAXNODE_Text, - CFX_ByteStringC(m_pszData, m_iDataLength), - m_File.m_dwCur + m_dwDataOffset); -} - -void CFX_SAXReader::NotifyEnter() { - if (m_pCurItem->m_eNode == FX_SAXNODE_Tag || - m_pCurItem->m_eNode == FX_SAXNODE_Instruction) { - m_pCurItem->m_pNode = - m_pHandler->OnTagEnter(CFX_ByteStringC(m_pszData, m_iDataLength), - m_pCurItem->m_eNode, m_dwNodePos); - } -} - -void CFX_SAXReader::NotifyAttribute() { - if (m_pCurItem->m_eNode == FX_SAXNODE_Tag || - m_pCurItem->m_eNode == FX_SAXNODE_Instruction) { - m_pHandler->OnTagAttribute(m_pCurItem->m_pNode, - CFX_ByteStringC(m_pszName, m_iNameLength), - CFX_ByteStringC(m_pszData, m_iDataLength)); - } -} - -void CFX_SAXReader::NotifyBreak() { - if (m_pCurItem->m_eNode == FX_SAXNODE_Tag) - m_pHandler->OnTagBreak(m_pCurItem->m_pNode); -} - -void CFX_SAXReader::NotifyClose() { - if (m_pCurItem->m_eNode == FX_SAXNODE_Tag || - m_pCurItem->m_eNode == FX_SAXNODE_Instruction) { - m_pHandler->OnTagClose(m_pCurItem->m_pNode, m_dwNodePos); - } -} - -void CFX_SAXReader::NotifyEnd() { - if (m_pCurItem->m_eNode != FX_SAXNODE_Tag) - return; - - m_pHandler->OnTagEnd(m_pCurItem->m_pNode, - CFX_ByteStringC(m_pszData, m_iDataLength), m_dwNodePos); -} - -void CFX_SAXReader::NotifyTargetData() { - if (m_pCurItem->m_eNode == FX_SAXNODE_Instruction) { - m_pHandler->OnTargetData(m_pCurItem->m_pNode, m_pCurItem->m_eNode, - CFX_ByteStringC(m_pszName, m_iNameLength), - m_dwNodePos); - } else if (m_pCurItem->m_eNode == FX_SAXNODE_Comment) { - m_pHandler->OnTargetData(m_pCurItem->m_pNode, m_pCurItem->m_eNode, - CFX_ByteStringC(m_pszData, m_iDataLength), - m_dwNodePos); - } -} - -void CFX_SAXReader::SkipCurrentNode() { - if (!m_pCurItem) - return; - - m_pCurItem->m_bSkip = TRUE; -} - -void CFX_SAXReader::SetHandler(CXFA_SAXReaderHandler* pHandler) { - m_pHandler = pHandler; -} diff --git a/xfa/fgas/xml/fgas_sax.h b/xfa/fgas/xml/fgas_sax.h deleted file mode 100644 index 92440c3b59..0000000000 --- a/xfa/fgas/xml/fgas_sax.h +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef XFA_FGAS_XML_FGAS_SAX_H_ -#define XFA_FGAS_XML_FGAS_SAX_H_ - -#include "core/fxcrt/include/fx_basic.h" - -#define FX_SAXPARSEMODE_NotConvert_amp 0x0001 -#define FX_SAXPARSEMODE_NotConvert_lt 0x0002 -#define FX_SAXPARSEMODE_NotConvert_gt 0x0004 -#define FX_SAXPARSEMODE_NotConvert_apos 0x0008 -#define FX_SAXPARSEMODE_NotConvert_quot 0x0010 -#define FX_SAXPARSEMODE_NotConvert_sharp 0x0020 -#define FX_SAXPARSEMODE_NotSkipSpace 0x0100 - -enum FX_SAXNODE { - FX_SAXNODE_Unknown = 0, - FX_SAXNODE_Instruction, - FX_SAXNODE_Declaration, - FX_SAXNODE_Comment, - FX_SAXNODE_Tag, - FX_SAXNODE_Text, - FX_SAXNODE_CharData, -}; - -enum FX_SAXMODE { - FX_SAXMODE_Text = 0, - FX_SAXMODE_NodeStart, - FX_SAXMODE_DeclOrComment, - FX_SAXMODE_DeclNode, - FX_SAXMODE_Comment, - FX_SAXMODE_CommentContent, - FX_SAXMODE_TagName, - FX_SAXMODE_TagAttributeName, - FX_SAXMODE_TagAttributeEqual, - FX_SAXMODE_TagAttributeValue, - FX_SAXMODE_TagMaybeClose, - FX_SAXMODE_TagClose, - FX_SAXMODE_TagEnd, - FX_SAXMODE_TargetData, - FX_SAXMODE_MAX, -}; - -class CXFA_SAXReaderHandler; - -class CFX_SAXFile { - public: - CFX_SAXFile(); - FX_BOOL StartFile(IFX_FileRead* pFile, uint32_t dwStart, uint32_t dwLen); - FX_BOOL ReadNextBlock(); - void Reset(); - IFX_FileRead* m_pFile; - uint32_t m_dwStart; - uint32_t m_dwEnd; - uint32_t m_dwCur; - uint8_t* m_pBuf; - uint32_t m_dwBufSize; - uint32_t m_dwBufIndex; -}; - -class CFX_SAXItem { - public: - CFX_SAXItem() - : m_pNode(NULL), - m_eNode(FX_SAXNODE_Unknown), - m_dwID(0), - m_bSkip(FALSE), - m_pPrev(NULL), - m_pNext(NULL) {} - void* m_pNode; - FX_SAXNODE m_eNode; - uint32_t m_dwID; - FX_BOOL m_bSkip; - CFX_SAXItem* m_pPrev; - CFX_SAXItem* m_pNext; -}; - -class CFX_SAXCommentContext { - public: - CFX_SAXCommentContext() : m_iHeaderCount(0), m_iTailCount(0) {} - int32_t m_iHeaderCount; - int32_t m_iTailCount; -}; - -class CFX_SAXReader { - public: - CFX_SAXReader(); - ~CFX_SAXReader(); - - int32_t StartParse(IFX_FileRead* pFile, - uint32_t dwStart = 0, - uint32_t dwLen = -1, - uint32_t dwParseMode = 0); - int32_t ContinueParse(IFX_Pause* pPause = NULL); - void SkipCurrentNode(); - void SetHandler(CXFA_SAXReaderHandler* pHandler); - void AppendData(uint8_t ch); - void AppendName(uint8_t ch); - void ParseText(); - void ParseNodeStart(); - void ParseInstruction(); - void ParseDeclOrComment(); - void ParseDeclNode(); - void ParseComment(); - void ParseCommentContent(); - void ParseTagName(); - void ParseTagAttributeName(); - void ParseTagAttributeEqual(); - void ParseTagAttributeValue(); - void ParseMaybeClose(); - void ParseTagClose(); - void ParseTagEnd(); - void ParseTargetData(); - - protected: - void Reset(); - void Push(); - void Pop(); - FX_BOOL SkipSpace(uint8_t ch); - void SkipNode(); - void NotifyData(); - void NotifyEnter(); - void NotifyAttribute(); - void NotifyBreak(); - void NotifyClose(); - void NotifyEnd(); - void NotifyTargetData(); - void ReallocDataBuffer(); - void ReallocNameBuffer(); - void ParseChar(uint8_t ch); - - CFX_SAXFile m_File; - CXFA_SAXReaderHandler* m_pHandler; - int32_t m_iState; - CFX_SAXItem* m_pRoot; - CFX_SAXItem* m_pCurItem; - uint32_t m_dwItemID; - FX_SAXMODE m_eMode; - FX_SAXMODE m_ePrevMode; - FX_BOOL m_bCharData; - uint8_t m_CurByte; - uint32_t m_dwDataOffset; - CFX_ByteArray m_SkipStack; - uint8_t m_SkipChar; - uint32_t m_dwNodePos; - uint8_t* m_pszData; - int32_t m_iDataSize; - int32_t m_iDataLength; - int32_t m_iEntityStart; - int32_t m_iDataPos; - uint8_t* m_pszName; - int32_t m_iNameSize; - int32_t m_iNameLength; - uint32_t m_dwParseMode; - CFX_SAXCommentContext* m_pCommentContext; -}; - -#endif // XFA_FGAS_XML_FGAS_SAX_H_ diff --git a/xfa/fxfa/app/xfa_checksum.cpp b/xfa/fxfa/app/xfa_checksum.cpp index b5d9fdeb7d..b279dc9596 100644 --- a/xfa/fxfa/app/xfa_checksum.cpp +++ b/xfa/fxfa/app/xfa_checksum.cpp @@ -97,16 +97,17 @@ CXFA_SAXReaderHandler::CXFA_SAXReaderHandler(CXFA_ChecksumContext* pContext) } CXFA_SAXReaderHandler::~CXFA_SAXReaderHandler() {} void* CXFA_SAXReaderHandler::OnTagEnter(const CFX_ByteStringC& bsTagName, - FX_SAXNODE eType, + CFX_SAXItem::Type eType, uint32_t dwStartPos) { UpdateChecksum(TRUE); - if (eType != FX_SAXNODE_Tag && eType != FX_SAXNODE_Instruction) { + if (eType != CFX_SAXItem::Type::Tag && + eType != CFX_SAXItem::Type::Instruction) { return NULL; } m_SAXContext.m_eNode = eType; CFX_ByteTextBuf& textBuf = m_SAXContext.m_TextBuf; textBuf << "<"; - if (eType == FX_SAXNODE_Instruction) { + if (eType == CFX_SAXItem::Type::Instruction) { textBuf << "?"; } textBuf << bsTagName; @@ -131,18 +132,18 @@ void CXFA_SAXReaderHandler::OnTagBreak(void* pTag) { UpdateChecksum(FALSE); } void CXFA_SAXReaderHandler::OnTagData(void* pTag, - FX_SAXNODE eType, + CFX_SAXItem::Type eType, const CFX_ByteStringC& bsData, uint32_t dwStartPos) { if (pTag == NULL) { return; } CFX_ByteTextBuf& textBuf = ((CXFA_SAXContext*)pTag)->m_TextBuf; - if (eType == FX_SAXNODE_CharData) { + if (eType == CFX_SAXItem::Type::CharData) { textBuf << ""; } } @@ -152,9 +153,9 @@ void CXFA_SAXReaderHandler::OnTagClose(void* pTag, uint32_t dwEndPos) { } CXFA_SAXContext* pSAXContext = (CXFA_SAXContext*)pTag; CFX_ByteTextBuf& textBuf = pSAXContext->m_TextBuf; - if (pSAXContext->m_eNode == FX_SAXNODE_Instruction) { + if (pSAXContext->m_eNode == CFX_SAXItem::Type::Instruction) { textBuf << "?>"; - } else if (pSAXContext->m_eNode == FX_SAXNODE_Tag) { + } else if (pSAXContext->m_eNode == CFX_SAXItem::Type::Tag) { textBuf << ">m_bsTagName.AsStringC() << ">"; } UpdateChecksum(FALSE); @@ -170,13 +171,13 @@ void CXFA_SAXReaderHandler::OnTagEnd(void* pTag, UpdateChecksum(FALSE); } void CXFA_SAXReaderHandler::OnTargetData(void* pTag, - FX_SAXNODE eType, + CFX_SAXItem::Type eType, const CFX_ByteStringC& bsData, uint32_t dwStartPos) { - if (pTag == NULL && eType != FX_SAXNODE_Comment) { + if (pTag == NULL && eType != CFX_SAXItem::Type::Comment) { return; } - if (eType == FX_SAXNODE_Comment) { + if (eType == CFX_SAXItem::Type::Comment) { CFX_ByteTextBuf& textBuf = m_SAXContext.m_TextBuf; textBuf << ""; UpdateChecksum(FALSE); @@ -234,9 +235,9 @@ FX_BOOL CXFA_ChecksumContext::UpdateChecksum(IFX_FileRead* pSrcFile, m_pSAXReader->SetHandler(&handler); if (m_pSAXReader->StartParse( pSrcFile, (uint32_t)offset, (uint32_t)size, - FX_SAXPARSEMODE_NotSkipSpace | FX_SAXPARSEMODE_NotConvert_amp | - FX_SAXPARSEMODE_NotConvert_lt | FX_SAXPARSEMODE_NotConvert_gt | - FX_SAXPARSEMODE_NotConvert_sharp) < 0) { + CFX_SaxParseMode_NotSkipSpace | CFX_SaxParseMode_NotConvert_amp | + CFX_SaxParseMode_NotConvert_lt | CFX_SaxParseMode_NotConvert_gt | + CFX_SaxParseMode_NotConvert_sharp) < 0) { return FALSE; } return m_pSAXReader->ContinueParse(NULL) > 99; diff --git a/xfa/fxfa/include/xfa_checksum.h b/xfa/fxfa/include/xfa_checksum.h index f6b13e7cb7..a5e5592cec 100644 --- a/xfa/fxfa/include/xfa_checksum.h +++ b/xfa/fxfa/include/xfa_checksum.h @@ -7,7 +7,7 @@ #ifndef XFA_FXFA_INCLUDE_XFA_CHECKSUM_H_ #define XFA_FXFA_INCLUDE_XFA_CHECKSUM_H_ -#include "xfa/fgas/xml/fgas_sax.h" +#include "xfa/fde/xml/cfx_saxreader.h" #include "xfa/fxfa/include/fxfa.h" class CXFA_SAXReaderHandler; @@ -15,11 +15,11 @@ class CXFA_ChecksumContext; class CXFA_SAXContext { public: - CXFA_SAXContext() : m_eNode(FX_SAXNODE_Unknown) {} + CXFA_SAXContext() : m_eNode(CFX_SAXItem::Type::Unknown) {} CFX_ByteTextBuf m_TextBuf; CFX_ByteString m_bsTagName; - FX_SAXNODE m_eNode; + CFX_SAXItem::Type m_eNode; }; class CXFA_SAXReaderHandler { @@ -28,14 +28,14 @@ class CXFA_SAXReaderHandler { ~CXFA_SAXReaderHandler(); void* OnTagEnter(const CFX_ByteStringC& bsTagName, - FX_SAXNODE eType, + CFX_SAXItem::Type eType, uint32_t dwStartPos); void OnTagAttribute(void* pTag, const CFX_ByteStringC& bsAttri, const CFX_ByteStringC& bsValue); void OnTagBreak(void* pTag); void OnTagData(void* pTag, - FX_SAXNODE eType, + CFX_SAXItem::Type eType, const CFX_ByteStringC& bsData, uint32_t dwStartPos); void OnTagClose(void* pTag, uint32_t dwEndPos); @@ -44,7 +44,7 @@ class CXFA_SAXReaderHandler { uint32_t dwEndPos); void OnTargetData(void* pTag, - FX_SAXNODE eType, + CFX_SAXItem::Type eType, const CFX_ByteStringC& bsData, uint32_t dwStartPos); -- cgit v1.2.3