diff options
author | Dan Sinclair <dsinclair@chromium.org> | 2017-04-19 09:19:57 -0400 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2017-04-19 13:33:07 +0000 |
commit | 0d86ecb08e1b2c204333b1f1f6b0b014e5b2971c (patch) | |
tree | f816429f8581c16a60773eb23385dc8e55729bac /xfa/fde/xml | |
parent | 3b71d26f092ebc86ca9177fbbe89d83caa67ae1b (diff) | |
download | pdfium-0d86ecb08e1b2c204333b1f1f6b0b014e5b2971c.tar.xz |
Move fde XML parser to core
This CL moves the XML parser from FDE into FXCRT and renames to CFX_
from CFDE_.
Change-Id: I21a9590bf74daf5517df630d7e7a5de89da99ea4
Reviewed-on: https://pdfium-review.googlesource.com/4312
Commit-Queue: dsinclair <dsinclair@chromium.org>
Reviewed-by: Tom Sepez <tsepez@chromium.org>
Reviewed-by: Nicolás Peña <npm@chromium.org>
Diffstat (limited to 'xfa/fde/xml')
-rw-r--r-- | xfa/fde/xml/cfde_xmlattributenode.cpp | 35 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlattributenode.h | 44 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlchardata.cpp | 22 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlchardata.h | 24 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmldoc.cpp | 160 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmldoc.h | 37 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlelement.cpp | 102 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlelement.h | 33 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlinstruction.cpp | 36 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlinstruction.h | 35 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlnode.cpp | 441 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlnode.h | 75 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlparser.cpp | 171 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlparser.h | 47 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlsyntaxparser.cpp | 698 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlsyntaxparser.h | 128 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp | 527 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmltext.cpp | 22 | ||||
-rw-r--r-- | xfa/fde/xml/cfde_xmltext.h | 31 |
19 files changed, 0 insertions, 2668 deletions
diff --git a/xfa/fde/xml/cfde_xmlattributenode.cpp b/xfa/fde/xml/cfde_xmlattributenode.cpp deleted file mode 100644 index 0bfa949b12..0000000000 --- a/xfa/fde/xml/cfde_xmlattributenode.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xmlattributenode.h" - -#include "core/fxcrt/fx_ext.h" - -CFDE_XMLAttributeNode::CFDE_XMLAttributeNode(const CFX_WideString& name) - : CFDE_XMLNode(), name_(name) { - ASSERT(name_.GetLength() > 0); -} - -CFDE_XMLAttributeNode::~CFDE_XMLAttributeNode() {} - -bool CFDE_XMLAttributeNode::HasAttribute(const CFX_WideString& name) const { - return attrs_.find(name) != attrs_.end(); -} - -CFX_WideString CFDE_XMLAttributeNode::GetString( - const CFX_WideString& name) const { - auto it = attrs_.find(name); - return it != attrs_.end() ? it->second : CFX_WideString(); -} - -void CFDE_XMLAttributeNode::SetString(const CFX_WideString& name, - const CFX_WideString& value) { - attrs_[name] = value; -} - -void CFDE_XMLAttributeNode::RemoveAttribute(const CFX_WideString& name) { - attrs_.erase(name); -} diff --git a/xfa/fde/xml/cfde_xmlattributenode.h b/xfa/fde/xml/cfde_xmlattributenode.h deleted file mode 100644 index 07a1ef1610..0000000000 --- a/xfa/fde/xml/cfde_xmlattributenode.h +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XMLATTRIBUTENODE_H_ -#define XFA_FDE_XML_CFDE_XMLATTRIBUTENODE_H_ - -#include <map> -#include <memory> - -#include "core/fxcrt/fx_string.h" -#include "xfa/fde/xml/cfde_xmlnode.h" - -class CFDE_XMLAttributeNode : public CFDE_XMLNode { - public: - explicit CFDE_XMLAttributeNode(const CFX_WideString& name); - ~CFDE_XMLAttributeNode() override; - - // CFDE_XMLNode - FDE_XMLNODETYPE GetType() const override = 0; - std::unique_ptr<CFDE_XMLNode> Clone() override = 0; - - CFX_WideString GetName() const { return name_; } - const std::map<CFX_WideString, CFX_WideString>& GetAttributes() const { - return attrs_; - } - void SetAttributes(const std::map<CFX_WideString, CFX_WideString>& attrs) { - attrs_ = attrs; - } - bool HasAttribute(const CFX_WideString& name) const; - - void SetString(const CFX_WideString& name, const CFX_WideString& value); - CFX_WideString GetString(const CFX_WideString& name) const; - - void RemoveAttribute(const CFX_WideString& name); - - private: - CFX_WideString name_; - std::map<CFX_WideString, CFX_WideString> attrs_; -}; - -#endif // XFA_FDE_XML_CFDE_XMLATTRIBUTENODE_H_ diff --git a/xfa/fde/xml/cfde_xmlchardata.cpp b/xfa/fde/xml/cfde_xmlchardata.cpp deleted file mode 100644 index bb2991937b..0000000000 --- a/xfa/fde/xml/cfde_xmlchardata.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xmlchardata.h" - -#include "third_party/base/ptr_util.h" - -CFDE_XMLCharData::CFDE_XMLCharData(const CFX_WideString& wsCData) - : CFDE_XMLText(wsCData) {} - -CFDE_XMLCharData::~CFDE_XMLCharData() {} - -FDE_XMLNODETYPE CFDE_XMLCharData::GetType() const { - return FDE_XMLNODE_CharData; -} - -std::unique_ptr<CFDE_XMLNode> CFDE_XMLCharData::Clone() { - return pdfium::MakeUnique<CFDE_XMLCharData>(GetText()); -} diff --git a/xfa/fde/xml/cfde_xmlchardata.h b/xfa/fde/xml/cfde_xmlchardata.h deleted file mode 100644 index 56babbc8b4..0000000000 --- a/xfa/fde/xml/cfde_xmlchardata.h +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XMLCHARDATA_H_ -#define XFA_FDE_XML_CFDE_XMLCHARDATA_H_ - -#include <memory> - -#include "core/fxcrt/fx_string.h" -#include "xfa/fde/xml/cfde_xmltext.h" - -class CFDE_XMLCharData : public CFDE_XMLText { - public: - explicit CFDE_XMLCharData(const CFX_WideString& wsCData); - ~CFDE_XMLCharData() override; - - FDE_XMLNODETYPE GetType() const override; - std::unique_ptr<CFDE_XMLNode> Clone() override; -}; - -#endif // XFA_FDE_XML_CFDE_XMLCHARDATA_H_ diff --git a/xfa/fde/xml/cfde_xmldoc.cpp b/xfa/fde/xml/cfde_xmldoc.cpp deleted file mode 100644 index fa44472098..0000000000 --- a/xfa/fde/xml/cfde_xmldoc.cpp +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xmldoc.h" - -#include <utility> -#include <vector> - -#include "core/fxcrt/fx_codepage.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" -#include "xfa/fde/xml/cfde_xmlchardata.h" -#include "xfa/fde/xml/cfde_xmlelement.h" -#include "xfa/fde/xml/cfde_xmlinstruction.h" -#include "xfa/fde/xml/cfde_xmlnode.h" -#include "xfa/fde/xml/cfde_xmltext.h" - -CFDE_XMLDoc::CFDE_XMLDoc() - : m_iStatus(0), m_pRoot(pdfium::MakeUnique<CFDE_XMLNode>()) { - m_pRoot->InsertChildNode(new CFDE_XMLInstruction(L"xml")); -} - -CFDE_XMLDoc::~CFDE_XMLDoc() {} - -bool CFDE_XMLDoc::LoadXML(std::unique_ptr<CFDE_XMLParser> pXMLParser) { - if (!pXMLParser) - return false; - - m_iStatus = 0; - m_pStream.Reset(); - m_pRoot->DeleteChildren(); - m_pXMLParser = std::move(pXMLParser); - return true; -} - -int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) { - if (m_iStatus < 100) - m_iStatus = m_pXMLParser->DoParser(pPause); - - return m_iStatus; -} - -void CFDE_XMLDoc::CloseXML() { - m_pXMLParser.reset(); -} - -void CFDE_XMLDoc::SaveXMLNode( - const CFX_RetainPtr<CFX_SeekableStreamProxy>& pXMLStream, - CFDE_XMLNode* pINode) { - CFDE_XMLNode* pNode = (CFDE_XMLNode*)pINode; - switch (pNode->GetType()) { - case FDE_XMLNODE_Instruction: { - CFX_WideString ws; - CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; - if (pInstruction->GetName().CompareNoCase(L"xml") == 0) { - ws = L"<?xml version=\"1.0\" encoding=\""; - uint16_t wCodePage = pXMLStream->GetCodePage(); - if (wCodePage == FX_CODEPAGE_UTF16LE) { - ws += L"UTF-16"; - } else if (wCodePage == FX_CODEPAGE_UTF16BE) { - ws += L"UTF-16be"; - } else { - ws += L"UTF-8"; - } - ws += L"\"?>"; - pXMLStream->WriteString(ws.AsStringC()); - } else { - ws.Format(L"<?%s", pInstruction->GetName().c_str()); - pXMLStream->WriteString(ws.AsStringC()); - - for (auto it : pInstruction->GetAttributes()) { - CFX_WideString wsValue = it.second; - wsValue.Replace(L"&",
L"&amp;"); - wsValue.Replace(L"<", L"&lt;"); - wsValue.Replace(L">", L"&gt;"); - wsValue.Replace(L"\'", L"&apos;"); - wsValue.Replace(L"\"", L"&quot;"); - - ws = L" "; - ws += it.first; - ws += L"=\""; - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.AsStringC()); - } - - for (auto target : pInstruction->GetTargetData()) { - ws = L" \""; - ws += target; - ws += L"\""; - pXMLStream->WriteString(ws.AsStringC()); - } - ws = L"?>"; - pXMLStream->WriteString(ws.AsStringC()); - } - break; - } - case FDE_XMLNODE_Element: { - CFX_WideString ws; - ws = L"<"; - ws += static_cast<CFDE_XMLElement*>(pNode)->GetName(); - pXMLStream->WriteString(ws.AsStringC()); - - for (auto it : static_cast<CFDE_XMLElement*>(pNode)->GetAttributes()) { - CFX_WideString wsValue = it.second; - wsValue.Replace(L"&", L"&amp;"); - wsValue.Replace(L"<", L"&lt;"); - wsValue.Replace(L">", L"&gt;"); - wsValue.Replace(L"\'", L"&apos;"); - wsValue.Replace(L"\"", L"&quot;"); - - ws = L" "; - ws += it.first; - ws += L"=\""; - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.AsStringC()); - } - if (pNode->m_pChild) { - ws = L"\n>"; - pXMLStream->WriteString(ws.AsStringC()); - CFDE_XMLNode* pChild = pNode->m_pChild; - while (pChild) { - SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pChild)); - pChild = pChild->m_pNext; - } - ws = L"</"; - ws += static_cast<CFDE_XMLElement*>(pNode)->GetName(); - ws += L"\n>"; - } else { - ws = L"\n/>"; - } - pXMLStream->WriteString(ws.AsStringC()); - break; - } - case FDE_XMLNODE_Text: { - CFX_WideString ws = static_cast<CFDE_XMLText*>(pNode)->GetText(); - ws.Replace(L"&", L"&amp;"); - ws.Replace(L"<", L"&lt;"); - ws.Replace(L">", L"&gt;"); - ws.Replace(L"\'", L"&apos;"); - ws.Replace(L"\"", L"&quot;"); - pXMLStream->WriteString(ws.AsStringC()); - break; - } - case FDE_XMLNODE_CharData: { - CFX_WideString ws = L"<![CDATA["; - ws += static_cast<CFDE_XMLCharData*>(pNode)->GetText(); - ws += L"]]>"; - pXMLStream->WriteString(ws.AsStringC()); - break; - } - case FDE_XMLNODE_Unknown: - default: - break; - } -} diff
--git a/xfa/fde/xml/cfde_xmldoc.h b/xfa/fde/xml/cfde_xmldoc.h deleted file mode 100644 index c9c7db41b7..0000000000 --- a/xfa/fde/xml/cfde_xmldoc.h +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XMLDOC_H_ -#define XFA_FDE_XML_CFDE_XMLDOC_H_ - -#include <memory> - -#include "core/fxcrt/cfx_retain_ptr.h" -#include "core/fxcrt/cfx_seekablestreamproxy.h" -#include "xfa/fde/xml/cfde_xmlnode.h" -#include "xfa/fde/xml/cfde_xmlparser.h" - -class CFDE_XMLDoc { - public: - CFDE_XMLDoc(); - ~CFDE_XMLDoc(); - - bool LoadXML(std::unique_ptr<CFDE_XMLParser> pXMLParser); - int32_t DoLoad(IFX_Pause* pPause); - void CloseXML(); - - CFDE_XMLNode* GetRoot() const { return m_pRoot.get(); } - void SaveXMLNode(const CFX_RetainPtr<CFX_SeekableStreamProxy>& pXMLStream, - CFDE_XMLNode* pNode); - - private: - int32_t m_iStatus; - std::unique_ptr<CFDE_XMLNode> m_pRoot; - std::unique_ptr<CFDE_XMLParser> m_pXMLParser; - CFX_RetainPtr<CFX_SeekableStreamProxy> m_pStream; -}; - -#endif // XFA_FDE_XML_CFDE_XMLDOC_H_ diff --git a/xfa/fde/xml/cfde_xmlelement.cpp b/xfa/fde/xml/cfde_xmlelement.cpp deleted file mode 100644 index 560cf89280..0000000000 --- a/xfa/fde/xml/cfde_xmlelement.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xmlelement.h" - -#include "core/fxcrt/fx_ext.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" -#include "xfa/fde/xml/cfde_xmlchardata.h" -#include "xfa/fde/xml/cfde_xmltext.h" - -CFDE_XMLElement::CFDE_XMLElement(const CFX_WideString& wsTag) - : CFDE_XMLAttributeNode(wsTag) {} - -CFDE_XMLElement::~CFDE_XMLElement() {} - -FDE_XMLNODETYPE CFDE_XMLElement::GetType() const { - return FDE_XMLNODE_Element; -} - -std::unique_ptr<CFDE_XMLNode> CFDE_XMLElement::Clone() { - auto pClone = pdfium::MakeUnique<CFDE_XMLElement>(GetName()); - pClone->SetAttributes(GetAttributes()); - - CFX_WideString wsText; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - switch (pChild->GetType()) { - case FDE_XMLNODE_Text: - wsText += static_cast<CFDE_XMLText*>(pChild)->GetText(); - break; - default: - break; - } - pChild = pChild->m_pNext; - } - pClone->SetTextData(wsText); - return pClone; -} - -CFX_WideString CFDE_XMLElement::GetLocalTagName() const { - FX_STRSIZE iFind = GetName().Find(L':', 0); - if (iFind < 0) - return GetName(); - return GetName().Right(GetName().GetLength() - iFind - 1); -} - -CFX_WideString CFDE_XMLElement::GetNamespacePrefix() const { - FX_STRSIZE iFind = GetName().Find(L':', 0); - if (iFind < 0) - return CFX_WideString(); - return GetName().Left(iFind); -} - -CFX_WideString CFDE_XMLElement::GetNamespaceURI() const { - CFX_WideString wsAttri(L"xmlns"); - CFX_WideString wsPrefix = GetNamespacePrefix(); - if (wsPrefix.GetLength() > 0) { - wsAttri += L":"; - wsAttri += wsPrefix; - } - - auto* pNode = static_cast<const CFDE_XMLNode*>(this); - while (pNode) { - if (pNode->GetType() != FDE_XMLNODE_Element) - break; - - auto* pElement = static_cast<const CFDE_XMLElement*>(pNode); - if (!pElement->HasAttribute(wsAttri)) { - pNode = pNode->GetNodeItem(CFDE_XMLNode::Parent); - continue; - } - return pElement->GetString(wsAttri); - } - return CFX_WideString(); -} - -CFX_WideString 
CFDE_XMLElement::GetTextData() const { - CFX_WideTextBuf buffer; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - switch (pChild->GetType()) { - case FDE_XMLNODE_Text: - case FDE_XMLNODE_CharData: - buffer << static_cast<CFDE_XMLText*>(pChild)->GetText(); - break; - default: - break; - } - pChild = pChild->m_pNext; - } - return buffer.MakeString(); -} - -void CFDE_XMLElement::SetTextData(const CFX_WideString& wsText) { - if (wsText.GetLength() < 1) - return; - InsertChildNode(new CFDE_XMLText(wsText)); -} diff --git a/xfa/fde/xml/cfde_xmlelement.h b/xfa/fde/xml/cfde_xmlelement.h deleted file mode 100644 index a891ce8928..0000000000 --- a/xfa/fde/xml/cfde_xmlelement.h +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XMLELEMENT_H_ -#define XFA_FDE_XML_CFDE_XMLELEMENT_H_ - -#include <memory> -#include <vector> - -#include "core/fxcrt/fx_string.h" -#include "xfa/fde/xml/cfde_xmlattributenode.h" - -class CFDE_XMLElement : public CFDE_XMLAttributeNode { - public: - explicit CFDE_XMLElement(const CFX_WideString& wsTag); - ~CFDE_XMLElement() override; - - // CFDE_XMLNode - FDE_XMLNODETYPE GetType() const override; - std::unique_ptr<CFDE_XMLNode> Clone() override; - - CFX_WideString GetLocalTagName() const; - CFX_WideString GetNamespacePrefix() const; - CFX_WideString GetNamespaceURI() const; - - CFX_WideString GetTextData() const; - void SetTextData(const CFX_WideString& wsText); -}; - -#endif // XFA_FDE_XML_CFDE_XMLELEMENT_H_ diff --git a/xfa/fde/xml/cfde_xmlinstruction.cpp b/xfa/fde/xml/cfde_xmlinstruction.cpp deleted file mode 100644 index d289d9e88a..0000000000 --- a/xfa/fde/xml/cfde_xmlinstruction.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xmlinstruction.h" - -#include "core/fxcrt/fx_ext.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" - -CFDE_XMLInstruction::CFDE_XMLInstruction(const CFX_WideString& wsTarget) - : CFDE_XMLAttributeNode(wsTarget) {} - -CFDE_XMLInstruction::~CFDE_XMLInstruction() {} - -FDE_XMLNODETYPE CFDE_XMLInstruction::GetType() const { - return FDE_XMLNODE_Instruction; -} - -std::unique_ptr<CFDE_XMLNode> CFDE_XMLInstruction::Clone() { - auto pClone = pdfium::MakeUnique<CFDE_XMLInstruction>(GetName()); - pClone->SetAttributes(GetAttributes()); - pClone->m_TargetData = m_TargetData; - return pClone; -} - -void CFDE_XMLInstruction::AppendData(const CFX_WideString& wsData) { - m_TargetData.push_back(wsData); -} - -void CFDE_XMLInstruction::RemoveData(int32_t index) { - if (pdfium::IndexInBounds(m_TargetData, index)) - m_TargetData.erase(m_TargetData.begin() + index); -} diff --git a/xfa/fde/xml/cfde_xmlinstruction.h b/xfa/fde/xml/cfde_xmlinstruction.h deleted file mode 100644 index e9c4ad3dfe..0000000000 --- a/xfa/fde/xml/cfde_xmlinstruction.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XMLINSTRUCTION_H_ -#define XFA_FDE_XML_CFDE_XMLINSTRUCTION_H_ - -#include <memory> -#include <vector> - -#include "core/fxcrt/fx_string.h" -#include "xfa/fde/xml/cfde_xmlattributenode.h" - -class CFDE_XMLInstruction : public CFDE_XMLAttributeNode { - public: - explicit CFDE_XMLInstruction(const CFX_WideString& wsTarget); - ~CFDE_XMLInstruction() override; - - // CFDE_XMLNode - FDE_XMLNODETYPE GetType() const override; - std::unique_ptr<CFDE_XMLNode> Clone() override; - - const std::vector<CFX_WideString>& GetTargetData() const { - return m_TargetData; - } - void AppendData(const CFX_WideString& wsData); - void RemoveData(int32_t index); - - private: - std::vector<CFX_WideString> m_TargetData; -}; - -#endif // XFA_FDE_XML_CFDE_XMLINSTRUCTION_H_ diff --git a/xfa/fde/xml/cfde_xmlnode.cpp b/xfa/fde/xml/cfde_xmlnode.cpp deleted file mode 100644 index c81de7623a..0000000000 --- a/xfa/fde/xml/cfde_xmlnode.cpp +++ /dev/null @@ -1,441 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xmlnode.h" - -#include <vector> - -#include "core/fxcrt/fx_codepage.h" -#include "third_party/base/stl_util.h" -#include "xfa/fde/xml/cfde_xmlchardata.h" -#include "xfa/fde/xml/cfde_xmlelement.h" -#include "xfa/fde/xml/cfde_xmlinstruction.h" -#include "xfa/fde/xml/cfde_xmltext.h" - -CFDE_XMLNode::CFDE_XMLNode() - : m_pParent(nullptr), - m_pChild(nullptr), - m_pPrior(nullptr), - m_pNext(nullptr) {} - -FDE_XMLNODETYPE CFDE_XMLNode::GetType() const { - return FDE_XMLNODE_Unknown; -} - -CFDE_XMLNode::~CFDE_XMLNode() { - DeleteChildren(); -} - -void CFDE_XMLNode::DeleteChildren() { - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - CFDE_XMLNode* pNext = pChild->m_pNext; - delete pChild; - pChild = pNext; - } - m_pChild = nullptr; -} - -int32_t CFDE_XMLNode::CountChildNodes() const { - int32_t iCount = 0; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - iCount++; - pChild = pChild->m_pNext; - } - return iCount; -} - -CFDE_XMLNode* CFDE_XMLNode::GetChildNode(int32_t index) const { - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - if (index == 0) { - return pChild; - } - index--; - pChild = pChild->m_pNext; - } - return nullptr; -} - -int32_t CFDE_XMLNode::GetChildNodeIndex(CFDE_XMLNode* pNode) const { - int32_t index = 0; - CFDE_XMLNode* pChild = m_pChild; - while (pChild) { - if (pChild == pNode) { - return index; - } - index++; - pChild = pChild->m_pNext; - } - return -1; -} - -CFDE_XMLNode* CFDE_XMLNode::GetPath(const wchar_t* pPath, - int32_t iLength, - bool bQualifiedName) const { - ASSERT(pPath); - if (iLength < 0) { - iLength = FXSYS_wcslen(pPath); - } - if (iLength == 0) { - return nullptr; - } - CFX_WideString csPath; - const wchar_t* pStart = pPath; - const wchar_t* pEnd = pPath + iLength; - wchar_t ch; - while (pStart < pEnd) { - ch = *pStart++; - if (ch == L'/') { - break; - } else { - csPath += ch; - } - } - iLength -= pStart - pPath; - CFDE_XMLNode* pFind = nullptr; - if 
(csPath.GetLength() < 1) { - pFind = GetNodeItem(CFDE_XMLNode::Root); - } else if (csPath.Compare(L"..") == 0) { - pFind = m_pParent; - } else if (csPath.Compare(L".") == 0) { - pFind = (CFDE_XMLNode*)this; - } else { - CFX_WideString wsTag; - CFDE_XMLNode* pNode = m_pChild; - while (pNode) { - if (pNode->GetType() == FDE_XMLNODE_Element) { - if (bQualifiedName) - wsTag = static_cast<CFDE_XMLElement*>(pNode)->GetName(); - else - wsTag = static_cast<CFDE_XMLElement*>(pNode)->GetLocalTagName(); - - if (wsTag.Compare(csPath) == 0) { - if (iLength < 1) - pFind = pNode; - else - pFind = pNode->GetPath(pStart, iLength, bQualifiedName); - - if (pFind) - return pFind; - } - } - pNode = pNode->m_pNext; - } - } - if (!pFind || iLength < 1) - return pFind; - return pFind->GetPath(pStart, iLength, bQualifiedName); -} - -int32_t CFDE_XMLNode::InsertChildNode(CFDE_XMLNode* pNode, int32_t index) { - pNode->m_pParent = this; - if (!m_pChild) { - m_pChild = pNode; - pNode->m_pPrior = nullptr; - pNode->m_pNext = nullptr; - return 0; - } - if (index == 0) { - pNode->m_pNext = m_pChild; - pNode->m_pPrior = nullptr; - m_pChild->m_pPrior = pNode; - m_pChild = pNode; - return 0; - } - int32_t iCount = 0; - CFDE_XMLNode* pFind = m_pChild; - while (++iCount != index && pFind->m_pNext) { - pFind = pFind->m_pNext; - } - pNode->m_pPrior = pFind; - pNode->m_pNext = pFind->m_pNext; - if (pFind->m_pNext) - pFind->m_pNext->m_pPrior = pNode; - pFind->m_pNext = pNode; - return iCount; -} - -void CFDE_XMLNode::RemoveChildNode(CFDE_XMLNode* pNode) { - ASSERT(m_pChild && pNode); - if (m_pChild == pNode) { - m_pChild = pNode->m_pNext; - } else { - pNode->m_pPrior->m_pNext = pNode->m_pNext; - } - if (pNode->m_pNext) - pNode->m_pNext->m_pPrior = pNode->m_pPrior; - pNode->m_pParent = nullptr; - pNode->m_pNext = nullptr; - pNode->m_pPrior = nullptr; -} - -CFDE_XMLNode* CFDE_XMLNode::GetNodeItem(CFDE_XMLNode::NodeItem eItem) const { - switch (eItem) { - case CFDE_XMLNode::Root: { - CFDE_XMLNode* pParent = 
(CFDE_XMLNode*)this; - while (pParent->m_pParent) { - pParent = pParent->m_pParent; - } - return pParent; - } - case CFDE_XMLNode::Parent: - return m_pParent; - case CFDE_XMLNode::FirstSibling: { - CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; - while (pItem->m_pPrior) { - pItem = pItem->m_pPrior; - } - return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; - } - case CFDE_XMLNode::PriorSibling: - return m_pPrior; - case CFDE_XMLNode::NextSibling: - return m_pNext; - case CFDE_XMLNode::LastSibling: { - CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; - while (pItem->m_pNext) - pItem = pItem->m_pNext; - return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; - } - case CFDE_XMLNode::FirstNeighbor: { - CFDE_XMLNode* pParent = (CFDE_XMLNode*)this; - while (pParent->m_pParent) - pParent = pParent->m_pParent; - return pParent == (CFDE_XMLNode*)this ? nullptr : pParent; - } - case CFDE_XMLNode::PriorNeighbor: { - if (!m_pPrior) - return m_pParent; - - CFDE_XMLNode* pItem = m_pPrior; - while (pItem->m_pChild) { - pItem = pItem->m_pChild; - while (pItem->m_pNext) - pItem = pItem->m_pNext; - } - return pItem; - } - case CFDE_XMLNode::NextNeighbor: { - if (m_pChild) - return m_pChild; - if (m_pNext) - return m_pNext; - CFDE_XMLNode* pItem = m_pParent; - while (pItem) { - if (pItem->m_pNext) - return pItem->m_pNext; - pItem = pItem->m_pParent; - } - return nullptr; - } - case CFDE_XMLNode::LastNeighbor: { - CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; - while (pItem->m_pParent) { - pItem = pItem->m_pParent; - } - while (true) { - while (pItem->m_pNext) - pItem = pItem->m_pNext; - if (!pItem->m_pChild) - break; - pItem = pItem->m_pChild; - } - return pItem == (CFDE_XMLNode*)this ? 
nullptr : pItem; - } - case CFDE_XMLNode::FirstChild: - return m_pChild; - case CFDE_XMLNode::LastChild: { - if (!m_pChild) - return nullptr; - - CFDE_XMLNode* pChild = m_pChild; - while (pChild->m_pNext) - pChild = pChild->m_pNext; - return pChild; - } - default: - break; - } - return nullptr; -} - -int32_t CFDE_XMLNode::GetNodeLevel() const { - int32_t iLevel = 0; - const CFDE_XMLNode* pItem = m_pParent; - while (pItem) { - iLevel++; - pItem = pItem->m_pParent; - } - return iLevel; -} - -bool CFDE_XMLNode::InsertNodeItem(CFDE_XMLNode::NodeItem eItem, - CFDE_XMLNode* pNode) { - switch (eItem) { - case CFDE_XMLNode::NextSibling: { - pNode->m_pParent = m_pParent; - pNode->m_pNext = m_pNext; - pNode->m_pPrior = this; - if (m_pNext) { - m_pNext->m_pPrior = pNode; - } - m_pNext = pNode; - return true; - } - case CFDE_XMLNode::PriorSibling: { - pNode->m_pParent = m_pParent; - pNode->m_pNext = this; - pNode->m_pPrior = m_pPrior; - if (m_pPrior) { - m_pPrior->m_pNext = pNode; - } else if (m_pParent) { - m_pParent->m_pChild = pNode; - } - m_pPrior = pNode; - return true; - } - default: - return false; - } -} - -CFDE_XMLNode* CFDE_XMLNode::RemoveNodeItem(CFDE_XMLNode::NodeItem eItem) { - CFDE_XMLNode* pNode = nullptr; - switch (eItem) { - case CFDE_XMLNode::NextSibling: - if (m_pNext) { - pNode = m_pNext; - m_pNext = pNode->m_pNext; - if (m_pNext) { - m_pNext->m_pPrior = this; - } - pNode->m_pParent = nullptr; - pNode->m_pNext = nullptr; - pNode->m_pPrior = nullptr; - } - break; - default: - break; - } - return pNode; -} - -std::unique_ptr<CFDE_XMLNode> CFDE_XMLNode::Clone() { - return nullptr; -} - -void CFDE_XMLNode::SaveXMLNode( - const CFX_RetainPtr<CFX_SeekableStreamProxy>& pXMLStream) { - CFDE_XMLNode* pNode = (CFDE_XMLNode*)this; - switch (pNode->GetType()) { - case FDE_XMLNODE_Instruction: { - CFX_WideString ws; - CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; - if (pInstruction->GetName().CompareNoCase(L"xml") == 0) { - ws = L"<?xml 
version=\"1.0\" encoding=\""; - uint16_t wCodePage = pXMLStream->GetCodePage(); - if (wCodePage == FX_CODEPAGE_UTF16LE) { - ws += L"UTF-16"; - } else if (wCodePage == FX_CODEPAGE_UTF16BE) { - ws += L"UTF-16be"; - } else { - ws += L"UTF-8"; - } - ws += L"\"?>"; - pXMLStream->WriteString(ws.AsStringC()); - } else { - ws.Format(L"<?%s", pInstruction->GetName().c_str()); - pXMLStream->WriteString(ws.AsStringC()); - - for (auto it : pInstruction->GetAttributes()) { - CFX_WideString wsValue = it.second; - wsValue.Replace(L"&", L"&amp;"); - wsValue.Replace(L"<", L"&lt;"); - wsValue.Replace(L">", L"&gt;"); - wsValue.Replace(L"\'", L"&apos;"); - wsValue.Replace(L"\"", L"&quot;"); - - ws = L" "; - ws += it.first; - ws += L"=\""; - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.AsStringC()); - } - - for (auto target : pInstruction->GetTargetData()) { - ws = L" \""; - ws += target; - ws += L"\""; - pXMLStream->WriteString(ws.AsStringC()); - } - ws = L"?>"; - pXMLStream->WriteString(ws.AsStringC()); - } - break; - } - case FDE_XMLNODE_Element: { - CFX_WideString ws; - ws = L"<"; - ws += static_cast<CFDE_XMLElement*>(pNode)->GetName(); - pXMLStream->WriteString(ws.AsStringC()); - - for (auto it : static_cast<CFDE_XMLElement*>(pNode)->GetAttributes()) { - CFX_WideString wsValue = it.second; - wsValue.Replace(L"&", L"&amp;"); - wsValue.Replace(L"<", L"&lt;"); - wsValue.Replace(L">", L"&gt;"); - wsValue.Replace(L"\'", L"&apos;"); - wsValue.Replace(L"\"", L"&quot;"); - - ws = L" "; - ws += it.first; - ws += L"=\""; - ws += wsValue; - ws += L"\""; - pXMLStream->WriteString(ws.AsStringC()); - } - if (pNode->m_pChild) { - ws = L"\n>"; - pXMLStream->WriteString(ws.AsStringC()); - CFDE_XMLNode* pChild = pNode->m_pChild; - while (pChild) { - pChild->SaveXMLNode(pXMLStream); - pChild = pChild->m_pNext; - } - ws = L"</"; - ws += static_cast<CFDE_XMLElement*>(pNode)->GetName(); - ws += L"\n>"; - } else { - ws = L"\n/>"; - } - pXMLStream->WriteString(ws.AsStringC()); - break; - } - case FDE_XMLNODE_Text: { -
CFX_WideString ws = static_cast<CFDE_XMLText*>(pNode)->GetText(); - ws.Replace(L"&", L"&amp;"); - ws.Replace(L"<", L"&lt;"); - ws.Replace(L">", L"&gt;"); - ws.Replace(L"\'", L"&apos;"); - ws.Replace(L"\"", L"&quot;"); - pXMLStream->WriteString(ws.AsStringC()); - break; - } - case FDE_XMLNODE_CharData: { - CFX_WideString ws = L"<![CDATA["; - ws += static_cast<CFDE_XMLCharData*>(pNode)->GetText(); - ws += L"]]>"; - pXMLStream->WriteString(ws.AsStringC()); - break; - } - case FDE_XMLNODE_Unknown: - default: - break; - } -} diff --git a/xfa/fde/xml/cfde_xmlnode.h b/xfa/fde/xml/cfde_xmlnode.h deleted file mode 100644 index 71e1a712e0..0000000000 --- a/xfa/fde/xml/cfde_xmlnode.h +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XMLNODE_H_ -#define XFA_FDE_XML_CFDE_XMLNODE_H_ - -#include <memory> - -#include "core/fxcrt/cfx_retain_ptr.h" -#include "core/fxcrt/cfx_seekablestreamproxy.h" - -enum FDE_XMLNODETYPE { - FDE_XMLNODE_Unknown = 0, - FDE_XMLNODE_Instruction, - FDE_XMLNODE_Element, - FDE_XMLNODE_Text, - FDE_XMLNODE_CharData, -}; - -struct FDE_XMLNODE { - int32_t iNodeNum; - FDE_XMLNODETYPE eNodeType; -}; - -class CFDE_XMLNode { - public: - enum NodeItem { - Root = 0, - Parent, - FirstSibling, - PriorSibling, - NextSibling, - LastSibling, - FirstNeighbor, - PriorNeighbor, - NextNeighbor, - LastNeighbor, - FirstChild, - LastChild - }; - - CFDE_XMLNode(); - virtual ~CFDE_XMLNode(); - - virtual FDE_XMLNODETYPE GetType() const; - virtual std::unique_ptr<CFDE_XMLNode> Clone(); - - int32_t CountChildNodes() const; - CFDE_XMLNode* GetChildNode(int32_t index) const; - int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const; - int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1); - void RemoveChildNode(CFDE_XMLNode* pNode); - void
DeleteChildren(); - - CFDE_XMLNode* GetPath(const wchar_t* pPath, - int32_t iLength = -1, - bool bQualifiedName = true) const; - - int32_t GetNodeLevel() const; - CFDE_XMLNode* GetNodeItem(CFDE_XMLNode::NodeItem eItem) const; - bool InsertNodeItem(CFDE_XMLNode::NodeItem eItem, CFDE_XMLNode* pNode); - CFDE_XMLNode* RemoveNodeItem(CFDE_XMLNode::NodeItem eItem); - - void SaveXMLNode(const CFX_RetainPtr<CFX_SeekableStreamProxy>& pXMLStream); - - CFDE_XMLNode* m_pParent; - CFDE_XMLNode* m_pChild; - CFDE_XMLNode* m_pPrior; - CFDE_XMLNode* m_pNext; -}; - -#endif // XFA_FDE_XML_CFDE_XMLNODE_H_ diff --git a/xfa/fde/xml/cfde_xmlparser.cpp b/xfa/fde/xml/cfde_xmlparser.cpp deleted file mode 100644 index c8b300f81d..0000000000 --- a/xfa/fde/xml/cfde_xmlparser.cpp +++ /dev/null @@ -1,171 +0,0 @@ -// Copyright 2016 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xmlparser.h" - -#include "core/fxcrt/fx_basic.h" -#include "third_party/base/ptr_util.h" -#include "xfa/fde/xml/cfde_xmlchardata.h" -#include "xfa/fde/xml/cfde_xmlelement.h" -#include "xfa/fde/xml/cfde_xmlinstruction.h" -#include "xfa/fde/xml/cfde_xmlnode.h" -#include "xfa/fde/xml/cfde_xmltext.h" - -CFDE_XMLParser::CFDE_XMLParser( - CFDE_XMLNode* pParent, - const CFX_RetainPtr<CFX_SeekableStreamProxy>& pStream) - : m_nElementStart(0), - m_dwCheckStatus(0), - m_dwCurrentCheckStatus(0), - m_pStream(pStream), - m_pParser(pdfium::MakeUnique<CFDE_XMLSyntaxParser>(m_pStream)), - m_pParent(pParent), - m_pChild(nullptr), - m_syntaxParserResult(FDE_XmlSyntaxResult::None) { - ASSERT(m_pParent && m_pStream); - m_NodeStack.push(m_pParent); -} - -CFDE_XMLParser::~CFDE_XMLParser() {} - -int32_t CFDE_XMLParser::DoParser(IFX_Pause* pPause) { - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) - return -1; - if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) - return 100; - - int32_t iCount = 0; - while (true) { - m_syntaxParserResult = m_pParser->DoSyntaxParse(); - switch (m_syntaxParserResult) { - case FDE_XmlSyntaxResult::InstructionOpen: - break; - case FDE_XmlSyntaxResult::InstructionClose: - if (m_pChild) { - if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - } - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::ElementOpen: - if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 2) - m_nElementStart = m_pParser->GetCurrentPos() - 1; - break; - case FDE_XmlSyntaxResult::ElementBreak: - break; - case FDE_XmlSyntaxResult::ElementClose: - if (m_pChild->GetType() != FDE_XMLNODE_Element) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - m_ws1 = m_pParser->GetTagName(); - m_ws2 = static_cast<CFDE_XMLElement*>(m_pChild)->GetName(); - if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { - m_syntaxParserResult = 
FDE_XmlSyntaxResult::Error; - break; - } - if (!m_NodeStack.empty()) - m_NodeStack.pop(); - if (m_NodeStack.empty()) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.size() == 2) { - m_nSize[m_dwCurrentCheckStatus - 1] = - m_pParser->GetCurrentBinaryPos() - - m_nStart[m_dwCurrentCheckStatus - 1]; - m_dwCurrentCheckStatus = 0; - } - m_pParent = m_NodeStack.top(); - m_pChild = m_pParent; - iCount++; - break; - case FDE_XmlSyntaxResult::TargetName: - m_ws1 = m_pParser->GetTargetName(); - if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") { - m_pChild = new CFDE_XMLInstruction(m_ws1); - m_pParent->InsertChildNode(m_pChild); - } else { - m_pChild = nullptr; - } - m_ws1.clear(); - break; - case FDE_XmlSyntaxResult::TagName: - m_ws1 = m_pParser->GetTagName(); - m_pChild = new CFDE_XMLElement(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_NodeStack.push(m_pChild); - m_pParent = m_pChild; - - if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 3) { - CFX_WideString wsTag = - static_cast<CFDE_XMLElement*>(m_pChild)->GetLocalTagName(); - if (wsTag == L"template") { - m_dwCheckStatus |= 0x01; - m_dwCurrentCheckStatus = 0x01; - m_nStart[0] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } else if (wsTag == L"datasets") { - m_dwCheckStatus |= 0x02; - m_dwCurrentCheckStatus = 0x02; - m_nStart[1] = m_pParser->GetCurrentBinaryPos() - - (m_pParser->GetCurrentPos() - m_nElementStart); - } - } - break; - case FDE_XmlSyntaxResult::AttriName: - m_ws1 = m_pParser->GetAttributeName(); - break; - case FDE_XmlSyntaxResult::AttriValue: - if (m_pChild) { - m_ws2 = m_pParser->GetAttributeName(); - if (m_pChild->GetType() == FDE_XMLNODE_Element) - static_cast<CFDE_XMLElement*>(m_pChild)->SetString(m_ws1, m_ws2); - } - m_ws1.clear(); - break; - case FDE_XmlSyntaxResult::Text: - m_ws1 = m_pParser->GetTextData(); - m_pChild = new CFDE_XMLText(m_ws1); - 
m_pParent->InsertChildNode(m_pChild); - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::CData: - m_ws1 = m_pParser->GetTextData(); - m_pChild = new CFDE_XMLCharData(m_ws1); - m_pParent->InsertChildNode(m_pChild); - m_pChild = m_pParent; - break; - case FDE_XmlSyntaxResult::TargetData: - if (m_pChild) { - if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - break; - } - auto* instruction = static_cast<CFDE_XMLInstruction*>(m_pChild); - if (!m_ws1.IsEmpty()) - instruction->AppendData(m_ws1); - instruction->AppendData(m_pParser->GetTargetData()); - } - m_ws1.clear(); - break; - default: - break; - } - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { - break; - } - if (pPause && iCount > 500 && pPause->NeedToPauseNow()) { - break; - } - } - return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_NodeStack.size() != 1) - ? -1 - : m_pParser->GetStatus(); -} diff --git a/xfa/fde/xml/cfde_xmlparser.h b/xfa/fde/xml/cfde_xmlparser.h deleted file mode 100644 index 14b2127068..0000000000 --- a/xfa/fde/xml/cfde_xmlparser.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2016 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XMLPARSER_H_ -#define XFA_FDE_XML_CFDE_XMLPARSER_H_ - -#include <memory> -#include <stack> - -#include "core/fxcrt/cfx_retain_ptr.h" -#include "core/fxcrt/fx_string.h" -#include "xfa/fde/xml/cfde_xmlsyntaxparser.h" - -class CFDE_XMLElement; -class CFDE_XMLNode; -class CFX_SeekableStreamProxy; -class IFX_Pause; - -class CFDE_XMLParser { - public: - CFDE_XMLParser(CFDE_XMLNode* pParent, - const CFX_RetainPtr<CFX_SeekableStreamProxy>& pStream); - ~CFDE_XMLParser(); - - int32_t DoParser(IFX_Pause* pPause); - - FX_FILESIZE m_nStart[2]; - size_t m_nSize[2]; - FX_FILESIZE m_nElementStart; - uint16_t m_dwCheckStatus; - uint16_t m_dwCurrentCheckStatus; - - private: - CFX_RetainPtr<CFX_SeekableStreamProxy> m_pStream; - std::unique_ptr<CFDE_XMLSyntaxParser> m_pParser; - CFDE_XMLNode* m_pParent; - CFDE_XMLNode* m_pChild; - std::stack<CFDE_XMLNode*> m_NodeStack; - CFX_WideString m_ws1; - CFX_WideString m_ws2; - FDE_XmlSyntaxResult m_syntaxParserResult; -}; - -#endif // XFA_FDE_XML_CFDE_XMLPARSER_H_ diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.cpp b/xfa/fde/xml/cfde_xmlsyntaxparser.cpp deleted file mode 100644 index 5d671bb39a..0000000000 --- a/xfa/fde/xml/cfde_xmlsyntaxparser.cpp +++ /dev/null @@ -1,698 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xmlsyntaxparser.h" - -#include <algorithm> - -#include "core/fxcrt/fx_ext.h" -#include "core/fxcrt/fx_safe_types.h" - -namespace { - -const uint32_t kMaxCharRange = 0x10ffff; - -bool IsXMLWhiteSpace(wchar_t ch) { - return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; -} - -struct FDE_XMLNAMECHAR { - uint16_t wStart; - uint16_t wEnd; - bool bStartChar; -}; - -const FDE_XMLNAMECHAR g_XMLNameChars[] = { - {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false}, - {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true}, - {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true}, - {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true}, - {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false}, - {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true}, - {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true}, -}; - -bool IsXMLNameChar(wchar_t ch, bool bFirstChar) { - int32_t iStart = 0; - int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1; - while (iStart <= iEnd) { - int32_t iMid = (iStart + iEnd) / 2; - if (ch < g_XMLNameChars[iMid].wStart) { - iEnd = iMid - 1; - } else if (ch > g_XMLNameChars[iMid].wEnd) { - iStart = iMid + 1; - } else { - return bFirstChar ? 
g_XMLNameChars[iMid].bStartChar : true; - } - } - return false; -} - -int32_t GetUTF8EncodeLength(const std::vector<wchar_t>& src, - FX_FILESIZE iSrcLen) { - uint32_t unicode = 0; - int32_t iDstNum = 0; - const wchar_t* pSrc = src.data(); - while (iSrcLen-- > 0) { - unicode = *pSrc++; - int nbytes = 0; - if ((uint32_t)unicode < 0x80) { - nbytes = 1; - } else if ((uint32_t)unicode < 0x800) { - nbytes = 2; - } else if ((uint32_t)unicode < 0x10000) { - nbytes = 3; - } else if ((uint32_t)unicode < 0x200000) { - nbytes = 4; - } else if ((uint32_t)unicode < 0x4000000) { - nbytes = 5; - } else { - nbytes = 6; - } - iDstNum += nbytes; - } - return iDstNum; -} - -} // namespace - -CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser( - const CFX_RetainPtr<CFX_SeekableStreamProxy>& pStream) - : m_pStream(pStream), - m_iXMLPlaneSize(32 * 1024), - m_iCurrentPos(0), - m_iCurrentNodeNum(-1), - m_iLastNodeNum(-1), - m_iParsedBytes(0), - m_ParsedChars(0), - m_iBufferChars(0), - m_bEOS(false), - m_Start(0), - m_End(0), - m_iAllocStep(m_BlockBuffer.GetAllocStep()), - m_pCurrentBlock(nullptr), - m_iIndexInBlock(0), - m_iTextDataLength(0), - m_syntaxParserResult(FDE_XmlSyntaxResult::None), - m_syntaxParserState(FDE_XmlSyntaxState::Text), - m_wQuotationMark(0), - m_iEntityStart(-1) { - ASSERT(pStream); - - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FDE_XMLNODE_Unknown; - - m_iXMLPlaneSize = - std::min(m_iXMLPlaneSize, - pdfium::base::checked_cast<FX_STRSIZE>(m_pStream->GetLength())); - m_iCurrentPos = m_pStream->GetBOMLength(); - - FX_SAFE_STRSIZE alloc_size_safe = m_iXMLPlaneSize; - alloc_size_safe += 1; // For NUL. 
- if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return; - } - - m_Buffer.resize(pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe)); - - m_BlockBuffer.InitBuffer(); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); -} - -CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() {} - -FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { - if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || - m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { - return m_syntaxParserResult; - } - - int32_t iStreamLength = m_pStream->GetLength(); - int32_t iPos; - - FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None; - while (true) { - if (m_Start >= m_End) { - if (m_bEOS || m_iCurrentPos >= iStreamLength) { - m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; - return m_syntaxParserResult; - } - m_ParsedChars += m_End; - m_iParsedBytes = m_iCurrentPos; - if (m_pStream->GetPosition() != m_iCurrentPos) - m_pStream->Seek(CFX_SeekableStreamProxy::Pos::Begin, m_iCurrentPos); - - m_iBufferChars = - m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS); - iPos = m_pStream->GetPosition(); - if (m_iBufferChars < 1) { - m_iCurrentPos = iStreamLength; - m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; - return m_syntaxParserResult; - } - m_iCurrentPos = iPos; - m_Start = 0; - m_End = m_iBufferChars; - } - - while (m_Start < m_End) { - wchar_t ch = m_Buffer[m_Start]; - switch (m_syntaxParserState) { - case FDE_XmlSyntaxState::Text: - if (ch == L'<') { - if (!m_BlockBuffer.IsEmpty()) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_iEntityStart = -1; - syntaxParserResult = FDE_XmlSyntaxResult::Text; - } else { - m_Start++; - m_syntaxParserState = FDE_XmlSyntaxState::Node; - } - } else { - ParseTextChar(ch); - 
} - break; - case FDE_XmlSyntaxState::Node: - if (ch == L'!') { - m_Start++; - m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl; - } else if (ch == L'/') { - m_Start++; - m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; - } else if (ch == L'?') { - m_iLastNodeNum++; - m_iCurrentNodeNum = m_iLastNodeNum; - m_CurNode.iNodeNum = m_iLastNodeNum; - m_CurNode.eNodeType = FDE_XMLNODE_Instruction; - m_XMLNodeStack.push(m_CurNode); - m_Start++; - m_syntaxParserState = FDE_XmlSyntaxState::Target; - syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen; - } else { - m_iLastNodeNum++; - m_iCurrentNodeNum = m_iLastNodeNum; - m_CurNode.iNodeNum = m_iLastNodeNum; - m_CurNode.eNodeType = FDE_XMLNODE_Element; - m_XMLNodeStack.push(m_CurNode); - m_syntaxParserState = FDE_XmlSyntaxState::Tag; - syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen; - } - break; - case FDE_XmlSyntaxState::Target: - case FDE_XmlSyntaxState::Tag: - if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { - if (m_BlockBuffer.IsEmpty()) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (m_syntaxParserState != FDE_XmlSyntaxState::Target) - syntaxParserResult = FDE_XmlSyntaxResult::TagName; - else - syntaxParserResult = FDE_XmlSyntaxResult::TargetName; - - m_syntaxParserState = FDE_XmlSyntaxState::AttriName; - } else { - if (m_iIndexInBlock == m_iAllocStep) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); - m_Start++; - } - break; - case FDE_XmlSyntaxState::AttriName: - if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) { - m_Start++; - break; - } - if (!IsXMLNameChar(ch, 
m_BlockBuffer.IsEmpty())) { - if (m_BlockBuffer.IsEmpty()) { - if (m_CurNode.eNodeType == FDE_XMLNODE_Element) { - if (ch == L'>' || ch == L'/') { - m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; - break; - } - } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { - if (ch == L'?') { - m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; - m_Start++; - } else { - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - } - break; - } - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { - if (ch != '=' && !IsXMLWhiteSpace(ch)) { - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - break; - } - } - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; - syntaxParserResult = FDE_XmlSyntaxResult::AttriName; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); - m_Start++; - } - break; - case FDE_XmlSyntaxState::AttriEqualSign: - if (IsXMLWhiteSpace(ch)) { - m_Start++; - break; - } - if (ch != L'=') { - if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - break; - } - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation; - m_Start++; - } - break; - case FDE_XmlSyntaxState::AttriQuotation: - if (IsXMLWhiteSpace(ch)) { - m_Start++; - break; - } - if (ch != L'\"' && ch != L'\'') { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } else { - m_wQuotationMark 
= ch; - m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; - m_Start++; - } - break; - case FDE_XmlSyntaxState::AttriValue: - if (ch == m_wQuotationMark) { - if (m_iEntityStart > -1) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_wQuotationMark = 0; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_Start++; - m_syntaxParserState = FDE_XmlSyntaxState::AttriName; - syntaxParserResult = FDE_XmlSyntaxResult::AttriValue; - } else { - ParseTextChar(ch); - } - break; - case FDE_XmlSyntaxState::CloseInstruction: - if (ch != L'>') { - if (m_iIndexInBlock == m_iAllocStep) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); - m_syntaxParserState = FDE_XmlSyntaxState::TargetData; - } else if (!m_BlockBuffer.IsEmpty()) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - syntaxParserResult = FDE_XmlSyntaxResult::TargetData; - } else { - m_Start++; - if (m_XMLNodeStack.empty()) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_XMLNodeStack.pop(); - if (!m_XMLNodeStack.empty()) { - m_CurNode = m_XMLNodeStack.top(); - } else { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FDE_XMLNODE_Unknown; - } - m_iCurrentNodeNum = m_CurNode.iNodeNum; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose; - } - break; - case FDE_XmlSyntaxState::BreakElement: - if (ch == L'>') { - m_syntaxParserState = 
FDE_XmlSyntaxState::Text; - syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak; - } else if (ch == L'/') { - m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; - } else { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_Start++; - break; - case FDE_XmlSyntaxState::CloseElement: - if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { - if (ch == L'>') { - if (m_XMLNodeStack.empty()) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - m_XMLNodeStack.pop(); - if (!m_XMLNodeStack.empty()) { - m_CurNode = m_XMLNodeStack.top(); - } else { - m_CurNode.iNodeNum = -1; - m_CurNode.eNodeType = FDE_XMLNODE_Unknown; - } - m_iCurrentNodeNum = m_CurNode.iNodeNum; - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - syntaxParserResult = FDE_XmlSyntaxResult::ElementClose; - } else if (!IsXMLWhiteSpace(ch)) { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); - } - m_Start++; - break; - case FDE_XmlSyntaxState::SkipCommentOrDecl: - if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"--", 2) == 0) { - m_Start += 2; - m_syntaxParserState = FDE_XmlSyntaxState::SkipComment; - } else if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"[CDATA[", 7) == - 0) { - m_Start += 7; - m_syntaxParserState = FDE_XmlSyntaxState::SkipCData; - } else { - m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode; - m_SkipChar = L'>'; - m_SkipStack.push(L'>'); - } - break; - case FDE_XmlSyntaxState::SkipCData: { - if 
(FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) { - m_Start += 3; - syntaxParserResult = FDE_XmlSyntaxResult::CData; - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - } else { - if (m_iIndexInBlock == m_iAllocStep) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return FDE_XmlSyntaxResult::Error; - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); - m_Start++; - } - break; - } - case FDE_XmlSyntaxState::SkipDeclNode: - if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { - m_Start++; - if (ch != m_SkipChar) - break; - - m_SkipStack.pop(); - if (m_SkipStack.empty()) - m_syntaxParserState = FDE_XmlSyntaxState::Text; - else - m_SkipChar = m_SkipStack.top(); - } else { - switch (ch) { - case L'<': - m_SkipChar = L'>'; - m_SkipStack.push(L'>'); - break; - case L'[': - m_SkipChar = L']'; - m_SkipStack.push(L']'); - break; - case L'(': - m_SkipChar = L')'; - m_SkipStack.push(L')'); - break; - case L'\'': - m_SkipChar = L'\''; - m_SkipStack.push(L'\''); - break; - case L'\"': - m_SkipChar = L'\"'; - m_SkipStack.push(L'\"'); - break; - default: - if (ch == m_SkipChar) { - m_SkipStack.pop(); - if (m_SkipStack.empty()) { - if (m_BlockBuffer.GetDataLength() >= 9) - (void)m_BlockBuffer.GetTextData(0, 7); - - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_syntaxParserState = FDE_XmlSyntaxState::Text; - } else { - m_SkipChar = m_SkipStack.top(); - } - } - break; - } - if (!m_SkipStack.empty()) { - if (m_iIndexInBlock == m_iAllocStep) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - 
m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); - } - m_Start++; - } - break; - case FDE_XmlSyntaxState::SkipComment: - if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"-->", 3) == 0) { - m_Start += 2; - m_syntaxParserState = FDE_XmlSyntaxState::Text; - } - - m_Start++; - break; - case FDE_XmlSyntaxState::TargetData: - if (IsXMLWhiteSpace(ch)) { - if (m_BlockBuffer.IsEmpty()) { - m_Start++; - break; - } else if (m_wQuotationMark == 0) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_wQuotationMark = 0; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_Start++; - syntaxParserResult = FDE_XmlSyntaxResult::TargetData; - break; - } - } - if (ch == '?') { - m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; - m_Start++; - } else if (ch == '\"') { - if (m_wQuotationMark == 0) { - m_wQuotationMark = ch; - m_Start++; - } else if (ch == m_wQuotationMark) { - m_iTextDataLength = m_BlockBuffer.GetDataLength(); - m_wQuotationMark = 0; - m_BlockBuffer.Reset(true); - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_Start++; - syntaxParserResult = FDE_XmlSyntaxResult::TargetData; - } else { - m_syntaxParserResult = FDE_XmlSyntaxResult::Error; - return m_syntaxParserResult; - } - } else { - if (m_iIndexInBlock == m_iAllocStep) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) { - return FDE_XmlSyntaxResult::Error; - } - } - m_pCurrentBlock[m_iIndexInBlock++] = ch; - m_BlockBuffer.IncrementDataLength(); - m_Start++; - } - break; - default: - break; - } - if (syntaxParserResult != FDE_XmlSyntaxResult::None) - return syntaxParserResult; - } - } - return FDE_XmlSyntaxResult::Text; -} - -int32_t CFDE_XMLSyntaxParser::GetStatus() const { - if (!m_pStream) - return -1; - - int32_t iStreamLength = m_pStream->GetLength(); - if (iStreamLength < 1) - return 100; - - if 
(m_syntaxParserResult == FDE_XmlSyntaxResult::Error) - return -1; - - if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) - return 100; - return m_iParsedBytes * 100 / iStreamLength; -} - -FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { - if (!m_pStream) - return 0; - - int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start); - return m_iParsedBytes + nDstLen; -} - -void CFDE_XMLSyntaxParser::ParseTextChar(wchar_t character) { - if (m_iIndexInBlock == m_iAllocStep) { - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - if (!m_pCurrentBlock) - return; - } - - m_pCurrentBlock[m_iIndexInBlock++] = character; - m_BlockBuffer.IncrementDataLength(); - if (m_iEntityStart > -1 && character == L';') { - CFX_WideString csEntity = m_BlockBuffer.GetTextData( - m_iEntityStart + 1, - m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1); - int32_t iLen = csEntity.GetLength(); - if (iLen > 0) { - if (csEntity[0] == L'#') { - uint32_t ch = 0; - wchar_t w; - if (iLen > 1 && csEntity[1] == L'x') { - for (int32_t i = 2; i < iLen; i++) { - w = csEntity[i]; - if (w >= L'0' && w <= L'9') { - ch = (ch << 4) + w - L'0'; - } else if (w >= L'A' && w <= L'F') { - ch = (ch << 4) + w - 55; - } else if (w >= L'a' && w <= L'f') { - ch = (ch << 4) + w - 87; - } else { - break; - } - } - } else { - for (int32_t i = 1; i < iLen; i++) { - w = csEntity[i]; - if (w < L'0' || w > L'9') - break; - ch = ch * 10 + w - L'0'; - } - } - if (ch > kMaxCharRange) - ch = ' '; - - character = static_cast<wchar_t>(ch); - if (character != 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, character); - m_iEntityStart++; - } - } else { - if (csEntity.Compare(L"amp") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); - m_iEntityStart++; - } else if (csEntity.Compare(L"lt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); - m_iEntityStart++; - } else if (csEntity.Compare(L"gt") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); - 
m_iEntityStart++; - } else if (csEntity.Compare(L"apos") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); - m_iEntityStart++; - } else if (csEntity.Compare(L"quot") == 0) { - m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); - m_iEntityStart++; - } - } - } - if (m_iEntityStart >= 0 && - m_BlockBuffer.GetDataLength() > static_cast<size_t>(m_iEntityStart)) { - m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() - - m_iEntityStart); - } - std::tie(m_pCurrentBlock, m_iIndexInBlock) = - m_BlockBuffer.GetAvailableBlock(); - m_iEntityStart = -1; - } else if (m_iEntityStart < 0 && character == L'&') { - m_iEntityStart = m_BlockBuffer.GetDataLength() - 1; - } - m_Start++; -} diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser.h b/xfa/fde/xml/cfde_xmlsyntaxparser.h deleted file mode 100644 index 8b6c9ac8b5..0000000000 --- a/xfa/fde/xml/cfde_xmlsyntaxparser.h +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_ -#define XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_ - -#include <stack> -#include <vector> - -#include "core/fxcrt/cfx_blockbuffer.h" -#include "core/fxcrt/cfx_retain_ptr.h" -#include "core/fxcrt/cfx_seekablestreamproxy.h" -#include "core/fxcrt/fx_string.h" -#include "xfa/fde/xml/cfde_xmlnode.h" - -enum class FDE_XmlSyntaxResult { - None, - InstructionOpen, - InstructionClose, - ElementOpen, - ElementBreak, - ElementClose, - TargetName, - TagName, - AttriName, - AttriValue, - Text, - CData, - TargetData, - Error, - EndOfString -}; - -class CFDE_XMLSyntaxParser { - public: - explicit CFDE_XMLSyntaxParser( - const CFX_RetainPtr<CFX_SeekableStreamProxy>& pStream); - ~CFDE_XMLSyntaxParser(); - - FDE_XmlSyntaxResult DoSyntaxParse(); - - int32_t GetStatus() const; - FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; } - FX_FILESIZE GetCurrentBinaryPos() const; - int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; } - int32_t GetLastNodeNumber() const { return m_iLastNodeNum; } - - CFX_WideString GetTargetName() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); - } - - CFX_WideString GetTagName() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); - } - - CFX_WideString GetAttributeName() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); - } - - CFX_WideString GetAttributeValue() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); - } - - CFX_WideString GetTextData() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); - } - - CFX_WideString GetTargetData() const { - return m_BlockBuffer.GetTextData(0, m_iTextDataLength); - } - - protected: - enum class FDE_XmlSyntaxState { - Text, - Node, - Target, - Tag, - AttriName, - AttriEqualSign, - AttriQuotation, - AttriValue, - Entity, - EntityDecimal, - EntityHex, - CloseInstruction, - BreakElement, - CloseElement, - SkipDeclNode, - DeclCharData, - 
SkipComment, - SkipCommentOrDecl, - SkipCData, - TargetData - }; - - void ParseTextChar(wchar_t ch); - - CFX_RetainPtr<CFX_SeekableStreamProxy> m_pStream; - FX_STRSIZE m_iXMLPlaneSize; - int32_t m_iCurrentPos; - int32_t m_iCurrentNodeNum; - int32_t m_iLastNodeNum; - int32_t m_iParsedBytes; - FX_FILESIZE m_ParsedChars; - std::vector<wchar_t> m_Buffer; - int32_t m_iBufferChars; - bool m_bEOS; - FX_FILESIZE m_Start; // Start position in m_Buffer - FX_FILESIZE m_End; // End position in m_Buffer - FDE_XMLNODE m_CurNode; - std::stack<FDE_XMLNODE> m_XMLNodeStack; - CFX_BlockBuffer m_BlockBuffer; - int32_t m_iAllocStep; - wchar_t* m_pCurrentBlock; // Pointer into CFX_BlockBuffer - int32_t m_iIndexInBlock; - int32_t m_iTextDataLength; - FDE_XmlSyntaxResult m_syntaxParserResult; - FDE_XmlSyntaxState m_syntaxParserState; - wchar_t m_wQuotationMark; - int32_t m_iEntityStart; - std::stack<wchar_t> m_SkipStack; - wchar_t m_SkipChar; -}; - -#endif // XFA_FDE_XML_CFDE_XMLSYNTAXPARSER_H_ diff --git a/xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp b/xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp deleted file mode 100644 index 45ad3bb2f0..0000000000 --- a/xfa/fde/xml/cfde_xmlsyntaxparser_unittest.cpp +++ /dev/null @@ -1,527 +0,0 @@ -// Copyright 2016 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "xfa/fde/xml/cfde_xmlsyntaxparser.h" - -#include <memory> - -#include "core/fxcrt/cfx_seekablestreamproxy.h" -#include "core/fxcrt/fx_codepage.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "testing/test_support.h" - -class CFDE_XMLSyntaxParserTest : public pdfium::FPDF_Test {}; - -TEST_F(CFDE_XMLSyntaxParserTest, CData) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <![CDATA[\n" - " if (a[1] < 3)\n" - " app.alert(\"Tclams\");\n" - " ]]>\n" - "</script>"; - - const wchar_t* cdata = - L"\n" - L" if (a[1] < 3)\n" - L" app.alert(\"Tclams\");\n" - L" "; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); - ASSERT_EQ(cdata, parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, CDataWithInnerScript) { - const char* input = - 
"<script contentType=\"application/x-javascript\">\n" - " <![CDATA[\n" - " if (a[1] < 3)\n" - " app.alert(\"Tclams\");\n" - " </script>\n" - " ]]>\n" - "</script>"; - - const wchar_t* cdata = - L"\n" - L" if (a[1] < 3)\n" - L" app.alert(\"Tclams\");\n" - L" </script>\n" - L" "; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); - ASSERT_EQ(cdata, parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, ArrowBangArrow) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <!>\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - 
ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, ArrowBangBracketArrow) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <![>\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - // Parser 
walks to end of input. - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, IncompleteCData) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <![CDATA>\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - // Parser walks to end of input. 
- - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, UnClosedCData) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <![CDATA[\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - // Parser walks to end of input. 
- - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, EmptyCData) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <![CDATA[]]>\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::CData, parser.DoSyntaxParse()); - ASSERT_EQ(L"", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, Comment) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <!-- A Comment -->\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, 
parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, IncorrectCommentStart) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <!- A Comment -->\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::Text, 
parser.DoSyntaxParse()); - ASSERT_EQ(L"\n", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, CommentEmpty) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <!---->\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, CommentThreeDash) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <!--->\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - 
CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, CommentTwoDash) { - const char* input = - "<script contentType=\"application/x-javascript\">\n" - " <!-->\n" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"\n ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, Entities) { - const char* input = - "<script contentType=\"application/x-javascript\">" - "B" - "T" 
- "H" - "ꭈ" - "�" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L"BTH\xab48", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, EntityOverflowHex) { - const char* input = - "<script contentType=\"application/x-javascript\">" - "�" - "�" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - 
ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L" ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} - -TEST_F(CFDE_XMLSyntaxParserTest, EntityOverflowDecimal) { - const char* input = - "<script contentType=\"application/x-javascript\">" - "�" - "�" - "</script>"; - - CFX_RetainPtr<CFX_SeekableStreamProxy> stream = - pdfium::MakeRetain<CFX_SeekableStreamProxy>( - reinterpret_cast<uint8_t*>(const_cast<char*>(input)), strlen(input)); - stream->SetCodePage(FX_CODEPAGE_UTF8); - - CFDE_XMLSyntaxParser parser(stream); - ASSERT_EQ(FDE_XmlSyntaxResult::ElementOpen, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::TagName, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::AttriName, parser.DoSyntaxParse()); - ASSERT_EQ(L"contentType", parser.GetAttributeName()); - ASSERT_EQ(FDE_XmlSyntaxResult::AttriValue, parser.DoSyntaxParse()); - ASSERT_EQ(L"application/x-javascript", parser.GetAttributeValue()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementBreak, parser.DoSyntaxParse()); - ASSERT_EQ(FDE_XmlSyntaxResult::Text, parser.DoSyntaxParse()); - ASSERT_EQ(L" ", parser.GetTextData()); - - ASSERT_EQ(FDE_XmlSyntaxResult::ElementClose, parser.DoSyntaxParse()); - ASSERT_EQ(L"script", parser.GetTagName()); - - ASSERT_EQ(FDE_XmlSyntaxResult::EndOfString, parser.DoSyntaxParse()); -} diff --git a/xfa/fde/xml/cfde_xmltext.cpp b/xfa/fde/xml/cfde_xmltext.cpp deleted file mode 100644 index 2af66ac70c..0000000000 --- a/xfa/fde/xml/cfde_xmltext.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "xfa/fde/xml/cfde_xmltext.h" - -#include "third_party/base/ptr_util.h" - -CFDE_XMLText::CFDE_XMLText(const CFX_WideString& wsText) - : CFDE_XMLNode(), m_wsText(wsText) {} - -CFDE_XMLText::~CFDE_XMLText() {} - -FDE_XMLNODETYPE CFDE_XMLText::GetType() const { - return FDE_XMLNODE_Text; -} - -std::unique_ptr<CFDE_XMLNode> CFDE_XMLText::Clone() { - return pdfium::MakeUnique<CFDE_XMLText>(m_wsText); -} diff --git a/xfa/fde/xml/cfde_xmltext.h b/xfa/fde/xml/cfde_xmltext.h deleted file mode 100644 index 6987c49980..0000000000 --- a/xfa/fde/xml/cfde_xmltext.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef XFA_FDE_XML_CFDE_XMLTEXT_H_ -#define XFA_FDE_XML_CFDE_XMLTEXT_H_ - -#include <memory> - -#include "core/fxcrt/fx_string.h" -#include "xfa/fde/xml/cfde_xmlnode.h" - -class CFDE_XMLText : public CFDE_XMLNode { - public: - explicit CFDE_XMLText(const CFX_WideString& wsText); - ~CFDE_XMLText() override; - - // CFDE_XMLNode - FDE_XMLNODETYPE GetType() const override; - std::unique_ptr<CFDE_XMLNode> Clone() override; - - CFX_WideString GetText() const { return m_wsText; } - void SetText(const CFX_WideString& wsText) { m_wsText = wsText; } - - private: - CFX_WideString m_wsText; -}; - -#endif // XFA_FDE_XML_CFDE_XMLTEXT_H_ |