From a8c23df288d6fdfa2591ec43843ab3090bd95951 Mon Sep 17 00:00:00 2001 From: dan sinclair Date: Mon, 16 Apr 2018 19:54:47 +0000 Subject: Remove CXML This CL deletes the CXML parser as it is no longer used. Change-Id: Ic4815b683515ee860a6ae5c7ca39e15573e584bc Reviewed-on: https://pdfium-review.googlesource.com/30694 Commit-Queue: dsinclair Reviewed-by: Henrique Nakashima --- core/fxcrt/xml/cxml_attritem.cpp | 12 - core/fxcrt/xml/cxml_attritem.h | 21 -- core/fxcrt/xml/cxml_content.cpp | 20 -- core/fxcrt/xml/cxml_content.h | 26 -- core/fxcrt/xml/cxml_databufacc.cpp | 20 -- core/fxcrt/xml/cxml_databufacc.h | 31 --- core/fxcrt/xml/cxml_element.cpp | 165 ------------ core/fxcrt/xml/cxml_element.h | 73 ----- core/fxcrt/xml/cxml_object.cpp | 25 -- core/fxcrt/xml/cxml_object.h | 43 --- core/fxcrt/xml/cxml_parser.cpp | 531 ------------------------------------- core/fxcrt/xml/cxml_parser.h | 57 ---- 12 files changed, 1024 deletions(-) delete mode 100644 core/fxcrt/xml/cxml_attritem.cpp delete mode 100644 core/fxcrt/xml/cxml_attritem.h delete mode 100644 core/fxcrt/xml/cxml_content.cpp delete mode 100644 core/fxcrt/xml/cxml_content.h delete mode 100644 core/fxcrt/xml/cxml_databufacc.cpp delete mode 100644 core/fxcrt/xml/cxml_databufacc.h delete mode 100644 core/fxcrt/xml/cxml_element.cpp delete mode 100644 core/fxcrt/xml/cxml_element.h delete mode 100644 core/fxcrt/xml/cxml_object.cpp delete mode 100644 core/fxcrt/xml/cxml_object.h delete mode 100644 core/fxcrt/xml/cxml_parser.cpp delete mode 100644 core/fxcrt/xml/cxml_parser.h (limited to 'core/fxcrt') diff --git a/core/fxcrt/xml/cxml_attritem.cpp b/core/fxcrt/xml/cxml_attritem.cpp deleted file mode 100644 index cbbf3f6041..0000000000 --- a/core/fxcrt/xml/cxml_attritem.cpp +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/xml/cxml_attritem.h" - -bool CXML_AttrItem::Matches(const ByteString& space, - const ByteString& name) const { - return (space.IsEmpty() || m_QSpaceName == space) && m_AttrName == name; -} diff --git a/core/fxcrt/xml/cxml_attritem.h b/core/fxcrt/xml/cxml_attritem.h deleted file mode 100644 index 84d82950f9..0000000000 --- a/core/fxcrt/xml/cxml_attritem.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CXML_ATTRITEM_H_ -#define CORE_FXCRT_XML_CXML_ATTRITEM_H_ - -#include "core/fxcrt/fx_string.h" - -class CXML_AttrItem { - public: - bool Matches(const ByteString& space, const ByteString& name) const; - - ByteString m_QSpaceName; - ByteString m_AttrName; - WideString m_Value; -}; - -#endif // CORE_FXCRT_XML_CXML_ATTRITEM_H_ diff --git a/core/fxcrt/xml/cxml_content.cpp b/core/fxcrt/xml/cxml_content.cpp deleted file mode 100644 index a235009424..0000000000 --- a/core/fxcrt/xml/cxml_content.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/xml/cxml_content.h" - -CXML_Content::CXML_Content(bool bCDATA, const WideStringView& content) - : m_bCDATA(bCDATA), m_Content(content) {} - -CXML_Content::~CXML_Content() {} - -CXML_Content* CXML_Content::AsContent() { - return this; -} - -const CXML_Content* CXML_Content::AsContent() const { - return this; -} diff --git a/core/fxcrt/xml/cxml_content.h b/core/fxcrt/xml/cxml_content.h deleted file mode 100644 index 97c1abfe1c..0000000000 --- a/core/fxcrt/xml/cxml_content.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CXML_CONTENT_H_ -#define CORE_FXCRT_XML_CXML_CONTENT_H_ - -#include "core/fxcrt/fx_string.h" -#include "core/fxcrt/xml/cxml_object.h" - -class CXML_Content : public CXML_Object { - public: - CXML_Content(bool bCDATA, const WideStringView& content); - ~CXML_Content() override; - - // CXML_Object: - CXML_Content* AsContent() override; - const CXML_Content* AsContent() const override; - - bool m_bCDATA; - WideString m_Content; -}; - -#endif // CORE_FXCRT_XML_CXML_CONTENT_H_ diff --git a/core/fxcrt/xml/cxml_databufacc.cpp b/core/fxcrt/xml/cxml_databufacc.cpp deleted file mode 100644 index b3e2e1f3e0..0000000000 --- a/core/fxcrt/xml/cxml_databufacc.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/xml/cxml_databufacc.h" - -CXML_DataBufAcc::CXML_DataBufAcc(const uint8_t* pBuffer, size_t size) - : m_pBuffer(pBuffer), m_dwSize(size), m_dwCurPos(0) {} - -CXML_DataBufAcc::~CXML_DataBufAcc() {} - -bool CXML_DataBufAcc::ReadNextBlock() { - if (m_dwCurPos >= m_dwSize) - return false; - - m_dwCurPos = m_dwSize; - return true; -} diff --git a/core/fxcrt/xml/cxml_databufacc.h b/core/fxcrt/xml/cxml_databufacc.h deleted file mode 100644 index 4fb44b302c..0000000000 --- a/core/fxcrt/xml/cxml_databufacc.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CXML_DATABUFACC_H_ -#define CORE_FXCRT_XML_CXML_DATABUFACC_H_ - -#include "core/fxcrt/fx_system.h" - -class CXML_DataBufAcc { - public: - CXML_DataBufAcc(const uint8_t* pBuffer, size_t size); - ~CXML_DataBufAcc(); - - bool IsEOF() const { return m_dwCurPos >= m_dwSize; } - FX_FILESIZE GetPosition() const { - return static_cast(m_dwCurPos); - } - bool ReadNextBlock(); - const uint8_t* GetBlockBuffer() const { return m_pBuffer; } - size_t GetBlockSize() const { return m_dwSize; } - - private: - const uint8_t* m_pBuffer; - size_t m_dwSize; - size_t m_dwCurPos; -}; - -#endif // CORE_FXCRT_XML_CXML_DATABUFACC_H_ diff --git a/core/fxcrt/xml/cxml_element.cpp b/core/fxcrt/xml/cxml_element.cpp deleted file mode 100644 index 1d42e8eac3..0000000000 --- a/core/fxcrt/xml/cxml_element.cpp +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/xml/cxml_element.h" - -#include "core/fxcrt/xml/cxml_content.h" -#include "core/fxcrt/xml/cxml_parser.h" - -namespace { - -void SplitQualifiedName(const ByteStringView& bsFullName, - ByteStringView* bsSpace, - ByteStringView* bsName) { - if (bsFullName.IsEmpty()) - return; - - auto iStart = bsFullName.Find(':'); - if (iStart.has_value()) { - *bsSpace = bsFullName.Left(iStart.value()); - *bsName = bsFullName.Right(bsFullName.GetLength() - (iStart.value() + 1)); - } else { - *bsName = bsFullName; - } -} - -} // namespace - -// static -std::unique_ptr CXML_Element::Parse(const void* pBuffer, - size_t size) { - CXML_Parser parser; - if (!parser.Init(static_cast(pBuffer), size)) - return nullptr; - return parser.ParseElement(nullptr, false); -} - -CXML_Element::CXML_Element(const CXML_Element* pParent, - const ByteStringView& qSpace, - const ByteStringView& tagname) - : m_pParent(pParent), m_QSpaceName(qSpace), m_TagName(tagname) {} - -CXML_Element::~CXML_Element() {} - -CXML_Element* CXML_Element::AsElement() { - return this; -} - -const CXML_Element* CXML_Element::AsElement() const { - return this; -} - -ByteString CXML_Element::GetTagName() const { - return m_TagName; -} - -ByteString CXML_Element::GetNamespaceURI(const ByteString& qName) const { - const CXML_Element* pElement = this; - do { - const WideString* pwsSpace; - if (qName.IsEmpty()) - pwsSpace = pElement->Lookup("", "xmlns"); - else - pwsSpace = pElement->Lookup("xmlns", qName); - if (pwsSpace) - return pwsSpace->UTF8Encode(); - - pElement = pElement->GetParent(); - } while (pElement); - return ByteString(); -} - -void CXML_Element::GetAttrByIndex(size_t index, - ByteString* space, - ByteString* name, - WideString* value) const { - if (index >= m_AttrMap.size()) - return; - - const CXML_AttrItem& item = m_AttrMap[index]; - *space = item.m_QSpaceName; - *name = item.m_AttrName; - *value = item.m_Value; -} - -WideString CXML_Element::GetAttrValue(const ByteStringView& name) const { - ByteStringView bsSpace; - ByteStringView bsName; - SplitQualifiedName(name, &bsSpace, &bsName); - - WideString attr; - const WideString* pValue = Lookup(ByteString(bsSpace), ByteString(bsName)); - if (pValue) - attr = *pValue; - return attr; -} - -int CXML_Element::GetAttrInteger(const ByteStringView& name) const { - ByteStringView bsSpace; - ByteStringView bsName; - SplitQualifiedName(name, &bsSpace, &bsName); - - const WideString* pwsValue = Lookup(ByteString(bsSpace), ByteString(bsName)); - return pwsValue ? pwsValue->GetInteger() : 0; -} - -size_t CXML_Element::CountElements(const ByteStringView& space, - const ByteStringView& tag) const { - size_t count = 0; - for (const auto& pChild : m_Children) { - const CXML_Element* pKid = pChild->AsElement(); - if (MatchesElement(pKid, space, tag)) - count++; - } - return count; -} - -CXML_Object* CXML_Element::GetChild(size_t index) const { - return index < m_Children.size() ? m_Children[index].get() : nullptr; -} - -CXML_Element* CXML_Element::GetElement(const ByteStringView& space, - const ByteStringView& tag, - size_t nth) const { - for (const auto& pChild : m_Children) { - CXML_Element* pKid = pChild->AsElement(); - if (MatchesElement(pKid, space, tag)) { - if (nth == 0) - return pKid; - --nth; - } - } - return nullptr; -} - -void CXML_Element::SetAttribute(const ByteString& space, - const ByteString& name, - const WideString& value) { - for (CXML_AttrItem& item : m_AttrMap) { - if (item.Matches(space, name)) { - item.m_Value = value; - return; - } - } - m_AttrMap.push_back({space, name, WideString(value)}); -} - -// static -bool CXML_Element::MatchesElement(const CXML_Element* pKid, - const ByteStringView& space, - const ByteStringView& tag) { - return pKid && pKid->m_TagName == tag && - (space.IsEmpty() || pKid->m_QSpaceName == space); -} - -const WideString* CXML_Element::Lookup(const ByteString& space, - const ByteString& name) const { - for (const CXML_AttrItem& item : m_AttrMap) { - if (item.Matches(space, name)) - return &item.m_Value; - } - return nullptr; -} diff --git a/core/fxcrt/xml/cxml_element.h b/core/fxcrt/xml/cxml_element.h deleted file mode 100644 index d3049d77a4..0000000000 --- a/core/fxcrt/xml/cxml_element.h +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CXML_ELEMENT_H_ -#define CORE_FXCRT_XML_CXML_ELEMENT_H_ - -#include -#include -#include - -#include "core/fxcrt/xml/cxml_attritem.h" -#include "core/fxcrt/xml/cxml_object.h" - -class CXML_Element : public CXML_Object { - public: - static std::unique_ptr Parse(const void* pBuffer, size_t size); - - CXML_Element(const CXML_Element* pParent, - const ByteStringView& qSpace, - const ByteStringView& tagname); - ~CXML_Element() override; - - // CXML_Object: - CXML_Element* AsElement() override; - const CXML_Element* AsElement() const override; - - ByteString GetTagName() const; - ByteString GetNamespaceURI(const ByteString& qName) const; - const CXML_Element* GetParent() const { return m_pParent.Get(); } - size_t CountAttrs() const { return m_AttrMap.size(); } - void GetAttrByIndex(size_t index, - ByteString* space, - ByteString* name, - WideString* value) const; - WideString GetAttrValue(const ByteStringView& name) const; - - int GetAttrInteger(const ByteStringView& name) const; - - void AppendChild(std::unique_ptr child) { - m_Children.push_back(std::move(child)); - } - - size_t CountChildren() const { return m_Children.size(); } - size_t CountElements(const ByteStringView& space, - const ByteStringView& tag) const; - CXML_Object* GetChild(size_t index) const; - CXML_Element* GetElement(const ByteStringView& space, - const ByteStringView& tag, - size_t nth) const; - - void SetAttribute(const ByteString& space, - const ByteString& name, - const WideString& value); - - private: - static bool MatchesElement(const CXML_Element* pKid, - const ByteStringView& space, - const ByteStringView& tag); - - const WideString* Lookup(const ByteString& space, - const ByteString& name) const; - - UnownedPtr const m_pParent; - const ByteString m_QSpaceName; - const ByteString m_TagName; - std::vector m_AttrMap; - std::vector> m_Children; -}; - -#endif // CORE_FXCRT_XML_CXML_ELEMENT_H_ diff --git a/core/fxcrt/xml/cxml_object.cpp b/core/fxcrt/xml/cxml_object.cpp deleted file mode 100644 index 61e88cb407..0000000000 --- a/core/fxcrt/xml/cxml_object.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include "core/fxcrt/xml/cxml_object.h" - -CXML_Object::~CXML_Object() {} - -CXML_Content* CXML_Object::AsContent() { - return nullptr; -} - -CXML_Element* CXML_Object::AsElement() { - return nullptr; -} - -const CXML_Content* CXML_Object::AsContent() const { - return nullptr; -} - -const CXML_Element* CXML_Object::AsElement() const { - return nullptr; -} diff --git a/core/fxcrt/xml/cxml_object.h b/core/fxcrt/xml/cxml_object.h deleted file mode 100644 index d009359932..0000000000 --- a/core/fxcrt/xml/cxml_object.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2017 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CXML_OBJECT_H_ -#define CORE_FXCRT_XML_CXML_OBJECT_H_ - -class CXML_Content; -class CXML_Element; - -class CXML_Object { - public: - virtual ~CXML_Object(); - - virtual CXML_Content* AsContent(); - virtual const CXML_Content* AsContent() const; - - virtual CXML_Element* AsElement(); - virtual const CXML_Element* AsElement() const; - - protected: - CXML_Object() {} -}; - -inline CXML_Content* ToContent(CXML_Object* pObj) { - return pObj ? pObj->AsContent() : nullptr; -} - -inline const CXML_Content* ToContent(const CXML_Object* pObj) { - return pObj ? pObj->AsContent() : nullptr; -} - -inline CXML_Element* ToElement(CXML_Object* pObj) { - return pObj ? pObj->AsElement() : nullptr; -} - -inline const CXML_Element* ToElement(const CXML_Object* pObj) { - return pObj ? pObj->AsElement() : nullptr; -} - -#endif // CORE_FXCRT_XML_CXML_OBJECT_H_ diff --git a/core/fxcrt/xml/cxml_parser.cpp b/core/fxcrt/xml/cxml_parser.cpp deleted file mode 100644 index 64bb0ec530..0000000000 --- a/core/fxcrt/xml/cxml_parser.cpp +++ /dev/null @@ -1,531 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#include -#include -#include -#include -#include -#include - -#include "core/fxcrt/cfx_utf8decoder.h" -#include "core/fxcrt/cfx_widetextbuf.h" -#include "core/fxcrt/fx_extension.h" -#include "core/fxcrt/fx_fallthrough.h" -#include "core/fxcrt/xml/cxml_content.h" -#include "core/fxcrt/xml/cxml_element.h" -#include "core/fxcrt/xml/cxml_parser.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" - -namespace { - -#define FXCRTM_XML_CHARTYPE_Normal 0x00 -#define FXCRTM_XML_CHARTYPE_SpaceChar 0x01 -#define FXCRTM_XML_CHARTYPE_Letter 0x02 -#define FXCRTM_XML_CHARTYPE_Digital 0x04 -#define FXCRTM_XML_CHARTYPE_NameIntro 0x08 -#define FXCRTM_XML_CHARTYPE_NameChar 0x10 -#define FXCRTM_XML_CHARTYPE_HexDigital 0x20 -#define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40 -#define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60 -#define FXCRTM_XML_CHARTYPE_HexChar 0x60 - -const uint8_t g_FXCRT_XML_ByteTypes[256] = { - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00, - 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18, - 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x01, 0x01, -}; - -constexpr int kMaxDepth = 1024; - -bool g_FXCRT_XML_IsWhiteSpace(uint8_t ch) { - return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar); -} - -bool g_FXCRT_XML_IsDigital(uint8_t ch) { - return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital); -} - -bool g_FXCRT_XML_IsNameIntro(uint8_t ch) { - return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro); -} - -bool g_FXCRT_XML_IsNameChar(uint8_t ch) { - return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar); -} - -} // namespace - -CXML_Parser::CXML_Parser() - : m_nOffset(0), - m_pBuffer(nullptr), - m_dwBufferSize(0), - m_nBufferOffset(0), - m_dwIndex(0) {} - -CXML_Parser::~CXML_Parser() {} - -bool CXML_Parser::Init(const uint8_t* pBuffer, size_t size) { - m_pDataAcc = pdfium::MakeUnique(pBuffer, size); - m_nOffset = 0; - return ReadNextBlock(); -} - -bool CXML_Parser::ReadNextBlock() { - if (!m_pDataAcc->ReadNextBlock()) - return false; - - m_pBuffer = m_pDataAcc->GetBlockBuffer(); - m_dwBufferSize = m_pDataAcc->GetBlockSize(); - m_nBufferOffset = 0; - m_dwIndex = 0; - return m_dwBufferSize > 0; -} - -bool CXML_Parser::IsEOF() { - return m_pDataAcc->IsEOF() && m_dwIndex >= m_dwBufferSize; -} - -void CXML_Parser::SkipWhiteSpaces() { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return; - - do { - while (m_dwIndex < m_dwBufferSize && - g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) { - m_dwIndex++; - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); -} - -void CXML_Parser::GetName(ByteString* space, ByteString* name) { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return; - - std::ostringstream buf; - do { - while (m_dwIndex < m_dwBufferSize) { - uint8_t ch = m_pBuffer[m_dwIndex]; - if (ch == ':') { - *space = ByteString(buf); - buf.str(""); - } else if (g_FXCRT_XML_IsNameChar(ch)) { - buf << static_cast(ch); - } else { - break; - } - m_dwIndex++; - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - *name = ByteString(buf); -} - -void CXML_Parser::SkipLiterals(const ByteStringView& str) { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) { - return; - } - int32_t i = 0, iLen = str.GetLength(); - do { - while (m_dwIndex < m_dwBufferSize) { - if (str[i] != m_pBuffer[m_dwIndex++]) { - i = 0; - continue; - } - i++; - if (i == iLen) - break; - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (i == iLen) - return; - - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - while (!m_pDataAcc->IsEOF()) { - ReadNextBlock(); - m_nOffset = m_nBufferOffset + static_cast(m_dwBufferSize); - } - m_dwIndex = m_dwBufferSize; -} - -uint32_t CXML_Parser::GetCharRef() { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return 0; - - uint8_t ch; - int32_t iState = 0; - std::ostringstream buf; - uint32_t code = 0; - do { - while (m_dwIndex < m_dwBufferSize) { - ch = m_pBuffer[m_dwIndex]; - switch (iState) { - case 0: - if (ch == '#') { - m_dwIndex++; - iState = 2; - break; - } - iState = 1; - FX_FALLTHROUGH; - case 1: - m_dwIndex++; - if (ch == ';') { - std::string ref = buf.str(); - if (ref == "gt") - code = '>'; - else if (ref == "lt") - code = '<'; - else if (ref == "amp") - code = '&'; - else if (ref == "apos") - code = '\''; - else if (ref == "quot") - code = '"'; - iState = 10; - break; - } - buf << static_cast(ch); - break; - case 2: - if (ch == 'x') { - m_dwIndex++; - iState = 4; - break; - } - iState = 3; - FX_FALLTHROUGH; - case 3: - m_dwIndex++; - if (ch == ';') { - iState = 10; - break; - } - if (g_FXCRT_XML_IsDigital(ch)) - code = code * 10 + FXSYS_DecimalCharToInt(static_cast(ch)); - break; - case 4: - m_dwIndex++; - if (ch == ';') { - iState = 10; - break; - } - uint8_t nHex = - g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar; - if (nHex) { - if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) { - code = (code << 4) + - FXSYS_DecimalCharToInt(static_cast(ch)); - } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) { - code = (code << 4) + ch - 87; - } else { - code = (code << 4) + ch - 55; - } - } - break; - } - if (iState == 10) - break; - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) { - break; - } - } while (ReadNextBlock()); - return code; -} - -WideString CXML_Parser::GetAttrValue() { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return WideString(); - - CFX_UTF8Decoder decoder; - uint8_t mark = 0; - uint8_t ch = 0; - do { - while (m_dwIndex < m_dwBufferSize) { - ch = m_pBuffer[m_dwIndex]; - if (mark == 0) { - if (ch != '\'' && ch != '"') - return WideString(); - - mark = ch; - m_dwIndex++; - ch = 0; - continue; - } - m_dwIndex++; - if (ch == mark) - break; - - if (ch == '&') { - decoder.AppendCodePoint(GetCharRef()); - if (IsEOF()) - return WideString(decoder.GetResult()); - } else { - decoder.Input(ch); - } - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - return WideString(decoder.GetResult()); -} - -void CXML_Parser::GetTagName(bool bStartTag, - bool* bEndTag, - ByteString* space, - ByteString* name) { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return; - - *bEndTag = false; - uint8_t ch; - int32_t iState = bStartTag ? 1 : 0; - do { - while (m_dwIndex < m_dwBufferSize) { - ch = m_pBuffer[m_dwIndex]; - switch (iState) { - case 0: - m_dwIndex++; - if (ch != '<') - break; - - iState = 1; - break; - case 1: - if (ch == '?') { - m_dwIndex++; - SkipLiterals("?>"); - iState = 0; - break; - } - if (ch == '!') { - m_dwIndex++; - SkipLiterals("-->"); - iState = 0; - break; - } - if (ch == '/') { - m_dwIndex++; - GetName(space, name); - *bEndTag = true; - } else { - GetName(space, name); - *bEndTag = false; - } - return; - } - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); -} - -std::unique_ptr CXML_Parser::ParseElement(CXML_Element* pParent, - bool bStartTag) { - return ParseElementInternal(pParent, bStartTag, 0); -} - -std::unique_ptr CXML_Parser::ParseElementInternal( - CXML_Element* pParent, - bool bStartTag, - int nDepth) { - if (nDepth > kMaxDepth) - return nullptr; - - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return nullptr; - - ByteString tag_name; - ByteString tag_space; - bool bEndTag; - GetTagName(bStartTag, &bEndTag, &tag_space, &tag_name); - if (tag_name.IsEmpty() || bEndTag) - return nullptr; - - auto pElement = pdfium::MakeUnique( - pParent, tag_space.AsStringView(), tag_name.AsStringView()); - do { - ByteString attr_space; - ByteString attr_name; - while (m_dwIndex < m_dwBufferSize) { - SkipWhiteSpaces(); - if (IsEOF()) - break; - - if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex])) - break; - - GetName(&attr_space, &attr_name); - SkipWhiteSpaces(); - if (IsEOF()) - break; - - if (m_pBuffer[m_dwIndex] != '=') - break; - - m_dwIndex++; - SkipWhiteSpaces(); - if (IsEOF()) - break; - - WideString attr_value = GetAttrValue(); - pElement->SetAttribute(attr_space, attr_name, attr_value); - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - SkipWhiteSpaces(); - if (IsEOF()) - return pElement; - - uint8_t ch = m_pBuffer[m_dwIndex++]; - if (ch == '/') { - m_dwIndex++; - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - return pElement; - } - if (ch != '>') { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - return nullptr; - } - SkipWhiteSpaces(); - if (IsEOF()) - return pElement; - - CFX_UTF8Decoder decoder; - CFX_WideTextBuf content; - bool bCDATA = false; - int32_t iState = 0; - do { - while (m_dwIndex < m_dwBufferSize) { - ch = m_pBuffer[m_dwIndex++]; - switch (iState) { - case 0: - if (ch == '<') { - iState = 1; - } else if (ch == '&') { - decoder.ClearStatus(); - decoder.AppendCodePoint(GetCharRef()); - } else { - decoder.Input(ch); - } - break; - case 1: - if (ch == '!') { - iState = 2; - } else if (ch == '?') { - SkipLiterals("?>"); - SkipWhiteSpaces(); - iState = 0; - } else if (ch == '/') { - ByteString space; - ByteString name; - GetName(&space, &name); - SkipWhiteSpaces(); - m_dwIndex++; - iState = 10; - } else { - content << decoder.GetResult(); - WideString dataStr = content.MakeString(); - if (!bCDATA) - dataStr.TrimRight(L" \t\r\n"); - - InsertContentSegment(bCDATA, dataStr.AsStringView(), - pElement.get()); - content.Clear(); - decoder.Clear(); - bCDATA = false; - iState = 0; - m_dwIndex--; - std::unique_ptr pSubElement = - ParseElementInternal(pElement.get(), true, nDepth + 1); - if (!pSubElement) - break; - - pElement->AppendChild(std::move(pSubElement)); - SkipWhiteSpaces(); - } - break; - case 2: - if (ch == '[') { - SkipLiterals("]]>"); - } else if (ch == '-') { - m_dwIndex++; - SkipLiterals("-->"); - } else { - SkipLiterals(">"); - } - decoder.Clear(); - SkipWhiteSpaces(); - iState = 0; - break; - } - if (iState == 10) { - break; - } - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - content << decoder.GetResult(); - WideString dataStr = content.MakeString(); - dataStr.TrimRight(L" \t\r\n"); - - InsertContentSegment(bCDATA, dataStr.AsStringView(), pElement.get()); - content.Clear(); - decoder.Clear(); - bCDATA = false; - return pElement; -} - -void CXML_Parser::InsertContentSegment(bool bCDATA, - const WideStringView& content, - CXML_Element* pElement) { - if (content.IsEmpty()) - return; - - pElement->AppendChild(pdfium::MakeUnique(bCDATA, content)); -} diff --git a/core/fxcrt/xml/cxml_parser.h b/core/fxcrt/xml/cxml_parser.h deleted file mode 100644 index a6f1303023..0000000000 --- a/core/fxcrt/xml/cxml_parser.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_CXML_PARSER_H_ -#define CORE_FXCRT_XML_CXML_PARSER_H_ - -#include -#include - -#include "core/fxcrt/fx_stream.h" -#include "core/fxcrt/xml/cxml_databufacc.h" - -class CFX_UTF8Decoder; -class CXML_Element; - -class CXML_Parser { - public: - CXML_Parser(); - ~CXML_Parser(); - - bool Init(const uint8_t* pBuffer, size_t size); - bool ReadNextBlock(); - bool IsEOF(); - bool HaveAvailData(); - void SkipWhiteSpaces(); - void GetName(ByteString* space, ByteString* name); - WideString GetAttrValue(); - uint32_t GetCharRef(); - void GetTagName(bool bStartTag, - bool* bEndTag, - ByteString* space, - ByteString* name); - void SkipLiterals(const ByteStringView& str); - std::unique_ptr ParseElement(CXML_Element* pParent, - bool bStartTag); - void InsertContentSegment(bool bCDATA, - const WideStringView& content, - CXML_Element* pElement); - void InsertCDATASegment(CFX_UTF8Decoder& decoder, CXML_Element* pElement); - - private: - std::unique_ptr ParseElementInternal(CXML_Element* pParent, - bool bStartTag, - int nDepth); - - std::unique_ptr m_pDataAcc; - FX_FILESIZE m_nOffset; - const uint8_t* m_pBuffer; - size_t m_dwBufferSize; - FX_FILESIZE m_nBufferOffset; - size_t m_dwIndex; -}; - -#endif // CORE_FXCRT_XML_CXML_PARSER_H_ -- cgit v1.2.3