From 908c848202ef137e98d96f82a4eadfae551403b7 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Thu, 30 Mar 2017 14:33:28 -0400 Subject: Move core/fxcrt XML files to core/fxcrt/xml This CL moves the other XML files contained in core/fxcrt into the core/fxcrt/xml directory to contain all the fxcrt XML files in one place. Change-Id: I9faefb1f311bf167b75dfbb7b9b52f25515e3c31 Reviewed-on: https://pdfium-review.googlesource.com/3378 Reviewed-by: Tom Sepez Commit-Queue: dsinclair --- core/fpdfdoc/cpdf_metadata.cpp | 2 +- core/fxcrt/fx_xml.h | 163 ------- core/fxcrt/fx_xml_composer.cpp | 33 -- core/fxcrt/fx_xml_parser.cpp | 918 --------------------------------------- core/fxcrt/xml/cxml_attritem.cpp | 12 + core/fxcrt/xml/cxml_attritem.h | 21 + core/fxcrt/xml/cxml_attrmap.cpp | 50 +++ core/fxcrt/xml/cxml_attrmap.h | 33 ++ core/fxcrt/xml/cxml_content.h | 23 + core/fxcrt/xml/cxml_element.cpp | 228 ++++++++++ core/fxcrt/xml/cxml_element.h | 126 ++++++ core/fxcrt/xml/cxml_parser.cpp | 680 +++++++++++++++++++++++++++++ core/fxcrt/xml/cxml_parser.h | 55 +++ core/fxcrt/xml_int.h | 55 --- 14 files changed, 1229 insertions(+), 1170 deletions(-) delete mode 100644 core/fxcrt/fx_xml.h delete mode 100644 core/fxcrt/fx_xml_composer.cpp delete mode 100644 core/fxcrt/fx_xml_parser.cpp create mode 100644 core/fxcrt/xml/cxml_attritem.cpp create mode 100644 core/fxcrt/xml/cxml_attritem.h create mode 100644 core/fxcrt/xml/cxml_attrmap.cpp create mode 100644 core/fxcrt/xml/cxml_attrmap.h create mode 100644 core/fxcrt/xml/cxml_content.h create mode 100644 core/fxcrt/xml/cxml_element.cpp create mode 100644 core/fxcrt/xml/cxml_element.h create mode 100644 core/fxcrt/xml/cxml_parser.cpp create mode 100644 core/fxcrt/xml/cxml_parser.h delete mode 100644 core/fxcrt/xml_int.h (limited to 'core') diff --git a/core/fpdfdoc/cpdf_metadata.cpp b/core/fpdfdoc/cpdf_metadata.cpp index 5e3acbe3a2..3780d1587d 100644 --- a/core/fpdfdoc/cpdf_metadata.cpp +++ b/core/fpdfdoc/cpdf_metadata.cpp @@ -9,7 +9,7 @@ 
#include "core/fpdfapi/parser/cpdf_document.h" #include "core/fpdfapi/parser/cpdf_stream.h" #include "core/fpdfapi/parser/cpdf_stream_acc.h" -#include "core/fxcrt/fx_xml.h" +#include "core/fxcrt/xml/cxml_element.h" CPDF_Metadata::CPDF_Metadata(CPDF_Document* pDoc) { CPDF_Dictionary* pRoot = pDoc->GetRoot(); diff --git a/core/fxcrt/fx_xml.h b/core/fxcrt/fx_xml.h deleted file mode 100644 index 0b0de23d67..0000000000 --- a/core/fxcrt/fx_xml.h +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_FX_XML_H_ -#define CORE_FXCRT_FX_XML_H_ - -#include <memory> -#include <vector> - -#include "core/fxcrt/fx_basic.h" - -class CXML_AttrItem { - public: - bool Matches(const CFX_ByteString& space, const CFX_ByteString& name) const; - - CFX_ByteString m_QSpaceName; - CFX_ByteString m_AttrName; - CFX_WideString m_Value; -}; - -class CXML_AttrMap { - public: - CXML_AttrMap(); - ~CXML_AttrMap(); - - const CFX_WideString* Lookup(const CFX_ByteString& space, - const CFX_ByteString& name) const; - int GetSize() const; - CXML_AttrItem& GetAt(int index) const; - - void SetAt(const CFX_ByteString& space, - const CFX_ByteString& name, - const CFX_WideString& value); - - std::unique_ptr<std::vector<CXML_AttrItem>> m_pMap; -}; - -class CXML_Content { - public: - CXML_Content() : m_bCDATA(false), m_Content() {} - void Set(bool bCDATA, const CFX_WideStringC& content) { - m_bCDATA = bCDATA; - m_Content = content; - } - - bool m_bCDATA; - CFX_WideString m_Content; -}; - -class CXML_Element { - public: - enum ChildType { Invalid, Element, Content }; - - static std::unique_ptr<CXML_Element> Parse(const void* pBuffer, size_t size); - - CXML_Element(const CXML_Element* pParent, - const CFX_ByteStringC& qSpace, - const CFX_ByteStringC& tagname); - ~CXML_Element(); - - void Empty(); - CFX_ByteString GetTagName(bool 
bQualified = false) const; - CFX_ByteString GetNamespace(bool bQualified = false) const; - CFX_ByteString GetNamespaceURI(const CFX_ByteString& qName) const; - const CXML_Element* GetParent() const { return m_pParent; } - uint32_t CountAttrs() const { return m_AttrMap.GetSize(); } - void GetAttrByIndex(int index, - CFX_ByteString& space, - CFX_ByteString& name, - CFX_WideString& value) const; - bool HasAttr(const CFX_ByteStringC& qName) const; - bool GetAttrValue(const CFX_ByteStringC& name, - CFX_WideString& attribute) const; - CFX_WideString GetAttrValue(const CFX_ByteStringC& name) const { - CFX_WideString attr; - GetAttrValue(name, attr); - return attr; - } - - bool GetAttrValue(const CFX_ByteStringC& space, - const CFX_ByteStringC& name, - CFX_WideString& attribute) const; - CFX_WideString GetAttrValue(const CFX_ByteStringC& space, - const CFX_ByteStringC& name) const { - CFX_WideString attr; - GetAttrValue(space, name, attr); - return attr; - } - - bool GetAttrInteger(const CFX_ByteStringC& name, int& attribute) const; - int GetAttrInteger(const CFX_ByteStringC& name) const { - int attr = 0; - GetAttrInteger(name, attr); - return attr; - } - - bool GetAttrInteger(const CFX_ByteStringC& space, - const CFX_ByteStringC& name, - int& attribute) const; - int GetAttrInteger(const CFX_ByteStringC& space, - const CFX_ByteStringC& name) const { - int attr = 0; - GetAttrInteger(space, name, attr); - return attr; - } - - bool GetAttrFloat(const CFX_ByteStringC& name, float& attribute) const; - float GetAttrFloat(const CFX_ByteStringC& name) const { - float attr = 0; - GetAttrFloat(name, attr); - return attr; - } - - bool GetAttrFloat(const CFX_ByteStringC& space, - const CFX_ByteStringC& name, - float& attribute) const; - float GetAttrFloat(const CFX_ByteStringC& space, - const CFX_ByteStringC& name) const { - float attr = 0; - GetAttrFloat(space, name, attr); - return attr; - } - - uint32_t CountChildren() const { return m_Children.size(); } - ChildType 
GetChildType(uint32_t index) const; - CFX_WideString GetContent(uint32_t index) const; - CXML_Element* GetElement(uint32_t index) const; - CXML_Element* GetElement(const CFX_ByteStringC& space, - const CFX_ByteStringC& tag) const { - return GetElement(space, tag, 0); - } - - uint32_t CountElements(const CFX_ByteStringC& space, - const CFX_ByteStringC& tag) const; - CXML_Element* GetElement(const CFX_ByteStringC& space, - const CFX_ByteStringC& tag, - int index) const; - - uint32_t FindElement(CXML_Element* pChild) const; - void SetTag(const CFX_ByteStringC& qTagName); - void RemoveChildren(); - void RemoveChild(uint32_t index); - - protected: - struct ChildRecord { - ChildType type; - void* child; // CXML_Element and CXML_Content lack a common ancestor. - }; - - const CXML_Element* const m_pParent; - CFX_ByteString m_QSpaceName; - CFX_ByteString m_TagName; - CXML_AttrMap m_AttrMap; - std::vector<ChildRecord> m_Children; - - friend class CXML_Parser; - friend class CXML_Composer; -}; - -#endif // CORE_FXCRT_FX_XML_H_ diff --git a/core/fxcrt/fx_xml_composer.cpp b/core/fxcrt/fx_xml_composer.cpp deleted file mode 100644 index 637d64cd82..0000000000 --- a/core/fxcrt/fx_xml_composer.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include "core/fxcrt/xml_int.h" - -#include "core/fxcrt/fx_xml.h" - -void FX_XML_SplitQualifiedName(const CFX_ByteStringC& bsFullName, - CFX_ByteStringC& bsSpace, - CFX_ByteStringC& bsName) { - if (bsFullName.IsEmpty()) - return; - - FX_STRSIZE iStart = bsFullName.Find(':'); - if (iStart == -1) { - bsName = bsFullName; - } else { - bsSpace = bsFullName.Mid(0, iStart); - bsName = bsFullName.Mid(iStart + 1); - } -} - -void CXML_Element::SetTag(const CFX_ByteStringC& qTagName) { - ASSERT(!qTagName.IsEmpty()); - CFX_ByteStringC bsSpace; - CFX_ByteStringC bsName; - FX_XML_SplitQualifiedName(qTagName, bsSpace, bsName); - m_QSpaceName = bsSpace; - m_TagName = bsName; -} diff --git a/core/fxcrt/fx_xml_parser.cpp b/core/fxcrt/fx_xml_parser.cpp deleted file mode 100644 index da4815bfd9..0000000000 --- a/core/fxcrt/fx_xml_parser.cpp +++ /dev/null @@ -1,918 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com - -#include -#include -#include - -#include "core/fxcrt/fx_ext.h" -#include "core/fxcrt/fx_xml.h" -#include "core/fxcrt/xml_int.h" -#include "third_party/base/ptr_util.h" -#include "third_party/base/stl_util.h" - -namespace { - -#define FXCRTM_XML_CHARTYPE_Normal 0x00 -#define FXCRTM_XML_CHARTYPE_SpaceChar 0x01 -#define FXCRTM_XML_CHARTYPE_Letter 0x02 -#define FXCRTM_XML_CHARTYPE_Digital 0x04 -#define FXCRTM_XML_CHARTYPE_NameIntro 0x08 -#define FXCRTM_XML_CHARTYPE_NameChar 0x10 -#define FXCRTM_XML_CHARTYPE_HexDigital 0x20 -#define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40 -#define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60 -#define FXCRTM_XML_CHARTYPE_HexChar 0x60 - -const uint8_t g_FXCRT_XML_ByteTypes[256] = { - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00, - 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18, - 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 
0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, - 0x1A, 0x1A, 0x01, 0x01, -}; - -bool g_FXCRT_XML_IsWhiteSpace(uint8_t ch) { - return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar); -} - -bool g_FXCRT_XML_IsDigital(uint8_t ch) { - return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital); -} - -bool g_FXCRT_XML_IsNameIntro(uint8_t ch) { - return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro); -} - -bool g_FXCRT_XML_IsNameChar(uint8_t ch) { - return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar); -} - -class CXML_DataBufAcc : public IFX_BufferedReadStream { - public: - template - friend CFX_RetainPtr pdfium::MakeRetain(Args&&... args); - - // IFX_BufferedReadStream - bool IsEOF() override; - FX_FILESIZE GetPosition() override; - size_t ReadBlock(void* buffer, size_t size) override; - bool ReadNextBlock(bool bRestart) override; - const uint8_t* GetBlockBuffer() override; - size_t GetBlockSize() override; - FX_FILESIZE GetBlockOffset() override; - - private: - CXML_DataBufAcc(const uint8_t* pBuffer, size_t size); - ~CXML_DataBufAcc() override; - - const uint8_t* m_pBuffer; - size_t m_dwSize; - size_t m_dwCurPos; -}; - -CXML_DataBufAcc::CXML_DataBufAcc(const uint8_t* pBuffer, size_t size) - : m_pBuffer(pBuffer), m_dwSize(size), m_dwCurPos(0) {} - -CXML_DataBufAcc::~CXML_DataBufAcc() {} - -bool CXML_DataBufAcc::IsEOF() { - return m_dwCurPos >= m_dwSize; -} - -FX_FILESIZE CXML_DataBufAcc::GetPosition() { - return static_cast(m_dwCurPos); -} - -size_t CXML_DataBufAcc::ReadBlock(void* buffer, size_t size) { - return 0; -} - -bool CXML_DataBufAcc::ReadNextBlock(bool bRestart) { - if (bRestart) - m_dwCurPos = 0; - - if (m_dwCurPos < m_dwSize) { - m_dwCurPos = m_dwSize; - 
return true; - } - return false; -} - -const uint8_t* CXML_DataBufAcc::GetBlockBuffer() { - return m_pBuffer; -} - -size_t CXML_DataBufAcc::GetBlockSize() { - return m_dwSize; -} - -FX_FILESIZE CXML_DataBufAcc::GetBlockOffset() { - return 0; -} - -class CXML_DataStmAcc : public IFX_BufferedReadStream { - public: - template - friend CFX_RetainPtr pdfium::MakeRetain(Args&&... args); - - // IFX_BufferedReadStream - bool IsEOF() override; - FX_FILESIZE GetPosition() override; - size_t ReadBlock(void* buffer, size_t size) override; - bool ReadNextBlock(bool bRestart) override; - const uint8_t* GetBlockBuffer() override; - size_t GetBlockSize() override; - FX_FILESIZE GetBlockOffset() override; - - private: - explicit CXML_DataStmAcc( - const CFX_RetainPtr& pFileRead); - ~CXML_DataStmAcc() override; - - CFX_RetainPtr m_pFileRead; - uint8_t* m_pBuffer; - FX_FILESIZE m_nStart; - size_t m_dwSize; -}; - -CXML_DataStmAcc::CXML_DataStmAcc( - const CFX_RetainPtr& pFileRead) - : m_pFileRead(pFileRead), m_pBuffer(nullptr), m_nStart(0), m_dwSize(0) { - ASSERT(m_pFileRead); -} - -CXML_DataStmAcc::~CXML_DataStmAcc() { - FX_Free(m_pBuffer); -} - -bool CXML_DataStmAcc::IsEOF() { - return m_nStart + static_cast(m_dwSize) >= - m_pFileRead->GetSize(); -} - -FX_FILESIZE CXML_DataStmAcc::GetPosition() { - return m_nStart + static_cast(m_dwSize); -} - -size_t CXML_DataStmAcc::ReadBlock(void* buffer, size_t size) { - return 0; -} - -bool CXML_DataStmAcc::ReadNextBlock(bool bRestart) { - if (bRestart) - m_nStart = 0; - - FX_FILESIZE nLength = m_pFileRead->GetSize(); - m_nStart += static_cast(m_dwSize); - if (m_nStart >= nLength) - return false; - - static const FX_FILESIZE FX_XMLDATASTREAM_BufferSize = 32 * 1024; - m_dwSize = static_cast( - std::min(FX_XMLDATASTREAM_BufferSize, nLength - m_nStart)); - if (!m_pBuffer) - m_pBuffer = FX_Alloc(uint8_t, m_dwSize); - - return m_pFileRead->ReadBlock(m_pBuffer, m_nStart, m_dwSize); -} - -const uint8_t* CXML_DataStmAcc::GetBlockBuffer() { - return 
(const uint8_t*)m_pBuffer; -} - -size_t CXML_DataStmAcc::GetBlockSize() { - return m_dwSize; -} - -FX_FILESIZE CXML_DataStmAcc::GetBlockOffset() { - return m_nStart; -} - -} // namespace - -CXML_Parser::CXML_Parser() - : m_nOffset(0), - m_pBuffer(nullptr), - m_dwBufferSize(0), - m_nBufferOffset(0), - m_dwIndex(0) {} - -CXML_Parser::~CXML_Parser() {} - -bool CXML_Parser::Init(const uint8_t* pBuffer, size_t size) { - m_pDataAcc = pdfium::MakeRetain(pBuffer, size); - m_nOffset = 0; - return ReadNextBlock(); -} - -bool CXML_Parser::ReadNextBlock() { - if (!m_pDataAcc->ReadNextBlock()) - return false; - - m_pBuffer = m_pDataAcc->GetBlockBuffer(); - m_dwBufferSize = m_pDataAcc->GetBlockSize(); - m_nBufferOffset = m_pDataAcc->GetBlockOffset(); - m_dwIndex = 0; - return m_dwBufferSize > 0; -} - -bool CXML_Parser::IsEOF() { - return m_pDataAcc->IsEOF() && m_dwIndex >= m_dwBufferSize; -} - -void CXML_Parser::SkipWhiteSpaces() { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return; - - do { - while (m_dwIndex < m_dwBufferSize && - g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) { - m_dwIndex++; - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); -} - -void CXML_Parser::GetName(CFX_ByteString* space, CFX_ByteString* name) { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return; - - CFX_ByteTextBuf buf; - uint8_t ch; - do { - while (m_dwIndex < m_dwBufferSize) { - ch = m_pBuffer[m_dwIndex]; - if (ch == ':') { - *space = buf.AsStringC(); - buf.Clear(); - } else if (g_FXCRT_XML_IsNameChar(ch)) { - buf.AppendChar(ch); - } else { - break; - } - m_dwIndex++; - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - *name = buf.AsStringC(); -} - -void CXML_Parser::SkipLiterals(const CFX_ByteStringC& str) { - m_nOffset = m_nBufferOffset + 
static_cast(m_dwIndex); - if (IsEOF()) { - return; - } - int32_t i = 0, iLen = str.GetLength(); - do { - while (m_dwIndex < m_dwBufferSize) { - if (str.GetAt(i) != m_pBuffer[m_dwIndex++]) { - i = 0; - continue; - } - i++; - if (i == iLen) - break; - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (i == iLen) - return; - - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - while (!m_pDataAcc->IsEOF()) { - ReadNextBlock(); - m_nOffset = m_nBufferOffset + static_cast(m_dwBufferSize); - } - m_dwIndex = m_dwBufferSize; -} - -uint32_t CXML_Parser::GetCharRef() { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return 0; - - uint8_t ch; - int32_t iState = 0; - CFX_ByteTextBuf buf; - uint32_t code = 0; - do { - while (m_dwIndex < m_dwBufferSize) { - ch = m_pBuffer[m_dwIndex]; - switch (iState) { - case 0: - if (ch == '#') { - m_dwIndex++; - iState = 2; - break; - } - iState = 1; - case 1: - m_dwIndex++; - if (ch == ';') { - CFX_ByteStringC ref = buf.AsStringC(); - if (ref == "gt") - code = '>'; - else if (ref == "lt") - code = '<'; - else if (ref == "amp") - code = '&'; - else if (ref == "apos") - code = '\''; - else if (ref == "quot") - code = '"'; - iState = 10; - break; - } - buf.AppendByte(ch); - break; - case 2: - if (ch == 'x') { - m_dwIndex++; - iState = 4; - break; - } - iState = 3; - case 3: - m_dwIndex++; - if (ch == ';') { - iState = 10; - break; - } - if (g_FXCRT_XML_IsDigital(ch)) - code = code * 10 + FXSYS_toDecimalDigit(static_cast(ch)); - break; - case 4: - m_dwIndex++; - if (ch == ';') { - iState = 10; - break; - } - uint8_t nHex = - g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar; - if (nHex) { - if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) { - code = - (code << 4) + FXSYS_toDecimalDigit(static_cast(ch)); - } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) { - code = (code << 4) + ch - 87; - } else { - code = (code << 4) + ch - 55; - } - } - break; - } - if (iState == 10) - 
break; - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) { - break; - } - } while (ReadNextBlock()); - return code; -} - -void CXML_Parser::GetAttrValue(CFX_WideString& value) { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return; - - CFX_UTF8Decoder decoder; - uint8_t mark = 0, ch = 0; - do { - while (m_dwIndex < m_dwBufferSize) { - ch = m_pBuffer[m_dwIndex]; - if (mark == 0) { - if (ch != '\'' && ch != '"') - return; - - mark = ch; - m_dwIndex++; - ch = 0; - continue; - } - m_dwIndex++; - if (ch == mark) - break; - - if (ch == '&') { - decoder.AppendChar(GetCharRef()); - if (IsEOF()) { - value = decoder.GetResult(); - return; - } - } else { - decoder.Input(ch); - } - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - value = decoder.GetResult(); -} - -void CXML_Parser::GetTagName(bool bStartTag, - bool* bEndTag, - CFX_ByteString* space, - CFX_ByteString* name) { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return; - - *bEndTag = false; - uint8_t ch; - int32_t iState = bStartTag ? 
1 : 0; - do { - while (m_dwIndex < m_dwBufferSize) { - ch = m_pBuffer[m_dwIndex]; - switch (iState) { - case 0: - m_dwIndex++; - if (ch != '<') - break; - - iState = 1; - break; - case 1: - if (ch == '?') { - m_dwIndex++; - SkipLiterals("?>"); - iState = 0; - break; - } - if (ch == '!') { - m_dwIndex++; - SkipLiterals("-->"); - iState = 0; - break; - } - if (ch == '/') { - m_dwIndex++; - GetName(space, name); - *bEndTag = true; - } else { - GetName(space, name); - *bEndTag = false; - } - return; - } - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); -} - -std::unique_ptr CXML_Parser::ParseElement(CXML_Element* pParent, - bool bStartTag) { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (IsEOF()) - return nullptr; - - CFX_ByteString tag_name; - CFX_ByteString tag_space; - bool bEndTag; - GetTagName(bStartTag, &bEndTag, &tag_space, &tag_name); - if (tag_name.IsEmpty() || bEndTag) - return nullptr; - - auto pElement = pdfium::MakeUnique( - pParent, tag_space.AsStringC(), tag_name.AsStringC()); - do { - CFX_ByteString attr_space; - CFX_ByteString attr_name; - while (m_dwIndex < m_dwBufferSize) { - SkipWhiteSpaces(); - if (IsEOF()) - break; - - if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex])) - break; - - GetName(&attr_space, &attr_name); - SkipWhiteSpaces(); - if (IsEOF()) - break; - - if (m_pBuffer[m_dwIndex] != '=') - break; - - m_dwIndex++; - SkipWhiteSpaces(); - if (IsEOF()) - break; - - CFX_WideString attr_value; - GetAttrValue(attr_value); - pElement->m_AttrMap.SetAt(attr_space, attr_name, attr_value); - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - SkipWhiteSpaces(); - if (IsEOF()) - return pElement; - - uint8_t ch = m_pBuffer[m_dwIndex++]; - if (ch == '/') { - m_dwIndex++; - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - return pElement; - } - if (ch 
!= '>') { - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - return nullptr; - } - SkipWhiteSpaces(); - if (IsEOF()) - return pElement; - - CFX_UTF8Decoder decoder; - CFX_WideTextBuf content; - bool bCDATA = false; - int32_t iState = 0; - do { - while (m_dwIndex < m_dwBufferSize) { - ch = m_pBuffer[m_dwIndex++]; - switch (iState) { - case 0: - if (ch == '<') { - iState = 1; - } else if (ch == '&') { - decoder.ClearStatus(); - decoder.AppendChar(GetCharRef()); - } else { - decoder.Input(ch); - } - break; - case 1: - if (ch == '!') { - iState = 2; - } else if (ch == '?') { - SkipLiterals("?>"); - SkipWhiteSpaces(); - iState = 0; - } else if (ch == '/') { - CFX_ByteString space; - CFX_ByteString name; - GetName(&space, &name); - SkipWhiteSpaces(); - m_dwIndex++; - iState = 10; - } else { - content << decoder.GetResult(); - CFX_WideString dataStr = content.MakeString(); - if (!bCDATA) - dataStr.TrimRight(L" \t\r\n"); - - InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get()); - content.Clear(); - decoder.Clear(); - bCDATA = false; - iState = 0; - m_dwIndex--; - std::unique_ptr pSubElement( - ParseElement(pElement.get(), true)); - if (!pSubElement) - break; - - pElement->m_Children.push_back( - {CXML_Element::Element, pSubElement.release()}); - SkipWhiteSpaces(); - } - break; - case 2: - if (ch == '[') { - SkipLiterals("]]>"); - } else if (ch == '-') { - m_dwIndex++; - SkipLiterals("-->"); - } else { - SkipLiterals(">"); - } - decoder.Clear(); - SkipWhiteSpaces(); - iState = 0; - break; - } - if (iState == 10) { - break; - } - } - m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); - if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) - break; - } while (ReadNextBlock()); - content << decoder.GetResult(); - CFX_WideString dataStr = content.MakeString(); - dataStr.TrimRight(L" \t\r\n"); - - InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get()); - content.Clear(); - decoder.Clear(); - bCDATA = false; - return pElement; -} - -void 
CXML_Parser::InsertContentSegment(bool bCDATA, - const CFX_WideStringC& content, - CXML_Element* pElement) { - if (content.IsEmpty()) - return; - - CXML_Content* pContent = new CXML_Content; - pContent->Set(bCDATA, content); - pElement->m_Children.push_back({CXML_Element::Content, pContent}); -} - -std::unique_ptr CXML_Element::Parse(const void* pBuffer, - size_t size) { - CXML_Parser parser; - if (!parser.Init(static_cast(pBuffer), size)) - return nullptr; - return parser.ParseElement(nullptr, false); -} - -CXML_Element::CXML_Element(const CXML_Element* pParent, - const CFX_ByteStringC& qSpace, - const CFX_ByteStringC& tagname) - : m_pParent(pParent), m_QSpaceName(qSpace), m_TagName(tagname) {} - -CXML_Element::~CXML_Element() { - Empty(); -} - -void CXML_Element::Empty() { - RemoveChildren(); -} -void CXML_Element::RemoveChildren() { - for (const ChildRecord& record : m_Children) { - if (record.type == Content) { - delete static_cast(record.child); - } else if (record.type == Element) { - CXML_Element* child = static_cast(record.child); - child->RemoveChildren(); - delete child; - } - } - m_Children.clear(); -} -CFX_ByteString CXML_Element::GetTagName(bool bQualified) const { - if (!bQualified || m_QSpaceName.IsEmpty()) { - return m_TagName; - } - CFX_ByteString bsTag = m_QSpaceName; - bsTag += ":"; - bsTag += m_TagName; - return bsTag; -} - -CFX_ByteString CXML_Element::GetNamespace(bool bQualified) const { - return bQualified ? m_QSpaceName : GetNamespaceURI(m_QSpaceName); -} - -CFX_ByteString CXML_Element::GetNamespaceURI( - const CFX_ByteString& qName) const { - const CFX_WideString* pwsSpace; - const CXML_Element* pElement = this; - do { - if (qName.IsEmpty()) - pwsSpace = pElement->m_AttrMap.Lookup("", "xmlns"); - else - pwsSpace = pElement->m_AttrMap.Lookup("xmlns", qName); - if (pwsSpace) - break; - - pElement = pElement->GetParent(); - } while (pElement); - return pwsSpace ? 
pwsSpace->UTF8Encode() : CFX_ByteString(); -} - -void CXML_Element::GetAttrByIndex(int index, - CFX_ByteString& space, - CFX_ByteString& name, - CFX_WideString& value) const { - if (index < 0 || index >= m_AttrMap.GetSize()) - return; - - CXML_AttrItem& item = m_AttrMap.GetAt(index); - space = item.m_QSpaceName; - name = item.m_AttrName; - value = item.m_Value; -} - -bool CXML_Element::HasAttr(const CFX_ByteStringC& name) const { - CFX_ByteStringC bsSpace; - CFX_ByteStringC bsName; - FX_XML_SplitQualifiedName(name, bsSpace, bsName); - return !!m_AttrMap.Lookup(CFX_ByteString(bsSpace), CFX_ByteString(bsName)); -} - -bool CXML_Element::GetAttrValue(const CFX_ByteStringC& name, - CFX_WideString& attribute) const { - CFX_ByteStringC bsSpace; - CFX_ByteStringC bsName; - FX_XML_SplitQualifiedName(name, bsSpace, bsName); - return GetAttrValue(bsSpace, bsName, attribute); -} - -bool CXML_Element::GetAttrValue(const CFX_ByteStringC& space, - const CFX_ByteStringC& name, - CFX_WideString& attribute) const { - const CFX_WideString* pValue = - m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name)); - if (!pValue) - return false; - - attribute = *pValue; - return true; -} - -bool CXML_Element::GetAttrInteger(const CFX_ByteStringC& name, - int& attribute) const { - CFX_ByteStringC bsSpace; - CFX_ByteStringC bsName; - FX_XML_SplitQualifiedName(name, bsSpace, bsName); - const CFX_WideString* pwsValue = - m_AttrMap.Lookup(CFX_ByteString(bsSpace), CFX_ByteString(bsName)); - if (!pwsValue) - return false; - - attribute = pwsValue->GetInteger(); - return true; -} - -bool CXML_Element::GetAttrInteger(const CFX_ByteStringC& space, - const CFX_ByteStringC& name, - int& attribute) const { - const CFX_WideString* pwsValue = - m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name)); - if (!pwsValue) - return false; - - attribute = pwsValue->GetInteger(); - return true; -} - -bool CXML_Element::GetAttrFloat(const CFX_ByteStringC& name, - float& attribute) const { - 
CFX_ByteStringC bsSpace; - CFX_ByteStringC bsName; - FX_XML_SplitQualifiedName(name, bsSpace, bsName); - return GetAttrFloat(bsSpace, bsName, attribute); -} - -bool CXML_Element::GetAttrFloat(const CFX_ByteStringC& space, - const CFX_ByteStringC& name, - float& attribute) const { - const CFX_WideString* pValue = - m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name)); - if (!pValue) - return false; - - attribute = pValue->GetFloat(); - return true; -} - -CXML_Element::ChildType CXML_Element::GetChildType(uint32_t index) const { - return index < m_Children.size() ? m_Children[index].type : Invalid; -} - -CFX_WideString CXML_Element::GetContent(uint32_t index) const { - if (index < m_Children.size() && m_Children[index].type == Content) { - CXML_Content* pContent = - static_cast(m_Children[index].child); - if (pContent) - return pContent->m_Content; - } - return CFX_WideString(); -} - -CXML_Element* CXML_Element::GetElement(uint32_t index) const { - if (index < m_Children.size() && m_Children[index].type == Element) - return static_cast(m_Children[index].child); - return nullptr; -} - -uint32_t CXML_Element::CountElements(const CFX_ByteStringC& space, - const CFX_ByteStringC& tag) const { - int count = 0; - for (const ChildRecord& record : m_Children) { - if (record.type != Element) - continue; - - CXML_Element* pKid = static_cast(record.child); - if ((space.IsEmpty() || pKid->m_QSpaceName == space) && - pKid->m_TagName == tag) { - count++; - } - } - return count; -} - -CXML_Element* CXML_Element::GetElement(const CFX_ByteStringC& space, - const CFX_ByteStringC& tag, - int index) const { - if (index < 0) - return nullptr; - - for (const ChildRecord& record : m_Children) { - if (record.type != Element) - continue; - - CXML_Element* pKid = static_cast(record.child); - if ((space.IsEmpty() || pKid->m_QSpaceName == space) && - pKid->m_TagName == tag) { - if (index-- == 0) - return pKid; - } - } - return nullptr; -} - -uint32_t 
CXML_Element::FindElement(CXML_Element* pChild) const { - int index = 0; - for (const ChildRecord& record : m_Children) { - if (record.type == Element && - static_cast(record.child) == pChild) { - return index; - } - ++index; - } - return 0xFFFFFFFF; -} - -bool CXML_AttrItem::Matches(const CFX_ByteString& space, - const CFX_ByteString& name) const { - return (space.IsEmpty() || m_QSpaceName == space) && m_AttrName == name; -} - -CXML_AttrMap::CXML_AttrMap() {} - -CXML_AttrMap::~CXML_AttrMap() {} - -const CFX_WideString* CXML_AttrMap::Lookup(const CFX_ByteString& space, - const CFX_ByteString& name) const { - if (!m_pMap) - return nullptr; - - for (const auto& item : *m_pMap) { - if (item.Matches(space, name)) - return &item.m_Value; - } - return nullptr; -} - -void CXML_AttrMap::SetAt(const CFX_ByteString& space, - const CFX_ByteString& name, - const CFX_WideString& value) { - if (!m_pMap) - m_pMap = pdfium::MakeUnique>(); - - for (CXML_AttrItem& item : *m_pMap) { - if (item.Matches(space, name)) { - item.m_Value = value; - return; - } - } - - m_pMap->push_back({space, name, CFX_WideString(value)}); -} - -int CXML_AttrMap::GetSize() const { - return m_pMap ? pdfium::CollectionSize(*m_pMap) : 0; -} - -CXML_AttrItem& CXML_AttrMap::GetAt(int index) const { - return (*m_pMap)[index]; -} diff --git a/core/fxcrt/xml/cxml_attritem.cpp b/core/fxcrt/xml/cxml_attritem.cpp new file mode 100644 index 0000000000..8e55799a62 --- /dev/null +++ b/core/fxcrt/xml/cxml_attritem.cpp @@ -0,0 +1,12 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/fxcrt/xml/cxml_attritem.h" + +bool CXML_AttrItem::Matches(const CFX_ByteString& space, + const CFX_ByteString& name) const { + return (space.IsEmpty() || m_QSpaceName == space) && m_AttrName == name; +} diff --git a/core/fxcrt/xml/cxml_attritem.h b/core/fxcrt/xml/cxml_attritem.h new file mode 100644 index 0000000000..63305e80c8 --- /dev/null +++ b/core/fxcrt/xml/cxml_attritem.h @@ -0,0 +1,21 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FXCRT_XML_CXML_ATTRITEM_H_ +#define CORE_FXCRT_XML_CXML_ATTRITEM_H_ + +#include "core/fxcrt/fx_string.h" + +class CXML_AttrItem { + public: + bool Matches(const CFX_ByteString& space, const CFX_ByteString& name) const; + + CFX_ByteString m_QSpaceName; + CFX_ByteString m_AttrName; + CFX_WideString m_Value; +}; + +#endif // CORE_FXCRT_XML_CXML_ATTRITEM_H_ diff --git a/core/fxcrt/xml/cxml_attrmap.cpp b/core/fxcrt/xml/cxml_attrmap.cpp new file mode 100644 index 0000000000..8d226fe5f0 --- /dev/null +++ b/core/fxcrt/xml/cxml_attrmap.cpp @@ -0,0 +1,50 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/fxcrt/xml/cxml_attrmap.h" + +#include "third_party/base/ptr_util.h" +#include "third_party/base/stl_util.h" + +CXML_AttrMap::CXML_AttrMap() {} + +CXML_AttrMap::~CXML_AttrMap() {} + +const CFX_WideString* CXML_AttrMap::Lookup(const CFX_ByteString& space, + const CFX_ByteString& name) const { + if (!m_pMap) + return nullptr; + + for (const auto& item : *m_pMap) { + if (item.Matches(space, name)) + return &item.m_Value; + } + return nullptr; +} + +void CXML_AttrMap::SetAt(const CFX_ByteString& space, + const CFX_ByteString& name, + const CFX_WideString& value) { + if (!m_pMap) + m_pMap = pdfium::MakeUnique>(); + + for (CXML_AttrItem& item : *m_pMap) { + if (item.Matches(space, name)) { + item.m_Value = value; + return; + } + } + + m_pMap->push_back({space, name, CFX_WideString(value)}); +} + +int CXML_AttrMap::GetSize() const { + return m_pMap ? pdfium::CollectionSize(*m_pMap) : 0; +} + +CXML_AttrItem& CXML_AttrMap::GetAt(int index) const { + return (*m_pMap)[index]; +} diff --git a/core/fxcrt/xml/cxml_attrmap.h b/core/fxcrt/xml/cxml_attrmap.h new file mode 100644 index 0000000000..0a026d31d7 --- /dev/null +++ b/core/fxcrt/xml/cxml_attrmap.h @@ -0,0 +1,33 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#ifndef CORE_FXCRT_XML_CXML_ATTRMAP_H_ +#define CORE_FXCRT_XML_CXML_ATTRMAP_H_ + +#include +#include + +#include "core/fxcrt/fx_string.h" +#include "core/fxcrt/xml/cxml_attritem.h" + +class CXML_AttrMap { + public: + CXML_AttrMap(); + ~CXML_AttrMap(); + + const CFX_WideString* Lookup(const CFX_ByteString& space, + const CFX_ByteString& name) const; + int GetSize() const; + CXML_AttrItem& GetAt(int index) const; + + void SetAt(const CFX_ByteString& space, + const CFX_ByteString& name, + const CFX_WideString& value); + + std::unique_ptr> m_pMap; +}; + +#endif // CORE_FXCRT_XML_CXML_ATTRMAP_H_ diff --git a/core/fxcrt/xml/cxml_content.h b/core/fxcrt/xml/cxml_content.h new file mode 100644 index 0000000000..261c622eca --- /dev/null +++ b/core/fxcrt/xml/cxml_content.h @@ -0,0 +1,23 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FXCRT_XML_CXML_CONTENT_H_ +#define CORE_FXCRT_XML_CXML_CONTENT_H_ + +class CXML_Content { + public: + CXML_Content() : m_bCDATA(false), m_Content() {} + + void Set(bool bCDATA, const CFX_WideStringC& content) { + m_bCDATA = bCDATA; + m_Content = content; + } + + bool m_bCDATA; + CFX_WideString m_Content; +}; + +#endif // CORE_FXCRT_XML_CXML_CONTENT_H_ diff --git a/core/fxcrt/xml/cxml_element.cpp b/core/fxcrt/xml/cxml_element.cpp new file mode 100644 index 0000000000..20ad54e88c --- /dev/null +++ b/core/fxcrt/xml/cxml_element.cpp @@ -0,0 +1,228 @@ +// Copyright 2017 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. 
http://www.foxitsoftware.com + +#include "core/fxcrt/xml/cxml_element.h" + +#include "core/fxcrt/xml/cxml_content.h" +#include "core/fxcrt/xml/cxml_parser.h" + +CXML_Element::CXML_Element(const CXML_Element* pParent, + const CFX_ByteStringC& qSpace, + const CFX_ByteStringC& tagname) + : m_pParent(pParent), m_QSpaceName(qSpace), m_TagName(tagname) {} + +CXML_Element::~CXML_Element() { + Empty(); +} + +void CXML_Element::Empty() { + RemoveChildren(); +} +void CXML_Element::RemoveChildren() { + for (const ChildRecord& record : m_Children) { + if (record.type == Content) { + delete static_cast(record.child); + } else if (record.type == Element) { + CXML_Element* child = static_cast(record.child); + child->RemoveChildren(); + delete child; + } + } + m_Children.clear(); +} +CFX_ByteString CXML_Element::GetTagName(bool bQualified) const { + if (!bQualified || m_QSpaceName.IsEmpty()) { + return m_TagName; + } + CFX_ByteString bsTag = m_QSpaceName; + bsTag += ":"; + bsTag += m_TagName; + return bsTag; +} + +CFX_ByteString CXML_Element::GetNamespace(bool bQualified) const { + return bQualified ? m_QSpaceName : GetNamespaceURI(m_QSpaceName); +} + +CFX_ByteString CXML_Element::GetNamespaceURI( + const CFX_ByteString& qName) const { + const CFX_WideString* pwsSpace; + const CXML_Element* pElement = this; + do { + if (qName.IsEmpty()) + pwsSpace = pElement->m_AttrMap.Lookup("", "xmlns"); + else + pwsSpace = pElement->m_AttrMap.Lookup("xmlns", qName); + if (pwsSpace) + break; + + pElement = pElement->GetParent(); + } while (pElement); + return pwsSpace ? 
pwsSpace->UTF8Encode() : CFX_ByteString(); +} + +void CXML_Element::GetAttrByIndex(int index, + CFX_ByteString& space, + CFX_ByteString& name, + CFX_WideString& value) const { + if (index < 0 || index >= m_AttrMap.GetSize()) + return; + + CXML_AttrItem& item = m_AttrMap.GetAt(index); + space = item.m_QSpaceName; + name = item.m_AttrName; + value = item.m_Value; +} + +bool CXML_Element::HasAttr(const CFX_ByteStringC& name) const { + CFX_ByteStringC bsSpace; + CFX_ByteStringC bsName; + FX_XML_SplitQualifiedName(name, bsSpace, bsName); + return !!m_AttrMap.Lookup(CFX_ByteString(bsSpace), CFX_ByteString(bsName)); +} + +bool CXML_Element::GetAttrValue(const CFX_ByteStringC& name, + CFX_WideString& attribute) const { + CFX_ByteStringC bsSpace; + CFX_ByteStringC bsName; + FX_XML_SplitQualifiedName(name, bsSpace, bsName); + return GetAttrValue(bsSpace, bsName, attribute); +} + +bool CXML_Element::GetAttrValue(const CFX_ByteStringC& space, + const CFX_ByteStringC& name, + CFX_WideString& attribute) const { + const CFX_WideString* pValue = + m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name)); + if (!pValue) + return false; + + attribute = *pValue; + return true; +} + +bool CXML_Element::GetAttrInteger(const CFX_ByteStringC& name, + int& attribute) const { + CFX_ByteStringC bsSpace; + CFX_ByteStringC bsName; + FX_XML_SplitQualifiedName(name, bsSpace, bsName); + const CFX_WideString* pwsValue = + m_AttrMap.Lookup(CFX_ByteString(bsSpace), CFX_ByteString(bsName)); + if (!pwsValue) + return false; + + attribute = pwsValue->GetInteger(); + return true; +} + +bool CXML_Element::GetAttrInteger(const CFX_ByteStringC& space, + const CFX_ByteStringC& name, + int& attribute) const { + const CFX_WideString* pwsValue = + m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name)); + if (!pwsValue) + return false; + + attribute = pwsValue->GetInteger(); + return true; +} + +bool CXML_Element::GetAttrFloat(const CFX_ByteStringC& name, + float& attribute) const { + 
CFX_ByteStringC bsSpace; + CFX_ByteStringC bsName; + FX_XML_SplitQualifiedName(name, bsSpace, bsName); + return GetAttrFloat(bsSpace, bsName, attribute); +} + +bool CXML_Element::GetAttrFloat(const CFX_ByteStringC& space, + const CFX_ByteStringC& name, + float& attribute) const { + const CFX_WideString* pValue = + m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name)); + if (!pValue) + return false; + + attribute = pValue->GetFloat(); + return true; +} + +CXML_Element::ChildType CXML_Element::GetChildType(uint32_t index) const { + return index < m_Children.size() ? m_Children[index].type : Invalid; +} + +CFX_WideString CXML_Element::GetContent(uint32_t index) const { + if (index < m_Children.size() && m_Children[index].type == Content) { + CXML_Content* pContent = + static_cast(m_Children[index].child); + if (pContent) + return pContent->m_Content; + } + return CFX_WideString(); +} + +CXML_Element* CXML_Element::GetElement(uint32_t index) const { + if (index < m_Children.size() && m_Children[index].type == Element) + return static_cast(m_Children[index].child); + return nullptr; +} + +uint32_t CXML_Element::CountElements(const CFX_ByteStringC& space, + const CFX_ByteStringC& tag) const { + int count = 0; + for (const ChildRecord& record : m_Children) { + if (record.type != Element) + continue; + + CXML_Element* pKid = static_cast(record.child); + if ((space.IsEmpty() || pKid->m_QSpaceName == space) && + pKid->m_TagName == tag) { + count++; + } + } + return count; +} + +CXML_Element* CXML_Element::GetElement(const CFX_ByteStringC& space, + const CFX_ByteStringC& tag, + int index) const { + if (index < 0) + return nullptr; + + for (const ChildRecord& record : m_Children) { + if (record.type != Element) + continue; + + CXML_Element* pKid = static_cast(record.child); + if ((space.IsEmpty() || pKid->m_QSpaceName == space) && + pKid->m_TagName == tag) { + if (index-- == 0) + return pKid; + } + } + return nullptr; +} + +uint32_t 
CXML_Element::FindElement(CXML_Element* pChild) const { + int index = 0; + for (const ChildRecord& record : m_Children) { + if (record.type == Element && + static_cast(record.child) == pChild) { + return index; + } + ++index; + } + return 0xFFFFFFFF; +} + +void CXML_Element::SetTag(const CFX_ByteStringC& qTagName) { + ASSERT(!qTagName.IsEmpty()); + CFX_ByteStringC bsSpace; + CFX_ByteStringC bsName; + FX_XML_SplitQualifiedName(qTagName, bsSpace, bsName); + m_QSpaceName = bsSpace; + m_TagName = bsName; +} diff --git a/core/fxcrt/xml/cxml_element.h b/core/fxcrt/xml/cxml_element.h new file mode 100644 index 0000000000..2e18e187b7 --- /dev/null +++ b/core/fxcrt/xml/cxml_element.h @@ -0,0 +1,126 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FXCRT_XML_CXML_ELEMENT_H_ +#define CORE_FXCRT_XML_CXML_ELEMENT_H_ + +#include +#include + +#include "core/fxcrt/fx_basic.h" +#include "core/fxcrt/xml/cxml_attrmap.h" + +class CXML_Element { + public: + enum ChildType { Invalid, Element, Content }; + + static std::unique_ptr Parse(const void* pBuffer, size_t size); + + CXML_Element(const CXML_Element* pParent, + const CFX_ByteStringC& qSpace, + const CFX_ByteStringC& tagname); + ~CXML_Element(); + + void Empty(); + CFX_ByteString GetTagName(bool bQualified = false) const; + CFX_ByteString GetNamespace(bool bQualified = false) const; + CFX_ByteString GetNamespaceURI(const CFX_ByteString& qName) const; + const CXML_Element* GetParent() const { return m_pParent; } + uint32_t CountAttrs() const { return m_AttrMap.GetSize(); } + void GetAttrByIndex(int index, + CFX_ByteString& space, + CFX_ByteString& name, + CFX_WideString& value) const; + bool HasAttr(const CFX_ByteStringC& qName) const; + bool GetAttrValue(const CFX_ByteStringC& name, + CFX_WideString& attribute) const; + 
CFX_WideString GetAttrValue(const CFX_ByteStringC& name) const { + CFX_WideString attr; + GetAttrValue(name, attr); + return attr; + } + + bool GetAttrValue(const CFX_ByteStringC& space, + const CFX_ByteStringC& name, + CFX_WideString& attribute) const; + CFX_WideString GetAttrValue(const CFX_ByteStringC& space, + const CFX_ByteStringC& name) const { + CFX_WideString attr; + GetAttrValue(space, name, attr); + return attr; + } + + bool GetAttrInteger(const CFX_ByteStringC& name, int& attribute) const; + int GetAttrInteger(const CFX_ByteStringC& name) const { + int attr = 0; + GetAttrInteger(name, attr); + return attr; + } + + bool GetAttrInteger(const CFX_ByteStringC& space, + const CFX_ByteStringC& name, + int& attribute) const; + int GetAttrInteger(const CFX_ByteStringC& space, + const CFX_ByteStringC& name) const { + int attr = 0; + GetAttrInteger(space, name, attr); + return attr; + } + + bool GetAttrFloat(const CFX_ByteStringC& name, float& attribute) const; + float GetAttrFloat(const CFX_ByteStringC& name) const { + float attr = 0; + GetAttrFloat(name, attr); + return attr; + } + + bool GetAttrFloat(const CFX_ByteStringC& space, + const CFX_ByteStringC& name, + float& attribute) const; + float GetAttrFloat(const CFX_ByteStringC& space, + const CFX_ByteStringC& name) const { + float attr = 0; + GetAttrFloat(space, name, attr); + return attr; + } + + uint32_t CountChildren() const { return m_Children.size(); } + ChildType GetChildType(uint32_t index) const; + CFX_WideString GetContent(uint32_t index) const; + CXML_Element* GetElement(uint32_t index) const; + CXML_Element* GetElement(const CFX_ByteStringC& space, + const CFX_ByteStringC& tag) const { + return GetElement(space, tag, 0); + } + + uint32_t CountElements(const CFX_ByteStringC& space, + const CFX_ByteStringC& tag) const; + CXML_Element* GetElement(const CFX_ByteStringC& space, + const CFX_ByteStringC& tag, + int index) const; + + uint32_t FindElement(CXML_Element* pChild) const; + void SetTag(const 
CFX_ByteStringC& qTagName); + void RemoveChildren(); + void RemoveChild(uint32_t index); + + private: + friend class CXML_Parser; + friend class CXML_Composer; + + struct ChildRecord { + ChildType type; + void* child; // CXML_Element and CXML_Content lack a common ancestor. + }; + + const CXML_Element* const m_pParent; + CFX_ByteString m_QSpaceName; + CFX_ByteString m_TagName; + CXML_AttrMap m_AttrMap; + std::vector m_Children; +}; + +#endif // CORE_FXCRT_XML_CXML_ELEMENT_H_ diff --git a/core/fxcrt/xml/cxml_parser.cpp b/core/fxcrt/xml/cxml_parser.cpp new file mode 100644 index 0000000000..be48b7adc2 --- /dev/null +++ b/core/fxcrt/xml/cxml_parser.cpp @@ -0,0 +1,680 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include +#include +#include + +#include "core/fxcrt/fx_ext.h" +#include "core/fxcrt/xml/cxml_content.h" +#include "core/fxcrt/xml/cxml_element.h" +#include "core/fxcrt/xml/cxml_parser.h" +#include "third_party/base/ptr_util.h" +#include "third_party/base/stl_util.h" + +namespace { + +#define FXCRTM_XML_CHARTYPE_Normal 0x00 +#define FXCRTM_XML_CHARTYPE_SpaceChar 0x01 +#define FXCRTM_XML_CHARTYPE_Letter 0x02 +#define FXCRTM_XML_CHARTYPE_Digital 0x04 +#define FXCRTM_XML_CHARTYPE_NameIntro 0x08 +#define FXCRTM_XML_CHARTYPE_NameChar 0x10 +#define FXCRTM_XML_CHARTYPE_HexDigital 0x20 +#define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40 +#define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60 +#define FXCRTM_XML_CHARTYPE_HexChar 0x60 + +const uint8_t g_FXCRT_XML_ByteTypes[256] = { + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00, + 
0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18, + 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, + 0x1A, 0x1A, 0x01, 0x01, +}; + +bool g_FXCRT_XML_IsWhiteSpace(uint8_t ch) { + return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar); +} + +bool g_FXCRT_XML_IsDigital(uint8_t ch) { + return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital); +} + +bool g_FXCRT_XML_IsNameIntro(uint8_t ch) { + return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro); +} + +bool g_FXCRT_XML_IsNameChar(uint8_t ch) { + return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar); +} + +class CXML_DataBufAcc : public IFX_BufferedReadStream { + public: + template + friend CFX_RetainPtr pdfium::MakeRetain(Args&&... 
args); + + // IFX_BufferedReadStream + bool IsEOF() override; + FX_FILESIZE GetPosition() override; + size_t ReadBlock(void* buffer, size_t size) override; + bool ReadNextBlock(bool bRestart) override; + const uint8_t* GetBlockBuffer() override; + size_t GetBlockSize() override; + FX_FILESIZE GetBlockOffset() override; + + private: + CXML_DataBufAcc(const uint8_t* pBuffer, size_t size); + ~CXML_DataBufAcc() override; + + const uint8_t* m_pBuffer; + size_t m_dwSize; + size_t m_dwCurPos; +}; + +CXML_DataBufAcc::CXML_DataBufAcc(const uint8_t* pBuffer, size_t size) + : m_pBuffer(pBuffer), m_dwSize(size), m_dwCurPos(0) {} + +CXML_DataBufAcc::~CXML_DataBufAcc() {} + +bool CXML_DataBufAcc::IsEOF() { + return m_dwCurPos >= m_dwSize; +} + +FX_FILESIZE CXML_DataBufAcc::GetPosition() { + return static_cast(m_dwCurPos); +} + +size_t CXML_DataBufAcc::ReadBlock(void* buffer, size_t size) { + return 0; +} + +bool CXML_DataBufAcc::ReadNextBlock(bool bRestart) { + if (bRestart) + m_dwCurPos = 0; + + if (m_dwCurPos < m_dwSize) { + m_dwCurPos = m_dwSize; + return true; + } + return false; +} + +const uint8_t* CXML_DataBufAcc::GetBlockBuffer() { + return m_pBuffer; +} + +size_t CXML_DataBufAcc::GetBlockSize() { + return m_dwSize; +} + +FX_FILESIZE CXML_DataBufAcc::GetBlockOffset() { + return 0; +} + +class CXML_DataStmAcc : public IFX_BufferedReadStream { + public: + template + friend CFX_RetainPtr pdfium::MakeRetain(Args&&... 
args); + + // IFX_BufferedReadStream + bool IsEOF() override; + FX_FILESIZE GetPosition() override; + size_t ReadBlock(void* buffer, size_t size) override; + bool ReadNextBlock(bool bRestart) override; + const uint8_t* GetBlockBuffer() override; + size_t GetBlockSize() override; + FX_FILESIZE GetBlockOffset() override; + + private: + explicit CXML_DataStmAcc( + const CFX_RetainPtr& pFileRead); + ~CXML_DataStmAcc() override; + + CFX_RetainPtr m_pFileRead; + uint8_t* m_pBuffer; + FX_FILESIZE m_nStart; + size_t m_dwSize; +}; + +CXML_DataStmAcc::CXML_DataStmAcc( + const CFX_RetainPtr& pFileRead) + : m_pFileRead(pFileRead), m_pBuffer(nullptr), m_nStart(0), m_dwSize(0) { + ASSERT(m_pFileRead); +} + +CXML_DataStmAcc::~CXML_DataStmAcc() { + FX_Free(m_pBuffer); +} + +bool CXML_DataStmAcc::IsEOF() { + return m_nStart + static_cast(m_dwSize) >= + m_pFileRead->GetSize(); +} + +FX_FILESIZE CXML_DataStmAcc::GetPosition() { + return m_nStart + static_cast(m_dwSize); +} + +size_t CXML_DataStmAcc::ReadBlock(void* buffer, size_t size) { + return 0; +} + +bool CXML_DataStmAcc::ReadNextBlock(bool bRestart) { + if (bRestart) + m_nStart = 0; + + FX_FILESIZE nLength = m_pFileRead->GetSize(); + m_nStart += static_cast(m_dwSize); + if (m_nStart >= nLength) + return false; + + static const FX_FILESIZE FX_XMLDATASTREAM_BufferSize = 32 * 1024; + m_dwSize = static_cast( + std::min(FX_XMLDATASTREAM_BufferSize, nLength - m_nStart)); + if (!m_pBuffer) + m_pBuffer = FX_Alloc(uint8_t, m_dwSize); + + return m_pFileRead->ReadBlock(m_pBuffer, m_nStart, m_dwSize); +} + +const uint8_t* CXML_DataStmAcc::GetBlockBuffer() { + return (const uint8_t*)m_pBuffer; +} + +size_t CXML_DataStmAcc::GetBlockSize() { + return m_dwSize; +} + +FX_FILESIZE CXML_DataStmAcc::GetBlockOffset() { + return m_nStart; +} + +} // namespace + +void FX_XML_SplitQualifiedName(const CFX_ByteStringC& bsFullName, + CFX_ByteStringC& bsSpace, + CFX_ByteStringC& bsName) { + if (bsFullName.IsEmpty()) + return; + + FX_STRSIZE iStart = 
bsFullName.Find(':'); + if (iStart == -1) { + bsName = bsFullName; + } else { + bsSpace = bsFullName.Mid(0, iStart); + bsName = bsFullName.Mid(iStart + 1); + } +} + +CXML_Parser::CXML_Parser() + : m_nOffset(0), + m_pBuffer(nullptr), + m_dwBufferSize(0), + m_nBufferOffset(0), + m_dwIndex(0) {} + +CXML_Parser::~CXML_Parser() {} + +bool CXML_Parser::Init(const uint8_t* pBuffer, size_t size) { + m_pDataAcc = pdfium::MakeRetain(pBuffer, size); + m_nOffset = 0; + return ReadNextBlock(); +} + +bool CXML_Parser::ReadNextBlock() { + if (!m_pDataAcc->ReadNextBlock()) + return false; + + m_pBuffer = m_pDataAcc->GetBlockBuffer(); + m_dwBufferSize = m_pDataAcc->GetBlockSize(); + m_nBufferOffset = m_pDataAcc->GetBlockOffset(); + m_dwIndex = 0; + return m_dwBufferSize > 0; +} + +bool CXML_Parser::IsEOF() { + return m_pDataAcc->IsEOF() && m_dwIndex >= m_dwBufferSize; +} + +void CXML_Parser::SkipWhiteSpaces() { + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (IsEOF()) + return; + + do { + while (m_dwIndex < m_dwBufferSize && + g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) { + m_dwIndex++; + } + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (m_dwIndex < m_dwBufferSize || IsEOF()) + break; + } while (ReadNextBlock()); +} + +void CXML_Parser::GetName(CFX_ByteString* space, CFX_ByteString* name) { + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (IsEOF()) + return; + + CFX_ByteTextBuf buf; + uint8_t ch; + do { + while (m_dwIndex < m_dwBufferSize) { + ch = m_pBuffer[m_dwIndex]; + if (ch == ':') { + *space = buf.AsStringC(); + buf.Clear(); + } else if (g_FXCRT_XML_IsNameChar(ch)) { + buf.AppendChar(ch); + } else { + break; + } + m_dwIndex++; + } + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (m_dwIndex < m_dwBufferSize || IsEOF()) + break; + } while (ReadNextBlock()); + *name = buf.AsStringC(); +} + +void CXML_Parser::SkipLiterals(const CFX_ByteStringC& str) { + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (IsEOF()) { + 
return; + } + int32_t i = 0, iLen = str.GetLength(); + do { + while (m_dwIndex < m_dwBufferSize) { + if (str.GetAt(i) != m_pBuffer[m_dwIndex++]) { + i = 0; + continue; + } + i++; + if (i == iLen) + break; + } + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (i == iLen) + return; + + if (m_dwIndex < m_dwBufferSize || IsEOF()) + break; + } while (ReadNextBlock()); + while (!m_pDataAcc->IsEOF()) { + ReadNextBlock(); + m_nOffset = m_nBufferOffset + static_cast(m_dwBufferSize); + } + m_dwIndex = m_dwBufferSize; +} + +uint32_t CXML_Parser::GetCharRef() { + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (IsEOF()) + return 0; + + uint8_t ch; + int32_t iState = 0; + CFX_ByteTextBuf buf; + uint32_t code = 0; + do { + while (m_dwIndex < m_dwBufferSize) { + ch = m_pBuffer[m_dwIndex]; + switch (iState) { + case 0: + if (ch == '#') { + m_dwIndex++; + iState = 2; + break; + } + iState = 1; + case 1: + m_dwIndex++; + if (ch == ';') { + CFX_ByteStringC ref = buf.AsStringC(); + if (ref == "gt") + code = '>'; + else if (ref == "lt") + code = '<'; + else if (ref == "amp") + code = '&'; + else if (ref == "apos") + code = '\''; + else if (ref == "quot") + code = '"'; + iState = 10; + break; + } + buf.AppendByte(ch); + break; + case 2: + if (ch == 'x') { + m_dwIndex++; + iState = 4; + break; + } + iState = 3; + case 3: + m_dwIndex++; + if (ch == ';') { + iState = 10; + break; + } + if (g_FXCRT_XML_IsDigital(ch)) + code = code * 10 + FXSYS_toDecimalDigit(static_cast(ch)); + break; + case 4: + m_dwIndex++; + if (ch == ';') { + iState = 10; + break; + } + uint8_t nHex = + g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar; + if (nHex) { + if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) { + code = + (code << 4) + FXSYS_toDecimalDigit(static_cast(ch)); + } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) { + code = (code << 4) + ch - 87; + } else { + code = (code << 4) + ch - 55; + } + } + break; + } + if (iState == 10) + break; + } + m_nOffset = m_nBufferOffset + 
static_cast(m_dwIndex); + if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) { + break; + } + } while (ReadNextBlock()); + return code; +} + +void CXML_Parser::GetAttrValue(CFX_WideString& value) { + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (IsEOF()) + return; + + CFX_UTF8Decoder decoder; + uint8_t mark = 0, ch = 0; + do { + while (m_dwIndex < m_dwBufferSize) { + ch = m_pBuffer[m_dwIndex]; + if (mark == 0) { + if (ch != '\'' && ch != '"') + return; + + mark = ch; + m_dwIndex++; + ch = 0; + continue; + } + m_dwIndex++; + if (ch == mark) + break; + + if (ch == '&') { + decoder.AppendChar(GetCharRef()); + if (IsEOF()) { + value = decoder.GetResult(); + return; + } + } else { + decoder.Input(ch); + } + } + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF()) + break; + } while (ReadNextBlock()); + value = decoder.GetResult(); +} + +void CXML_Parser::GetTagName(bool bStartTag, + bool* bEndTag, + CFX_ByteString* space, + CFX_ByteString* name) { + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (IsEOF()) + return; + + *bEndTag = false; + uint8_t ch; + int32_t iState = bStartTag ? 
1 : 0; + do { + while (m_dwIndex < m_dwBufferSize) { + ch = m_pBuffer[m_dwIndex]; + switch (iState) { + case 0: + m_dwIndex++; + if (ch != '<') + break; + + iState = 1; + break; + case 1: + if (ch == '?') { + m_dwIndex++; + SkipLiterals("?>"); + iState = 0; + break; + } + if (ch == '!') { + m_dwIndex++; + SkipLiterals("-->"); + iState = 0; + break; + } + if (ch == '/') { + m_dwIndex++; + GetName(space, name); + *bEndTag = true; + } else { + GetName(space, name); + *bEndTag = false; + } + return; + } + } + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (m_dwIndex < m_dwBufferSize || IsEOF()) + break; + } while (ReadNextBlock()); +} + +std::unique_ptr CXML_Parser::ParseElement(CXML_Element* pParent, + bool bStartTag) { + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (IsEOF()) + return nullptr; + + CFX_ByteString tag_name; + CFX_ByteString tag_space; + bool bEndTag; + GetTagName(bStartTag, &bEndTag, &tag_space, &tag_name); + if (tag_name.IsEmpty() || bEndTag) + return nullptr; + + auto pElement = pdfium::MakeUnique( + pParent, tag_space.AsStringC(), tag_name.AsStringC()); + do { + CFX_ByteString attr_space; + CFX_ByteString attr_name; + while (m_dwIndex < m_dwBufferSize) { + SkipWhiteSpaces(); + if (IsEOF()) + break; + + if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex])) + break; + + GetName(&attr_space, &attr_name); + SkipWhiteSpaces(); + if (IsEOF()) + break; + + if (m_pBuffer[m_dwIndex] != '=') + break; + + m_dwIndex++; + SkipWhiteSpaces(); + if (IsEOF()) + break; + + CFX_WideString attr_value; + GetAttrValue(attr_value); + pElement->m_AttrMap.SetAt(attr_space, attr_name, attr_value); + } + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (m_dwIndex < m_dwBufferSize || IsEOF()) + break; + } while (ReadNextBlock()); + SkipWhiteSpaces(); + if (IsEOF()) + return pElement; + + uint8_t ch = m_pBuffer[m_dwIndex++]; + if (ch == '/') { + m_dwIndex++; + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + return pElement; + } + if (ch 
!= '>') { + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + return nullptr; + } + SkipWhiteSpaces(); + if (IsEOF()) + return pElement; + + CFX_UTF8Decoder decoder; + CFX_WideTextBuf content; + bool bCDATA = false; + int32_t iState = 0; + do { + while (m_dwIndex < m_dwBufferSize) { + ch = m_pBuffer[m_dwIndex++]; + switch (iState) { + case 0: + if (ch == '<') { + iState = 1; + } else if (ch == '&') { + decoder.ClearStatus(); + decoder.AppendChar(GetCharRef()); + } else { + decoder.Input(ch); + } + break; + case 1: + if (ch == '!') { + iState = 2; + } else if (ch == '?') { + SkipLiterals("?>"); + SkipWhiteSpaces(); + iState = 0; + } else if (ch == '/') { + CFX_ByteString space; + CFX_ByteString name; + GetName(&space, &name); + SkipWhiteSpaces(); + m_dwIndex++; + iState = 10; + } else { + content << decoder.GetResult(); + CFX_WideString dataStr = content.MakeString(); + if (!bCDATA) + dataStr.TrimRight(L" \t\r\n"); + + InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get()); + content.Clear(); + decoder.Clear(); + bCDATA = false; + iState = 0; + m_dwIndex--; + std::unique_ptr pSubElement( + ParseElement(pElement.get(), true)); + if (!pSubElement) + break; + + pElement->m_Children.push_back( + {CXML_Element::Element, pSubElement.release()}); + SkipWhiteSpaces(); + } + break; + case 2: + if (ch == '[') { + SkipLiterals("]]>"); + } else if (ch == '-') { + m_dwIndex++; + SkipLiterals("-->"); + } else { + SkipLiterals(">"); + } + decoder.Clear(); + SkipWhiteSpaces(); + iState = 0; + break; + } + if (iState == 10) { + break; + } + } + m_nOffset = m_nBufferOffset + static_cast(m_dwIndex); + if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) + break; + } while (ReadNextBlock()); + content << decoder.GetResult(); + CFX_WideString dataStr = content.MakeString(); + dataStr.TrimRight(L" \t\r\n"); + + InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get()); + content.Clear(); + decoder.Clear(); + bCDATA = false; + return pElement; +} + +void 
CXML_Parser::InsertContentSegment(bool bCDATA, + const CFX_WideStringC& content, + CXML_Element* pElement) { + if (content.IsEmpty()) + return; + + CXML_Content* pContent = new CXML_Content; + pContent->Set(bCDATA, content); + pElement->m_Children.push_back({CXML_Element::Content, pContent}); +} + +std::unique_ptr CXML_Element::Parse(const void* pBuffer, + size_t size) { + CXML_Parser parser; + if (!parser.Init(static_cast(pBuffer), size)) + return nullptr; + return parser.ParseElement(nullptr, false); +} diff --git a/core/fxcrt/xml/cxml_parser.h b/core/fxcrt/xml/cxml_parser.h new file mode 100644 index 0000000000..37f14e9834 --- /dev/null +++ b/core/fxcrt/xml/cxml_parser.h @@ -0,0 +1,55 @@ +// Copyright 2014 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_FXCRT_XML_CXML_PARSER_H_ +#define CORE_FXCRT_XML_CXML_PARSER_H_ + +#include +#include + +#include "core/fxcrt/fx_stream.h" + +class CFX_UTF8Decoder; +class CXML_Element; + +class CXML_Parser { + public: + CXML_Parser(); + ~CXML_Parser(); + + bool Init(const uint8_t* pBuffer, size_t size); + bool ReadNextBlock(); + bool IsEOF(); + bool HaveAvailData(); + void SkipWhiteSpaces(); + void GetName(CFX_ByteString* space, CFX_ByteString* name); + void GetAttrValue(CFX_WideString& value); + uint32_t GetCharRef(); + void GetTagName(bool bStartTag, + bool* bEndTag, + CFX_ByteString* space, + CFX_ByteString* name); + void SkipLiterals(const CFX_ByteStringC& str); + std::unique_ptr ParseElement(CXML_Element* pParent, + bool bStartTag); + void InsertContentSegment(bool bCDATA, + const CFX_WideStringC& content, + CXML_Element* pElement); + void InsertCDATASegment(CFX_UTF8Decoder& decoder, CXML_Element* pElement); + + CFX_RetainPtr m_pDataAcc; + FX_FILESIZE m_nOffset; + const uint8_t* m_pBuffer; + size_t m_dwBufferSize; + FX_FILESIZE 
m_nBufferOffset; + size_t m_dwIndex; +}; + +void FX_XML_SplitQualifiedName(const CFX_ByteStringC& bsFullName, + CFX_ByteStringC& bsSpace, + CFX_ByteStringC& bsName); + +#endif // CORE_FXCRT_XML_CXML_PARSER_H_ diff --git a/core/fxcrt/xml_int.h b/core/fxcrt/xml_int.h deleted file mode 100644 index 96a7da9c51..0000000000 --- a/core/fxcrt/xml_int.h +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2014 PDFium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com - -#ifndef CORE_FXCRT_XML_INT_H_ -#define CORE_FXCRT_XML_INT_H_ - -#include -#include - -#include "core/fxcrt/fx_stream.h" - -class CFX_UTF8Decoder; -class CXML_Element; - -class CXML_Parser { - public: - CXML_Parser(); - ~CXML_Parser(); - - bool Init(const uint8_t* pBuffer, size_t size); - bool ReadNextBlock(); - bool IsEOF(); - bool HaveAvailData(); - void SkipWhiteSpaces(); - void GetName(CFX_ByteString* space, CFX_ByteString* name); - void GetAttrValue(CFX_WideString& value); - uint32_t GetCharRef(); - void GetTagName(bool bStartTag, - bool* bEndTag, - CFX_ByteString* space, - CFX_ByteString* name); - void SkipLiterals(const CFX_ByteStringC& str); - std::unique_ptr ParseElement(CXML_Element* pParent, - bool bStartTag); - void InsertContentSegment(bool bCDATA, - const CFX_WideStringC& content, - CXML_Element* pElement); - void InsertCDATASegment(CFX_UTF8Decoder& decoder, CXML_Element* pElement); - - CFX_RetainPtr m_pDataAcc; - FX_FILESIZE m_nOffset; - const uint8_t* m_pBuffer; - size_t m_dwBufferSize; - FX_FILESIZE m_nBufferOffset; - size_t m_dwIndex; -}; - -void FX_XML_SplitQualifiedName(const CFX_ByteStringC& bsFullName, - CFX_ByteStringC& bsSpace, - CFX_ByteStringC& bsName); - -#endif // CORE_FXCRT_XML_INT_H_ -- cgit v1.2.3