summaryrefslogtreecommitdiff
path: root/core/fxcrt/xml/cfx_xmlparser.h
blob: 52d86fb82d764425b481a0a11fd2cf43be3d7674 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#ifndef CORE_FXCRT_XML_CFX_XMLPARSER_H_
#define CORE_FXCRT_XML_CFX_XMLPARSER_H_

#include <memory>
#include <stack>
#include <vector>

#include "core/fxcrt/cfx_blockbuffer.h"
#include "core/fxcrt/cfx_seekablestreamproxy.h"
#include "core/fxcrt/fx_string.h"
#include "core/fxcrt/retain_ptr.h"
#include "core/fxcrt/xml/cfx_xmlnode.h"

// Forward declarations. Within this header CFX_XMLNode appears only as a
// pointer (constructor parameter, members, stack element) and
// IFX_SeekableStream only inside RetainPtr<>; CFX_XMLElement is not named
// again in this header.
class CFX_XMLElement;
class CFX_XMLNode;
class IFX_SeekableStream;

// Result code produced by CFX_XMLParser::DoSyntaxParse() and cached in
// CFX_XMLParser::m_syntaxParserResult.
//
// Enumerators are numbered consecutively from zero in declaration order, so
// new values should be appended rather than inserted if any code depends on
// the numeric ordering.
enum class FX_XmlSyntaxResult {
  None = 0,
  InstructionOpen,
  InstructionClose,
  ElementOpen,
  ElementBreak,
  ElementClose,
  TargetName,
  TagName,
  AttriName,
  AttriValue,
  Text,
  CData,
  TargetData,
  Error,
  EndOfString,
};

// XML parser constructed from a parent node and a seekable input stream.
// Parse() is the only public instance operation; the lower-level
// DoSyntaxParse() step and the text accessors are protected. All non-inline
// members are defined outside this header, so the notes below describe only
// what the declarations themselves show.
class CFX_XMLParser {
 public:
  // Character classification helper for XML names. |bFirstChar| presumably
  // selects the rule for the first character of a name versus later
  // characters -- TODO confirm against the definition.
  static bool IsXMLNameChar(wchar_t ch, bool bFirstChar);

  // |pParent| is a raw pointer and |pStream| is taken by const reference to a
  // RetainPtr. NOTE(review): the stream member below is a
  // CFX_SeekableStreamProxy, so the constructor presumably wraps |pStream| --
  // confirm in the .cpp.
  CFX_XMLParser(CFX_XMLNode* pParent,
                const RetainPtr<IFX_SeekableStream>& pStream);
  virtual ~CFX_XMLParser();

  // Runs the parse; returns a bool (presumably success/failure -- confirm in
  // the definition).
  bool Parse();

 protected:
  // Performs a syntax-parsing step and reports the outcome as an
  // FX_XmlSyntaxResult.
  FX_XmlSyntaxResult DoSyntaxParse();

  // The four accessors below are identical in implementation: each returns
  // the first m_iTextDataLength characters of m_BlockBuffer. They differ only
  // in name; which one is meaningful presumably depends on the most recent
  // DoSyntaxParse() result -- verify against the caller.
  WideString GetTagName() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  WideString GetAttributeName() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  WideString GetAttributeValue() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  WideString GetTextData() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

 private:
  // Internal parser states; the active value is held in m_syntaxParserState.
  enum class FDE_XmlSyntaxState {
    Text,
    Node,
    Target,
    Tag,
    AttriName,
    AttriEqualSign,
    AttriQuotation,
    AttriValue,
    Entity,
    EntityDecimal,
    EntityHex,
    CloseInstruction,
    BreakElement,
    CloseElement,
    SkipDeclNode,
    DeclCharData,
    SkipComment,
    SkipCommentOrDecl,
    SkipCData,
    TargetData
  };

  void ParseTextChar(wchar_t ch);

  bool GetStatus() const;
  // Sum of the parsed-character count and the start offset into m_Buffer.
  FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; }
  FX_FILESIZE GetCurrentBinaryPos() const;
  // Plain getters for the node counters.
  int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
  int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }

  // Same implementation as the protected text accessors above: the first
  // m_iTextDataLength characters of m_BlockBuffer.
  WideString GetTargetName() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  WideString GetTargetData() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  // NOTE(review): raw node pointers; ownership is not expressed by these
  // types -- confirm lifetime expectations in the .cpp.
  CFX_XMLNode* m_pParent;
  CFX_XMLNode* m_pChild;
  std::stack<CFX_XMLNode*> m_NodeStack;
  WideString m_ws1;

  // Data members are initialized in declaration order, so keep this order in
  // sync with the constructor's initializer list.
  RetainPtr<CFX_SeekableStreamProxy> m_pStream;
  size_t m_iXMLPlaneSize;
  FX_FILESIZE m_iCurrentPos;
  int32_t m_iCurrentNodeNum;  // Returned by GetCurrentNodeNumber().
  int32_t m_iLastNodeNum;     // Returned by GetLastNodeNumber().
  int32_t m_iParsedBytes;
  FX_FILESIZE m_ParsedChars;  // Added to m_Start by GetCurrentPos().
  std::vector<wchar_t> m_Buffer;
  size_t m_iBufferChars;
  bool m_bEOS;
  FX_FILESIZE m_Start;  // Start position in m_Buffer
  FX_FILESIZE m_End;    // End position in m_Buffer
  FX_XMLNODE m_CurNode;
  std::stack<FX_XMLNODE> m_XMLNodeStack;
  // Backing store for every Get*Name/Get*Data/Get*Value accessor above.
  CFX_BlockBuffer m_BlockBuffer;
  int32_t m_iAllocStep;
  wchar_t* m_pCurrentBlock;  // Pointer into CFX_BlockBuffer
  int32_t m_iIndexInBlock;
  // Character count passed to m_BlockBuffer.GetTextData() by the accessors.
  int32_t m_iTextDataLength;
  FX_XmlSyntaxResult m_syntaxParserResult;
  FDE_XmlSyntaxState m_syntaxParserState;
  wchar_t m_wQuotationMark;
  int32_t m_iEntityStart;
  std::stack<wchar_t> m_SkipStack;
  wchar_t m_SkipChar;
};

#endif  // CORE_FXCRT_XML_CFX_XMLPARSER_H_