summaryrefslogtreecommitdiff
path: root/core/fxcrt/xml/cfx_xmlsyntaxparser.h
blob: c59cabae19dd48b16e8a9798887f87c10918a078 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#ifndef CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_
#define CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_

#include <stack>
#include <vector>

#include "core/fxcrt/cfx_blockbuffer.h"
#include "core/fxcrt/cfx_retain_ptr.h"
#include "core/fxcrt/cfx_seekablestreamproxy.h"
#include "core/fxcrt/fx_string.h"
#include "core/fxcrt/xml/cfx_xmlnode.h"

enum class FX_XmlSyntaxResult {
  None,
  InstructionOpen,
  InstructionClose,
  ElementOpen,
  ElementBreak,
  ElementClose,
  TargetName,
  TagName,
  AttriName,
  AttriValue,
  Text,
  CData,
  TargetData,
  Error,
  EndOfString
};

class CFX_XMLSyntaxParser {
 public:
  static bool IsXMLNameChar(wchar_t ch, bool bFirstChar);

  explicit CFX_XMLSyntaxParser(
      const CFX_RetainPtr<CFX_SeekableStreamProxy>& pStream);
  ~CFX_XMLSyntaxParser();

  FX_XmlSyntaxResult DoSyntaxParse();

  int32_t GetStatus() const;
  FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; }
  FX_FILESIZE GetCurrentBinaryPos() const;
  int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
  int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }

  CFX_WideString GetTargetName() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  CFX_WideString GetTagName() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  CFX_WideString GetAttributeName() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  CFX_WideString GetAttributeValue() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  CFX_WideString GetTextData() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

  CFX_WideString GetTargetData() const {
    return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
  }

 protected:
  enum class FDE_XmlSyntaxState {
    Text,
    Node,
    Target,
    Tag,
    AttriName,
    AttriEqualSign,
    AttriQuotation,
    AttriValue,
    Entity,
    EntityDecimal,
    EntityHex,
    CloseInstruction,
    BreakElement,
    CloseElement,
    SkipDeclNode,
    DeclCharData,
    SkipComment,
    SkipCommentOrDecl,
    SkipCData,
    TargetData
  };

  void ParseTextChar(wchar_t ch);

  CFX_RetainPtr<CFX_SeekableStreamProxy> m_pStream;
  FX_STRSIZE m_iXMLPlaneSize;
  FX_FILESIZE m_iCurrentPos;
  int32_t m_iCurrentNodeNum;
  int32_t m_iLastNodeNum;
  int32_t m_iParsedBytes;
  FX_FILESIZE m_ParsedChars;
  std::vector<wchar_t> m_Buffer;
  FX_STRSIZE m_iBufferChars;
  bool m_bEOS;
  FX_FILESIZE m_Start;  // Start position in m_Buffer
  FX_FILESIZE m_End;    // End position in m_Buffer
  FX_XMLNODE m_CurNode;
  std::stack<FX_XMLNODE> m_XMLNodeStack;
  CFX_BlockBuffer m_BlockBuffer;
  int32_t m_iAllocStep;
  wchar_t* m_pCurrentBlock;  // Pointer into CFX_BlockBuffer
  int32_t m_iIndexInBlock;
  int32_t m_iTextDataLength;
  FX_XmlSyntaxResult m_syntaxParserResult;
  FDE_XmlSyntaxState m_syntaxParserState;
  wchar_t m_wQuotationMark;
  int32_t m_iEntityStart;
  std::stack<wchar_t> m_SkipStack;
  wchar_t m_SkipChar;
};

#endif  // CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_