// path: root/core/fpdfapi/parser/cpdf_simple_parser.cpp
// blob: 45ea0d452827461632a1f2a46d09b8421910ff16
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#include "core/fpdfapi/parser/cpdf_simple_parser.h"

#include "core/fpdfapi/parser/fpdf_parser_utility.h"

// Constructs a parser over |str| by initializing data_ from it. The read
// position (cur_pos_) is not set in this initializer list.
// NOTE(review): data_ is presumably a non-owning view, in which case the bytes
// behind |str| must outlive this parser -- confirm against the header.
CPDF_SimpleParser::CPDF_SimpleParser(const ByteStringView& str) : data_(str) {}

// Defaulted out of line; this file adds no custom teardown logic.
CPDF_SimpleParser::~CPDF_SimpleParser() = default;

// Returns the next token in data_ starting at cur_pos_, and advances cur_pos_
// past it. Leading whitespace and '%' comments (up to a line ending) are
// skipped first; if the data runs out while skipping, an empty view is
// returned.
//
// Tokens produced, keyed on the first non-skipped byte:
//   '/'        - a name: the slash plus every following byte for which
//                PDFCharIsOther() or PDFCharIsNumeric() holds. If the data
//                ends before a terminating byte is seen, a zero-length range
//                is requested from data_ (existing behavior, kept as-is).
//   "<<", ">>" - two-byte delimiters.
//   '<'        - everything through the next '>' (or to end of data).
//   '>'        - a lone one-byte delimiter.
//   '('        - everything through the matching ')' honoring nesting and
//                backslash escapes (or to end of data).
//   other delimiter - that single byte.
//   anything else   - a run of bytes up to, but not including, the next
//                     delimiter or whitespace byte.
ByteStringView CPDF_SimpleParser::GetWord() {
  uint8_t ch;

  // Skip whitespace and comment lines.
  while (1) {
    if (data_.GetLength() <= cur_pos_)
      return ByteStringView();

    ch = data_[cur_pos_++];
    while (PDFCharIsWhitespace(ch)) {
      if (data_.GetLength() <= cur_pos_)
        return ByteStringView();
      ch = data_[cur_pos_++];
    }

    if (ch != '%')
      break;

    // Inside a comment: discard bytes through the first line-ending byte, then
    // go around again to skip any further whitespace or comments.
    while (1) {
      if (data_.GetLength() <= cur_pos_)
        return ByteStringView();

      ch = data_[cur_pos_++];
      if (PDFCharIsLineEnding(ch))
        break;
    }
  }

  // Token length in bytes. It is computed from position differences and
  // per-byte increments, so it must be as wide as the positions themselves; an
  // 8-bit counter would wrap for any token longer than 255 bytes and yield a
  // truncated view even though cur_pos_ has moved past the whole token.
  uint32_t dwSize = 0;
  // |ch| has already been consumed, so the token starts one byte back.
  uint32_t start_pos = cur_pos_ - 1;
  if (PDFCharIsDelimiter(ch)) {
    // Find names
    if (ch == '/') {
      while (1) {
        // Running out of data leaves dwSize at 0.
        if (data_.GetLength() <= cur_pos_)
          break;

        ch = data_[cur_pos_++];
        if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
          // Un-read the terminating byte; it belongs to the next token.
          cur_pos_--;
          dwSize = cur_pos_ - start_pos;
          break;
        }
      }
      return data_.Mid(start_pos, dwSize);
    }

    dwSize = 1;
    if (ch == '<') {
      if (data_.GetLength() <= cur_pos_)
        return data_.Mid(start_pos, dwSize);

      // Note: this byte is consumed and only compared against '<'; the scan
      // for the closing '>' below starts at the byte after it.
      ch = data_[cur_pos_++];
      if (ch == '<') {
        dwSize = 2;
      } else {
        while (cur_pos_ < data_.GetLength() && data_[cur_pos_] != '>')
          cur_pos_++;

        // Include the closing '>' when one was found.
        if (cur_pos_ < data_.GetLength())
          cur_pos_++;

        dwSize = cur_pos_ - start_pos;
      }
    } else if (ch == '>') {
      if (data_.GetLength() <= cur_pos_)
        return data_.Mid(start_pos, dwSize);

      ch = data_[cur_pos_++];
      if (ch == '>')
        dwSize = 2;
      else
        cur_pos_--;
    } else if (ch == '(') {
      // Nesting depth of unescaped parentheses; the opening '(' counts as 1.
      int level = 1;
      while (cur_pos_ < data_.GetLength()) {
        if (data_[cur_pos_] == ')') {
          level--;
          if (level == 0)
            break;
        }

        if (data_[cur_pos_] == '\\') {
          // Step over the backslash so that the increment below skips the
          // escaped byte as well.
          cur_pos_++;
        } else if (data_[cur_pos_] == '(') {
          level++;
        }
        // A trailing backslash can leave cur_pos_ at the end of the data.
        if (data_.GetLength() <= cur_pos_)
          break;

        cur_pos_++;
      }
      // Include the closing ')' when one was found.
      if (cur_pos_ < data_.GetLength())
        cur_pos_++;

      dwSize = cur_pos_ - start_pos;
    }
    return data_.Mid(start_pos, dwSize);
  }

  // Regular token: extend until the next delimiter or whitespace byte, which
  // is left unconsumed for the following call.
  dwSize = 1;
  while (cur_pos_ < data_.GetLength()) {
    ch = data_[cur_pos_++];

    if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
      cur_pos_--;
      break;
    }
    dwSize++;
  }
  return data_.Mid(start_pos, dwSize);
}