blob: ff6e2cf94d756b692ed85f96808a0b0f715e92a5 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
|
// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "core/fpdfapi/parser/cpdf_simple_parser.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
// Builds a parser that reads its tokens from |input|. Only the span itself is
// stored in data_; no parsing happens in the constructor.
CPDF_SimpleParser::CPDF_SimpleParser(pdfium::span<const uint8_t> input)
    : data_(input) {}
// Out-of-line defaulted destructor; no custom teardown is performed here.
CPDF_SimpleParser::~CPDF_SimpleParser() = default;
// Returns the next token in data_ starting at cur_pos_, and advances cur_pos_
// past it. Leading whitespace and '%' comments (through their line ending)
// are skipped first. Returns an empty view when the input is exhausted before
// a token starts.
//
// Tokens recognised, by their first byte:
//   '/'   a name: the slash plus the following run of "other"/numeric bytes.
//         If the input ends before a terminating byte is seen, the length is
//         never set and an empty view is returned (existing behaviour, kept).
//   '<'   either "<<", or everything up to and including the next '>'.
//   '>'   either ">>" or a lone ">".
//   '('   a literal string up to its balancing ')', honouring nested
//         parentheses and backslash escapes; runs to end of input if
//         unterminated.
//   any other delimiter: that single byte.
//   anything else: a regular word, ending before the next delimiter or
//         whitespace byte.
ByteStringView CPDF_SimpleParser::GetWord() {
  uint8_t ch;

  // Skip whitespace and comment lines.
  while (true) {
    if (data_.size() <= cur_pos_)
      return ByteStringView();

    ch = data_[cur_pos_++];
    while (PDFCharIsWhitespace(ch)) {
      if (data_.size() <= cur_pos_)
        return ByteStringView();
      ch = data_[cur_pos_++];
    }

    if (ch != '%')
      break;

    // Inside a comment: discard bytes up to and including the line ending,
    // then go around again to skip whatever whitespace/comments follow.
    while (true) {
      if (data_.size() <= cur_pos_)
        return ByteStringView();

      ch = data_[cur_pos_++];
      if (PDFCharIsLineEnding(ch))
        break;
    }
  }

  // Token length. This must be as wide as the positions it is derived from:
  // a narrower type (it used to be uint8_t) silently truncates the length of
  // any token longer than 255 bytes, returning a view that is shorter than
  // what cur_pos_ has actually consumed.
  uint32_t dwSize = 0;
  // |ch| has already been consumed, so the token starts one byte back.
  const uint32_t start_pos = cur_pos_ - 1;

  if (PDFCharIsDelimiter(ch)) {
    // Find names
    if (ch == '/') {
      while (cur_pos_ < data_.size()) {
        ch = data_[cur_pos_++];
        if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
          // Un-read the terminator; it belongs to the next token.
          cur_pos_--;
          dwSize = cur_pos_ - start_pos;
          break;
        }
      }
      // dwSize is still 0 here if the input ended inside the name.
      return data_.subspan(start_pos, dwSize);
    }

    // Every other delimiter is at least a one-byte token.
    dwSize = 1;
    if (ch == '<') {
      if (data_.size() <= cur_pos_)
        return data_.subspan(start_pos, dwSize);

      ch = data_[cur_pos_++];
      if (ch == '<') {
        dwSize = 2;
      } else {
        // Hex string: scan forward to the closing '>' and include it.
        // NOTE(review): the byte just consumed above is not itself tested
        // for '>', so for "<>" the scan continues to the next '>'.
        while (cur_pos_ < data_.size() && data_[cur_pos_] != '>')
          cur_pos_++;

        if (cur_pos_ < data_.size())
          cur_pos_++;

        dwSize = cur_pos_ - start_pos;
      }
    } else if (ch == '>') {
      if (data_.size() <= cur_pos_)
        return data_.subspan(start_pos, dwSize);

      ch = data_[cur_pos_++];
      if (ch == '>')
        dwSize = 2;
      else
        cur_pos_--;  // Lone '>': give the look-ahead byte back.
    } else if (ch == '(') {
      int level = 1;
      while (cur_pos_ < data_.size()) {
        if (data_[cur_pos_] == ')') {
          level--;
          if (level == 0)
            break;
        }

        if (data_[cur_pos_] == '\\') {
          // Step over the backslash; the escaped byte (if any) is skipped by
          // the increment below without being interpreted.
          cur_pos_++;
        } else if (data_[cur_pos_] == '(') {
          level++;
        }

        // A trailing backslash may have moved us to the end of the input.
        if (data_.size() <= cur_pos_)
          break;

        cur_pos_++;
      }

      // Include the closing ')' when one was found.
      if (cur_pos_ < data_.size())
        cur_pos_++;

      dwSize = cur_pos_ - start_pos;
    }
    return data_.subspan(start_pos, dwSize);
  }

  // Regular word: extend until the next delimiter or whitespace byte, which
  // is left unconsumed.
  dwSize = 1;
  while (cur_pos_ < data_.size()) {
    ch = data_[cur_pos_++];
    if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
      cur_pos_--;
      break;
    }
    dwSize++;
  }
  return data_.subspan(start_pos, dwSize);
}
|