summary | refs | log | tree | commit | diff
path: root/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
diff options
context:
space:
mode:
author: Dan Sinclair <dsinclair@chromium.org> 2015-10-28 10:20:35 -0400
committer: Dan Sinclair <dsinclair@chromium.org> 2015-10-28 10:20:35 -0400
commit: 69472875a28a4e2d40623893e029af129f5e88e2 (patch)
tree: 667ea56c427e620edfa4262e23d6c24cd967238d /core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
parent: bf18cb6220aa19a64d2705640aad29d3f86ed04a (diff)
download: pdfium-69472875a28a4e2d40623893e029af129f5e88e2.tar.xz
Merge to XFA: Add helpers to check the PDF_CharType.
This CL adds helpers to provide more descriptive access to PDF_CharType.
TBR=thestig@chromium.org
Review URL: https://codereview.chromium.org/1407913004 .
(cherry picked from commit e3e5675bcdd26b8df7286e10a42d585df6d2321d)
Review URL: https://codereview.chromium.org/1419893004 .
Diffstat (limited to 'core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp')
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp | 77
1 file changed, 35 insertions, 42 deletions
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
index e1e60ecae3..335101e85b 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
@@ -5,6 +5,12 @@
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "../../../include/fpdfapi/fpdf_parser.h"
+
+// Indexed by 8-bit character code, contains either:
+// 'W' - for whitespace: NUL, TAB, CR, LF, FF, 0x80, 0xff
+// 'N' - for numeric: 0123456789+-.
+// 'D' - for delimiter: %()/<>[]{}
+// 'R' - otherwise.
const char PDF_CharType[256] = {
// NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO
// SI
@@ -72,45 +78,37 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart,
dwSize = 0;
type = PDFWORD_EOF;
uint8_t ch;
- char chartype;
while (1) {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- chartype = PDF_CharType[ch];
- while (chartype == 'W') {
- if (m_dwSize <= m_dwCurPos) {
+ while (PDFCharIsWhitespace(ch)) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- chartype = PDF_CharType[ch];
}
- if (ch != '%') {
+
+ if (ch != '%')
break;
- }
+
while (1) {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- if (ch == '\r' || ch == '\n') {
+ if (ch == '\r' || ch == '\n')
break;
- }
}
- chartype = PDF_CharType[ch];
}
+
FX_DWORD start_pos = m_dwCurPos - 1;
pStart = m_pData + start_pos;
- if (chartype == 'D') {
+ if (PDFCharIsDelimiter(ch)) {
if (ch == '/') {
while (1) {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- chartype = PDF_CharType[ch];
- if (chartype != 'R' && chartype != 'N') {
+ if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
m_dwCurPos--;
dwSize = m_dwCurPos - start_pos;
type = PDFWORD_NAME;
@@ -121,41 +119,36 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart,
type = PDFWORD_DELIMITER;
dwSize = 1;
if (ch == '<') {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- if (ch == '<') {
+ if (ch == '<')
dwSize = 2;
- } else {
+ else
m_dwCurPos--;
- }
} else if (ch == '>') {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- if (ch == '>') {
+ if (ch == '>')
dwSize = 2;
- } else {
+ else
m_dwCurPos--;
- }
}
}
return;
}
+
type = PDFWORD_NUMBER;
dwSize = 1;
while (1) {
- if (chartype != 'N') {
+ if (!PDFCharIsNumeric(ch))
type = PDFWORD_TEXT;
- }
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- chartype = PDF_CharType[ch];
- if (chartype == 'D' || chartype == 'W') {
+
+ if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
m_dwCurPos--;
break;
}
@@ -331,23 +324,23 @@ CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) {
int i;
for (i = 0; i < src_len; i++) {
uint8_t ch = src_buf[i];
- if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' ||
- PDF_CharType[ch] == 'D') {
+ if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
+ PDFCharIsDelimiter(ch)) {
dest_len += 3;
} else {
dest_len++;
}
}
- if (dest_len == src_len) {
+ if (dest_len == src_len)
return orig;
- }
+
CFX_ByteString res;
FX_CHAR* dest_buf = res.GetBuffer(dest_len);
dest_len = 0;
for (i = 0; i < src_len; i++) {
uint8_t ch = src_buf[i];
- if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' ||
- PDF_CharType[ch] == 'D') {
+ if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
+ PDFCharIsDelimiter(ch)) {
dest_buf[dest_len++] = '#';
dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16];
dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16];