summary refs log tree commit diff
diff options
context:
space:
mode:
author Dan Sinclair <dsinclair@chromium.org> 2015-10-28 10:14:08 -0400
committer Dan Sinclair <dsinclair@chromium.org> 2015-10-28 10:14:08 -0400
commit e3e5675bcdd26b8df7286e10a42d585df6d2321d (patch)
tree 51e839324e4b9a923a669438295acf4151ab418d
parent 74b147b5747cf65a8936d201b3ed5b32454365cc (diff)
download pdfium-e3e5675bcdd26b8df7286e10a42d585df6d2321d.tar.xz
Add helpers to check the PDF_CharType.
This CL adds helpers to provide more descriptive access to PDF_CharType.

R=thestig@chromium.org

Review URL: https://codereview.chromium.org/1407913004 .
-rw-r--r-- core/include/fpdfapi/fpdf_parser.h 19
-rw-r--r-- core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp 173
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp 196
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp 77
4 files changed, 216 insertions, 249 deletions
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h
index a5fce34fd7..d121bb4f79 100644
--- a/core/include/fpdfapi/fpdf_parser.h
+++ b/core/include/fpdfapi/fpdf_parser.h
@@ -45,13 +45,22 @@ class CFX_PrivateData;
#define FPDFPERM_PRINT_HIGH 0x0800
#define FPDF_PAGE_MAX_NUM 0xFFFFF
-// Indexed by 8-bit character code, contains either:
-// 'W' - for whitespace: NUL, TAB, CR, LF, FF, 0x80, 0xff
-// 'N' - for numeric: 0123456789+-.
-// 'D' - for delimiter: %()/<>[]{}
-// 'R' - otherwise.
+// Use the accessors below instead of directly accessing PDF_CharType.
extern const char PDF_CharType[256];
+inline bool PDFCharIsWhitespace(uint8_t c) {
+ return PDF_CharType[c] == 'W';
+}
+inline bool PDFCharIsNumeric(uint8_t c) {
+ return PDF_CharType[c] == 'N';
+}
+inline bool PDFCharIsDelimiter(uint8_t c) {
+ return PDF_CharType[c] == 'D';
+}
+inline bool PDFCharIsOther(uint8_t c) {
+ return PDF_CharType[c] == 'R';
+}
+
// Indexed by 8-bit char code, contains unicode code points.
extern const FX_WORD PDFDocEncoding[256];
diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp
index 694d5234a8..27d8c24f15 100644
--- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp
+++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp
@@ -326,14 +326,14 @@ CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc,
CPDF_Dictionary* pDict,
CPDF_Object* pCSObj,
FX_BOOL bDecode) {
- if (m_Pos == m_Size) {
- return NULL;
- }
- if (PDF_CharType[m_pBuf[m_Pos]] == 'W') {
+ if (m_Pos == m_Size)
+ return nullptr;
+
+ if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
m_Pos++;
- }
+
CFX_ByteString Decoder;
- CPDF_Dictionary* pParam = NULL;
+ CPDF_Dictionary* pParam = nullptr;
CPDF_Object* pFilter = pDict->GetElementValue(FX_BSTRC("Filter"));
if (pFilter) {
if (CPDF_Array* pArray = pFilter->AsArray()) {
@@ -453,66 +453,66 @@ CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc,
CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
if (m_pLastObj) {
m_pLastObj->Release();
- m_pLastObj = NULL;
+ m_pLastObj = nullptr;
}
+
m_WordSize = 0;
FX_BOOL bIsNumber = TRUE;
- if (m_Pos >= m_Size) {
+ if (m_Pos >= m_Size)
return EndOfData;
- }
+
int ch = m_pBuf[m_Pos++];
- int type = PDF_CharType[ch];
while (1) {
- while (type == 'W') {
- if (m_Size <= m_Pos) {
+ while (PDFCharIsWhitespace(ch)) {
+ if (m_Size <= m_Pos)
return EndOfData;
- }
+
ch = m_pBuf[m_Pos++];
- type = PDF_CharType[ch];
}
- if (ch != '%') {
+
+ if (ch != '%')
break;
- }
+
while (1) {
- if (m_Size <= m_Pos) {
+ if (m_Size <= m_Pos)
return EndOfData;
- }
+
ch = m_pBuf[m_Pos++];
- if (ch == '\r' || ch == '\n') {
+ if (ch == '\r' || ch == '\n')
break;
- }
}
- type = PDF_CharType[ch];
}
- if (type == 'D' && ch != '/') {
+
+ if (PDFCharIsDelimiter(ch) && ch != '/') {
m_Pos--;
m_pLastObj = ReadNextObject();
return Others;
}
+
while (1) {
- if (m_WordSize < MAX_WORD_BUFFER) {
+ if (m_WordSize < MAX_WORD_BUFFER)
m_WordBuffer[m_WordSize++] = ch;
- }
- if (type != 'N') {
+
+ if (!PDFCharIsNumeric(ch))
bIsNumber = FALSE;
- }
- if (m_Size <= m_Pos) {
+
+ if (m_Size <= m_Pos)
break;
- }
+
ch = m_pBuf[m_Pos++];
- type = PDF_CharType[ch];
- if (type == 'D' || type == 'W') {
+
+ if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
m_Pos--;
break;
}
}
+
m_WordBuffer[m_WordSize] = 0;
- if (bIsNumber) {
+ if (bIsNumber)
return Number;
- }
- if (m_WordBuffer[0] == '/') {
+ if (m_WordBuffer[0] == '/')
return Name;
- }
+
if (m_WordSize == 4) {
if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
m_pLastObj = CPDF_Boolean::Create(TRUE);
@@ -532,51 +532,48 @@ CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
}
void CPDF_StreamParser::SkipPathObject() {
FX_DWORD command_startpos = m_Pos;
- if (m_Pos >= m_Size) {
+ if (m_Pos >= m_Size)
return;
- }
+
int ch = m_pBuf[m_Pos++];
- int type = PDF_CharType[ch];
while (1) {
- while (type == 'W') {
- if (m_Pos >= m_Size) {
+ while (PDFCharIsWhitespace(ch)) {
+ if (m_Pos >= m_Size)
return;
- }
ch = m_pBuf[m_Pos++];
- type = PDF_CharType[ch];
}
- if (type != 'N') {
+
+ if (!PDFCharIsNumeric(ch)) {
m_Pos = command_startpos;
return;
}
+
while (1) {
- while (type != 'W') {
- if (m_Pos >= m_Size) {
+ while (!PDFCharIsWhitespace(ch)) {
+ if (m_Pos >= m_Size)
return;
- }
ch = m_pBuf[m_Pos++];
- type = PDF_CharType[ch];
}
- while (type == 'W') {
- if (m_Pos >= m_Size) {
+
+ while (PDFCharIsWhitespace(ch)) {
+ if (m_Pos >= m_Size)
return;
- }
ch = m_pBuf[m_Pos++];
- type = PDF_CharType[ch];
}
- if (type == 'N') {
+
+ if (PDFCharIsNumeric(ch))
continue;
- }
+
FX_DWORD op_startpos = m_Pos - 1;
- while (type != 'W' && type != 'D') {
- if (m_Pos >= m_Size) {
+ while (!PDFCharIsWhitespace(ch) && !PDFCharIsDelimiter(ch)) {
+ if (m_Pos >= m_Size)
return;
- }
ch = m_pBuf[m_Pos++];
- type = PDF_CharType[ch];
}
+
if (m_Pos - op_startpos == 2) {
int op = m_pBuf[op_startpos];
+ // TODO(dsinclair): Can these be turned into named constants?
if (op == 'm' || op == 'l' || op == 'c' || op == 'v' || op == 'y') {
command_startpos = m_Pos;
break;
@@ -682,92 +679,82 @@ CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray,
void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) {
m_WordSize = 0;
bIsNumber = TRUE;
- if (m_Size <= m_Pos) {
+ if (m_Size <= m_Pos)
return;
- }
+
int ch = m_pBuf[m_Pos++];
- int type = PDF_CharType[ch];
while (1) {
- while (type == 'W') {
+ while (PDFCharIsWhitespace(ch)) {
if (m_Size <= m_Pos) {
return;
}
ch = m_pBuf[m_Pos++];
- type = PDF_CharType[ch];
}
- if (ch != '%') {
+
+ if (ch != '%')
break;
- }
+
while (1) {
- if (m_Size <= m_Pos) {
+ if (m_Size <= m_Pos)
return;
- }
ch = m_pBuf[m_Pos++];
- if (ch == '\r' || ch == '\n') {
+ if (ch == '\r' || ch == '\n')
break;
- }
}
- type = PDF_CharType[ch];
}
- if (type == 'D') {
+
+ if (PDFCharIsDelimiter(ch)) {
bIsNumber = FALSE;
m_WordBuffer[m_WordSize++] = ch;
if (ch == '/') {
while (1) {
- if (m_Size <= m_Pos) {
+ if (m_Size <= m_Pos)
return;
- }
ch = m_pBuf[m_Pos++];
- type = PDF_CharType[ch];
- if (type != 'R' && type != 'N') {
+ if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
m_Pos--;
return;
}
- if (m_WordSize < MAX_WORD_BUFFER) {
+
+ if (m_WordSize < MAX_WORD_BUFFER)
m_WordBuffer[m_WordSize++] = ch;
- }
}
} else if (ch == '<') {
- if (m_Size <= m_Pos) {
+ if (m_Size <= m_Pos)
return;
- }
ch = m_pBuf[m_Pos++];
- if (ch == '<') {
+ if (ch == '<')
m_WordBuffer[m_WordSize++] = ch;
- } else {
+ else
m_Pos--;
- }
} else if (ch == '>') {
- if (m_Size <= m_Pos) {
+ if (m_Size <= m_Pos)
return;
- }
ch = m_pBuf[m_Pos++];
- if (ch == '>') {
+ if (ch == '>')
m_WordBuffer[m_WordSize++] = ch;
- } else {
+ else
m_Pos--;
- }
}
return;
}
+
while (1) {
- if (m_WordSize < MAX_WORD_BUFFER) {
+ if (m_WordSize < MAX_WORD_BUFFER)
m_WordBuffer[m_WordSize++] = ch;
- }
- if (type != 'N') {
+ if (!PDFCharIsNumeric(ch))
bIsNumber = FALSE;
- }
- if (m_Size <= m_Pos) {
+
+ if (m_Size <= m_Pos)
return;
- }
ch = m_pBuf[m_Pos++];
- type = PDF_CharType[ch];
- if (type == 'D' || type == 'W') {
+ if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
m_Pos--;
break;
}
}
}
+
CFX_ByteString CPDF_StreamParser::ReadString() {
if (m_Size <= m_Pos) {
return CFX_ByteString();
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
index c1b78f1d81..e8842888c8 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
@@ -630,7 +630,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
uint8_t byte = buffer[i];
switch (status) {
case 0:
- if (PDF_CharType[byte] == 'W') {
+ if (PDFCharIsWhitespace(byte)) {
status = 1;
}
if (byte <= '9' && byte >= '0') {
@@ -658,7 +658,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
}
break;
case 1:
- if (PDF_CharType[byte] == 'W') {
+ if (PDFCharIsWhitespace(byte)) {
break;
} else if (byte <= '9' && byte >= '0') {
start_pos = pos + i;
@@ -679,7 +679,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
if (byte <= '9' && byte >= '0') {
objnum = objnum * 10 + byte - '0';
break;
- } else if (PDF_CharType[byte] == 'W') {
+ } else if (PDFCharIsWhitespace(byte)) {
status = 3;
} else {
--i;
@@ -692,7 +692,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
start_pos1 = pos + i;
status = 4;
gennum = byte - '0';
- } else if (PDF_CharType[byte] == 'W') {
+ } else if (PDFCharIsWhitespace(byte)) {
break;
} else if (byte == 't') {
status = 7;
@@ -706,7 +706,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
if (byte <= '9' && byte >= '0') {
gennum = gennum * 10 + byte - '0';
break;
- } else if (PDF_CharType[byte] == 'W') {
+ } else if (PDFCharIsWhitespace(byte)) {
status = 5;
} else {
--i;
@@ -717,7 +717,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
if (byte == 'o') {
status = 6;
inside_index = 1;
- } else if (PDF_CharType[byte] == 'W') {
+ } else if (PDFCharIsWhitespace(byte)) {
break;
} else if (byte <= '9' && byte >= '0') {
objnum = gennum;
@@ -752,7 +752,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
}
break;
case 3:
- if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') {
+ if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
if (objnum > 0x1000000) {
status = 0;
break;
@@ -826,7 +826,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
break;
case 7:
if (inside_index == 7) {
- if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') {
+ if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
last_trailer = pos + i - 7;
m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0);
@@ -937,13 +937,13 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
status = 0;
break;
case 13:
- if (PDF_CharType[byte] == 'D' || PDF_CharType[byte] == 'W') {
+ if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
--i;
status = 0;
}
break;
case 14:
- if (PDF_CharType[byte] == 'W') {
+ if (PDFCharIsWhitespace(byte)) {
status = 0;
} else if (byte == '%' || byte == '(' || byte == '<' ||
byte == '\\') {
@@ -1646,15 +1646,13 @@ FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable() {
uint8_t ch = 0;
FX_DWORD dwCount = 0;
m_Syntax.GetNextChar(ch);
- int32_t type = PDF_CharType[ch];
- while (type == 'W') {
+ while (PDFCharIsWhitespace(ch)) {
++dwCount;
if (m_Syntax.m_FileLen >=
(FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) {
break;
}
m_Syntax.GetNextChar(ch);
- type = PDF_CharType[ch];
}
m_LastXRefOffset += dwCount;
FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
@@ -1771,77 +1769,66 @@ void CPDF_SyntaxParser::GetNextWord() {
if (!GetNextChar(ch)) {
return;
}
- uint8_t type = PDF_CharType[ch];
while (1) {
- while (type == 'W') {
- if (!GetNextChar(ch)) {
+ while (PDFCharIsWhitespace(ch)) {
+ if (!GetNextChar(ch))
return;
- }
- type = PDF_CharType[ch];
}
- if (ch != '%') {
+ if (ch != '%')
break;
- }
+
while (1) {
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return;
- }
- if (ch == '\r' || ch == '\n') {
+ if (ch == '\r' || ch == '\n')
break;
- }
}
- type = PDF_CharType[ch];
}
- if (type == 'D') {
+
+ if (PDFCharIsDelimiter(ch)) {
m_bIsNumber = FALSE;
m_WordBuffer[m_WordSize++] = ch;
if (ch == '/') {
while (1) {
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return;
- }
- type = PDF_CharType[ch];
- if (type != 'R' && type != 'N') {
+
+ if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
m_Pos--;
return;
}
- if (m_WordSize < MAX_WORD_BUFFER) {
+
+ if (m_WordSize < MAX_WORD_BUFFER)
m_WordBuffer[m_WordSize++] = ch;
- }
}
} else if (ch == '<') {
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return;
- }
- if (ch == '<') {
+ if (ch == '<')
m_WordBuffer[m_WordSize++] = ch;
- } else {
+ else
m_Pos--;
- }
} else if (ch == '>') {
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return;
- }
- if (ch == '>') {
+ if (ch == '>')
m_WordBuffer[m_WordSize++] = ch;
- } else {
+ else
m_Pos--;
- }
}
return;
}
+
while (1) {
- if (m_WordSize < MAX_WORD_BUFFER) {
+ if (m_WordSize < MAX_WORD_BUFFER)
m_WordBuffer[m_WordSize++] = ch;
- }
- if (type != 'N') {
+
+ if (!PDFCharIsNumeric(ch))
m_bIsNumber = FALSE;
- }
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return;
- }
- type = PDF_CharType[ch];
- if (type == 'D' || type == 'W') {
+
+ if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
m_Pos--;
break;
}
@@ -1996,33 +1983,29 @@ void CPDF_SyntaxParser::ToNextLine() {
}
void CPDF_SyntaxParser::ToNextWord() {
uint8_t ch;
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return;
- }
- uint8_t type = PDF_CharType[ch];
+
while (1) {
- while (type == 'W') {
+ while (PDFCharIsWhitespace(ch)) {
m_dwWordPos = m_Pos;
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return;
- }
- type = PDF_CharType[ch];
}
- if (ch != '%') {
+
+ if (ch != '%')
break;
- }
+
while (1) {
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return;
- }
- if (ch == '\r' || ch == '\n') {
+ if (ch == '\r' || ch == '\n')
break;
- }
}
- type = PDF_CharType[ch];
}
m_Pos--;
}
+
CFX_ByteString CPDF_SyntaxParser::GetNextWord(FX_BOOL& bIsNumber) {
GetNextWord();
bIsNumber = m_bIsNumber;
@@ -2511,21 +2494,21 @@ FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
const uint8_t* tag,
FX_DWORD taglen,
FX_BOOL checkKeyword) {
- uint8_t type = PDF_CharType[tag[0]];
- FX_BOOL bCheckLeft = type != 'D' && type != 'W';
- type = PDF_CharType[tag[taglen - 1]];
- FX_BOOL bCheckRight = type != 'D' && type != 'W';
+ bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
+ bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
+ !PDFCharIsWhitespace(tag[taglen - 1]);
uint8_t ch;
if (bCheckRight && startpos + (int32_t)taglen <= limit &&
GetCharAt(startpos + (int32_t)taglen, ch)) {
- uint8_t type = PDF_CharType[ch];
- if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
+ if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
+ (checkKeyword && PDFCharIsDelimiter(ch))) {
return FALSE;
}
}
+
if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
- uint8_t type = PDF_CharType[ch];
- if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
+ if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
+ (checkKeyword && PDFCharIsDelimiter(ch))) {
return FALSE;
}
}
@@ -3769,84 +3752,79 @@ inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) {
FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString& token) {
m_WordSize = 0;
uint8_t ch;
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return FALSE;
- }
- uint8_t type = PDF_CharType[ch];
+
while (1) {
- while (type == 'W') {
- if (!GetNextChar(ch)) {
+ while (PDFCharIsWhitespace(ch)) {
+ if (!GetNextChar(ch))
return FALSE;
- }
- type = PDF_CharType[ch];
}
- if (ch != '%') {
+
+ if (ch != '%')
break;
- }
+
while (1) {
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return FALSE;
- }
- if (ch == '\r' || ch == '\n') {
+ if (ch == '\r' || ch == '\n')
break;
- }
}
- type = PDF_CharType[ch];
}
- if (type == 'D') {
+
+ if (PDFCharIsDelimiter(ch)) {
m_WordBuffer[m_WordSize++] = ch;
if (ch == '/') {
while (1) {
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return FALSE;
- }
- type = PDF_CharType[ch];
- if (type != 'R' && type != 'N') {
+
+ if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
m_Pos--;
CFX_ByteString ret(m_WordBuffer, m_WordSize);
token = ret;
return TRUE;
}
- if (m_WordSize < MAX_WORD_BUFFER) {
+
+ if (m_WordSize < MAX_WORD_BUFFER)
m_WordBuffer[m_WordSize++] = ch;
- }
}
} else if (ch == '<') {
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return FALSE;
- }
- if (ch == '<') {
+
+ if (ch == '<')
m_WordBuffer[m_WordSize++] = ch;
- } else {
+ else
m_Pos--;
- }
} else if (ch == '>') {
- if (!GetNextChar(ch)) {
+ if (!GetNextChar(ch))
return FALSE;
- }
- if (ch == '>') {
+
+ if (ch == '>')
m_WordBuffer[m_WordSize++] = ch;
- } else {
+ else
m_Pos--;
- }
}
+
CFX_ByteString ret(m_WordBuffer, m_WordSize);
token = ret;
return TRUE;
}
+
while (1) {
- if (m_WordSize < MAX_WORD_BUFFER) {
+ if (m_WordSize < MAX_WORD_BUFFER)
m_WordBuffer[m_WordSize++] = ch;
- }
- if (!GetNextChar(ch)) {
+
+ if (!GetNextChar(ch))
return FALSE;
- }
- type = PDF_CharType[ch];
- if (type == 'D' || type == 'W') {
+
+ if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
m_Pos--;
break;
}
}
+
CFX_ByteString ret(m_WordBuffer, m_WordSize);
token = ret;
return TRUE;
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
index e1e60ecae3..335101e85b 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
@@ -5,6 +5,12 @@
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
#include "../../../include/fpdfapi/fpdf_parser.h"
+
+// Indexed by 8-bit character code, contains either:
+// 'W' - for whitespace: NUL, TAB, CR, LF, FF, 0x80, 0xff
+// 'N' - for numeric: 0123456789+-.
+// 'D' - for delimiter: %()/<>[]{}
+// 'R' - otherwise.
const char PDF_CharType[256] = {
// NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO
// SI
@@ -72,45 +78,37 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart,
dwSize = 0;
type = PDFWORD_EOF;
uint8_t ch;
- char chartype;
while (1) {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- chartype = PDF_CharType[ch];
- while (chartype == 'W') {
- if (m_dwSize <= m_dwCurPos) {
+ while (PDFCharIsWhitespace(ch)) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- chartype = PDF_CharType[ch];
}
- if (ch != '%') {
+
+ if (ch != '%')
break;
- }
+
while (1) {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- if (ch == '\r' || ch == '\n') {
+ if (ch == '\r' || ch == '\n')
break;
- }
}
- chartype = PDF_CharType[ch];
}
+
FX_DWORD start_pos = m_dwCurPos - 1;
pStart = m_pData + start_pos;
- if (chartype == 'D') {
+ if (PDFCharIsDelimiter(ch)) {
if (ch == '/') {
while (1) {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- chartype = PDF_CharType[ch];
- if (chartype != 'R' && chartype != 'N') {
+ if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
m_dwCurPos--;
dwSize = m_dwCurPos - start_pos;
type = PDFWORD_NAME;
@@ -121,41 +119,36 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart,
type = PDFWORD_DELIMITER;
dwSize = 1;
if (ch == '<') {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- if (ch == '<') {
+ if (ch == '<')
dwSize = 2;
- } else {
+ else
m_dwCurPos--;
- }
} else if (ch == '>') {
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- if (ch == '>') {
+ if (ch == '>')
dwSize = 2;
- } else {
+ else
m_dwCurPos--;
- }
}
}
return;
}
+
type = PDFWORD_NUMBER;
dwSize = 1;
while (1) {
- if (chartype != 'N') {
+ if (!PDFCharIsNumeric(ch))
type = PDFWORD_TEXT;
- }
- if (m_dwSize <= m_dwCurPos) {
+ if (m_dwSize <= m_dwCurPos)
return;
- }
ch = m_pData[m_dwCurPos++];
- chartype = PDF_CharType[ch];
- if (chartype == 'D' || chartype == 'W') {
+
+ if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
m_dwCurPos--;
break;
}
@@ -331,23 +324,23 @@ CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) {
int i;
for (i = 0; i < src_len; i++) {
uint8_t ch = src_buf[i];
- if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' ||
- PDF_CharType[ch] == 'D') {
+ if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
+ PDFCharIsDelimiter(ch)) {
dest_len += 3;
} else {
dest_len++;
}
}
- if (dest_len == src_len) {
+ if (dest_len == src_len)
return orig;
- }
+
CFX_ByteString res;
FX_CHAR* dest_buf = res.GetBuffer(dest_len);
dest_len = 0;
for (i = 0; i < src_len; i++) {
uint8_t ch = src_buf[i];
- if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' ||
- PDF_CharType[ch] == 'D') {
+ if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' ||
+ PDFCharIsDelimiter(ch)) {
dest_buf[dest_len++] = '#';
dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16];
dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16];