From 69472875a28a4e2d40623893e029af129f5e88e2 Mon Sep 17 00:00:00 2001 From: Dan Sinclair Date: Wed, 28 Oct 2015 10:20:35 -0400 Subject: Merge to XFA: Add helpers to check the PDF_CharType. This CL adds helpers to provide more descriptive access to PDF_CharType. TBR=thestig@chromium.org Review URL: https://codereview.chromium.org/1407913004 . (cherry picked from commit e3e5675bcdd26b8df7286e10a42d585df6d2321d) Review URL: https://codereview.chromium.org/1419893004 . --- .../src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 196 +++++++++------------ .../fpdfapi/fpdf_parser/fpdf_parser_utility.cpp | 77 ++++---- 2 files changed, 122 insertions(+), 151 deletions(-) (limited to 'core/src/fpdfapi/fpdf_parser') diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index c1b78f1d81..e8842888c8 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -630,7 +630,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { uint8_t byte = buffer[i]; switch (status) { case 0: - if (PDF_CharType[byte] == 'W') { + if (PDFCharIsWhitespace(byte)) { status = 1; } if (byte <= '9' && byte >= '0') { @@ -658,7 +658,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { } break; case 1: - if (PDF_CharType[byte] == 'W') { + if (PDFCharIsWhitespace(byte)) { break; } else if (byte <= '9' && byte >= '0') { start_pos = pos + i; @@ -679,7 +679,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { if (byte <= '9' && byte >= '0') { objnum = objnum * 10 + byte - '0'; break; - } else if (PDF_CharType[byte] == 'W') { + } else if (PDFCharIsWhitespace(byte)) { status = 3; } else { --i; @@ -692,7 +692,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { start_pos1 = pos + i; status = 4; gennum = byte - '0'; - } else if (PDF_CharType[byte] == 'W') { + } else if (PDFCharIsWhitespace(byte)) { break; } else if (byte == 't') { status = 7; @@ -706,7 +706,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { if (byte <= '9' && byte >= '0') { gennum = gennum * 10 + byte - '0'; break; - } else if (PDF_CharType[byte] == 'W') { + } else if (PDFCharIsWhitespace(byte)) { status = 5; } else { --i; @@ -717,7 +717,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { if (byte == 'o') { status = 6; inside_index = 1; - } else if (PDF_CharType[byte] == 'W') { + } else if (PDFCharIsWhitespace(byte)) { break; } else if (byte <= '9' && byte >= '0') { objnum = gennum; @@ -752,7 +752,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { } break; case 3: - if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') { + if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { if (objnum > 0x1000000) { status = 0; break; @@ -826,7 +826,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { break; case 7: if (inside_index == 7) { - if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') { + if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { last_trailer = pos + i - 7; m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0); @@ -937,13 +937,13 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { status = 0; break; case 13: - if (PDF_CharType[byte] == 'D' || PDF_CharType[byte] == 'W') { + if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { --i; status = 0; } break; case 14: - if (PDF_CharType[byte] == 'W') { + if (PDFCharIsWhitespace(byte)) { status = 0; } else if (byte == '%' || byte == '(' || byte == '<' || byte == '\\') { @@ -1646,15 +1646,13 @@ FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable() { uint8_t ch = 0; FX_DWORD dwCount = 0; m_Syntax.GetNextChar(ch); - int32_t type = PDF_CharType[ch]; - while (type == 'W') { + while (PDFCharIsWhitespace(ch)) { ++dwCount; if (m_Syntax.m_FileLen >= (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) { break; } m_Syntax.GetNextChar(ch); - type = PDF_CharType[ch]; } m_LastXRefOffset += dwCount; FX_POSITION pos = m_ObjectStreamMap.GetStartPosition(); @@ -1771,77 +1769,66 @@ void CPDF_SyntaxParser::GetNextWord() { if (!GetNextChar(ch)) { return; } - uint8_t type = PDF_CharType[ch]; while (1) { - while (type == 'W') { - if (!GetNextChar(ch)) { + while (PDFCharIsWhitespace(ch)) { + if (!GetNextChar(ch)) return; - } - type = PDF_CharType[ch]; } - if (ch != '%') { + if (ch != '%') break; - } + while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - type = PDF_CharType[ch]; } - if (type == 'D') { + + if (PDFCharIsDelimiter(ch)) { m_bIsNumber = FALSE; m_WordBuffer[m_WordSize++] = ch; if (ch == '/') { while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - type = PDF_CharType[ch]; - if (type != 'R' && type != 'N') { + + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; return; } - if (m_WordSize < MAX_WORD_BUFFER) { + + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } } } else if (ch == '<') { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - if (ch == '<') { + if (ch == '<') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } else if (ch == '>') { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - if (ch == '>') { + if (ch == '>') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } return; } + while (1) { - if (m_WordSize < MAX_WORD_BUFFER) { + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } - if (type != 'N') { + + if (!PDFCharIsNumeric(ch)) m_bIsNumber = FALSE; - } - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - type = PDF_CharType[ch]; - if (type == 'D' || type == 'W') { + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } @@ -1996,33 +1983,29 @@ void CPDF_SyntaxParser::ToNextLine() { } void CPDF_SyntaxParser::ToNextWord() { uint8_t ch; - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - uint8_t type = PDF_CharType[ch]; + while (1) { - while (type == 'W') { + while (PDFCharIsWhitespace(ch)) { m_dwWordPos = m_Pos; - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - type = PDF_CharType[ch]; } - if (ch != '%') { + + if (ch != '%') break; - } + while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - type = PDF_CharType[ch]; } m_Pos--; } + CFX_ByteString CPDF_SyntaxParser::GetNextWord(FX_BOOL& bIsNumber) { GetNextWord(); bIsNumber = m_bIsNumber; @@ -2511,21 +2494,21 @@ FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, const uint8_t* tag, FX_DWORD taglen, FX_BOOL checkKeyword) { - uint8_t type = PDF_CharType[tag[0]]; - FX_BOOL bCheckLeft = type != 'D' && type != 'W'; - type = PDF_CharType[tag[taglen - 1]]; - FX_BOOL bCheckRight = type != 'D' && type != 'W'; + bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); + bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && + !PDFCharIsWhitespace(tag[taglen - 1]); uint8_t ch; if (bCheckRight && startpos + (int32_t)taglen <= limit && GetCharAt(startpos + (int32_t)taglen, ch)) { - uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { + if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || + (checkKeyword && PDFCharIsDelimiter(ch))) { return FALSE; } } + if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { - uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { + if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || + (checkKeyword && PDFCharIsDelimiter(ch))) { return FALSE; } } @@ -3769,84 +3752,79 @@ inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) { FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString& token) { m_WordSize = 0; uint8_t ch; - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - uint8_t type = PDF_CharType[ch]; + while (1) { - while (type == 'W') { - if (!GetNextChar(ch)) { + while (PDFCharIsWhitespace(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - type = PDF_CharType[ch]; } - if (ch != '%') { + + if (ch != '%') break; - } + while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - type = PDF_CharType[ch]; } - if (type == 'D') { + + if (PDFCharIsDelimiter(ch)) { m_WordBuffer[m_WordSize++] = ch; if (ch == '/') { while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - type = PDF_CharType[ch]; - if (type != 'R' && type != 'N') { + + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; CFX_ByteString ret(m_WordBuffer, m_WordSize); token = ret; return TRUE; } - if (m_WordSize < MAX_WORD_BUFFER) { + + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } } } else if (ch == '<') { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - if (ch == '<') { + + if (ch == '<') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } else if (ch == '>') { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - if (ch == '>') { + + if (ch == '>') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } + CFX_ByteString ret(m_WordBuffer, m_WordSize); token = ret; return TRUE; } + while (1) { - if (m_WordSize < MAX_WORD_BUFFER) { + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } - if (!GetNextChar(ch)) { + + if (!GetNextChar(ch)) return FALSE; - } - type = PDF_CharType[ch]; - if (type == 'D' || type == 'W') { + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } + CFX_ByteString ret(m_WordBuffer, m_WordSize); token = ret; return TRUE; diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp index e1e60ecae3..335101e85b 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp @@ -5,6 +5,12 @@ // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "../../../include/fpdfapi/fpdf_parser.h" + +// Indexed by 8-bit character code, contains either: +// 'W' - for whitespace: NUL, TAB, CR, LF, FF, 0x80, 0xff +// 'N' - for numeric: 0123456789+-. +// 'D' - for delimiter: %()/<>[]{} +// 'R' - otherwise. const char PDF_CharType[256] = { // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO // SI @@ -72,45 +78,37 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, dwSize = 0; type = PDFWORD_EOF; uint8_t ch; - char chartype; while (1) { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - chartype = PDF_CharType[ch]; - while (chartype == 'W') { - if (m_dwSize <= m_dwCurPos) { + while (PDFCharIsWhitespace(ch)) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - chartype = PDF_CharType[ch]; } - if (ch != '%') { + + if (ch != '%') break; - } + while (1) { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - chartype = PDF_CharType[ch]; } + FX_DWORD start_pos = m_dwCurPos - 1; pStart = m_pData + start_pos; - if (chartype == 'D') { + if (PDFCharIsDelimiter(ch)) { if (ch == '/') { while (1) { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - chartype = PDF_CharType[ch]; - if (chartype != 'R' && chartype != 'N') { + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_dwCurPos--; dwSize = m_dwCurPos - start_pos; type = PDFWORD_NAME; @@ -121,41 +119,36 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, type = PDFWORD_DELIMITER; dwSize = 1; if (ch == '<') { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - if (ch == '<') { + if (ch == '<') dwSize = 2; - } else { + else m_dwCurPos--; - } } else if (ch == '>') { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - if (ch == '>') { + if (ch == '>') dwSize = 2; - } else { + else m_dwCurPos--; - } } } return; } + type = PDFWORD_NUMBER; dwSize = 1; while (1) { - if (chartype != 'N') { + if (!PDFCharIsNumeric(ch)) type = PDFWORD_TEXT; - } - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - chartype = PDF_CharType[ch]; - if (chartype == 'D' || chartype == 'W') { + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_dwCurPos--; break; } @@ -331,23 +324,23 @@ CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) { int i; for (i = 0; i < src_len; i++) { uint8_t ch = src_buf[i]; - if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' || - PDF_CharType[ch] == 'D') { + if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || + PDFCharIsDelimiter(ch)) { dest_len += 3; } else { dest_len++; } } - if (dest_len == src_len) { + if (dest_len == src_len) return orig; - } + CFX_ByteString res; FX_CHAR* dest_buf = res.GetBuffer(dest_len); dest_len = 0; for (i = 0; i < src_len; i++) { uint8_t ch = src_buf[i]; - if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' || - PDF_CharType[ch] == 'D') { + if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || + PDFCharIsDelimiter(ch)) { dest_buf[dest_len++] = '#'; dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16]; dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16]; -- cgit v1.2.3