diff options
author | Dan Sinclair <dsinclair@chromium.org> | 2015-10-28 10:14:08 -0400 |
---|---|---|
committer | Dan Sinclair <dsinclair@chromium.org> | 2015-10-28 10:14:08 -0400 |
commit | e3e5675bcdd26b8df7286e10a42d585df6d2321d (patch) | |
tree | 51e839324e4b9a923a669438295acf4151ab418d | |
parent | 74b147b5747cf65a8936d201b3ed5b32454365cc (diff) | |
download | pdfium-e3e5675bcdd26b8df7286e10a42d585df6d2321d.tar.xz |
Add helpers to check the PDF_CharType.
This CL adds helpers to provide more descriptive access to
PDF_CharType.
R=thestig@chromium.org
Review URL: https://codereview.chromium.org/1407913004 .
-rw-r--r-- | core/include/fpdfapi/fpdf_parser.h | 19 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp | 173 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 196 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp | 77 |
4 files changed, 216 insertions, 249 deletions
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h index a5fce34fd7..d121bb4f79 100644 --- a/core/include/fpdfapi/fpdf_parser.h +++ b/core/include/fpdfapi/fpdf_parser.h @@ -45,13 +45,22 @@ class CFX_PrivateData; #define FPDFPERM_PRINT_HIGH 0x0800 #define FPDF_PAGE_MAX_NUM 0xFFFFF -// Indexed by 8-bit character code, contains either: -// 'W' - for whitespace: NUL, TAB, CR, LF, FF, 0x80, 0xff -// 'N' - for numeric: 0123456789+-. -// 'D' - for delimiter: %()/<>[]{} -// 'R' - otherwise. +// Use the accessors below instead of directly accessing PDF_CharType. extern const char PDF_CharType[256]; +inline bool PDFCharIsWhitespace(uint8_t c) { + return PDF_CharType[c] == 'W'; +} +inline bool PDFCharIsNumeric(uint8_t c) { + return PDF_CharType[c] == 'N'; +} +inline bool PDFCharIsDelimiter(uint8_t c) { + return PDF_CharType[c] == 'D'; +} +inline bool PDFCharIsOther(uint8_t c) { + return PDF_CharType[c] == 'R'; +} + // Indexed by 8-bit char code, contains unicode code points. extern const FX_WORD PDFDocEncoding[256]; diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp index 694d5234a8..27d8c24f15 100644 --- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp +++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp @@ -326,14 +326,14 @@ CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, CPDF_Dictionary* pDict, CPDF_Object* pCSObj, FX_BOOL bDecode) { - if (m_Pos == m_Size) { - return NULL; - } - if (PDF_CharType[m_pBuf[m_Pos]] == 'W') { + if (m_Pos == m_Size) + return nullptr; + + if (PDFCharIsWhitespace(m_pBuf[m_Pos])) m_Pos++; - } + CFX_ByteString Decoder; - CPDF_Dictionary* pParam = NULL; + CPDF_Dictionary* pParam = nullptr; CPDF_Object* pFilter = pDict->GetElementValue(FX_BSTRC("Filter")); if (pFilter) { if (CPDF_Array* pArray = pFilter->AsArray()) { @@ -453,66 +453,66 @@ CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { if (m_pLastObj) { m_pLastObj->Release(); - m_pLastObj = NULL; + m_pLastObj = nullptr; } + m_WordSize = 0; FX_BOOL bIsNumber = TRUE; - if (m_Pos >= m_Size) { + if (m_Pos >= m_Size) return EndOfData; - } + int ch = m_pBuf[m_Pos++]; - int type = PDF_CharType[ch]; while (1) { - while (type == 'W') { - if (m_Size <= m_Pos) { + while (PDFCharIsWhitespace(ch)) { + if (m_Size <= m_Pos) return EndOfData; - } + ch = m_pBuf[m_Pos++]; - type = PDF_CharType[ch]; } - if (ch != '%') { + + if (ch != '%') break; - } + while (1) { - if (m_Size <= m_Pos) { + if (m_Size <= m_Pos) return EndOfData; - } + ch = m_pBuf[m_Pos++]; - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - type = PDF_CharType[ch]; } - if (type == 'D' && ch != '/') { + + if (PDFCharIsDelimiter(ch) && ch != '/') { m_Pos--; m_pLastObj = ReadNextObject(); return Others; } + while (1) { - if (m_WordSize < MAX_WORD_BUFFER) { + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } - if (type != 'N') { + + if (!PDFCharIsNumeric(ch)) bIsNumber = FALSE; - } - if (m_Size <= m_Pos) { + + if (m_Size <= m_Pos) break; - } + ch = m_pBuf[m_Pos++]; - type = PDF_CharType[ch]; - if (type == 'D' || type == 'W') { + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } + m_WordBuffer[m_WordSize] = 0; - if (bIsNumber) { + if (bIsNumber) return Number; - } - if (m_WordBuffer[0] == '/') { + if (m_WordBuffer[0] == '/') return Name; - } + if (m_WordSize == 4) { if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) { m_pLastObj = CPDF_Boolean::Create(TRUE); @@ -532,51 +532,48 @@ CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { } void CPDF_StreamParser::SkipPathObject() { FX_DWORD command_startpos = m_Pos; - if (m_Pos >= m_Size) { + if (m_Pos >= m_Size) return; - } + int ch = m_pBuf[m_Pos++]; - int type = PDF_CharType[ch]; while (1) { - while (type == 'W') { - if (m_Pos >= m_Size) { + while (PDFCharIsWhitespace(ch)) { + if (m_Pos >= m_Size) return; - } ch = m_pBuf[m_Pos++]; - type = PDF_CharType[ch]; } - if (type != 'N') { + + if (!PDFCharIsNumeric(ch)) { m_Pos = command_startpos; return; } + while (1) { - while (type != 'W') { - if (m_Pos >= m_Size) { + while (!PDFCharIsWhitespace(ch)) { + if (m_Pos >= m_Size) return; - } ch = m_pBuf[m_Pos++]; - type = PDF_CharType[ch]; } - while (type == 'W') { - if (m_Pos >= m_Size) { + + while (PDFCharIsWhitespace(ch)) { + if (m_Pos >= m_Size) return; - } ch = m_pBuf[m_Pos++]; - type = PDF_CharType[ch]; } - if (type == 'N') { + + if (PDFCharIsNumeric(ch)) continue; - } + FX_DWORD op_startpos = m_Pos - 1; - while (type != 'W' && type != 'D') { - if (m_Pos >= m_Size) { + while (!PDFCharIsWhitespace(ch) && !PDFCharIsDelimiter(ch)) { + if (m_Pos >= m_Size) return; - } ch = m_pBuf[m_Pos++]; - type = PDF_CharType[ch]; } + if (m_Pos - op_startpos == 2) { int op = m_pBuf[op_startpos]; + // TODO(dsinclair): Can these be turned into named constants? if (op == 'm' || op == 'l' || op == 'c' || op == 'v' || op == 'y') { command_startpos = m_Pos; break; @@ -682,92 +679,82 @@ CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray, void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) { m_WordSize = 0; bIsNumber = TRUE; - if (m_Size <= m_Pos) { + if (m_Size <= m_Pos) return; - } + int ch = m_pBuf[m_Pos++]; - int type = PDF_CharType[ch]; while (1) { - while (type == 'W') { + while (PDFCharIsWhitespace(ch)) { if (m_Size <= m_Pos) { return; } ch = m_pBuf[m_Pos++]; - type = PDF_CharType[ch]; } - if (ch != '%') { + + if (ch != '%') break; - } + while (1) { - if (m_Size <= m_Pos) { + if (m_Size <= m_Pos) return; - } ch = m_pBuf[m_Pos++]; - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - type = PDF_CharType[ch]; } - if (type == 'D') { + + if (PDFCharIsDelimiter(ch)) { bIsNumber = FALSE; m_WordBuffer[m_WordSize++] = ch; if (ch == '/') { while (1) { - if (m_Size <= m_Pos) { + if (m_Size <= m_Pos) return; - } ch = m_pBuf[m_Pos++]; - type = PDF_CharType[ch]; - if (type != 'R' && type != 'N') { + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; return; } - if (m_WordSize < MAX_WORD_BUFFER) { + + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } } } else if (ch == '<') { - if (m_Size <= m_Pos) { + if (m_Size <= m_Pos) return; - } ch = m_pBuf[m_Pos++]; - if (ch == '<') { + if (ch == '<') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } else if (ch == '>') { - if (m_Size <= m_Pos) { + if (m_Size <= m_Pos) return; - } ch = m_pBuf[m_Pos++]; - if (ch == '>') { + if (ch == '>') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } return; } + while (1) { - if (m_WordSize < MAX_WORD_BUFFER) { + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } - if (type != 'N') { + if (!PDFCharIsNumeric(ch)) bIsNumber = FALSE; - } - if (m_Size <= m_Pos) { + + if (m_Size <= m_Pos) return; - } ch = m_pBuf[m_Pos++]; - type = PDF_CharType[ch]; - if (type == 'D' || type == 'W') { + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } } + CFX_ByteString CPDF_StreamParser::ReadString() { if (m_Size <= m_Pos) { return CFX_ByteString(); diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index c1b78f1d81..e8842888c8 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -630,7 +630,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { uint8_t byte = buffer[i]; switch (status) { case 0: - if (PDF_CharType[byte] == 'W') { + if (PDFCharIsWhitespace(byte)) { status = 1; } if (byte <= '9' && byte >= '0') { @@ -658,7 +658,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { } break; case 1: - if (PDF_CharType[byte] == 'W') { + if (PDFCharIsWhitespace(byte)) { break; } else if (byte <= '9' && byte >= '0') { start_pos = pos + i; @@ -679,7 +679,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { if (byte <= '9' && byte >= '0') { objnum = objnum * 10 + byte - '0'; break; - } else if (PDF_CharType[byte] == 'W') { + } else if (PDFCharIsWhitespace(byte)) { status = 3; } else { --i; @@ -692,7 +692,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { start_pos1 = pos + i; status = 4; gennum = byte - '0'; - } else if (PDF_CharType[byte] == 'W') { + } else if (PDFCharIsWhitespace(byte)) { break; } else if (byte == 't') { status = 7; @@ -706,7 +706,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { if (byte <= '9' && byte >= '0') { gennum = gennum * 10 + byte - '0'; break; - } else if (PDF_CharType[byte] == 'W') { + } else if (PDFCharIsWhitespace(byte)) { status = 5; } else { --i; @@ -717,7 +717,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { if (byte == 'o') { status = 6; inside_index = 1; - } else if (PDF_CharType[byte] == 'W') { + } else if (PDFCharIsWhitespace(byte)) { break; } else if (byte <= '9' && byte >= '0') { objnum = gennum; @@ -752,7 +752,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { } break; case 3: - if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') { + if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { if (objnum > 0x1000000) { status = 0; break; @@ -826,7 +826,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { break; case 7: if (inside_index == 7) { - if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') { + if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { last_trailer = pos + i - 7; m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0); @@ -937,13 +937,13 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() { status = 0; break; case 13: - if (PDF_CharType[byte] == 'D' || PDF_CharType[byte] == 'W') { + if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { --i; status = 0; } break; case 14: - if (PDF_CharType[byte] == 'W') { + if (PDFCharIsWhitespace(byte)) { status = 0; } else if (byte == '%' || byte == '(' || byte == '<' || byte == '\\') { @@ -1646,15 +1646,13 @@ FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable() { uint8_t ch = 0; FX_DWORD dwCount = 0; m_Syntax.GetNextChar(ch); - int32_t type = PDF_CharType[ch]; - while (type == 'W') { + while (PDFCharIsWhitespace(ch)) { ++dwCount; if (m_Syntax.m_FileLen >= (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) { break; } m_Syntax.GetNextChar(ch); - type = PDF_CharType[ch]; } m_LastXRefOffset += dwCount; FX_POSITION pos = m_ObjectStreamMap.GetStartPosition(); @@ -1771,77 +1769,66 @@ void CPDF_SyntaxParser::GetNextWord() { if (!GetNextChar(ch)) { return; } - uint8_t type = PDF_CharType[ch]; while (1) { - while (type == 'W') { - if (!GetNextChar(ch)) { + while (PDFCharIsWhitespace(ch)) { + if (!GetNextChar(ch)) return; - } - type = PDF_CharType[ch]; } - if (ch != '%') { + if (ch != '%') break; - } + while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - type = PDF_CharType[ch]; } - if (type == 'D') { + + if (PDFCharIsDelimiter(ch)) { m_bIsNumber = FALSE; m_WordBuffer[m_WordSize++] = ch; if (ch == '/') { while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - type = PDF_CharType[ch]; - if (type != 'R' && type != 'N') { + + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; return; } - if (m_WordSize < MAX_WORD_BUFFER) { + + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } } } else if (ch == '<') { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - if (ch == '<') { + if (ch == '<') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } else if (ch == '>') { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - if (ch == '>') { + if (ch == '>') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } return; } + while (1) { - if (m_WordSize < MAX_WORD_BUFFER) { + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } - if (type != 'N') { + + if (!PDFCharIsNumeric(ch)) m_bIsNumber = FALSE; - } - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - type = PDF_CharType[ch]; - if (type == 'D' || type == 'W') { + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } @@ -1996,33 +1983,29 @@ void CPDF_SyntaxParser::ToNextLine() { } void CPDF_SyntaxParser::ToNextWord() { uint8_t ch; - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - uint8_t type = PDF_CharType[ch]; + while (1) { - while (type == 'W') { + while (PDFCharIsWhitespace(ch)) { m_dwWordPos = m_Pos; - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - type = PDF_CharType[ch]; } - if (ch != '%') { + + if (ch != '%') break; - } + while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return; - } - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - type = PDF_CharType[ch]; } m_Pos--; } + CFX_ByteString CPDF_SyntaxParser::GetNextWord(FX_BOOL& bIsNumber) { GetNextWord(); bIsNumber = m_bIsNumber; @@ -2511,21 +2494,21 @@ FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, const uint8_t* tag, FX_DWORD taglen, FX_BOOL checkKeyword) { - uint8_t type = PDF_CharType[tag[0]]; - FX_BOOL bCheckLeft = type != 'D' && type != 'W'; - type = PDF_CharType[tag[taglen - 1]]; - FX_BOOL bCheckRight = type != 'D' && type != 'W'; + bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); + bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && + !PDFCharIsWhitespace(tag[taglen - 1]); uint8_t ch; if (bCheckRight && startpos + (int32_t)taglen <= limit && GetCharAt(startpos + (int32_t)taglen, ch)) { - uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { + if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || + (checkKeyword && PDFCharIsDelimiter(ch))) { return FALSE; } } + if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { - uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { + if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || + (checkKeyword && PDFCharIsDelimiter(ch))) { return FALSE; } } @@ -3769,84 +3752,79 @@ inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) { FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString& token) { m_WordSize = 0; uint8_t ch; - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - uint8_t type = PDF_CharType[ch]; + while (1) { - while (type == 'W') { - if (!GetNextChar(ch)) { + while (PDFCharIsWhitespace(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - type = PDF_CharType[ch]; } - if (ch != '%') { + + if (ch != '%') break; - } + while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - type = PDF_CharType[ch]; } - if (type == 'D') { + + if (PDFCharIsDelimiter(ch)) { m_WordBuffer[m_WordSize++] = ch; if (ch == '/') { while (1) { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - type = PDF_CharType[ch]; - if (type != 'R' && type != 'N') { + + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_Pos--; CFX_ByteString ret(m_WordBuffer, m_WordSize); token = ret; return TRUE; } - if (m_WordSize < MAX_WORD_BUFFER) { + + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } } } else if (ch == '<') { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - if (ch == '<') { + + if (ch == '<') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } else if (ch == '>') { - if (!GetNextChar(ch)) { + if (!GetNextChar(ch)) return FALSE; - } - if (ch == '>') { + + if (ch == '>') m_WordBuffer[m_WordSize++] = ch; - } else { + else m_Pos--; - } } + CFX_ByteString ret(m_WordBuffer, m_WordSize); token = ret; return TRUE; } + while (1) { - if (m_WordSize < MAX_WORD_BUFFER) { + if (m_WordSize < MAX_WORD_BUFFER) m_WordBuffer[m_WordSize++] = ch; - } - if (!GetNextChar(ch)) { + + if (!GetNextChar(ch)) return FALSE; - } - type = PDF_CharType[ch]; - if (type == 'D' || type == 'W') { + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_Pos--; break; } } + CFX_ByteString ret(m_WordBuffer, m_WordSize); token = ret; return TRUE; diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp index e1e60ecae3..335101e85b 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp @@ -5,6 +5,12 @@ // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "../../../include/fpdfapi/fpdf_parser.h" + +// Indexed by 8-bit character code, contains either: +// 'W' - for whitespace: NUL, TAB, CR, LF, FF, 0x80, 0xff +// 'N' - for numeric: 0123456789+-. +// 'D' - for delimiter: %()/<>[]{} +// 'R' - otherwise. const char PDF_CharType[256] = { // NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO // SI @@ -72,45 +78,37 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, dwSize = 0; type = PDFWORD_EOF; uint8_t ch; - char chartype; while (1) { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - chartype = PDF_CharType[ch]; - while (chartype == 'W') { - if (m_dwSize <= m_dwCurPos) { + while (PDFCharIsWhitespace(ch)) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - chartype = PDF_CharType[ch]; } - if (ch != '%') { + + if (ch != '%') break; - } + while (1) { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - if (ch == '\r' || ch == '\n') { + if (ch == '\r' || ch == '\n') break; - } } - chartype = PDF_CharType[ch]; } + FX_DWORD start_pos = m_dwCurPos - 1; pStart = m_pData + start_pos; - if (chartype == 'D') { + if (PDFCharIsDelimiter(ch)) { if (ch == '/') { while (1) { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - chartype = PDF_CharType[ch]; - if (chartype != 'R' && chartype != 'N') { + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { m_dwCurPos--; dwSize = m_dwCurPos - start_pos; type = PDFWORD_NAME; @@ -121,41 +119,36 @@ void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, type = PDFWORD_DELIMITER; dwSize = 1; if (ch == '<') { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - if (ch == '<') { + if (ch == '<') dwSize = 2; - } else { + else m_dwCurPos--; - } } else if (ch == '>') { - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - if (ch == '>') { + if (ch == '>') dwSize = 2; - } else { + else m_dwCurPos--; - } } } return; } + type = PDFWORD_NUMBER; dwSize = 1; while (1) { - if (chartype != 'N') { + if (!PDFCharIsNumeric(ch)) type = PDFWORD_TEXT; - } - if (m_dwSize <= m_dwCurPos) { + if (m_dwSize <= m_dwCurPos) return; - } ch = m_pData[m_dwCurPos++]; - chartype = PDF_CharType[ch]; - if (chartype == 'D' || chartype == 'W') { + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { m_dwCurPos--; break; } @@ -331,23 +324,23 @@ CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) { int i; for (i = 0; i < src_len; i++) { uint8_t ch = src_buf[i]; - if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' || - PDF_CharType[ch] == 'D') { + if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || + PDFCharIsDelimiter(ch)) { dest_len += 3; } else { dest_len++; } } - if (dest_len == src_len) { + if (dest_len == src_len) return orig; - } + CFX_ByteString res; FX_CHAR* dest_buf = res.GetBuffer(dest_len); dest_len = 0; for (i = 0; i < src_len; i++) { uint8_t ch = src_buf[i]; - if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' || - PDF_CharType[ch] == 'D') { + if (ch >= 0x80 || PDFCharIsWhitespace(ch) || ch == '#' || + PDFCharIsDelimiter(ch)) { dest_buf[dest_len++] = '#'; dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16]; dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16]; |