diff options
-rw-r--r-- | core/include/fpdfapi/fpdf_parser.h | 8 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp | 31 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_filters.cpp | 5 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 59 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp | 62 |
5 files changed, 98 insertions, 67 deletions
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h index e1a0f5cf1a..0ed8ed20d8 100644 --- a/core/include/fpdfapi/fpdf_parser.h +++ b/core/include/fpdfapi/fpdf_parser.h @@ -46,6 +46,14 @@ class CFX_PrivateData; #define FPDFPERM_ASSEMBLE 0x0400 #define FPDFPERM_PRINT_HIGH 0x0800 #define FPDF_PAGE_MAX_NUM 0xFFFFF + +// Indexed by 8-bit character code, contains either: +// 'W' - for whitespace: NUL, TAB, CR, LF, FF, SPACE, 0x80, 0xff +// 'N' - for numeric: 0123456789+-. +// 'D' - for delimiter: %()/<>[]{} +// 'R' - otherwise. +extern const char PDF_CharType[256]; + class IPDF_EnumPageHandler { public: diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp index bb29595721..9177c3e1d4 100644 --- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp +++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp @@ -307,13 +307,12 @@ FX_DWORD PDF_DecodeInlineStream(const FX_BYTE* src_buf, FX_DWORD limit, dest_buf = 0; return (FX_DWORD) - 1; } -extern const FX_LPCSTR _PDF_CharType; CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, CPDF_Dictionary* pDict, CPDF_Object* pCSObj, FX_BOOL bDecode) { if (m_Pos == m_Size) { return NULL; } - if (_PDF_CharType[m_pBuf[m_Pos]] == 'W') { + if (PDF_CharType[m_pBuf[m_Pos]] == 'W') { m_Pos ++; } CFX_ByteString Decoder; @@ -447,14 +446,14 @@ CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() return EndOfData; } int ch = m_pBuf[m_Pos++]; - int type = _PDF_CharType[ch]; + int type = PDF_CharType[ch]; while (1) { while (type == 'W') { if (m_Size <= m_Pos) { return EndOfData; } ch = m_pBuf[m_Pos++]; - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (ch != '%') { break; @@ -468,7 +467,7 @@ CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() break; } } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (type == 'D' && ch != '/') { m_Pos --; @@ -486,7 +485,7 @@ CPDF_StreamParser::SyntaxType 
CPDF_StreamParser::ParseNextElement() break; } ch = m_pBuf[m_Pos++]; - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; if (type == 'D' || type == 'W') { m_Pos --; break; @@ -523,14 +522,14 @@ void CPDF_StreamParser::SkipPathObject() return; } int ch = m_pBuf[m_Pos++]; - int type = _PDF_CharType[ch]; + int type = PDF_CharType[ch]; while (1) { while (type == 'W') { if (m_Pos >= m_Size) { return; } ch = m_pBuf[m_Pos++]; - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (type != 'N') { m_Pos = command_startpos; @@ -542,14 +541,14 @@ void CPDF_StreamParser::SkipPathObject() return; } ch = m_pBuf[m_Pos++]; - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } while (type == 'W') { if (m_Pos >= m_Size) { return; } ch = m_pBuf[m_Pos++]; - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (type == 'N') { continue; @@ -560,7 +559,7 @@ void CPDF_StreamParser::SkipPathObject() return; } ch = m_pBuf[m_Pos++]; - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (m_Pos - op_startpos == 2) { int op = m_pBuf[op_startpos]; @@ -672,14 +671,14 @@ void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) return; } int ch = m_pBuf[m_Pos++]; - int type = _PDF_CharType[ch]; + int type = PDF_CharType[ch]; while (1) { while (type == 'W') { if (m_Size <= m_Pos) { return; } ch = m_pBuf[m_Pos++]; - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (ch != '%') { break; @@ -693,7 +692,7 @@ void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) break; } } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (type == 'D') { bIsNumber = FALSE; @@ -704,7 +703,7 @@ void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) return; } ch = m_pBuf[m_Pos++]; - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; if (type != 'R' && type != 'N') { m_Pos --; return; @@ -747,7 +746,7 @@ void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) return; } ch = m_pBuf[m_Pos++]; - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; if (type == 'D' || type == 'W') { 
m_Pos --; break; diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_filters.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_filters.cpp index 610fb39781..d17b123765 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_filters.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_filters.cpp @@ -576,12 +576,11 @@ CPDF_Ascii85Filter::CPDF_Ascii85Filter() m_State = 0; m_CharCount = 0; } -extern const FX_LPCSTR _PDF_CharType; void CPDF_Ascii85Filter::v_FilterIn(FX_LPCBYTE src_buf, FX_DWORD src_size, CFX_BinaryBuf& dest_buf) { for (FX_DWORD i = 0; i < src_size; i ++) { FX_BYTE byte = src_buf[i]; - if (_PDF_CharType[byte] == 'W') { + if (PDF_CharType[byte] == 'W') { continue; } switch (m_State) { @@ -640,7 +639,7 @@ void CPDF_AsciiHexFilter::v_FilterIn(FX_LPCBYTE src_buf, FX_DWORD src_size, CFX_ { for (FX_DWORD i = 0; i < src_size; i ++) { FX_BYTE byte = src_buf[i]; - if (_PDF_CharType[byte] == 'W') { + if (PDF_CharType[byte] == 'W') { continue; } int digit; diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index 819598976c..29265f407f 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -12,7 +12,6 @@ #include <utility> #include <vector> -extern const FX_LPCSTR _PDF_CharType; FX_BOOL IsSignatureDict(const CPDF_Dictionary* pDict) { CPDF_Object* pType = pDict->GetElementValue(FX_BSTRC("Type")); @@ -648,7 +647,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() FX_BYTE byte = buffer[i]; switch (status) { case 0: - if (_PDF_CharType[byte] == 'W') { + if (PDF_CharType[byte] == 'W') { status = 1; } if (byte <= '9' && byte >= '0') { @@ -676,7 +675,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() } break; case 1: - if (_PDF_CharType[byte] == 'W') { + if (PDF_CharType[byte] == 'W') { break; } else if (byte <= '9' && byte >= '0') { start_pos = pos + i; @@ -697,7 +696,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() if (byte <= '9' && byte >= '0') { objnum = 
objnum * 10 + byte - '0'; break; - } else if (_PDF_CharType[byte] == 'W') { + } else if (PDF_CharType[byte] == 'W') { status = 3; } else { --i; @@ -710,7 +709,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() start_pos1 = pos + i; status = 4; gennum = byte - '0'; - } else if (_PDF_CharType[byte] == 'W') { + } else if (PDF_CharType[byte] == 'W') { break; } else if (byte == 't') { status = 7; @@ -724,7 +723,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() if (byte <= '9' && byte >= '0') { gennum = gennum * 10 + byte - '0'; break; - } else if (_PDF_CharType[byte] == 'W') { + } else if (PDF_CharType[byte] == 'W') { status = 5; } else { --i; @@ -735,7 +734,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() if (byte == 'o') { status = 6; inside_index = 1; - } else if (_PDF_CharType[byte] == 'W') { + } else if (PDF_CharType[byte] == 'W') { break; } else if (byte <= '9' && byte >= '0') { objnum = gennum; @@ -770,7 +769,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() } break; case 3: - if (_PDF_CharType[byte] == 'W' || _PDF_CharType[byte] == 'D') { + if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') { if (objnum > 0x1000000) { status = 0; break; @@ -844,7 +843,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() break; case 7: if (inside_index == 7) { - if (_PDF_CharType[byte] == 'W' || _PDF_CharType[byte] == 'D') { + if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') { last_trailer = pos + i - 7; m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0); @@ -949,13 +948,13 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() status = 0; break; case 13: - if (_PDF_CharType[byte] == 'D' || _PDF_CharType[byte] == 'W') { + if (PDF_CharType[byte] == 'D' || PDF_CharType[byte] == 'W') { --i; status = 0; } break; case 14: - if (_PDF_CharType[byte] == 'W') { + if (PDF_CharType[byte] == 'W') { status = 0; } else if (byte == '%' || byte == '(' || byte == '<' || byte == '\\') { status = 0; @@ -1649,14 +1648,14 @@ FX_DWORD 
CPDF_Parser::LoadLinearizedMainXRefTable() FX_BYTE ch = 0; FX_DWORD dwCount = 0; m_Syntax.GetNextChar(ch); - FX_INT32 type = _PDF_CharType[ch]; + FX_INT32 type = PDF_CharType[ch]; while (type == 'W') { ++dwCount; if (m_Syntax.m_FileLen >= (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) { break; } m_Syntax.GetNextChar(ch); - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } m_LastXRefOffset += dwCount; FX_POSITION pos = m_ObjectStreamMap.GetStartPosition(); @@ -1781,13 +1780,13 @@ void CPDF_SyntaxParser::GetNextWord() if (!GetNextChar(ch)) { return; } - FX_BYTE type = _PDF_CharType[ch]; + FX_BYTE type = PDF_CharType[ch]; while (1) { while (type == 'W') { if (!GetNextChar(ch)) { return; } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (ch != '%') { break; @@ -1800,7 +1799,7 @@ void CPDF_SyntaxParser::GetNextWord() break; } } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (type == 'D') { m_bIsNumber = FALSE; @@ -1810,7 +1809,7 @@ void CPDF_SyntaxParser::GetNextWord() if (!GetNextChar(ch)) { return; } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; if (type != 'R' && type != 'N') { m_Pos --; return; @@ -1850,7 +1849,7 @@ void CPDF_SyntaxParser::GetNextWord() if (!GetNextChar(ch)) { return; } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; if (type == 'D' || type == 'W') { m_Pos --; break; @@ -2018,14 +2017,14 @@ void CPDF_SyntaxParser::ToNextWord() if (!GetNextChar(ch)) { return; } - FX_BYTE type = _PDF_CharType[ch]; + FX_BYTE type = PDF_CharType[ch]; while (1) { while (type == 'W') { m_dwWordPos = m_Pos; if (!GetNextChar(ch)) { return; } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (ch != '%') { break; @@ -2038,7 +2037,7 @@ void CPDF_SyntaxParser::ToNextWord() break; } } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } m_Pos --; } @@ -2530,19 +2529,19 @@ FX_INT32 CPDF_SyntaxParser::GetDirectNum() } FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, FX_FILESIZE limit, FX_LPCBYTE 
tag, FX_DWORD taglen) { - FX_BYTE type = _PDF_CharType[tag[0]]; + FX_BYTE type = PDF_CharType[tag[0]]; FX_BOOL bCheckLeft = type != 'D' && type != 'W'; - type = _PDF_CharType[tag[taglen - 1]]; + type = PDF_CharType[tag[taglen - 1]]; FX_BOOL bCheckRight = type != 'D' && type != 'W'; FX_BYTE ch; if (bCheckRight && startpos + (FX_INT32)taglen <= limit && GetCharAt(startpos + (FX_INT32)taglen, ch)) { - FX_BYTE type = _PDF_CharType[ch]; + FX_BYTE type = PDF_CharType[ch]; if (type == 'N' || type == 'R') { return FALSE; } } if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { - FX_BYTE type = _PDF_CharType[ch]; + FX_BYTE type = PDF_CharType[ch]; if (type == 'N' || type == 'R') { return FALSE; } @@ -3828,13 +3827,13 @@ FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString &token) if (!GetNextChar(ch)) { return FALSE; } - FX_BYTE type = _PDF_CharType[ch]; + FX_BYTE type = PDF_CharType[ch]; while (1) { while (type == 'W') { if (!GetNextChar(ch)) { return FALSE; } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (ch != '%') { break; @@ -3847,7 +3846,7 @@ FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString &token) break; } } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; } if (type == 'D') { m_WordBuffer[m_WordSize++] = ch; @@ -3856,7 +3855,7 @@ FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString &token) if (!GetNextChar(ch)) { return FALSE; } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; if (type != 'R' && type != 'N') { m_Pos --; CFX_ByteString ret(m_WordBuffer, m_WordSize); @@ -3897,7 +3896,7 @@ FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString &token) if (!GetNextChar(ch)) { return FALSE; } - type = _PDF_CharType[ch]; + type = PDF_CharType[ch]; if (type == 'D' || type == 'W') { m_Pos --; break; diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp index e5e68c2f5c..bbfd4cc680 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp +++ 
b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp @@ -5,15 +5,41 @@ // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com #include "../../../include/fpdfapi/fpdf_parser.h" -extern const FX_LPCSTR _PDF_CharType = - "WRRRRRRRRWWRWWRRRRRRRRRRRRRRRRRR" - "WRRRRDRRDDRNRNNDNNNNNNNNNNRRDRDR" - "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR" - "RRRRRRRRRRRRRRRRRRRRRRRRRRRDRDRR" - "WRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR" - "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR" - "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR" - "RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRW"; +const char PDF_CharType[256] = { + //NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI + 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R', + + //DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + + //SP ! " # $ % & ' ( ) * + , - . / + 'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R', 'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D', + + // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
+ 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R', + + // @ A B C D E F G H I J K L M N O + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + + // P Q R S T U V W X Y Z [ \ ] ^ _ + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R', + + // ` a b c d e f g h i j k l m n o + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + + // p q r s t u v w x y z { | } ~ DEL + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R', + + 'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', + 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W' +}; + #ifndef MAX_PATH #define MAX_PATH 4096 #endif @@ -41,13 +67,13 @@ void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& typ return; } ch = m_pData[m_dwCurPos++]; - chartype = _PDF_CharType[ch]; + chartype = PDF_CharType[ch]; while (chartype == 'W') { if (m_dwSize <= m_dwCurPos) { return; } ch = m_pData[m_dwCurPos++]; - chartype = _PDF_CharType[ch]; + chartype = PDF_CharType[ch]; } if (ch != '%') { break; @@ -61,7 +87,7 @@ void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& typ break; } } - chartype = _PDF_CharType[ch]; + chartype = PDF_CharType[ch]; } FX_DWORD start_pos = m_dwCurPos - 1; pStart = m_pData + start_pos; @@ -72,7 +98,7 @@ void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& 
typ return; } ch = m_pData[m_dwCurPos++]; - chartype = _PDF_CharType[ch]; + chartype = PDF_CharType[ch]; if (chartype != 'R' && chartype != 'N') { m_dwCurPos --; dwSize = m_dwCurPos - start_pos; @@ -117,7 +143,7 @@ void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& typ return; } ch = m_pData[m_dwCurPos++]; - chartype = _PDF_CharType[ch]; + chartype = PDF_CharType[ch]; if (chartype == 'D' || chartype == 'W') { m_dwCurPos --; break; @@ -297,8 +323,8 @@ CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) int i; for (i = 0; i < src_len; i ++) { FX_BYTE ch = src_buf[i]; - if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' || - _PDF_CharType[ch] == 'D') { + if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' || + PDF_CharType[ch] == 'D') { dest_len += 3; } else { dest_len ++; @@ -312,8 +338,8 @@ CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig) dest_len = 0; for (i = 0; i < src_len; i ++) { FX_BYTE ch = src_buf[i]; - if (ch >= 0x80 || _PDF_CharType[ch] == 'W' || ch == '#' || - _PDF_CharType[ch] == 'D') { + if (ch >= 0x80 || PDF_CharType[ch] == 'W' || ch == '#' || + PDF_CharType[ch] == 'D') { dest_buf[dest_len++] = '#'; dest_buf[dest_len++] = "0123456789ABCDEF"[ch / 16]; dest_buf[dest_len++] = "0123456789ABCDEF"[ch % 16]; |