diff options
author | Jun Fang <jun_fang@foxitsoftware.com> | 2015-09-25 22:03:26 -0700 |
---|---|---|
committer | Jun Fang <jun_fang@foxitsoftware.com> | 2015-09-25 22:03:26 -0700 |
commit | e36f64066fb189a43ee488dedb535ef98a009db7 (patch) | |
tree | e1950c29e9e295dd247a0edc5ed8d490e18800d8 /core | |
parent | fa9756f77ad6145940d3dc697814b84f5755ae17 (diff) | |
download | pdfium-e36f64066fb189a43ee488dedb535ef98a009db7.tar.xz |
Revert "Revert "Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files""
This reverts commit fa9756f77ad6145940d3dc697814b84f5755ae17.
TBR=thestig@chromium.org
Review URL: https://codereview.chromium.org/1307353005/
Diffstat (limited to 'core')
-rw-r--r-- | core/include/fpdfapi/fpdf_parser.h | 3 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp | 2 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp | 34 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_page/pageint.h | 2 | ||||
-rw-r--r-- | core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 56 |
5 files changed, 27 insertions, 70 deletions
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h index 03ae4cf570..6194247568 100644 --- a/core/include/fpdfapi/fpdf_parser.h +++ b/core/include/fpdfapi/fpdf_parser.h @@ -294,7 +294,8 @@ class CPDF_SyntaxParser { FX_BOOL IsWholeWord(FX_FILESIZE startpos, FX_FILESIZE limit, const uint8_t* tag, - FX_DWORD taglen); + FX_DWORD taglen, + FX_BOOL checkKeyword); CFX_ByteString ReadString(); diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp index 059dd4c2a6..4e5ef1c898 100644 --- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp +++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp @@ -11,7 +11,6 @@ #define REQUIRE_PARAMS(count) \ if (m_ParamCount != count) { \ - m_bAbort = TRUE; \ return; \ } @@ -34,7 +33,6 @@ CPDF_StreamContentParser::CPDF_StreamContentParser( m_Level(level), m_ParamStartPos(0), m_ParamCount(0), - m_bAbort(FALSE), m_pCurStates(new CPDF_AllStates), m_pLastTextObject(nullptr), m_DefFontSize(0), diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp index 48e9b98d3a..c9bcff6db6 100644 --- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp +++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp @@ -9,27 +9,7 @@ #include "../../../include/fxcodec/fx_codec.h" #include "pageint.h" #include <limits.h> -const FX_CHAR* const _PDF_OpCharType = - "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" - "IIVIIIIVIIVIIIIIVVIIIIIIIIIIIIII" - "IIVVVVVVIVVVVVVIVVVVVIIVVIIIIIII" - "IIVVVVVVVVVVVVVVIVVVIIVVIVVIIIII" - "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" - "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" - "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" - "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"; -FX_BOOL _PDF_HasInvalidOpChar(const FX_CHAR* op) { - if (!op) { - return FALSE; - } - uint8_t ch; - while ((ch = *op++)) { - if (_PDF_OpCharType[ch] == 'I') { - return TRUE; - } - } - return FALSE; -} + class CPDF_StreamParserAutoClearer { public: CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable, @@ -61,13 +41,7 @@ FX_DWORD CPDF_StreamContentParser::Parse(const uint8_t* pData, case CPDF_StreamParser::EndOfData: return m_pSyntax->GetPos(); case CPDF_StreamParser::Keyword: - if (!OnOperator((char*)syntax.GetWordBuf()) && - _PDF_HasInvalidOpChar((char*)syntax.GetWordBuf())) { - m_bAbort = TRUE; - } - if (m_bAbort) { - return m_pSyntax->GetPos(); - } + OnOperator((char*)syntax.GetWordBuf()); ClearAllParams(); break; case CPDF_StreamParser::Number: @@ -1126,10 +1100,6 @@ void CPDF_ContentParser::Continue(IFX_Pause* pPause) { m_CurrentOffset += m_pParser->Parse(m_pData + m_CurrentOffset, m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); - if (m_pParser->ShouldAbort()) { - m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP; - continue; - } } } if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) { diff --git a/core/src/fpdfapi/fpdf_page/pageint.h b/core/src/fpdfapi/fpdf_page/pageint.h index 6bec07268c..c85523b833 100644 --- a/core/src/fpdfapi/fpdf_page/pageint.h +++ b/core/src/fpdfapi/fpdf_page/pageint.h @@ -162,7 +162,6 @@ class CPDF_StreamContentParser { int level); ~CPDF_StreamContentParser(); - FX_BOOL ShouldAbort() const { return m_bAbort; } CPDF_PageObjects* GetObjectList() const { return m_pObjectList; } CPDF_AllStates* GetCurStates() const { return m_pCurStates.get(); } FX_BOOL IsColored() const { return m_bColored; } @@ -305,7 +304,6 @@ class CPDF_StreamContentParser { _ContentParam m_ParamBuf1[PARAM_BUF_SIZE]; FX_DWORD m_ParamStartPos; FX_DWORD m_ParamCount; - FX_BOOL m_bAbort; CPDF_StreamParser* m_pSyntax; nonstd::unique_ptr<CPDF_AllStates> m_pCurStates; CPDF_ContentMark m_CurContentMark; diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index 7482f0b8e4..cd923f3e22 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -2455,8 +2455,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, len = pLenObj->GetInteger(); } // Check whether end of line markers follow the keyword 'stream'. - unsigned int numMarkers = ReadEOLMarkers(m_Pos); - m_Pos += numMarkers; + // The stream starts after end of line markers. + m_Pos += ReadEOLMarkers(m_Pos); FX_FILESIZE streamStartPos = m_Pos; if (pContext) { pContext->m_DataStart = streamStartPos; @@ -2467,19 +2467,18 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler; if (!pCryptoHandler) { FX_BOOL bSearchForKeyword = TRUE; - unsigned int prevMarkers = 0; - unsigned int nextMarkers = 0; if (len >= 0) { pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; pos += len; if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) { m_Pos = pos.ValueOrDie(); } - prevMarkers = ReadEOLMarkers(m_Pos); + m_Pos += ReadEOLMarkers(m_Pos); + FXSYS_memset(m_WordBuffer, 0, ENDSTREAM_LEN + 1); GetNextWord(); - nextMarkers = ReadEOLMarkers(m_Pos); - if (m_WordSize == ENDSTREAM_LEN && prevMarkers != 0 && nextMarkers != 0 && - FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0) { + if (FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0 && + IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen, + FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) { bSearchForKeyword = FALSE; } } @@ -2494,22 +2493,12 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, // Can't find any "endstream". break; } - prevMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); - nextMarkers = - ReadEOLMarkers(streamStartPos + endStreamOffset + ENDSTREAM_LEN); - if (prevMarkers != 0 && nextMarkers != 0) { + if (IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen, + FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) { // Stop searching when the keyword "endstream" is found. + endStreamOffset = m_Pos - streamStartPos - ENDSTREAM_LEN; break; - } else { - unsigned char ch = 0x00; - GetCharAt(streamStartPos + endStreamOffset + ENDSTREAM_LEN, ch); - if (ch == 0x09 || ch == 0x20) { - //"endstream" is treated as a keyword - // when it is followed by a tab or whitespace - break; - } } - m_Pos += ENDSTREAM_LEN; } m_Pos = streamStartPos; FX_FILESIZE endObjOffset = 0; @@ -2519,14 +2508,12 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, // Can't find any "endobj". break; } - prevMarkers = ReadEOLMarkers(streamStartPos + endObjOffset - 1); - nextMarkers = - ReadEOLMarkers(streamStartPos + endObjOffset + ENDOBJ_LEN); - if (prevMarkers != 0 && nextMarkers != 0) { + if (IsWholeWord(m_Pos - ENDOBJ_LEN, m_FileLen, + FX_BSTRC("endobj").GetPtr(), ENDOBJ_LEN, TRUE)) { // Stop searching when the keyword "endobj" is found. + endObjOffset = m_Pos - streamStartPos - ENDOBJ_LEN; break; } - m_Pos += ENDOBJ_LEN; } if (endStreamOffset < 0 && endObjOffset < 0) { // Can't find "endstream" or "endobj". @@ -2542,7 +2529,7 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, endStreamOffset = endObjOffset; } len = endStreamOffset; - numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); + int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); if (numMarkers == 2) { len -= 2; } else { @@ -2579,8 +2566,9 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, pContext->m_DataEnd = pContext->m_DataStart + len; } streamStartPos = m_Pos; + FXSYS_memset(m_WordBuffer, 0, ENDOBJ_LEN + 1); GetNextWord(); - numMarkers = ReadEOLMarkers(m_Pos); + int numMarkers = ReadEOLMarkers(m_Pos); if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 && FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) { m_Pos = streamStartPos; @@ -2611,7 +2599,8 @@ int32_t CPDF_SyntaxParser::GetDirectNum() { FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, FX_FILESIZE limit, const uint8_t* tag, - FX_DWORD taglen) { + FX_DWORD taglen, + FX_BOOL checkKeyword) { uint8_t type = PDF_CharType[tag[0]]; FX_BOOL bCheckLeft = type != 'D' && type != 'W'; type = PDF_CharType[tag[taglen - 1]]; @@ -2620,13 +2609,13 @@ FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, if (bCheckRight && startpos + (int32_t)taglen <= limit && GetCharAt(startpos + (int32_t)taglen, ch)) { uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R') { + if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { return FALSE; } } if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R') { + if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { return FALSE; } } @@ -2682,7 +2671,8 @@ FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, } } FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; - if (!bWholeWord || IsWholeWord(startpos, limit, tag.GetPtr(), taglen)) { + if (!bWholeWord || + IsWholeWord(startpos, limit, tag.GetPtr(), taglen, FALSE)) { m_Pos = startpos; return TRUE; } @@ -2739,7 +2729,7 @@ int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, if (pPatterns[i].m_Offset == pPatterns[i].m_Len) { if (!bWholeWord || IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag, - pPatterns[i].m_Len)) { + pPatterns[i].m_Len, FALSE)) { found = i; goto end; } else { |