From fa9756f77ad6145940d3dc697814b84f5755ae17 Mon Sep 17 00:00:00 2001 From: Jun Fang Date: Fri, 25 Sep 2015 21:12:51 -0700 Subject: Revert "Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files" This reverts commit 9bd18183ba8210c91d71c3060146235750a4c71c. --- core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp | 2 + .../src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp | 34 ++++++++++++- core/src/fpdfapi/fpdf_page/pageint.h | 2 + .../src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 56 +++++++++++++--------- 4 files changed, 69 insertions(+), 25 deletions(-) (limited to 'core/src/fpdfapi') diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp index 4e5ef1c898..059dd4c2a6 100644 --- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp +++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp @@ -11,6 +11,7 @@ #define REQUIRE_PARAMS(count) \ if (m_ParamCount != count) { \ + m_bAbort = TRUE; \ return; \ } @@ -33,6 +34,7 @@ CPDF_StreamContentParser::CPDF_StreamContentParser( m_Level(level), m_ParamStartPos(0), m_ParamCount(0), + m_bAbort(FALSE), m_pCurStates(new CPDF_AllStates), m_pLastTextObject(nullptr), m_DefFontSize(0), diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp index c9bcff6db6..48e9b98d3a 100644 --- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp +++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp @@ -9,7 +9,27 @@ #include "../../../include/fxcodec/fx_codec.h" #include "pageint.h" #include - +const FX_CHAR* const _PDF_OpCharType = + "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" + "IIVIIIIVIIVIIIIIVVIIIIIIIIIIIIII" + "IIVVVVVVIVVVVVVIVVVVVIIVVIIIIIII" + "IIVVVVVVVVVVVVVVIVVVIIVVIVVIIIII" + "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" + "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" + "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" + "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"; +FX_BOOL _PDF_HasInvalidOpChar(const FX_CHAR* op) { + if (!op) { + return FALSE; + } + uint8_t ch; + while ((ch = *op++)) { + if (_PDF_OpCharType[ch] == 'I') { + return TRUE; + } + } + return FALSE; +} class CPDF_StreamParserAutoClearer { public: CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable, @@ -41,7 +61,13 @@ FX_DWORD CPDF_StreamContentParser::Parse(const uint8_t* pData, case CPDF_StreamParser::EndOfData: return m_pSyntax->GetPos(); case CPDF_StreamParser::Keyword: - OnOperator((char*)syntax.GetWordBuf()); + if (!OnOperator((char*)syntax.GetWordBuf()) && + _PDF_HasInvalidOpChar((char*)syntax.GetWordBuf())) { + m_bAbort = TRUE; + } + if (m_bAbort) { + return m_pSyntax->GetPos(); + } ClearAllParams(); break; case CPDF_StreamParser::Number: @@ -1100,6 +1126,10 @@ void CPDF_ContentParser::Continue(IFX_Pause* pPause) { m_CurrentOffset += m_pParser->Parse(m_pData + m_CurrentOffset, m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); + if (m_pParser->ShouldAbort()) { + m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP; + continue; + } } } if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) { diff --git a/core/src/fpdfapi/fpdf_page/pageint.h b/core/src/fpdfapi/fpdf_page/pageint.h index c85523b833..6bec07268c 100644 --- a/core/src/fpdfapi/fpdf_page/pageint.h +++ b/core/src/fpdfapi/fpdf_page/pageint.h @@ -162,6 +162,7 @@ class CPDF_StreamContentParser { int level); ~CPDF_StreamContentParser(); + FX_BOOL ShouldAbort() const { return m_bAbort; } CPDF_PageObjects* GetObjectList() const { return m_pObjectList; } CPDF_AllStates* GetCurStates() const { return m_pCurStates.get(); } FX_BOOL IsColored() const { return m_bColored; } @@ -304,6 +305,7 @@ class CPDF_StreamContentParser { _ContentParam m_ParamBuf1[PARAM_BUF_SIZE]; FX_DWORD m_ParamStartPos; FX_DWORD m_ParamCount; + FX_BOOL m_bAbort; CPDF_StreamParser* m_pSyntax; nonstd::unique_ptr m_pCurStates; CPDF_ContentMark m_CurContentMark; diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index cd923f3e22..7482f0b8e4 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -2455,8 +2455,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, len = pLenObj->GetInteger(); } // Check whether end of line markers follow the keyword 'stream'. - // The stream starts after end of line markers. - m_Pos += ReadEOLMarkers(m_Pos); + unsigned int numMarkers = ReadEOLMarkers(m_Pos); + m_Pos += numMarkers; FX_FILESIZE streamStartPos = m_Pos; if (pContext) { pContext->m_DataStart = streamStartPos; @@ -2467,18 +2467,19 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler; if (!pCryptoHandler) { FX_BOOL bSearchForKeyword = TRUE; + unsigned int prevMarkers = 0; + unsigned int nextMarkers = 0; if (len >= 0) { pdfium::base::CheckedNumeric pos = m_Pos; pos += len; if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) { m_Pos = pos.ValueOrDie(); } - m_Pos += ReadEOLMarkers(m_Pos); - FXSYS_memset(m_WordBuffer, 0, ENDSTREAM_LEN + 1); + prevMarkers = ReadEOLMarkers(m_Pos); GetNextWord(); - if (FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0 && - IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen, - FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) { + nextMarkers = ReadEOLMarkers(m_Pos); + if (m_WordSize == ENDSTREAM_LEN && prevMarkers != 0 && nextMarkers != 0 && + FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0) { bSearchForKeyword = FALSE; } } @@ -2493,12 +2494,22 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, // Can't find any "endstream". break; } - if (IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen, - FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) { + prevMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); + nextMarkers = + ReadEOLMarkers(streamStartPos + endStreamOffset + ENDSTREAM_LEN); + if (prevMarkers != 0 && nextMarkers != 0) { // Stop searching when the keyword "endstream" is found. - endStreamOffset = m_Pos - streamStartPos - ENDSTREAM_LEN; break; + } else { + unsigned char ch = 0x00; + GetCharAt(streamStartPos + endStreamOffset + ENDSTREAM_LEN, ch); + if (ch == 0x09 || ch == 0x20) { + //"endstream" is treated as a keyword + // when it is followed by a tab or whitespace + break; + } } + m_Pos += ENDSTREAM_LEN; } m_Pos = streamStartPos; FX_FILESIZE endObjOffset = 0; @@ -2508,12 +2519,14 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, // Can't find any "endobj". break; } - if (IsWholeWord(m_Pos - ENDOBJ_LEN, m_FileLen, - FX_BSTRC("endobj").GetPtr(), ENDOBJ_LEN, TRUE)) { + prevMarkers = ReadEOLMarkers(streamStartPos + endObjOffset - 1); + nextMarkers = + ReadEOLMarkers(streamStartPos + endObjOffset + ENDOBJ_LEN); + if (prevMarkers != 0 && nextMarkers != 0) { // Stop searching when the keyword "endobj" is found. - endObjOffset = m_Pos - streamStartPos - ENDOBJ_LEN; break; } + m_Pos += ENDOBJ_LEN; } if (endStreamOffset < 0 && endObjOffset < 0) { // Can't find "endstream" or "endobj". @@ -2529,7 +2542,7 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, endStreamOffset = endObjOffset; } len = endStreamOffset; - int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); + numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); if (numMarkers == 2) { len -= 2; } else { @@ -2566,9 +2579,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, pContext->m_DataEnd = pContext->m_DataStart + len; } streamStartPos = m_Pos; - FXSYS_memset(m_WordBuffer, 0, ENDOBJ_LEN + 1); GetNextWord(); - int numMarkers = ReadEOLMarkers(m_Pos); + numMarkers = ReadEOLMarkers(m_Pos); if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 && FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) { m_Pos = streamStartPos; @@ -2599,8 +2611,7 @@ int32_t CPDF_SyntaxParser::GetDirectNum() { FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, FX_FILESIZE limit, const uint8_t* tag, - FX_DWORD taglen, - FX_BOOL checkKeyword) { + FX_DWORD taglen) { uint8_t type = PDF_CharType[tag[0]]; FX_BOOL bCheckLeft = type != 'D' && type != 'W'; type = PDF_CharType[tag[taglen - 1]]; @@ -2609,13 +2620,13 @@ FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, if (bCheckRight && startpos + (int32_t)taglen <= limit && GetCharAt(startpos + (int32_t)taglen, ch)) { uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { + if (type == 'N' || type == 'R') { return FALSE; } } if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { + if (type == 'N' || type == 'R') { return FALSE; } } @@ -2671,8 +2682,7 @@ FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, } } FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; - if (!bWholeWord || - IsWholeWord(startpos, limit, tag.GetPtr(), taglen, FALSE)) { + if (!bWholeWord || IsWholeWord(startpos, limit, tag.GetPtr(), taglen)) { m_Pos = startpos; return TRUE; } @@ -2729,7 +2739,7 @@ int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, if (pPatterns[i].m_Offset == pPatterns[i].m_Len) { if (!bWholeWord || IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag, - pPatterns[i].m_Len, FALSE)) { + pPatterns[i].m_Len)) { found = i; goto end; } else { -- cgit v1.2.3