From fa9756f77ad6145940d3dc697814b84f5755ae17 Mon Sep 17 00:00:00 2001 From: Jun Fang Date: Fri, 25 Sep 2015 21:12:51 -0700 Subject: Revert "Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files" This reverts commit 9bd18183ba8210c91d71c3060146235750a4c71c. --- .../src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 56 +++++++++++++--------- 1 file changed, 33 insertions(+), 23 deletions(-) (limited to 'core/src/fpdfapi/fpdf_parser') diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index cd923f3e22..7482f0b8e4 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -2455,8 +2455,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, len = pLenObj->GetInteger(); } // Check whether end of line markers follow the keyword 'stream'. - // The stream starts after end of line markers. - m_Pos += ReadEOLMarkers(m_Pos); + unsigned int numMarkers = ReadEOLMarkers(m_Pos); + m_Pos += numMarkers; FX_FILESIZE streamStartPos = m_Pos; if (pContext) { pContext->m_DataStart = streamStartPos; @@ -2467,18 +2467,19 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler; if (!pCryptoHandler) { FX_BOOL bSearchForKeyword = TRUE; + unsigned int prevMarkers = 0; + unsigned int nextMarkers = 0; if (len >= 0) { pdfium::base::CheckedNumeric pos = m_Pos; pos += len; if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) { m_Pos = pos.ValueOrDie(); } - m_Pos += ReadEOLMarkers(m_Pos); - FXSYS_memset(m_WordBuffer, 0, ENDSTREAM_LEN + 1); + prevMarkers = ReadEOLMarkers(m_Pos); GetNextWord(); - if (FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0 && - IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen, - FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) { + nextMarkers = ReadEOLMarkers(m_Pos); + if (m_WordSize == ENDSTREAM_LEN && prevMarkers != 0 && nextMarkers != 0 && + FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0) { bSearchForKeyword = FALSE; } } @@ -2493,12 +2494,22 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, // Can't find any "endstream". break; } - if (IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen, - FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) { + prevMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); + nextMarkers = + ReadEOLMarkers(streamStartPos + endStreamOffset + ENDSTREAM_LEN); + if (prevMarkers != 0 && nextMarkers != 0) { // Stop searching when the keyword "endstream" is found. - endStreamOffset = m_Pos - streamStartPos - ENDSTREAM_LEN; break; + } else { + unsigned char ch = 0x00; + GetCharAt(streamStartPos + endStreamOffset + ENDSTREAM_LEN, ch); + if (ch == 0x09 || ch == 0x20) { + //"endstream" is treated as a keyword + // when it is followed by a tab or whitespace + break; + } } + m_Pos += ENDSTREAM_LEN; } m_Pos = streamStartPos; FX_FILESIZE endObjOffset = 0; @@ -2508,12 +2519,14 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, // Can't find any "endobj". break; } - if (IsWholeWord(m_Pos - ENDOBJ_LEN, m_FileLen, - FX_BSTRC("endobj").GetPtr(), ENDOBJ_LEN, TRUE)) { + prevMarkers = ReadEOLMarkers(streamStartPos + endObjOffset - 1); + nextMarkers = + ReadEOLMarkers(streamStartPos + endObjOffset + ENDOBJ_LEN); + if (prevMarkers != 0 && nextMarkers != 0) { // Stop searching when the keyword "endobj" is found. - endObjOffset = m_Pos - streamStartPos - ENDOBJ_LEN; break; } + m_Pos += ENDOBJ_LEN; } if (endStreamOffset < 0 && endObjOffset < 0) { // Can't find "endstream" or "endobj". @@ -2529,7 +2542,7 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, endStreamOffset = endObjOffset; } len = endStreamOffset; - int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); + numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); if (numMarkers == 2) { len -= 2; } else { @@ -2566,9 +2579,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, pContext->m_DataEnd = pContext->m_DataStart + len; } streamStartPos = m_Pos; - FXSYS_memset(m_WordBuffer, 0, ENDOBJ_LEN + 1); GetNextWord(); - int numMarkers = ReadEOLMarkers(m_Pos); + numMarkers = ReadEOLMarkers(m_Pos); if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 && FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) { m_Pos = streamStartPos; @@ -2599,8 +2611,7 @@ int32_t CPDF_SyntaxParser::GetDirectNum() { FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, FX_FILESIZE limit, const uint8_t* tag, - FX_DWORD taglen, - FX_BOOL checkKeyword) { + FX_DWORD taglen) { uint8_t type = PDF_CharType[tag[0]]; FX_BOOL bCheckLeft = type != 'D' && type != 'W'; type = PDF_CharType[tag[taglen - 1]]; @@ -2609,13 +2620,13 @@ FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, if (bCheckRight && startpos + (int32_t)taglen <= limit && GetCharAt(startpos + (int32_t)taglen, ch)) { uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { + if (type == 'N' || type == 'R') { return FALSE; } } if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { uint8_t type = PDF_CharType[ch]; - if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) { + if (type == 'N' || type == 'R') { return FALSE; } } @@ -2671,8 +2682,7 @@ FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, } } FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; - if (!bWholeWord || - IsWholeWord(startpos, limit, tag.GetPtr(), taglen, FALSE)) { + if (!bWholeWord || IsWholeWord(startpos, limit, tag.GetPtr(), taglen)) { m_Pos = startpos; return TRUE; } @@ -2729,7 +2739,7 @@ int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, if (pPatterns[i].m_Offset == pPatterns[i].m_Len) { if (!bWholeWord || IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag, - pPatterns[i].m_Len, FALSE)) { + pPatterns[i].m_Len)) { found = i; goto end; } else { -- cgit v1.2.3