summary | refs | log | tree | commit | diff
path: root/core/src/fpdfapi/fpdf_parser
diff options
context:
space:
mode:
author: Jun Fang <jun_fang@foxitsoftware.com> 2015-09-25 21:12:51 -0700
committer: Jun Fang <jun_fang@foxitsoftware.com> 2015-09-25 21:12:51 -0700
commit: fa9756f77ad6145940d3dc697814b84f5755ae17 (patch)
tree: 45044e7ac308813d031282c4665a4ba67db2ca17 /core/src/fpdfapi/fpdf_parser
parent: 9bd18183ba8210c91d71c3060146235750a4c71c (diff)
download: pdfium-fa9756f77ad6145940d3dc697814b84f5755ae17.tar.xz
Revert "Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files"
This reverts commit 9bd18183ba8210c91d71c3060146235750a4c71c.
Diffstat (limited to 'core/src/fpdfapi/fpdf_parser')
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 56
1 files changed, 33 insertions, 23 deletions
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
index cd923f3e22..7482f0b8e4 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
@@ -2455,8 +2455,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
len = pLenObj->GetInteger();
}
// Check whether end of line markers follow the keyword 'stream'.
- // The stream starts after end of line markers.
- m_Pos += ReadEOLMarkers(m_Pos);
+ unsigned int numMarkers = ReadEOLMarkers(m_Pos);
+ m_Pos += numMarkers;
FX_FILESIZE streamStartPos = m_Pos;
if (pContext) {
pContext->m_DataStart = streamStartPos;
@@ -2467,18 +2467,19 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler;
if (!pCryptoHandler) {
FX_BOOL bSearchForKeyword = TRUE;
+ unsigned int prevMarkers = 0;
+ unsigned int nextMarkers = 0;
if (len >= 0) {
pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
pos += len;
if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
m_Pos = pos.ValueOrDie();
}
- m_Pos += ReadEOLMarkers(m_Pos);
- FXSYS_memset(m_WordBuffer, 0, ENDSTREAM_LEN + 1);
+ prevMarkers = ReadEOLMarkers(m_Pos);
GetNextWord();
- if (FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0 &&
- IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,
- FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {
+ nextMarkers = ReadEOLMarkers(m_Pos);
+ if (m_WordSize == ENDSTREAM_LEN && prevMarkers != 0 && nextMarkers != 0 &&
+ FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0) {
bSearchForKeyword = FALSE;
}
}
@@ -2493,12 +2494,22 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
// Can't find any "endstream".
break;
}
- if (IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,
- FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {
+ prevMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
+ nextMarkers =
+ ReadEOLMarkers(streamStartPos + endStreamOffset + ENDSTREAM_LEN);
+ if (prevMarkers != 0 && nextMarkers != 0) {
// Stop searching when the keyword "endstream" is found.
- endStreamOffset = m_Pos - streamStartPos - ENDSTREAM_LEN;
break;
+ } else {
+ unsigned char ch = 0x00;
+ GetCharAt(streamStartPos + endStreamOffset + ENDSTREAM_LEN, ch);
+ if (ch == 0x09 || ch == 0x20) {
+ //"endstream" is treated as a keyword
+ // when it is followed by a tab or whitespace
+ break;
+ }
}
+ m_Pos += ENDSTREAM_LEN;
}
m_Pos = streamStartPos;
FX_FILESIZE endObjOffset = 0;
@@ -2508,12 +2519,14 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
// Can't find any "endobj".
break;
}
- if (IsWholeWord(m_Pos - ENDOBJ_LEN, m_FileLen,
- FX_BSTRC("endobj").GetPtr(), ENDOBJ_LEN, TRUE)) {
+ prevMarkers = ReadEOLMarkers(streamStartPos + endObjOffset - 1);
+ nextMarkers =
+ ReadEOLMarkers(streamStartPos + endObjOffset + ENDOBJ_LEN);
+ if (prevMarkers != 0 && nextMarkers != 0) {
// Stop searching when the keyword "endobj" is found.
- endObjOffset = m_Pos - streamStartPos - ENDOBJ_LEN;
break;
}
+ m_Pos += ENDOBJ_LEN;
}
if (endStreamOffset < 0 && endObjOffset < 0) {
// Can't find "endstream" or "endobj".
@@ -2529,7 +2542,7 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
endStreamOffset = endObjOffset;
}
len = endStreamOffset;
- int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
+ numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
if (numMarkers == 2) {
len -= 2;
} else {
@@ -2566,9 +2579,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
pContext->m_DataEnd = pContext->m_DataStart + len;
}
streamStartPos = m_Pos;
- FXSYS_memset(m_WordBuffer, 0, ENDOBJ_LEN + 1);
GetNextWord();
- int numMarkers = ReadEOLMarkers(m_Pos);
+ numMarkers = ReadEOLMarkers(m_Pos);
if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 &&
FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) {
m_Pos = streamStartPos;
@@ -2599,8 +2611,7 @@ int32_t CPDF_SyntaxParser::GetDirectNum() {
FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
FX_FILESIZE limit,
const uint8_t* tag,
- FX_DWORD taglen,
- FX_BOOL checkKeyword) {
+ FX_DWORD taglen) {
uint8_t type = PDF_CharType[tag[0]];
FX_BOOL bCheckLeft = type != 'D' && type != 'W';
type = PDF_CharType[tag[taglen - 1]];
@@ -2609,13 +2620,13 @@ FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
if (bCheckRight && startpos + (int32_t)taglen <= limit &&
GetCharAt(startpos + (int32_t)taglen, ch)) {
uint8_t type = PDF_CharType[ch];
- if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
+ if (type == 'N' || type == 'R') {
return FALSE;
}
}
if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
uint8_t type = PDF_CharType[ch];
- if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
+ if (type == 'N' || type == 'R') {
return FALSE;
}
}
@@ -2671,8 +2682,7 @@ FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
}
}
FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
- if (!bWholeWord ||
- IsWholeWord(startpos, limit, tag.GetPtr(), taglen, FALSE)) {
+ if (!bWholeWord || IsWholeWord(startpos, limit, tag.GetPtr(), taglen)) {
m_Pos = startpos;
return TRUE;
}
@@ -2729,7 +2739,7 @@ int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
if (pPatterns[i].m_Offset == pPatterns[i].m_Len) {
if (!bWholeWord ||
IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag,
- pPatterns[i].m_Len, FALSE)) {
+ pPatterns[i].m_Len)) {
found = i;
goto end;
} else {