summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
author: Jun Fang <jun_fang@foxitsoftware.com> 2015-09-25 21:12:51 -0700
committer: Jun Fang <jun_fang@foxitsoftware.com> 2015-09-25 21:12:51 -0700
commit: fa9756f77ad6145940d3dc697814b84f5755ae17 (patch)
tree: 45044e7ac308813d031282c4665a4ba67db2ca17 /core
parent: 9bd18183ba8210c91d71c3060146235750a4c71c (diff)
download: pdfium-fa9756f77ad6145940d3dc697814b84f5755ae17.tar.xz
Revert "Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files"
This reverts commit 9bd18183ba8210c91d71c3060146235750a4c71c.
Diffstat (limited to 'core')
-rw-r--r-- core/include/fpdfapi/fpdf_parser.h | 3
-rw-r--r-- core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp | 2
-rw-r--r-- core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp | 34
-rw-r--r-- core/src/fpdfapi/fpdf_page/pageint.h | 2
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 56
5 files changed, 70 insertions, 27 deletions
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h
index 6194247568..03ae4cf570 100644
--- a/core/include/fpdfapi/fpdf_parser.h
+++ b/core/include/fpdfapi/fpdf_parser.h
@@ -294,8 +294,7 @@ class CPDF_SyntaxParser {
FX_BOOL IsWholeWord(FX_FILESIZE startpos,
FX_FILESIZE limit,
const uint8_t* tag,
- FX_DWORD taglen,
- FX_BOOL checkKeyword);
+ FX_DWORD taglen);
CFX_ByteString ReadString();
diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp
index 4e5ef1c898..059dd4c2a6 100644
--- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp
+++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp
@@ -11,6 +11,7 @@
#define REQUIRE_PARAMS(count) \
if (m_ParamCount != count) { \
+ m_bAbort = TRUE; \
return; \
}
@@ -33,6 +34,7 @@ CPDF_StreamContentParser::CPDF_StreamContentParser(
m_Level(level),
m_ParamStartPos(0),
m_ParamCount(0),
+ m_bAbort(FALSE),
m_pCurStates(new CPDF_AllStates),
m_pLastTextObject(nullptr),
m_DefFontSize(0),
diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp
index c9bcff6db6..48e9b98d3a 100644
--- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp
+++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp
@@ -9,7 +9,27 @@
#include "../../../include/fxcodec/fx_codec.h"
#include "pageint.h"
#include <limits.h>
-
+const FX_CHAR* const _PDF_OpCharType =
+ "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
+ "IIVIIIIVIIVIIIIIVVIIIIIIIIIIIIII"
+ "IIVVVVVVIVVVVVVIVVVVVIIVVIIIIIII"
+ "IIVVVVVVVVVVVVVVIVVVIIVVIVVIIIII"
+ "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
+ "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
+ "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
+ "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII";
+FX_BOOL _PDF_HasInvalidOpChar(const FX_CHAR* op) {
+ if (!op) {
+ return FALSE;
+ }
+ uint8_t ch;
+ while ((ch = *op++)) {
+ if (_PDF_OpCharType[ch] == 'I') {
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
class CPDF_StreamParserAutoClearer {
public:
CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable,
@@ -41,7 +61,13 @@ FX_DWORD CPDF_StreamContentParser::Parse(const uint8_t* pData,
case CPDF_StreamParser::EndOfData:
return m_pSyntax->GetPos();
case CPDF_StreamParser::Keyword:
- OnOperator((char*)syntax.GetWordBuf());
+ if (!OnOperator((char*)syntax.GetWordBuf()) &&
+ _PDF_HasInvalidOpChar((char*)syntax.GetWordBuf())) {
+ m_bAbort = TRUE;
+ }
+ if (m_bAbort) {
+ return m_pSyntax->GetPos();
+ }
ClearAllParams();
break;
case CPDF_StreamParser::Number:
@@ -1100,6 +1126,10 @@ void CPDF_ContentParser::Continue(IFX_Pause* pPause) {
m_CurrentOffset +=
m_pParser->Parse(m_pData + m_CurrentOffset,
m_Size - m_CurrentOffset, PARSE_STEP_LIMIT);
+ if (m_pParser->ShouldAbort()) {
+ m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP;
+ continue;
+ }
}
}
if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
diff --git a/core/src/fpdfapi/fpdf_page/pageint.h b/core/src/fpdfapi/fpdf_page/pageint.h
index c85523b833..6bec07268c 100644
--- a/core/src/fpdfapi/fpdf_page/pageint.h
+++ b/core/src/fpdfapi/fpdf_page/pageint.h
@@ -162,6 +162,7 @@ class CPDF_StreamContentParser {
int level);
~CPDF_StreamContentParser();
+ FX_BOOL ShouldAbort() const { return m_bAbort; }
CPDF_PageObjects* GetObjectList() const { return m_pObjectList; }
CPDF_AllStates* GetCurStates() const { return m_pCurStates.get(); }
FX_BOOL IsColored() const { return m_bColored; }
@@ -304,6 +305,7 @@ class CPDF_StreamContentParser {
_ContentParam m_ParamBuf1[PARAM_BUF_SIZE];
FX_DWORD m_ParamStartPos;
FX_DWORD m_ParamCount;
+ FX_BOOL m_bAbort;
CPDF_StreamParser* m_pSyntax;
nonstd::unique_ptr<CPDF_AllStates> m_pCurStates;
CPDF_ContentMark m_CurContentMark;
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
index cd923f3e22..7482f0b8e4 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
@@ -2455,8 +2455,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
len = pLenObj->GetInteger();
}
// Check whether end of line markers follow the keyword 'stream'.
- // The stream starts after end of line markers.
- m_Pos += ReadEOLMarkers(m_Pos);
+ unsigned int numMarkers = ReadEOLMarkers(m_Pos);
+ m_Pos += numMarkers;
FX_FILESIZE streamStartPos = m_Pos;
if (pContext) {
pContext->m_DataStart = streamStartPos;
@@ -2467,18 +2467,19 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler;
if (!pCryptoHandler) {
FX_BOOL bSearchForKeyword = TRUE;
+ unsigned int prevMarkers = 0;
+ unsigned int nextMarkers = 0;
if (len >= 0) {
pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
pos += len;
if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
m_Pos = pos.ValueOrDie();
}
- m_Pos += ReadEOLMarkers(m_Pos);
- FXSYS_memset(m_WordBuffer, 0, ENDSTREAM_LEN + 1);
+ prevMarkers = ReadEOLMarkers(m_Pos);
GetNextWord();
- if (FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0 &&
- IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,
- FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {
+ nextMarkers = ReadEOLMarkers(m_Pos);
+ if (m_WordSize == ENDSTREAM_LEN && prevMarkers != 0 && nextMarkers != 0 &&
+ FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0) {
bSearchForKeyword = FALSE;
}
}
@@ -2493,12 +2494,22 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
// Can't find any "endstream".
break;
}
- if (IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,
- FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {
+ prevMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
+ nextMarkers =
+ ReadEOLMarkers(streamStartPos + endStreamOffset + ENDSTREAM_LEN);
+ if (prevMarkers != 0 && nextMarkers != 0) {
// Stop searching when the keyword "endstream" is found.
- endStreamOffset = m_Pos - streamStartPos - ENDSTREAM_LEN;
break;
+ } else {
+ unsigned char ch = 0x00;
+ GetCharAt(streamStartPos + endStreamOffset + ENDSTREAM_LEN, ch);
+ if (ch == 0x09 || ch == 0x20) {
+ //"endstream" is treated as a keyword
+ // when it is followed by a tab or whitespace
+ break;
+ }
}
+ m_Pos += ENDSTREAM_LEN;
}
m_Pos = streamStartPos;
FX_FILESIZE endObjOffset = 0;
@@ -2508,12 +2519,14 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
// Can't find any "endobj".
break;
}
- if (IsWholeWord(m_Pos - ENDOBJ_LEN, m_FileLen,
- FX_BSTRC("endobj").GetPtr(), ENDOBJ_LEN, TRUE)) {
+ prevMarkers = ReadEOLMarkers(streamStartPos + endObjOffset - 1);
+ nextMarkers =
+ ReadEOLMarkers(streamStartPos + endObjOffset + ENDOBJ_LEN);
+ if (prevMarkers != 0 && nextMarkers != 0) {
// Stop searching when the keyword "endobj" is found.
- endObjOffset = m_Pos - streamStartPos - ENDOBJ_LEN;
break;
}
+ m_Pos += ENDOBJ_LEN;
}
if (endStreamOffset < 0 && endObjOffset < 0) {
// Can't find "endstream" or "endobj".
@@ -2529,7 +2542,7 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
endStreamOffset = endObjOffset;
}
len = endStreamOffset;
- int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
+ numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
if (numMarkers == 2) {
len -= 2;
} else {
@@ -2566,9 +2579,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
pContext->m_DataEnd = pContext->m_DataStart + len;
}
streamStartPos = m_Pos;
- FXSYS_memset(m_WordBuffer, 0, ENDOBJ_LEN + 1);
GetNextWord();
- int numMarkers = ReadEOLMarkers(m_Pos);
+ numMarkers = ReadEOLMarkers(m_Pos);
if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 &&
FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) {
m_Pos = streamStartPos;
@@ -2599,8 +2611,7 @@ int32_t CPDF_SyntaxParser::GetDirectNum() {
FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
FX_FILESIZE limit,
const uint8_t* tag,
- FX_DWORD taglen,
- FX_BOOL checkKeyword) {
+ FX_DWORD taglen) {
uint8_t type = PDF_CharType[tag[0]];
FX_BOOL bCheckLeft = type != 'D' && type != 'W';
type = PDF_CharType[tag[taglen - 1]];
@@ -2609,13 +2620,13 @@ FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
if (bCheckRight && startpos + (int32_t)taglen <= limit &&
GetCharAt(startpos + (int32_t)taglen, ch)) {
uint8_t type = PDF_CharType[ch];
- if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
+ if (type == 'N' || type == 'R') {
return FALSE;
}
}
if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
uint8_t type = PDF_CharType[ch];
- if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
+ if (type == 'N' || type == 'R') {
return FALSE;
}
}
@@ -2671,8 +2682,7 @@ FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
}
}
FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
- if (!bWholeWord ||
- IsWholeWord(startpos, limit, tag.GetPtr(), taglen, FALSE)) {
+ if (!bWholeWord || IsWholeWord(startpos, limit, tag.GetPtr(), taglen)) {
m_Pos = startpos;
return TRUE;
}
@@ -2729,7 +2739,7 @@ int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
if (pPatterns[i].m_Offset == pPatterns[i].m_Len) {
if (!bWholeWord ||
IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag,
- pPatterns[i].m_Len, FALSE)) {
+ pPatterns[i].m_Len)) {
found = i;
goto end;
} else {