summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
author: Jun Fang <jun_fang@foxitsoftware.com> 2015-09-25 22:03:26 -0700
committer: Jun Fang <jun_fang@foxitsoftware.com> 2015-09-25 22:03:26 -0700
commit: e36f64066fb189a43ee488dedb535ef98a009db7 (patch)
tree: e1950c29e9e295dd247a0edc5ed8d490e18800d8 /core
parent: fa9756f77ad6145940d3dc697814b84f5755ae17 (diff)
download: pdfium-e36f64066fb189a43ee488dedb535ef98a009db7.tar.xz
Revert "Revert "Fix the issue that pdfium swallows 'fi' or 'ff' in some pdf files""
This reverts commit fa9756f77ad6145940d3dc697814b84f5755ae17.

TBR=thestig@chromium.org

Review URL: https://codereview.chromium.org/1307353005/
Diffstat (limited to 'core')
-rw-r--r-- core/include/fpdfapi/fpdf_parser.h | 3
-rw-r--r-- core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp | 2
-rw-r--r-- core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp | 34
-rw-r--r-- core/src/fpdfapi/fpdf_page/pageint.h | 2
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp | 56
5 files changed, 27 insertions, 70 deletions
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h
index 03ae4cf570..6194247568 100644
--- a/core/include/fpdfapi/fpdf_parser.h
+++ b/core/include/fpdfapi/fpdf_parser.h
@@ -294,7 +294,8 @@ class CPDF_SyntaxParser {
FX_BOOL IsWholeWord(FX_FILESIZE startpos,
FX_FILESIZE limit,
const uint8_t* tag,
- FX_DWORD taglen);
+ FX_DWORD taglen,
+ FX_BOOL checkKeyword);
CFX_ByteString ReadString();
diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp
index 059dd4c2a6..4e5ef1c898 100644
--- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp
+++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser.cpp
@@ -11,7 +11,6 @@
#define REQUIRE_PARAMS(count) \
if (m_ParamCount != count) { \
- m_bAbort = TRUE; \
return; \
}
@@ -34,7 +33,6 @@ CPDF_StreamContentParser::CPDF_StreamContentParser(
m_Level(level),
m_ParamStartPos(0),
m_ParamCount(0),
- m_bAbort(FALSE),
m_pCurStates(new CPDF_AllStates),
m_pLastTextObject(nullptr),
m_DefFontSize(0),
diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp
index 48e9b98d3a..c9bcff6db6 100644
--- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp
+++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp
@@ -9,27 +9,7 @@
#include "../../../include/fxcodec/fx_codec.h"
#include "pageint.h"
#include <limits.h>
-const FX_CHAR* const _PDF_OpCharType =
- "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
- "IIVIIIIVIIVIIIIIVVIIIIIIIIIIIIII"
- "IIVVVVVVIVVVVVVIVVVVVIIVVIIIIIII"
- "IIVVVVVVVVVVVVVVIVVVIIVVIVVIIIII"
- "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
- "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
- "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
- "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII";
-FX_BOOL _PDF_HasInvalidOpChar(const FX_CHAR* op) {
- if (!op) {
- return FALSE;
- }
- uint8_t ch;
- while ((ch = *op++)) {
- if (_PDF_OpCharType[ch] == 'I') {
- return TRUE;
- }
- }
- return FALSE;
-}
+
class CPDF_StreamParserAutoClearer {
public:
CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable,
@@ -61,13 +41,7 @@ FX_DWORD CPDF_StreamContentParser::Parse(const uint8_t* pData,
case CPDF_StreamParser::EndOfData:
return m_pSyntax->GetPos();
case CPDF_StreamParser::Keyword:
- if (!OnOperator((char*)syntax.GetWordBuf()) &&
- _PDF_HasInvalidOpChar((char*)syntax.GetWordBuf())) {
- m_bAbort = TRUE;
- }
- if (m_bAbort) {
- return m_pSyntax->GetPos();
- }
+ OnOperator((char*)syntax.GetWordBuf());
ClearAllParams();
break;
case CPDF_StreamParser::Number:
@@ -1126,10 +1100,6 @@ void CPDF_ContentParser::Continue(IFX_Pause* pPause) {
m_CurrentOffset +=
m_pParser->Parse(m_pData + m_CurrentOffset,
m_Size - m_CurrentOffset, PARSE_STEP_LIMIT);
- if (m_pParser->ShouldAbort()) {
- m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP;
- continue;
- }
}
}
if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
diff --git a/core/src/fpdfapi/fpdf_page/pageint.h b/core/src/fpdfapi/fpdf_page/pageint.h
index 6bec07268c..c85523b833 100644
--- a/core/src/fpdfapi/fpdf_page/pageint.h
+++ b/core/src/fpdfapi/fpdf_page/pageint.h
@@ -162,7 +162,6 @@ class CPDF_StreamContentParser {
int level);
~CPDF_StreamContentParser();
- FX_BOOL ShouldAbort() const { return m_bAbort; }
CPDF_PageObjects* GetObjectList() const { return m_pObjectList; }
CPDF_AllStates* GetCurStates() const { return m_pCurStates.get(); }
FX_BOOL IsColored() const { return m_bColored; }
@@ -305,7 +304,6 @@ class CPDF_StreamContentParser {
_ContentParam m_ParamBuf1[PARAM_BUF_SIZE];
FX_DWORD m_ParamStartPos;
FX_DWORD m_ParamCount;
- FX_BOOL m_bAbort;
CPDF_StreamParser* m_pSyntax;
nonstd::unique_ptr<CPDF_AllStates> m_pCurStates;
CPDF_ContentMark m_CurContentMark;
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
index 7482f0b8e4..cd923f3e22 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
@@ -2455,8 +2455,8 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
len = pLenObj->GetInteger();
}
// Check whether end of line markers follow the keyword 'stream'.
- unsigned int numMarkers = ReadEOLMarkers(m_Pos);
- m_Pos += numMarkers;
+ // The stream starts after end of line markers.
+ m_Pos += ReadEOLMarkers(m_Pos);
FX_FILESIZE streamStartPos = m_Pos;
if (pContext) {
pContext->m_DataStart = streamStartPos;
@@ -2467,19 +2467,18 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler;
if (!pCryptoHandler) {
FX_BOOL bSearchForKeyword = TRUE;
- unsigned int prevMarkers = 0;
- unsigned int nextMarkers = 0;
if (len >= 0) {
pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
pos += len;
if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
m_Pos = pos.ValueOrDie();
}
- prevMarkers = ReadEOLMarkers(m_Pos);
+ m_Pos += ReadEOLMarkers(m_Pos);
+ FXSYS_memset(m_WordBuffer, 0, ENDSTREAM_LEN + 1);
GetNextWord();
- nextMarkers = ReadEOLMarkers(m_Pos);
- if (m_WordSize == ENDSTREAM_LEN && prevMarkers != 0 && nextMarkers != 0 &&
- FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0) {
+ if (FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0 &&
+ IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,
+ FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {
bSearchForKeyword = FALSE;
}
}
@@ -2494,22 +2493,12 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
// Can't find any "endstream".
break;
}
- prevMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
- nextMarkers =
- ReadEOLMarkers(streamStartPos + endStreamOffset + ENDSTREAM_LEN);
- if (prevMarkers != 0 && nextMarkers != 0) {
+ if (IsWholeWord(m_Pos - ENDSTREAM_LEN, m_FileLen,
+ FX_BSTRC("endstream").GetPtr(), ENDSTREAM_LEN, TRUE)) {
// Stop searching when the keyword "endstream" is found.
+ endStreamOffset = m_Pos - streamStartPos - ENDSTREAM_LEN;
break;
- } else {
- unsigned char ch = 0x00;
- GetCharAt(streamStartPos + endStreamOffset + ENDSTREAM_LEN, ch);
- if (ch == 0x09 || ch == 0x20) {
- //"endstream" is treated as a keyword
- // when it is followed by a tab or whitespace
- break;
- }
}
- m_Pos += ENDSTREAM_LEN;
}
m_Pos = streamStartPos;
FX_FILESIZE endObjOffset = 0;
@@ -2519,14 +2508,12 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
// Can't find any "endobj".
break;
}
- prevMarkers = ReadEOLMarkers(streamStartPos + endObjOffset - 1);
- nextMarkers =
- ReadEOLMarkers(streamStartPos + endObjOffset + ENDOBJ_LEN);
- if (prevMarkers != 0 && nextMarkers != 0) {
+ if (IsWholeWord(m_Pos - ENDOBJ_LEN, m_FileLen,
+ FX_BSTRC("endobj").GetPtr(), ENDOBJ_LEN, TRUE)) {
// Stop searching when the keyword "endobj" is found.
+ endObjOffset = m_Pos - streamStartPos - ENDOBJ_LEN;
break;
}
- m_Pos += ENDOBJ_LEN;
}
if (endStreamOffset < 0 && endObjOffset < 0) {
// Can't find "endstream" or "endobj".
@@ -2542,7 +2529,7 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
endStreamOffset = endObjOffset;
}
len = endStreamOffset;
- numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
+ int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
if (numMarkers == 2) {
len -= 2;
} else {
@@ -2579,8 +2566,9 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
pContext->m_DataEnd = pContext->m_DataStart + len;
}
streamStartPos = m_Pos;
+ FXSYS_memset(m_WordBuffer, 0, ENDOBJ_LEN + 1);
GetNextWord();
- numMarkers = ReadEOLMarkers(m_Pos);
+ int numMarkers = ReadEOLMarkers(m_Pos);
if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 &&
FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) {
m_Pos = streamStartPos;
@@ -2611,7 +2599,8 @@ int32_t CPDF_SyntaxParser::GetDirectNum() {
FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
FX_FILESIZE limit,
const uint8_t* tag,
- FX_DWORD taglen) {
+ FX_DWORD taglen,
+ FX_BOOL checkKeyword) {
uint8_t type = PDF_CharType[tag[0]];
FX_BOOL bCheckLeft = type != 'D' && type != 'W';
type = PDF_CharType[tag[taglen - 1]];
@@ -2620,13 +2609,13 @@ FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
if (bCheckRight && startpos + (int32_t)taglen <= limit &&
GetCharAt(startpos + (int32_t)taglen, ch)) {
uint8_t type = PDF_CharType[ch];
- if (type == 'N' || type == 'R') {
+ if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
return FALSE;
}
}
if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
uint8_t type = PDF_CharType[ch];
- if (type == 'N' || type == 'R') {
+ if (type == 'N' || type == 'R' || (checkKeyword && type == 'D')) {
return FALSE;
}
}
@@ -2682,7 +2671,8 @@ FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
}
}
FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
- if (!bWholeWord || IsWholeWord(startpos, limit, tag.GetPtr(), taglen)) {
+ if (!bWholeWord ||
+ IsWholeWord(startpos, limit, tag.GetPtr(), taglen, FALSE)) {
m_Pos = startpos;
return TRUE;
}
@@ -2739,7 +2729,7 @@ int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
if (pPatterns[i].m_Offset == pPatterns[i].m_Len) {
if (!bWholeWord ||
IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag,
- pPatterns[i].m_Len)) {
+ pPatterns[i].m_Len, FALSE)) {
found = i;
goto end;
} else {