summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- core/include/fpdfapi/fpdf_parser.h 2
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp 161
2 files changed, 118 insertions, 45 deletions
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h
index 7640c857fa..cb4e15e25d 100644
--- a/core/include/fpdfapi/fpdf_parser.h
+++ b/core/include/fpdfapi/fpdf_parser.h
@@ -297,6 +297,8 @@ class CPDF_SyntaxParser {
CFX_ByteString ReadHexString();
+ unsigned int ReadEOLMarkers(FX_FILESIZE pos);
+
CPDF_Stream* ReadStream(CPDF_Dictionary* pDict,
PARSE_CONTEXT* pContext,
FX_DWORD objnum,
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
index 714db002de..55fba8df98 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
@@ -2439,65 +2439,134 @@ CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
}
return NULL;
}
+unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {  // Counts the end-of-line bytes starting at |pos|: 2 for "\r\n", 1 for a lone '\r' or '\n', 0 otherwise.
+ unsigned char byte1 = 0;  // Byte at |pos|; keeps the value 0 unless GetCharAt() stores something (presumably untouched on a failed read -- TODO confirm).
+ unsigned char byte2 = 0;  // Byte at |pos| + 1; same zero fallback as byte1.
+ GetCharAt(pos, byte1);  // Return value is ignored here; the zero initialisers above act as the fallback.
+ GetCharAt(pos+1, byte2);
+ unsigned int markers = 0;  // Result when |pos| does not start with '\r' or '\n'.
+ if (byte1 == '\r' && byte2 == '\n') {  // CRLF pair counts as two marker bytes.
+ markers = 2;
+ } else if (byte1 == '\r' || byte1 == '\n') {  // Single CR or LF; note that a "\n\r" sequence also lands here and counts as 1.
+ markers = 1;
+ }
+ return markers;
+}
CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
PARSE_CONTEXT* pContext,
FX_DWORD objnum,
FX_DWORD gennum) {
CPDF_Object* pLenObj = pDict->GetElement(FX_BSTRC("Length"));
- FX_FILESIZE len = 0;
+ FX_FILESIZE len = -1;
if (pLenObj && ((pLenObj->GetType() != PDFOBJ_REFERENCE) ||
- ((((CPDF_Reference*)pLenObj)->GetObjList() != NULL) &&
+ ((((CPDF_Reference*)pLenObj)->GetObjList()) &&
((CPDF_Reference*)pLenObj)->GetRefObjNum() != objnum))) {
len = pLenObj->GetInteger();
}
-
- ToNextLine();
- FX_FILESIZE StreamStartPos = m_Pos;
+ //Check whether end of line markers follow the keyword 'stream'.
+ unsigned int numMarkers = ReadEOLMarkers(m_Pos);
+ m_Pos += numMarkers;
+ FX_FILESIZE streamStartPos = m_Pos;
if (pContext) {
- pContext->m_DataStart = m_Pos;
- }
-
- CPDF_CryptoHandler* pCryptoHandler =
- objnum == (FX_DWORD)m_MetadataObjnum ? NULL : m_pCryptoHandler;
- if (pCryptoHandler == NULL) {
- pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
- pos += len;
- if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
- m_Pos = pos.ValueOrDie();
- }
- GetNextWord();
- if (m_WordSize < 9 || FXSYS_memcmp(m_WordBuffer, "endstream", 9)) {
- m_Pos = StreamStartPos;
- FX_FILESIZE offset = FindTag(FX_BSTRC("endstream"), 0);
- if (offset >= 0) {
- FX_FILESIZE curPos = m_Pos;
- m_Pos = StreamStartPos;
- FX_FILESIZE endobjOffset = FindTag(FX_BSTRC("endobj"), 0);
- if (endobjOffset < offset && endobjOffset >= 0) {
- offset = endobjOffset;
+ pContext->m_DataStart = streamStartPos;
+ }
+ const unsigned int ENDSTREAM_LEN = sizeof("endstream") - 1;
+ const unsigned int ENDOBJ_LEN = sizeof("endobj") - 1;
+ CPDF_CryptoHandler* pCryptoHandler = objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler;
+ if (!pCryptoHandler) {
+ FX_BOOL bSearchForKeyword = TRUE;
+ unsigned int prevMarkers = 0;
+ unsigned int nextMarkers = 0;
+ if (len >= 0) {
+ pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
+ pos += len;
+ if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
+ m_Pos = pos.ValueOrDie();
+ }
+ prevMarkers = ReadEOLMarkers(m_Pos);
+ GetNextWord();
+ nextMarkers = ReadEOLMarkers(m_Pos);
+ if (m_WordSize == ENDSTREAM_LEN && prevMarkers != 0 && nextMarkers != 0 &&
+ FXSYS_memcmp(m_WordBuffer, "endstream", ENDSTREAM_LEN) == 0) {
+ bSearchForKeyword = FALSE;
+ }
+ }
+ if (bSearchForKeyword) {
+ //If len is not available, len needs to be calculated
+ //by searching the keywords "endstream" or "endobj".
+ m_Pos = streamStartPos;
+ FX_FILESIZE endStreamOffset = 0;
+ while (endStreamOffset >= 0) {
+ endStreamOffset = FindTag(FX_BSTRC("endstream"), 0);
+ if (endStreamOffset < 0) {
+ //Can't find any "endstream".
+ break;
+ }
+ prevMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
+ nextMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset + ENDSTREAM_LEN);
+ if (prevMarkers != 0 && nextMarkers != 0) {
+ //Stop searching when the keyword "endstream" is found.
+ break;
} else {
- m_Pos = curPos;
+ unsigned char ch = 0x00;
+ GetCharAt(streamStartPos + endStreamOffset + ENDSTREAM_LEN, ch);
+ if (ch == 0x09 || ch == 0x20) {
+ //"endstream" is treated as a keyword
+ //when it is followed by a tab or whitespace
+ break;
+ }
}
- uint8_t byte1, byte2;
- GetCharAt(StreamStartPos + offset - 1, byte1);
- GetCharAt(StreamStartPos + offset - 2, byte2);
- if (byte1 == 0x0a && byte2 == 0x0d) {
- len -= 2;
- } else if (byte1 == 0x0a || byte1 == 0x0d) {
- len--;
+ m_Pos += ENDSTREAM_LEN;
+ }
+ m_Pos = streamStartPos;
+ FX_FILESIZE endObjOffset = 0;
+ while (endObjOffset >= 0) {
+ endObjOffset = FindTag(FX_BSTRC("endobj"), 0);
+ if (endObjOffset < 0) {
+ //Can't find any "endobj".
+ break;
+ }
+ prevMarkers = ReadEOLMarkers(streamStartPos + endObjOffset - 1);
+ nextMarkers = ReadEOLMarkers(streamStartPos + endObjOffset + ENDOBJ_LEN);
+ if (prevMarkers != 0 && nextMarkers != 0) {
+ //Stop searching when the keyword "endobj" is found.
+ break;
}
- len = (FX_DWORD)offset;
- pDict->SetAtInteger(FX_BSTRC("Length"), len);
+ m_Pos += ENDOBJ_LEN;
+ }
+ if (endStreamOffset < 0 && endObjOffset < 0) {
+ //Can't find "endstream" or "endobj".
+ return nullptr;
+ }
+ if (endStreamOffset < 0 && endObjOffset >= 0) {
+ //Correct the position of end stream.
+ endStreamOffset = endObjOffset;
+ } else if (endStreamOffset >= 0 && endObjOffset < 0) {
+ //Correct the position of end obj.
+ endObjOffset = endStreamOffset;
+ } else if (endStreamOffset > endObjOffset) {
+ endStreamOffset = endObjOffset;
+ }
+ len = endStreamOffset;
+ numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
+ if (numMarkers == 2) {
+ len -= 2;
} else {
- m_Pos = StreamStartPos;
- if (FindTag(FX_BSTRC("endobj"), 0) < 0) {
- return NULL;
+ numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
+ if (numMarkers == 1) {
+ len -= 1;
}
}
+ if (len <= 0) {
+ return nullptr;
+ }
+ pDict->SetAtInteger(FX_BSTRC("Length"), len);
}
- m_Pos = StreamStartPos;
+ m_Pos = streamStartPos;
+ }
+ if (len <= 0) {
+ return nullptr;
}
- CPDF_Stream* pStream;
uint8_t* pData = FX_Alloc(uint8_t, len);
ReadBlock(pData, len);
if (pCryptoHandler) {
@@ -2511,14 +2580,16 @@ CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
len = dest_buf.GetSize();
dest_buf.DetachBuffer();
}
- pStream = new CPDF_Stream(pData, len, pDict);
+ CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
if (pContext) {
pContext->m_DataEnd = pContext->m_DataStart + len;
}
- StreamStartPos = m_Pos;
+ streamStartPos = m_Pos;
GetNextWord();
- if (m_WordSize == 6 && 0 == FXSYS_memcmp(m_WordBuffer, "endobj", 6)) {
- m_Pos = StreamStartPos;
+ numMarkers = ReadEOLMarkers(m_Pos);
+ if (m_WordSize == ENDOBJ_LEN && numMarkers != 0 &&
+ FXSYS_memcmp(m_WordBuffer, "endobj", ENDOBJ_LEN) == 0) {
+ m_Pos = streamStartPos;
}
return pStream;
}