summaryrefslogtreecommitdiff
path: root/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp')
-rw-r--r--core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp990
1 files changed, 990 insertions, 0 deletions
diff --git a/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp b/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp
new file mode 100644
index 0000000000..62be48818b
--- /dev/null
+++ b/core/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp
@@ -0,0 +1,990 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
+
+#include <vector>
+
+#include "core/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
+#include "core/include/fpdfapi/cpdf_array.h"
+#include "core/include/fpdfapi/cpdf_boolean.h"
+#include "core/include/fpdfapi/cpdf_dictionary.h"
+#include "core/include/fpdfapi/cpdf_name.h"
+#include "core/include/fpdfapi/cpdf_null.h"
+#include "core/include/fpdfapi/cpdf_number.h"
+#include "core/include/fpdfapi/cpdf_reference.h"
+#include "core/include/fpdfapi/cpdf_stream.h"
+#include "core/include/fpdfapi/cpdf_string.h"
+#include "core/include/fpdfapi/fpdf_module.h"
+#include "core/include/fpdfapi/fpdf_parser_decode.h"
+#include "core/include/fpdfapi/ipdf_crypto_handler.h"
+#include "core/include/fxcrt/fx_ext.h"
+#include "third_party/base/numerics/safe_math.h"
+
+namespace {
+
+// Per-tag matching state used by CPDF_SyntaxParser::SearchMultiWord().
+struct SearchTagRecord {
+ // First byte of this tag inside the caller's NUL-separated tag list.
+ const char* m_pTag;
+ // Number of bytes in the tag.
+ FX_DWORD m_Len;
+ // Number of leading tag bytes matched so far by the running scan.
+ FX_DWORD m_Offset;
+};
+
+} // namespace
+
+// static
+// Nesting depth of in-progress GetObject()/GetObjectByStrict() calls. Both
+// functions return nullptr once it exceeds kParserMaxRecursionDepth.
+int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
+
+// Creates a parser with no file attached and no read buffer allocated; the
+// buffer size is taken from CPDF_ModuleMgr::kFileBufSize.
+// NOTE(review): m_Pos, m_FileLen, m_HeaderOffset and m_BufOffset are first
+// assigned in InitParser(). Unless the header gives them default initializers
+// they are indeterminate until then -- confirm.
+CPDF_SyntaxParser::CPDF_SyntaxParser()
+ : m_MetadataObjnum(0),
+ m_pFileAccess(nullptr),
+ m_pFileBuf(nullptr),
+ m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}
+
+// Frees the read buffer allocated by InitParser(). m_pFileAccess is not
+// released here.
+CPDF_SyntaxParser::~CPDF_SyntaxParser() {
+ FX_Free(m_pFileBuf);
+}
+
+// Fetches the byte at |pos| into |ch| through GetNextChar() and then puts
+// m_Pos back to the value it had on entry. Returns GetNextChar()'s result.
+FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
+  const FX_FILESIZE original_pos = m_Pos;
+  m_Pos = pos;
+  const FX_BOOL bRead = GetNextChar(ch);
+  m_Pos = original_pos;
+  return bRead;
+}
+
+// Reads the byte at the current position into |ch| and advances m_Pos by one.
+// When the byte is not covered by the buffered window, the buffer is refilled
+// from m_pFileAccess first. Returns FALSE without advancing when the position
+// is at or past the end of the file or when the refill fails.
+FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
+  const FX_FILESIZE file_pos = m_Pos + m_HeaderOffset;
+  if (file_pos >= m_FileLen)
+    return FALSE;
+
+  // The comparison on the left is strict, so a position equal to m_BufOffset
+  // also counts as "not buffered" and triggers a refill.
+  const bool bBuffered =
+      m_BufOffset < file_pos &&
+      (FX_FILESIZE)(m_BufOffset + m_BufSize) > file_pos;
+  if (!bBuffered) {
+    FX_DWORD window_size = m_BufSize;
+    if ((FX_FILESIZE)window_size > m_FileLen)
+      window_size = (FX_DWORD)m_FileLen;
+
+    // Start the window at the requested byte, but slide it back when it would
+    // otherwise run past the end of the file.
+    FX_FILESIZE window_start = file_pos;
+    if ((FX_FILESIZE)(window_start + window_size) > m_FileLen) {
+      if (m_FileLen >= (FX_FILESIZE)window_size) {
+        window_start = m_FileLen - window_size;
+      } else {
+        window_start = 0;
+        window_size = (FX_DWORD)m_FileLen;
+      }
+    }
+
+    if (!m_pFileAccess->ReadBlock(m_pFileBuf, window_start, window_size))
+      return FALSE;
+
+    m_BufOffset = window_start;
+  }
+
+  ch = m_pFileBuf[file_pos - m_BufOffset];
+  ++m_Pos;
+  return TRUE;
+}
+
+// Reads the byte at |pos| into |ch| without touching m_Pos. On a buffer miss
+// the refilled window is chosen so that it ends at the requested byte, which
+// suits callers walking towards the start of the file. Returns FALSE when the
+// position is at or past the end of the file or when the refill fails.
+FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
+  const FX_FILESIZE file_pos = pos + m_HeaderOffset;
+  if (file_pos >= m_FileLen)
+    return FALSE;
+
+  const bool bBuffered =
+      m_BufOffset < file_pos &&
+      (FX_FILESIZE)(m_BufOffset + m_BufSize) > file_pos;
+  if (!bBuffered) {
+    // Place the requested byte last in the window when there is room.
+    FX_FILESIZE window_start = 0;
+    if (file_pos >= (FX_FILESIZE)m_BufSize)
+      window_start = file_pos - m_BufSize + 1;
+
+    FX_DWORD window_size = m_BufSize;
+    if ((FX_FILESIZE)(window_start + window_size) > m_FileLen) {
+      if (m_FileLen >= (FX_FILESIZE)window_size) {
+        window_start = m_FileLen - window_size;
+      } else {
+        window_start = 0;
+        window_size = (FX_DWORD)m_FileLen;
+      }
+    }
+
+    if (!m_pFileAccess->ReadBlock(m_pFileBuf, window_start, window_size))
+      return FALSE;
+
+    m_BufOffset = window_start;
+  }
+
+  ch = m_pFileBuf[file_pos - m_BufOffset];
+  return TRUE;
+}
+
+// Copies |size| bytes starting at the current position straight from the file
+// into |pBuf|, bypassing m_pFileBuf. m_Pos moves forward by |size| only when
+// the read succeeds.
+FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
+  if (m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) {
+    m_Pos += size;
+    return TRUE;
+  }
+  return FALSE;
+}
+
+// Reads the next token into m_WordBuffer / m_WordSize, skipping leading
+// whitespace and '%' comments.
+//
+// Tokens produced:
+//  - "/name": a '/' followed by regular and numeric characters,
+//  - "<<" or ">>", or a single '<' / '>',
+//  - any other single delimiter character,
+//  - otherwise a run of characters up to the next delimiter or whitespace.
+//
+// |bIsNumber| may be null. When given, it ends up true only if every
+// character of a non-delimiter token is numeric; it is also left true when
+// the end of the file is hit before any token character is stored.
+// Characters that do not fit in m_WordBuffer are consumed but dropped.
+void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
+  m_WordSize = 0;
+  if (bIsNumber)
+    *bIsNumber = true;
+
+  uint8_t cur;
+  if (!GetNextChar(cur))
+    return;
+
+  // Skip any mix of whitespace and comments in front of the token.
+  for (;;) {
+    while (PDFCharIsWhitespace(cur)) {
+      if (!GetNextChar(cur))
+        return;
+    }
+
+    if (cur != '%')
+      break;
+
+    // A comment runs up to the next line-ending character.
+    do {
+      if (!GetNextChar(cur))
+        return;
+    } while (!PDFCharIsLineEnding(cur));
+  }
+
+  if (PDFCharIsDelimiter(cur)) {
+    if (bIsNumber)
+      *bIsNumber = false;
+
+    m_WordBuffer[m_WordSize++] = cur;
+    switch (cur) {
+      case '/':
+        // Name: keep going until a character that cannot be part of it, and
+        // leave that character unread.
+        for (;;) {
+          if (!GetNextChar(cur))
+            return;
+
+          if (!PDFCharIsOther(cur) && !PDFCharIsNumeric(cur)) {
+            --m_Pos;
+            return;
+          }
+
+          if (m_WordSize < sizeof(m_WordBuffer) - 1)
+            m_WordBuffer[m_WordSize++] = cur;
+        }
+
+      case '<':
+      case '>': {
+        // A doubled bracket forms one two-character token; otherwise the
+        // look-ahead character is pushed back.
+        const uint8_t bracket = cur;
+        if (!GetNextChar(cur))
+          return;
+
+        if (cur == bracket)
+          m_WordBuffer[m_WordSize++] = cur;
+        else
+          --m_Pos;
+        return;
+      }
+
+      default:
+        return;
+    }
+  }
+
+  // Regular token: collect until a delimiter or whitespace, which is left
+  // unread for the next call.
+  for (;;) {
+    if (m_WordSize < sizeof(m_WordBuffer) - 1)
+      m_WordBuffer[m_WordSize++] = cur;
+
+    if (!PDFCharIsNumeric(cur) && bIsNumber)
+      *bIsNumber = false;
+
+    if (!GetNextChar(cur))
+      return;
+
+    if (PDFCharIsDelimiter(cur) || PDFCharIsWhitespace(cur)) {
+      --m_Pos;
+      return;
+    }
+  }
+}
+
+// Reads the body of a literal string up to its closing ')' and returns the
+// decoded bytes. GetObject() calls this right after reading the "(" token.
+//
+// Handles nested unescaped parentheses, the \n \r \t \b \f escapes, octal
+// escapes of one to three digits, and a backslash followed by a line ending
+// (CR, LF or CR LF), which contributes nothing to the result. Any other
+// escaped character is copied as-is. If the file ends before the closing ')',
+// whatever has been decoded so far is returned.
+CFX_ByteString CPDF_SyntaxParser::ReadString() {
+  uint8_t ch;
+  if (!GetNextChar(ch))
+    return CFX_ByteString();
+
+  enum State {
+    kPlain,        // Ordinary string content.
+    kBackslash,    // Just saw '\'.
+    kOctal2,       // One octal digit read, a second may follow.
+    kOctal3,       // Two octal digits read, a third may follow.
+    kBackslashCR,  // Saw '\' CR; a following LF belongs to the same break.
+  };
+
+  CFX_ByteTextBuf out;
+  State state = kPlain;
+  int32_t nesting = 0;
+  int32_t octal = 0;
+  for (;;) {
+    // Cleared when |ch| must be looked at again in the next state instead of
+    // being replaced by the next byte.
+    bool bConsumed = true;
+    switch (state) {
+      case kPlain:
+        switch (ch) {
+          case ')':
+            if (nesting == 0)
+              return out.GetByteString();
+            --nesting;
+            out.AppendChar(')');
+            break;
+          case '(':
+            ++nesting;
+            out.AppendChar('(');
+            break;
+          case '\\':
+            state = kBackslash;
+            break;
+          default:
+            out.AppendChar(ch);
+            break;
+        }
+        break;
+
+      case kBackslash:
+        if (ch >= '0' && ch <= '7') {
+          octal = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
+          state = kOctal2;
+          break;
+        }
+
+        state = kPlain;
+        switch (ch) {
+          case 'n':
+            out.AppendChar('\n');
+            break;
+          case 'r':
+            out.AppendChar('\r');
+            break;
+          case 't':
+            out.AppendChar('\t');
+            break;
+          case 'b':
+            out.AppendChar('\b');
+            break;
+          case 'f':
+            out.AppendChar('\f');
+            break;
+          case '\r':
+            state = kBackslashCR;
+            break;
+          case '\n':
+            break;
+          default:
+            out.AppendChar(ch);
+            break;
+        }
+        break;
+
+      case kOctal2:
+        if (ch >= '0' && ch <= '7') {
+          octal = octal * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
+          state = kOctal3;
+        } else {
+          out.AppendChar(octal);
+          state = kPlain;
+          bConsumed = false;
+        }
+        break;
+
+      case kOctal3: {
+        const bool bThirdDigit = ch >= '0' && ch <= '7';
+        if (bThirdDigit)
+          octal = octal * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
+        out.AppendChar(octal);
+        state = kPlain;
+        bConsumed = bThirdDigit;
+        break;
+      }
+
+      case kBackslashCR:
+        state = kPlain;
+        if (ch != '\n')
+          bConsumed = false;
+        break;
+    }
+
+    if (bConsumed && !GetNextChar(ch))
+      break;
+  }
+
+  GetNextChar(ch);
+  return out.GetByteString();
+}
+
+// Reads hexadecimal digits up to the closing '>' and returns the bytes they
+// encode. Characters that are not hex digits are skipped. An odd number of
+// digits is completed as if a trailing 0 had been written. Reading also stops
+// at the end of the file.
+CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
+  uint8_t ch;
+  if (!GetNextChar(ch))
+    return CFX_ByteString();
+
+  CFX_ByteTextBuf out;
+  bool bHaveHighNibble = false;
+  uint8_t pending = 0;
+  do {
+    if (ch == '>')
+      break;
+
+    if (!std::isxdigit(ch))
+      continue;
+
+    const int nibble = FXSYS_toHexDigit(ch);
+    if (bHaveHighNibble) {
+      pending += nibble;
+      out.AppendByte(pending);
+    } else {
+      pending = nibble * 16;
+    }
+    bHaveHighNibble = !bHaveHighNibble;
+  } while (GetNextChar(ch));
+
+  // A dangling high nibble is emitted with a zero low nibble.
+  if (bHaveHighNibble)
+    out.AppendByte(pending);
+
+  return out.GetByteString();
+}
+
+// Advances past the remainder of the current line, including its line ending
+// ("\n", "\r" or "\r\n"). Stops at the end of the file if no line ending is
+// found.
+void CPDF_SyntaxParser::ToNextLine() {
+  uint8_t ch;
+  while (GetNextChar(ch)) {
+    if (ch == '\n')
+      break;
+
+    if (ch == '\r') {
+      // Peek for the '\n' of a "\r\n" pair. Only step back when a byte was
+      // actually read: if the '\r' is the last byte of the file the read
+      // fails, |ch| still holds '\r', and stepping back would un-consume the
+      // line ending that was just skipped.
+      if (GetNextChar(ch) && ch != '\n')
+        --m_Pos;
+      break;
+    }
+  }
+}
+
+// Skips whitespace and '%' comments so that the next read returns the first
+// character of the following token. If the end of the file is reached first,
+// the position is left wherever reading stopped.
+void CPDF_SyntaxParser::ToNextWord() {
+  uint8_t cur;
+  if (!GetNextChar(cur))
+    return;
+
+  for (;;) {
+    while (PDFCharIsWhitespace(cur)) {
+      if (!GetNextChar(cur))
+        return;
+    }
+
+    if (cur != '%')
+      break;
+
+    // Discard the comment up to the next line-ending character.
+    do {
+      if (!GetNextChar(cur))
+        return;
+    } while (!PDFCharIsLineEnding(cur));
+  }
+
+  // |cur| is the first character of the token; give it back.
+  --m_Pos;
+}
+
+// Reads the next token with GetNextWordInternal() and returns a copy of it.
+// |bIsNumber| is forwarded unchanged and may be null.
+CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
+  GetNextWordInternal(bIsNumber);
+  const FX_CHAR* word = reinterpret_cast<const FX_CHAR*>(m_WordBuffer);
+  return CFX_ByteString(word, m_WordSize);
+}
+
+// Returns the next token without reporting whether it looks like a number.
+CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
+  GetNextWordInternal(nullptr);
+  return CFX_ByteString(reinterpret_cast<const FX_CHAR*>(m_WordBuffer),
+                        m_WordSize);
+}
+
+// Parses one PDF object starting at the current position and returns it, or
+// nullptr when the next token does not start an object, the input ends, or
+// the nesting limit kParserMaxRecursionDepth is exceeded.
+//
+// pObjList - handed to every CPDF_Reference that is created.
+// objnum   - passed to the crypto handler and to ReadStream().
+// gennum   - passed to the crypto handler and to ReadStream().
+// bDecrypt - when false, a string parsed directly by this call is left
+//            undecrypted. Nested calls for array elements and dictionary
+//            values always pass true.
+//
+// This variant is lenient about malformed input; see the notes on the array
+// and dictionary branches. Objects are created with new / release(), so
+// ownership presumably passes to the caller -- confirm against callers.
+CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
+ FX_DWORD objnum,
+ FX_DWORD gennum,
+ FX_BOOL bDecrypt) {
+ // The restorer puts the depth counter back on every exit path.
+ CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
+ if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
+ return nullptr;
+
+ FX_FILESIZE SavedPos = m_Pos;
+ bool bIsNumber;
+ CFX_ByteString word = GetNextWord(&bIsNumber);
+ if (word.GetLength() == 0)
+ return nullptr;
+
+ if (bIsNumber) {
+ // Look ahead for "<num> <num> R". This SavedPos shadows the outer one and
+ // marks the position just after the first number.
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString nextword = GetNextWord(&bIsNumber);
+ if (bIsNumber) {
+ CFX_ByteString nextword2 = GetNextWord(nullptr);
+ if (nextword2 == "R") {
+ // Shadows the objnum parameter: this is the referenced object number.
+ FX_DWORD objnum = FXSYS_atoui(word);
+ return new CPDF_Reference(pObjList, objnum);
+ }
+ }
+ // Not a reference: un-read the look-ahead and return a plain number.
+ m_Pos = SavedPos;
+ return new CPDF_Number(word);
+ }
+
+ if (word == "true" || word == "false")
+ return new CPDF_Boolean(word == "true");
+
+ if (word == "null")
+ return new CPDF_Null;
+
+ if (word == "(") {
+ CFX_ByteString str = ReadString();
+ if (m_pCryptoHandler && bDecrypt)
+ m_pCryptoHandler->Decrypt(objnum, gennum, str);
+ return new CPDF_String(str, FALSE);
+ }
+
+ if (word == "<") {
+ CFX_ByteString str = ReadHexString();
+ if (m_pCryptoHandler && bDecrypt)
+ m_pCryptoHandler->Decrypt(objnum, gennum, str);
+
+ return new CPDF_String(str, TRUE);
+ }
+
+ if (word == "[") {
+ // Elements are collected until a nested call returns nullptr. The array is
+ // returned whichever token ended the loop; "]" is not required here.
+ CPDF_Array* pArray = new CPDF_Array;
+ while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
+ pArray->Add(pObj);
+
+ return pArray;
+ }
+
+ if (word[0] == '/') {
+ // Name object: decode the token minus its leading '/'.
+ return new CPDF_Name(
+ PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
+ }
+
+ if (word == "<<") {
+ // NOTE(review): nKeys is incremented below but never read in this function.
+ int32_t nKeys = 0;
+ // Position of the value of a "/Contents" key, or 0 if none was seen.
+ FX_FILESIZE dwSignValuePos = 0;
+
+ std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
+ new CPDF_Dictionary);
+ while (1) {
+ CFX_ByteString key = GetNextWord(nullptr);
+ if (key.IsEmpty())
+ return nullptr;
+
+ // Position of the first byte of the token just read.
+ FX_FILESIZE SavedPos = m_Pos - key.GetLength();
+ if (key == ">>")
+ break;
+
+ // Tolerate a missing ">>": stop at "endobj" and leave it unread.
+ if (key == "endobj") {
+ m_Pos = SavedPos;
+ break;
+ }
+
+ // Anything that is not a name cannot be a key; skip it.
+ if (key[0] != '/')
+ continue;
+
+ ++nKeys;
+ key = PDF_NameDecode(key);
+ if (key.IsEmpty())
+ continue;
+
+ if (key == "/Contents")
+ dwSignValuePos = m_Pos;
+
+ // A key whose value cannot be parsed is dropped.
+ CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);
+ if (!pObj)
+ continue;
+
+ // Store under the key without its leading '/'.
+ CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
+ pDict->SetAt(keyNoSlash, pObj);
+ }
+
+ // Only when this is a signature dictionary and has contents, we reset the
+ // contents to the un-decrypted form.
+ if (pDict->IsSignatureDict() && dwSignValuePos) {
+ // Re-parse the value with bDecrypt == false, then return to where we were.
+ CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
+ m_Pos = dwSignValuePos;
+ pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));
+ }
+
+ // A dictionary followed by the "stream" keyword is a stream object.
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString nextword = GetNextWord(nullptr);
+ if (nextword != "stream") {
+ m_Pos = SavedPos;
+ return pDict.release();
+ }
+ return ReadStream(pDict.release(), objnum, gennum);
+ }
+
+ // Un-read a stray ">>" so that an enclosing dictionary loop can see it.
+ if (word == ">>")
+ m_Pos = SavedPos;
+
+ return nullptr;
+}
+
+// Stricter counterpart of GetObject(). It parses one object at the current
+// position with these differences, all visible below:
+//  - strings are always decrypted when a crypto handler is set,
+//  - an array is returned only if the token that ended it starts with ']',
+//  - a dictionary whose value cannot be parsed makes the whole call fail.
+// Array elements and dictionary values themselves are still parsed with the
+// lenient GetObject().
+// Returns nullptr on failure or when kParserMaxRecursionDepth is exceeded.
+CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
+ CPDF_IndirectObjectHolder* pObjList,
+ FX_DWORD objnum,
+ FX_DWORD gennum) {
+ // The restorer puts the depth counter back on every exit path.
+ CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
+ if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
+ return nullptr;
+
+ FX_FILESIZE SavedPos = m_Pos;
+ bool bIsNumber;
+ CFX_ByteString word = GetNextWord(&bIsNumber);
+ if (word.GetLength() == 0)
+ return nullptr;
+
+ if (bIsNumber) {
+ // Look ahead for "<num> <num> R". This SavedPos shadows the outer one and
+ // marks the position just after the first number.
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString nextword = GetNextWord(&bIsNumber);
+ if (bIsNumber) {
+ CFX_ByteString nextword2 = GetNextWord(nullptr);
+ if (nextword2 == "R")
+ return new CPDF_Reference(pObjList, FXSYS_atoui(word));
+ }
+ // Not a reference: un-read the look-ahead and return a plain number.
+ m_Pos = SavedPos;
+ return new CPDF_Number(word);
+ }
+
+ if (word == "true" || word == "false")
+ return new CPDF_Boolean(word == "true");
+
+ if (word == "null")
+ return new CPDF_Null;
+
+ if (word == "(") {
+ CFX_ByteString str = ReadString();
+ if (m_pCryptoHandler)
+ m_pCryptoHandler->Decrypt(objnum, gennum, str);
+ return new CPDF_String(str, FALSE);
+ }
+
+ if (word == "<") {
+ CFX_ByteString str = ReadHexString();
+ if (m_pCryptoHandler)
+ m_pCryptoHandler->Decrypt(objnum, gennum, str);
+ return new CPDF_String(str, TRUE);
+ }
+
+ if (word == "[") {
+ std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
+ new CPDF_Array);
+ while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
+ pArray->Add(pObj);
+
+ // m_WordBuffer still holds the token on which the last GetObject() call
+ // gave up; accept the array only if that token starts with ']'.
+ return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
+ }
+
+ if (word[0] == '/') {
+ // Name object: decode the token minus its leading '/'.
+ return new CPDF_Name(
+ PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
+ }
+
+ if (word == "<<") {
+ std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
+ new CPDF_Dictionary);
+ while (1) {
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString key = GetNextWord(nullptr);
+ if (key.IsEmpty())
+ return nullptr;
+
+ if (key == ">>")
+ break;
+
+ // Tolerate a missing ">>": stop at "endobj" and leave it unread.
+ if (key == "endobj") {
+ m_Pos = SavedPos;
+ break;
+ }
+
+ // Anything that is not a name cannot be a key; skip it.
+ if (key[0] != '/')
+ continue;
+
+ key = PDF_NameDecode(key);
+ std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
+ GetObject(pObjList, objnum, gennum, true));
+ if (!obj) {
+ // Unparsable value: consume up to and including the next CR or LF (or
+ // to the end of the file), then fail.
+ uint8_t ch;
+ while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
+ continue;
+ }
+ return nullptr;
+ }
+
+ // A key consisting only of "/" is parsed but not stored.
+ if (key.GetLength() > 1) {
+ pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
+ obj.release());
+ }
+ }
+
+ // A dictionary followed by the "stream" keyword is a stream object.
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString nextword = GetNextWord(nullptr);
+ if (nextword != "stream") {
+ m_Pos = SavedPos;
+ return pDict.release();
+ }
+
+ return ReadStream(pDict.release(), objnum, gennum);
+ }
+
+ // Un-read a stray ">>" so that an enclosing dictionary loop can see it.
+ if (word == ">>")
+ m_Pos = SavedPos;
+
+ return nullptr;
+}
+
+// Returns the length of the end-of-line marker that starts at |pos|: 2 for
+// "\r\n", 1 for a lone '\r' or '\n', 0 otherwise. m_Pos is not changed. Bytes
+// that cannot be read are treated as 0.
+unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
+  unsigned char first = 0;
+  unsigned char second = 0;
+  GetCharAt(pos, first);
+  GetCharAt(pos + 1, second);
+
+  if (first == '\r')
+    return second == '\n' ? 2 : 1;
+
+  return first == '\n' ? 1 : 0;
+}
+
+// Reads the data of a stream whose dictionary |pDict| and "stream" keyword
+// have already been consumed, and returns a new CPDF_Stream built from |pDict|
+// and the (decrypted, when a crypto handler applies) data.
+//
+// The data length comes from the dictionary's "Length" entry. When no crypto
+// handler applies, that length is checked by looking for "endstream" right
+// after the data; if the check fails, the length is recomputed by searching
+// for "endstream" / "endobj" and written back into |pDict|.
+//
+// On failure |pDict| is released and nullptr is returned.
+CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
+ FX_DWORD objnum,
+ FX_DWORD gennum) {
+ CPDF_Object* pLenObj = pDict->GetElement("Length");
+ FX_FILESIZE len = -1;
+ CPDF_Reference* pLenObjRef = ToReference(pLenObj);
+
+ // Use "Length" when it is not a reference, or when it is a reference (with an
+ // object list) to an object number other than this stream's own.
+ bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
+ pLenObjRef->GetRefObjNum() != objnum);
+ if (pLenObj && differingObjNum)
+ len = pLenObj->GetInteger();
+
+ // Locate the start of stream.
+ ToNextLine();
+ FX_FILESIZE streamStartPos = m_Pos;
+
+ const CFX_ByteStringC kEndStreamStr("endstream");
+ const CFX_ByteStringC kEndObjStr("endobj");
+
+ // The object whose number equals m_MetadataObjnum is read without the crypto
+ // handler.
+ IPDF_CryptoHandler* pCryptoHandler =
+ objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
+ // NOTE(review): the length verification below runs only when no crypto
+ // handler applies. With a handler, "Length" is used as-is for the allocation
+ // and read further down without a check against the file size.
+ if (!pCryptoHandler) {
+ FX_BOOL bSearchForKeyword = TRUE;
+ if (len >= 0) {
+ // Jump to where the data should end; the jump is skipped if that position
+ // overflows or is not inside the file.
+ pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
+ pos += len;
+ if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
+ m_Pos = pos.ValueOrDie();
+
+ m_Pos += ReadEOLMarkers(m_Pos);
+ // Clear the start of the word buffer so a short token cannot match by
+ // reusing stale bytes in the comparison below.
+ FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
+ GetNextWordInternal(nullptr);
+ // Earlier version of PDF specification doesn't require EOL marker before
+ // 'endstream' keyword. If keyword 'endstream' follows the bytes in
+ // specified length, it signals the end of stream.
+ if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
+ kEndStreamStr.GetLength()) == 0) {
+ bSearchForKeyword = FALSE;
+ }
+ }
+
+ if (bSearchForKeyword) {
+ // If len is not available, len needs to be calculated
+ // by searching the keywords "endstream" or "endobj".
+ m_Pos = streamStartPos;
+ FX_FILESIZE endStreamOffset = 0;
+ while (endStreamOffset >= 0) {
+ endStreamOffset = FindTag(kEndStreamStr, 0);
+
+ // Can't find "endstream".
+ if (endStreamOffset < 0)
+ break;
+
+ // Stop searching when "endstream" is found.
+ if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
+ kEndStreamStr, TRUE)) {
+ // Offset of the keyword relative to the start of the stream data.
+ endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
+ break;
+ }
+ }
+
+ m_Pos = streamStartPos;
+ FX_FILESIZE endObjOffset = 0;
+ while (endObjOffset >= 0) {
+ endObjOffset = FindTag(kEndObjStr, 0);
+
+ // Can't find "endobj".
+ if (endObjOffset < 0)
+ break;
+
+ // Stop searching when "endobj" is found.
+ if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
+ TRUE)) {
+ // Offset of the keyword relative to the start of the stream data.
+ endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
+ break;
+ }
+ }
+
+ // Can't find "endstream" or "endobj".
+ if (endStreamOffset < 0 && endObjOffset < 0) {
+ pDict->Release();
+ return nullptr;
+ }
+
+ // Use whichever keyword comes first as the end of the data.
+ if (endStreamOffset < 0 && endObjOffset >= 0) {
+ // Correct the position of end stream.
+ endStreamOffset = endObjOffset;
+ } else if (endStreamOffset >= 0 && endObjOffset < 0) {
+ // Correct the position of end obj.
+ endObjOffset = endStreamOffset;
+ } else if (endStreamOffset > endObjOffset) {
+ endStreamOffset = endObjOffset;
+ }
+
+ // Exclude an EOL marker that sits directly before the keyword from the
+ // data length: 2 bytes for "\r\n", otherwise 1 byte for a lone CR or LF.
+ len = endStreamOffset;
+ int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
+ if (numMarkers == 2) {
+ len -= 2;
+ } else {
+ numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
+ if (numMarkers == 1) {
+ len -= 1;
+ }
+ }
+
+ if (len < 0) {
+ pDict->Release();
+ return nullptr;
+ }
+ pDict->SetAtInteger("Length", len);
+ }
+ m_Pos = streamStartPos;
+ }
+
+ if (len < 0) {
+ pDict->Release();
+ return nullptr;
+ }
+
+ uint8_t* pData = nullptr;
+ if (len > 0) {
+ pData = FX_Alloc(uint8_t, len);
+ // NOTE(review): the result of ReadBlock() is ignored, so a failed read still
+ // produces a stream of |len| bytes with whatever FX_Alloc() returned.
+ ReadBlock(pData, len);
+ if (pCryptoHandler) {
+ CFX_BinaryBuf dest_buf;
+ dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
+
+ void* context = pCryptoHandler->DecryptStart(objnum, gennum);
+ pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
+ pCryptoHandler->DecryptFinish(context, dest_buf);
+
+ // Swap the encrypted bytes for the decrypted buffer, which is detached
+ // from dest_buf so it survives dest_buf going out of scope.
+ FX_Free(pData);
+ pData = dest_buf.GetBuffer();
+ len = dest_buf.GetSize();
+ dest_buf.DetachBuffer();
+ }
+ }
+
+ CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
+ // Peek at the token after the data. If it is "endobj" followed by an EOL
+ // marker, un-read it; otherwise the token stays consumed.
+ streamStartPos = m_Pos;
+ FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
+
+ GetNextWordInternal(nullptr);
+
+ int numMarkers = ReadEOLMarkers(m_Pos);
+ if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
+ numMarkers != 0 &&
+ FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
+ 0) {
+ m_Pos = streamStartPos;
+ }
+ return pStream;
+}
+
+// Attaches the parser to |pFileAccess|: replaces the read buffer with a fresh
+// one of m_BufSize bytes, records |HeaderOffset| and the file size, resets the
+// position to 0 and preloads the buffer from the start of the file. The
+// result of that first read is not checked.
+void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
+                                   FX_DWORD HeaderOffset) {
+  FX_Free(m_pFileBuf);
+  m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
+
+  m_pFileAccess = pFileAccess;
+  m_HeaderOffset = HeaderOffset;
+  m_FileLen = pFileAccess->GetSize();
+  m_Pos = 0;
+  m_BufOffset = 0;
+
+  // Preload at most one buffer's worth, or the whole file if it is smaller.
+  const size_t preload_size =
+      (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize);
+  pFileAccess->ReadBlock(m_pFileBuf, 0, preload_size);
+}
+
+// Reads the next token and returns it converted with FXSYS_atoui(), or 0 when
+// the token is not made up of numeric characters only.
+uint32_t CPDF_SyntaxParser::GetDirectNum() {
+  bool bNumeric;
+  GetNextWordInternal(&bNumeric);
+  if (bNumeric) {
+    // NUL-terminate the token in place so it can be parsed as a C string.
+    m_WordBuffer[m_WordSize] = 0;
+    return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
+  }
+  return 0;
+}
+
+// Reports whether the occurrence of |tag| at |startpos| stands on its own,
+// i.e. is not glued to a neighbouring character that would make it part of a
+// longer token.
+//
+// A neighbour disqualifies the match when it is a numeric or regular
+// character, or, with |checkKeyword| set, a delimiter. A side is examined only
+// if the tag's own character on that side is neither a delimiter nor
+// whitespace. The right neighbour is examined only when
+// startpos + tag length <= |limit|; the left one only when startpos > 0.
+// Neighbours that cannot be read do not disqualify the match.
+// NOTE(review): callers in this file pass different things as |limit| (the
+// file length in ReadStream(), the caller's search limit in SearchWord() and
+// SearchMultiWord()) -- confirm which is intended.
+bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
+                                    FX_FILESIZE limit,
+                                    const CFX_ByteStringC& tag,
+                                    FX_BOOL checkKeyword) {
+  const FX_DWORD taglen = tag.GetLength();
+
+  const bool bCheckLeft =
+      !(PDFCharIsDelimiter(tag[0]) || PDFCharIsWhitespace(tag[0]));
+  const bool bCheckRight = !(PDFCharIsDelimiter(tag[taglen - 1]) ||
+                             PDFCharIsWhitespace(tag[taglen - 1]));
+
+  // True when |c| next to the tag means the tag is part of a longer token.
+  auto extends_token = [checkKeyword](uint8_t c) {
+    return PDFCharIsNumeric(c) || PDFCharIsOther(c) ||
+           (checkKeyword && PDFCharIsDelimiter(c));
+  };
+
+  uint8_t neighbour;
+  if (bCheckRight && startpos + (int32_t)taglen <= limit &&
+      GetCharAt(startpos + (int32_t)taglen, neighbour) &&
+      extends_token(neighbour)) {
+    return false;
+  }
+
+  if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, neighbour) &&
+      extends_token(neighbour)) {
+    return false;
+  }
+  return true;
+}
+
+// TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
+// and drop the bool.
+//
+// Searches for |tag| starting at m_Pos, forwards or backwards depending on
+// |bForward|. On success m_Pos is set to the first byte of the match and TRUE
+// is returned; otherwise m_Pos is unchanged and FALSE is returned.
+//
+// tag        - bytes to look for; an empty tag never matches.
+// bWholeWord - when set, a match must also pass IsWholeWord().
+// bForward   - search direction. A backward search matches the tag from its
+//              last byte towards its first.
+// limit      - maximum distance from m_Pos to examine; 0 means no limit.
+//
+// The matcher restarts without backtracking after a mismatch, so tags with a
+// repeated prefix can be missed in some inputs.
+FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
+                                      FX_BOOL bWholeWord,
+                                      FX_BOOL bForward,
+                                      FX_FILESIZE limit) {
+  int32_t taglen = tag.GetLength();
+  if (taglen == 0)
+    return FALSE;
+
+  FX_FILESIZE pos = m_Pos;
+  int32_t offset = 0;
+  if (!bForward)
+    offset = taglen - 1;
+
+  const uint8_t* tag_data = tag.GetPtr();
+  uint8_t byte;
+  while (1) {
+    if (bForward) {
+      if (limit && pos >= m_Pos + limit)
+        return FALSE;
+
+      if (!GetCharAt(pos, byte))
+        return FALSE;
+
+    } else {
+      if (limit && pos <= m_Pos - limit)
+        return FALSE;
+
+      if (!GetCharAtBackward(pos, byte))
+        return FALSE;
+    }
+
+    if (byte == tag_data[offset]) {
+      if (bForward) {
+        offset++;
+        if (offset < taglen) {
+          pos++;
+          continue;
+        }
+      } else {
+        offset--;
+        if (offset >= 0) {
+          pos--;
+          continue;
+        }
+      }
+
+      // Every byte of the tag matched.
+      FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
+      if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
+        m_Pos = startpos;
+        return TRUE;
+      }
+    }
+
+    // Mismatch, or a full match rejected by the whole-word test: restart the
+    // matcher. The current byte may already be the first byte of a new match,
+    // but only when the tag is longer than one byte. For a one-byte tag the
+    // "one byte already matched" state would put |offset| outside the tag
+    // (1 going forward, -1 going backward) and the next comparison would read
+    // out of bounds.
+    const bool bCanCarryOver = taglen > 1;
+    if (bForward) {
+      offset = (bCanCarryOver && byte == tag_data[0]) ? 1 : 0;
+      pos++;
+    } else {
+      offset = (bCanCarryOver && byte == tag_data[taglen - 1]) ? taglen - 2
+                                                               : taglen - 1;
+      pos--;
+    }
+
+    if (pos < 0)
+      return FALSE;
+  }
+
+  return FALSE;
+}
+
+// Scans forward from m_Pos for the first occurrence of any of several tags
+// and returns the index of the tag that matched, or -1 if none did. m_Pos is
+// not changed.
+//
+// tags       - the tags, separated by NUL bytes inside a single view. The
+//              end of the view terminates the last tag.
+// bWholeWord - when set, a match must also pass IsWholeWord().
+// limit      - number of bytes to examine; 0 means up to the end of the file.
+//
+// Empty tags (from an empty view or adjacent separators) never match.
+int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
+                                           FX_BOOL bWholeWord,
+                                           FX_FILESIZE limit) {
+  const int tags_len = tags.GetLength();
+  int32_t ntags = 1;
+  for (int i = 0; i < tags_len; ++i) {
+    if (tags[i] == 0)
+      ++ntags;
+  }
+
+  // Split the view at each NUL. The end of the view closes the last tag; it
+  // is detected by index so that no byte past the view is read.
+  std::vector<SearchTagRecord> patterns(ntags);
+  FX_DWORD start = 0;
+  FX_DWORD itag = 0;
+  for (int i = 0; i <= tags_len; ++i) {
+    if (i != tags_len && tags[i] != 0)
+      continue;
+
+    patterns[itag].m_pTag = tags.GetCStr() + start;
+    patterns[itag].m_Len = i - start;
+    patterns[itag].m_Offset = 0;
+    start = i + 1;
+    ++itag;
+  }
+
+  const FX_FILESIZE pos_limit = m_Pos + limit;
+  for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
+    uint8_t byte;
+    if (!GetCharAt(pos, byte))
+      break;
+
+    for (int i = 0; i < ntags; ++i) {
+      SearchTagRecord& pat = patterns[i];
+      // An empty tag can never match; skipping it also avoids indexing past
+      // its (zero-length) data.
+      if (pat.m_Len == 0)
+        continue;
+
+      if (pat.m_pTag[pat.m_Offset] != byte) {
+        pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
+        continue;
+      }
+
+      ++pat.m_Offset;
+      if (pat.m_Offset != pat.m_Len)
+        continue;
+
+      // |pos| is the last byte of the match, so the tag starts m_Len - 1
+      // bytes earlier.
+      const FX_FILESIZE tag_start =
+          pos - static_cast<FX_FILESIZE>(pat.m_Len) + 1;
+      if (!bWholeWord ||
+          IsWholeWord(tag_start, limit, CFX_ByteStringC(pat.m_pTag, pat.m_Len),
+                      FALSE)) {
+        return i;
+      }
+
+      pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
+    }
+  }
+  return -1;
+}
+
+// Reads forward from m_Pos until |tag| has been seen. On success m_Pos is
+// left just after the tag and the distance from the starting position to the
+// first byte of the tag is returned. Returns -1 when the end of the file is
+// reached, or when a non-zero |limit| is given and that many bytes have been
+// read without a match; m_Pos is then wherever reading stopped.
+FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
+                                       FX_FILESIZE limit) {
+  const int32_t taglen = tag.GetLength();
+  const FX_FILESIZE origin = m_Pos;
+  // Absolute position at which to give up. With |limit| == 0 this equals the
+  // starting position, which m_Pos never equals again after the first read.
+  const FX_FILESIZE stop_pos = limit + m_Pos;
+
+  int32_t matched = 0;
+  uint8_t ch;
+  while (GetNextChar(ch)) {
+    if (ch != tag[matched]) {
+      // Restart; the current byte may itself begin a new match.
+      matched = (ch == tag[0]) ? 1 : 0;
+    } else if (++matched == taglen) {
+      return m_Pos - origin - taglen;
+    }
+
+    if (stop_pos && m_Pos == stop_pos)
+      return -1;
+  }
+  return -1;
+}
+
+// Takes ownership of |pCryptoHandler| and uses it for subsequent string and
+// stream decryption. Any handler held before is destroyed; passing an empty
+// pointer leaves the parser without a handler.
+void CPDF_SyntaxParser::SetEncrypt(
+    std::unique_ptr<IPDF_CryptoHandler> pCryptoHandler) {
+  m_pCryptoHandler.reset(pCryptoHandler.release());
+}