summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Sepez <tsepez@chromium.org>2016-03-08 12:42:40 -0800
committerTom Sepez <tsepez@chromium.org>2016-03-08 12:42:40 -0800
commitb70f5a44635d29981abac766d727ff54daea2c37 (patch)
tree15e257d49fd1c382aec79099f19a2bf356f7fc53
parente1b163c2e06c753ef0580989e1081d25e70537f2 (diff)
downloadpdfium-b70f5a44635d29981abac766d727ff54daea2c37.tar.xz
Re-land "Split CPDF_SyntaxParser into its own named .cpp/.h files."
This reverts commit 06fae28d307fe051f0c7e890344cd487c2260910. R=dsinclair@chromium.org Review URL: https://codereview.chromium.org/1775023003 .
-rw-r--r--BUILD.gn2
-rw-r--r--core/include/fpdfapi/fpdf_parser.h86
-rw-r--r--core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp978
-rw-r--r--core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h96
-rw-r--r--core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp1
-rw-r--r--core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp1236
-rw-r--r--core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp5
-rw-r--r--pdfium.gyp2
8 files changed, 1231 insertions, 1175 deletions
diff --git a/BUILD.gn b/BUILD.gn
index cc31d65a01..007f2ae7d2 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -331,6 +331,8 @@ static_library("fpdfapi") {
"core/src/fpdfapi/fpdf_page/fpdf_page_path.cpp",
"core/src/fpdfapi/fpdf_page/fpdf_page_pattern.cpp",
"core/src/fpdfapi/fpdf_page/pageint.h",
+ "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp",
+ "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h",
"core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp",
"core/src/fpdfapi/fpdf_parser/fpdf_parser_document.cpp",
"core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp",
diff --git a/core/include/fpdfapi/fpdf_parser.h b/core/include/fpdfapi/fpdf_parser.h
index 53f9c5a316..1b09aa0148 100644
--- a/core/include/fpdfapi/fpdf_parser.h
+++ b/core/include/fpdfapi/fpdf_parser.h
@@ -28,6 +28,7 @@ class CPDF_Object;
class CPDF_Parser;
class CPDF_Pattern;
class CPDF_StandardSecurityHandler;
+class CPDF_SyntaxParser;
class IPDF_SecurityHandler;
#define FPDFPERM_PRINT 0x0004
@@ -231,81 +232,6 @@ class CPDF_SimpleParser {
FX_DWORD m_dwCurPos;
};
-class CPDF_SyntaxParser {
- public:
- CPDF_SyntaxParser();
- ~CPDF_SyntaxParser();
-
- void InitParser(IFX_FileRead* pFileAccess, FX_DWORD HeaderOffset);
-
- FX_FILESIZE SavePos() const { return m_Pos; }
- void RestorePos(FX_FILESIZE pos) { m_Pos = pos; }
-
- CPDF_Object* GetObject(CPDF_IndirectObjectHolder* pObjList,
- FX_DWORD objnum,
- FX_DWORD gennum,
- FX_BOOL bDecrypt);
- CPDF_Object* GetObjectByStrict(CPDF_IndirectObjectHolder* pObjList,
- FX_DWORD objnum,
- FX_DWORD gennum);
- CFX_ByteString GetKeyword();
-
- void ToNextLine();
- void ToNextWord();
-
- FX_BOOL SearchWord(const CFX_ByteStringC& word,
- FX_BOOL bWholeWord,
- FX_BOOL bForward,
- FX_FILESIZE limit);
- int SearchMultiWord(const CFX_ByteStringC& words,
- FX_BOOL bWholeWord,
- FX_FILESIZE limit);
- FX_FILESIZE FindTag(const CFX_ByteStringC& tag, FX_FILESIZE limit);
-
- void SetEncrypt(std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler);
-
- FX_BOOL ReadBlock(uint8_t* pBuf, FX_DWORD size);
- FX_BOOL GetCharAt(FX_FILESIZE pos, uint8_t& ch);
- CFX_ByteString GetNextWord(bool* bIsNumber);
-
- private:
- friend class CPDF_Parser;
- friend class CPDF_DataAvail;
- friend class fpdf_parser_parser_ReadHexString_Test;
-
- static const int kParserMaxRecursionDepth = 64;
- static int s_CurrentRecursionDepth;
-
- uint32_t GetDirectNum();
-
- FX_BOOL GetNextChar(uint8_t& ch);
- FX_BOOL GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch);
- void GetNextWordInternal(bool* bIsNumber);
- bool IsWholeWord(FX_FILESIZE startpos,
- FX_FILESIZE limit,
- const CFX_ByteStringC& tag,
- FX_BOOL checkKeyword);
-
- CFX_ByteString ReadString();
- CFX_ByteString ReadHexString();
- unsigned int ReadEOLMarkers(FX_FILESIZE pos);
- CPDF_Stream* ReadStream(CPDF_Dictionary* pDict,
- FX_DWORD objnum,
- FX_DWORD gennum);
-
- FX_FILESIZE m_Pos;
- int m_MetadataObjnum;
- IFX_FileRead* m_pFileAccess;
- FX_DWORD m_HeaderOffset;
- FX_FILESIZE m_FileLen;
- uint8_t* m_pFileBuf;
- FX_DWORD m_BufSize;
- FX_FILESIZE m_BufOffset;
- std::unique_ptr<CPDF_CryptoHandler> m_pCryptoHandler;
- uint8_t m_WordBuffer[257];
- FX_DWORD m_WordSize;
-};
-
class CPDF_Parser {
public:
enum Error {
@@ -324,11 +250,6 @@ class CPDF_Parser {
void SetPassword(const FX_CHAR* password) { m_Password = password; }
CFX_ByteString GetPassword() { return m_Password; }
-
- CPDF_CryptoHandler* GetCryptoHandler() {
- return m_Syntax.m_pCryptoHandler.get();
- }
-
CPDF_Dictionary* GetTrailer() const { return m_pTrailer; }
FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; }
CPDF_Document* GetDocument() const { return m_pDocument; }
@@ -350,12 +271,13 @@ class CPDF_Parser {
bool IsVersionUpdated() const { return m_bVersionUpdated; }
bool IsObjectFreeOrNull(FX_DWORD objnum) const;
FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm);
+ CPDF_CryptoHandler* GetCryptoHandler();
+ IFX_FileRead* GetFileAccess() const;
FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const;
FX_FILESIZE GetObjectSize(FX_DWORD objnum) const;
void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size);
- IFX_FileRead* GetFileAccess() const { return m_Syntax.m_pFileAccess; }
int GetFileVersion() const { return m_FileVersion; }
FX_BOOL IsXRefStream() const { return m_bXRefStream; }
@@ -402,7 +324,7 @@ class CPDF_Parser {
void ShrinkObjectMap(FX_DWORD size);
CPDF_Document* m_pDocument;
- CPDF_SyntaxParser m_Syntax;
+ std::unique_ptr<CPDF_SyntaxParser> m_pSyntax;
bool m_bOwnFileRead;
int m_FileVersion;
CPDF_Dictionary* m_pTrailer;
diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp
new file mode 100644
index 0000000000..3091ab108b
--- /dev/null
+++ b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp
@@ -0,0 +1,978 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
+
+#include <vector>
+
+#include "core/include/fpdfapi/fpdf_module.h"
+#include "core/include/fpdfapi/fpdf_parser.h"
+#include "core/include/fxcrt/fx_ext.h"
+#include "third_party/base/numerics/safe_math.h"
+
+namespace {
+
+struct SearchTagRecord {
+ const char* m_pTag;
+ FX_DWORD m_Len;
+ FX_DWORD m_Offset;
+};
+
+} // namespace
+
+// static
+int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
+
+CPDF_SyntaxParser::CPDF_SyntaxParser()
+ : m_MetadataObjnum(0),
+ m_pFileAccess(nullptr),
+ m_pFileBuf(nullptr),
+ m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}
+
+CPDF_SyntaxParser::~CPDF_SyntaxParser() {
+ FX_Free(m_pFileBuf);
+}
+
+FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
+ CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
+ m_Pos = pos;
+ return GetNextChar(ch);
+}
+
+FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
+ FX_FILESIZE pos = m_Pos + m_HeaderOffset;
+ if (pos >= m_FileLen)
+ return FALSE;
+
+ if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
+ FX_FILESIZE read_pos = pos;
+ FX_DWORD read_size = m_BufSize;
+ if ((FX_FILESIZE)read_size > m_FileLen)
+ read_size = (FX_DWORD)m_FileLen;
+
+ if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
+ if (m_FileLen < (FX_FILESIZE)read_size) {
+ read_pos = 0;
+ read_size = (FX_DWORD)m_FileLen;
+ } else {
+ read_pos = m_FileLen - read_size;
+ }
+ }
+
+ if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
+ return FALSE;
+
+ m_BufOffset = read_pos;
+ }
+ ch = m_pFileBuf[pos - m_BufOffset];
+ m_Pos++;
+ return TRUE;
+}
+
+FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
+ pos += m_HeaderOffset;
+ if (pos >= m_FileLen)
+ return FALSE;
+
+ if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
+ FX_FILESIZE read_pos;
+ if (pos < (FX_FILESIZE)m_BufSize)
+ read_pos = 0;
+ else
+ read_pos = pos - m_BufSize + 1;
+
+ FX_DWORD read_size = m_BufSize;
+ if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
+ if (m_FileLen < (FX_FILESIZE)read_size) {
+ read_pos = 0;
+ read_size = (FX_DWORD)m_FileLen;
+ } else {
+ read_pos = m_FileLen - read_size;
+ }
+ }
+
+ if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
+ return FALSE;
+
+ m_BufOffset = read_pos;
+ }
+ ch = m_pFileBuf[pos - m_BufOffset];
+ return TRUE;
+}
+
+FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
+ if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
+ return FALSE;
+ m_Pos += size;
+ return TRUE;
+}
+
+void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
+ m_WordSize = 0;
+ if (bIsNumber)
+ *bIsNumber = true;
+
+ uint8_t ch;
+ if (!GetNextChar(ch))
+ return;
+
+ while (1) {
+ while (PDFCharIsWhitespace(ch)) {
+ if (!GetNextChar(ch))
+ return;
+ }
+
+ if (ch != '%')
+ break;
+
+ while (1) {
+ if (!GetNextChar(ch))
+ return;
+ if (PDFCharIsLineEnding(ch))
+ break;
+ }
+ }
+
+ if (PDFCharIsDelimiter(ch)) {
+ if (bIsNumber)
+ *bIsNumber = false;
+
+ m_WordBuffer[m_WordSize++] = ch;
+ if (ch == '/') {
+ while (1) {
+ if (!GetNextChar(ch))
+ return;
+
+ if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
+ m_Pos--;
+ return;
+ }
+
+ if (m_WordSize < sizeof(m_WordBuffer) - 1)
+ m_WordBuffer[m_WordSize++] = ch;
+ }
+ } else if (ch == '<') {
+ if (!GetNextChar(ch))
+ return;
+
+ if (ch == '<')
+ m_WordBuffer[m_WordSize++] = ch;
+ else
+ m_Pos--;
+ } else if (ch == '>') {
+ if (!GetNextChar(ch))
+ return;
+
+ if (ch == '>')
+ m_WordBuffer[m_WordSize++] = ch;
+ else
+ m_Pos--;
+ }
+ return;
+ }
+
+ while (1) {
+ if (m_WordSize < sizeof(m_WordBuffer) - 1)
+ m_WordBuffer[m_WordSize++] = ch;
+
+ if (!PDFCharIsNumeric(ch)) {
+ if (bIsNumber)
+ *bIsNumber = false;
+ }
+
+ if (!GetNextChar(ch))
+ return;
+
+ if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
+ m_Pos--;
+ break;
+ }
+ }
+}
+
+CFX_ByteString CPDF_SyntaxParser::ReadString() {
+ uint8_t ch;
+ if (!GetNextChar(ch))
+ return CFX_ByteString();
+
+ CFX_ByteTextBuf buf;
+ int32_t parlevel = 0;
+ int32_t status = 0;
+ int32_t iEscCode = 0;
+ while (1) {
+ switch (status) {
+ case 0:
+ if (ch == ')') {
+ if (parlevel == 0) {
+ return buf.GetByteString();
+ }
+ parlevel--;
+ buf.AppendChar(')');
+ } else if (ch == '(') {
+ parlevel++;
+ buf.AppendChar('(');
+ } else if (ch == '\\') {
+ status = 1;
+ } else {
+ buf.AppendChar(ch);
+ }
+ break;
+ case 1:
+ if (ch >= '0' && ch <= '7') {
+ iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
+ status = 2;
+ break;
+ }
+
+ if (ch == 'n') {
+ buf.AppendChar('\n');
+ } else if (ch == 'r') {
+ buf.AppendChar('\r');
+ } else if (ch == 't') {
+ buf.AppendChar('\t');
+ } else if (ch == 'b') {
+ buf.AppendChar('\b');
+ } else if (ch == 'f') {
+ buf.AppendChar('\f');
+ } else if (ch == '\r') {
+ status = 4;
+ break;
+ } else if (ch != '\n') {
+ buf.AppendChar(ch);
+ }
+ status = 0;
+ break;
+ case 2:
+ if (ch >= '0' && ch <= '7') {
+ iEscCode =
+ iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
+ status = 3;
+ } else {
+ buf.AppendChar(iEscCode);
+ status = 0;
+ continue;
+ }
+ break;
+ case 3:
+ if (ch >= '0' && ch <= '7') {
+ iEscCode =
+ iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
+ buf.AppendChar(iEscCode);
+ status = 0;
+ } else {
+ buf.AppendChar(iEscCode);
+ status = 0;
+ continue;
+ }
+ break;
+ case 4:
+ status = 0;
+ if (ch != '\n')
+ continue;
+ break;
+ }
+
+ if (!GetNextChar(ch))
+ break;
+ }
+
+ GetNextChar(ch);
+ return buf.GetByteString();
+}
+
+CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
+ uint8_t ch;
+ if (!GetNextChar(ch))
+ return CFX_ByteString();
+
+ CFX_ByteTextBuf buf;
+ bool bFirst = true;
+ uint8_t code = 0;
+ while (1) {
+ if (ch == '>')
+ break;
+
+ if (std::isxdigit(ch)) {
+ int val = FXSYS_toHexDigit(ch);
+ if (bFirst) {
+ code = val * 16;
+ } else {
+ code += val;
+ buf.AppendByte(code);
+ }
+ bFirst = !bFirst;
+ }
+
+ if (!GetNextChar(ch))
+ break;
+ }
+ if (!bFirst)
+ buf.AppendByte(code);
+
+ return buf.GetByteString();
+}
+
+void CPDF_SyntaxParser::ToNextLine() {
+ uint8_t ch;
+ while (GetNextChar(ch)) {
+ if (ch == '\n')
+ break;
+
+ if (ch == '\r') {
+ GetNextChar(ch);
+ if (ch != '\n')
+ --m_Pos;
+ break;
+ }
+ }
+}
+
+void CPDF_SyntaxParser::ToNextWord() {
+ uint8_t ch;
+ if (!GetNextChar(ch))
+ return;
+
+ while (1) {
+ while (PDFCharIsWhitespace(ch)) {
+ if (!GetNextChar(ch))
+ return;
+ }
+
+ if (ch != '%')
+ break;
+
+ while (1) {
+ if (!GetNextChar(ch))
+ return;
+ if (PDFCharIsLineEnding(ch))
+ break;
+ }
+ }
+ m_Pos--;
+}
+
+CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
+ GetNextWordInternal(bIsNumber);
+ return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
+}
+
+CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
+ return GetNextWord(nullptr);
+}
+
+CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
+ FX_DWORD objnum,
+ FX_DWORD gennum,
+ FX_BOOL bDecrypt) {
+ CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
+ if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
+ return nullptr;
+
+ FX_FILESIZE SavedPos = m_Pos;
+ bool bIsNumber;
+ CFX_ByteString word = GetNextWord(&bIsNumber);
+ if (word.GetLength() == 0)
+ return nullptr;
+
+ if (bIsNumber) {
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString nextword = GetNextWord(&bIsNumber);
+ if (bIsNumber) {
+ CFX_ByteString nextword2 = GetNextWord(nullptr);
+ if (nextword2 == "R") {
+ FX_DWORD objnum = FXSYS_atoui(word);
+ return new CPDF_Reference(pObjList, objnum);
+ }
+ }
+ m_Pos = SavedPos;
+ return new CPDF_Number(word);
+ }
+
+ if (word == "true" || word == "false")
+ return new CPDF_Boolean(word == "true");
+
+ if (word == "null")
+ return new CPDF_Null;
+
+ if (word == "(") {
+ CFX_ByteString str = ReadString();
+ if (m_pCryptoHandler && bDecrypt)
+ m_pCryptoHandler->Decrypt(objnum, gennum, str);
+ return new CPDF_String(str, FALSE);
+ }
+
+ if (word == "<") {
+ CFX_ByteString str = ReadHexString();
+ if (m_pCryptoHandler && bDecrypt)
+ m_pCryptoHandler->Decrypt(objnum, gennum, str);
+
+ return new CPDF_String(str, TRUE);
+ }
+
+ if (word == "[") {
+ CPDF_Array* pArray = new CPDF_Array;
+ while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
+ pArray->Add(pObj);
+
+ return pArray;
+ }
+
+ if (word[0] == '/') {
+ return new CPDF_Name(
+ PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
+ }
+
+ if (word == "<<") {
+ int32_t nKeys = 0;
+ FX_FILESIZE dwSignValuePos = 0;
+
+ std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
+ new CPDF_Dictionary);
+ while (1) {
+ CFX_ByteString key = GetNextWord(nullptr);
+ if (key.IsEmpty())
+ return nullptr;
+
+ FX_FILESIZE SavedPos = m_Pos - key.GetLength();
+ if (key == ">>")
+ break;
+
+ if (key == "endobj") {
+ m_Pos = SavedPos;
+ break;
+ }
+
+ if (key[0] != '/')
+ continue;
+
+ ++nKeys;
+ key = PDF_NameDecode(key);
+ if (key.IsEmpty())
+ continue;
+
+ if (key == "/Contents")
+ dwSignValuePos = m_Pos;
+
+ CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);
+ if (!pObj)
+ continue;
+
+ CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
+ pDict->SetAt(keyNoSlash, pObj);
+ }
+
+ // Only when this is a signature dictionary and has contents, we reset the
+ // contents to the un-decrypted form.
+ if (IsSignatureDict(pDict.get()) && dwSignValuePos) {
+ CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
+ m_Pos = dwSignValuePos;
+ pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));
+ }
+
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString nextword = GetNextWord(nullptr);
+ if (nextword != "stream") {
+ m_Pos = SavedPos;
+ return pDict.release();
+ }
+ return ReadStream(pDict.release(), objnum, gennum);
+ }
+
+ if (word == ">>")
+ m_Pos = SavedPos;
+
+ return nullptr;
+}
+
+CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
+ CPDF_IndirectObjectHolder* pObjList,
+ FX_DWORD objnum,
+ FX_DWORD gennum) {
+ CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
+ if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
+ return nullptr;
+
+ FX_FILESIZE SavedPos = m_Pos;
+ bool bIsNumber;
+ CFX_ByteString word = GetNextWord(&bIsNumber);
+ if (word.GetLength() == 0)
+ return nullptr;
+
+ if (bIsNumber) {
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString nextword = GetNextWord(&bIsNumber);
+ if (bIsNumber) {
+ CFX_ByteString nextword2 = GetNextWord(nullptr);
+ if (nextword2 == "R")
+ return new CPDF_Reference(pObjList, FXSYS_atoui(word));
+ }
+ m_Pos = SavedPos;
+ return new CPDF_Number(word);
+ }
+
+ if (word == "true" || word == "false")
+ return new CPDF_Boolean(word == "true");
+
+ if (word == "null")
+ return new CPDF_Null;
+
+ if (word == "(") {
+ CFX_ByteString str = ReadString();
+ if (m_pCryptoHandler)
+ m_pCryptoHandler->Decrypt(objnum, gennum, str);
+ return new CPDF_String(str, FALSE);
+ }
+
+ if (word == "<") {
+ CFX_ByteString str = ReadHexString();
+ if (m_pCryptoHandler)
+ m_pCryptoHandler->Decrypt(objnum, gennum, str);
+ return new CPDF_String(str, TRUE);
+ }
+
+ if (word == "[") {
+ std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
+ new CPDF_Array);
+ while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
+ pArray->Add(pObj);
+
+ return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
+ }
+
+ if (word[0] == '/') {
+ return new CPDF_Name(
+ PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
+ }
+
+ if (word == "<<") {
+ std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
+ new CPDF_Dictionary);
+ while (1) {
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString key = GetNextWord(nullptr);
+ if (key.IsEmpty())
+ return nullptr;
+
+ if (key == ">>")
+ break;
+
+ if (key == "endobj") {
+ m_Pos = SavedPos;
+ break;
+ }
+
+ if (key[0] != '/')
+ continue;
+
+ key = PDF_NameDecode(key);
+ std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
+ GetObject(pObjList, objnum, gennum, true));
+ if (!obj) {
+ uint8_t ch;
+ while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
+ continue;
+ }
+ return nullptr;
+ }
+
+ if (key.GetLength() > 1) {
+ pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
+ obj.release());
+ }
+ }
+
+ FX_FILESIZE SavedPos = m_Pos;
+ CFX_ByteString nextword = GetNextWord(nullptr);
+ if (nextword != "stream") {
+ m_Pos = SavedPos;
+ return pDict.release();
+ }
+
+ return ReadStream(pDict.release(), objnum, gennum);
+ }
+
+ if (word == ">>")
+ m_Pos = SavedPos;
+
+ return nullptr;
+}
+
+unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
+ unsigned char byte1 = 0;
+ unsigned char byte2 = 0;
+
+ GetCharAt(pos, byte1);
+ GetCharAt(pos + 1, byte2);
+
+ if (byte1 == '\r' && byte2 == '\n')
+ return 2;
+
+ if (byte1 == '\r' || byte1 == '\n')
+ return 1;
+
+ return 0;
+}
+
+CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
+ FX_DWORD objnum,
+ FX_DWORD gennum) {
+ CPDF_Object* pLenObj = pDict->GetElement("Length");
+ FX_FILESIZE len = -1;
+ CPDF_Reference* pLenObjRef = ToReference(pLenObj);
+
+ bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
+ pLenObjRef->GetRefObjNum() != objnum);
+ if (pLenObj && differingObjNum)
+ len = pLenObj->GetInteger();
+
+ // Locate the start of stream.
+ ToNextLine();
+ FX_FILESIZE streamStartPos = m_Pos;
+
+ const CFX_ByteStringC kEndStreamStr("endstream");
+ const CFX_ByteStringC kEndObjStr("endobj");
+
+ CPDF_CryptoHandler* pCryptoHandler =
+ objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
+ if (!pCryptoHandler) {
+ FX_BOOL bSearchForKeyword = TRUE;
+ if (len >= 0) {
+ pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
+ pos += len;
+ if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
+ m_Pos = pos.ValueOrDie();
+
+ m_Pos += ReadEOLMarkers(m_Pos);
+ FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
+ GetNextWordInternal(nullptr);
+ // Earlier version of PDF specification doesn't require EOL marker before
+ // 'endstream' keyword. If keyword 'endstream' follows the bytes in
+ // specified length, it signals the end of stream.
+ if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
+ kEndStreamStr.GetLength()) == 0) {
+ bSearchForKeyword = FALSE;
+ }
+ }
+
+ if (bSearchForKeyword) {
+ // If len is not available, len needs to be calculated
+ // by searching the keywords "endstream" or "endobj".
+ m_Pos = streamStartPos;
+ FX_FILESIZE endStreamOffset = 0;
+ while (endStreamOffset >= 0) {
+ endStreamOffset = FindTag(kEndStreamStr, 0);
+
+ // Can't find "endstream".
+ if (endStreamOffset < 0)
+ break;
+
+ // Stop searching when "endstream" is found.
+ if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
+ kEndStreamStr, TRUE)) {
+ endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
+ break;
+ }
+ }
+
+ m_Pos = streamStartPos;
+ FX_FILESIZE endObjOffset = 0;
+ while (endObjOffset >= 0) {
+ endObjOffset = FindTag(kEndObjStr, 0);
+
+ // Can't find "endobj".
+ if (endObjOffset < 0)
+ break;
+
+ // Stop searching when "endobj" is found.
+ if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
+ TRUE)) {
+ endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
+ break;
+ }
+ }
+
+ // Can't find "endstream" or "endobj".
+ if (endStreamOffset < 0 && endObjOffset < 0) {
+ pDict->Release();
+ return nullptr;
+ }
+
+ if (endStreamOffset < 0 && endObjOffset >= 0) {
+ // Correct the position of end stream.
+ endStreamOffset = endObjOffset;
+ } else if (endStreamOffset >= 0 && endObjOffset < 0) {
+ // Correct the position of end obj.
+ endObjOffset = endStreamOffset;
+ } else if (endStreamOffset > endObjOffset) {
+ endStreamOffset = endObjOffset;
+ }
+
+ len = endStreamOffset;
+ int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
+ if (numMarkers == 2) {
+ len -= 2;
+ } else {
+ numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
+ if (numMarkers == 1) {
+ len -= 1;
+ }
+ }
+
+ if (len < 0) {
+ pDict->Release();
+ return nullptr;
+ }
+ pDict->SetAtInteger("Length", len);
+ }
+ m_Pos = streamStartPos;
+ }
+
+ if (len < 0) {
+ pDict->Release();
+ return nullptr;
+ }
+
+ uint8_t* pData = nullptr;
+ if (len > 0) {
+ pData = FX_Alloc(uint8_t, len);
+ ReadBlock(pData, len);
+ if (pCryptoHandler) {
+ CFX_BinaryBuf dest_buf;
+ dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
+
+ void* context = pCryptoHandler->DecryptStart(objnum, gennum);
+ pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
+ pCryptoHandler->DecryptFinish(context, dest_buf);
+
+ FX_Free(pData);
+ pData = dest_buf.GetBuffer();
+ len = dest_buf.GetSize();
+ dest_buf.DetachBuffer();
+ }
+ }
+
+ CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
+ streamStartPos = m_Pos;
+ FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
+
+ GetNextWordInternal(nullptr);
+
+ int numMarkers = ReadEOLMarkers(m_Pos);
+ if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
+ FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
+ 0) {
+ m_Pos = streamStartPos;
+ }
+ return pStream;
+}
+
+void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
+ FX_DWORD HeaderOffset) {
+ FX_Free(m_pFileBuf);
+
+ m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
+ m_HeaderOffset = HeaderOffset;
+ m_FileLen = pFileAccess->GetSize();
+ m_Pos = 0;
+ m_pFileAccess = pFileAccess;
+ m_BufOffset = 0;
+ pFileAccess->ReadBlock(
+ m_pFileBuf, 0,
+ (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
+}
+
+uint32_t CPDF_SyntaxParser::GetDirectNum() {
+ bool bIsNumber;
+ GetNextWordInternal(&bIsNumber);
+ if (!bIsNumber)
+ return 0;
+
+ m_WordBuffer[m_WordSize] = 0;
+ return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
+}
+
+bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
+ FX_FILESIZE limit,
+ const CFX_ByteStringC& tag,
+ FX_BOOL checkKeyword) {
+ const FX_DWORD taglen = tag.GetLength();
+
+ bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
+ bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
+ !PDFCharIsWhitespace(tag[taglen - 1]);
+
+ uint8_t ch;
+ if (bCheckRight && startpos + (int32_t)taglen <= limit &&
+ GetCharAt(startpos + (int32_t)taglen, ch)) {
+ if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
+ (checkKeyword && PDFCharIsDelimiter(ch))) {
+ return false;
+ }
+ }
+
+ if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
+ if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
+ (checkKeyword && PDFCharIsDelimiter(ch))) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
+// and drop the bool.
+FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
+ FX_BOOL bWholeWord,
+ FX_BOOL bForward,
+ FX_FILESIZE limit) {
+ int32_t taglen = tag.GetLength();
+ if (taglen == 0)
+ return FALSE;
+
+ FX_FILESIZE pos = m_Pos;
+ int32_t offset = 0;
+ if (!bForward)
+ offset = taglen - 1;
+
+ const uint8_t* tag_data = tag.GetPtr();
+ uint8_t byte;
+ while (1) {
+ if (bForward) {
+ if (limit && pos >= m_Pos + limit)
+ return FALSE;
+
+ if (!GetCharAt(pos, byte))
+ return FALSE;
+
+ } else {
+ if (limit && pos <= m_Pos - limit)
+ return FALSE;
+
+ if (!GetCharAtBackward(pos, byte))
+ return FALSE;
+ }
+
+ if (byte == tag_data[offset]) {
+ if (bForward) {
+ offset++;
+ if (offset < taglen) {
+ pos++;
+ continue;
+ }
+ } else {
+ offset--;
+ if (offset >= 0) {
+ pos--;
+ continue;
+ }
+ }
+
+ FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
+ if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
+ m_Pos = startpos;
+ return TRUE;
+ }
+ }
+
+ if (bForward) {
+ offset = byte == tag_data[0] ? 1 : 0;
+ pos++;
+ } else {
+ offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
+ pos--;
+ }
+
+ if (pos < 0)
+ return FALSE;
+ }
+
+ return FALSE;
+}
+
+int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
+ FX_BOOL bWholeWord,
+ FX_FILESIZE limit) {
+ int32_t ntags = 1;
+ for (int i = 0; i < tags.GetLength(); ++i) {
+ if (tags[i] == 0)
+ ++ntags;
+ }
+
+ std::vector<SearchTagRecord> patterns(ntags);
+ FX_DWORD start = 0;
+ FX_DWORD itag = 0;
+ FX_DWORD max_len = 0;
+ for (int i = 0; i <= tags.GetLength(); ++i) {
+ if (tags[i] == 0) {
+ FX_DWORD len = i - start;
+ max_len = std::max(len, max_len);
+ patterns[itag].m_pTag = tags.GetCStr() + start;
+ patterns[itag].m_Len = len;
+ patterns[itag].m_Offset = 0;
+ start = i + 1;
+ ++itag;
+ }
+ }
+
+ const FX_FILESIZE pos_limit = m_Pos + limit;
+ for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
+ uint8_t byte;
+ if (!GetCharAt(pos, byte))
+ break;
+
+ for (int i = 0; i < ntags; ++i) {
+ SearchTagRecord& pat = patterns[i];
+ if (pat.m_pTag[pat.m_Offset] != byte) {
+ pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
+ continue;
+ }
+
+ ++pat.m_Offset;
+ if (pat.m_Offset != pat.m_Len)
+ continue;
+
+ if (!bWholeWord ||
+ IsWholeWord(pos - pat.m_Len, limit,
+ CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {
+ return i;
+ }
+
+ pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
+ }
+ }
+ return -1;
+}
+
+FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
+ FX_FILESIZE limit) {
+ int32_t taglen = tag.GetLength();
+ int32_t match = 0;
+ limit += m_Pos;
+ FX_FILESIZE startpos = m_Pos;
+
+ while (1) {
+ uint8_t ch;
+ if (!GetNextChar(ch))
+ return -1;
+
+ if (ch == tag[match]) {
+ match++;
+ if (match == taglen)
+ return m_Pos - startpos - taglen;
+ } else {
+ match = ch == tag[0] ? 1 : 0;
+ }
+
+ if (limit && m_Pos == limit)
+ return -1;
+ }
+ return -1;
+}
+
+void CPDF_SyntaxParser::SetEncrypt(
+ std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
+ m_pCryptoHandler = std::move(pCryptoHandler);
+}
diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h
new file mode 100644
index 0000000000..44c98eafd8
--- /dev/null
+++ b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h
@@ -0,0 +1,96 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_SRC_FPDFAPI_FPDF_PARSER_CPDF_SYNTAX_PARSER_H_
+#define CORE_SRC_FPDFAPI_FPDF_PARSER_CPDF_SYNTAX_PARSER_H_
+
+#include <memory>
+
+#include "core/include/fxcrt/fx_basic.h"
+
+class CPDF_CryptoHandler;
+class CPDF_IndirectObjectHolder;
+class CPDF_Object;
+class CPDF_Dictionary;
+class CPDF_Stream;
+class IFX_FileRead;
+
+class CPDF_SyntaxParser {
+ public:
+ CPDF_SyntaxParser();
+ ~CPDF_SyntaxParser();
+
+ void InitParser(IFX_FileRead* pFileAccess, FX_DWORD HeaderOffset);
+
+ FX_FILESIZE SavePos() const { return m_Pos; }
+ void RestorePos(FX_FILESIZE pos) { m_Pos = pos; }
+
+ CPDF_Object* GetObject(CPDF_IndirectObjectHolder* pObjList,
+ FX_DWORD objnum,
+ FX_DWORD gennum,
+ FX_BOOL bDecrypt);
+ CPDF_Object* GetObjectByStrict(CPDF_IndirectObjectHolder* pObjList,
+ FX_DWORD objnum,
+ FX_DWORD gennum);
+ CFX_ByteString GetKeyword();
+
+ void ToNextLine();
+ void ToNextWord();
+
+ FX_BOOL SearchWord(const CFX_ByteStringC& word,
+ FX_BOOL bWholeWord,
+ FX_BOOL bForward,
+ FX_FILESIZE limit);
+ int SearchMultiWord(const CFX_ByteStringC& words,
+ FX_BOOL bWholeWord,
+ FX_FILESIZE limit);
+ FX_FILESIZE FindTag(const CFX_ByteStringC& tag, FX_FILESIZE limit);
+
+ void SetEncrypt(std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler);
+
+ FX_BOOL ReadBlock(uint8_t* pBuf, FX_DWORD size);
+ FX_BOOL GetCharAt(FX_FILESIZE pos, uint8_t& ch);
+ CFX_ByteString GetNextWord(bool* bIsNumber);
+
+ private:
+ friend class CPDF_Parser;
+ friend class CPDF_DataAvail;
+ friend class fpdf_parser_parser_ReadHexString_Test;
+
+ static const int kParserMaxRecursionDepth = 64;
+ static int s_CurrentRecursionDepth;
+
+ uint32_t GetDirectNum();
+
+ FX_BOOL GetNextChar(uint8_t& ch);
+ FX_BOOL GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch);
+ void GetNextWordInternal(bool* bIsNumber);
+ bool IsWholeWord(FX_FILESIZE startpos,
+ FX_FILESIZE limit,
+ const CFX_ByteStringC& tag,
+ FX_BOOL checkKeyword);
+
+ CFX_ByteString ReadString();
+ CFX_ByteString ReadHexString();
+ unsigned int ReadEOLMarkers(FX_FILESIZE pos);
+ CPDF_Stream* ReadStream(CPDF_Dictionary* pDict,
+ FX_DWORD objnum,
+ FX_DWORD gennum);
+
+ FX_FILESIZE m_Pos;
+ int m_MetadataObjnum;
+ IFX_FileRead* m_pFileAccess;
+ FX_DWORD m_HeaderOffset;
+ FX_FILESIZE m_FileLen;
+ uint8_t* m_pFileBuf;
+ FX_DWORD m_BufSize;
+ FX_FILESIZE m_BufOffset;
+ std::unique_ptr<CPDF_CryptoHandler> m_pCryptoHandler;
+ uint8_t m_WordBuffer[257];
+ FX_DWORD m_WordSize;
+};
+
+#endif // CORE_SRC_FPDFAPI_FPDF_PARSER_CPDF_SYNTAX_PARSER_H_
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp
index fbe6a8fb3a..c87fd63a28 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_fdf.cpp
@@ -7,6 +7,7 @@
#include "core/include/fpdfapi/fpdf_parser.h"
#include "core/include/fpdfapi/fpdf_serial.h"
+#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
CFDF_Document::CFDF_Document() : CPDF_IndirectObjectHolder(NULL) {
m_pRootDict = NULL;
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
index e18775e54b..a00d1bc98d 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
@@ -17,6 +17,7 @@
#include "core/include/fxcrt/fx_ext.h"
#include "core/include/fxcrt/fx_safe_types.h"
#include "core/src/fpdfapi/fpdf_page/pageint.h"
+#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
#include "core/src/fpdfapi/fpdf_parser/parser_int.h"
#include "third_party/base/stl_util.h"
@@ -30,12 +31,6 @@ const int32_t kMaxXRefSize = 1048576;
// are higher, but this may be large enough in practice.
const FX_DWORD kMaxObjectNumber = 1048576;
-struct SearchTagRecord {
- const char* m_pTag;
- FX_DWORD m_Len;
- FX_DWORD m_Offset;
-};
-
int32_t GetHeaderOffset(IFX_FileRead* pFile) {
// TODO(dsinclair): This is a complicated way of saying %PDF, simplify?
const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
@@ -98,7 +93,9 @@ CPDF_Parser::CPDF_Parser()
m_pEncryptDict(nullptr),
m_pLinearized(nullptr),
m_dwFirstPageNo(0),
- m_dwXrefStartObjNum(0) {}
+ m_dwXrefStartObjNum(0) {
+ m_pSyntax.reset(new CPDF_SyntaxParser);
+}
CPDF_Parser::~CPDF_Parser() {
CloseParser();
@@ -138,6 +135,14 @@ void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
m_pEncryptDict = pDict;
}
+CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
+ return m_pSyntax->m_pCryptoHandler.get();
+}
+
+IFX_FileRead* CPDF_Parser::GetFileAccess() const {
+ return m_pSyntax->m_pFileAccess;
+}
+
void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {
if (objnum == 0) {
m_ObjectInfo.clear();
@@ -166,9 +171,9 @@ void CPDF_Parser::CloseParser() {
ReleaseEncryptHandler();
SetEncryptDictionary(nullptr);
- if (m_bOwnFileRead && m_Syntax.m_pFileAccess) {
- m_Syntax.m_pFileAccess->Release();
- m_Syntax.m_pFileAccess = nullptr;
+ if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) {
+ m_pSyntax->m_pFileAccess->Release();
+ m_pSyntax->m_pFileAccess = nullptr;
}
m_ObjectStreamMap.clear();
@@ -202,32 +207,32 @@ CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) {
pFileAccess->Release();
return FORMAT_ERROR;
}
- m_Syntax.InitParser(pFileAccess, offset);
+ m_pSyntax->InitParser(pFileAccess, offset);
uint8_t ch;
- if (!m_Syntax.GetCharAt(5, ch))
+ if (!m_pSyntax->GetCharAt(5, ch))
return FORMAT_ERROR;
if (std::isdigit(ch))
m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;
- if (!m_Syntax.GetCharAt(7, ch))
+ if (!m_pSyntax->GetCharAt(7, ch))
return FORMAT_ERROR;
if (std::isdigit(ch))
m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
- if (m_Syntax.m_FileLen < m_Syntax.m_HeaderOffset + 9)
+ if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
return FORMAT_ERROR;
- m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9);
+ m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
m_pDocument = new CPDF_Document(this);
FX_BOOL bXRefRebuilt = FALSE;
- if (m_Syntax.SearchWord("startxref", TRUE, FALSE, 4096)) {
- m_SortedOffset.insert(m_Syntax.SavePos());
- m_Syntax.GetKeyword();
+ if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) {
+ m_SortedOffset.insert(m_pSyntax->SavePos());
+ m_pSyntax->GetKeyword();
bool bNumber;
- CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(&bNumber);
+ CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
if (!bNumber)
return FORMAT_ERROR;
@@ -280,7 +285,7 @@ CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) {
CPDF_Reference* pMetadata =
ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));
if (pMetadata)
- m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
+ m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
}
return SUCCESS;
}
@@ -321,13 +326,13 @@ CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
m_pSecurityHandler->CreateCryptoHandler());
if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
return HANDLER_ERROR;
- m_Syntax.SetEncrypt(std::move(pCryptoHandler));
+ m_pSyntax->SetEncrypt(std::move(pCryptoHandler));
}
return SUCCESS;
}
void CPDF_Parser::ReleaseEncryptHandler() {
- m_Syntax.m_pCryptoHandler.reset();
+ m_pSyntax->m_pCryptoHandler.reset();
m_pSecurityHandler.reset();
}
@@ -454,14 +459,14 @@ FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
FX_DWORD dwObjCount) {
- FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset;
+ FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
- m_Syntax.RestorePos(dwStartPos);
+ m_pSyntax->RestorePos(dwStartPos);
m_SortedOffset.insert(pos);
FX_DWORD start_objnum = 0;
FX_DWORD count = dwObjCount;
- FX_FILESIZE SavedPos = m_Syntax.SavePos();
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos();
const int32_t recordsize = 20;
std::vector<char> buf(1024 * recordsize + 1);
@@ -471,11 +476,11 @@ FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
for (int32_t block = 0; block < nBlocks; block++) {
int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
FX_DWORD dwReadSize = block_size * recordsize;
- if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen)
+ if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
return FALSE;
- if (!m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
- dwReadSize)) {
+ if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
+ dwReadSize)) {
return FALSE;
}
@@ -500,22 +505,22 @@ FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
m_bVersionUpdated = TRUE;
m_ObjectInfo[objnum].gennum = version;
- if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen)
+ if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
m_ObjectInfo[objnum].type = 1;
}
}
}
- m_Syntax.RestorePos(SavedPos + count * recordsize);
+ m_pSyntax->RestorePos(SavedPos + count * recordsize);
return TRUE;
}
bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
FX_FILESIZE streampos,
FX_BOOL bSkip) {
- m_Syntax.RestorePos(pos);
- if (m_Syntax.GetKeyword() != "xref")
+ m_pSyntax->RestorePos(pos);
+ if (m_pSyntax->GetKeyword() != "xref")
return false;
m_SortedOffset.insert(pos);
@@ -523,14 +528,14 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
m_SortedOffset.insert(streampos);
while (1) {
- FX_FILESIZE SavedPos = m_Syntax.SavePos();
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos();
bool bIsNumber;
- CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
if (word.IsEmpty())
return false;
if (!bIsNumber) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
break;
}
@@ -538,9 +543,9 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
if (start_objnum >= kMaxObjectNumber)
return false;
- FX_DWORD count = m_Syntax.GetDirectNum();
- m_Syntax.ToNextWord();
- SavedPos = m_Syntax.SavePos();
+ FX_DWORD count = m_pSyntax->GetDirectNum();
+ m_pSyntax->ToNextWord();
+ SavedPos = m_pSyntax->SavePos();
const int32_t recordsize = 20;
m_dwXrefStartObjNum = start_objnum;
@@ -551,8 +556,8 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
int32_t nBlocks = count / 1024 + 1;
for (int32_t block = 0; block < nBlocks; block++) {
int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
- m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
- block_size * recordsize);
+ m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
+ block_size * recordsize);
for (int32_t i = 0; i < block_size; i++) {
FX_DWORD objnum = start_objnum + block * 1024 + i;
@@ -575,7 +580,7 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
m_bVersionUpdated = TRUE;
m_ObjectInfo[objnum].gennum = version;
- if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen)
+ if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
m_ObjectInfo[objnum].type = 1;
@@ -583,7 +588,7 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
}
}
}
- m_Syntax.RestorePos(SavedPos + count * recordsize);
+ m_pSyntax->RestorePos(SavedPos + count * recordsize);
}
return !streampos || LoadCrossRefV5(&streampos, FALSE);
}
@@ -625,18 +630,19 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
const FX_DWORD kBufferSize = 4096;
std::vector<uint8_t> buffer(kBufferSize);
- FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
+ FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
FX_FILESIZE start_pos = 0;
FX_FILESIZE start_pos1 = 0;
FX_FILESIZE last_obj = -1;
FX_FILESIZE last_xref = -1;
FX_FILESIZE last_trailer = -1;
- while (pos < m_Syntax.m_FileLen) {
+ while (pos < m_pSyntax->m_FileLen) {
const FX_FILESIZE saved_pos = pos;
bool bOverFlow = false;
- FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize);
- if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size))
+ FX_DWORD size =
+ std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize);
+ if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
break;
for (FX_DWORD i = 0; i < size; i++) {
@@ -760,7 +766,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
break;
case 3:
if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
- FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
+ FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
m_SortedOffset.insert(obj_pos);
last_obj = start_pos;
FX_FILESIZE obj_end = 0;
@@ -783,8 +789,8 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
}
FX_FILESIZE offset = 0;
- m_Syntax.RestorePos(obj_pos);
- offset = m_Syntax.FindTag("obj", 0);
+ m_pSyntax->RestorePos(obj_pos);
+ offset = m_pSyntax->FindTag("obj", 0);
if (offset == -1)
offset = 0;
else
@@ -792,7 +798,7 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
FX_FILESIZE nLen = obj_end - obj_pos - offset;
if ((FX_DWORD)nLen > size - i) {
- pos = obj_end + m_Syntax.m_HeaderOffset;
+ pos = obj_end + m_pSyntax->m_HeaderOffset;
bOverFlow = true;
} else {
i += (FX_DWORD)nLen;
@@ -826,9 +832,9 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
if (inside_index == 7) {
if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
last_trailer = pos + i - 7;
- m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
+ m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
- CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, true);
+ CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true);
if (pObj) {
if (!pObj->IsDictionary() && !pObj->AsStream()) {
pObj->Release();
@@ -866,16 +872,16 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
m_pTrailer = pTrailer;
}
- FX_FILESIZE dwSavePos = m_Syntax.SavePos();
- CFX_ByteString strWord = m_Syntax.GetKeyword();
+ FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
+ CFX_ByteString strWord = m_pSyntax->GetKeyword();
if (!strWord.Compare("startxref")) {
bool bNumber;
CFX_ByteString bsOffset =
- m_Syntax.GetNextWord(&bNumber);
+ m_pSyntax->GetNextWord(&bNumber);
if (bNumber)
m_LastXRefOffset = FXSYS_atoi(bsOffset);
}
- m_Syntax.RestorePos(dwSavePos);
+ m_pSyntax->RestorePos(dwSavePos);
}
} else {
pObj->Release();
@@ -967,9 +973,9 @@ FX_BOOL CPDF_Parser::RebuildCrossRef() {
if (last_xref != -1 && last_xref > last_obj)
last_trailer = last_xref;
else if (last_trailer == -1 || last_xref < last_obj)
- last_trailer = m_Syntax.m_FileLen;
+ last_trailer = m_pSyntax->m_FileLen;
- m_SortedOffset.insert(last_trailer - m_Syntax.m_HeaderOffset);
+ m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
return m_pTrailer && !m_ObjectInfo.empty();
}
@@ -1166,13 +1172,13 @@ FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) {
return FALSE;
FX_FILESIZE size = *it - pos;
- FX_FILESIZE SavedPos = m_Syntax.SavePos();
- m_Syntax.RestorePos(pos);
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+ m_pSyntax->RestorePos(pos);
const char kFormStream[] = "/Form\0stream";
const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);
- bForm = m_Syntax.SearchMultiWord(kFormStreamStr, TRUE, size) == 0;
- m_Syntax.RestorePos(SavedPos);
+ bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0;
+ m_pSyntax->RestorePos(SavedPos);
return TRUE;
}
@@ -1311,114 +1317,115 @@ void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum,
if (pos == 0)
return;
- FX_FILESIZE SavedPos = m_Syntax.SavePos();
- m_Syntax.RestorePos(pos);
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+ m_pSyntax->RestorePos(pos);
bool bIsNumber;
- CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
if (!bIsNumber) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return;
}
FX_DWORD parser_objnum = FXSYS_atoui(word);
if (parser_objnum && parser_objnum != objnum) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return;
}
- word = m_Syntax.GetNextWord(&bIsNumber);
+ word = m_pSyntax->GetNextWord(&bIsNumber);
if (!bIsNumber) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return;
}
- if (m_Syntax.GetKeyword() != "obj") {
- m_Syntax.RestorePos(SavedPos);
+ if (m_pSyntax->GetKeyword() != "obj") {
+ m_pSyntax->RestorePos(SavedPos);
return;
}
auto it = m_SortedOffset.find(pos);
if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return;
}
FX_FILESIZE nextoff = *it;
FX_BOOL bNextOffValid = FALSE;
if (nextoff != pos) {
- m_Syntax.RestorePos(nextoff);
- word = m_Syntax.GetNextWord(&bIsNumber);
+ m_pSyntax->RestorePos(nextoff);
+ word = m_pSyntax->GetNextWord(&bIsNumber);
if (word == "xref") {
bNextOffValid = TRUE;
} else if (bIsNumber) {
- word = m_Syntax.GetNextWord(&bIsNumber);
- if (bIsNumber && m_Syntax.GetKeyword() == "obj") {
+ word = m_pSyntax->GetNextWord(&bIsNumber);
+ if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
bNextOffValid = TRUE;
}
}
}
if (!bNextOffValid) {
- m_Syntax.RestorePos(pos);
+ m_pSyntax->RestorePos(pos);
while (1) {
- if (m_Syntax.GetKeyword() == "endobj")
+ if (m_pSyntax->GetKeyword() == "endobj")
break;
- if (m_Syntax.SavePos() == m_Syntax.m_FileLen)
+ if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)
break;
}
- nextoff = m_Syntax.SavePos();
+ nextoff = m_pSyntax->SavePos();
}
size = (FX_DWORD)(nextoff - pos);
pBuffer = FX_Alloc(uint8_t, size);
- m_Syntax.RestorePos(pos);
- m_Syntax.ReadBlock(pBuffer, size);
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(pos);
+ m_pSyntax->ReadBlock(pBuffer, size);
+ m_pSyntax->RestorePos(SavedPos);
}
CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
CPDF_IndirectObjectHolder* pObjList,
FX_FILESIZE pos,
FX_DWORD objnum) {
- FX_FILESIZE SavedPos = m_Syntax.SavePos();
- m_Syntax.RestorePos(pos);
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+ m_pSyntax->RestorePos(pos);
bool bIsNumber;
- CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
if (!bIsNumber) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return nullptr;
}
- FX_FILESIZE objOffset = m_Syntax.SavePos();
+ FX_FILESIZE objOffset = m_pSyntax->SavePos();
objOffset -= word.GetLength();
FX_DWORD parser_objnum = FXSYS_atoui(word);
if (objnum && parser_objnum != objnum) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return nullptr;
}
- word = m_Syntax.GetNextWord(&bIsNumber);
+ word = m_pSyntax->GetNextWord(&bIsNumber);
if (!bIsNumber) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return nullptr;
}
FX_DWORD parser_gennum = FXSYS_atoui(word);
- if (m_Syntax.GetKeyword() != "obj") {
- m_Syntax.RestorePos(SavedPos);
+ if (m_pSyntax->GetKeyword() != "obj") {
+ m_pSyntax->RestorePos(SavedPos);
return nullptr;
}
- CPDF_Object* pObj = m_Syntax.GetObject(pObjList, objnum, parser_gennum, true);
- m_Syntax.SavePos();
+ CPDF_Object* pObj =
+ m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
+ m_pSyntax->SavePos();
- CFX_ByteString bsWord = m_Syntax.GetKeyword();
+ CFX_ByteString bsWord = m_pSyntax->GetKeyword();
if (bsWord == "endobj")
- m_Syntax.SavePos();
+ m_pSyntax->SavePos();
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
if (pObj) {
if (!objnum)
pObj->m_ObjNum = parser_objnum;
@@ -1432,48 +1439,48 @@ CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
FX_FILESIZE pos,
FX_DWORD objnum,
FX_FILESIZE* pResultPos) {
- FX_FILESIZE SavedPos = m_Syntax.SavePos();
- m_Syntax.RestorePos(pos);
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+ m_pSyntax->RestorePos(pos);
bool bIsNumber;
- CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
if (!bIsNumber) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return nullptr;
}
FX_DWORD parser_objnum = FXSYS_atoui(word);
if (objnum && parser_objnum != objnum) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return nullptr;
}
- word = m_Syntax.GetNextWord(&bIsNumber);
+ word = m_pSyntax->GetNextWord(&bIsNumber);
if (!bIsNumber) {
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return nullptr;
}
FX_DWORD gennum = FXSYS_atoui(word);
- if (m_Syntax.GetKeyword() != "obj") {
- m_Syntax.RestorePos(SavedPos);
+ if (m_pSyntax->GetKeyword() != "obj") {
+ m_pSyntax->RestorePos(SavedPos);
return nullptr;
}
- CPDF_Object* pObj = m_Syntax.GetObjectByStrict(pObjList, objnum, gennum);
+ CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum);
if (pResultPos)
- *pResultPos = m_Syntax.m_Pos;
+ *pResultPos = m_pSyntax->m_Pos;
- m_Syntax.RestorePos(SavedPos);
+ m_pSyntax->RestorePos(SavedPos);
return pObj;
}
CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
- if (m_Syntax.GetKeyword() != "trailer")
+ if (m_pSyntax->GetKeyword() != "trailer")
return nullptr;
std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
- m_Syntax.GetObject(m_pDocument, 0, 0, true));
+ m_pSyntax->GetObject(m_pDocument, 0, 0, true));
if (!ToDictionary(pObj.get()))
return nullptr;
return pObj.release()->AsDictionary();
@@ -1495,33 +1502,33 @@ FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {
FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
FX_DWORD offset) {
- m_Syntax.InitParser(pFileAccess, offset);
- m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9);
+ m_pSyntax->InitParser(pFileAccess, offset);
+ m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
- FX_FILESIZE SavedPos = m_Syntax.SavePos();
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos();
bool bIsNumber;
- CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
if (!bIsNumber)
return FALSE;
FX_DWORD objnum = FXSYS_atoui(word);
- word = m_Syntax.GetNextWord(&bIsNumber);
+ word = m_pSyntax->GetNextWord(&bIsNumber);
if (!bIsNumber)
return FALSE;
FX_DWORD gennum = FXSYS_atoui(word);
- if (m_Syntax.GetKeyword() != "obj") {
- m_Syntax.RestorePos(SavedPos);
+ if (m_pSyntax->GetKeyword() != "obj") {
+ m_pSyntax->RestorePos(SavedPos);
return FALSE;
}
- m_pLinearized = m_Syntax.GetObject(nullptr, objnum, gennum, true);
+ m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true);
if (!m_pLinearized)
return FALSE;
CPDF_Dictionary* pDict = m_pLinearized->GetDict();
if (pDict && pDict->GetElement("Linearized")) {
- m_Syntax.GetNextWord(nullptr);
+ m_pSyntax->GetNextWord(nullptr);
CPDF_Object* pLen = pDict->GetElement("L");
if (!pLen) {
@@ -1557,12 +1564,12 @@ CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) {
return FORMAT_ERROR;
if (!IsLinearizedFile(pFileAccess, offset)) {
- m_Syntax.m_pFileAccess = nullptr;
+ m_pSyntax->m_pFileAccess = nullptr;
return StartParse(pFileAccess);
}
m_pDocument = new CPDF_Document(this);
- FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos();
+ FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
FX_BOOL bXRefRebuilt = FALSE;
FX_BOOL bLoadV4 = FALSE;
@@ -1620,7 +1627,7 @@ CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) {
if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
if (CPDF_Reference* pMetadata =
ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))
- m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
+ m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
}
return SUCCESS;
}
@@ -1645,24 +1652,24 @@ FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
}
CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
- FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum;
- m_Syntax.m_MetadataObjnum = 0;
+ FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
+ m_pSyntax->m_MetadataObjnum = 0;
if (m_pTrailer) {
m_pTrailer->Release();
m_pTrailer = nullptr;
}
- m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset);
+ m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
uint8_t ch = 0;
FX_DWORD dwCount = 0;
- m_Syntax.GetNextChar(ch);
+ m_pSyntax->GetNextChar(ch);
while (PDFCharIsWhitespace(ch)) {
++dwCount;
- if (m_Syntax.m_FileLen >=
- (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) {
+ if (m_pSyntax->m_FileLen >=
+ (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
break;
}
- m_Syntax.GetNextChar(ch);
+ m_pSyntax->GetNextChar(ch);
}
m_LastXRefOffset += dwCount;
m_ObjectStreamMap.clear();
@@ -1671,967 +1678,14 @@ CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
!LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
m_LastXRefOffset = 0;
- m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
+ m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
return FORMAT_ERROR;
}
- m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
+ m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
return SUCCESS;
}
-// static
-int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
-
-CPDF_SyntaxParser::CPDF_SyntaxParser()
- : m_MetadataObjnum(0),
- m_pFileAccess(nullptr),
- m_pFileBuf(nullptr),
- m_BufSize(CPDF_ModuleMgr::kFileBufSize) {}
-
-CPDF_SyntaxParser::~CPDF_SyntaxParser() {
- FX_Free(m_pFileBuf);
-}
-
-FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
- CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
- m_Pos = pos;
- return GetNextChar(ch);
-}
-
-FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
- FX_FILESIZE pos = m_Pos + m_HeaderOffset;
- if (pos >= m_FileLen)
- return FALSE;
-
- if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
- FX_FILESIZE read_pos = pos;
- FX_DWORD read_size = m_BufSize;
- if ((FX_FILESIZE)read_size > m_FileLen)
- read_size = (FX_DWORD)m_FileLen;
-
- if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
- if (m_FileLen < (FX_FILESIZE)read_size) {
- read_pos = 0;
- read_size = (FX_DWORD)m_FileLen;
- } else {
- read_pos = m_FileLen - read_size;
- }
- }
-
- if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
- return FALSE;
-
- m_BufOffset = read_pos;
- }
- ch = m_pFileBuf[pos - m_BufOffset];
- m_Pos++;
- return TRUE;
-}
-
-FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
- pos += m_HeaderOffset;
- if (pos >= m_FileLen)
- return FALSE;
-
- if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
- FX_FILESIZE read_pos;
- if (pos < (FX_FILESIZE)m_BufSize)
- read_pos = 0;
- else
- read_pos = pos - m_BufSize + 1;
-
- FX_DWORD read_size = m_BufSize;
- if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
- if (m_FileLen < (FX_FILESIZE)read_size) {
- read_pos = 0;
- read_size = (FX_DWORD)m_FileLen;
- } else {
- read_pos = m_FileLen - read_size;
- }
- }
-
- if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
- return FALSE;
-
- m_BufOffset = read_pos;
- }
- ch = m_pFileBuf[pos - m_BufOffset];
- return TRUE;
-}
-
-FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
- if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
- return FALSE;
- m_Pos += size;
- return TRUE;
-}
-
-void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
- m_WordSize = 0;
- if (bIsNumber)
- *bIsNumber = true;
-
- uint8_t ch;
- if (!GetNextChar(ch))
- return;
-
- while (1) {
- while (PDFCharIsWhitespace(ch)) {
- if (!GetNextChar(ch))
- return;
- }
-
- if (ch != '%')
- break;
-
- while (1) {
- if (!GetNextChar(ch))
- return;
- if (PDFCharIsLineEnding(ch))
- break;
- }
- }
-
- if (PDFCharIsDelimiter(ch)) {
- if (bIsNumber)
- *bIsNumber = false;
-
- m_WordBuffer[m_WordSize++] = ch;
- if (ch == '/') {
- while (1) {
- if (!GetNextChar(ch))
- return;
-
- if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
- m_Pos--;
- return;
- }
-
- if (m_WordSize < sizeof(m_WordBuffer) - 1)
- m_WordBuffer[m_WordSize++] = ch;
- }
- } else if (ch == '<') {
- if (!GetNextChar(ch))
- return;
-
- if (ch == '<')
- m_WordBuffer[m_WordSize++] = ch;
- else
- m_Pos--;
- } else if (ch == '>') {
- if (!GetNextChar(ch))
- return;
-
- if (ch == '>')
- m_WordBuffer[m_WordSize++] = ch;
- else
- m_Pos--;
- }
- return;
- }
-
- while (1) {
- if (m_WordSize < sizeof(m_WordBuffer) - 1)
- m_WordBuffer[m_WordSize++] = ch;
-
- if (!PDFCharIsNumeric(ch)) {
- if (bIsNumber)
- *bIsNumber = false;
- }
-
- if (!GetNextChar(ch))
- return;
-
- if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
- m_Pos--;
- break;
- }
- }
-}
-
-CFX_ByteString CPDF_SyntaxParser::ReadString() {
- uint8_t ch;
- if (!GetNextChar(ch))
- return CFX_ByteString();
-
- CFX_ByteTextBuf buf;
- int32_t parlevel = 0;
- int32_t status = 0;
- int32_t iEscCode = 0;
- while (1) {
- switch (status) {
- case 0:
- if (ch == ')') {
- if (parlevel == 0) {
- return buf.GetByteString();
- }
- parlevel--;
- buf.AppendChar(')');
- } else if (ch == '(') {
- parlevel++;
- buf.AppendChar('(');
- } else if (ch == '\\') {
- status = 1;
- } else {
- buf.AppendChar(ch);
- }
- break;
- case 1:
- if (ch >= '0' && ch <= '7') {
- iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
- status = 2;
- break;
- }
-
- if (ch == 'n') {
- buf.AppendChar('\n');
- } else if (ch == 'r') {
- buf.AppendChar('\r');
- } else if (ch == 't') {
- buf.AppendChar('\t');
- } else if (ch == 'b') {
- buf.AppendChar('\b');
- } else if (ch == 'f') {
- buf.AppendChar('\f');
- } else if (ch == '\r') {
- status = 4;
- break;
- } else if (ch != '\n') {
- buf.AppendChar(ch);
- }
- status = 0;
- break;
- case 2:
- if (ch >= '0' && ch <= '7') {
- iEscCode =
- iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
- status = 3;
- } else {
- buf.AppendChar(iEscCode);
- status = 0;
- continue;
- }
- break;
- case 3:
- if (ch >= '0' && ch <= '7') {
- iEscCode =
- iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
- buf.AppendChar(iEscCode);
- status = 0;
- } else {
- buf.AppendChar(iEscCode);
- status = 0;
- continue;
- }
- break;
- case 4:
- status = 0;
- if (ch != '\n')
- continue;
- break;
- }
-
- if (!GetNextChar(ch))
- break;
- }
-
- GetNextChar(ch);
- return buf.GetByteString();
-}
-
-CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
- uint8_t ch;
- if (!GetNextChar(ch))
- return CFX_ByteString();
-
- CFX_ByteTextBuf buf;
- bool bFirst = true;
- uint8_t code = 0;
- while (1) {
- if (ch == '>')
- break;
-
- if (std::isxdigit(ch)) {
- int val = FXSYS_toHexDigit(ch);
- if (bFirst) {
- code = val * 16;
- } else {
- code += val;
- buf.AppendByte(code);
- }
- bFirst = !bFirst;
- }
-
- if (!GetNextChar(ch))
- break;
- }
- if (!bFirst)
- buf.AppendByte(code);
-
- return buf.GetByteString();
-}
-
-void CPDF_SyntaxParser::ToNextLine() {
- uint8_t ch;
- while (GetNextChar(ch)) {
- if (ch == '\n')
- break;
-
- if (ch == '\r') {
- GetNextChar(ch);
- if (ch != '\n')
- --m_Pos;
- break;
- }
- }
-}
-
-void CPDF_SyntaxParser::ToNextWord() {
- uint8_t ch;
- if (!GetNextChar(ch))
- return;
-
- while (1) {
- while (PDFCharIsWhitespace(ch)) {
- if (!GetNextChar(ch))
- return;
- }
-
- if (ch != '%')
- break;
-
- while (1) {
- if (!GetNextChar(ch))
- return;
- if (PDFCharIsLineEnding(ch))
- break;
- }
- }
- m_Pos--;
-}
-
-CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
- GetNextWordInternal(bIsNumber);
- return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
-}
-
-CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
- return GetNextWord(nullptr);
-}
-
-CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
- FX_DWORD objnum,
- FX_DWORD gennum,
- FX_BOOL bDecrypt) {
- CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
- if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
- return nullptr;
-
- FX_FILESIZE SavedPos = m_Pos;
- bool bIsNumber;
- CFX_ByteString word = GetNextWord(&bIsNumber);
- if (word.GetLength() == 0)
- return nullptr;
-
- if (bIsNumber) {
- FX_FILESIZE SavedPos = m_Pos;
- CFX_ByteString nextword = GetNextWord(&bIsNumber);
- if (bIsNumber) {
- CFX_ByteString nextword2 = GetNextWord(nullptr);
- if (nextword2 == "R") {
- FX_DWORD objnum = FXSYS_atoui(word);
- return new CPDF_Reference(pObjList, objnum);
- }
- }
- m_Pos = SavedPos;
- return new CPDF_Number(word);
- }
-
- if (word == "true" || word == "false")
- return new CPDF_Boolean(word == "true");
-
- if (word == "null")
- return new CPDF_Null;
-
- if (word == "(") {
- CFX_ByteString str = ReadString();
- if (m_pCryptoHandler && bDecrypt)
- m_pCryptoHandler->Decrypt(objnum, gennum, str);
- return new CPDF_String(str, FALSE);
- }
-
- if (word == "<") {
- CFX_ByteString str = ReadHexString();
- if (m_pCryptoHandler && bDecrypt)
- m_pCryptoHandler->Decrypt(objnum, gennum, str);
-
- return new CPDF_String(str, TRUE);
- }
-
- if (word == "[") {
- CPDF_Array* pArray = new CPDF_Array;
- while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
- pArray->Add(pObj);
-
- return pArray;
- }
-
- if (word[0] == '/') {
- return new CPDF_Name(
- PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
- }
-
- if (word == "<<") {
- int32_t nKeys = 0;
- FX_FILESIZE dwSignValuePos = 0;
-
- std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
- new CPDF_Dictionary);
- while (1) {
- CFX_ByteString key = GetNextWord(nullptr);
- if (key.IsEmpty())
- return nullptr;
-
- FX_FILESIZE SavedPos = m_Pos - key.GetLength();
- if (key == ">>")
- break;
-
- if (key == "endobj") {
- m_Pos = SavedPos;
- break;
- }
-
- if (key[0] != '/')
- continue;
-
- ++nKeys;
- key = PDF_NameDecode(key);
- if (key.IsEmpty())
- continue;
-
- if (key == "/Contents")
- dwSignValuePos = m_Pos;
-
- CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true);
- if (!pObj)
- continue;
-
- CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
- pDict->SetAt(keyNoSlash, pObj);
- }
-
- // Only when this is a signature dictionary and has contents, we reset the
- // contents to the un-decrypted form.
- if (IsSignatureDict(pDict.get()) && dwSignValuePos) {
- CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
- m_Pos = dwSignValuePos;
- pDict->SetAt("Contents", GetObject(pObjList, objnum, gennum, false));
- }
-
- FX_FILESIZE SavedPos = m_Pos;
- CFX_ByteString nextword = GetNextWord(nullptr);
- if (nextword != "stream") {
- m_Pos = SavedPos;
- return pDict.release();
- }
- return ReadStream(pDict.release(), objnum, gennum);
- }
-
- if (word == ">>")
- m_Pos = SavedPos;
-
- return nullptr;
-}
-
-CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
- CPDF_IndirectObjectHolder* pObjList,
- FX_DWORD objnum,
- FX_DWORD gennum) {
- CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
- if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
- return nullptr;
-
- FX_FILESIZE SavedPos = m_Pos;
- bool bIsNumber;
- CFX_ByteString word = GetNextWord(&bIsNumber);
- if (word.GetLength() == 0)
- return nullptr;
-
- if (bIsNumber) {
- FX_FILESIZE SavedPos = m_Pos;
- CFX_ByteString nextword = GetNextWord(&bIsNumber);
- if (bIsNumber) {
- CFX_ByteString nextword2 = GetNextWord(nullptr);
- if (nextword2 == "R")
- return new CPDF_Reference(pObjList, FXSYS_atoui(word));
- }
- m_Pos = SavedPos;
- return new CPDF_Number(word);
- }
-
- if (word == "true" || word == "false")
- return new CPDF_Boolean(word == "true");
-
- if (word == "null")
- return new CPDF_Null;
-
- if (word == "(") {
- CFX_ByteString str = ReadString();
- if (m_pCryptoHandler)
- m_pCryptoHandler->Decrypt(objnum, gennum, str);
- return new CPDF_String(str, FALSE);
- }
-
- if (word == "<") {
- CFX_ByteString str = ReadHexString();
- if (m_pCryptoHandler)
- m_pCryptoHandler->Decrypt(objnum, gennum, str);
- return new CPDF_String(str, TRUE);
- }
-
- if (word == "[") {
- std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
- new CPDF_Array);
- while (CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, true))
- pArray->Add(pObj);
-
- return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
- }
-
- if (word[0] == '/') {
- return new CPDF_Name(
- PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
- }
-
- if (word == "<<") {
- std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
- new CPDF_Dictionary);
- while (1) {
- FX_FILESIZE SavedPos = m_Pos;
- CFX_ByteString key = GetNextWord(nullptr);
- if (key.IsEmpty())
- return nullptr;
-
- if (key == ">>")
- break;
-
- if (key == "endobj") {
- m_Pos = SavedPos;
- break;
- }
-
- if (key[0] != '/')
- continue;
-
- key = PDF_NameDecode(key);
- std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
- GetObject(pObjList, objnum, gennum, true));
- if (!obj) {
- uint8_t ch;
- while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
- continue;
- }
- return nullptr;
- }
-
- if (key.GetLength() > 1) {
- pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
- obj.release());
- }
- }
-
- FX_FILESIZE SavedPos = m_Pos;
- CFX_ByteString nextword = GetNextWord(nullptr);
- if (nextword != "stream") {
- m_Pos = SavedPos;
- return pDict.release();
- }
-
- return ReadStream(pDict.release(), objnum, gennum);
- }
-
- if (word == ">>")
- m_Pos = SavedPos;
-
- return nullptr;
-}
-
-unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
- unsigned char byte1 = 0;
- unsigned char byte2 = 0;
-
- GetCharAt(pos, byte1);
- GetCharAt(pos + 1, byte2);
-
- if (byte1 == '\r' && byte2 == '\n')
- return 2;
-
- if (byte1 == '\r' || byte1 == '\n')
- return 1;
-
- return 0;
-}
-
-CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
- FX_DWORD objnum,
- FX_DWORD gennum) {
- CPDF_Object* pLenObj = pDict->GetElement("Length");
- FX_FILESIZE len = -1;
- CPDF_Reference* pLenObjRef = ToReference(pLenObj);
-
- bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
- pLenObjRef->GetRefObjNum() != objnum);
- if (pLenObj && differingObjNum)
- len = pLenObj->GetInteger();
-
- // Locate the start of stream.
- ToNextLine();
- FX_FILESIZE streamStartPos = m_Pos;
-
- const CFX_ByteStringC kEndStreamStr("endstream");
- const CFX_ByteStringC kEndObjStr("endobj");
-
- CPDF_CryptoHandler* pCryptoHandler =
- objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
- if (!pCryptoHandler) {
- FX_BOOL bSearchForKeyword = TRUE;
- if (len >= 0) {
- pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
- pos += len;
- if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
- m_Pos = pos.ValueOrDie();
-
- m_Pos += ReadEOLMarkers(m_Pos);
- FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
- GetNextWordInternal(nullptr);
- // Earlier version of PDF specification doesn't require EOL marker before
- // 'endstream' keyword. If keyword 'endstream' follows the bytes in
- // specified length, it signals the end of stream.
- if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
- kEndStreamStr.GetLength()) == 0) {
- bSearchForKeyword = FALSE;
- }
- }
-
- if (bSearchForKeyword) {
- // If len is not available, len needs to be calculated
- // by searching the keywords "endstream" or "endobj".
- m_Pos = streamStartPos;
- FX_FILESIZE endStreamOffset = 0;
- while (endStreamOffset >= 0) {
- endStreamOffset = FindTag(kEndStreamStr, 0);
-
- // Can't find "endstream".
- if (endStreamOffset < 0)
- break;
-
- // Stop searching when "endstream" is found.
- if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
- kEndStreamStr, TRUE)) {
- endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
- break;
- }
- }
-
- m_Pos = streamStartPos;
- FX_FILESIZE endObjOffset = 0;
- while (endObjOffset >= 0) {
- endObjOffset = FindTag(kEndObjStr, 0);
-
- // Can't find "endobj".
- if (endObjOffset < 0)
- break;
-
- // Stop searching when "endobj" is found.
- if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
- TRUE)) {
- endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
- break;
- }
- }
-
- // Can't find "endstream" or "endobj".
- if (endStreamOffset < 0 && endObjOffset < 0) {
- pDict->Release();
- return nullptr;
- }
-
- if (endStreamOffset < 0 && endObjOffset >= 0) {
- // Correct the position of end stream.
- endStreamOffset = endObjOffset;
- } else if (endStreamOffset >= 0 && endObjOffset < 0) {
- // Correct the position of end obj.
- endObjOffset = endStreamOffset;
- } else if (endStreamOffset > endObjOffset) {
- endStreamOffset = endObjOffset;
- }
-
- len = endStreamOffset;
- int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
- if (numMarkers == 2) {
- len -= 2;
- } else {
- numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
- if (numMarkers == 1) {
- len -= 1;
- }
- }
-
- if (len < 0) {
- pDict->Release();
- return nullptr;
- }
- pDict->SetAtInteger("Length", len);
- }
- m_Pos = streamStartPos;
- }
-
- if (len < 0) {
- pDict->Release();
- return nullptr;
- }
-
- uint8_t* pData = nullptr;
- if (len > 0) {
- pData = FX_Alloc(uint8_t, len);
- ReadBlock(pData, len);
- if (pCryptoHandler) {
- CFX_BinaryBuf dest_buf;
- dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
-
- void* context = pCryptoHandler->DecryptStart(objnum, gennum);
- pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
- pCryptoHandler->DecryptFinish(context, dest_buf);
-
- FX_Free(pData);
- pData = dest_buf.GetBuffer();
- len = dest_buf.GetSize();
- dest_buf.DetachBuffer();
- }
- }
-
- CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
- streamStartPos = m_Pos;
- FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
-
- GetNextWordInternal(nullptr);
-
- int numMarkers = ReadEOLMarkers(m_Pos);
- if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
- FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
- 0) {
- m_Pos = streamStartPos;
- }
- return pStream;
-}
-
-void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
- FX_DWORD HeaderOffset) {
- FX_Free(m_pFileBuf);
-
- m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
- m_HeaderOffset = HeaderOffset;
- m_FileLen = pFileAccess->GetSize();
- m_Pos = 0;
- m_pFileAccess = pFileAccess;
- m_BufOffset = 0;
- pFileAccess->ReadBlock(
- m_pFileBuf, 0,
- (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
-}
-
-uint32_t CPDF_SyntaxParser::GetDirectNum() {
- bool bIsNumber;
- GetNextWordInternal(&bIsNumber);
- if (!bIsNumber)
- return 0;
-
- m_WordBuffer[m_WordSize] = 0;
- return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
-}
-
-bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
- FX_FILESIZE limit,
- const CFX_ByteStringC& tag,
- FX_BOOL checkKeyword) {
- const FX_DWORD taglen = tag.GetLength();
-
- bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
- bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
- !PDFCharIsWhitespace(tag[taglen - 1]);
-
- uint8_t ch;
- if (bCheckRight && startpos + (int32_t)taglen <= limit &&
- GetCharAt(startpos + (int32_t)taglen, ch)) {
- if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
- (checkKeyword && PDFCharIsDelimiter(ch))) {
- return false;
- }
- }
-
- if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
- if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
- (checkKeyword && PDFCharIsDelimiter(ch))) {
- return false;
- }
- }
- return true;
-}
-
-// TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
-// and drop the bool.
-FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
- FX_BOOL bWholeWord,
- FX_BOOL bForward,
- FX_FILESIZE limit) {
- int32_t taglen = tag.GetLength();
- if (taglen == 0)
- return FALSE;
-
- FX_FILESIZE pos = m_Pos;
- int32_t offset = 0;
- if (!bForward)
- offset = taglen - 1;
-
- const uint8_t* tag_data = tag.GetPtr();
- uint8_t byte;
- while (1) {
- if (bForward) {
- if (limit && pos >= m_Pos + limit)
- return FALSE;
-
- if (!GetCharAt(pos, byte))
- return FALSE;
-
- } else {
- if (limit && pos <= m_Pos - limit)
- return FALSE;
-
- if (!GetCharAtBackward(pos, byte))
- return FALSE;
- }
-
- if (byte == tag_data[offset]) {
- if (bForward) {
- offset++;
- if (offset < taglen) {
- pos++;
- continue;
- }
- } else {
- offset--;
- if (offset >= 0) {
- pos--;
- continue;
- }
- }
-
- FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
- if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
- m_Pos = startpos;
- return TRUE;
- }
- }
-
- if (bForward) {
- offset = byte == tag_data[0] ? 1 : 0;
- pos++;
- } else {
- offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
- pos--;
- }
-
- if (pos < 0)
- return FALSE;
- }
-
- return FALSE;
-}
-
-int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
- FX_BOOL bWholeWord,
- FX_FILESIZE limit) {
- int32_t ntags = 1;
- for (int i = 0; i < tags.GetLength(); ++i) {
- if (tags[i] == 0)
- ++ntags;
- }
-
- std::vector<SearchTagRecord> patterns(ntags);
- FX_DWORD start = 0;
- FX_DWORD itag = 0;
- FX_DWORD max_len = 0;
- for (int i = 0; i <= tags.GetLength(); ++i) {
- if (tags[i] == 0) {
- FX_DWORD len = i - start;
- max_len = std::max(len, max_len);
- patterns[itag].m_pTag = tags.GetCStr() + start;
- patterns[itag].m_Len = len;
- patterns[itag].m_Offset = 0;
- start = i + 1;
- ++itag;
- }
- }
-
- const FX_FILESIZE pos_limit = m_Pos + limit;
- for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
- uint8_t byte;
- if (!GetCharAt(pos, byte))
- break;
-
- for (int i = 0; i < ntags; ++i) {
- SearchTagRecord& pat = patterns[i];
- if (pat.m_pTag[pat.m_Offset] != byte) {
- pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
- continue;
- }
-
- ++pat.m_Offset;
- if (pat.m_Offset != pat.m_Len)
- continue;
-
- if (!bWholeWord ||
- IsWholeWord(pos - pat.m_Len, limit,
- CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {
- return i;
- }
-
- pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
- }
- }
- return -1;
-}
-
-FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
- FX_FILESIZE limit) {
- int32_t taglen = tag.GetLength();
- int32_t match = 0;
- limit += m_Pos;
- FX_FILESIZE startpos = m_Pos;
-
- while (1) {
- uint8_t ch;
- if (!GetNextChar(ch))
- return -1;
-
- if (ch == tag[match]) {
- match++;
- if (match == taglen)
- return m_Pos - startpos - taglen;
- } else {
- match = ch == tag[0] ? 1 : 0;
- }
-
- if (limit && m_Pos == limit)
- return -1;
- }
- return -1;
-}
-
-void CPDF_SyntaxParser::SetEncrypt(
- std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
- m_pCryptoHandler = std::move(pCryptoHandler);
-}
class CPDF_DataAvail final : public IPDF_DataAvail {
public:
@@ -3112,7 +2166,7 @@ FX_BOOL CPDF_DataAvail::LoadAllFile(IFX_DownloadHints* pHints) {
}
FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints) {
- m_parser.m_Syntax.InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);
+ m_parser.m_pSyntax->InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);
m_parser.m_bOwnFileRead = false;
if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&
!m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
@@ -3697,17 +2751,17 @@ int32_t CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints,
m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
- m_parser.m_Syntax.InitParser(file.get(), 0);
+ m_parser.m_pSyntax->InitParser(file.get(), 0);
bool bNumber;
- CFX_ByteString objnum = m_parser.m_Syntax.GetNextWord(&bNumber);
+ CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber);
if (!bNumber)
return -1;
FX_DWORD objNum = FXSYS_atoui(objnum);
CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(nullptr, 0, objNum);
if (!pObj) {
- m_Pos += m_parser.m_Syntax.SavePos();
+ m_Pos += m_parser.m_pSyntax->SavePos();
return 0;
}
@@ -3715,7 +2769,7 @@ int32_t CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints,
CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr);
if (pName) {
if (pName->GetString() == "XRef") {
- m_Pos += m_parser.m_Syntax.SavePos();
+ m_Pos += m_parser.m_pSyntax->SavePos();
xref_offset = pObj->GetDict()->GetIntegerBy("Prev");
pObj->Release();
return 1;
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp
index e71b19011e..294a99675d 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp
@@ -4,6 +4,7 @@
#include "core/include/fpdfapi/fpdf_parser.h"
#include "core/include/fxcrt/fx_stream.h"
+#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/utils/path_service.h"
@@ -45,7 +46,7 @@ class CPDF_TestParser : public CPDF_Parser {
return false;
// For the test file, the header is set at the beginning.
- m_Syntax.InitParser(pFileAccess, 0);
+ m_pSyntax->InitParser(pFileAccess, 0);
return true;
}
@@ -54,7 +55,7 @@ class CPDF_TestParser : public CPDF_Parser {
CFX_TestBufferRead* buffer_reader = new CFX_TestBufferRead(buffer, len);
// For the test file, the header is set at the beginning.
- m_Syntax.InitParser(buffer_reader, 0);
+ m_pSyntax->InitParser(buffer_reader, 0);
return true;
}
diff --git a/pdfium.gyp b/pdfium.gyp
index a400061856..f2161ae8de 100644
--- a/pdfium.gyp
+++ b/pdfium.gyp
@@ -345,6 +345,8 @@
'core/src/fpdfapi/fpdf_page/fpdf_page_path.cpp',
'core/src/fpdfapi/fpdf_page/fpdf_page_pattern.cpp',
'core/src/fpdfapi/fpdf_page/pageint.h',
+ 'core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp',
+ 'core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h',
'core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp',
'core/src/fpdfapi/fpdf_parser/fpdf_parser_document.cpp',
'core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp',