summary | refs | log | tree | commit | diff
path: root/core/src/fpdfapi/fpdf_parser
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/fpdfapi/fpdf_parser')
-rw-r--r-- core/src/fpdfapi/fpdf_parser/cpdf_document.cpp 1
-rw-r--r-- core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp 1643
-rw-r--r-- core/src/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp 170
-rw-r--r-- core/src/fpdfapi/fpdf_parser/cpdf_simple_parser_unittest.cpp (renamed from core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp) 4
-rw-r--r-- core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp 1
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp 1
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp 2
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp 1
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp 1654
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp 1
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp 178
-rw-r--r-- core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h 39
12 files changed, 1884 insertions, 1811 deletions
diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_document.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_document.cpp
index f837c00509..d97d6e01ff 100644
--- a/core/src/fpdfapi/fpdf_parser/cpdf_document.cpp
+++ b/core/src/fpdfapi/fpdf_parser/cpdf_document.cpp
@@ -8,6 +8,7 @@
#include <set>
+#include "core/include/fpdfapi/cpdf_parser.h"
#include "core/include/fpdfapi/fpdf_module.h"
#include "core/include/fpdfapi/fpdf_parser.h"
#include "core/include/fxge/fx_font.h"
diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp
new file mode 100644
index 0000000000..1871467458
--- /dev/null
+++ b/core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp
@@ -0,0 +1,1643 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/include/fpdfapi/cpdf_parser.h"
+
+#include "core/include/fpdfapi/cpdf_document.h"
+#include "core/include/fpdfapi/fpdf_parser.h"
+#include "core/include/fxcrt/fx_ext.h"
+#include "core/include/fxcrt/fx_safe_types.h"
+#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
+#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
+#include "third_party/base/stl_util.h"
+
+namespace {
+
+// Largest trailer /Size value that is honoured when trimming the object map.
+// Theoretical limits are higher, but this may be large enough in practice.
+const int32_t kMaxXRefSize = 1048576;
+
+// Exclusive upper bound for the first object number of an xref subsection.
+// Theoretical limits are higher, but this may be large enough in practice.
+const FX_DWORD kMaxObjectNumber = 1048576;
+
+// Interprets the |n| bytes starting at |p| as a big-endian unsigned integer.
+// Yields 0 when |n| is not positive.
+FX_DWORD GetVarInt(const uint8_t* p, int32_t n) {
+  FX_DWORD value = 0;
+  int32_t index = 0;
+  while (index < n) {
+    value = value * 256 + p[index];
+    ++index;
+  }
+  return value;
+}
+
+// Returns the integer stored under "N" in the dictionary of |pObjStream|.
+int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
+  auto* pStreamDict = pObjStream->GetDict();
+  return pStreamDict->GetIntegerBy("N");
+}
+
+// Returns the integer stored under "First" in the dictionary of |pObjStream|.
+int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
+  auto* pStreamDict = pObjStream->GetDict();
+  return pStreamDict->GetIntegerBy("First");
+}
+
+}  // namespace
+
+// Creates an idle parser: no document, trailer, encryption dictionary or
+// linearization data is attached, and a fresh syntax parser is allocated.
+CPDF_Parser::CPDF_Parser()
+    : m_pDocument(nullptr),
+      m_bOwnFileRead(true),
+      m_FileVersion(0),
+      m_pTrailer(nullptr),
+      m_pEncryptDict(nullptr),
+      m_pLinearized(nullptr),
+      m_dwFirstPageNo(0),
+      m_dwXrefStartObjNum(0) {
+  // These members are otherwise first written by CloseParser()/StartParse(),
+  // leaving them indeterminate on a freshly constructed parser. They are
+  // assigned here rather than in the initializer list so that this does not
+  // depend on their declaration order in the class.
+  m_bVersionUpdated = FALSE;
+  m_bXRefStream = FALSE;
+  m_LastXRefOffset = 0;
+
+  m_pSyntax.reset(new CPDF_SyntaxParser);
+}
+
+// Tears the parser down by running the same cleanup as CloseParser().
+CPDF_Parser::~CPDF_Parser() {
+  this->CloseParser();
+}
+
+// Returns the highest object number present in the object map, or 0 when the
+// map is empty.
+FX_DWORD CPDF_Parser::GetLastObjNum() const {
+  if (m_ObjectInfo.empty())
+    return 0;
+
+  return m_ObjectInfo.rbegin()->first;
+}
+
+// An object number is valid when the object map is non-empty and |objnum|
+// does not exceed the highest number stored in it.
+bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {
+  if (m_ObjectInfo.empty())
+    return false;
+
+  const FX_DWORD highest = m_ObjectInfo.rbegin()->first;
+  return objnum <= highest;
+}
+
+// Returns the position recorded for |objnum|, or 0 when the object map has no
+// entry for it.
+FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const {
+  const auto entry = m_ObjectInfo.find(objnum);
+  if (entry == m_ObjectInfo.end())
+    return 0;
+
+  return entry->second.pos;
+}
+
+// Returns the type recorded for |objnum|, or 0 when the object map has no
+// entry for it. |objnum| is asserted to be a valid object number.
+uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const {
+  ASSERT(IsValidObjectNumber(objnum));
+
+  const auto entry = m_ObjectInfo.find(objnum);
+  if (entry == m_ObjectInfo.end())
+    return 0;
+
+  return entry->second.type;
+}
+
+// Returns the generation number recorded for |objnum|, or 0 when the object
+// map has no entry for it. |objnum| is asserted to be a valid object number.
+uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const {
+  ASSERT(IsValidObjectNumber(objnum));
+
+  const auto entry = m_ObjectInfo.find(objnum);
+  if (entry == m_ObjectInfo.end())
+    return 0;
+
+  return entry->second.gennum;
+}
+
+// True when the type recorded for |objnum| is 0 or 255.
+bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const {
+  switch (GetObjectType(objnum)) {
+    case 0:
+    case 255:
+      return true;
+    default:
+      return false;
+  }
+}
+
+// Stores |pDict| as the current encryption dictionary; the pointer is only
+// recorded here, nothing is released or copied.
+void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
+  this->m_pEncryptDict = pDict;
+}
+
+// Returns the crypto handler currently held by the syntax parser (may be
+// null); no ownership is transferred.
+CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
+  CPDF_CryptoHandler* pHandler = m_pSyntax->m_pCryptoHandler.get();
+  return pHandler;
+}
+
+// Returns the file reader the syntax parser is currently using.
+IFX_FileRead* CPDF_Parser::GetFileAccess() const {
+  IFX_FileRead* pFile = m_pSyntax->m_pFileAccess;
+  return pFile;
+}
+
+// Trims the object map so that it only describes object numbers below
+// |objnum|. Passing 0 empties the map. Otherwise every entry numbered
+// |objnum| or higher is dropped, and an entry for |objnum| - 1 is created
+// (with position 0) if it is missing, so that it becomes the last object
+// number in the map.
+void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {
+  if (objnum == 0) {
+    m_ObjectInfo.clear();
+    return;
+  }
+
+  m_ObjectInfo.erase(m_ObjectInfo.lower_bound(objnum), m_ObjectInfo.end());
+
+  const FX_DWORD last_kept = objnum - 1;
+  if (pdfium::ContainsKey(m_ObjectInfo, last_kept))
+    return;
+
+  m_ObjectInfo[last_kept].pos = 0;
+}
+
+// Returns the parser to its idle state: deletes the document, releases the
+// trailers, encryption handlers and linearization data, and clears all
+// lookup tables and caches.
+void CPDF_Parser::CloseParser() {
+  m_bVersionUpdated = FALSE;
+
+  delete m_pDocument;
+  m_pDocument = nullptr;
+
+  if (m_pTrailer) {
+    m_pTrailer->Release();
+    m_pTrailer = nullptr;
+  }
+
+  ReleaseEncryptHandler();
+  SetEncryptDictionary(nullptr);
+
+  // The file handle is released only when this parser owns it.
+  if (m_bOwnFileRead) {
+    if (m_pSyntax->m_pFileAccess) {
+      m_pSyntax->m_pFileAccess->Release();
+      m_pSyntax->m_pFileAccess = nullptr;
+    }
+  }
+
+  m_ObjectStreamMap.clear();
+  m_ObjCache.clear();
+  m_SortedOffset.clear();
+  m_ObjectInfo.clear();
+
+  // Release every stored older trailer (null slots are skipped) before
+  // emptying the array.
+  const int32_t trailer_count = m_Trailers.GetSize();
+  for (int32_t idx = 0; idx < trailer_count; ++idx) {
+    CPDF_Dictionary* pOldTrailer = m_Trailers.GetAt(idx);
+    if (pOldTrailer)
+      pOldTrailer->Release();
+  }
+  m_Trailers.RemoveAll();
+
+  if (m_pLinearized) {
+    m_pLinearized->Release();
+    m_pLinearized = nullptr;
+  }
+}
+// Resets the parser and parses |pFileAccess|; returns SUCCESS or the first error encountered.
+CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) {
+ CloseParser();
+
+ m_bXRefStream = FALSE;
+ m_LastXRefOffset = 0;
+ m_bOwnFileRead = true;
+ // A header offset of -1 is a format error; the file handle is released on that path.
+ int32_t offset = GetHeaderOffset(pFileAccess);
+ if (offset == -1) {
+ if (pFileAccess)
+ pFileAccess->Release();
+ return FORMAT_ERROR;
+ }
+ m_pSyntax->InitParser(pFileAccess, offset);
+ // The characters at positions 5 and 7 supply the two version digits (first * 10 + second).
+ uint8_t ch;
+ if (!m_pSyntax->GetCharAt(5, ch))
+ return FORMAT_ERROR;
+ if (std::isdigit(ch))
+ m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;
+
+ if (!m_pSyntax->GetCharAt(7, ch))
+ return FORMAT_ERROR;
+ if (std::isdigit(ch))
+ m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
+
+ if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
+ return FORMAT_ERROR;
+ // Position 9 bytes before the end of the data, then search for the "startxref" keyword.
+ m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
+ m_pDocument = new CPDF_Document(this);
+
+ FX_BOOL bXRefRebuilt = FALSE;
+ if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) {
+ m_SortedOffset.insert(m_pSyntax->SavePos());
+ m_pSyntax->GetKeyword();
+
+ bool bNumber;
+ CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
+ if (!bNumber)
+ return FORMAT_ERROR;
+ // Try the table loader (V4) first, then the stream loader (V5); rebuild by scanning if both fail.
+ m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
+ if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
+ !LoadAllCrossRefV5(m_LastXRefOffset)) {
+ if (!RebuildCrossRef())
+ return FORMAT_ERROR;
+
+ bXRefRebuilt = TRUE;
+ m_LastXRefOffset = 0;
+ }
+ } else {
+ if (!RebuildCrossRef())  // no "startxref" found: rebuild directly
+ return FORMAT_ERROR;
+
+ bXRefRebuilt = TRUE;
+ }
+ Error eRet = SetEncryptHandler();
+ if (eRet != SUCCESS)
+ return eRet;
+ // If the document has no root or no pages, retry once with a rebuilt xref (unless already rebuilt).
+ m_pDocument->LoadDoc();
+ if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
+ if (bXRefRebuilt)
+ return FORMAT_ERROR;
+
+ ReleaseEncryptHandler();
+ if (!RebuildCrossRef())
+ return FORMAT_ERROR;
+
+ eRet = SetEncryptHandler();
+ if (eRet != SUCCESS)
+ return eRet;
+
+ m_pDocument->LoadDoc();
+ if (!m_pDocument->GetRoot())
+ return FORMAT_ERROR;
+ }
+ if (GetRootObjNum() == 0) {  // trailer has no /Root reference: rebuild and check again
+ ReleaseEncryptHandler();
+ if (!RebuildCrossRef() || GetRootObjNum() == 0)
+ return FORMAT_ERROR;
+
+ eRet = SetEncryptHandler();
+ if (eRet != SUCCESS)
+ return eRet;
+ }
+ if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {  // tell the syntax parser which object is the unencrypted /Metadata
+ CPDF_Reference* pMetadata =
+ ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));
+ if (pMetadata)
+ m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
+ }
+ return SUCCESS;
+}
+// Drops any existing encryption state, then installs the security and crypto
+// handlers described by the trailer's "Encrypt" entry.
+//
+// Returns FORMAT_ERROR when there is no trailer, SUCCESS when no encryption
+// dictionary is found, HANDLER_ERROR when the filter is not "Standard" or
+// the crypto handler fails to initialize, and PASSWORD_ERROR when the
+// standard security handler fails to initialize.
+CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
+  ReleaseEncryptHandler();
+  SetEncryptDictionary(nullptr);
+
+  if (!m_pTrailer)
+    return FORMAT_ERROR;
+
+  // "Encrypt" may hold the dictionary directly or a reference to it.
+  if (CPDF_Object* pEncrypt = m_pTrailer->GetElement("Encrypt")) {
+    CPDF_Dictionary* pDirectDict = pEncrypt->AsDictionary();
+    if (pDirectDict) {
+      SetEncryptDictionary(pDirectDict);
+    } else if (CPDF_Reference* pRef = pEncrypt->AsReference()) {
+      CPDF_Object* pTarget =
+          m_pDocument->GetIndirectObject(pRef->GetRefObjNum());
+      if (pTarget)
+        SetEncryptDictionary(pTarget->GetDict());
+    }
+  }
+
+  if (!m_pEncryptDict)
+    return SUCCESS;
+
+  // Only the "Standard" filter has a handler here.
+  CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter");
+  if (!(filter == "Standard"))
+    return HANDLER_ERROR;
+
+  std::unique_ptr<IPDF_SecurityHandler> pNewHandler(
+      new CPDF_StandardSecurityHandler);
+  if (!pNewHandler->OnInit(this, m_pEncryptDict))
+    return PASSWORD_ERROR;
+
+  m_pSecurityHandler = std::move(pNewHandler);
+
+  std::unique_ptr<CPDF_CryptoHandler> pNewCrypto(
+      m_pSecurityHandler->CreateCryptoHandler());
+  if (!pNewCrypto->Init(m_pEncryptDict, m_pSecurityHandler.get()))
+    return HANDLER_ERROR;
+
+  m_pSyntax->SetEncrypt(std::move(pNewCrypto));
+  return SUCCESS;
+}
+
+// Destroys the syntax parser's crypto handler first, then the security
+// handler held by this parser.
+void CPDF_Parser::ReleaseEncryptHandler() {
+  m_pSyntax->m_pCryptoHandler.reset(nullptr);
+  m_pSecurityHandler.reset(nullptr);
+}
+
+// Returns a file offset for |objnum|, or 0 when none is known.
+// For a type 1 entry this is the entry's own position. For a type 2 entry
+// the recorded position is itself used as an object number, and that
+// object's position is returned. Any other type yields 0.
+FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {
+  if (!IsValidObjectNumber(objnum))
+    return 0;
+
+  switch (GetObjectType(objnum)) {
+    case 1:
+      return GetObjectPositionOrZero(objnum);
+    case 2: {
+      const FX_FILESIZE container = GetObjectPositionOrZero(objnum);
+      return GetObjectPositionOrZero(container);
+    }
+    default:
+      return 0;
+  }
+}
+
+// Loads the cross-reference table at |xrefpos| together with every earlier
+// table reachable through the trailers' "Prev" entries.
+//
+// A first pass walks the chain (skipping entry data) to collect the table
+// offsets and their "XRefStm" offsets; a second pass then loads the tables
+// in the collected order. Returns FALSE when a table or trailer cannot be
+// loaded or when the "Prev" chain revisits an offset.
+FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
+  if (!LoadCrossRefV4(xrefpos, 0, TRUE))
+    return FALSE;
+
+  m_pTrailer = LoadTrailerV4();
+  if (!m_pTrailer)
+    return FALSE;
+
+  const int32_t declared_size = GetDirectInteger(m_pTrailer, "Size");
+  if (declared_size > 0 && declared_size <= kMaxXRefSize)
+    ShrinkObjectMap(declared_size);
+
+  // New offsets are prepended, so both lists end up ordered from the last
+  // section reached along the chain to the one at the original |xrefpos|.
+  std::vector<FX_FILESIZE> table_offsets;
+  std::vector<FX_FILESIZE> stream_offsets;
+  std::set<FX_FILESIZE> visited;
+
+  table_offsets.push_back(xrefpos);
+  stream_offsets.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
+  visited.insert(xrefpos);
+
+  // GetDirectInteger() returns 0 when |m_pTrailer| has no Prev entry or the
+  // entry is not numerical, which ends the walk.
+  xrefpos = GetDirectInteger(m_pTrailer, "Prev");
+  while (xrefpos) {
+    // An offset that was already visited means the chain is circular.
+    if (!visited.insert(xrefpos).second)
+      return FALSE;
+
+    // SLOW ...
+    table_offsets.insert(table_offsets.begin(), xrefpos);
+    LoadCrossRefV4(xrefpos, 0, TRUE);
+
+    std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>>
+        pPrevTrailer(LoadTrailerV4());
+    if (!pPrevTrailer)
+      return FALSE;
+
+    xrefpos = GetDirectInteger(pPrevTrailer.get(), "Prev");
+
+    // SLOW ...
+    stream_offsets.insert(stream_offsets.begin(),
+                          pPrevTrailer->GetIntegerBy("XRefStm"));
+    m_Trailers.Add(pPrevTrailer.release());
+  }
+
+  for (size_t idx = 0; idx != table_offsets.size(); ++idx) {
+    if (!LoadCrossRefV4(table_offsets[idx], stream_offsets[idx], FALSE))
+      return FALSE;
+  }
+  return TRUE;
+}
+
+// Loads |dwObjCount| entries of the cross-reference table at |xrefpos| via
+// LoadLinearizedCrossRefV4(), then follows the trailers' "Prev" entries and
+// loads every earlier table.
+//
+// Returns FALSE when a table or trailer cannot be loaded, when the trailer's
+// "Size" is 0, or when the "Prev" chain revisits an offset.
+FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
+                                                 FX_DWORD dwObjCount) {
+  if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
+    return FALSE;
+
+  m_pTrailer = LoadTrailerV4();
+  if (!m_pTrailer)
+    return FALSE;
+
+  const int32_t declared_size = GetDirectInteger(m_pTrailer, "Size");
+  if (declared_size == 0)
+    return FALSE;
+
+  // New offsets are prepended, so the original |xrefpos| stays last.
+  std::vector<FX_FILESIZE> table_offsets;
+  std::vector<FX_FILESIZE> stream_offsets;
+  std::set<FX_FILESIZE> visited;
+
+  table_offsets.push_back(xrefpos);
+  stream_offsets.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
+  visited.insert(xrefpos);
+
+  xrefpos = GetDirectInteger(m_pTrailer, "Prev");
+  while (xrefpos) {
+    // An offset that was already visited means the chain is circular.
+    if (!visited.insert(xrefpos).second)
+      return FALSE;
+
+    // SLOW ...
+    table_offsets.insert(table_offsets.begin(), xrefpos);
+    LoadCrossRefV4(xrefpos, 0, TRUE);
+
+    std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>>
+        pPrevTrailer(LoadTrailerV4());
+    if (!pPrevTrailer)
+      return FALSE;
+
+    xrefpos = GetDirectInteger(pPrevTrailer.get(), "Prev");
+
+    // SLOW ...
+    stream_offsets.insert(stream_offsets.begin(),
+                          pPrevTrailer->GetIntegerBy("XRefStm"));
+    m_Trailers.Add(pPrevTrailer.release());
+  }
+
+  // Index 0 is deliberately not loaded by this loop.
+  for (size_t idx = 1; idx < table_offsets.size(); ++idx) {
+    if (!LoadCrossRefV4(table_offsets[idx], stream_offsets[idx], FALSE))
+      return FALSE;
+  }
+  return TRUE;
+}
+
+// Reads |dwObjCount| consecutive 20-byte cross-reference entries starting at
+// file offset |pos| and records them in the object map, numbering the
+// objects from 0.
+//
+// Entries are read in blocks of up to 1024 records. An entry whose byte 17
+// is 'f' is stored as free (position 0, type 0); any other entry is stored
+// as type 1 with the offset parsed from its start and the generation number
+// parsed from byte 11. Returns FALSE when a block cannot be read or lies
+// beyond the end of the file, or when an entry with offset 0 does not start
+// with ten digits. On success the syntax parser is left positioned just past
+// the entries.
+FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
+                                              FX_DWORD dwObjCount) {
+  FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
+
+  m_pSyntax->RestorePos(dwStartPos);
+  m_SortedOffset.insert(pos);
+
+  FX_DWORD start_objnum = 0;
+  FX_DWORD count = dwObjCount;
+  FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+
+  const int32_t recordsize = 20;
+  std::vector<char> buf(1024 * recordsize + 1);
+  buf[1024 * recordsize] = '\0';
+
+  int32_t nBlocks = count / 1024 + 1;
+  for (int32_t block = 0; block < nBlocks; block++) {
+    // Only the final block may be partial.
+    int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
+    FX_DWORD dwReadSize = block_size * recordsize;
+    if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
+      return FALSE;
+
+    if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
+                              dwReadSize)) {
+      return FALSE;
+    }
+
+    for (int32_t i = 0; i < block_size; i++) {
+      FX_DWORD objnum = start_objnum + block * 1024 + i;
+      char* pEntry = &buf[i * recordsize];
+      if (pEntry[17] == 'f') {
+        m_ObjectInfo[objnum].pos = 0;
+        m_ObjectInfo[objnum].type = 0;
+      } else {
+        // Parse the offset as 64-bit, as LoadCrossRefV4() does, so that
+        // offsets beyond the 32-bit range are not truncated.
+        FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
+        if (offset == 0) {
+          // A parsed value of 0 is only accepted when the field really
+          // consists of ten digits.
+          for (int32_t c = 0; c < 10; c++) {
+            if (!std::isdigit(pEntry[c]))
+              return FALSE;
+          }
+        }
+
+        m_ObjectInfo[objnum].pos = offset;
+        int32_t version = FXSYS_atoi(pEntry + 11);
+        if (version >= 1)
+          m_bVersionUpdated = TRUE;
+
+        m_ObjectInfo[objnum].gennum = version;
+        if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
+          m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
+
+        m_ObjectInfo[objnum].type = 1;
+      }
+    }
+  }
+  m_pSyntax->RestorePos(SavedPos + count * recordsize);
+  return TRUE;
+}
+// Loads the "xref" section at |pos|; with |bSkip| set, entries are skipped instead of stored. A nonzero |streampos| is then loaded via LoadCrossRefV5().
+bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
+ FX_FILESIZE streampos,
+ FX_BOOL bSkip) {
+ m_pSyntax->RestorePos(pos);
+ if (m_pSyntax->GetKeyword() != "xref")
+ return false;
+
+ m_SortedOffset.insert(pos);
+ if (streampos)
+ m_SortedOffset.insert(streampos);
+ // Each iteration handles one subsection: a first object number, an entry count, then the entries.
+ while (1) {
+ FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+ bool bIsNumber;
+ CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
+ if (word.IsEmpty())
+ return false;
+ // A non-numeric word ends the subsections; rewind to just before it.
+ if (!bIsNumber) {
+ m_pSyntax->RestorePos(SavedPos);
+ break;
+ }
+
+ FX_DWORD start_objnum = FXSYS_atoui(word);
+ if (start_objnum >= kMaxObjectNumber)
+ return false;
+
+ FX_DWORD count = m_pSyntax->GetDirectNum();
+ m_pSyntax->ToNextWord();
+ SavedPos = m_pSyntax->SavePos();
+ const int32_t recordsize = 20;
+
+ m_dwXrefStartObjNum = start_objnum;
+ if (!bSkip) {
+ std::vector<char> buf(1024 * recordsize + 1);
+ buf[1024 * recordsize] = '\0';
+
+ int32_t nBlocks = count / 1024 + 1;
+ for (int32_t block = 0; block < nBlocks; block++) {
+ int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;  // only the last block may be partial
+ m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
+ block_size * recordsize);
+ // Records are 20 bytes: offset parsed from byte 0, generation from byte 11, 'f' at byte 17 marks a free entry.
+ for (int32_t i = 0; i < block_size; i++) {
+ FX_DWORD objnum = start_objnum + block * 1024 + i;
+ char* pEntry = &buf[i * recordsize];
+ if (pEntry[17] == 'f') {
+ m_ObjectInfo[objnum].pos = 0;
+ m_ObjectInfo[objnum].type = 0;
+ } else {
+ FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
+ if (offset == 0) {  // a parsed 0 is only accepted if the field really is ten digits
+ for (int32_t c = 0; c < 10; c++) {
+ if (!std::isdigit(pEntry[c]))
+ return false;
+ }
+ }
+
+ m_ObjectInfo[objnum].pos = offset;
+ int32_t version = FXSYS_atoi(pEntry + 11);
+ if (version >= 1)
+ m_bVersionUpdated = TRUE;
+
+ m_ObjectInfo[objnum].gennum = version;
+ if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
+ m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
+
+ m_ObjectInfo[objnum].type = 1;
+ }
+ }
+ }
+ }
+ m_pSyntax->RestorePos(SavedPos + count * recordsize);  // continue just past this subsection's entries
+ }
+ return !streampos || LoadCrossRefV5(&streampos, FALSE);
+}
+
+// Loads the cross-reference stream at |xrefpos| as the main one, then keeps
+// loading the stream at whatever offset the previous load wrote back, until
+// that offset becomes 0.
+//
+// Returns FALSE when a stream fails to load or when an offset produced by a
+// load was already visited. On success the object stream map is cleared and
+// the parser is flagged as using xref streams.
+FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
+  if (!LoadCrossRefV5(&xrefpos, TRUE))
+    return FALSE;
+
+  std::set<FX_FILESIZE> visited;
+  while (xrefpos) {
+    visited.insert(xrefpos);
+    if (!LoadCrossRefV5(&xrefpos, FALSE))
+      return FALSE;
+
+    // LoadCrossRefV5() updated |xrefpos|; a repeat means the chain is
+    // circular.
+    if (visited.find(xrefpos) != visited.end())
+      return FALSE;
+  }
+
+  m_ObjectStreamMap.clear();
+  m_bXRefStream = TRUE;
+  return TRUE;
+}
+// Discards the current xref data and trailer, then rescans the file byte by byte to rediscover objects and trailers. Succeeds if a trailer and at least one object were found.
+FX_BOOL CPDF_Parser::RebuildCrossRef() {
+ m_ObjectInfo.clear();
+ m_SortedOffset.clear();
+ if (m_pTrailer) {
+ m_pTrailer->Release();
+ m_pTrailer = nullptr;
+ }
+ // |state| drives a byte-level state machine; |inside_index| counts matched characters of the keyword being recognized.
+ ParserState state = ParserState::kDefault;
+
+ int32_t inside_index = 0;
+ FX_DWORD objnum = 0;
+ FX_DWORD gennum = 0;
+ int32_t depth = 0;
+
+ const FX_DWORD kBufferSize = 4096;
+ std::vector<uint8_t> buffer(kBufferSize);
+ // |start_pos| / |start_pos1| hold where the candidate object number / generation number began.
+ FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
+ FX_FILESIZE start_pos = 0;
+ FX_FILESIZE start_pos1 = 0;
+ FX_FILESIZE last_obj = -1;
+ FX_FILESIZE last_xref = -1;
+ FX_FILESIZE last_trailer = -1;
+ // The file is read in chunks of up to |kBufferSize| bytes starting at |pos|.
+ while (pos < m_pSyntax->m_FileLen) {
+ const FX_FILESIZE saved_pos = pos;
+ bool bOverFlow = false;
+ FX_DWORD size =
+ std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize);
+ if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
+ break;
+
+ for (FX_DWORD i = 0; i < size; i++) {
+ uint8_t byte = buffer[i];
+ switch (state) {
+ case ParserState::kDefault:
+ if (PDFCharIsWhitespace(byte)) {
+ state = ParserState::kWhitespace;
+ } else if (std::isdigit(byte)) {
+ --i;  // step back so the same byte is examined again in the new state
+ state = ParserState::kWhitespace;
+ } else if (byte == '%') {
+ inside_index = 0;
+ state = ParserState::kComment;
+ } else if (byte == '(') {
+ state = ParserState::kString;
+ depth = 1;
+ } else if (byte == '<') {
+ inside_index = 1;
+ state = ParserState::kHexString;
+ } else if (byte == '\\') {
+ state = ParserState::kEscapedString;
+ } else if (byte == 't') {
+ state = ParserState::kTrailer;
+ inside_index = 1;
+ }
+ break;
+
+ case ParserState::kWhitespace:
+ if (std::isdigit(byte)) {
+ start_pos = pos + i;
+ state = ParserState::kObjNum;
+ objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
+ } else if (byte == 't') {
+ state = ParserState::kTrailer;
+ inside_index = 1;
+ } else if (byte == 'x') {
+ state = ParserState::kXref;
+ inside_index = 1;
+ } else if (!PDFCharIsWhitespace(byte)) {
+ --i;
+ state = ParserState::kDefault;
+ }
+ break;
+
+ case ParserState::kObjNum:
+ if (std::isdigit(byte)) {
+ objnum =
+ objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
+ } else if (PDFCharIsWhitespace(byte)) {
+ state = ParserState::kPostObjNum;
+ } else {
+ --i;
+ state = ParserState::kEndObj;
+ inside_index = 0;
+ }
+ break;
+
+ case ParserState::kPostObjNum:
+ if (std::isdigit(byte)) {
+ start_pos1 = pos + i;
+ state = ParserState::kGenNum;
+ gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
+ } else if (byte == 't') {
+ state = ParserState::kTrailer;
+ inside_index = 1;
+ } else if (!PDFCharIsWhitespace(byte)) {
+ --i;
+ state = ParserState::kDefault;
+ }
+ break;
+
+ case ParserState::kGenNum:
+ if (std::isdigit(byte)) {
+ gennum =
+ gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
+ } else if (PDFCharIsWhitespace(byte)) {
+ state = ParserState::kPostGenNum;
+ } else {
+ --i;
+ state = ParserState::kDefault;
+ }
+ break;
+
+ case ParserState::kPostGenNum:
+ if (byte == 'o') {
+ state = ParserState::kBeginObj;
+ inside_index = 1;
+ } else if (std::isdigit(byte)) {  // a third number: shift so the last two numbers are the obj/gen candidates
+ objnum = gennum;
+ gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
+ start_pos = start_pos1;
+ start_pos1 = pos + i;
+ state = ParserState::kGenNum;
+ } else if (byte == 't') {
+ state = ParserState::kTrailer;
+ inside_index = 1;
+ } else if (!PDFCharIsWhitespace(byte)) {
+ --i;
+ state = ParserState::kDefault;
+ }
+ break;
+
+ case ParserState::kBeginObj:
+ switch (inside_index) {
+ case 1:
+ if (byte != 'b') {
+ --i;
+ state = ParserState::kDefault;
+ } else {
+ inside_index++;
+ }
+ break;
+ case 2:
+ if (byte != 'j') {
+ --i;
+ state = ParserState::kDefault;
+ } else {
+ inside_index++;
+ }
+ break;
+ case 3:
+ if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {  // "<num> <gen> obj" fully matched
+ FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
+ m_SortedOffset.insert(obj_pos);
+ last_obj = start_pos;
+ FX_FILESIZE obj_end = 0;
+ CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
+ m_pDocument, obj_pos, objnum, &obj_end);
+ if (CPDF_Stream* pStream = ToStream(pObject)) {  // an XRef stream with a usable /Root supplies the trailer
+ if (CPDF_Dictionary* pDict = pStream->GetDict()) {
+ if ((pDict->KeyExist("Type")) &&
+ (pDict->GetStringBy("Type") == "XRef" &&
+ pDict->KeyExist("Size"))) {
+ CPDF_Object* pRoot = pDict->GetElement("Root");
+ if (pRoot && pRoot->GetDict() &&
+ pRoot->GetDict()->GetElement("Pages")) {
+ if (m_pTrailer)
+ m_pTrailer->Release();
+ m_pTrailer = ToDictionary(pDict->Clone());
+ }
+ }
+ }
+ }
+ // Skip the parsed object's bytes: advance within this buffer, or restart reading at the object's end if it runs past the buffer.
+ FX_FILESIZE offset = 0;
+ m_pSyntax->RestorePos(obj_pos);
+ offset = m_pSyntax->FindTag("obj", 0);
+ if (offset == -1)
+ offset = 0;
+ else
+ offset += 3;
+
+ FX_FILESIZE nLen = obj_end - obj_pos - offset;
+ if ((FX_DWORD)nLen > size - i) {
+ pos = obj_end + m_pSyntax->m_HeaderOffset;
+ bOverFlow = true;
+ } else {
+ i += (FX_DWORD)nLen;
+ }
+ // An existing entry with a nonzero position is only overwritten when the object parsed; otherwise a new type 1 entry is recorded.
+ if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
+ m_ObjectInfo[objnum].pos) {
+ if (pObject) {
+ FX_DWORD oldgen = GetObjectGenNum(objnum);
+ m_ObjectInfo[objnum].pos = obj_pos;
+ m_ObjectInfo[objnum].gennum = gennum;
+ if (oldgen != gennum)
+ m_bVersionUpdated = TRUE;
+ }
+ } else {
+ m_ObjectInfo[objnum].pos = obj_pos;
+ m_ObjectInfo[objnum].type = 1;
+ m_ObjectInfo[objnum].gennum = gennum;
+ }
+
+ if (pObject)
+ pObject->Release();
+ }
+ --i;
+ state = ParserState::kDefault;
+ break;
+ }
+ break;
+
+ case ParserState::kTrailer:
+ if (inside_index == 7) {  // all seven letters of "trailer" matched
+ if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
+ last_trailer = pos + i - 7;
+ m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
+
+ CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true);
+ if (pObj) {
+ if (!pObj->IsDictionary() && !pObj->AsStream()) {
+ pObj->Release();
+ } else {
+ CPDF_Stream* pStream = pObj->AsStream();
+ if (CPDF_Dictionary* pTrailer =
+ pStream ? pStream->GetDict() : pObj->AsDictionary()) {
+ if (m_pTrailer) {  // a trailer is already held: merge this one's entries into it
+ CPDF_Object* pRoot = pTrailer->GetElement("Root");
+ CPDF_Reference* pRef = ToReference(pRoot);
+ if (!pRoot ||
+ (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
+ m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
+ auto it = pTrailer->begin();
+ while (it != pTrailer->end()) {
+ const CFX_ByteString& key = it->first;
+ CPDF_Object* pElement = it->second;
+ ++it;
+ FX_DWORD dwObjNum =
+ pElement ? pElement->GetObjNum() : 0;
+ if (dwObjNum) {
+ m_pTrailer->SetAtReference(key, m_pDocument,
+ dwObjNum);
+ } else {
+ m_pTrailer->SetAt(key, pElement->Clone());
+ }
+ }
+ }
+ pObj->Release();
+ } else {  // first trailer found: adopt it (cloning the dictionary when it came from a stream)
+ if (pObj->IsStream()) {
+ m_pTrailer = ToDictionary(pTrailer->Clone());
+ pObj->Release();
+ } else {
+ m_pTrailer = pTrailer;
+ }
+ // If "startxref" follows the trailer, remember the offset it gives.
+ FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
+ CFX_ByteString strWord = m_pSyntax->GetKeyword();
+ if (!strWord.Compare("startxref")) {
+ bool bNumber;
+ CFX_ByteString bsOffset =
+ m_pSyntax->GetNextWord(&bNumber);
+ if (bNumber)
+ m_LastXRefOffset = FXSYS_atoi(bsOffset);
+ }
+ m_pSyntax->RestorePos(dwSavePos);
+ }
+ } else {
+ pObj->Release();
+ }
+ }
+ }
+ }
+ --i;
+ state = ParserState::kDefault;
+ } else if (byte == "trailer"[inside_index]) {
+ inside_index++;
+ } else {
+ --i;
+ state = ParserState::kDefault;
+ }
+ break;
+
+ case ParserState::kXref:
+ if (inside_index == 4) {  // all four letters of "xref" matched
+ last_xref = pos + i - 4;
+ state = ParserState::kWhitespace;
+ } else if (byte == "xref"[inside_index]) {
+ inside_index++;
+ } else {
+ --i;
+ state = ParserState::kDefault;
+ }
+ break;
+
+ case ParserState::kComment:
+ if (PDFCharIsLineEnding(byte))
+ state = ParserState::kDefault;
+ break;
+
+ case ParserState::kString:  // |depth| tracks nested parentheses
+ if (byte == ')') {
+ if (depth > 0)
+ depth--;
+ } else if (byte == '(') {
+ depth++;
+ }
+
+ if (!depth)
+ state = ParserState::kDefault;
+ break;
+
+ case ParserState::kHexString:
+ if (byte == '>' || (byte == '<' && inside_index == 1))
+ state = ParserState::kDefault;
+ inside_index = 0;
+ break;
+
+ case ParserState::kEscapedString:
+ if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
+ --i;
+ state = ParserState::kDefault;
+ }
+ break;
+
+ case ParserState::kEndObj:
+ if (PDFCharIsWhitespace(byte)) {
+ state = ParserState::kDefault;
+ } else if (byte == '%' || byte == '(' || byte == '<' ||
+ byte == '\\') {
+ state = ParserState::kDefault;
+ --i;
+ } else if (inside_index == 6) {
+ state = ParserState::kDefault;
+ --i;
+ } else if (byte == "endobj"[inside_index]) {
+ inside_index++;
+ }
+ break;
+ }
+
+ if (bOverFlow) {  // |pos| was already moved past the object; do not add |size| again
+ size = 0;
+ break;
+ }
+ }
+ pos += size;
+
+ // If the position has not changed at all in a loop iteration, then break
+ // out to prevent infinite looping.
+ if (pos == saved_pos)
+ break;
+ }
+ // Choose the final boundary offset that is recorded in |m_SortedOffset|.
+ if (last_xref != -1 && last_xref > last_obj)
+ last_trailer = last_xref;
+ else if (last_trailer == -1 || last_xref < last_obj)
+ last_trailer = m_pSyntax->m_FileLen;
+
+ m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
+ return m_pTrailer && !m_ObjectInfo.empty();
+}
+// Loads the cross-reference stream at |*pos| and writes its /Prev value back to |*pos|. With |bMainXRef| set, its dictionary becomes the main trailer.
+FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
+ CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);
+ if (!pObject)
+ return FALSE;
+
+ if (m_pDocument) {  // register the parsed object with the document; fail if that is not possible
+ FX_BOOL bInserted = FALSE;
+ CPDF_Dictionary* pDict = m_pDocument->GetRoot();
+ if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) {
+ bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject);
+ } else {
+ if (pObject->IsStream())
+ pObject->Release();
+ }
+
+ if (!bInserted)
+ return FALSE;
+ }
+
+ CPDF_Stream* pStream = pObject->AsStream();
+ if (!pStream)
+ return FALSE;
+
+ *pos = pStream->GetDict()->GetIntegerBy("Prev");
+ int32_t size = pStream->GetDict()->GetIntegerBy("Size");
+ if (size < 0) {
+ pStream->Release();
+ return FALSE;
+ }
+
+ if (bMainXRef) {  // main stream: adopt a copy of its dictionary, trim the map to /Size and reset all types to 0
+ m_pTrailer = ToDictionary(pStream->GetDict()->Clone());
+ ShrinkObjectMap(size);
+ for (auto& it : m_ObjectInfo)
+ it.second.type = 0;
+ } else {
+ m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone()));
+ }
+ // Collect (first object number, count) pairs from /Index; without usable pairs, one subsection [0, size) is assumed.
+ std::vector<std::pair<int32_t, int32_t>> arrIndex;
+ CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index");
+ if (pArray) {
+ FX_DWORD nPairSize = pArray->GetCount() / 2;
+ for (FX_DWORD i = 0; i < nPairSize; i++) {
+ CPDF_Object* pStartNumObj = pArray->GetElement(i * 2);
+ CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1);
+
+ if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
+ int nStartNum = pStartNumObj->GetInteger();
+ int nCount = pCountObj->GetInteger();
+ if (nStartNum >= 0 && nCount > 0)
+ arrIndex.push_back(std::make_pair(nStartNum, nCount));
+ }
+ }
+ }
+
+ if (arrIndex.size() == 0)
+ arrIndex.push_back(std::make_pair(0, size));
+
+ pArray = pStream->GetDict()->GetArrayBy("W");
+ if (!pArray) {
+ pStream->Release();
+ return FALSE;
+ }
+ // /W lists the byte width of each entry field; at least three widths are required and their sum must not overflow.
+ CFX_DWordArray WidthArray;
+ FX_SAFE_DWORD dwAccWidth = 0;
+ for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
+ WidthArray.Add(pArray->GetIntegerAt(i));
+ dwAccWidth += WidthArray[i];
+ }
+
+ if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
+ pStream->Release();
+ return FALSE;
+ }
+
+ FX_DWORD totalWidth = dwAccWidth.ValueOrDie();
+ CPDF_StreamAcc acc;
+ acc.LoadAllData(pStream);
+
+ const uint8_t* pData = acc.GetData();
+ FX_DWORD dwTotalSize = acc.GetSize();
+ FX_DWORD segindex = 0;  // number of entries consumed by the subsections processed so far
+ for (FX_DWORD i = 0; i < arrIndex.size(); i++) {
+ int32_t startnum = arrIndex[i].first;
+ if (startnum < 0)
+ continue;
+
+ m_dwXrefStartObjNum =
+ pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum);
+ FX_DWORD count =
+ pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second);
+ FX_SAFE_DWORD dwCaculatedSize = segindex;
+ dwCaculatedSize += count;
+ dwCaculatedSize *= totalWidth;
+ if (!dwCaculatedSize.IsValid() ||
+ dwCaculatedSize.ValueOrDie() > dwTotalSize) {
+ continue;  // subsection would read past the decoded stream data
+ }
+
+ const uint8_t* segstart = pData + segindex * totalWidth;
+ FX_SAFE_DWORD dwMaxObjNum = startnum;
+ dwMaxObjNum += count;
+ FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
+ if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
+ continue;  // subsection names object numbers beyond the current object map
+
+ for (FX_DWORD j = 0; j < count; j++) {
+ int32_t type = 1;  // used when the first field has zero width
+ const uint8_t* entrystart = segstart + j * totalWidth;
+ if (WidthArray[0])
+ type = GetVarInt(entrystart, WidthArray[0]);
+ // Type 255 is assigned below to objects named as the container of a compressed entry; only their offset is filled in here.
+ if (GetObjectType(startnum + j) == 255) {
+ FX_FILESIZE offset =
+ GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
+ m_ObjectInfo[startnum + j].pos = offset;
+ m_SortedOffset.insert(offset);
+ continue;
+ }
+
+ if (GetObjectType(startnum + j))  // an entry that already has a nonzero type is left untouched
+ continue;
+
+ m_ObjectInfo[startnum + j].type = type;
+ if (type == 0) {
+ m_ObjectInfo[startnum + j].pos = 0;
+ } else {
+ FX_FILESIZE offset =
+ GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
+ m_ObjectInfo[startnum + j].pos = offset;
+ if (type == 1) {
+ m_SortedOffset.insert(offset);
+ } else {  // here the second field is used as an object number rather than a file offset
+ if (offset < 0 || !IsValidObjectNumber(offset)) {
+ pStream->Release();
+ return FALSE;
+ }
+ m_ObjectInfo[offset].type = 255;
+ }
+ }
+ }
+ segindex += count;
+ }
+ pStream->Release();
+ return TRUE;
+}
+
+// Returns the trailer's "ID" entry as an array, or null when there is no
+// trailer or no such entry. If the entry is a reference, the referenced
+// object is parsed and stored back into the trailer under "ID" first.
+CPDF_Array* CPDF_Parser::GetIDArray() {
+  if (!m_pTrailer)
+    return nullptr;
+
+  CPDF_Object* pID = m_pTrailer->GetElement("ID");
+  if (!pID)
+    return nullptr;
+
+  CPDF_Reference* pRef = pID->AsReference();
+  if (pRef) {
+    pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum());
+    m_pTrailer->SetAt("ID", pID);
+  }
+  return ToArray(pID);
+}
+
+// Returns the object number referenced by the trailer's "Root" entry, or 0
+// when there is no trailer or the entry is not a reference.
+FX_DWORD CPDF_Parser::GetRootObjNum() {
+  CPDF_Object* pRootEntry =
+      m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr;
+  CPDF_Reference* pRef = ToReference(pRootEntry);
+  if (!pRef)
+    return 0;
+
+  return pRef->GetRefObjNum();
+}
+
+// Returns the object number referenced by the trailer's "Info" entry, or 0
+// when there is no trailer or the entry is not a reference.
+FX_DWORD CPDF_Parser::GetInfoObjNum() {
+  CPDF_Object* pInfoEntry =
+      m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr;
+  CPDF_Reference* pRef = ToReference(pInfoEntry);
+  if (!pRef)
+    return 0;
+
+  return pRef->GetRefObjNum();
+}
+
+// Decides whether object |objnum| is a form stream by searching its bytes
+// for "/Form" and "stream", and reports the answer through |bForm|.
+//
+// |bForm| is FALSE, and the function returns TRUE, when |objnum| is invalid,
+// has type 0 or 2, or has a position that is not a known offset. The
+// function returns FALSE only when no later known offset exists to bound the
+// object. The syntax parser's position is restored before returning.
+FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) {
+  bForm = FALSE;
+  if (!IsValidObjectNumber(objnum))
+    return TRUE;
+
+  const uint8_t type = GetObjectType(objnum);
+  if (type == 0 || type == 2)
+    return TRUE;
+
+  const FX_FILESIZE obj_pos = m_ObjectInfo[objnum].pos;
+  auto next_offset = m_SortedOffset.find(obj_pos);
+  if (next_offset == m_SortedOffset.end())
+    return TRUE;
+
+  ++next_offset;
+  if (next_offset == m_SortedOffset.end())
+    return FALSE;
+
+  // The object extends up to the next known offset.
+  const FX_FILESIZE obj_size = *next_offset - obj_pos;
+  const FX_FILESIZE restore_to = m_pSyntax->SavePos();
+  m_pSyntax->RestorePos(obj_pos);
+
+  // The embedded NUL separates the two words handed to SearchMultiWord(),
+  // so the length is passed explicitly.
+  const char kFormStream[] = "/Form\0stream";
+  const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);
+  bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, obj_size) == 0;
+
+  m_pSyntax->RestorePos(restore_to);
+  return TRUE;
+}
+
+// Parses and returns object |objnum|, or null when it cannot be produced.
+//
+// Entries of type 1 or 255 are parsed directly at their recorded position.
+// Entries of type 2 live inside an object stream: the stream's table of
+// (object number, offset) pairs is read once per stream into |m_ObjCache|,
+// and the object is then parsed from the stream data. Null is returned for
+// invalid numbers, other types, non-positive positions, objects missing from
+// their stream, and when |objnum| is already being parsed.
+CPDF_Object* CPDF_Parser::ParseIndirectObject(
+    CPDF_IndirectObjectHolder* pObjList,
+    FX_DWORD objnum) {
+  if (!IsValidObjectNumber(objnum))
+    return nullptr;
+
+  // Prevent circular parsing the same object.
+  if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
+    return nullptr;
+  ScopedSetInsertion<FX_DWORD> in_progress(&m_ParsingObjNums, objnum);
+
+  const uint8_t type = GetObjectType(objnum);
+  if (type == 1 || type == 255) {
+    const FX_FILESIZE obj_pos = m_ObjectInfo[objnum].pos;
+    return obj_pos > 0 ? ParseIndirectObjectAt(pObjList, obj_pos, objnum)
+                       : nullptr;
+  }
+  if (type != 2)
+    return nullptr;
+
+  // For a type 2 entry the recorded position is passed to GetObjectStream().
+  CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
+  if (!pObjStream)
+    return nullptr;
+
+  ScopedFileStream file(FX_CreateMemoryStream(
+      (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
+  CPDF_SyntaxParser syntax;
+  syntax.InitParser(file.get(), 0);
+  const int32_t first_offset = GetStreamFirst(pObjStream);
+
+  // Read object numbers from |pObjStream| into a cache, unless a previous
+  // call already did so.
+  const bool bAlreadyCached = pdfium::ContainsKey(m_ObjCache, pObjStream);
+  auto& stream_offsets = m_ObjCache[pObjStream];
+  if (!bAlreadyCached) {
+    int32_t remaining = GetStreamNCount(pObjStream);
+    while (remaining > 0) {
+      FX_DWORD thisnum = syntax.GetDirectNum();
+      FX_DWORD thisoff = syntax.GetDirectNum();
+      stream_offsets[thisnum] = thisoff;
+      --remaining;
+    }
+  }
+
+  const auto found = stream_offsets.find(objnum);
+  if (found == stream_offsets.end())
+    return nullptr;
+
+  syntax.RestorePos(first_offset + found->second);
+  return syntax.GetObject(pObjList, 0, 0, true);
+}
+
+// Returns the loaded stream accessor for object stream |objnum|, creating and
+// caching it in |m_ObjectStreamMap| on first use. Returns nullptr when there
+// is no document or the object is not a stream.
+CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) {
+  const auto cached = m_ObjectStreamMap.find(objnum);
+  if (cached != m_ObjectStreamMap.end())
+    return cached->second.get();
+
+  const CPDF_Stream* pSource =
+      m_pDocument ? ToStream(m_pDocument->GetIndirectObject(objnum)) : nullptr;
+  if (!pSource)
+    return nullptr;
+
+  // Load the data first, then hand ownership to the cache.
+  CPDF_StreamAcc* pLoaded = new CPDF_StreamAcc;
+  pLoaded->LoadAllData(pSource);
+  m_ObjectStreamMap[objnum].reset(pLoaded);
+  return pLoaded;
+}
+
+// Returns the distance from the start of object |objnum| to the next recorded
+// offset in |m_SortedOffset|, or 0 when it cannot be determined.
+// For a type 2 (compressed) object the size of its containing object stream
+// is reported instead; the container's number is stored in the |pos| field.
+FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const {
+  if (!IsValidObjectNumber(objnum))
+    return 0;
+
+  if (GetObjectType(objnum) == 2) {
+    objnum = static_cast<FX_DWORD>(GetObjectPositionOrZero(objnum));
+    // The container number comes from the file; validate it before it is
+    // passed to GetObjectType(), which asserts on invalid object numbers.
+    if (!IsValidObjectNumber(objnum))
+      return 0;
+  }
+
+  const uint8_t objType = GetObjectType(objnum);
+  if (objType != 1 && objType != 255)
+    return 0;
+
+  const FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
+  if (offset == 0)
+    return 0;
+
+  auto it = m_SortedOffset.find(offset);
+  if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end())
+    return 0;
+
+  return *it - offset;
+}
+
+// Copies the raw bytes of indirect object |objnum| into a newly allocated
+// buffer. On success |pBuffer| points to an FX_Alloc'ed block of |size| bytes
+// that the caller must release; on any failure |pBuffer| is nullptr and
+// |size| is 0.
+//
+// Type 2 objects are cut out of their containing object stream using the
+// (number, offset) pairs in the stream header. Type 1 objects are read from
+// the file, from their "N G obj" header up to the next recorded offset, or up
+// to the end of the following "endobj" keyword when that offset does not
+// start another object or an xref table.
+void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum,
+                                    uint8_t*& pBuffer,
+                                    FX_DWORD& size) {
+  pBuffer = nullptr;
+  size = 0;
+  if (!IsValidObjectNumber(objnum))
+    return;
+
+  if (GetObjectType(objnum) == 2) {
+    CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
+    if (!pObjStream)
+      return;
+
+    // "First" comes from the file; a negative value cannot be a valid offset.
+    const int32_t offset = GetStreamFirst(pObjStream);
+    if (offset < 0)
+      return;
+
+    const uint8_t* pData = pObjStream->GetData();
+    const FX_DWORD totalsize = pObjStream->GetSize();
+    ScopedFileStream file(
+        FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE));
+
+    CPDF_SyntaxParser syntax;
+    syntax.InitParser(file.get(), 0);
+    for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
+      const FX_DWORD thisnum = syntax.GetDirectNum();
+      const FX_DWORD thisoff = syntax.GetDirectNum();
+      if (thisnum != objnum)
+        continue;
+
+      // All offsets are untrusted. Compute in 64 bits and reject anything
+      // that does not lie inside the decoded stream, instead of letting the
+      // unsigned subtraction wrap and the copy read out of bounds.
+      const uint64_t start = static_cast<uint64_t>(offset) + thisoff;
+      if (start > totalsize)
+        return;
+
+      // Last entry: the object runs to the end of the stream.
+      uint64_t length = totalsize - start;
+      if (i != 1) {
+        syntax.GetDirectNum();  // Skip nextnum.
+        const FX_DWORD nextoff = syntax.GetDirectNum();
+        if (nextoff < thisoff || nextoff - thisoff > length)
+          return;
+        length = nextoff - thisoff;
+      }
+
+      size = static_cast<FX_DWORD>(length);
+      pBuffer = FX_Alloc(uint8_t, size);
+      FXSYS_memcpy(pBuffer, pData + static_cast<size_t>(start), size);
+      return;
+    }
+    return;
+  }
+
+  if (GetObjectType(objnum) != 1)
+    return;
+
+  FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
+  if (pos == 0)
+    return;
+
+  FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+  m_pSyntax->RestorePos(pos);
+
+  // Expect "<objnum> <gennum> obj" at |pos|.
+  bool bIsNumber;
+  CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
+  if (!bIsNumber) {
+    m_pSyntax->RestorePos(SavedPos);
+    return;
+  }
+
+  FX_DWORD parser_objnum = FXSYS_atoui(word);
+  if (parser_objnum && parser_objnum != objnum) {
+    m_pSyntax->RestorePos(SavedPos);
+    return;
+  }
+
+  word = m_pSyntax->GetNextWord(&bIsNumber);
+  if (!bIsNumber) {
+    m_pSyntax->RestorePos(SavedPos);
+    return;
+  }
+
+  if (m_pSyntax->GetKeyword() != "obj") {
+    m_pSyntax->RestorePos(SavedPos);
+    return;
+  }
+
+  auto it = m_SortedOffset.find(pos);
+  if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
+    m_pSyntax->RestorePos(SavedPos);
+    return;
+  }
+
+  // The next recorded offset is trusted as the end of this object only if it
+  // starts an xref table or another "N G obj" header.
+  FX_FILESIZE nextoff = *it;
+  FX_BOOL bNextOffValid = FALSE;
+  if (nextoff != pos) {
+    m_pSyntax->RestorePos(nextoff);
+    word = m_pSyntax->GetNextWord(&bIsNumber);
+    if (word == "xref") {
+      bNextOffValid = TRUE;
+    } else if (bIsNumber) {
+      word = m_pSyntax->GetNextWord(&bIsNumber);
+      if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
+        bNextOffValid = TRUE;
+      }
+    }
+  }
+
+  if (!bNextOffValid) {
+    // Fall back to scanning forward for "endobj".
+    m_pSyntax->RestorePos(pos);
+    while (1) {
+      if (m_pSyntax->GetKeyword() == "endobj")
+        break;
+
+      // Other code in this file adds m_HeaderOffset to SavePos() before
+      // comparing with m_FileLen. Use the same convention with >= so the
+      // scan also terminates when the header offset is non-zero.
+      if ((FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset) >=
+          m_pSyntax->m_FileLen) {
+        break;
+      }
+    }
+    nextoff = m_pSyntax->SavePos();
+  }
+
+  size = (FX_DWORD)(nextoff - pos);
+  pBuffer = FX_Alloc(uint8_t, size);
+  m_pSyntax->RestorePos(pos);
+  if (!m_pSyntax->ReadBlock(pBuffer, size)) {
+    // Do not hand back a buffer that was never filled.
+    FX_Free(pBuffer);
+    pBuffer = nullptr;
+    size = 0;
+  }
+  m_pSyntax->RestorePos(SavedPos);
+}
+
+// Parses the indirect object whose "N G obj" header starts at |pos| and
+// returns it, or nullptr if the header is malformed or, for a non-zero
+// |objnum|, the number in the file does not match. The syntax parser's
+// position is restored before returning.
+//
+// When |objnum| is 0 the object number found in the file is stored on the
+// result; the generation number from the file is always stored.
+CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
+    CPDF_IndirectObjectHolder* pObjList,
+    FX_FILESIZE pos,
+    FX_DWORD objnum) {
+  FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+  m_pSyntax->RestorePos(pos);
+  bool bIsNumber;
+  CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
+  if (!bIsNumber) {
+    m_pSyntax->RestorePos(SavedPos);
+    return nullptr;
+  }
+
+  FX_DWORD parser_objnum = FXSYS_atoui(word);
+  if (objnum && parser_objnum != objnum) {
+    m_pSyntax->RestorePos(SavedPos);
+    return nullptr;
+  }
+
+  word = m_pSyntax->GetNextWord(&bIsNumber);
+  if (!bIsNumber) {
+    m_pSyntax->RestorePos(SavedPos);
+    return nullptr;
+  }
+
+  FX_DWORD parser_gennum = FXSYS_atoui(word);
+  if (m_pSyntax->GetKeyword() != "obj") {
+    m_pSyntax->RestorePos(SavedPos);
+    return nullptr;
+  }
+
+  // The previous code also computed an unused header offset and probed for
+  // "endobj" while discarding every result. The position is restored right
+  // below, so that work had no effect and has been dropped.
+  CPDF_Object* pObj =
+      m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
+
+  m_pSyntax->RestorePos(SavedPos);
+  if (pObj) {
+    if (!objnum)
+      pObj->m_ObjNum = parser_objnum;
+    pObj->m_GenNum = parser_gennum;
+  }
+  return pObj;
+}
+
+// Strict-mode counterpart of ParseIndirectObjectAt(): parses the object whose
+// "N G obj" header starts at |pos| with GetObjectByStrict(). Returns nullptr
+// if the header is malformed or, for a non-zero |objnum|, the number in the
+// file differs. When parsing is attempted and |pResultPos| is non-null, it
+// receives the syntax parser position reached after the object. The caller's
+// parser position is always restored.
+CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
+    CPDF_IndirectObjectHolder* pObjList,
+    FX_FILESIZE pos,
+    FX_DWORD objnum,
+    FX_FILESIZE* pResultPos) {
+  const FX_FILESIZE callerPos = m_pSyntax->SavePos();
+  m_pSyntax->RestorePos(pos);
+
+  CPDF_Object* pParsed = nullptr;
+  bool bNumeric;
+  CFX_ByteString token = m_pSyntax->GetNextWord(&bNumeric);
+  if (bNumeric) {
+    const FX_DWORD fileObjNum = FXSYS_atoui(token);
+    // An |objnum| of 0 accepts whatever number the file carries.
+    if (!objnum || fileObjNum == objnum) {
+      token = m_pSyntax->GetNextWord(&bNumeric);
+      if (bNumeric) {
+        const FX_DWORD fileGenNum = FXSYS_atoui(token);
+        if (m_pSyntax->GetKeyword() == "obj") {
+          pParsed =
+              m_pSyntax->GetObjectByStrict(pObjList, objnum, fileGenNum);
+          if (pResultPos)
+            *pResultPos = m_pSyntax->m_Pos;
+        }
+      }
+    }
+  }
+
+  m_pSyntax->RestorePos(callerPos);
+  return pParsed;
+}
+
+// Reads a "trailer" keyword followed by an object at the current parser
+// position. Returns the object as a dictionary owned by the caller, or
+// nullptr when the keyword is missing or the object is not a dictionary (in
+// which case the parsed object is released).
+CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
+  if (m_pSyntax->GetKeyword() != "trailer")
+    return nullptr;
+
+  using ScopedObject =
+      std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>>;
+  ScopedObject pTrailerObj(m_pSyntax->GetObject(m_pDocument, 0, 0, true));
+
+  const bool bIsDictionary = ToDictionary(pTrailerObj.get()) != nullptr;
+  return bIsDictionary ? pTrailerObj.release()->AsDictionary() : nullptr;
+}
+
+// Returns the permission flags reported by the security handler, or all bits
+// set when the document has no security handler. For an encryption
+// dictionary whose "Filter" is "Standard", the two lowest bits are cleared
+// and the bits in 0xFFFFF0C0 are forced on. If |bCheckRevision| is set and
+// "R" is 2, the bits outside 0xFFFFF0FF are cleared again.
+FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {
+  if (!m_pSecurityHandler)
+    return (FX_DWORD)-1;
+
+  FX_DWORD flags = m_pSecurityHandler->GetPermissions();
+  if (!m_pEncryptDict || m_pEncryptDict->GetStringBy("Filter") != "Standard")
+    return flags;
+
+  flags = (flags & 0xFFFFFFFC) | 0xFFFFF0C0;
+  if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2)
+    flags &= 0xFFFFF0FF;
+  return flags;
+}
+
+// Initializes the syntax parser on |pFileAccess| (header at |offset|) and
+// checks whether the first object after the header is a linearization
+// dictionary whose "L" entry equals the file size.
+//
+// On success returns TRUE, keeps the parsed object in |m_pLinearized| and
+// records "P" in |m_dwFirstPageNo| and "T" in |m_LastXRefOffset| when those
+// entries are numbers. Once an object has been parsed, every failure path
+// releases it and leaves |m_pLinearized| null.
+FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
+                                      FX_DWORD offset) {
+  m_pSyntax->InitParser(pFileAccess, offset);
+  // Start reading 9 bytes past the header offset.
+  m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
+
+  FX_FILESIZE SavedPos = m_pSyntax->SavePos();
+  bool bIsNumber;
+  CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
+  if (!bIsNumber)
+    return FALSE;
+
+  FX_DWORD objnum = FXSYS_atoui(word);
+  word = m_pSyntax->GetNextWord(&bIsNumber);
+  if (!bIsNumber)
+    return FALSE;
+
+  FX_DWORD gennum = FXSYS_atoui(word);
+  if (m_pSyntax->GetKeyword() != "obj") {
+    m_pSyntax->RestorePos(SavedPos);
+    return FALSE;
+  }
+
+  m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true);
+  if (!m_pLinearized)
+    return FALSE;
+
+  CPDF_Dictionary* pDict = m_pLinearized->GetDict();
+  if (!pDict || !pDict->GetElement("Linearized")) {
+    m_pLinearized->Release();
+    m_pLinearized = nullptr;
+    return FALSE;
+  }
+
+  m_pSyntax->GetNextWord(nullptr);
+
+  // A missing "L" or one that disagrees with the actual file size means the
+  // dictionary cannot be used. Previously the mismatch case returned FALSE
+  // but left |m_pLinearized| set; it is now released like the other failures.
+  CPDF_Object* pLen = pDict->GetElement("L");
+  if (!pLen || pLen->GetInteger() != (int)pFileAccess->GetSize()) {
+    m_pLinearized->Release();
+    m_pLinearized = nullptr;
+    return FALSE;
+  }
+
+  if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
+    m_dwFirstPageNo = pNo->GetInteger();
+
+  if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T")))
+    m_LastXRefOffset = pTable->GetInteger();
+
+  return TRUE;
+}
+
+// Starts parsing |pFileAccess| as a linearized PDF. Files without a usable
+// linearization dictionary are handed to StartParse() instead.
+//
+// For linearized files the first-page cross-reference data following the
+// linearization dictionary is loaded (table form first, then stream form,
+// then a full rebuild as a last resort), the encryption handler is set up and
+// the document is loaded from the linearization dictionary. If no root or no
+// pages are found, the cross-reference table is rebuilt once and loading is
+// retried.
+//
+// Returns SUCCESS, FORMAT_ERROR, or the error from SetEncryptHandler().
+CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) {
+  CloseParser();
+  m_bXRefStream = FALSE;
+  m_LastXRefOffset = 0;
+  m_bOwnFileRead = true;
+
+  int32_t offset = GetHeaderOffset(pFileAccess);
+  if (offset == -1)
+    return FORMAT_ERROR;
+
+  if (!IsLinearizedFile(pFileAccess, offset)) {
+    // IsLinearizedFile() attached the file to the syntax parser. Detach it
+    // before StartParse(), which presumably closes the parser first and
+    // would otherwise release the file it is about to parse.
+    m_pSyntax->m_pFileAccess = nullptr;
+    return StartParse(pFileAccess);
+  }
+
+  m_pDocument = new CPDF_Document(this);
+  FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
+
+  FX_BOOL bXRefRebuilt = FALSE;
+  const FX_BOOL bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE);
+  if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
+    if (!RebuildCrossRef())
+      return FORMAT_ERROR;
+
+    bXRefRebuilt = TRUE;
+    m_LastXRefOffset = 0;
+  }
+
+  if (bLoadV4) {
+    m_pTrailer = LoadTrailerV4();
+    if (!m_pTrailer)
+      return SUCCESS;
+
+    int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
+    if (xrefsize > 0)
+      ShrinkObjectMap(xrefsize);
+  }
+
+  Error eRet = SetEncryptHandler();
+  if (eRet != SUCCESS)
+    return eRet;
+
+  m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
+  if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
+    // A rebuilt table that still yields no document cannot be improved.
+    if (bXRefRebuilt)
+      return FORMAT_ERROR;
+
+    ReleaseEncryptHandler();
+    if (!RebuildCrossRef())
+      return FORMAT_ERROR;
+
+    eRet = SetEncryptHandler();
+    if (eRet != SUCCESS)
+      return eRet;
+
+    m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
+    if (!m_pDocument->GetRoot())
+      return FORMAT_ERROR;
+  }
+
+  if (GetRootObjNum() == 0) {
+    ReleaseEncryptHandler();
+    if (!RebuildCrossRef() || GetRootObjNum() == 0)
+      return FORMAT_ERROR;
+
+    eRet = SetEncryptHandler();
+    if (eRet != SUCCESS)
+      return eRet;
+  }
+
+  // Record the metadata object number only when the metadata stream is NOT
+  // encrypted, as StartParse() does. The condition used to be inverted here.
+  // NOTE(review): the syntax parser presumably uses this number to skip
+  // decryption for that one object; confirm against CPDF_SyntaxParser.
+  if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
+    if (CPDF_Reference* pMetadata =
+            ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))
+      m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
+  }
+  return SUCCESS;
+}
+
+// Loads the cross-reference stream at |xrefpos| and then every further one
+// that LoadCrossRefV5() chains to through its in/out position argument, until
+// that position becomes 0. Returns FALSE if any load fails or if the chain
+// revisits a position. On success clears |m_ObjectStreamMap| and sets
+// |m_bXRefStream|.
+FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
+  if (!LoadCrossRefV5(&xrefpos, FALSE))
+    return FALSE;
+
+  std::set<FX_FILESIZE> visited;
+  while (xrefpos != 0) {
+    visited.insert(xrefpos);
+    if (!LoadCrossRefV5(&xrefpos, FALSE))
+      return FALSE;
+
+    // A position seen before means the chain loops back on itself.
+    if (visited.count(xrefpos) != 0)
+      return FALSE;
+  }
+
+  m_ObjectStreamMap.clear();
+  m_bXRefStream = TRUE;
+  return TRUE;
+}
+
+// Loads the main cross-reference data of a linearized file, starting at
+// |m_LastXRefOffset|. The current trailer is released, leading whitespace at
+// the offset is skipped, the object-stream caches are cleared, and the xref
+// data is loaded as a table or, failing that, as a stream.
+// |m_MetadataObjnum| is zeroed for the duration of the load and restored
+// before returning. Returns SUCCESS, or FORMAT_ERROR (with |m_LastXRefOffset|
+// reset to 0) when neither form can be loaded.
+CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
+  FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
+  m_pSyntax->m_MetadataObjnum = 0;
+  if (m_pTrailer) {
+    m_pTrailer->Release();
+    m_pTrailer = nullptr;
+  }
+
+  m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
+  uint8_t ch = 0;
+  FX_DWORD dwCount = 0;
+  m_pSyntax->GetNextChar(ch);
+  while (PDFCharIsWhitespace(ch)) {
+    ++dwCount;
+    // Stop once the read position reaches the end of the file. The test used
+    // to be ">=", which is true for any in-file position, so the loop broke
+    // after the first whitespace byte.
+    if (m_pSyntax->m_FileLen <=
+        (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
+      break;
+    }
+    m_pSyntax->GetNextChar(ch);
+  }
+  m_LastXRefOffset += dwCount;
+  m_ObjectStreamMap.clear();
+  m_ObjCache.clear();
+
+  if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
+      !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
+    m_LastXRefOffset = 0;
+    m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
+    return FORMAT_ERROR;
+  }
+
+  m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
+  return SUCCESS;
+}
diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp
new file mode 100644
index 0000000000..ad656795f2
--- /dev/null
+++ b/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp
@@ -0,0 +1,170 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#include "core/include/fpdfapi/cpdf_simple_parser.h"
+
+#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
+
+// Creates a parser over the |dwSize| bytes at |pData|, positioned at the
+// start. Only the pointer is stored; the data is not copied.
+CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize)
+    : m_pData(pData),
+      m_dwSize(dwSize),
+      m_dwCurPos(0) {
+}
+
+// Creates a parser over the bytes viewed by |str|, positioned at the start.
+// Delegates to the pointer/size constructor.
+CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str)
+    : CPDF_SimpleParser(str.GetPtr(), str.GetLength()) {}
+
+// Scans the next token starting at |m_dwCurPos|, skipping leading whitespace
+// and '%' comments, and advances |m_dwCurPos| past it.
+//
+// Outputs: |pStart| points at the first byte of the token and |dwSize| is its
+// length. If the data ends before a token starts, |pStart| is nullptr and
+// |dwSize| is 0. A '/' name that runs to the very end of the data is reported
+// with |dwSize| 0, as before. "<<" and ">>" are returned as two-byte tokens;
+// every other delimiter as a single byte.
+void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) {
+  pStart = nullptr;
+  dwSize = 0;
+
+  // Find the first byte that is neither whitespace nor inside a comment.
+  uint8_t cur;
+  for (;;) {
+    do {
+      if (m_dwCurPos >= m_dwSize)
+        return;
+      cur = m_pData[m_dwCurPos++];
+    } while (PDFCharIsWhitespace(cur));
+
+    if (cur != '%')
+      break;
+
+    // Comment: discard everything up to and including the line ending.
+    do {
+      if (m_dwCurPos >= m_dwSize)
+        return;
+      cur = m_pData[m_dwCurPos++];
+    } while (!PDFCharIsLineEnding(cur));
+  }
+
+  const FX_DWORD wordStart = m_dwCurPos - 1;
+  pStart = m_pData + wordStart;
+
+  if (!PDFCharIsDelimiter(cur)) {
+    // Regular token: extends to the next delimiter or whitespace, which is
+    // left unconsumed.
+    dwSize = 1;
+    while (m_dwCurPos < m_dwSize) {
+      cur = m_pData[m_dwCurPos];
+      if (PDFCharIsDelimiter(cur) || PDFCharIsWhitespace(cur))
+        break;
+      ++m_dwCurPos;
+      ++dwSize;
+    }
+    return;
+  }
+
+  if (cur == '/') {
+    // Name: '/' followed by "other" or numeric characters. The size is only
+    // set when a terminating character is seen.
+    while (m_dwCurPos < m_dwSize) {
+      cur = m_pData[m_dwCurPos];
+      if (!PDFCharIsOther(cur) && !PDFCharIsNumeric(cur)) {
+        dwSize = m_dwCurPos - wordStart;
+        return;
+      }
+      ++m_dwCurPos;
+    }
+    return;
+  }
+
+  // Any other delimiter is a one-byte token, except doubled angle brackets.
+  dwSize = 1;
+  if (cur == '<' || cur == '>') {
+    if (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] == cur) {
+      ++m_dwCurPos;
+      dwSize = 2;
+    }
+  }
+}
+
+// Returns the next word as a view into the parsed data. A token starting
+// with a single '<' is extended through the next '>' (hex string). A token
+// starting with '(' is extended through its matching ')', honouring nested
+// parentheses and backslash escapes (literal string). Unterminated strings
+// run to the end of the data. All other tokens are returned as produced by
+// ParseWord(); an empty view means no more words.
+CFX_ByteStringC CPDF_SimpleParser::GetWord() {
+  const uint8_t* pWord;
+  FX_DWORD wordLen;
+  ParseWord(pWord, wordLen);
+  if (wordLen != 1)
+    return CFX_ByteStringC(pWord, wordLen);
+
+  const uint8_t opener = pWord[0];
+  if (opener == '<') {
+    // Consume up to and including the closing '>', if there is one.
+    while (m_dwCurPos < m_dwSize) {
+      if (m_pData[m_dwCurPos++] == '>')
+        break;
+    }
+    return CFX_ByteStringC(
+        pWord, static_cast<FX_STRSIZE>(m_dwCurPos - (pWord - m_pData)));
+  }
+
+  if (opener == '(') {
+    int depth = 1;
+    while (m_dwCurPos < m_dwSize) {
+      const uint8_t c = m_pData[m_dwCurPos];
+      if (c == ')') {
+        if (--depth == 0)
+          break;
+      } else if (c == '\\') {
+        // Step onto the escaped byte; it is skipped by the increment below.
+        ++m_dwCurPos;
+        if (m_dwCurPos >= m_dwSize)
+          break;
+      } else if (c == '(') {
+        ++depth;
+      }
+      ++m_dwCurPos;
+    }
+    // Include the closing ')' when the string was terminated.
+    if (m_dwCurPos < m_dwSize)
+      ++m_dwCurPos;
+    return CFX_ByteStringC(
+        pWord, static_cast<FX_STRSIZE>(m_dwCurPos - (pWord - m_pData)));
+  }
+
+  return CFX_ByteStringC(pWord, wordLen);
+}
+
+// Scans the data from the beginning for the word |token|. On the first match
+// that is preceded by at least |nParams| words, positions the parser at the
+// start of the |nParams|-th word before the token and returns true. Returns
+// false when the data is exhausted first, or when |nParams| is negative.
+bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token,
+                                              int nParams) {
+  // A negative count used to produce a zero or negative allocation size
+  // followed by unbounded writes into the buffer.
+  if (nParams < 0)
+    return false;
+
+  // Ring buffer holding the start positions of the last |nParams| + 1 words.
+  const int slots = nParams + 1;
+  FX_DWORD* pPositions = FX_Alloc(FX_DWORD, slots);
+  int next = 0;
+  int filled = 0;
+  bool bFound = false;
+
+  m_dwCurPos = 0;
+  for (;;) {
+    pPositions[next] = m_dwCurPos;
+    next = (next + 1) % slots;
+    if (filled < slots)
+      ++filled;
+
+    const CFX_ByteStringC word = GetWord();
+    if (word.IsEmpty())
+      break;
+
+    // Only accept a match once enough preceding words have been recorded;
+    // |next| then indexes the oldest recorded position.
+    if (word == token && filled == slots) {
+      m_dwCurPos = pPositions[next];
+      bFound = true;
+      break;
+    }
+  }
+
+  FX_Free(pPositions);
+  return bFound;
+}
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser_unittest.cpp
index 2eb930bef6..f95838d91f 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp
+++ b/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser_unittest.cpp
@@ -2,10 +2,12 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "core/include/fpdfapi/fpdf_parser.h"
+#include "core/include/fpdfapi/cpdf_simple_parser.h"
#include <string>
+#include "core/include/fpdfapi/fpdf_parser.h"
+#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/test_support.h"
diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp
index 3091ab108b..151139e4a4 100644
--- a/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp
+++ b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp
@@ -11,6 +11,7 @@
#include "core/include/fpdfapi/fpdf_module.h"
#include "core/include/fpdfapi/fpdf_parser.h"
#include "core/include/fxcrt/fx_ext.h"
+#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
#include "third_party/base/numerics/safe_math.h"
namespace {
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp
index 4fb471713b..473ea76e76 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp
@@ -12,6 +12,7 @@
#include "core/include/fpdfapi/fpdf_parser.h"
#include "core/include/fxcodec/fx_codec.h"
#include "core/include/fxcrt/fx_ext.h"
+#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
#include "third_party/base/stl_util.h"
#define _STREAM_MAX_SIZE_ 20 * 1024 * 1024
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp
index 95907ae8fd..9fe534ec35 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp
@@ -8,6 +8,8 @@
#include <time.h>
+#include "core/include/fpdfapi/cpdf_parser.h"
+#include "core/include/fpdfapi/cpdf_simple_parser.h"
#include "core/include/fdrm/fx_crypt.h"
const uint8_t defpasscode[32] = {
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp
index bed70ebd5d..28c35c899e 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp
@@ -8,6 +8,7 @@
#include <algorithm>
+#include "core/include/fpdfapi/cpdf_parser.h"
#include "core/include/fpdfapi/fpdf_parser.h"
#include "core/include/fxcrt/fx_string.h"
#include "third_party/base/stl_util.h"
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
index 2b61cdc2a2..0d62e4d3ed 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp
@@ -13,68 +13,23 @@
#include <vector>
#include "core/include/fpdfapi/cpdf_document.h"
+#include "core/include/fpdfapi/cpdf_parser.h"
#include "core/include/fpdfapi/fpdf_module.h"
#include "core/include/fpdfapi/fpdf_page.h"
#include "core/include/fxcrt/fx_ext.h"
#include "core/include/fxcrt/fx_safe_types.h"
#include "core/src/fpdfapi/fpdf_page/pageint.h"
#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
+#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
#include "core/src/fpdfapi/fpdf_parser/parser_int.h"
#include "third_party/base/stl_util.h"
namespace {
-// A limit on the size of the xref table. Theoretical limits are higher, but
-// this may be large enough in practice.
-const int32_t kMaxXRefSize = 1048576;
-
-// A limit on the maximum object number in the xref table. Theoretical limits
-// are higher, but this may be large enough in practice.
-const FX_DWORD kMaxObjectNumber = 1048576;
-
-int32_t GetHeaderOffset(IFX_FileRead* pFile) {
- // TODO(dsinclair): This is a complicated way of saying %PDF, simplify?
- const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
-
- const size_t kBufSize = 4;
- uint8_t buf[kBufSize];
- int32_t offset = 0;
- while (offset <= 1024) {
- if (!pFile->ReadBlock(buf, offset, kBufSize))
- return -1;
-
- if (*(FX_DWORD*)buf == tag)
- return offset;
-
- ++offset;
- }
- return -1;
-}
-
-int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) {
- CPDF_Number* pObj = ToNumber(pDict->GetElement(key));
- return pObj ? pObj->GetInteger() : 0;
-}
-
-FX_DWORD GetVarInt(const uint8_t* p, int32_t n) {
- FX_DWORD result = 0;
- for (int32_t i = 0; i < n; ++i)
- result = result * 256 + p[i];
- return result;
-}
-
-int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
- return pObjStream->GetDict()->GetIntegerBy("N");
-}
-
-int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
- return pObjStream->GetDict()->GetIntegerBy("First");
-}
-
bool CanReadFromBitStream(const CFX_BitStream* hStream,
const FX_SAFE_DWORD& num_bits) {
- return (num_bits.IsValid() &&
- hStream->BitsRemaining() >= num_bits.ValueOrDie());
+ return num_bits.IsValid() &&
+ hStream->BitsRemaining() >= num_bits.ValueOrDie();
}
} // namespace
@@ -86,1607 +41,6 @@ bool IsSignatureDict(const CPDF_Dictionary* pDict) {
return pType && pType->GetString() == "Sig";
}
-CPDF_Parser::CPDF_Parser()
- : m_pDocument(nullptr),
- m_bOwnFileRead(true),
- m_FileVersion(0),
- m_pTrailer(nullptr),
- m_pEncryptDict(nullptr),
- m_pLinearized(nullptr),
- m_dwFirstPageNo(0),
- m_dwXrefStartObjNum(0) {
- m_pSyntax.reset(new CPDF_SyntaxParser);
-}
-
-CPDF_Parser::~CPDF_Parser() {
- CloseParser();
-}
-
-FX_DWORD CPDF_Parser::GetLastObjNum() const {
- return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
-}
-
-bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {
- return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
-}
-
-FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const {
- auto it = m_ObjectInfo.find(objnum);
- return it != m_ObjectInfo.end() ? it->second.pos : 0;
-}
-
-uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const {
- ASSERT(IsValidObjectNumber(objnum));
- auto it = m_ObjectInfo.find(objnum);
- return it != m_ObjectInfo.end() ? it->second.type : 0;
-}
-
-uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const {
- ASSERT(IsValidObjectNumber(objnum));
- auto it = m_ObjectInfo.find(objnum);
- return it != m_ObjectInfo.end() ? it->second.gennum : 0;
-}
-
-bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const {
- uint8_t type = GetObjectType(objnum);
- return type == 0 || type == 255;
-}
-
-void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
- m_pEncryptDict = pDict;
-}
-
-CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
- return m_pSyntax->m_pCryptoHandler.get();
-}
-
-IFX_FileRead* CPDF_Parser::GetFileAccess() const {
- return m_pSyntax->m_pFileAccess;
-}
-
-void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {
- if (objnum == 0) {
- m_ObjectInfo.clear();
- return;
- }
-
- auto it = m_ObjectInfo.lower_bound(objnum);
- while (it != m_ObjectInfo.end()) {
- auto saved_it = it++;
- m_ObjectInfo.erase(saved_it);
- }
-
- if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
- m_ObjectInfo[objnum - 1].pos = 0;
-}
-
-void CPDF_Parser::CloseParser() {
- m_bVersionUpdated = FALSE;
- delete m_pDocument;
- m_pDocument = nullptr;
-
- if (m_pTrailer) {
- m_pTrailer->Release();
- m_pTrailer = nullptr;
- }
- ReleaseEncryptHandler();
- SetEncryptDictionary(nullptr);
-
- if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) {
- m_pSyntax->m_pFileAccess->Release();
- m_pSyntax->m_pFileAccess = nullptr;
- }
-
- m_ObjectStreamMap.clear();
- m_ObjCache.clear();
- m_SortedOffset.clear();
- m_ObjectInfo.clear();
-
- int32_t iLen = m_Trailers.GetSize();
- for (int32_t i = 0; i < iLen; ++i) {
- if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))
- trailer->Release();
- }
- m_Trailers.RemoveAll();
-
- if (m_pLinearized) {
- m_pLinearized->Release();
- m_pLinearized = nullptr;
- }
-}
-
-CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) {
- CloseParser();
-
- m_bXRefStream = FALSE;
- m_LastXRefOffset = 0;
- m_bOwnFileRead = true;
-
- int32_t offset = GetHeaderOffset(pFileAccess);
- if (offset == -1) {
- if (pFileAccess)
- pFileAccess->Release();
- return FORMAT_ERROR;
- }
- m_pSyntax->InitParser(pFileAccess, offset);
-
- uint8_t ch;
- if (!m_pSyntax->GetCharAt(5, ch))
- return FORMAT_ERROR;
- if (std::isdigit(ch))
- m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;
-
- if (!m_pSyntax->GetCharAt(7, ch))
- return FORMAT_ERROR;
- if (std::isdigit(ch))
- m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
-
- if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
- return FORMAT_ERROR;
-
- m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
- m_pDocument = new CPDF_Document(this);
-
- FX_BOOL bXRefRebuilt = FALSE;
- if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) {
- m_SortedOffset.insert(m_pSyntax->SavePos());
- m_pSyntax->GetKeyword();
-
- bool bNumber;
- CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
- if (!bNumber)
- return FORMAT_ERROR;
-
- m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
- if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
- !LoadAllCrossRefV5(m_LastXRefOffset)) {
- if (!RebuildCrossRef())
- return FORMAT_ERROR;
-
- bXRefRebuilt = TRUE;
- m_LastXRefOffset = 0;
- }
- } else {
- if (!RebuildCrossRef())
- return FORMAT_ERROR;
-
- bXRefRebuilt = TRUE;
- }
- Error eRet = SetEncryptHandler();
- if (eRet != SUCCESS)
- return eRet;
-
- m_pDocument->LoadDoc();
- if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
- if (bXRefRebuilt)
- return FORMAT_ERROR;
-
- ReleaseEncryptHandler();
- if (!RebuildCrossRef())
- return FORMAT_ERROR;
-
- eRet = SetEncryptHandler();
- if (eRet != SUCCESS)
- return eRet;
-
- m_pDocument->LoadDoc();
- if (!m_pDocument->GetRoot())
- return FORMAT_ERROR;
- }
- if (GetRootObjNum() == 0) {
- ReleaseEncryptHandler();
- if (!RebuildCrossRef() || GetRootObjNum() == 0)
- return FORMAT_ERROR;
-
- eRet = SetEncryptHandler();
- if (eRet != SUCCESS)
- return eRet;
- }
- if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
- CPDF_Reference* pMetadata =
- ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));
- if (pMetadata)
- m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
- }
- return SUCCESS;
-}
-CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
- ReleaseEncryptHandler();
- SetEncryptDictionary(nullptr);
-
- if (!m_pTrailer)
- return FORMAT_ERROR;
-
- CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt");
- if (pEncryptObj) {
- if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
- SetEncryptDictionary(pEncryptDict);
- } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
- pEncryptObj = m_pDocument->GetIndirectObject(pRef->GetRefObjNum());
- if (pEncryptObj)
- SetEncryptDictionary(pEncryptObj->GetDict());
- }
- }
-
- if (m_pEncryptDict) {
- CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter");
- std::unique_ptr<IPDF_SecurityHandler> pSecurityHandler;
- Error err = HANDLER_ERROR;
- if (filter == "Standard") {
- pSecurityHandler.reset(new CPDF_StandardSecurityHandler);
- err = PASSWORD_ERROR;
- }
- if (!pSecurityHandler)
- return HANDLER_ERROR;
-
- if (!pSecurityHandler->OnInit(this, m_pEncryptDict))
- return err;
-
- m_pSecurityHandler = std::move(pSecurityHandler);
- std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
- m_pSecurityHandler->CreateCryptoHandler());
- if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
- return HANDLER_ERROR;
- m_pSyntax->SetEncrypt(std::move(pCryptoHandler));
- }
- return SUCCESS;
-}
-
-void CPDF_Parser::ReleaseEncryptHandler() {
- m_pSyntax->m_pCryptoHandler.reset();
- m_pSecurityHandler.reset();
-}
-
-FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {
- if (!IsValidObjectNumber(objnum))
- return 0;
-
- if (GetObjectType(objnum) == 1)
- return GetObjectPositionOrZero(objnum);
-
- if (GetObjectType(objnum) == 2) {
- FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
- return GetObjectPositionOrZero(pos);
- }
- return 0;
-}
-
-FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
- if (!LoadCrossRefV4(xrefpos, 0, TRUE))
- return FALSE;
-
- m_pTrailer = LoadTrailerV4();
- if (!m_pTrailer)
- return FALSE;
-
- int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
- if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
- ShrinkObjectMap(xrefsize);
-
- std::vector<FX_FILESIZE> CrossRefList;
- std::vector<FX_FILESIZE> XRefStreamList;
- std::set<FX_FILESIZE> seen_xrefpos;
-
- CrossRefList.push_back(xrefpos);
- XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
- seen_xrefpos.insert(xrefpos);
-
- // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
- // numerical, GetDirectInteger() returns 0. Loading will end.
- xrefpos = GetDirectInteger(m_pTrailer, "Prev");
- while (xrefpos) {
- // Check for circular references.
- if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
- return FALSE;
-
- seen_xrefpos.insert(xrefpos);
-
- // SLOW ...
- CrossRefList.insert(CrossRefList.begin(), xrefpos);
- LoadCrossRefV4(xrefpos, 0, TRUE);
-
- std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
- LoadTrailerV4());
- if (!pDict)
- return FALSE;
-
- xrefpos = GetDirectInteger(pDict.get(), "Prev");
-
- // SLOW ...
- XRefStreamList.insert(XRefStreamList.begin(),
- pDict->GetIntegerBy("XRefStm"));
- m_Trailers.Add(pDict.release());
- }
-
- for (size_t i = 0; i < CrossRefList.size(); ++i) {
- if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
- return FALSE;
- }
- return TRUE;
-}
-
-FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
- FX_DWORD dwObjCount) {
- if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
- return FALSE;
-
- m_pTrailer = LoadTrailerV4();
- if (!m_pTrailer)
- return FALSE;
-
- int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
- if (xrefsize == 0)
- return FALSE;
-
- std::vector<FX_FILESIZE> CrossRefList;
- std::vector<FX_FILESIZE> XRefStreamList;
- std::set<FX_FILESIZE> seen_xrefpos;
-
- CrossRefList.push_back(xrefpos);
- XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm"));
- seen_xrefpos.insert(xrefpos);
-
- xrefpos = GetDirectInteger(m_pTrailer, "Prev");
- while (xrefpos) {
- // Check for circular references.
- if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
- return FALSE;
-
- seen_xrefpos.insert(xrefpos);
-
- // SLOW ...
- CrossRefList.insert(CrossRefList.begin(), xrefpos);
- LoadCrossRefV4(xrefpos, 0, TRUE);
-
- std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
- LoadTrailerV4());
- if (!pDict)
- return FALSE;
-
- xrefpos = GetDirectInteger(pDict.get(), "Prev");
-
- // SLOW ...
- XRefStreamList.insert(XRefStreamList.begin(),
- pDict->GetIntegerBy("XRefStm"));
- m_Trailers.Add(pDict.release());
- }
-
- for (size_t i = 1; i < CrossRefList.size(); ++i) {
- if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
- return FALSE;
- }
- return TRUE;
-}
-
-FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
- FX_DWORD dwObjCount) {
- FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
-
- m_pSyntax->RestorePos(dwStartPos);
- m_SortedOffset.insert(pos);
-
- FX_DWORD start_objnum = 0;
- FX_DWORD count = dwObjCount;
- FX_FILESIZE SavedPos = m_pSyntax->SavePos();
-
- const int32_t recordsize = 20;
- std::vector<char> buf(1024 * recordsize + 1);
- buf[1024 * recordsize] = '\0';
-
- int32_t nBlocks = count / 1024 + 1;
- for (int32_t block = 0; block < nBlocks; block++) {
- int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
- FX_DWORD dwReadSize = block_size * recordsize;
- if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
- return FALSE;
-
- if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
- dwReadSize)) {
- return FALSE;
- }
-
- for (int32_t i = 0; i < block_size; i++) {
- FX_DWORD objnum = start_objnum + block * 1024 + i;
- char* pEntry = &buf[i * recordsize];
- if (pEntry[17] == 'f') {
- m_ObjectInfo[objnum].pos = 0;
- m_ObjectInfo[objnum].type = 0;
- } else {
- int32_t offset = FXSYS_atoi(pEntry);
- if (offset == 0) {
- for (int32_t c = 0; c < 10; c++) {
- if (!std::isdigit(pEntry[c]))
- return FALSE;
- }
- }
-
- m_ObjectInfo[objnum].pos = offset;
- int32_t version = FXSYS_atoi(pEntry + 11);
- if (version >= 1)
- m_bVersionUpdated = TRUE;
-
- m_ObjectInfo[objnum].gennum = version;
- if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
- m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
-
- m_ObjectInfo[objnum].type = 1;
- }
- }
- }
- m_pSyntax->RestorePos(SavedPos + count * recordsize);
- return TRUE;
-}
-
-bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
- FX_FILESIZE streampos,
- FX_BOOL bSkip) {
- m_pSyntax->RestorePos(pos);
- if (m_pSyntax->GetKeyword() != "xref")
- return false;
-
- m_SortedOffset.insert(pos);
- if (streampos)
- m_SortedOffset.insert(streampos);
-
- while (1) {
- FX_FILESIZE SavedPos = m_pSyntax->SavePos();
- bool bIsNumber;
- CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
- if (word.IsEmpty())
- return false;
-
- if (!bIsNumber) {
- m_pSyntax->RestorePos(SavedPos);
- break;
- }
-
- FX_DWORD start_objnum = FXSYS_atoui(word);
- if (start_objnum >= kMaxObjectNumber)
- return false;
-
- FX_DWORD count = m_pSyntax->GetDirectNum();
- m_pSyntax->ToNextWord();
- SavedPos = m_pSyntax->SavePos();
- const int32_t recordsize = 20;
-
- m_dwXrefStartObjNum = start_objnum;
- if (!bSkip) {
- std::vector<char> buf(1024 * recordsize + 1);
- buf[1024 * recordsize] = '\0';
-
- int32_t nBlocks = count / 1024 + 1;
- for (int32_t block = 0; block < nBlocks; block++) {
- int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
- m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
- block_size * recordsize);
-
- for (int32_t i = 0; i < block_size; i++) {
- FX_DWORD objnum = start_objnum + block * 1024 + i;
- char* pEntry = &buf[i * recordsize];
- if (pEntry[17] == 'f') {
- m_ObjectInfo[objnum].pos = 0;
- m_ObjectInfo[objnum].type = 0;
- } else {
- FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
- if (offset == 0) {
- for (int32_t c = 0; c < 10; c++) {
- if (!std::isdigit(pEntry[c]))
- return false;
- }
- }
-
- m_ObjectInfo[objnum].pos = offset;
- int32_t version = FXSYS_atoi(pEntry + 11);
- if (version >= 1)
- m_bVersionUpdated = TRUE;
-
- m_ObjectInfo[objnum].gennum = version;
- if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
- m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
-
- m_ObjectInfo[objnum].type = 1;
- }
- }
- }
- }
- m_pSyntax->RestorePos(SavedPos + count * recordsize);
- }
- return !streampos || LoadCrossRefV5(&streampos, FALSE);
-}
-
-FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
- if (!LoadCrossRefV5(&xrefpos, TRUE))
- return FALSE;
-
- std::set<FX_FILESIZE> seen_xrefpos;
- while (xrefpos) {
- seen_xrefpos.insert(xrefpos);
- if (!LoadCrossRefV5(&xrefpos, FALSE))
- return FALSE;
-
- // Check for circular references.
- if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
- return FALSE;
- }
- m_ObjectStreamMap.clear();
- m_bXRefStream = TRUE;
- return TRUE;
-}
-
-FX_BOOL CPDF_Parser::RebuildCrossRef() {
- m_ObjectInfo.clear();
- m_SortedOffset.clear();
- if (m_pTrailer) {
- m_pTrailer->Release();
- m_pTrailer = nullptr;
- }
-
- ParserState state = ParserState::kDefault;
-
- int32_t inside_index = 0;
- FX_DWORD objnum = 0;
- FX_DWORD gennum = 0;
- int32_t depth = 0;
-
- const FX_DWORD kBufferSize = 4096;
- std::vector<uint8_t> buffer(kBufferSize);
-
- FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
- FX_FILESIZE start_pos = 0;
- FX_FILESIZE start_pos1 = 0;
- FX_FILESIZE last_obj = -1;
- FX_FILESIZE last_xref = -1;
- FX_FILESIZE last_trailer = -1;
-
- while (pos < m_pSyntax->m_FileLen) {
- const FX_FILESIZE saved_pos = pos;
- bool bOverFlow = false;
- FX_DWORD size =
- std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize);
- if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
- break;
-
- for (FX_DWORD i = 0; i < size; i++) {
- uint8_t byte = buffer[i];
- switch (state) {
- case ParserState::kDefault:
- if (PDFCharIsWhitespace(byte)) {
- state = ParserState::kWhitespace;
- } else if (std::isdigit(byte)) {
- --i;
- state = ParserState::kWhitespace;
- } else if (byte == '%') {
- inside_index = 0;
- state = ParserState::kComment;
- } else if (byte == '(') {
- state = ParserState::kString;
- depth = 1;
- } else if (byte == '<') {
- inside_index = 1;
- state = ParserState::kHexString;
- } else if (byte == '\\') {
- state = ParserState::kEscapedString;
- } else if (byte == 't') {
- state = ParserState::kTrailer;
- inside_index = 1;
- }
- break;
-
- case ParserState::kWhitespace:
- if (std::isdigit(byte)) {
- start_pos = pos + i;
- state = ParserState::kObjNum;
- objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
- } else if (byte == 't') {
- state = ParserState::kTrailer;
- inside_index = 1;
- } else if (byte == 'x') {
- state = ParserState::kXref;
- inside_index = 1;
- } else if (!PDFCharIsWhitespace(byte)) {
- --i;
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kObjNum:
- if (std::isdigit(byte)) {
- objnum =
- objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
- } else if (PDFCharIsWhitespace(byte)) {
- state = ParserState::kPostObjNum;
- } else {
- --i;
- state = ParserState::kEndObj;
- inside_index = 0;
- }
- break;
-
- case ParserState::kPostObjNum:
- if (std::isdigit(byte)) {
- start_pos1 = pos + i;
- state = ParserState::kGenNum;
- gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
- } else if (byte == 't') {
- state = ParserState::kTrailer;
- inside_index = 1;
- } else if (!PDFCharIsWhitespace(byte)) {
- --i;
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kGenNum:
- if (std::isdigit(byte)) {
- gennum =
- gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
- } else if (PDFCharIsWhitespace(byte)) {
- state = ParserState::kPostGenNum;
- } else {
- --i;
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kPostGenNum:
- if (byte == 'o') {
- state = ParserState::kBeginObj;
- inside_index = 1;
- } else if (std::isdigit(byte)) {
- objnum = gennum;
- gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
- start_pos = start_pos1;
- start_pos1 = pos + i;
- state = ParserState::kGenNum;
- } else if (byte == 't') {
- state = ParserState::kTrailer;
- inside_index = 1;
- } else if (!PDFCharIsWhitespace(byte)) {
- --i;
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kBeginObj:
- switch (inside_index) {
- case 1:
- if (byte != 'b') {
- --i;
- state = ParserState::kDefault;
- } else {
- inside_index++;
- }
- break;
- case 2:
- if (byte != 'j') {
- --i;
- state = ParserState::kDefault;
- } else {
- inside_index++;
- }
- break;
- case 3:
- if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
- FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
- m_SortedOffset.insert(obj_pos);
- last_obj = start_pos;
- FX_FILESIZE obj_end = 0;
- CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
- m_pDocument, obj_pos, objnum, &obj_end);
- if (CPDF_Stream* pStream = ToStream(pObject)) {
- if (CPDF_Dictionary* pDict = pStream->GetDict()) {
- if ((pDict->KeyExist("Type")) &&
- (pDict->GetStringBy("Type") == "XRef" &&
- pDict->KeyExist("Size"))) {
- CPDF_Object* pRoot = pDict->GetElement("Root");
- if (pRoot && pRoot->GetDict() &&
- pRoot->GetDict()->GetElement("Pages")) {
- if (m_pTrailer)
- m_pTrailer->Release();
- m_pTrailer = ToDictionary(pDict->Clone());
- }
- }
- }
- }
-
- FX_FILESIZE offset = 0;
- m_pSyntax->RestorePos(obj_pos);
- offset = m_pSyntax->FindTag("obj", 0);
- if (offset == -1)
- offset = 0;
- else
- offset += 3;
-
- FX_FILESIZE nLen = obj_end - obj_pos - offset;
- if ((FX_DWORD)nLen > size - i) {
- pos = obj_end + m_pSyntax->m_HeaderOffset;
- bOverFlow = true;
- } else {
- i += (FX_DWORD)nLen;
- }
-
- if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
- m_ObjectInfo[objnum].pos) {
- if (pObject) {
- FX_DWORD oldgen = GetObjectGenNum(objnum);
- m_ObjectInfo[objnum].pos = obj_pos;
- m_ObjectInfo[objnum].gennum = gennum;
- if (oldgen != gennum)
- m_bVersionUpdated = TRUE;
- }
- } else {
- m_ObjectInfo[objnum].pos = obj_pos;
- m_ObjectInfo[objnum].type = 1;
- m_ObjectInfo[objnum].gennum = gennum;
- }
-
- if (pObject)
- pObject->Release();
- }
- --i;
- state = ParserState::kDefault;
- break;
- }
- break;
-
- case ParserState::kTrailer:
- if (inside_index == 7) {
- if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
- last_trailer = pos + i - 7;
- m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
-
- CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true);
- if (pObj) {
- if (!pObj->IsDictionary() && !pObj->AsStream()) {
- pObj->Release();
- } else {
- CPDF_Stream* pStream = pObj->AsStream();
- if (CPDF_Dictionary* pTrailer =
- pStream ? pStream->GetDict() : pObj->AsDictionary()) {
- if (m_pTrailer) {
- CPDF_Object* pRoot = pTrailer->GetElement("Root");
- CPDF_Reference* pRef = ToReference(pRoot);
- if (!pRoot ||
- (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
- m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
- auto it = pTrailer->begin();
- while (it != pTrailer->end()) {
- const CFX_ByteString& key = it->first;
- CPDF_Object* pElement = it->second;
- ++it;
- FX_DWORD dwObjNum =
- pElement ? pElement->GetObjNum() : 0;
- if (dwObjNum) {
- m_pTrailer->SetAtReference(key, m_pDocument,
- dwObjNum);
- } else {
- m_pTrailer->SetAt(key, pElement->Clone());
- }
- }
- }
- pObj->Release();
- } else {
- if (pObj->IsStream()) {
- m_pTrailer = ToDictionary(pTrailer->Clone());
- pObj->Release();
- } else {
- m_pTrailer = pTrailer;
- }
-
- FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
- CFX_ByteString strWord = m_pSyntax->GetKeyword();
- if (!strWord.Compare("startxref")) {
- bool bNumber;
- CFX_ByteString bsOffset =
- m_pSyntax->GetNextWord(&bNumber);
- if (bNumber)
- m_LastXRefOffset = FXSYS_atoi(bsOffset);
- }
- m_pSyntax->RestorePos(dwSavePos);
- }
- } else {
- pObj->Release();
- }
- }
- }
- }
- --i;
- state = ParserState::kDefault;
- } else if (byte == "trailer"[inside_index]) {
- inside_index++;
- } else {
- --i;
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kXref:
- if (inside_index == 4) {
- last_xref = pos + i - 4;
- state = ParserState::kWhitespace;
- } else if (byte == "xref"[inside_index]) {
- inside_index++;
- } else {
- --i;
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kComment:
- if (byte == '\r' || byte == '\n')
- state = ParserState::kDefault;
- break;
-
- case ParserState::kString:
- if (byte == ')') {
- if (depth > 0)
- depth--;
- } else if (byte == '(') {
- depth++;
- }
-
- if (!depth)
- state = ParserState::kDefault;
- break;
-
- case ParserState::kHexString:
- if (byte == '>' || (byte == '<' && inside_index == 1))
- state = ParserState::kDefault;
- inside_index = 0;
- break;
-
- case ParserState::kEscapedString:
- if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
- --i;
- state = ParserState::kDefault;
- }
- break;
-
- case ParserState::kEndObj:
- if (PDFCharIsWhitespace(byte)) {
- state = ParserState::kDefault;
- } else if (byte == '%' || byte == '(' || byte == '<' ||
- byte == '\\') {
- state = ParserState::kDefault;
- --i;
- } else if (inside_index == 6) {
- state = ParserState::kDefault;
- --i;
- } else if (byte == "endobj"[inside_index]) {
- inside_index++;
- }
- break;
- }
-
- if (bOverFlow) {
- size = 0;
- break;
- }
- }
- pos += size;
-
- // If the position has not changed at all in a loop iteration, then break
- // out to prevent infinite looping.
- if (pos == saved_pos)
- break;
- }
-
- if (last_xref != -1 && last_xref > last_obj)
- last_trailer = last_xref;
- else if (last_trailer == -1 || last_xref < last_obj)
- last_trailer = m_pSyntax->m_FileLen;
-
- m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
- return m_pTrailer && !m_ObjectInfo.empty();
-}
-
-FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
- CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0);
- if (!pObject)
- return FALSE;
-
- if (m_pDocument) {
- FX_BOOL bInserted = FALSE;
- CPDF_Dictionary* pDict = m_pDocument->GetRoot();
- if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) {
- bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject);
- } else {
- if (pObject->IsStream())
- pObject->Release();
- }
-
- if (!bInserted)
- return FALSE;
- }
-
- CPDF_Stream* pStream = pObject->AsStream();
- if (!pStream)
- return FALSE;
-
- *pos = pStream->GetDict()->GetIntegerBy("Prev");
- int32_t size = pStream->GetDict()->GetIntegerBy("Size");
- if (size < 0) {
- pStream->Release();
- return FALSE;
- }
-
- if (bMainXRef) {
- m_pTrailer = ToDictionary(pStream->GetDict()->Clone());
- ShrinkObjectMap(size);
- for (auto& it : m_ObjectInfo)
- it.second.type = 0;
- } else {
- m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone()));
- }
-
- std::vector<std::pair<int32_t, int32_t> > arrIndex;
- CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index");
- if (pArray) {
- FX_DWORD nPairSize = pArray->GetCount() / 2;
- for (FX_DWORD i = 0; i < nPairSize; i++) {
- CPDF_Object* pStartNumObj = pArray->GetElement(i * 2);
- CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1);
-
- if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
- int nStartNum = pStartNumObj->GetInteger();
- int nCount = pCountObj->GetInteger();
- if (nStartNum >= 0 && nCount > 0)
- arrIndex.push_back(std::make_pair(nStartNum, nCount));
- }
- }
- }
-
- if (arrIndex.size() == 0)
- arrIndex.push_back(std::make_pair(0, size));
-
- pArray = pStream->GetDict()->GetArrayBy("W");
- if (!pArray) {
- pStream->Release();
- return FALSE;
- }
-
- CFX_DWordArray WidthArray;
- FX_SAFE_DWORD dwAccWidth = 0;
- for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
- WidthArray.Add(pArray->GetIntegerAt(i));
- dwAccWidth += WidthArray[i];
- }
-
- if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
- pStream->Release();
- return FALSE;
- }
-
- FX_DWORD totalWidth = dwAccWidth.ValueOrDie();
- CPDF_StreamAcc acc;
- acc.LoadAllData(pStream);
-
- const uint8_t* pData = acc.GetData();
- FX_DWORD dwTotalSize = acc.GetSize();
- FX_DWORD segindex = 0;
- for (FX_DWORD i = 0; i < arrIndex.size(); i++) {
- int32_t startnum = arrIndex[i].first;
- if (startnum < 0)
- continue;
-
- m_dwXrefStartObjNum =
- pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum);
- FX_DWORD count =
- pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second);
- FX_SAFE_DWORD dwCaculatedSize = segindex;
- dwCaculatedSize += count;
- dwCaculatedSize *= totalWidth;
- if (!dwCaculatedSize.IsValid() ||
- dwCaculatedSize.ValueOrDie() > dwTotalSize) {
- continue;
- }
-
- const uint8_t* segstart = pData + segindex * totalWidth;
- FX_SAFE_DWORD dwMaxObjNum = startnum;
- dwMaxObjNum += count;
- FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
- if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
- continue;
-
- for (FX_DWORD j = 0; j < count; j++) {
- int32_t type = 1;
- const uint8_t* entrystart = segstart + j * totalWidth;
- if (WidthArray[0])
- type = GetVarInt(entrystart, WidthArray[0]);
-
- if (GetObjectType(startnum + j) == 255) {
- FX_FILESIZE offset =
- GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
- m_ObjectInfo[startnum + j].pos = offset;
- m_SortedOffset.insert(offset);
- continue;
- }
-
- if (GetObjectType(startnum + j))
- continue;
-
- m_ObjectInfo[startnum + j].type = type;
- if (type == 0) {
- m_ObjectInfo[startnum + j].pos = 0;
- } else {
- FX_FILESIZE offset =
- GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
- m_ObjectInfo[startnum + j].pos = offset;
- if (type == 1) {
- m_SortedOffset.insert(offset);
- } else {
- if (offset < 0 || !IsValidObjectNumber(offset)) {
- pStream->Release();
- return FALSE;
- }
- m_ObjectInfo[offset].type = 255;
- }
- }
- }
- segindex += count;
- }
- pStream->Release();
- return TRUE;
-}
-
-CPDF_Array* CPDF_Parser::GetIDArray() {
- CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : nullptr;
- if (!pID)
- return nullptr;
-
- if (CPDF_Reference* pRef = pID->AsReference()) {
- pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum());
- m_pTrailer->SetAt("ID", pID);
- }
- return ToArray(pID);
-}
-
-FX_DWORD CPDF_Parser::GetRootObjNum() {
- CPDF_Reference* pRef =
- ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr);
- return pRef ? pRef->GetRefObjNum() : 0;
-}
-
-FX_DWORD CPDF_Parser::GetInfoObjNum() {
- CPDF_Reference* pRef =
- ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr);
- return pRef ? pRef->GetRefObjNum() : 0;
-}
-
-FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) {
- bForm = FALSE;
- if (!IsValidObjectNumber(objnum))
- return TRUE;
-
- if (GetObjectType(objnum) == 0)
- return TRUE;
-
- if (GetObjectType(objnum) == 2)
- return TRUE;
-
- FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
- auto it = m_SortedOffset.find(pos);
- if (it == m_SortedOffset.end())
- return TRUE;
-
- if (++it == m_SortedOffset.end())
- return FALSE;
-
- FX_FILESIZE size = *it - pos;
- FX_FILESIZE SavedPos = m_pSyntax->SavePos();
- m_pSyntax->RestorePos(pos);
-
- const char kFormStream[] = "/Form\0stream";
- const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);
- bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0;
- m_pSyntax->RestorePos(SavedPos);
- return TRUE;
-}
-
-CPDF_Object* CPDF_Parser::ParseIndirectObject(
- CPDF_IndirectObjectHolder* pObjList,
- FX_DWORD objnum) {
- if (!IsValidObjectNumber(objnum))
- return nullptr;
-
- // Prevent circular parsing the same object.
- if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
- return nullptr;
- ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum);
-
- if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) {
- FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
- if (pos <= 0)
- return nullptr;
- return ParseIndirectObjectAt(pObjList, pos, objnum);
- }
- if (GetObjectType(objnum) != 2)
- return nullptr;
-
- CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
- if (!pObjStream)
- return nullptr;
-
- ScopedFileStream file(FX_CreateMemoryStream(
- (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
- CPDF_SyntaxParser syntax;
- syntax.InitParser(file.get(), 0);
- const int32_t offset = GetStreamFirst(pObjStream);
-
- // Read object numbers from |pObjStream| into a cache.
- if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
- for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
- FX_DWORD thisnum = syntax.GetDirectNum();
- FX_DWORD thisoff = syntax.GetDirectNum();
- m_ObjCache[pObjStream][thisnum] = thisoff;
- }
- }
-
- const auto it = m_ObjCache[pObjStream].find(objnum);
- if (it == m_ObjCache[pObjStream].end())
- return nullptr;
-
- syntax.RestorePos(offset + it->second);
- return syntax.GetObject(pObjList, 0, 0, true);
-}
-
-CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) {
- auto it = m_ObjectStreamMap.find(objnum);
- if (it != m_ObjectStreamMap.end())
- return it->second.get();
-
- if (!m_pDocument)
- return nullptr;
-
- const CPDF_Stream* pStream = ToStream(m_pDocument->GetIndirectObject(objnum));
- if (!pStream)
- return nullptr;
-
- CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc;
- pStreamAcc->LoadAllData(pStream);
- m_ObjectStreamMap[objnum].reset(pStreamAcc);
- return pStreamAcc;
-}
-
-FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const {
- if (!IsValidObjectNumber(objnum))
- return 0;
-
- if (GetObjectType(objnum) == 2)
- objnum = GetObjectPositionOrZero(objnum);
-
- if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255)
- return 0;
-
- FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
- if (offset == 0)
- return 0;
-
- auto it = m_SortedOffset.find(offset);
- if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end())
- return 0;
-
- return *it - offset;
-}
-
-void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum,
- uint8_t*& pBuffer,
- FX_DWORD& size) {
- pBuffer = nullptr;
- size = 0;
- if (!IsValidObjectNumber(objnum))
- return;
-
- if (GetObjectType(objnum) == 2) {
- CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
- if (!pObjStream)
- return;
-
- int32_t offset = GetStreamFirst(pObjStream);
- const uint8_t* pData = pObjStream->GetData();
- FX_DWORD totalsize = pObjStream->GetSize();
- ScopedFileStream file(
- FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE));
-
- CPDF_SyntaxParser syntax;
- syntax.InitParser(file.get(), 0);
- for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
- FX_DWORD thisnum = syntax.GetDirectNum();
- FX_DWORD thisoff = syntax.GetDirectNum();
- if (thisnum != objnum)
- continue;
-
- if (i == 1) {
- size = totalsize - (thisoff + offset);
- } else {
- syntax.GetDirectNum(); // Skip nextnum.
- FX_DWORD nextoff = syntax.GetDirectNum();
- size = nextoff - thisoff;
- }
-
- pBuffer = FX_Alloc(uint8_t, size);
- FXSYS_memcpy(pBuffer, pData + thisoff + offset, size);
- return;
- }
- return;
- }
-
- if (GetObjectType(objnum) != 1)
- return;
-
- FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
- if (pos == 0)
- return;
-
- FX_FILESIZE SavedPos = m_pSyntax->SavePos();
- m_pSyntax->RestorePos(pos);
-
- bool bIsNumber;
- CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber) {
- m_pSyntax->RestorePos(SavedPos);
- return;
- }
-
- FX_DWORD parser_objnum = FXSYS_atoui(word);
- if (parser_objnum && parser_objnum != objnum) {
- m_pSyntax->RestorePos(SavedPos);
- return;
- }
-
- word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber) {
- m_pSyntax->RestorePos(SavedPos);
- return;
- }
-
- if (m_pSyntax->GetKeyword() != "obj") {
- m_pSyntax->RestorePos(SavedPos);
- return;
- }
-
- auto it = m_SortedOffset.find(pos);
- if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
- m_pSyntax->RestorePos(SavedPos);
- return;
- }
-
- FX_FILESIZE nextoff = *it;
- FX_BOOL bNextOffValid = FALSE;
- if (nextoff != pos) {
- m_pSyntax->RestorePos(nextoff);
- word = m_pSyntax->GetNextWord(&bIsNumber);
- if (word == "xref") {
- bNextOffValid = TRUE;
- } else if (bIsNumber) {
- word = m_pSyntax->GetNextWord(&bIsNumber);
- if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
- bNextOffValid = TRUE;
- }
- }
- }
-
- if (!bNextOffValid) {
- m_pSyntax->RestorePos(pos);
- while (1) {
- if (m_pSyntax->GetKeyword() == "endobj")
- break;
-
- if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)
- break;
- }
- nextoff = m_pSyntax->SavePos();
- }
-
- size = (FX_DWORD)(nextoff - pos);
- pBuffer = FX_Alloc(uint8_t, size);
- m_pSyntax->RestorePos(pos);
- m_pSyntax->ReadBlock(pBuffer, size);
- m_pSyntax->RestorePos(SavedPos);
-}
-
-CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
- CPDF_IndirectObjectHolder* pObjList,
- FX_FILESIZE pos,
- FX_DWORD objnum) {
- FX_FILESIZE SavedPos = m_pSyntax->SavePos();
- m_pSyntax->RestorePos(pos);
- bool bIsNumber;
- CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber) {
- m_pSyntax->RestorePos(SavedPos);
- return nullptr;
- }
-
- FX_FILESIZE objOffset = m_pSyntax->SavePos();
- objOffset -= word.GetLength();
- FX_DWORD parser_objnum = FXSYS_atoui(word);
- if (objnum && parser_objnum != objnum) {
- m_pSyntax->RestorePos(SavedPos);
- return nullptr;
- }
-
- word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber) {
- m_pSyntax->RestorePos(SavedPos);
- return nullptr;
- }
-
- FX_DWORD parser_gennum = FXSYS_atoui(word);
- if (m_pSyntax->GetKeyword() != "obj") {
- m_pSyntax->RestorePos(SavedPos);
- return nullptr;
- }
-
- CPDF_Object* pObj =
- m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
- m_pSyntax->SavePos();
-
- CFX_ByteString bsWord = m_pSyntax->GetKeyword();
- if (bsWord == "endobj")
- m_pSyntax->SavePos();
-
- m_pSyntax->RestorePos(SavedPos);
- if (pObj) {
- if (!objnum)
- pObj->m_ObjNum = parser_objnum;
- pObj->m_GenNum = parser_gennum;
- }
- return pObj;
-}
-
-CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
- CPDF_IndirectObjectHolder* pObjList,
- FX_FILESIZE pos,
- FX_DWORD objnum,
- FX_FILESIZE* pResultPos) {
- FX_FILESIZE SavedPos = m_pSyntax->SavePos();
- m_pSyntax->RestorePos(pos);
-
- bool bIsNumber;
- CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber) {
- m_pSyntax->RestorePos(SavedPos);
- return nullptr;
- }
-
- FX_DWORD parser_objnum = FXSYS_atoui(word);
- if (objnum && parser_objnum != objnum) {
- m_pSyntax->RestorePos(SavedPos);
- return nullptr;
- }
-
- word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber) {
- m_pSyntax->RestorePos(SavedPos);
- return nullptr;
- }
-
- FX_DWORD gennum = FXSYS_atoui(word);
- if (m_pSyntax->GetKeyword() != "obj") {
- m_pSyntax->RestorePos(SavedPos);
- return nullptr;
- }
-
- CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum);
- if (pResultPos)
- *pResultPos = m_pSyntax->m_Pos;
-
- m_pSyntax->RestorePos(SavedPos);
- return pObj;
-}
-
-CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
- if (m_pSyntax->GetKeyword() != "trailer")
- return nullptr;
-
- std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
- m_pSyntax->GetObject(m_pDocument, 0, 0, true));
- if (!ToDictionary(pObj.get()))
- return nullptr;
- return pObj.release()->AsDictionary();
-}
-
-FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {
- if (!m_pSecurityHandler)
- return (FX_DWORD)-1;
-
- FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();
- if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") {
- dwPermission &= 0xFFFFFFFC;
- dwPermission |= 0xFFFFF0C0;
- if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2)
- dwPermission &= 0xFFFFF0FF;
- }
- return dwPermission;
-}
-
-FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
- FX_DWORD offset) {
- m_pSyntax->InitParser(pFileAccess, offset);
- m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
-
- FX_FILESIZE SavedPos = m_pSyntax->SavePos();
- bool bIsNumber;
- CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber)
- return FALSE;
-
- FX_DWORD objnum = FXSYS_atoui(word);
- word = m_pSyntax->GetNextWord(&bIsNumber);
- if (!bIsNumber)
- return FALSE;
-
- FX_DWORD gennum = FXSYS_atoui(word);
- if (m_pSyntax->GetKeyword() != "obj") {
- m_pSyntax->RestorePos(SavedPos);
- return FALSE;
- }
-
- m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true);
- if (!m_pLinearized)
- return FALSE;
-
- CPDF_Dictionary* pDict = m_pLinearized->GetDict();
- if (pDict && pDict->GetElement("Linearized")) {
- m_pSyntax->GetNextWord(nullptr);
-
- CPDF_Object* pLen = pDict->GetElement("L");
- if (!pLen) {
- m_pLinearized->Release();
- m_pLinearized = nullptr;
- return FALSE;
- }
-
- if (pLen->GetInteger() != (int)pFileAccess->GetSize())
- return FALSE;
-
- if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
- m_dwFirstPageNo = pNo->GetInteger();
-
- if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T")))
- m_LastXRefOffset = pTable->GetInteger();
-
- return TRUE;
- }
- m_pLinearized->Release();
- m_pLinearized = nullptr;
- return FALSE;
-}
-
-CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) {
- CloseParser();
- m_bXRefStream = FALSE;
- m_LastXRefOffset = 0;
- m_bOwnFileRead = true;
-
- int32_t offset = GetHeaderOffset(pFileAccess);
- if (offset == -1)
- return FORMAT_ERROR;
-
- if (!IsLinearizedFile(pFileAccess, offset)) {
- m_pSyntax->m_pFileAccess = nullptr;
- return StartParse(pFileAccess);
- }
-
- m_pDocument = new CPDF_Document(this);
- FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
-
- FX_BOOL bXRefRebuilt = FALSE;
- FX_BOOL bLoadV4 = FALSE;
- if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) &&
- !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
- if (!RebuildCrossRef())
- return FORMAT_ERROR;
-
- bXRefRebuilt = TRUE;
- m_LastXRefOffset = 0;
- }
-
- if (bLoadV4) {
- m_pTrailer = LoadTrailerV4();
- if (!m_pTrailer)
- return SUCCESS;
-
- int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
- if (xrefsize > 0)
- ShrinkObjectMap(xrefsize);
- }
-
- Error eRet = SetEncryptHandler();
- if (eRet != SUCCESS)
- return eRet;
-
- m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
- if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
- if (bXRefRebuilt)
- return FORMAT_ERROR;
-
- ReleaseEncryptHandler();
- if (!RebuildCrossRef())
- return FORMAT_ERROR;
-
- eRet = SetEncryptHandler();
- if (eRet != SUCCESS)
- return eRet;
-
- m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
- if (!m_pDocument->GetRoot())
- return FORMAT_ERROR;
- }
-
- if (GetRootObjNum() == 0) {
- ReleaseEncryptHandler();
- if (!RebuildCrossRef() || GetRootObjNum() == 0)
- return FORMAT_ERROR;
-
- eRet = SetEncryptHandler();
- if (eRet != SUCCESS)
- return eRet;
- }
-
- if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
- if (CPDF_Reference* pMetadata =
- ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))
- m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
- }
- return SUCCESS;
-}
-
-FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
- if (!LoadCrossRefV5(&xrefpos, FALSE))
- return FALSE;
-
- std::set<FX_FILESIZE> seen_xrefpos;
- while (xrefpos) {
- seen_xrefpos.insert(xrefpos);
- if (!LoadCrossRefV5(&xrefpos, FALSE))
- return FALSE;
-
- // Check for circular references.
- if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
- return FALSE;
- }
- m_ObjectStreamMap.clear();
- m_bXRefStream = TRUE;
- return TRUE;
-}
-
-CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
- FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
- m_pSyntax->m_MetadataObjnum = 0;
- if (m_pTrailer) {
- m_pTrailer->Release();
- m_pTrailer = nullptr;
- }
-
- m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
- uint8_t ch = 0;
- FX_DWORD dwCount = 0;
- m_pSyntax->GetNextChar(ch);
- while (PDFCharIsWhitespace(ch)) {
- ++dwCount;
- if (m_pSyntax->m_FileLen >=
- (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
- break;
- }
- m_pSyntax->GetNextChar(ch);
- }
- m_LastXRefOffset += dwCount;
- m_ObjectStreamMap.clear();
- m_ObjCache.clear();
-
- if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
- !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
- m_LastXRefOffset = 0;
- m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
- return FORMAT_ERROR;
- }
-
- m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
- return SUCCESS;
-}
-
class CPDF_DataAvail final : public IPDF_DataAvail {
public:
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp
index 68068cb522..c8e1ceec0b 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp
@@ -5,6 +5,7 @@
#include <limits>
#include <string>
+#include "core/include/fpdfapi/cpdf_parser.h"
#include "core/include/fpdfapi/fpdf_parser.h"
#include "core/include/fxcrt/fx_stream.h"
#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h"
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
index 37b82dd564..a08cf7d466 100644
--- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp
@@ -4,8 +4,9 @@
// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
-#include "core/include/fpdfapi/fpdf_parser.h"
+#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h"
+#include "core/include/fpdfapi/fpdf_parser.h"
#include "core/include/fxcrt/fx_ext.h"
// Indexed by 8-bit character code, contains either:
@@ -60,171 +61,28 @@ const char PDF_CharType[256] = {
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'};
-CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) {
- m_pData = pData;
- m_dwSize = dwSize;
- m_dwCurPos = 0;
-}
-
-CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) {
- m_pData = str.GetPtr();
- m_dwSize = str.GetLength();
- m_dwCurPos = 0;
-}
+// Scans |pFile| for the "%PDF" header tag, probing every byte offset from 0
+// through 1024 inclusive with a 4-byte read.
+//
+// Returns the offset of the first match, or -1 if ReadBlock() reports failure
+// for a probed offset or no tag is found within the probed range.
+int32_t GetHeaderOffset(IFX_FileRead* pFile) {
+  // The tag is matched byte by byte. Reading |buf| through an FX_DWORD pointer
+  // would violate the aliasing rules and assumes 4-byte alignment that a
+  // uint8_t array does not guarantee.
-void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) {
- pStart = NULL;
- dwSize = 0;
- uint8_t ch;
- while (1) {
- if (m_dwSize <= m_dwCurPos)
- return;
- ch = m_pData[m_dwCurPos++];
- while (PDFCharIsWhitespace(ch)) {
- if (m_dwSize <= m_dwCurPos)
- return;
- ch = m_pData[m_dwCurPos++];
- }
+  const size_t kBufSize = 4;
+  uint8_t buf[kBufSize];
+  int32_t offset = 0;
+  while (offset <= 1024) {
+    if (!pFile->ReadBlock(buf, offset, kBufSize))
+      return -1;
- if (ch != '%')
- break;
-
- while (1) {
- if (m_dwSize <= m_dwCurPos)
- return;
- ch = m_pData[m_dwCurPos++];
- if (ch == '\r' || ch == '\n')
- break;
- }
- }
+    if (buf[0] == '%' && buf[1] == 'P' && buf[2] == 'D' && buf[3] == 'F')
+      return offset;
- FX_DWORD start_pos = m_dwCurPos - 1;
- pStart = m_pData + start_pos;
- if (PDFCharIsDelimiter(ch)) {
- if (ch == '/') {
- while (1) {
- if (m_dwSize <= m_dwCurPos)
- return;
- ch = m_pData[m_dwCurPos++];
- if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
- m_dwCurPos--;
- dwSize = m_dwCurPos - start_pos;
- return;
- }
- }
- } else {
- dwSize = 1;
- if (ch == '<') {
- if (m_dwSize <= m_dwCurPos)
- return;
- ch = m_pData[m_dwCurPos++];
- if (ch == '<')
- dwSize = 2;
- else
- m_dwCurPos--;
- } else if (ch == '>') {
- if (m_dwSize <= m_dwCurPos)
- return;
- ch = m_pData[m_dwCurPos++];
- if (ch == '>')
- dwSize = 2;
- else
- m_dwCurPos--;
- }
- }
- return;
- }
-
- dwSize = 1;
- while (1) {
- if (m_dwSize <= m_dwCurPos)
- return;
- ch = m_pData[m_dwCurPos++];
-
- if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
- m_dwCurPos--;
- break;
- }
- dwSize++;
+    ++offset;
}
+  return -1;
}
-CFX_ByteStringC CPDF_SimpleParser::GetWord() {
- const uint8_t* pStart;
- FX_DWORD dwSize;
- ParseWord(pStart, dwSize);
- if (dwSize == 1 && pStart[0] == '<') {
- while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
- m_dwCurPos++;
- }
- if (m_dwCurPos < m_dwSize) {
- m_dwCurPos++;
- }
- return CFX_ByteStringC(pStart,
- (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
- }
- if (dwSize == 1 && pStart[0] == '(') {
- int level = 1;
- while (m_dwCurPos < m_dwSize) {
- if (m_pData[m_dwCurPos] == ')') {
- level--;
- if (level == 0) {
- break;
- }
- }
- if (m_pData[m_dwCurPos] == '\\') {
- if (m_dwSize <= m_dwCurPos) {
- break;
- }
- m_dwCurPos++;
- } else if (m_pData[m_dwCurPos] == '(') {
- level++;
- }
- if (m_dwSize <= m_dwCurPos) {
- break;
- }
- m_dwCurPos++;
- }
- if (m_dwCurPos < m_dwSize) {
- m_dwCurPos++;
- }
- return CFX_ByteStringC(pStart,
- (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
- }
- return CFX_ByteStringC(pStart, dwSize);
-}
-
-bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token,
- int nParams) {
- nParams++;
- FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams);
- int buf_index = 0;
- int buf_count = 0;
- m_dwCurPos = 0;
- while (1) {
- pBuf[buf_index++] = m_dwCurPos;
- if (buf_index == nParams) {
- buf_index = 0;
- }
- buf_count++;
- if (buf_count > nParams) {
- buf_count = nParams;
- }
- CFX_ByteStringC word = GetWord();
- if (word.IsEmpty()) {
- FX_Free(pBuf);
- return false;
- }
- if (word == token) {
- if (buf_count < nParams) {
- continue;
- }
- m_dwCurPos = pBuf[buf_index];
- FX_Free(pBuf);
- return true;
- }
- }
- return false;
+// Looks up |key| in |pDict| and returns its integer value when ToNumber()
+// yields a number object for the element; returns 0 otherwise.
+int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) {
+  if (CPDF_Number* pNumber = ToNumber(pDict->GetElement(key)))
+    return pNumber->GetInteger();
+  return 0;
}
CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) {
diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h
new file mode 100644
index 0000000000..cdc1199623
--- /dev/null
+++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h
@@ -0,0 +1,39 @@
+// Copyright 2016 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
+
+#ifndef CORE_SRC_FPDFAPI_FPDF_PARSER_FPDF_PARSER_UTILITY_H_
+#define CORE_SRC_FPDFAPI_FPDF_PARSER_FPDF_PARSER_UTILITY_H_
+
+#include "core/include/fxcrt/fx_string.h"
+#include "core/include/fxcrt/fx_system.h"
+
+class IFX_FileRead;
+class CPDF_Dictionary;
+
+// Use the accessors below instead of directly accessing PDF_CharType.
+extern const char PDF_CharType[256];
+
+// Reports whether the PDF_CharType table files |c| under class 'W'.
+inline bool PDFCharIsWhitespace(uint8_t c) {
+  const char char_class = PDF_CharType[c];
+  return char_class == 'W';
+}
+// Reports whether the PDF_CharType table files |c| under class 'N'.
+inline bool PDFCharIsNumeric(uint8_t c) {
+  const char char_class = PDF_CharType[c];
+  return char_class == 'N';
+}
+// Reports whether the PDF_CharType table files |c| under class 'D'.
+inline bool PDFCharIsDelimiter(uint8_t c) {
+  const char char_class = PDF_CharType[c];
+  return char_class == 'D';
+}
+// Reports whether the PDF_CharType table files |c| under class 'R'.
+inline bool PDFCharIsOther(uint8_t c) {
+  const char char_class = PDF_CharType[c];
+  return char_class == 'R';
+}
+
+// Returns true when |c| is a carriage return ('\r') or a line feed ('\n').
+inline bool PDFCharIsLineEnding(uint8_t c) {
+  switch (c) {
+    case '\r':
+    case '\n':
+      return true;
+    default:
+      return false;
+  }
+}
+
+int32_t GetHeaderOffset(IFX_FileRead* pFile);
+int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key);
+
+#endif // CORE_SRC_FPDFAPI_FPDF_PARSER_FPDF_PARSER_UTILITY_H_