diff options
author | Tom Sepez <tsepez@chromium.org> | 2016-03-09 10:47:45 -0800 |
---|---|---|
committer | Tom Sepez <tsepez@chromium.org> | 2016-03-09 10:47:45 -0800 |
commit | 8388037a5c58d60043b11c03a8efe78c54c65a4b (patch) | |
tree | d9348e8842aa1f8888bcbd2590d2ae7bee1b6db7 /core/src/fpdfapi | |
parent | f1fa151e146d70c5e031726581e176a8c7d0d579 (diff) | |
download | pdfium-8388037a5c58d60043b11c03a8efe78c54c65a4b.tar.xz |
Split off CPDF_Parser and CPDF_SimpleParser into .h/.cpp files
R=dsinclair@chromium.org
Review URL: https://codereview.chromium.org/1773103003 .
Diffstat (limited to 'core/src/fpdfapi')
17 files changed, 1889 insertions, 1811 deletions
diff --git a/core/src/fpdfapi/fpdf_edit/fpdf_edit_create.cpp b/core/src/fpdfapi/fpdf_edit/fpdf_edit_create.cpp index ac01bf901c..88bd272e1e 100644 --- a/core/src/fpdfapi/fpdf_edit/fpdf_edit_create.cpp +++ b/core/src/fpdfapi/fpdf_edit/fpdf_edit_create.cpp @@ -9,6 +9,7 @@ #include <vector> #include "core/include/fpdfapi/cpdf_document.h" +#include "core/include/fpdfapi/cpdf_parser.h" #include "core/include/fpdfapi/fpdf_parser.h" #include "core/include/fpdfapi/fpdf_serial.h" #include "core/include/fxcrt/fx_ext.h" diff --git a/core/src/fpdfapi/fpdf_font/fpdf_font.cpp b/core/src/fpdfapi/fpdf_font/fpdf_font.cpp index d9bc148326..9b4c39aa84 100644 --- a/core/src/fpdfapi/fpdf_font/fpdf_font.cpp +++ b/core/src/fpdfapi/fpdf_font/fpdf_font.cpp @@ -7,6 +7,7 @@ #include "core/src/fpdfapi/fpdf_font/font_int.h" #include "core/include/fpdfapi/cpdf_document.h" +#include "core/include/fpdfapi/cpdf_simple_parser.h" #include "core/include/fpdfapi/fpdf_module.h" #include "core/include/fpdfapi/fpdf_page.h" #include "core/include/fpdfapi/fpdf_pageobj.h" diff --git a/core/src/fpdfapi/fpdf_font/fpdf_font_cid.cpp b/core/src/fpdfapi/fpdf_font/fpdf_font_cid.cpp index 38406394c4..1ea686e9e0 100644 --- a/core/src/fpdfapi/fpdf_font/fpdf_font_cid.cpp +++ b/core/src/fpdfapi/fpdf_font/fpdf_font_cid.cpp @@ -6,6 +6,7 @@ #include "core/src/fpdfapi/fpdf_font/font_int.h" +#include "core/include/fpdfapi/cpdf_simple_parser.h" #include "core/include/fpdfapi/fpdf_module.h" #include "core/include/fpdfapi/fpdf_page.h" #include "core/include/fpdfapi/fpdf_resource.h" diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_func.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_func.cpp index f428bf89a1..e9ea7eb74f 100644 --- a/core/src/fpdfapi/fpdf_page/fpdf_page_func.cpp +++ b/core/src/fpdfapi/fpdf_page/fpdf_page_func.cpp @@ -12,6 +12,7 @@ #include <utility> #include <vector> +#include "core/include/fpdfapi/cpdf_simple_parser.h" #include "core/include/fpdfapi/fpdf_module.h" #include "core/include/fpdfapi/fpdf_page.h" #include "core/include/fxcrt/fx_safe_types.h" diff --git a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp index a808c2948b..87e65c4190 100644 --- a/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp +++ b/core/src/fpdfapi/fpdf_page/fpdf_page_parser_old.cpp @@ -14,6 +14,7 @@ #include "core/include/fxcodec/fx_codec.h" #include "core/include/fxcrt/fx_ext.h" #include "core/include/fxcrt/fx_safe_types.h" +#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, FX_DWORD dwSize) { m_pBuf = pData; diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_document.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_document.cpp index f837c00509..d97d6e01ff 100644 --- a/core/src/fpdfapi/fpdf_parser/cpdf_document.cpp +++ b/core/src/fpdfapi/fpdf_parser/cpdf_document.cpp @@ -8,6 +8,7 @@ #include <set> +#include "core/include/fpdfapi/cpdf_parser.h" #include "core/include/fpdfapi/fpdf_module.h" #include "core/include/fpdfapi/fpdf_parser.h" #include "core/include/fxge/fx_font.h" diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp new file mode 100644 index 0000000000..1871467458 --- /dev/null +++ b/core/src/fpdfapi/fpdf_parser/cpdf_parser.cpp @@ -0,0 +1,1643 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_parser.h" + +#include "core/include/fpdfapi/cpdf_document.h" +#include "core/include/fpdfapi/fpdf_parser.h" +#include "core/include/fxcrt/fx_ext.h" +#include "core/include/fxcrt/fx_safe_types.h" +#include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" +#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" +#include "third_party/base/stl_util.h" + +namespace { + +// A limit on the size of the xref table. Theoretical limits are higher, but +// this may be large enough in practice. +const int32_t kMaxXRefSize = 1048576; + +// A limit on the maximum object number in the xref table. Theoretical limits +// are higher, but this may be large enough in practice. +const FX_DWORD kMaxObjectNumber = 1048576; + +FX_DWORD GetVarInt(const uint8_t* p, int32_t n) { + FX_DWORD result = 0; + for (int32_t i = 0; i < n; ++i) + result = result * 256 + p[i]; + return result; +} + +int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) { + return pObjStream->GetDict()->GetIntegerBy("N"); +} + +int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) { + return pObjStream->GetDict()->GetIntegerBy("First"); +} + +} // namespace + +CPDF_Parser::CPDF_Parser() + : m_pDocument(nullptr), + m_bOwnFileRead(true), + m_FileVersion(0), + m_pTrailer(nullptr), + m_pEncryptDict(nullptr), + m_pLinearized(nullptr), + m_dwFirstPageNo(0), + m_dwXrefStartObjNum(0) { + m_pSyntax.reset(new CPDF_SyntaxParser); +} + +CPDF_Parser::~CPDF_Parser() { + CloseParser(); +} + +FX_DWORD CPDF_Parser::GetLastObjNum() const { + return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; +} + +bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const { + return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; +} + +FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const { + auto it = m_ObjectInfo.find(objnum); + return it != m_ObjectInfo.end() ? it->second.pos : 0; +} + +uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const { + ASSERT(IsValidObjectNumber(objnum)); + auto it = m_ObjectInfo.find(objnum); + return it != m_ObjectInfo.end() ? it->second.type : 0; +} + +uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const { + ASSERT(IsValidObjectNumber(objnum)); + auto it = m_ObjectInfo.find(objnum); + return it != m_ObjectInfo.end() ? it->second.gennum : 0; +} + +bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const { + uint8_t type = GetObjectType(objnum); + return type == 0 || type == 255; +} + +void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { + m_pEncryptDict = pDict; +} + +CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() { + return m_pSyntax->m_pCryptoHandler.get(); +} + +IFX_FileRead* CPDF_Parser::GetFileAccess() const { + return m_pSyntax->m_pFileAccess; +} + +void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) { + if (objnum == 0) { + m_ObjectInfo.clear(); + return; + } + + auto it = m_ObjectInfo.lower_bound(objnum); + while (it != m_ObjectInfo.end()) { + auto saved_it = it++; + m_ObjectInfo.erase(saved_it); + } + + if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) + m_ObjectInfo[objnum - 1].pos = 0; +} + +void CPDF_Parser::CloseParser() { + m_bVersionUpdated = FALSE; + delete m_pDocument; + m_pDocument = nullptr; + + if (m_pTrailer) { + m_pTrailer->Release(); + m_pTrailer = nullptr; + } + ReleaseEncryptHandler(); + SetEncryptDictionary(nullptr); + + if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) { + m_pSyntax->m_pFileAccess->Release(); + m_pSyntax->m_pFileAccess = nullptr; + } + + m_ObjectStreamMap.clear(); + m_ObjCache.clear(); + m_SortedOffset.clear(); + m_ObjectInfo.clear(); + + int32_t iLen = m_Trailers.GetSize(); + for (int32_t i = 0; i < iLen; ++i) { + if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i)) + trailer->Release(); + } + m_Trailers.RemoveAll(); + + if (m_pLinearized) { + m_pLinearized->Release(); + m_pLinearized = nullptr; + } +} + +CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) { + CloseParser(); + + m_bXRefStream = FALSE; + m_LastXRefOffset = 0; + m_bOwnFileRead = true; + + int32_t offset = GetHeaderOffset(pFileAccess); + if (offset == -1) { + if (pFileAccess) + pFileAccess->Release(); + return FORMAT_ERROR; + } + m_pSyntax->InitParser(pFileAccess, offset); + + uint8_t ch; + if (!m_pSyntax->GetCharAt(5, ch)) + return FORMAT_ERROR; + if (std::isdigit(ch)) + m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10; + + if (!m_pSyntax->GetCharAt(7, ch)) + return FORMAT_ERROR; + if (std::isdigit(ch)) + m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); + + if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9) + return FORMAT_ERROR; + + m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9); + m_pDocument = new CPDF_Document(this); + + FX_BOOL bXRefRebuilt = FALSE; + if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) { + m_SortedOffset.insert(m_pSyntax->SavePos()); + m_pSyntax->GetKeyword(); + + bool bNumber; + CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); + if (!bNumber) + return FORMAT_ERROR; + + m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); + if (!LoadAllCrossRefV4(m_LastXRefOffset) && + !LoadAllCrossRefV5(m_LastXRefOffset)) { + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + bXRefRebuilt = TRUE; + m_LastXRefOffset = 0; + } + } else { + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + bXRefRebuilt = TRUE; + } + Error eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + + m_pDocument->LoadDoc(); + if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { + if (bXRefRebuilt) + return FORMAT_ERROR; + + ReleaseEncryptHandler(); + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + + m_pDocument->LoadDoc(); + if (!m_pDocument->GetRoot()) + return FORMAT_ERROR; + } + if (GetRootObjNum() == 0) { + ReleaseEncryptHandler(); + if (!RebuildCrossRef() || GetRootObjNum() == 0) + return FORMAT_ERROR; + + eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + } + if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { + CPDF_Reference* pMetadata = + ToReference(m_pDocument->GetRoot()->GetElement("Metadata")); + if (pMetadata) + m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); + } + return SUCCESS; +} +CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { + ReleaseEncryptHandler(); + SetEncryptDictionary(nullptr); + + if (!m_pTrailer) + return FORMAT_ERROR; + + CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt"); + if (pEncryptObj) { + if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { + SetEncryptDictionary(pEncryptDict); + } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { + pEncryptObj = m_pDocument->GetIndirectObject(pRef->GetRefObjNum()); + if (pEncryptObj) + SetEncryptDictionary(pEncryptObj->GetDict()); + } + } + + if (m_pEncryptDict) { + CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter"); + std::unique_ptr<IPDF_SecurityHandler> pSecurityHandler; + Error err = HANDLER_ERROR; + if (filter == "Standard") { + pSecurityHandler.reset(new CPDF_StandardSecurityHandler); + err = PASSWORD_ERROR; + } + if (!pSecurityHandler) + return HANDLER_ERROR; + + if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) + return err; + + m_pSecurityHandler = std::move(pSecurityHandler); + std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( + m_pSecurityHandler->CreateCryptoHandler()); + if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) + return HANDLER_ERROR; + m_pSyntax->SetEncrypt(std::move(pCryptoHandler)); + } + return SUCCESS; +} + +void CPDF_Parser::ReleaseEncryptHandler() { + m_pSyntax->m_pCryptoHandler.reset(); + m_pSecurityHandler.reset(); +} + +FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const { + if (!IsValidObjectNumber(objnum)) + return 0; + + if (GetObjectType(objnum) == 1) + return GetObjectPositionOrZero(objnum); + + if (GetObjectType(objnum) == 2) { + FX_FILESIZE pos = GetObjectPositionOrZero(objnum); + return GetObjectPositionOrZero(pos); + } + return 0; +} + +FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { + if (!LoadCrossRefV4(xrefpos, 0, TRUE)) + return FALSE; + + m_pTrailer = LoadTrailerV4(); + if (!m_pTrailer) + return FALSE; + + int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); + if (xrefsize > 0 && xrefsize <= kMaxXRefSize) + ShrinkObjectMap(xrefsize); + + std::vector<FX_FILESIZE> CrossRefList; + std::vector<FX_FILESIZE> XRefStreamList; + std::set<FX_FILESIZE> seen_xrefpos; + + CrossRefList.push_back(xrefpos); + XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); + seen_xrefpos.insert(xrefpos); + + // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not + // numerical, GetDirectInteger() returns 0. Loading will end. + xrefpos = GetDirectInteger(m_pTrailer, "Prev"); + while (xrefpos) { + // Check for circular references. + if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) + return FALSE; + + seen_xrefpos.insert(xrefpos); + + // SLOW ... + CrossRefList.insert(CrossRefList.begin(), xrefpos); + LoadCrossRefV4(xrefpos, 0, TRUE); + + std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( + LoadTrailerV4()); + if (!pDict) + return FALSE; + + xrefpos = GetDirectInteger(pDict.get(), "Prev"); + + // SLOW ... + XRefStreamList.insert(XRefStreamList.begin(), + pDict->GetIntegerBy("XRefStm")); + m_Trailers.Add(pDict.release()); + } + + for (size_t i = 0; i < CrossRefList.size(); ++i) { + if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, + FX_DWORD dwObjCount) { + if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) + return FALSE; + + m_pTrailer = LoadTrailerV4(); + if (!m_pTrailer) + return FALSE; + + int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); + if (xrefsize == 0) + return FALSE; + + std::vector<FX_FILESIZE> CrossRefList; + std::vector<FX_FILESIZE> XRefStreamList; + std::set<FX_FILESIZE> seen_xrefpos; + + CrossRefList.push_back(xrefpos); + XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); + seen_xrefpos.insert(xrefpos); + + xrefpos = GetDirectInteger(m_pTrailer, "Prev"); + while (xrefpos) { + // Check for circular references. + if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) + return FALSE; + + seen_xrefpos.insert(xrefpos); + + // SLOW ... + CrossRefList.insert(CrossRefList.begin(), xrefpos); + LoadCrossRefV4(xrefpos, 0, TRUE); + + std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( + LoadTrailerV4()); + if (!pDict) + return FALSE; + + xrefpos = GetDirectInteger(pDict.get(), "Prev"); + + // SLOW ... + XRefStreamList.insert(XRefStreamList.begin(), + pDict->GetIntegerBy("XRefStm")); + m_Trailers.Add(pDict.release()); + } + + for (size_t i = 1; i < CrossRefList.size(); ++i) { + if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) + return FALSE; + } + return TRUE; +} + +FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, + FX_DWORD dwObjCount) { + FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset; + + m_pSyntax->RestorePos(dwStartPos); + m_SortedOffset.insert(pos); + + FX_DWORD start_objnum = 0; + FX_DWORD count = dwObjCount; + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + + const int32_t recordsize = 20; + std::vector<char> buf(1024 * recordsize + 1); + buf[1024 * recordsize] = '\0'; + + int32_t nBlocks = count / 1024 + 1; + for (int32_t block = 0; block < nBlocks; block++) { + int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; + FX_DWORD dwReadSize = block_size * recordsize; + if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen) + return FALSE; + + if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), + dwReadSize)) { + return FALSE; + } + + for (int32_t i = 0; i < block_size; i++) { + FX_DWORD objnum = start_objnum + block * 1024 + i; + char* pEntry = &buf[i * recordsize]; + if (pEntry[17] == 'f') { + m_ObjectInfo[objnum].pos = 0; + m_ObjectInfo[objnum].type = 0; + } else { + int32_t offset = FXSYS_atoi(pEntry); + if (offset == 0) { + for (int32_t c = 0; c < 10; c++) { + if (!std::isdigit(pEntry[c])) + return FALSE; + } + } + + m_ObjectInfo[objnum].pos = offset; + int32_t version = FXSYS_atoi(pEntry + 11); + if (version >= 1) + m_bVersionUpdated = TRUE; + + m_ObjectInfo[objnum].gennum = version; + if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) + m_SortedOffset.insert(m_ObjectInfo[objnum].pos); + + m_ObjectInfo[objnum].type = 1; + } + } + } + m_pSyntax->RestorePos(SavedPos + count * recordsize); + return TRUE; +} + +bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, + FX_FILESIZE streampos, + FX_BOOL bSkip) { + m_pSyntax->RestorePos(pos); + if (m_pSyntax->GetKeyword() != "xref") + return false; + + m_SortedOffset.insert(pos); + if (streampos) + m_SortedOffset.insert(streampos); + + while (1) { + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (word.IsEmpty()) + return false; + + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + break; + } + + FX_DWORD start_objnum = FXSYS_atoui(word); + if (start_objnum >= kMaxObjectNumber) + return false; + + FX_DWORD count = m_pSyntax->GetDirectNum(); + m_pSyntax->ToNextWord(); + SavedPos = m_pSyntax->SavePos(); + const int32_t recordsize = 20; + + m_dwXrefStartObjNum = start_objnum; + if (!bSkip) { + std::vector<char> buf(1024 * recordsize + 1); + buf[1024 * recordsize] = '\0'; + + int32_t nBlocks = count / 1024 + 1; + for (int32_t block = 0; block < nBlocks; block++) { + int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; + m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), + block_size * recordsize); + + for (int32_t i = 0; i < block_size; i++) { + FX_DWORD objnum = start_objnum + block * 1024 + i; + char* pEntry = &buf[i * recordsize]; + if (pEntry[17] == 'f') { + m_ObjectInfo[objnum].pos = 0; + m_ObjectInfo[objnum].type = 0; + } else { + FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); + if (offset == 0) { + for (int32_t c = 0; c < 10; c++) { + if (!std::isdigit(pEntry[c])) + return false; + } + } + + m_ObjectInfo[objnum].pos = offset; + int32_t version = FXSYS_atoi(pEntry + 11); + if (version >= 1) + m_bVersionUpdated = TRUE; + + m_ObjectInfo[objnum].gennum = version; + if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) + m_SortedOffset.insert(m_ObjectInfo[objnum].pos); + + m_ObjectInfo[objnum].type = 1; + } + } + } + } + m_pSyntax->RestorePos(SavedPos + count * recordsize); + } + return !streampos || LoadCrossRefV5(&streampos, FALSE); +} + +FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { + if (!LoadCrossRefV5(&xrefpos, TRUE)) + return FALSE; + + std::set<FX_FILESIZE> seen_xrefpos; + while (xrefpos) { + seen_xrefpos.insert(xrefpos); + if (!LoadCrossRefV5(&xrefpos, FALSE)) + return FALSE; + + // Check for circular references. + if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) + return FALSE; + } + m_ObjectStreamMap.clear(); + m_bXRefStream = TRUE; + return TRUE; +} + +FX_BOOL CPDF_Parser::RebuildCrossRef() { + m_ObjectInfo.clear(); + m_SortedOffset.clear(); + if (m_pTrailer) { + m_pTrailer->Release(); + m_pTrailer = nullptr; + } + + ParserState state = ParserState::kDefault; + + int32_t inside_index = 0; + FX_DWORD objnum = 0; + FX_DWORD gennum = 0; + int32_t depth = 0; + + const FX_DWORD kBufferSize = 4096; + std::vector<uint8_t> buffer(kBufferSize); + + FX_FILESIZE pos = m_pSyntax->m_HeaderOffset; + FX_FILESIZE start_pos = 0; + FX_FILESIZE start_pos1 = 0; + FX_FILESIZE last_obj = -1; + FX_FILESIZE last_xref = -1; + FX_FILESIZE last_trailer = -1; + + while (pos < m_pSyntax->m_FileLen) { + const FX_FILESIZE saved_pos = pos; + bool bOverFlow = false; + FX_DWORD size = + std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize); + if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size)) + break; + + for (FX_DWORD i = 0; i < size; i++) { + uint8_t byte = buffer[i]; + switch (state) { + case ParserState::kDefault: + if (PDFCharIsWhitespace(byte)) { + state = ParserState::kWhitespace; + } else if (std::isdigit(byte)) { + --i; + state = ParserState::kWhitespace; + } else if (byte == '%') { + inside_index = 0; + state = ParserState::kComment; + } else if (byte == '(') { + state = ParserState::kString; + depth = 1; + } else if (byte == '<') { + inside_index = 1; + state = ParserState::kHexString; + } else if (byte == '\\') { + state = ParserState::kEscapedString; + } else if (byte == 't') { + state = ParserState::kTrailer; + inside_index = 1; + } + break; + + case ParserState::kWhitespace: + if (std::isdigit(byte)) { + start_pos = pos + i; + state = ParserState::kObjNum; + objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + } else if (byte == 't') { + state = ParserState::kTrailer; + inside_index = 1; + } else if (byte == 'x') { + state = ParserState::kXref; + inside_index = 1; + } else if (!PDFCharIsWhitespace(byte)) { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kObjNum: + if (std::isdigit(byte)) { + objnum = + objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + } else if (PDFCharIsWhitespace(byte)) { + state = ParserState::kPostObjNum; + } else { + --i; + state = ParserState::kEndObj; + inside_index = 0; + } + break; + + case ParserState::kPostObjNum: + if (std::isdigit(byte)) { + start_pos1 = pos + i; + state = ParserState::kGenNum; + gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + } else if (byte == 't') { + state = ParserState::kTrailer; + inside_index = 1; + } else if (!PDFCharIsWhitespace(byte)) { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kGenNum: + if (std::isdigit(byte)) { + gennum = + gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + } else if (PDFCharIsWhitespace(byte)) { + state = ParserState::kPostGenNum; + } else { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kPostGenNum: + if (byte == 'o') { + state = ParserState::kBeginObj; + inside_index = 1; + } else if (std::isdigit(byte)) { + objnum = gennum; + gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); + start_pos = start_pos1; + start_pos1 = pos + i; + state = ParserState::kGenNum; + } else if (byte == 't') { + state = ParserState::kTrailer; + inside_index = 1; + } else if (!PDFCharIsWhitespace(byte)) { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kBeginObj: + switch (inside_index) { + case 1: + if (byte != 'b') { + --i; + state = ParserState::kDefault; + } else { + inside_index++; + } + break; + case 2: + if (byte != 'j') { + --i; + state = ParserState::kDefault; + } else { + inside_index++; + } + break; + case 3: + if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { + FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset; + m_SortedOffset.insert(obj_pos); + last_obj = start_pos; + FX_FILESIZE obj_end = 0; + CPDF_Object* pObject = ParseIndirectObjectAtByStrict( + m_pDocument, obj_pos, objnum, &obj_end); + if (CPDF_Stream* pStream = ToStream(pObject)) { + if (CPDF_Dictionary* pDict = pStream->GetDict()) { + if ((pDict->KeyExist("Type")) && + (pDict->GetStringBy("Type") == "XRef" && + pDict->KeyExist("Size"))) { + CPDF_Object* pRoot = pDict->GetElement("Root"); + if (pRoot && pRoot->GetDict() && + pRoot->GetDict()->GetElement("Pages")) { + if (m_pTrailer) + m_pTrailer->Release(); + m_pTrailer = ToDictionary(pDict->Clone()); + } + } + } + } + + FX_FILESIZE offset = 0; + m_pSyntax->RestorePos(obj_pos); + offset = m_pSyntax->FindTag("obj", 0); + if (offset == -1) + offset = 0; + else + offset += 3; + + FX_FILESIZE nLen = obj_end - obj_pos - offset; + if ((FX_DWORD)nLen > size - i) { + pos = obj_end + m_pSyntax->m_HeaderOffset; + bOverFlow = true; + } else { + i += (FX_DWORD)nLen; + } + + if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && + m_ObjectInfo[objnum].pos) { + if (pObject) { + FX_DWORD oldgen = GetObjectGenNum(objnum); + m_ObjectInfo[objnum].pos = obj_pos; + m_ObjectInfo[objnum].gennum = gennum; + if (oldgen != gennum) + m_bVersionUpdated = TRUE; + } + } else { + m_ObjectInfo[objnum].pos = obj_pos; + m_ObjectInfo[objnum].type = 1; + m_ObjectInfo[objnum].gennum = gennum; + } + + if (pObject) + pObject->Release(); + } + --i; + state = ParserState::kDefault; + break; + } + break; + + case ParserState::kTrailer: + if (inside_index == 7) { + if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { + last_trailer = pos + i - 7; + m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset); + + CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true); + if (pObj) { + if (!pObj->IsDictionary() && !pObj->AsStream()) { + pObj->Release(); + } else { + CPDF_Stream* pStream = pObj->AsStream(); + if (CPDF_Dictionary* pTrailer = + pStream ? pStream->GetDict() : pObj->AsDictionary()) { + if (m_pTrailer) { + CPDF_Object* pRoot = pTrailer->GetElement("Root"); + CPDF_Reference* pRef = ToReference(pRoot); + if (!pRoot || + (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && + m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { + auto it = pTrailer->begin(); + while (it != pTrailer->end()) { + const CFX_ByteString& key = it->first; + CPDF_Object* pElement = it->second; + ++it; + FX_DWORD dwObjNum = + pElement ? pElement->GetObjNum() : 0; + if (dwObjNum) { + m_pTrailer->SetAtReference(key, m_pDocument, + dwObjNum); + } else { + m_pTrailer->SetAt(key, pElement->Clone()); + } + } + } + pObj->Release(); + } else { + if (pObj->IsStream()) { + m_pTrailer = ToDictionary(pTrailer->Clone()); + pObj->Release(); + } else { + m_pTrailer = pTrailer; + } + + FX_FILESIZE dwSavePos = m_pSyntax->SavePos(); + CFX_ByteString strWord = m_pSyntax->GetKeyword(); + if (!strWord.Compare("startxref")) { + bool bNumber; + CFX_ByteString bsOffset = + m_pSyntax->GetNextWord(&bNumber); + if (bNumber) + m_LastXRefOffset = FXSYS_atoi(bsOffset); + } + m_pSyntax->RestorePos(dwSavePos); + } + } else { + pObj->Release(); + } + } + } + } + --i; + state = ParserState::kDefault; + } else if (byte == "trailer"[inside_index]) { + inside_index++; + } else { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kXref: + if (inside_index == 4) { + last_xref = pos + i - 4; + state = ParserState::kWhitespace; + } else if (byte == "xref"[inside_index]) { + inside_index++; + } else { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kComment: + if (PDFCharIsLineEnding(byte)) + state = ParserState::kDefault; + break; + + case ParserState::kString: + if (byte == ')') { + if (depth > 0) + depth--; + } else if (byte == '(') { + depth++; + } + + if (!depth) + state = ParserState::kDefault; + break; + + case ParserState::kHexString: + if (byte == '>' || (byte == '<' && inside_index == 1)) + state = ParserState::kDefault; + inside_index = 0; + break; + + case ParserState::kEscapedString: + if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { + --i; + state = ParserState::kDefault; + } + break; + + case ParserState::kEndObj: + if (PDFCharIsWhitespace(byte)) { + state = ParserState::kDefault; + } else if (byte == '%' || byte == '(' || byte == '<' || + byte == '\\') { + state = ParserState::kDefault; + --i; + } else if (inside_index == 6) { + state = ParserState::kDefault; + --i; + } else if (byte == "endobj"[inside_index]) { + inside_index++; + } + break; + } + + if (bOverFlow) { + size = 0; + break; + } + } + pos += size; + + // If the position has not changed at all in a loop iteration, then break + // out to prevent infinite looping. + if (pos == saved_pos) + break; + } + + if (last_xref != -1 && last_xref > last_obj) + last_trailer = last_xref; + else if (last_trailer == -1 || last_xref < last_obj) + last_trailer = m_pSyntax->m_FileLen; + + m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset); + return m_pTrailer && !m_ObjectInfo.empty(); +} + +FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { + CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0); + if (!pObject) + return FALSE; + + if (m_pDocument) { + FX_BOOL bInserted = FALSE; + CPDF_Dictionary* pDict = m_pDocument->GetRoot(); + if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) { + bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject); + } else { + if (pObject->IsStream()) + pObject->Release(); + } + + if (!bInserted) + return FALSE; + } + + CPDF_Stream* pStream = pObject->AsStream(); + if (!pStream) + return FALSE; + + *pos = pStream->GetDict()->GetIntegerBy("Prev"); + int32_t size = pStream->GetDict()->GetIntegerBy("Size"); + if (size < 0) { + pStream->Release(); + return FALSE; + } + + if (bMainXRef) { + m_pTrailer = ToDictionary(pStream->GetDict()->Clone()); + ShrinkObjectMap(size); + for (auto& it : m_ObjectInfo) + it.second.type = 0; + } else { + m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone())); + } + + std::vector<std::pair<int32_t, int32_t>> arrIndex; + CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index"); + if (pArray) { + FX_DWORD nPairSize = pArray->GetCount() / 2; + for (FX_DWORD i = 0; i < nPairSize; i++) { + CPDF_Object* pStartNumObj = pArray->GetElement(i * 2); + CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1); + + if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { + int nStartNum = pStartNumObj->GetInteger(); + int nCount = pCountObj->GetInteger(); + if (nStartNum >= 0 && nCount > 0) + arrIndex.push_back(std::make_pair(nStartNum, nCount)); + } + } + } + + if (arrIndex.size() == 0) + arrIndex.push_back(std::make_pair(0, size)); + + pArray = pStream->GetDict()->GetArrayBy("W"); + if (!pArray) { + pStream->Release(); + return FALSE; + } + + CFX_DWordArray WidthArray; + FX_SAFE_DWORD dwAccWidth = 0; + for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { + WidthArray.Add(pArray->GetIntegerAt(i)); + dwAccWidth += WidthArray[i]; + } + + if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) { + pStream->Release(); + return FALSE; + } + + FX_DWORD totalWidth = dwAccWidth.ValueOrDie(); + CPDF_StreamAcc acc; + acc.LoadAllData(pStream); + + const uint8_t* pData = acc.GetData(); + FX_DWORD dwTotalSize = acc.GetSize(); + FX_DWORD segindex = 0; + for (FX_DWORD i = 0; i < arrIndex.size(); i++) { + int32_t startnum = arrIndex[i].first; + if (startnum < 0) + continue; + + m_dwXrefStartObjNum = + pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum); + FX_DWORD count = + pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second); + FX_SAFE_DWORD dwCaculatedSize = segindex; + dwCaculatedSize += count; + dwCaculatedSize *= totalWidth; + if (!dwCaculatedSize.IsValid() || + dwCaculatedSize.ValueOrDie() > dwTotalSize) { + continue; + } + + const uint8_t* segstart = pData + segindex * totalWidth; + FX_SAFE_DWORD dwMaxObjNum = startnum; + dwMaxObjNum += count; + FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; + if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) + continue; + + for (FX_DWORD j = 0; j < count; j++) { + int32_t type = 1; + const uint8_t* entrystart = segstart + j * totalWidth; + if (WidthArray[0]) + type = GetVarInt(entrystart, WidthArray[0]); + + if (GetObjectType(startnum + j) == 255) { + FX_FILESIZE offset = + GetVarInt(entrystart + WidthArray[0], WidthArray[1]); + m_ObjectInfo[startnum + j].pos = offset; + m_SortedOffset.insert(offset); + continue; + } + + if (GetObjectType(startnum + j)) + continue; + + m_ObjectInfo[startnum + j].type = type; + if (type == 0) { + m_ObjectInfo[startnum + j].pos = 0; + } else { + FX_FILESIZE offset = + GetVarInt(entrystart + WidthArray[0], WidthArray[1]); + m_ObjectInfo[startnum + j].pos = offset; + if (type == 1) { + m_SortedOffset.insert(offset); + } else { + if (offset < 0 || !IsValidObjectNumber(offset)) { + pStream->Release(); + return FALSE; + } + m_ObjectInfo[offset].type = 255; + } + } + } + segindex += count; + } + pStream->Release(); + return TRUE; +} + +CPDF_Array* CPDF_Parser::GetIDArray() { + CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : nullptr; + if (!pID) + return nullptr; + + if (CPDF_Reference* pRef = pID->AsReference()) { + pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum()); + m_pTrailer->SetAt("ID", pID); + } + return ToArray(pID); +} + +FX_DWORD CPDF_Parser::GetRootObjNum() { + CPDF_Reference* pRef = + ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr); + return pRef ? pRef->GetRefObjNum() : 0; +} + +FX_DWORD CPDF_Parser::GetInfoObjNum() { + CPDF_Reference* pRef = + ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr); + return pRef ? pRef->GetRefObjNum() : 0; +} + +FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) { + bForm = FALSE; + if (!IsValidObjectNumber(objnum)) + return TRUE; + + if (GetObjectType(objnum) == 0) + return TRUE; + + if (GetObjectType(objnum) == 2) + return TRUE; + + FX_FILESIZE pos = m_ObjectInfo[objnum].pos; + auto it = m_SortedOffset.find(pos); + if (it == m_SortedOffset.end()) + return TRUE; + + if (++it == m_SortedOffset.end()) + return FALSE; + + FX_FILESIZE size = *it - pos; + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + m_pSyntax->RestorePos(pos); + + const char kFormStream[] = "/Form\0stream"; + const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1); + bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0; + m_pSyntax->RestorePos(SavedPos); + return TRUE; +} + +CPDF_Object* CPDF_Parser::ParseIndirectObject( + CPDF_IndirectObjectHolder* pObjList, + FX_DWORD objnum) { + if (!IsValidObjectNumber(objnum)) + return nullptr; + + // Prevent circular parsing the same object. + if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) + return nullptr; + ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum); + + if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) { + FX_FILESIZE pos = m_ObjectInfo[objnum].pos; + if (pos <= 0) + return nullptr; + return ParseIndirectObjectAt(pObjList, pos, objnum); + } + if (GetObjectType(objnum) != 2) + return nullptr; + + CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); + if (!pObjStream) + return nullptr; + + ScopedFileStream file(FX_CreateMemoryStream( + (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE)); + CPDF_SyntaxParser syntax; + syntax.InitParser(file.get(), 0); + const int32_t offset = GetStreamFirst(pObjStream); + + // Read object numbers from |pObjStream| into a cache. + if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { + for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { + FX_DWORD thisnum = syntax.GetDirectNum(); + FX_DWORD thisoff = syntax.GetDirectNum(); + m_ObjCache[pObjStream][thisnum] = thisoff; + } + } + + const auto it = m_ObjCache[pObjStream].find(objnum); + if (it == m_ObjCache[pObjStream].end()) + return nullptr; + + syntax.RestorePos(offset + it->second); + return syntax.GetObject(pObjList, 0, 0, true); +} + +CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) { + auto it = m_ObjectStreamMap.find(objnum); + if (it != m_ObjectStreamMap.end()) + return it->second.get(); + + if (!m_pDocument) + return nullptr; + + const CPDF_Stream* pStream = ToStream(m_pDocument->GetIndirectObject(objnum)); + if (!pStream) + return nullptr; + + CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc; + pStreamAcc->LoadAllData(pStream); + m_ObjectStreamMap[objnum].reset(pStreamAcc); + return pStreamAcc; +} + +FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const { + if (!IsValidObjectNumber(objnum)) + return 0; + + if (GetObjectType(objnum) == 2) + objnum = GetObjectPositionOrZero(objnum); + + if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255) + return 0; + + FX_FILESIZE offset = GetObjectPositionOrZero(objnum); + if (offset == 0) + return 0; + + auto it = m_SortedOffset.find(offset); + if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) + return 0; + + return *it - offset; +} + +void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, + uint8_t*& pBuffer, + FX_DWORD& size) { + pBuffer = nullptr; + size = 0; + if (!IsValidObjectNumber(objnum)) + return; + + if (GetObjectType(objnum) == 2) { + CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); + if (!pObjStream) + return; + + int32_t offset = GetStreamFirst(pObjStream); + const uint8_t* pData = pObjStream->GetData(); + FX_DWORD totalsize = pObjStream->GetSize(); + ScopedFileStream file( + FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE)); + + CPDF_SyntaxParser syntax; + syntax.InitParser(file.get(), 0); + for (int i = GetStreamNCount(pObjStream); i > 0; --i) { + FX_DWORD thisnum = syntax.GetDirectNum(); + FX_DWORD thisoff = syntax.GetDirectNum(); + if (thisnum != objnum) + continue; + + if (i == 1) { + size = totalsize - (thisoff + offset); + } else { + syntax.GetDirectNum(); // Skip nextnum. + FX_DWORD nextoff = syntax.GetDirectNum(); + size = nextoff - thisoff; + } + + pBuffer = FX_Alloc(uint8_t, size); + FXSYS_memcpy(pBuffer, pData + thisoff + offset, size); + return; + } + return; + } + + if (GetObjectType(objnum) != 1) + return; + + FX_FILESIZE pos = m_ObjectInfo[objnum].pos; + if (pos == 0) + return; + + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + m_pSyntax->RestorePos(pos); + + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return; + } + + FX_DWORD parser_objnum = FXSYS_atoui(word); + if (parser_objnum && parser_objnum != objnum) { + m_pSyntax->RestorePos(SavedPos); + return; + } + + word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return; + } + + if (m_pSyntax->GetKeyword() != "obj") { + m_pSyntax->RestorePos(SavedPos); + return; + } + + auto it = m_SortedOffset.find(pos); + if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { + m_pSyntax->RestorePos(SavedPos); + return; + } + + FX_FILESIZE nextoff = *it; + FX_BOOL bNextOffValid = FALSE; + if (nextoff != pos) { + m_pSyntax->RestorePos(nextoff); + word = m_pSyntax->GetNextWord(&bIsNumber); + if (word == "xref") { + bNextOffValid = TRUE; + } else if (bIsNumber) { + word = m_pSyntax->GetNextWord(&bIsNumber); + if (bIsNumber && m_pSyntax->GetKeyword() == "obj") { + bNextOffValid = TRUE; + } + } + } + + if (!bNextOffValid) { + m_pSyntax->RestorePos(pos); + while (1) { + if (m_pSyntax->GetKeyword() == "endobj") + break; + + if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen) + break; + } + nextoff = m_pSyntax->SavePos(); + } + + size = (FX_DWORD)(nextoff - pos); + pBuffer = FX_Alloc(uint8_t, size); + m_pSyntax->RestorePos(pos); + m_pSyntax->ReadBlock(pBuffer, size); + m_pSyntax->RestorePos(SavedPos); +} + +CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( + CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + FX_DWORD objnum) { + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + m_pSyntax->RestorePos(pos); + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + FX_FILESIZE objOffset = m_pSyntax->SavePos(); + objOffset -= word.GetLength(); + FX_DWORD parser_objnum = FXSYS_atoui(word); + if (objnum && parser_objnum != objnum) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + FX_DWORD parser_gennum = FXSYS_atoui(word); + if (m_pSyntax->GetKeyword() != "obj") { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + CPDF_Object* pObj = + m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true); + m_pSyntax->SavePos(); + + CFX_ByteString bsWord = m_pSyntax->GetKeyword(); + if (bsWord == "endobj") + m_pSyntax->SavePos(); + + m_pSyntax->RestorePos(SavedPos); + if (pObj) { + if (!objnum) + pObj->m_ObjNum = parser_objnum; + pObj->m_GenNum = parser_gennum; + } + return pObj; +} + +CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( + CPDF_IndirectObjectHolder* pObjList, + FX_FILESIZE pos, + FX_DWORD objnum, + FX_FILESIZE* pResultPos) { + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + m_pSyntax->RestorePos(pos); + + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + FX_DWORD parser_objnum = FXSYS_atoui(word); + if (objnum && parser_objnum != objnum) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + FX_DWORD gennum = FXSYS_atoui(word); + if (m_pSyntax->GetKeyword() != "obj") { + m_pSyntax->RestorePos(SavedPos); + return nullptr; + } + + CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum); + if (pResultPos) + *pResultPos = m_pSyntax->m_Pos; + + m_pSyntax->RestorePos(SavedPos); + return pObj; +} + +CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { + if (m_pSyntax->GetKeyword() != "trailer") + return nullptr; + + std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( + m_pSyntax->GetObject(m_pDocument, 0, 0, true)); + if (!ToDictionary(pObj.get())) + return nullptr; + return pObj.release()->AsDictionary(); +} + +FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) { + if (!m_pSecurityHandler) + return (FX_DWORD)-1; + + FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions(); + if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") { + dwPermission &= 0xFFFFFFFC; + dwPermission |= 0xFFFFF0C0; + if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2) + dwPermission &= 0xFFFFF0FF; + } + return dwPermission; +} + +FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, + FX_DWORD offset) { + m_pSyntax->InitParser(pFileAccess, offset); + m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9); + + FX_FILESIZE SavedPos = m_pSyntax->SavePos(); + bool bIsNumber; + CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) + return FALSE; + + FX_DWORD objnum = FXSYS_atoui(word); + word = m_pSyntax->GetNextWord(&bIsNumber); + if (!bIsNumber) + return FALSE; + + FX_DWORD gennum = FXSYS_atoui(word); + if (m_pSyntax->GetKeyword() != "obj") { + m_pSyntax->RestorePos(SavedPos); + return FALSE; + } + + m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true); + if (!m_pLinearized) + return FALSE; + + CPDF_Dictionary* pDict = m_pLinearized->GetDict(); + if (pDict && pDict->GetElement("Linearized")) { + m_pSyntax->GetNextWord(nullptr); + + CPDF_Object* pLen = pDict->GetElement("L"); + if (!pLen) { + m_pLinearized->Release(); + m_pLinearized = nullptr; + return FALSE; + } + + if (pLen->GetInteger() != (int)pFileAccess->GetSize()) + return FALSE; + + if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P"))) + m_dwFirstPageNo = pNo->GetInteger(); + + if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T"))) + m_LastXRefOffset = pTable->GetInteger(); + + return TRUE; + } + m_pLinearized->Release(); + m_pLinearized = nullptr; + return FALSE; +} + +CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) { + CloseParser(); + m_bXRefStream = FALSE; + m_LastXRefOffset = 0; + m_bOwnFileRead = true; + + int32_t offset = GetHeaderOffset(pFileAccess); + if (offset == -1) + return FORMAT_ERROR; + + if (!IsLinearizedFile(pFileAccess, offset)) { + m_pSyntax->m_pFileAccess = nullptr; + return StartParse(pFileAccess); + } + + m_pDocument = new CPDF_Document(this); + FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos(); + + FX_BOOL bXRefRebuilt = FALSE; + FX_BOOL bLoadV4 = FALSE; + if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) && + !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) { + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + bXRefRebuilt = TRUE; + m_LastXRefOffset = 0; + } + + if (bLoadV4) { + m_pTrailer = LoadTrailerV4(); + if (!m_pTrailer) + return SUCCESS; + + int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); + if (xrefsize > 0) + ShrinkObjectMap(xrefsize); + } + + Error eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + + m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); + if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { + if (bXRefRebuilt) + return FORMAT_ERROR; + + ReleaseEncryptHandler(); + if (!RebuildCrossRef()) + return FORMAT_ERROR; + + eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + + m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); + if (!m_pDocument->GetRoot()) + return FORMAT_ERROR; + } + + if (GetRootObjNum() == 0) { + ReleaseEncryptHandler(); + if (!RebuildCrossRef() || GetRootObjNum() == 0) + return FORMAT_ERROR; + + eRet = SetEncryptHandler(); + if (eRet != SUCCESS) + return eRet; + } + + if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { + if (CPDF_Reference* pMetadata = + ToReference(m_pDocument->GetRoot()->GetElement("Metadata"))) + m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); + } + return SUCCESS; +} + +FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { + if (!LoadCrossRefV5(&xrefpos, FALSE)) + return FALSE; + + std::set<FX_FILESIZE> seen_xrefpos; + while (xrefpos) { + seen_xrefpos.insert(xrefpos); + if (!LoadCrossRefV5(&xrefpos, FALSE)) + return FALSE; + + // Check for circular references. + if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) + return FALSE; + } + m_ObjectStreamMap.clear(); + m_bXRefStream = TRUE; + return TRUE; +} + +CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { + FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum; + m_pSyntax->m_MetadataObjnum = 0; + if (m_pTrailer) { + m_pTrailer->Release(); + m_pTrailer = nullptr; + } + + m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); + uint8_t ch = 0; + FX_DWORD dwCount = 0; + m_pSyntax->GetNextChar(ch); + while (PDFCharIsWhitespace(ch)) { + ++dwCount; + if (m_pSyntax->m_FileLen >= + (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { + break; + } + m_pSyntax->GetNextChar(ch); + } + m_LastXRefOffset += dwCount; + m_ObjectStreamMap.clear(); + m_ObjCache.clear(); + + if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && + !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { + m_LastXRefOffset = 0; + m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; + return FORMAT_ERROR; + } + + m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; + return SUCCESS; +} diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp new file mode 100644 index 0000000000..ad656795f2 --- /dev/null +++ b/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser.cpp @@ -0,0 +1,170 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#include "core/include/fpdfapi/cpdf_simple_parser.h" + +#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" + +CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) + : m_pData(pData), m_dwSize(dwSize), m_dwCurPos(0) {} + +CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) + : m_pData(str.GetPtr()), m_dwSize(str.GetLength()), m_dwCurPos(0) {} + +void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) { + pStart = nullptr; + dwSize = 0; + uint8_t ch; + while (1) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + while (PDFCharIsWhitespace(ch)) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + } + + if (ch != '%') + break; + + while (1) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + if (PDFCharIsLineEnding(ch)) + break; + } + } + + FX_DWORD start_pos = m_dwCurPos - 1; + pStart = m_pData + start_pos; + if (PDFCharIsDelimiter(ch)) { + if (ch == '/') { + while (1) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { + m_dwCurPos--; + dwSize = m_dwCurPos - start_pos; + return; + } + } + } else { + dwSize = 1; + if (ch == '<') { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + if (ch == '<') + dwSize = 2; + else + m_dwCurPos--; + } else if (ch == '>') { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + if (ch == '>') + dwSize = 2; + else + m_dwCurPos--; + } + } + return; + } + + dwSize = 1; + while (1) { + if (m_dwSize <= m_dwCurPos) + return; + ch = m_pData[m_dwCurPos++]; + + if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { + m_dwCurPos--; + break; + } + dwSize++; + } +} + +CFX_ByteStringC CPDF_SimpleParser::GetWord() { + const uint8_t* pStart; + FX_DWORD dwSize; + ParseWord(pStart, dwSize); + if (dwSize == 1 && pStart[0] == '<') { + while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') { + m_dwCurPos++; + } + if (m_dwCurPos < m_dwSize) { + m_dwCurPos++; + } + return CFX_ByteStringC(pStart, + (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); + } + if (dwSize == 1 && pStart[0] == '(') { + int level = 1; + while (m_dwCurPos < m_dwSize) { + if (m_pData[m_dwCurPos] == ')') { + level--; + if (level == 0) { + break; + } + } + if (m_pData[m_dwCurPos] == '\\') { + if (m_dwSize <= m_dwCurPos) { + break; + } + m_dwCurPos++; + } else if (m_pData[m_dwCurPos] == '(') { + level++; + } + if (m_dwSize <= m_dwCurPos) { + break; + } + m_dwCurPos++; + } + if (m_dwCurPos < m_dwSize) { + m_dwCurPos++; + } + return CFX_ByteStringC(pStart, + (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); + } + return CFX_ByteStringC(pStart, dwSize); +} + +bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token, + int nParams) { + nParams++; + FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams); + int buf_index = 0; + int buf_count = 0; + m_dwCurPos = 0; + while (1) { + pBuf[buf_index++] = m_dwCurPos; + if (buf_index == nParams) { + buf_index = 0; + } + buf_count++; + if (buf_count > nParams) { + buf_count = nParams; + } + CFX_ByteStringC word = GetWord(); + if (word.IsEmpty()) { + FX_Free(pBuf); + return false; + } + if (word == token) { + if (buf_count < nParams) { + continue; + } + m_dwCurPos = pBuf[buf_index]; + FX_Free(pBuf); + return true; + } + } + return false; +} diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser_unittest.cpp index 2eb930bef6..f95838d91f 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility_unittest.cpp +++ b/core/src/fpdfapi/fpdf_parser/cpdf_simple_parser_unittest.cpp @@ -2,10 +2,12 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "core/include/fpdfapi/fpdf_parser.h" +#include "core/include/fpdfapi/cpdf_simple_parser.h" #include <string> +#include "core/include/fpdfapi/fpdf_parser.h" +#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/test_support.h" diff --git a/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp index 3091ab108b..151139e4a4 100644 --- a/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.cpp @@ -11,6 +11,7 @@ #include "core/include/fpdfapi/fpdf_module.h" #include "core/include/fpdfapi/fpdf_parser.h" #include "core/include/fxcrt/fx_ext.h" +#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" #include "third_party/base/numerics/safe_math.h" namespace { diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp index 4fb471713b..473ea76e76 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_decode.cpp @@ -12,6 +12,7 @@ #include "core/include/fpdfapi/fpdf_parser.h" #include "core/include/fxcodec/fx_codec.h" #include "core/include/fxcrt/fx_ext.h" +#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" #include "third_party/base/stl_util.h" #define _STREAM_MAX_SIZE_ 20 * 1024 * 1024 diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp index 95907ae8fd..9fe534ec35 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_encrypt.cpp @@ -8,6 +8,8 @@ #include <time.h> +#include "core/include/fpdfapi/cpdf_parser.h" +#include "core/include/fpdfapi/cpdf_simple_parser.h" #include "core/include/fdrm/fx_crypt.h" const uint8_t defpasscode[32] = { diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp index bed70ebd5d..28c35c899e 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_objects.cpp @@ -8,6 +8,7 @@ #include <algorithm> +#include "core/include/fpdfapi/cpdf_parser.h" #include "core/include/fpdfapi/fpdf_parser.h" #include "core/include/fxcrt/fx_string.h" #include "third_party/base/stl_util.h" diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp index 2b61cdc2a2..0d62e4d3ed 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser.cpp @@ -13,68 +13,23 @@ #include <vector> #include "core/include/fpdfapi/cpdf_document.h" +#include "core/include/fpdfapi/cpdf_parser.h" #include "core/include/fpdfapi/fpdf_module.h" #include "core/include/fpdfapi/fpdf_page.h" #include "core/include/fxcrt/fx_ext.h" #include "core/include/fxcrt/fx_safe_types.h" #include "core/src/fpdfapi/fpdf_page/pageint.h" #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" +#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" #include "core/src/fpdfapi/fpdf_parser/parser_int.h" #include "third_party/base/stl_util.h" namespace { -// A limit on the size of the xref table. Theoretical limits are higher, but -// this may be large enough in practice. -const int32_t kMaxXRefSize = 1048576; - -// A limit on the maximum object number in the xref table. Theoretical limits -// are higher, but this may be large enough in practice. -const FX_DWORD kMaxObjectNumber = 1048576; - -int32_t GetHeaderOffset(IFX_FileRead* pFile) { - // TODO(dsinclair): This is a complicated way of saying %PDF, simplify? - const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025); - - const size_t kBufSize = 4; - uint8_t buf[kBufSize]; - int32_t offset = 0; - while (offset <= 1024) { - if (!pFile->ReadBlock(buf, offset, kBufSize)) - return -1; - - if (*(FX_DWORD*)buf == tag) - return offset; - - ++offset; - } - return -1; -} - -int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) { - CPDF_Number* pObj = ToNumber(pDict->GetElement(key)); - return pObj ? pObj->GetInteger() : 0; -} - -FX_DWORD GetVarInt(const uint8_t* p, int32_t n) { - FX_DWORD result = 0; - for (int32_t i = 0; i < n; ++i) - result = result * 256 + p[i]; - return result; -} - -int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) { - return pObjStream->GetDict()->GetIntegerBy("N"); -} - -int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) { - return pObjStream->GetDict()->GetIntegerBy("First"); -} - bool CanReadFromBitStream(const CFX_BitStream* hStream, const FX_SAFE_DWORD& num_bits) { - return (num_bits.IsValid() && - hStream->BitsRemaining() >= num_bits.ValueOrDie()); + return num_bits.IsValid() && + hStream->BitsRemaining() >= num_bits.ValueOrDie(); } } // namespace @@ -86,1607 +41,6 @@ bool IsSignatureDict(const CPDF_Dictionary* pDict) { return pType && pType->GetString() == "Sig"; } -CPDF_Parser::CPDF_Parser() - : m_pDocument(nullptr), - m_bOwnFileRead(true), - m_FileVersion(0), - m_pTrailer(nullptr), - m_pEncryptDict(nullptr), - m_pLinearized(nullptr), - m_dwFirstPageNo(0), - m_dwXrefStartObjNum(0) { - m_pSyntax.reset(new CPDF_SyntaxParser); -} - -CPDF_Parser::~CPDF_Parser() { - CloseParser(); -} - -FX_DWORD CPDF_Parser::GetLastObjNum() const { - return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; -} - -bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const { - return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; -} - -FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const { - auto it = m_ObjectInfo.find(objnum); - return it != m_ObjectInfo.end() ? it->second.pos : 0; -} - -uint8_t CPDF_Parser::GetObjectType(FX_DWORD objnum) const { - ASSERT(IsValidObjectNumber(objnum)); - auto it = m_ObjectInfo.find(objnum); - return it != m_ObjectInfo.end() ? it->second.type : 0; -} - -uint16_t CPDF_Parser::GetObjectGenNum(FX_DWORD objnum) const { - ASSERT(IsValidObjectNumber(objnum)); - auto it = m_ObjectInfo.find(objnum); - return it != m_ObjectInfo.end() ? it->second.gennum : 0; -} - -bool CPDF_Parser::IsObjectFreeOrNull(FX_DWORD objnum) const { - uint8_t type = GetObjectType(objnum); - return type == 0 || type == 255; -} - -void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { - m_pEncryptDict = pDict; -} - -CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() { - return m_pSyntax->m_pCryptoHandler.get(); -} - -IFX_FileRead* CPDF_Parser::GetFileAccess() const { - return m_pSyntax->m_pFileAccess; -} - -void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) { - if (objnum == 0) { - m_ObjectInfo.clear(); - return; - } - - auto it = m_ObjectInfo.lower_bound(objnum); - while (it != m_ObjectInfo.end()) { - auto saved_it = it++; - m_ObjectInfo.erase(saved_it); - } - - if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) - m_ObjectInfo[objnum - 1].pos = 0; -} - -void CPDF_Parser::CloseParser() { - m_bVersionUpdated = FALSE; - delete m_pDocument; - m_pDocument = nullptr; - - if (m_pTrailer) { - m_pTrailer->Release(); - m_pTrailer = nullptr; - } - ReleaseEncryptHandler(); - SetEncryptDictionary(nullptr); - - if (m_bOwnFileRead && m_pSyntax->m_pFileAccess) { - m_pSyntax->m_pFileAccess->Release(); - m_pSyntax->m_pFileAccess = nullptr; - } - - m_ObjectStreamMap.clear(); - m_ObjCache.clear(); - m_SortedOffset.clear(); - m_ObjectInfo.clear(); - - int32_t iLen = m_Trailers.GetSize(); - for (int32_t i = 0; i < iLen; ++i) { - if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i)) - trailer->Release(); - } - m_Trailers.RemoveAll(); - - if (m_pLinearized) { - m_pLinearized->Release(); - m_pLinearized = nullptr; - } -} - -CPDF_Parser::Error CPDF_Parser::StartParse(IFX_FileRead* pFileAccess) { - CloseParser(); - - m_bXRefStream = FALSE; - m_LastXRefOffset = 0; - m_bOwnFileRead = true; - - int32_t offset = GetHeaderOffset(pFileAccess); - if (offset == -1) { - if (pFileAccess) - pFileAccess->Release(); - return FORMAT_ERROR; - } - m_pSyntax->InitParser(pFileAccess, offset); - - uint8_t ch; - if (!m_pSyntax->GetCharAt(5, ch)) - return FORMAT_ERROR; - if (std::isdigit(ch)) - m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10; - - if (!m_pSyntax->GetCharAt(7, ch)) - return FORMAT_ERROR; - if (std::isdigit(ch)) - m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); - - if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9) - return FORMAT_ERROR; - - m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9); - m_pDocument = new CPDF_Document(this); - - FX_BOOL bXRefRebuilt = FALSE; - if (m_pSyntax->SearchWord("startxref", TRUE, FALSE, 4096)) { - m_SortedOffset.insert(m_pSyntax->SavePos()); - m_pSyntax->GetKeyword(); - - bool bNumber; - CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); - if (!bNumber) - return FORMAT_ERROR; - - m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); - if (!LoadAllCrossRefV4(m_LastXRefOffset) && - !LoadAllCrossRefV5(m_LastXRefOffset)) { - if (!RebuildCrossRef()) - return FORMAT_ERROR; - - bXRefRebuilt = TRUE; - m_LastXRefOffset = 0; - } - } else { - if (!RebuildCrossRef()) - return FORMAT_ERROR; - - bXRefRebuilt = TRUE; - } - Error eRet = SetEncryptHandler(); - if (eRet != SUCCESS) - return eRet; - - m_pDocument->LoadDoc(); - if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { - if (bXRefRebuilt) - return FORMAT_ERROR; - - ReleaseEncryptHandler(); - if (!RebuildCrossRef()) - return FORMAT_ERROR; - - eRet = SetEncryptHandler(); - if (eRet != SUCCESS) - return eRet; - - m_pDocument->LoadDoc(); - if (!m_pDocument->GetRoot()) - return FORMAT_ERROR; - } - if (GetRootObjNum() == 0) { - ReleaseEncryptHandler(); - if (!RebuildCrossRef() || GetRootObjNum() == 0) - return FORMAT_ERROR; - - eRet = SetEncryptHandler(); - if (eRet != SUCCESS) - return eRet; - } - if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { - CPDF_Reference* pMetadata = - ToReference(m_pDocument->GetRoot()->GetElement("Metadata")); - if (pMetadata) - m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); - } - return SUCCESS; -} -CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { - ReleaseEncryptHandler(); - SetEncryptDictionary(nullptr); - - if (!m_pTrailer) - return FORMAT_ERROR; - - CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt"); - if (pEncryptObj) { - if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { - SetEncryptDictionary(pEncryptDict); - } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { - pEncryptObj = m_pDocument->GetIndirectObject(pRef->GetRefObjNum()); - if (pEncryptObj) - SetEncryptDictionary(pEncryptObj->GetDict()); - } - } - - if (m_pEncryptDict) { - CFX_ByteString filter = m_pEncryptDict->GetStringBy("Filter"); - std::unique_ptr<IPDF_SecurityHandler> pSecurityHandler; - Error err = HANDLER_ERROR; - if (filter == "Standard") { - pSecurityHandler.reset(new CPDF_StandardSecurityHandler); - err = PASSWORD_ERROR; - } - if (!pSecurityHandler) - return HANDLER_ERROR; - - if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) - return err; - - m_pSecurityHandler = std::move(pSecurityHandler); - std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( - m_pSecurityHandler->CreateCryptoHandler()); - if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) - return HANDLER_ERROR; - m_pSyntax->SetEncrypt(std::move(pCryptoHandler)); - } - return SUCCESS; -} - -void CPDF_Parser::ReleaseEncryptHandler() { - m_pSyntax->m_pCryptoHandler.reset(); - m_pSecurityHandler.reset(); -} - -FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const { - if (!IsValidObjectNumber(objnum)) - return 0; - - if (GetObjectType(objnum) == 1) - return GetObjectPositionOrZero(objnum); - - if (GetObjectType(objnum) == 2) { - FX_FILESIZE pos = GetObjectPositionOrZero(objnum); - return GetObjectPositionOrZero(pos); - } - return 0; -} - -FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { - if (!LoadCrossRefV4(xrefpos, 0, TRUE)) - return FALSE; - - m_pTrailer = LoadTrailerV4(); - if (!m_pTrailer) - return FALSE; - - int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); - if (xrefsize > 0 && xrefsize <= kMaxXRefSize) - ShrinkObjectMap(xrefsize); - - std::vector<FX_FILESIZE> CrossRefList; - std::vector<FX_FILESIZE> XRefStreamList; - std::set<FX_FILESIZE> seen_xrefpos; - - CrossRefList.push_back(xrefpos); - XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); - seen_xrefpos.insert(xrefpos); - - // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not - // numerical, GetDirectInteger() returns 0. Loading will end. - xrefpos = GetDirectInteger(m_pTrailer, "Prev"); - while (xrefpos) { - // Check for circular references. - if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) - return FALSE; - - seen_xrefpos.insert(xrefpos); - - // SLOW ... - CrossRefList.insert(CrossRefList.begin(), xrefpos); - LoadCrossRefV4(xrefpos, 0, TRUE); - - std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( - LoadTrailerV4()); - if (!pDict) - return FALSE; - - xrefpos = GetDirectInteger(pDict.get(), "Prev"); - - // SLOW ... - XRefStreamList.insert(XRefStreamList.begin(), - pDict->GetIntegerBy("XRefStm")); - m_Trailers.Add(pDict.release()); - } - - for (size_t i = 0; i < CrossRefList.size(); ++i) { - if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) - return FALSE; - } - return TRUE; -} - -FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, - FX_DWORD dwObjCount) { - if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) - return FALSE; - - m_pTrailer = LoadTrailerV4(); - if (!m_pTrailer) - return FALSE; - - int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); - if (xrefsize == 0) - return FALSE; - - std::vector<FX_FILESIZE> CrossRefList; - std::vector<FX_FILESIZE> XRefStreamList; - std::set<FX_FILESIZE> seen_xrefpos; - - CrossRefList.push_back(xrefpos); - XRefStreamList.push_back(GetDirectInteger(m_pTrailer, "XRefStm")); - seen_xrefpos.insert(xrefpos); - - xrefpos = GetDirectInteger(m_pTrailer, "Prev"); - while (xrefpos) { - // Check for circular references. - if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) - return FALSE; - - seen_xrefpos.insert(xrefpos); - - // SLOW ... - CrossRefList.insert(CrossRefList.begin(), xrefpos); - LoadCrossRefV4(xrefpos, 0, TRUE); - - std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( - LoadTrailerV4()); - if (!pDict) - return FALSE; - - xrefpos = GetDirectInteger(pDict.get(), "Prev"); - - // SLOW ... - XRefStreamList.insert(XRefStreamList.begin(), - pDict->GetIntegerBy("XRefStm")); - m_Trailers.Add(pDict.release()); - } - - for (size_t i = 1; i < CrossRefList.size(); ++i) { - if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) - return FALSE; - } - return TRUE; -} - -FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, - FX_DWORD dwObjCount) { - FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset; - - m_pSyntax->RestorePos(dwStartPos); - m_SortedOffset.insert(pos); - - FX_DWORD start_objnum = 0; - FX_DWORD count = dwObjCount; - FX_FILESIZE SavedPos = m_pSyntax->SavePos(); - - const int32_t recordsize = 20; - std::vector<char> buf(1024 * recordsize + 1); - buf[1024 * recordsize] = '\0'; - - int32_t nBlocks = count / 1024 + 1; - for (int32_t block = 0; block < nBlocks; block++) { - int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; - FX_DWORD dwReadSize = block_size * recordsize; - if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen) - return FALSE; - - if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), - dwReadSize)) { - return FALSE; - } - - for (int32_t i = 0; i < block_size; i++) { - FX_DWORD objnum = start_objnum + block * 1024 + i; - char* pEntry = &buf[i * recordsize]; - if (pEntry[17] == 'f') { - m_ObjectInfo[objnum].pos = 0; - m_ObjectInfo[objnum].type = 0; - } else { - int32_t offset = FXSYS_atoi(pEntry); - if (offset == 0) { - for (int32_t c = 0; c < 10; c++) { - if (!std::isdigit(pEntry[c])) - return FALSE; - } - } - - m_ObjectInfo[objnum].pos = offset; - int32_t version = FXSYS_atoi(pEntry + 11); - if (version >= 1) - m_bVersionUpdated = TRUE; - - m_ObjectInfo[objnum].gennum = version; - if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) - m_SortedOffset.insert(m_ObjectInfo[objnum].pos); - - m_ObjectInfo[objnum].type = 1; - } - } - } - m_pSyntax->RestorePos(SavedPos + count * recordsize); - return TRUE; -} - -bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, - FX_FILESIZE streampos, - FX_BOOL bSkip) { - m_pSyntax->RestorePos(pos); - if (m_pSyntax->GetKeyword() != "xref") - return false; - - m_SortedOffset.insert(pos); - if (streampos) - m_SortedOffset.insert(streampos); - - while (1) { - FX_FILESIZE SavedPos = m_pSyntax->SavePos(); - bool bIsNumber; - CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); - if (word.IsEmpty()) - return false; - - if (!bIsNumber) { - m_pSyntax->RestorePos(SavedPos); - break; - } - - FX_DWORD start_objnum = FXSYS_atoui(word); - if (start_objnum >= kMaxObjectNumber) - return false; - - FX_DWORD count = m_pSyntax->GetDirectNum(); - m_pSyntax->ToNextWord(); - SavedPos = m_pSyntax->SavePos(); - const int32_t recordsize = 20; - - m_dwXrefStartObjNum = start_objnum; - if (!bSkip) { - std::vector<char> buf(1024 * recordsize + 1); - buf[1024 * recordsize] = '\0'; - - int32_t nBlocks = count / 1024 + 1; - for (int32_t block = 0; block < nBlocks; block++) { - int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; - m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), - block_size * recordsize); - - for (int32_t i = 0; i < block_size; i++) { - FX_DWORD objnum = start_objnum + block * 1024 + i; - char* pEntry = &buf[i * recordsize]; - if (pEntry[17] == 'f') { - m_ObjectInfo[objnum].pos = 0; - m_ObjectInfo[objnum].type = 0; - } else { - FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); - if (offset == 0) { - for (int32_t c = 0; c < 10; c++) { - if (!std::isdigit(pEntry[c])) - return false; - } - } - - m_ObjectInfo[objnum].pos = offset; - int32_t version = FXSYS_atoi(pEntry + 11); - if (version >= 1) - m_bVersionUpdated = TRUE; - - m_ObjectInfo[objnum].gennum = version; - if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) - m_SortedOffset.insert(m_ObjectInfo[objnum].pos); - - m_ObjectInfo[objnum].type = 1; - } - } - } - } - m_pSyntax->RestorePos(SavedPos + count * recordsize); - } - return !streampos || LoadCrossRefV5(&streampos, FALSE); -} - -FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { - if (!LoadCrossRefV5(&xrefpos, TRUE)) - return FALSE; - - std::set<FX_FILESIZE> seen_xrefpos; - while (xrefpos) { - seen_xrefpos.insert(xrefpos); - if (!LoadCrossRefV5(&xrefpos, FALSE)) - return FALSE; - - // Check for circular references. - if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) - return FALSE; - } - m_ObjectStreamMap.clear(); - m_bXRefStream = TRUE; - return TRUE; -} - -FX_BOOL CPDF_Parser::RebuildCrossRef() { - m_ObjectInfo.clear(); - m_SortedOffset.clear(); - if (m_pTrailer) { - m_pTrailer->Release(); - m_pTrailer = nullptr; - } - - ParserState state = ParserState::kDefault; - - int32_t inside_index = 0; - FX_DWORD objnum = 0; - FX_DWORD gennum = 0; - int32_t depth = 0; - - const FX_DWORD kBufferSize = 4096; - std::vector<uint8_t> buffer(kBufferSize); - - FX_FILESIZE pos = m_pSyntax->m_HeaderOffset; - FX_FILESIZE start_pos = 0; - FX_FILESIZE start_pos1 = 0; - FX_FILESIZE last_obj = -1; - FX_FILESIZE last_xref = -1; - FX_FILESIZE last_trailer = -1; - - while (pos < m_pSyntax->m_FileLen) { - const FX_FILESIZE saved_pos = pos; - bool bOverFlow = false; - FX_DWORD size = - std::min((FX_DWORD)(m_pSyntax->m_FileLen - pos), kBufferSize); - if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size)) - break; - - for (FX_DWORD i = 0; i < size; i++) { - uint8_t byte = buffer[i]; - switch (state) { - case ParserState::kDefault: - if (PDFCharIsWhitespace(byte)) { - state = ParserState::kWhitespace; - } else if (std::isdigit(byte)) { - --i; - state = ParserState::kWhitespace; - } else if (byte == '%') { - inside_index = 0; - state = ParserState::kComment; - } else if (byte == '(') { - state = ParserState::kString; - depth = 1; - } else if (byte == '<') { - inside_index = 1; - state = ParserState::kHexString; - } else if (byte == '\\') { - state = ParserState::kEscapedString; - } else if (byte == 't') { - state = ParserState::kTrailer; - inside_index = 1; - } - break; - - case ParserState::kWhitespace: - if (std::isdigit(byte)) { - start_pos = pos + i; - state = ParserState::kObjNum; - objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); - } else if (byte == 't') { - state = ParserState::kTrailer; - inside_index = 1; - } else if (byte == 'x') { - state = ParserState::kXref; - inside_index = 1; - } else if (!PDFCharIsWhitespace(byte)) { - --i; - state = ParserState::kDefault; - } - break; - - case ParserState::kObjNum: - if (std::isdigit(byte)) { - objnum = - objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); - } else if (PDFCharIsWhitespace(byte)) { - state = ParserState::kPostObjNum; - } else { - --i; - state = ParserState::kEndObj; - inside_index = 0; - } - break; - - case ParserState::kPostObjNum: - if (std::isdigit(byte)) { - start_pos1 = pos + i; - state = ParserState::kGenNum; - gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); - } else if (byte == 't') { - state = ParserState::kTrailer; - inside_index = 1; - } else if (!PDFCharIsWhitespace(byte)) { - --i; - state = ParserState::kDefault; - } - break; - - case ParserState::kGenNum: - if (std::isdigit(byte)) { - gennum = - gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); - } else if (PDFCharIsWhitespace(byte)) { - state = ParserState::kPostGenNum; - } else { - --i; - state = ParserState::kDefault; - } - break; - - case ParserState::kPostGenNum: - if (byte == 'o') { - state = ParserState::kBeginObj; - inside_index = 1; - } else if (std::isdigit(byte)) { - objnum = gennum; - gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); - start_pos = start_pos1; - start_pos1 = pos + i; - state = ParserState::kGenNum; - } else if (byte == 't') { - state = ParserState::kTrailer; - inside_index = 1; - } else if (!PDFCharIsWhitespace(byte)) { - --i; - state = ParserState::kDefault; - } - break; - - case ParserState::kBeginObj: - switch (inside_index) { - case 1: - if (byte != 'b') { - --i; - state = ParserState::kDefault; - } else { - inside_index++; - } - break; - case 2: - if (byte != 'j') { - --i; - state = ParserState::kDefault; - } else { - inside_index++; - } - break; - case 3: - if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { - FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset; - m_SortedOffset.insert(obj_pos); - last_obj = start_pos; - FX_FILESIZE obj_end = 0; - CPDF_Object* pObject = ParseIndirectObjectAtByStrict( - m_pDocument, obj_pos, objnum, &obj_end); - if (CPDF_Stream* pStream = ToStream(pObject)) { - if (CPDF_Dictionary* pDict = pStream->GetDict()) { - if ((pDict->KeyExist("Type")) && - (pDict->GetStringBy("Type") == "XRef" && - pDict->KeyExist("Size"))) { - CPDF_Object* pRoot = pDict->GetElement("Root"); - if (pRoot && pRoot->GetDict() && - pRoot->GetDict()->GetElement("Pages")) { - if (m_pTrailer) - m_pTrailer->Release(); - m_pTrailer = ToDictionary(pDict->Clone()); - } - } - } - } - - FX_FILESIZE offset = 0; - m_pSyntax->RestorePos(obj_pos); - offset = m_pSyntax->FindTag("obj", 0); - if (offset == -1) - offset = 0; - else - offset += 3; - - FX_FILESIZE nLen = obj_end - obj_pos - offset; - if ((FX_DWORD)nLen > size - i) { - pos = obj_end + m_pSyntax->m_HeaderOffset; - bOverFlow = true; - } else { - i += (FX_DWORD)nLen; - } - - if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && - m_ObjectInfo[objnum].pos) { - if (pObject) { - FX_DWORD oldgen = GetObjectGenNum(objnum); - m_ObjectInfo[objnum].pos = obj_pos; - m_ObjectInfo[objnum].gennum = gennum; - if (oldgen != gennum) - m_bVersionUpdated = TRUE; - } - } else { - m_ObjectInfo[objnum].pos = obj_pos; - m_ObjectInfo[objnum].type = 1; - m_ObjectInfo[objnum].gennum = gennum; - } - - if (pObject) - pObject->Release(); - } - --i; - state = ParserState::kDefault; - break; - } - break; - - case ParserState::kTrailer: - if (inside_index == 7) { - if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { - last_trailer = pos + i - 7; - m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset); - - CPDF_Object* pObj = m_pSyntax->GetObject(m_pDocument, 0, 0, true); - if (pObj) { - if (!pObj->IsDictionary() && !pObj->AsStream()) { - pObj->Release(); - } else { - CPDF_Stream* pStream = pObj->AsStream(); - if (CPDF_Dictionary* pTrailer = - pStream ? pStream->GetDict() : pObj->AsDictionary()) { - if (m_pTrailer) { - CPDF_Object* pRoot = pTrailer->GetElement("Root"); - CPDF_Reference* pRef = ToReference(pRoot); - if (!pRoot || - (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && - m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { - auto it = pTrailer->begin(); - while (it != pTrailer->end()) { - const CFX_ByteString& key = it->first; - CPDF_Object* pElement = it->second; - ++it; - FX_DWORD dwObjNum = - pElement ? pElement->GetObjNum() : 0; - if (dwObjNum) { - m_pTrailer->SetAtReference(key, m_pDocument, - dwObjNum); - } else { - m_pTrailer->SetAt(key, pElement->Clone()); - } - } - } - pObj->Release(); - } else { - if (pObj->IsStream()) { - m_pTrailer = ToDictionary(pTrailer->Clone()); - pObj->Release(); - } else { - m_pTrailer = pTrailer; - } - - FX_FILESIZE dwSavePos = m_pSyntax->SavePos(); - CFX_ByteString strWord = m_pSyntax->GetKeyword(); - if (!strWord.Compare("startxref")) { - bool bNumber; - CFX_ByteString bsOffset = - m_pSyntax->GetNextWord(&bNumber); - if (bNumber) - m_LastXRefOffset = FXSYS_atoi(bsOffset); - } - m_pSyntax->RestorePos(dwSavePos); - } - } else { - pObj->Release(); - } - } - } - } - --i; - state = ParserState::kDefault; - } else if (byte == "trailer"[inside_index]) { - inside_index++; - } else { - --i; - state = ParserState::kDefault; - } - break; - - case ParserState::kXref: - if (inside_index == 4) { - last_xref = pos + i - 4; - state = ParserState::kWhitespace; - } else if (byte == "xref"[inside_index]) { - inside_index++; - } else { - --i; - state = ParserState::kDefault; - } - break; - - case ParserState::kComment: - if (byte == '\r' || byte == '\n') - state = ParserState::kDefault; - break; - - case ParserState::kString: - if (byte == ')') { - if (depth > 0) - depth--; - } else if (byte == '(') { - depth++; - } - - if (!depth) - state = ParserState::kDefault; - break; - - case ParserState::kHexString: - if (byte == '>' || (byte == '<' && inside_index == 1)) - state = ParserState::kDefault; - inside_index = 0; - break; - - case ParserState::kEscapedString: - if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { - --i; - state = ParserState::kDefault; - } - break; - - case ParserState::kEndObj: - if (PDFCharIsWhitespace(byte)) { - state = ParserState::kDefault; - } else if (byte == '%' || byte == '(' || byte == '<' || - byte == '\\') { - state = ParserState::kDefault; - --i; - } else if (inside_index == 6) { - state = ParserState::kDefault; - --i; - } else if (byte == "endobj"[inside_index]) { - inside_index++; - } - break; - } - - if (bOverFlow) { - size = 0; - break; - } - } - pos += size; - - // If the position has not changed at all in a loop iteration, then break - // out to prevent infinite looping. - if (pos == saved_pos) - break; - } - - if (last_xref != -1 && last_xref > last_obj) - last_trailer = last_xref; - else if (last_trailer == -1 || last_xref < last_obj) - last_trailer = m_pSyntax->m_FileLen; - - m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset); - return m_pTrailer && !m_ObjectInfo.empty(); -} - -FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { - CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0); - if (!pObject) - return FALSE; - - if (m_pDocument) { - FX_BOOL bInserted = FALSE; - CPDF_Dictionary* pDict = m_pDocument->GetRoot(); - if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) { - bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject); - } else { - if (pObject->IsStream()) - pObject->Release(); - } - - if (!bInserted) - return FALSE; - } - - CPDF_Stream* pStream = pObject->AsStream(); - if (!pStream) - return FALSE; - - *pos = pStream->GetDict()->GetIntegerBy("Prev"); - int32_t size = pStream->GetDict()->GetIntegerBy("Size"); - if (size < 0) { - pStream->Release(); - return FALSE; - } - - if (bMainXRef) { - m_pTrailer = ToDictionary(pStream->GetDict()->Clone()); - ShrinkObjectMap(size); - for (auto& it : m_ObjectInfo) - it.second.type = 0; - } else { - m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone())); - } - - std::vector<std::pair<int32_t, int32_t> > arrIndex; - CPDF_Array* pArray = pStream->GetDict()->GetArrayBy("Index"); - if (pArray) { - FX_DWORD nPairSize = pArray->GetCount() / 2; - for (FX_DWORD i = 0; i < nPairSize; i++) { - CPDF_Object* pStartNumObj = pArray->GetElement(i * 2); - CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1); - - if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { - int nStartNum = pStartNumObj->GetInteger(); - int nCount = pCountObj->GetInteger(); - if (nStartNum >= 0 && nCount > 0) - arrIndex.push_back(std::make_pair(nStartNum, nCount)); - } - } - } - - if (arrIndex.size() == 0) - arrIndex.push_back(std::make_pair(0, size)); - - pArray = pStream->GetDict()->GetArrayBy("W"); - if (!pArray) { - pStream->Release(); - return FALSE; - } - - CFX_DWordArray WidthArray; - FX_SAFE_DWORD dwAccWidth = 0; - for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { - WidthArray.Add(pArray->GetIntegerAt(i)); - dwAccWidth += WidthArray[i]; - } - - if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) { - pStream->Release(); - return FALSE; - } - - FX_DWORD totalWidth = dwAccWidth.ValueOrDie(); - CPDF_StreamAcc acc; - acc.LoadAllData(pStream); - - const uint8_t* pData = acc.GetData(); - FX_DWORD dwTotalSize = acc.GetSize(); - FX_DWORD segindex = 0; - for (FX_DWORD i = 0; i < arrIndex.size(); i++) { - int32_t startnum = arrIndex[i].first; - if (startnum < 0) - continue; - - m_dwXrefStartObjNum = - pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum); - FX_DWORD count = - pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second); - FX_SAFE_DWORD dwCaculatedSize = segindex; - dwCaculatedSize += count; - dwCaculatedSize *= totalWidth; - if (!dwCaculatedSize.IsValid() || - dwCaculatedSize.ValueOrDie() > dwTotalSize) { - continue; - } - - const uint8_t* segstart = pData + segindex * totalWidth; - FX_SAFE_DWORD dwMaxObjNum = startnum; - dwMaxObjNum += count; - FX_DWORD dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; - if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) - continue; - - for (FX_DWORD j = 0; j < count; j++) { - int32_t type = 1; - const uint8_t* entrystart = segstart + j * totalWidth; - if (WidthArray[0]) - type = GetVarInt(entrystart, WidthArray[0]); - - if (GetObjectType(startnum + j) == 255) { - FX_FILESIZE offset = - GetVarInt(entrystart + WidthArray[0], WidthArray[1]); - m_ObjectInfo[startnum + j].pos = offset; - m_SortedOffset.insert(offset); - continue; - } - - if (GetObjectType(startnum + j)) - continue; - - m_ObjectInfo[startnum + j].type = type; - if (type == 0) { - m_ObjectInfo[startnum + j].pos = 0; - } else { - FX_FILESIZE offset = - GetVarInt(entrystart + WidthArray[0], WidthArray[1]); - m_ObjectInfo[startnum + j].pos = offset; - if (type == 1) { - m_SortedOffset.insert(offset); - } else { - if (offset < 0 || !IsValidObjectNumber(offset)) { - pStream->Release(); - return FALSE; - } - m_ObjectInfo[offset].type = 255; - } - } - } - segindex += count; - } - pStream->Release(); - return TRUE; -} - -CPDF_Array* CPDF_Parser::GetIDArray() { - CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : nullptr; - if (!pID) - return nullptr; - - if (CPDF_Reference* pRef = pID->AsReference()) { - pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum()); - m_pTrailer->SetAt("ID", pID); - } - return ToArray(pID); -} - -FX_DWORD CPDF_Parser::GetRootObjNum() { - CPDF_Reference* pRef = - ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr); - return pRef ? pRef->GetRefObjNum() : 0; -} - -FX_DWORD CPDF_Parser::GetInfoObjNum() { - CPDF_Reference* pRef = - ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr); - return pRef ? pRef->GetRefObjNum() : 0; -} - -FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) { - bForm = FALSE; - if (!IsValidObjectNumber(objnum)) - return TRUE; - - if (GetObjectType(objnum) == 0) - return TRUE; - - if (GetObjectType(objnum) == 2) - return TRUE; - - FX_FILESIZE pos = m_ObjectInfo[objnum].pos; - auto it = m_SortedOffset.find(pos); - if (it == m_SortedOffset.end()) - return TRUE; - - if (++it == m_SortedOffset.end()) - return FALSE; - - FX_FILESIZE size = *it - pos; - FX_FILESIZE SavedPos = m_pSyntax->SavePos(); - m_pSyntax->RestorePos(pos); - - const char kFormStream[] = "/Form\0stream"; - const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1); - bForm = m_pSyntax->SearchMultiWord(kFormStreamStr, TRUE, size) == 0; - m_pSyntax->RestorePos(SavedPos); - return TRUE; -} - -CPDF_Object* CPDF_Parser::ParseIndirectObject( - CPDF_IndirectObjectHolder* pObjList, - FX_DWORD objnum) { - if (!IsValidObjectNumber(objnum)) - return nullptr; - - // Prevent circular parsing the same object. - if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) - return nullptr; - ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum); - - if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) { - FX_FILESIZE pos = m_ObjectInfo[objnum].pos; - if (pos <= 0) - return nullptr; - return ParseIndirectObjectAt(pObjList, pos, objnum); - } - if (GetObjectType(objnum) != 2) - return nullptr; - - CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); - if (!pObjStream) - return nullptr; - - ScopedFileStream file(FX_CreateMemoryStream( - (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE)); - CPDF_SyntaxParser syntax; - syntax.InitParser(file.get(), 0); - const int32_t offset = GetStreamFirst(pObjStream); - - // Read object numbers from |pObjStream| into a cache. - if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { - for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { - FX_DWORD thisnum = syntax.GetDirectNum(); - FX_DWORD thisoff = syntax.GetDirectNum(); - m_ObjCache[pObjStream][thisnum] = thisoff; - } - } - - const auto it = m_ObjCache[pObjStream].find(objnum); - if (it == m_ObjCache[pObjStream].end()) - return nullptr; - - syntax.RestorePos(offset + it->second); - return syntax.GetObject(pObjList, 0, 0, true); -} - -CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) { - auto it = m_ObjectStreamMap.find(objnum); - if (it != m_ObjectStreamMap.end()) - return it->second.get(); - - if (!m_pDocument) - return nullptr; - - const CPDF_Stream* pStream = ToStream(m_pDocument->GetIndirectObject(objnum)); - if (!pStream) - return nullptr; - - CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc; - pStreamAcc->LoadAllData(pStream); - m_ObjectStreamMap[objnum].reset(pStreamAcc); - return pStreamAcc; -} - -FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const { - if (!IsValidObjectNumber(objnum)) - return 0; - - if (GetObjectType(objnum) == 2) - objnum = GetObjectPositionOrZero(objnum); - - if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255) - return 0; - - FX_FILESIZE offset = GetObjectPositionOrZero(objnum); - if (offset == 0) - return 0; - - auto it = m_SortedOffset.find(offset); - if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) - return 0; - - return *it - offset; -} - -void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, - uint8_t*& pBuffer, - FX_DWORD& size) { - pBuffer = nullptr; - size = 0; - if (!IsValidObjectNumber(objnum)) - return; - - if (GetObjectType(objnum) == 2) { - CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); - if (!pObjStream) - return; - - int32_t offset = GetStreamFirst(pObjStream); - const uint8_t* pData = pObjStream->GetData(); - FX_DWORD totalsize = pObjStream->GetSize(); - ScopedFileStream file( - FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE)); - - CPDF_SyntaxParser syntax; - syntax.InitParser(file.get(), 0); - for (int i = GetStreamNCount(pObjStream); i > 0; --i) { - FX_DWORD thisnum = syntax.GetDirectNum(); - FX_DWORD thisoff = syntax.GetDirectNum(); - if (thisnum != objnum) - continue; - - if (i == 1) { - size = totalsize - (thisoff + offset); - } else { - syntax.GetDirectNum(); // Skip nextnum. - FX_DWORD nextoff = syntax.GetDirectNum(); - size = nextoff - thisoff; - } - - pBuffer = FX_Alloc(uint8_t, size); - FXSYS_memcpy(pBuffer, pData + thisoff + offset, size); - return; - } - return; - } - - if (GetObjectType(objnum) != 1) - return; - - FX_FILESIZE pos = m_ObjectInfo[objnum].pos; - if (pos == 0) - return; - - FX_FILESIZE SavedPos = m_pSyntax->SavePos(); - m_pSyntax->RestorePos(pos); - - bool bIsNumber; - CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) { - m_pSyntax->RestorePos(SavedPos); - return; - } - - FX_DWORD parser_objnum = FXSYS_atoui(word); - if (parser_objnum && parser_objnum != objnum) { - m_pSyntax->RestorePos(SavedPos); - return; - } - - word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) { - m_pSyntax->RestorePos(SavedPos); - return; - } - - if (m_pSyntax->GetKeyword() != "obj") { - m_pSyntax->RestorePos(SavedPos); - return; - } - - auto it = m_SortedOffset.find(pos); - if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { - m_pSyntax->RestorePos(SavedPos); - return; - } - - FX_FILESIZE nextoff = *it; - FX_BOOL bNextOffValid = FALSE; - if (nextoff != pos) { - m_pSyntax->RestorePos(nextoff); - word = m_pSyntax->GetNextWord(&bIsNumber); - if (word == "xref") { - bNextOffValid = TRUE; - } else if (bIsNumber) { - word = m_pSyntax->GetNextWord(&bIsNumber); - if (bIsNumber && m_pSyntax->GetKeyword() == "obj") { - bNextOffValid = TRUE; - } - } - } - - if (!bNextOffValid) { - m_pSyntax->RestorePos(pos); - while (1) { - if (m_pSyntax->GetKeyword() == "endobj") - break; - - if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen) - break; - } - nextoff = m_pSyntax->SavePos(); - } - - size = (FX_DWORD)(nextoff - pos); - pBuffer = FX_Alloc(uint8_t, size); - m_pSyntax->RestorePos(pos); - m_pSyntax->ReadBlock(pBuffer, size); - m_pSyntax->RestorePos(SavedPos); -} - -CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( - CPDF_IndirectObjectHolder* pObjList, - FX_FILESIZE pos, - FX_DWORD objnum) { - FX_FILESIZE SavedPos = m_pSyntax->SavePos(); - m_pSyntax->RestorePos(pos); - bool bIsNumber; - CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) { - m_pSyntax->RestorePos(SavedPos); - return nullptr; - } - - FX_FILESIZE objOffset = m_pSyntax->SavePos(); - objOffset -= word.GetLength(); - FX_DWORD parser_objnum = FXSYS_atoui(word); - if (objnum && parser_objnum != objnum) { - m_pSyntax->RestorePos(SavedPos); - return nullptr; - } - - word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) { - m_pSyntax->RestorePos(SavedPos); - return nullptr; - } - - FX_DWORD parser_gennum = FXSYS_atoui(word); - if (m_pSyntax->GetKeyword() != "obj") { - m_pSyntax->RestorePos(SavedPos); - return nullptr; - } - - CPDF_Object* pObj = - m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true); - m_pSyntax->SavePos(); - - CFX_ByteString bsWord = m_pSyntax->GetKeyword(); - if (bsWord == "endobj") - m_pSyntax->SavePos(); - - m_pSyntax->RestorePos(SavedPos); - if (pObj) { - if (!objnum) - pObj->m_ObjNum = parser_objnum; - pObj->m_GenNum = parser_gennum; - } - return pObj; -} - -CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( - CPDF_IndirectObjectHolder* pObjList, - FX_FILESIZE pos, - FX_DWORD objnum, - FX_FILESIZE* pResultPos) { - FX_FILESIZE SavedPos = m_pSyntax->SavePos(); - m_pSyntax->RestorePos(pos); - - bool bIsNumber; - CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) { - m_pSyntax->RestorePos(SavedPos); - return nullptr; - } - - FX_DWORD parser_objnum = FXSYS_atoui(word); - if (objnum && parser_objnum != objnum) { - m_pSyntax->RestorePos(SavedPos); - return nullptr; - } - - word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) { - m_pSyntax->RestorePos(SavedPos); - return nullptr; - } - - FX_DWORD gennum = FXSYS_atoui(word); - if (m_pSyntax->GetKeyword() != "obj") { - m_pSyntax->RestorePos(SavedPos); - return nullptr; - } - - CPDF_Object* pObj = m_pSyntax->GetObjectByStrict(pObjList, objnum, gennum); - if (pResultPos) - *pResultPos = m_pSyntax->m_Pos; - - m_pSyntax->RestorePos(SavedPos); - return pObj; -} - -CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { - if (m_pSyntax->GetKeyword() != "trailer") - return nullptr; - - std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( - m_pSyntax->GetObject(m_pDocument, 0, 0, true)); - if (!ToDictionary(pObj.get())) - return nullptr; - return pObj.release()->AsDictionary(); -} - -FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) { - if (!m_pSecurityHandler) - return (FX_DWORD)-1; - - FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions(); - if (m_pEncryptDict && m_pEncryptDict->GetStringBy("Filter") == "Standard") { - dwPermission &= 0xFFFFFFFC; - dwPermission |= 0xFFFFF0C0; - if (bCheckRevision && m_pEncryptDict->GetIntegerBy("R") == 2) - dwPermission &= 0xFFFFF0FF; - } - return dwPermission; -} - -FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, - FX_DWORD offset) { - m_pSyntax->InitParser(pFileAccess, offset); - m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9); - - FX_FILESIZE SavedPos = m_pSyntax->SavePos(); - bool bIsNumber; - CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) - return FALSE; - - FX_DWORD objnum = FXSYS_atoui(word); - word = m_pSyntax->GetNextWord(&bIsNumber); - if (!bIsNumber) - return FALSE; - - FX_DWORD gennum = FXSYS_atoui(word); - if (m_pSyntax->GetKeyword() != "obj") { - m_pSyntax->RestorePos(SavedPos); - return FALSE; - } - - m_pLinearized = m_pSyntax->GetObject(nullptr, objnum, gennum, true); - if (!m_pLinearized) - return FALSE; - - CPDF_Dictionary* pDict = m_pLinearized->GetDict(); - if (pDict && pDict->GetElement("Linearized")) { - m_pSyntax->GetNextWord(nullptr); - - CPDF_Object* pLen = pDict->GetElement("L"); - if (!pLen) { - m_pLinearized->Release(); - m_pLinearized = nullptr; - return FALSE; - } - - if (pLen->GetInteger() != (int)pFileAccess->GetSize()) - return FALSE; - - if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P"))) - m_dwFirstPageNo = pNo->GetInteger(); - - if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T"))) - m_LastXRefOffset = pTable->GetInteger(); - - return TRUE; - } - m_pLinearized->Release(); - m_pLinearized = nullptr; - return FALSE; -} - -CPDF_Parser::Error CPDF_Parser::StartAsyncParse(IFX_FileRead* pFileAccess) { - CloseParser(); - m_bXRefStream = FALSE; - m_LastXRefOffset = 0; - m_bOwnFileRead = true; - - int32_t offset = GetHeaderOffset(pFileAccess); - if (offset == -1) - return FORMAT_ERROR; - - if (!IsLinearizedFile(pFileAccess, offset)) { - m_pSyntax->m_pFileAccess = nullptr; - return StartParse(pFileAccess); - } - - m_pDocument = new CPDF_Document(this); - FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos(); - - FX_BOOL bXRefRebuilt = FALSE; - FX_BOOL bLoadV4 = FALSE; - if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) && - !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) { - if (!RebuildCrossRef()) - return FORMAT_ERROR; - - bXRefRebuilt = TRUE; - m_LastXRefOffset = 0; - } - - if (bLoadV4) { - m_pTrailer = LoadTrailerV4(); - if (!m_pTrailer) - return SUCCESS; - - int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); - if (xrefsize > 0) - ShrinkObjectMap(xrefsize); - } - - Error eRet = SetEncryptHandler(); - if (eRet != SUCCESS) - return eRet; - - m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); - if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { - if (bXRefRebuilt) - return FORMAT_ERROR; - - ReleaseEncryptHandler(); - if (!RebuildCrossRef()) - return FORMAT_ERROR; - - eRet = SetEncryptHandler(); - if (eRet != SUCCESS) - return eRet; - - m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); - if (!m_pDocument->GetRoot()) - return FORMAT_ERROR; - } - - if (GetRootObjNum() == 0) { - ReleaseEncryptHandler(); - if (!RebuildCrossRef() || GetRootObjNum() == 0) - return FORMAT_ERROR; - - eRet = SetEncryptHandler(); - if (eRet != SUCCESS) - return eRet; - } - - if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { - if (CPDF_Reference* pMetadata = - ToReference(m_pDocument->GetRoot()->GetElement("Metadata"))) - m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); - } - return SUCCESS; -} - -FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { - if (!LoadCrossRefV5(&xrefpos, FALSE)) - return FALSE; - - std::set<FX_FILESIZE> seen_xrefpos; - while (xrefpos) { - seen_xrefpos.insert(xrefpos); - if (!LoadCrossRefV5(&xrefpos, FALSE)) - return FALSE; - - // Check for circular references. - if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) - return FALSE; - } - m_ObjectStreamMap.clear(); - m_bXRefStream = TRUE; - return TRUE; -} - -CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { - FX_DWORD dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum; - m_pSyntax->m_MetadataObjnum = 0; - if (m_pTrailer) { - m_pTrailer->Release(); - m_pTrailer = nullptr; - } - - m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); - uint8_t ch = 0; - FX_DWORD dwCount = 0; - m_pSyntax->GetNextChar(ch); - while (PDFCharIsWhitespace(ch)) { - ++dwCount; - if (m_pSyntax->m_FileLen >= - (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { - break; - } - m_pSyntax->GetNextChar(ch); - } - m_LastXRefOffset += dwCount; - m_ObjectStreamMap.clear(); - m_ObjCache.clear(); - - if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && - !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { - m_LastXRefOffset = 0; - m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; - return FORMAT_ERROR; - } - - m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; - return SUCCESS; -} - class CPDF_DataAvail final : public IPDF_DataAvail { public: diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp index 68068cb522..c8e1ceec0b 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_parser_unittest.cpp @@ -5,6 +5,7 @@ #include <limits> #include <string> +#include "core/include/fpdfapi/cpdf_parser.h" #include "core/include/fpdfapi/fpdf_parser.h" #include "core/include/fxcrt/fx_stream.h" #include "core/src/fpdfapi/fpdf_parser/cpdf_syntax_parser.h" diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp index 37b82dd564..a08cf7d466 100644 --- a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.cpp @@ -4,8 +4,9 @@ // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com -#include "core/include/fpdfapi/fpdf_parser.h" +#include "core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h" +#include "core/include/fpdfapi/fpdf_parser.h" #include "core/include/fxcrt/fx_ext.h" // Indexed by 8-bit character code, contains either: @@ -60,171 +61,28 @@ const char PDF_CharType[256] = { 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'}; -CPDF_SimpleParser::CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize) { - m_pData = pData; - m_dwSize = dwSize; - m_dwCurPos = 0; -} - -CPDF_SimpleParser::CPDF_SimpleParser(const CFX_ByteStringC& str) { - m_pData = str.GetPtr(); - m_dwSize = str.GetLength(); - m_dwCurPos = 0; -} +int32_t GetHeaderOffset(IFX_FileRead* pFile) { + // TODO(dsinclair): This is a complicated way of saying %PDF, simplify? + const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025); -void CPDF_SimpleParser::ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize) { - pStart = NULL; - dwSize = 0; - uint8_t ch; - while (1) { - if (m_dwSize <= m_dwCurPos) - return; - ch = m_pData[m_dwCurPos++]; - while (PDFCharIsWhitespace(ch)) { - if (m_dwSize <= m_dwCurPos) - return; - ch = m_pData[m_dwCurPos++]; - } + const size_t kBufSize = 4; + uint8_t buf[kBufSize]; + int32_t offset = 0; + while (offset <= 1024) { + if (!pFile->ReadBlock(buf, offset, kBufSize)) + return -1; - if (ch != '%') - break; - - while (1) { - if (m_dwSize <= m_dwCurPos) - return; - ch = m_pData[m_dwCurPos++]; - if (ch == '\r' || ch == '\n') - break; - } - } + if (*(FX_DWORD*)buf == tag) + return offset; - FX_DWORD start_pos = m_dwCurPos - 1; - pStart = m_pData + start_pos; - if (PDFCharIsDelimiter(ch)) { - if (ch == '/') { - while (1) { - if (m_dwSize <= m_dwCurPos) - return; - ch = m_pData[m_dwCurPos++]; - if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { - m_dwCurPos--; - dwSize = m_dwCurPos - start_pos; - return; - } - } - } else { - dwSize = 1; - if (ch == '<') { - if (m_dwSize <= m_dwCurPos) - return; - ch = m_pData[m_dwCurPos++]; - if (ch == '<') - dwSize = 2; - else - m_dwCurPos--; - } else if (ch == '>') { - if (m_dwSize <= m_dwCurPos) - return; - ch = m_pData[m_dwCurPos++]; - if (ch == '>') - dwSize = 2; - else - m_dwCurPos--; - } - } - return; - } - - dwSize = 1; - while (1) { - if (m_dwSize <= m_dwCurPos) - return; - ch = m_pData[m_dwCurPos++]; - - if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { - m_dwCurPos--; - break; - } - dwSize++; + ++offset; } + return -1; } -CFX_ByteStringC CPDF_SimpleParser::GetWord() { - const uint8_t* pStart; - FX_DWORD dwSize; - ParseWord(pStart, dwSize); - if (dwSize == 1 && pStart[0] == '<') { - while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') { - m_dwCurPos++; - } - if (m_dwCurPos < m_dwSize) { - m_dwCurPos++; - } - return CFX_ByteStringC(pStart, - (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); - } - if (dwSize == 1 && pStart[0] == '(') { - int level = 1; - while (m_dwCurPos < m_dwSize) { - if (m_pData[m_dwCurPos] == ')') { - level--; - if (level == 0) { - break; - } - } - if (m_pData[m_dwCurPos] == '\\') { - if (m_dwSize <= m_dwCurPos) { - break; - } - m_dwCurPos++; - } else if (m_pData[m_dwCurPos] == '(') { - level++; - } - if (m_dwSize <= m_dwCurPos) { - break; - } - m_dwCurPos++; - } - if (m_dwCurPos < m_dwSize) { - m_dwCurPos++; - } - return CFX_ByteStringC(pStart, - (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData))); - } - return CFX_ByteStringC(pStart, dwSize); -} - -bool CPDF_SimpleParser::FindTagParamFromStart(const CFX_ByteStringC& token, - int nParams) { - nParams++; - FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams); - int buf_index = 0; - int buf_count = 0; - m_dwCurPos = 0; - while (1) { - pBuf[buf_index++] = m_dwCurPos; - if (buf_index == nParams) { - buf_index = 0; - } - buf_count++; - if (buf_count > nParams) { - buf_count = nParams; - } - CFX_ByteStringC word = GetWord(); - if (word.IsEmpty()) { - FX_Free(pBuf); - return false; - } - if (word == token) { - if (buf_count < nParams) { - continue; - } - m_dwCurPos = pBuf[buf_index]; - FX_Free(pBuf); - return true; - } - } - return false; +int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) { + CPDF_Number* pObj = ToNumber(pDict->GetElement(key)); + return pObj ? pObj->GetInteger() : 0; } CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& bstr) { diff --git a/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h new file mode 100644 index 0000000000..cdc1199623 --- /dev/null +++ b/core/src/fpdfapi/fpdf_parser/fpdf_parser_utility.h @@ -0,0 +1,39 @@ +// Copyright 2016 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com + +#ifndef CORE_SRC_FPDFAPI_FPDF_PARSER_FPDF_PARSER_UTILITY_H_ +#define CORE_SRC_FPDFAPI_FPDF_PARSER_FPDF_PARSER_UTILITY_H_ + +#include "core/include/fxcrt/fx_string.h" +#include "core/include/fxcrt/fx_system.h" + +class IFX_FileRead; +class CPDF_Dictionary; + +// Use the accessors below instead of directly accessing PDF_CharType. +extern const char PDF_CharType[256]; + +inline bool PDFCharIsWhitespace(uint8_t c) { + return PDF_CharType[c] == 'W'; +} +inline bool PDFCharIsNumeric(uint8_t c) { + return PDF_CharType[c] == 'N'; +} +inline bool PDFCharIsDelimiter(uint8_t c) { + return PDF_CharType[c] == 'D'; +} +inline bool PDFCharIsOther(uint8_t c) { + return PDF_CharType[c] == 'R'; +} + +inline bool PDFCharIsLineEnding(uint8_t c) { + return c == '\r' || c == '\n'; +} + +int32_t GetHeaderOffset(IFX_FileRead* pFile); +int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key); + +#endif // CORE_SRC_FPDFAPI_FPDF_PARSER_FPDF_PARSER_UTILITY_H_ |