// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

#include "core/fpdfapi/parser/cpdf_parser.h"

#include <cctype>
#include <cstring>
#include <memory>
#include <set>
#include <utility>
#include <vector>

#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_crypto_handler.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_linearized_header.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_object_stream.h"
#include "core/fpdfapi/parser/cpdf_read_validator.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_security_handler.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_stream_acc.h"
#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
#include "core/fpdfapi/parser/fpdf_parser_utility.h"
#include "core/fxcrt/autorestorer.h"
#include "core/fxcrt/cfx_memorystream.h"
#include "core/fxcrt/fx_extension.h"
#include "core/fxcrt/fx_safe_types.h"
#include "third_party/base/ptr_util.h"
#include "third_party/base/stl_util.h"

namespace {

// A limit on the size of the xref table. Theoretical limits are higher, but
// this may be large enough in practice.
const int32_t kMaxXRefSize = 1048576; // "%PDF-1.7\n" constexpr FX_FILESIZE kPDFHeaderSize = 9; uint32_t GetVarInt(const uint8_t* p, int32_t n) { uint32_t result = 0; for (int32_t i = 0; i < n; ++i) result = result * 256 + p[i]; return result; } class ObjectsHolderStub : public CPDF_Parser::ParsedObjectsHolder { public: ObjectsHolderStub() = default; ~ObjectsHolderStub() override = default; bool TryInit() override { return true; } }; } // namespace CPDF_Parser::CPDF_Parser(ParsedObjectsHolder* holder) : m_pObjectsHolder(holder), m_CrossRefTable(pdfium::MakeUnique()) { if (!holder) { m_pOwnedObjectsHolder = pdfium::MakeUnique(); m_pObjectsHolder = m_pOwnedObjectsHolder.get(); } } CPDF_Parser::CPDF_Parser() : CPDF_Parser(nullptr) {} CPDF_Parser::~CPDF_Parser() { ReleaseEncryptHandler(); } uint32_t CPDF_Parser::GetLastObjNum() const { const uint32_t size = m_CrossRefTable->GetSize(); return size ? size - 1 : 0; } bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const { return objnum <= GetLastObjNum(); } FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const { const auto* info = m_CrossRefTable->GetObjectInfo(objnum); return (info && info->type == ObjectType::kNormal) ? info->pos : 0; } CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const { ASSERT(IsValidObjectNumber(objnum)); const auto* info = m_CrossRefTable->GetObjectInfo(objnum); return info ? info->type : ObjectType::kFree; } uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const { ASSERT(IsValidObjectNumber(objnum)); const auto* info = m_CrossRefTable->GetObjectInfo(objnum); return (info && info->type == ObjectType::kNormal) ? 
info->gennum : 0; } bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const { switch (GetObjectType(objnum)) { case ObjectType::kFree: case ObjectType::kNull: return true; case ObjectType::kNotCompressed: case ObjectType::kCompressed: return false; } NOTREACHED(); return false; } bool CPDF_Parser::IsObjectFree(uint32_t objnum) const { return GetObjectType(objnum) == ObjectType::kFree; } void CPDF_Parser::ShrinkObjectMap(uint32_t max_size) { m_CrossRefTable->ShrinkObjectMap(max_size); } bool CPDF_Parser::InitSyntaxParser( const RetainPtr& validator) { const Optional header_offset = GetHeaderOffset(validator); if (!header_offset) return false; if (validator->GetSize() < *header_offset + kPDFHeaderSize) return false; m_pSyntax = pdfium::MakeUnique(validator, *header_offset); return ParseFileVersion(); } bool CPDF_Parser::ParseFileVersion() { m_FileVersion = 0; uint8_t ch; if (!m_pSyntax->GetCharAt(5, ch)) return false; if (std::isdigit(ch)) m_FileVersion = FXSYS_DecimalCharToInt(static_cast(ch)) * 10; if (!m_pSyntax->GetCharAt(7, ch)) return false; if (std::isdigit(ch)) m_FileVersion += FXSYS_DecimalCharToInt(static_cast(ch)); return true; } CPDF_Parser::Error CPDF_Parser::StartParse( const RetainPtr& pFileAccess, const char* password) { if (!InitSyntaxParser( pdfium::MakeRetain(pFileAccess, nullptr))) return FORMAT_ERROR; SetPassword(password); return StartParseInternal(); } CPDF_Parser::Error CPDF_Parser::StartParseInternal() { ASSERT(!m_bHasParsed); m_bHasParsed = true; m_bXRefStream = false; bool bXRefRebuilt = false; m_LastXRefOffset = ParseStartXRef(); if (m_LastXRefOffset > 0) { if (!LoadAllCrossRefV4(m_LastXRefOffset) && !LoadAllCrossRefV5(m_LastXRefOffset)) { if (!RebuildCrossRef()) return FORMAT_ERROR; bXRefRebuilt = true; m_LastXRefOffset = 0; } } else { if (!RebuildCrossRef()) return FORMAT_ERROR; bXRefRebuilt = true; } Error eRet = SetEncryptHandler(); if (eRet != SUCCESS) return eRet; if (!GetRoot() || !m_pObjectsHolder->TryInit()) { if 
(bXRefRebuilt) return FORMAT_ERROR; ReleaseEncryptHandler(); if (!RebuildCrossRef()) return FORMAT_ERROR; eRet = SetEncryptHandler(); if (eRet != SUCCESS) return eRet; m_pObjectsHolder->TryInit(); if (!GetRoot()) return FORMAT_ERROR; } if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) { ReleaseEncryptHandler(); if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum) return FORMAT_ERROR; eRet = SetEncryptHandler(); if (eRet != SUCCESS) return eRet; } if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { CPDF_Reference* pMetadata = ToReference(GetRoot()->GetObjectFor("Metadata")); if (pMetadata) m_MetadataObjnum = pMetadata->GetRefObjNum(); } return SUCCESS; } FX_FILESIZE CPDF_Parser::ParseStartXRef() { static constexpr char kStartXRefKeyword[] = "startxref"; m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword)); if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096)) return 0; // Skip "startxref" keyword. m_pSyntax->GetKeyword(); // Read XRef offset. bool bNumber; const ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); if (!bNumber || xrefpos_str.IsEmpty()) return 0; const FX_SAFE_FILESIZE result = FXSYS_atoi64(xrefpos_str.c_str()); if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize()) return 0; return result.ValueOrDie(); } CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { ReleaseEncryptHandler(); if (!GetTrailer()) return FORMAT_ERROR; const CPDF_Dictionary* pEncryptDict = GetEncryptDict(); if (!pEncryptDict) return SUCCESS; if (pEncryptDict->GetStringFor("Filter") != "Standard") return HANDLER_ERROR; std::unique_ptr pSecurityHandler = pdfium::MakeUnique(); if (!pSecurityHandler->OnInit(pEncryptDict, GetIDArray(), m_Password)) return PASSWORD_ERROR; m_pSecurityHandler = std::move(pSecurityHandler); return SUCCESS; } void CPDF_Parser::ReleaseEncryptHandler() { m_pSecurityHandler.reset(); } // Ideally, all the cross reference entries should be verified. 
// In reality, we rarely see well-formed cross references don't match // with the objects. crbug/602650 showed a case where object numbers // in the cross reference table are all off by one. bool CPDF_Parser::VerifyCrossRefV4() { for (const auto& it : m_CrossRefTable->objects_info()) { if (it.second.pos == 0) continue; // Find the first non-zero position. FX_FILESIZE SavedPos = m_pSyntax->GetPos(); m_pSyntax->SetPos(it.second.pos); bool is_num = false; ByteString num_str = m_pSyntax->GetNextWord(&is_num); m_pSyntax->SetPos(SavedPos); if (!is_num || num_str.IsEmpty() || FXSYS_atoui(num_str.c_str()) != it.first) { // If the object number read doesn't match the one stored, // something is wrong with the cross reference table. return false; } break; } return true; } bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { if (!LoadCrossRefV4(xrefpos, true)) return false; std::unique_ptr trailer = LoadTrailerV4(); if (!trailer) return false; m_CrossRefTable->SetTrailer(std::move(trailer)); int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size"); if (xrefsize > 0 && xrefsize <= kMaxXRefSize) ShrinkObjectMap(xrefsize); std::vector CrossRefList; std::vector XRefStreamList; std::set seen_xrefpos; CrossRefList.push_back(xrefpos); XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm")); seen_xrefpos.insert(xrefpos); // When the trailer doesn't have Prev entry or Prev entry value is not // numerical, GetDirectInteger() returns 0. Loading will end. xrefpos = GetDirectInteger(GetTrailer(), "Prev"); while (xrefpos) { // Check for circular references. if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) return false; seen_xrefpos.insert(xrefpos); // SLOW ... CrossRefList.insert(CrossRefList.begin(), xrefpos); LoadCrossRefV4(xrefpos, true); std::unique_ptr pDict(LoadTrailerV4()); if (!pDict) return false; xrefpos = GetDirectInteger(pDict.get(), "Prev"); // SLOW ... 
XRefStreamList.insert(XRefStreamList.begin(), pDict->GetIntegerFor("XRefStm")); m_CrossRefTable = CPDF_CrossRefTable::MergeUp( pdfium::MakeUnique(std::move(pDict)), std::move(m_CrossRefTable)); } for (size_t i = 0; i < CrossRefList.size(); ++i) { if (!LoadCrossRefV4(CrossRefList[i], false)) return false; if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false)) return false; if (i == 0 && !VerifyCrossRefV4()) return false; } return true; } bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) { if (!LoadCrossRefV4(xrefpos, false)) return false; std::unique_ptr trailer = LoadTrailerV4(); if (!trailer) return false; m_CrossRefTable = CPDF_CrossRefTable::MergeUp( pdfium::MakeUnique(std::move(trailer)), std::move(m_CrossRefTable)); int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size"); if (xrefsize == 0) return false; std::vector CrossRefList; std::vector XRefStreamList; std::set seen_xrefpos; CrossRefList.push_back(xrefpos); XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm")); seen_xrefpos.insert(xrefpos); xrefpos = GetDirectInteger(GetTrailer(), "Prev"); while (xrefpos) { // Check for circular references. if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) return false; seen_xrefpos.insert(xrefpos); // SLOW ... CrossRefList.insert(CrossRefList.begin(), xrefpos); LoadCrossRefV4(xrefpos, true); std::unique_ptr pDict(LoadTrailerV4()); if (!pDict) return false; xrefpos = GetDirectInteger(pDict.get(), "Prev"); // SLOW ... 
XRefStreamList.insert(XRefStreamList.begin(), pDict->GetIntegerFor("XRefStm")); m_CrossRefTable = CPDF_CrossRefTable::MergeUp( pdfium::MakeUnique(std::move(pDict)), std::move(m_CrossRefTable)); } for (size_t i = 1; i < CrossRefList.size(); ++i) { if (!LoadCrossRefV4(CrossRefList[i], false)) return false; if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false)) return false; } return true; } bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData( uint32_t start_objnum, uint32_t count, std::vector* out_objects) { // Each entry shall be exactly 20 byte. // A sample entry looks like: // "0000000000 00007 f\r\n" static constexpr int32_t kEntryConstSize = 20; if (!out_objects) { FX_SAFE_FILESIZE pos = count; pos *= kEntryConstSize; pos += m_pSyntax->GetPos(); if (!pos.IsValid()) return false; m_pSyntax->SetPos(pos.ValueOrDie()); return true; } const size_t start_obj_index = out_objects->size(); FX_SAFE_SIZE_T new_size = start_obj_index; new_size += count; if (!new_size.IsValid()) return false; if (new_size.ValueOrDie() > kMaxXRefSize) return false; const size_t max_entries_in_file = m_pSyntax->GetDocumentSize() / kEntryConstSize; if (new_size.ValueOrDie() > max_entries_in_file) return false; out_objects->resize(new_size.ValueOrDie()); std::vector buf(1024 * kEntryConstSize + 1); buf.back() = '\0'; int32_t nBlocks = count / 1024 + 1; for (int32_t block = 0; block < nBlocks; block++) { int32_t block_size = block == nBlocks - 1 ? 
count % 1024 : 1024; if (!m_pSyntax->ReadBlock(reinterpret_cast(buf.data()), block_size * kEntryConstSize)) { return false; } for (int32_t i = 0; i < block_size; i++) { CrossRefObjData& obj_data = (*out_objects)[start_obj_index + block * 1024 + i]; const uint32_t objnum = start_objnum + block * 1024 + i; obj_data.obj_num = objnum; ObjectInfo& info = obj_data.info; char* pEntry = &buf[i * kEntryConstSize]; if (pEntry[17] == 'f') { info.pos = 0; info.type = ObjectType::kFree; } else { const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry); if (!offset.IsValid()) return false; if (offset.ValueOrDie() == 0) { for (int32_t c = 0; c < 10; c++) { if (!std::isdigit(pEntry[c])) return false; } } info.pos = offset.ValueOrDie(); // TODO(art-snake): The info.gennum is uint16_t, but version may be // greated than max. Needs solve this issue. const int32_t version = FXSYS_atoi(pEntry + 11); info.gennum = version; info.type = ObjectType::kNotCompressed; } } } return true; } bool CPDF_Parser::ParseCrossRefV4(std::vector* out_objects) { if (out_objects) out_objects->clear(); if (m_pSyntax->GetKeyword() != "xref") return false; std::vector result_objects; while (1) { FX_FILESIZE SavedPos = m_pSyntax->GetPos(); bool bIsNumber; ByteString word = m_pSyntax->GetNextWord(&bIsNumber); if (word.IsEmpty()) { return false; } if (!bIsNumber) { m_pSyntax->SetPos(SavedPos); break; } uint32_t start_objnum = FXSYS_atoui(word.c_str()); if (start_objnum >= kMaxObjectNumber) return false; uint32_t count = m_pSyntax->GetDirectNum(); m_pSyntax->ToNextWord(); SavedPos = m_pSyntax->GetPos(); if (!ParseAndAppendCrossRefSubsectionData( start_objnum, count, out_objects ? &result_objects : nullptr)) { return false; } } if (out_objects) *out_objects = std::move(result_objects); return true; } bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, bool bSkip) { m_pSyntax->SetPos(pos); std::vector objects; if (!ParseCrossRefV4(bSkip ? 
nullptr : &objects)) return false; MergeCrossRefObjectsData(objects); return true; } void CPDF_Parser::MergeCrossRefObjectsData( const std::vector& objects) { for (const auto& obj : objects) { switch (obj.info.type) { case ObjectType::kFree: if (obj.info.gennum > 0) m_CrossRefTable->SetFree(obj.obj_num); break; case ObjectType::kNormal: case ObjectType::kObjStream: m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum, obj.info.pos); break; case ObjectType::kCompressed: m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive_obj_num); break; default: NOTREACHED(); } } } bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { if (!LoadCrossRefV5(&xrefpos, true)) return false; std::set seen_xrefpos; while (xrefpos) { seen_xrefpos.insert(xrefpos); if (!LoadCrossRefV5(&xrefpos, false)) return false; // Check for circular references. if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) return false; } m_ObjectStreamMap.clear(); m_bXRefStream = true; return true; } bool CPDF_Parser::RebuildCrossRef() { auto cross_ref_table = pdfium::MakeUnique(); const uint32_t kBufferSize = 4096; m_pSyntax->SetReadBufferSize(kBufferSize); m_pSyntax->SetPos(0); bool bIsNumber; std::vector> numbers; for (ByteString word = m_pSyntax->GetNextWord(&bIsNumber); !word.IsEmpty(); word = m_pSyntax->GetNextWord(&bIsNumber)) { if (bIsNumber) { numbers.emplace_back(FXSYS_atoui(word.c_str()), m_pSyntax->GetPos() - word.GetLength()); if (numbers.size() > 2u) numbers.erase(numbers.begin()); continue; } if (word == "(") { m_pSyntax->ReadString(); } else if (word == "<") { m_pSyntax->ReadHexString(); } else if (word == "trailer") { std::unique_ptr pTrailer = m_pSyntax->GetObjectBody(nullptr); if (pTrailer) { cross_ref_table = CPDF_CrossRefTable::MergeUp( std::move(cross_ref_table), pdfium::MakeUnique(ToDictionary( pTrailer->IsStream() ? 
pTrailer->AsStream()->GetDict()->Clone() : std::move(pTrailer)))); } } else if (word == "obj" && numbers.size() == 2u) { const FX_FILESIZE obj_pos = numbers[0].second; const uint32_t obj_num = numbers[0].first; const uint32_t gen_num = numbers[1].first; m_pSyntax->SetPos(obj_pos); const std::unique_ptr pStream = ToStream(m_pSyntax->GetIndirectObject( nullptr, CPDF_SyntaxParser::ParseType::kStrict)); if (pStream && pStream->GetDict()->GetStringFor("Type") == "XRef") { cross_ref_table = CPDF_CrossRefTable::MergeUp( std::move(cross_ref_table), pdfium::MakeUnique( ToDictionary(pStream->GetDict()->Clone()))); } if (obj_num < kMaxObjectNumber) { cross_ref_table->AddNormal(obj_num, gen_num, obj_pos); if (const auto object_stream = CPDF_ObjectStream::Create(pStream.get())) { for (const auto& it : object_stream->objects_offsets()) { if (it.first < kMaxObjectNumber) cross_ref_table->AddCompressed(it.first, obj_num); } } } } numbers.clear(); } m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable), std::move(cross_ref_table)); // Resore default buffer size. 
m_pSyntax->SetReadBufferSize(CPDF_ModuleMgr::kFileBufSize); return GetTrailer() && !m_CrossRefTable->objects_info().empty(); } bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) { std::unique_ptr pObject(ParseIndirectObjectAt(*pos, 0)); if (!pObject || !pObject->GetObjNum()) return false; CPDF_Stream* pStream = pObject->AsStream(); if (!pStream) return false; CPDF_Dictionary* pDict = pStream->GetDict(); *pos = pDict->GetIntegerFor("Prev"); int32_t size = pDict->GetIntegerFor("Size"); if (size < 0) return false; std::unique_ptr pNewTrailer = ToDictionary(pDict->Clone()); if (bMainXRef) { m_CrossRefTable = pdfium::MakeUnique(std::move(pNewTrailer)); m_CrossRefTable->ShrinkObjectMap(size); } else { m_CrossRefTable = CPDF_CrossRefTable::MergeUp( pdfium::MakeUnique(std::move(pNewTrailer)), std::move(m_CrossRefTable)); } std::vector> arrIndex; CPDF_Array* pArray = pDict->GetArrayFor("Index"); if (pArray) { for (size_t i = 0; i < pArray->GetCount() / 2; i++) { CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2); CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1); if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { int nStartNum = pStartNumObj->GetInteger(); int nCount = pCountObj->GetInteger(); if (nStartNum >= 0 && nCount > 0) arrIndex.push_back(std::make_pair(nStartNum, nCount)); } } } if (arrIndex.empty()) arrIndex.push_back(std::make_pair(0, size)); pArray = pDict->GetArrayFor("W"); if (!pArray) return false; std::vector WidthArray; FX_SAFE_UINT32 dwAccWidth = 0; for (size_t i = 0; i < pArray->GetCount(); ++i) { WidthArray.push_back(pArray->GetIntegerAt(i)); dwAccWidth += WidthArray[i]; } if (!dwAccWidth.IsValid() || WidthArray.size() < 3) return false; uint32_t totalWidth = dwAccWidth.ValueOrDie(); auto pAcc = pdfium::MakeRetain(pStream); pAcc->LoadAllDataFiltered(); const uint8_t* pData = pAcc->GetData(); uint32_t dwTotalSize = pAcc->GetSize(); uint32_t segindex = 0; for (const auto& index : arrIndex) { const int32_t startnum = index.first; 
if (startnum < 0) continue; uint32_t count = pdfium::base::checked_cast(index.second); FX_SAFE_UINT32 dwCaculatedSize = segindex; dwCaculatedSize += count; dwCaculatedSize *= totalWidth; if (!dwCaculatedSize.IsValid() || dwCaculatedSize.ValueOrDie() > dwTotalSize) { continue; } const uint8_t* segstart = pData + segindex * totalWidth; FX_SAFE_UINT32 dwMaxObjNum = startnum; dwMaxObjNum += count; uint32_t dwV5Size = m_CrossRefTable->GetSize(); if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) continue; for (uint32_t i = 0; i < count; i++) { ObjectType type = ObjectType::kNotCompressed; const uint8_t* entrystart = segstart + i * totalWidth; if (WidthArray[0]) { const uint32_t cross_ref_stream_obj_type = GetVarInt(entrystart, WidthArray[0]); type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type); if (type == ObjectType::kNull) continue; } const uint32_t objnum = startnum + i; if (GetObjectType(objnum) == ObjectType::kNull) { uint32_t offset = GetVarInt(entrystart + WidthArray[0], WidthArray[1]); if (pdfium::base::IsValueInRangeForNumericType(offset)) m_CrossRefTable->AddNormal(objnum, 0, offset); continue; } if (GetObjectType(objnum) != ObjectType::kFree) continue; if (type == ObjectType::kFree) { m_CrossRefTable->SetFree(objnum); continue; } const uint32_t entry_value = GetVarInt(entrystart + WidthArray[0], WidthArray[1]); if (type == ObjectType::kNotCompressed) { const uint32_t offset = entry_value; if (pdfium::base::IsValueInRangeForNumericType(offset)) m_CrossRefTable->AddNormal(objnum, 0, offset); continue; } ASSERT(type == ObjectType::kCompressed); const uint32_t archive_obj_num = entry_value; if (!IsValidObjectNumber(archive_obj_num)) return false; m_CrossRefTable->AddCompressed(objnum, archive_obj_num); } segindex += count; } return true; } const CPDF_Array* CPDF_Parser::GetIDArray() const { return GetTrailer() ? 
GetTrailer()->GetArrayFor("ID") : nullptr; } CPDF_Dictionary* CPDF_Parser::GetRoot() const { CPDF_Object* obj = m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum()); return obj ? obj->GetDict() : nullptr; } const CPDF_Dictionary* CPDF_Parser::GetEncryptDict() const { if (!GetTrailer()) return nullptr; const CPDF_Object* pEncryptObj = GetTrailer()->GetObjectFor("Encrypt"); if (!pEncryptObj) return nullptr; if (pEncryptObj->IsDictionary()) return ToDictionary(pEncryptObj); if (pEncryptObj->IsReference()) { return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject( pEncryptObj->AsReference()->GetRefObjNum())); } return nullptr; } const CPDF_Dictionary* CPDF_Parser::GetTrailer() const { return m_CrossRefTable->trailer(); } std::unique_ptr CPDF_Parser::GetCombinedTrailer() const { return m_CrossRefTable->trailer() ? ToDictionary(m_CrossRefTable->trailer()->Clone()) : std::unique_ptr(); } uint32_t CPDF_Parser::GetInfoObjNum() const { const CPDF_Reference* pRef = ToReference(m_CrossRefTable->trailer() ? m_CrossRefTable->trailer()->GetObjectFor("Info") : nullptr); return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum; } uint32_t CPDF_Parser::GetRootObjNum() const { const CPDF_Reference* pRef = ToReference(m_CrossRefTable->trailer() ? m_CrossRefTable->trailer()->GetObjectFor("Root") : nullptr); return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum; } std::unique_ptr CPDF_Parser::ParseIndirectObject( uint32_t objnum) { if (!IsValidObjectNumber(objnum)) return nullptr; // Prevent circular parsing the same object. 
if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) return nullptr; pdfium::ScopedSetInsertion local_insert(&m_ParsingObjNums, objnum); if (GetObjectType(objnum) == ObjectType::kNotCompressed) { FX_FILESIZE pos = GetObjectPositionOrZero(objnum); if (pos <= 0) return nullptr; return ParseIndirectObjectAt(pos, objnum); } if (GetObjectType(objnum) != ObjectType::kCompressed) return nullptr; const CPDF_ObjectStream* pObjStream = GetObjectStream(m_CrossRefTable->GetObjectInfo(objnum)->archive_obj_num); if (!pObjStream) return nullptr; return pObjStream->ParseObject(m_pObjectsHolder.Get(), objnum); } const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) { // Prevent circular parsing the same object. if (pdfium::ContainsKey(m_ParsingObjNums, object_number)) return nullptr; pdfium::ScopedSetInsertion local_insert(&m_ParsingObjNums, object_number); auto it = m_ObjectStreamMap.find(object_number); if (it != m_ObjectStreamMap.end()) return it->second.get(); const auto* info = m_CrossRefTable->GetObjectInfo(object_number); if (!info || info->type != ObjectType::kObjStream) return nullptr; const FX_FILESIZE object_pos = info->pos; if (object_pos <= 0) return nullptr; std::unique_ptr object = ParseIndirectObjectAt(object_pos, object_number); if (!object) return nullptr; std::unique_ptr objs_stream = CPDF_ObjectStream::Create(ToStream(object.get())); const CPDF_ObjectStream* result = objs_stream.get(); m_ObjectStreamMap[object_number] = std::move(objs_stream); return result; } std::unique_ptr CPDF_Parser::ParseIndirectObjectAt( FX_FILESIZE pos, uint32_t objnum) { const FX_FILESIZE saved_pos = m_pSyntax->GetPos(); m_pSyntax->SetPos(pos); auto result = m_pSyntax->GetIndirectObject( m_pObjectsHolder.Get(), CPDF_SyntaxParser::ParseType::kLoose); m_pSyntax->SetPos(saved_pos); if (result && objnum && result->GetObjNum() != objnum) return nullptr; const bool should_decrypt = m_pSecurityHandler && m_pSecurityHandler->GetCryptoHandler() && objnum != 
m_MetadataObjnum; if (should_decrypt) result = m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree( std::move(result)); return result; } uint32_t CPDF_Parser::GetFirstPageNo() const { return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0; } void CPDF_Parser::SetLinearizedHeader( std::unique_ptr pLinearized) { m_pLinearized = std::move(pLinearized); } std::unique_ptr CPDF_Parser::LoadTrailerV4() { if (m_pSyntax->GetKeyword() != "trailer") return nullptr; return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder.Get())); } uint32_t CPDF_Parser::GetPermissions() const { return m_pSecurityHandler ? m_pSecurityHandler->GetPermissions() : 0xFFFFFFFF; } std::unique_ptr CPDF_Parser::ParseLinearizedHeader() { return CPDF_LinearizedHeader::Parse(m_pSyntax.get()); } CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( const RetainPtr& validator, const char* password) { ASSERT(!m_bHasParsed); SetPassword(password); m_bXRefStream = false; m_LastXRefOffset = 0; if (!InitSyntaxParser(validator)) return FORMAT_ERROR; m_pLinearized = ParseLinearizedHeader(); if (!m_pLinearized) return StartParseInternal(); m_bHasParsed = true; m_LastXRefOffset = m_pLinearized->GetLastXRefOffset(); FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset; bool bXRefRebuilt = false; bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false); if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) { if (!RebuildCrossRef()) return FORMAT_ERROR; bXRefRebuilt = true; m_LastXRefOffset = 0; } if (bLoadV4) { std::unique_ptr trailer = LoadTrailerV4(); if (!trailer) return SUCCESS; m_CrossRefTable->SetTrailer(std::move(trailer)); int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size"); if (xrefsize > 0) ShrinkObjectMap(xrefsize); } Error eRet = SetEncryptHandler(); if (eRet != SUCCESS) return eRet; if (!GetRoot() || !m_pObjectsHolder->TryInit()) { if (bXRefRebuilt) return FORMAT_ERROR; ReleaseEncryptHandler(); if (!RebuildCrossRef()) return FORMAT_ERROR; eRet = SetEncryptHandler(); if (eRet != 
SUCCESS) return eRet; m_pObjectsHolder->TryInit(); if (!GetRoot()) return FORMAT_ERROR; } if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) { ReleaseEncryptHandler(); if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum) return FORMAT_ERROR; eRet = SetEncryptHandler(); if (eRet != SUCCESS) return eRet; } if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { if (CPDF_Reference* pMetadata = ToReference(GetRoot()->GetObjectFor("Metadata"))) m_MetadataObjnum = pMetadata->GetRefObjNum(); } return SUCCESS; } bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { if (!LoadCrossRefV5(&xrefpos, false)) return false; std::set seen_xrefpos; while (xrefpos) { seen_xrefpos.insert(xrefpos); if (!LoadCrossRefV5(&xrefpos, false)) return false; // Check for circular references. if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) return false; } m_ObjectStreamMap.clear(); m_bXRefStream = true; return true; } CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { const FX_SAFE_FILESIZE main_xref_offset = GetTrailer()->GetIntegerFor("Prev"); if (!main_xref_offset.IsValid()) return FORMAT_ERROR; if (main_xref_offset.ValueOrDie() == 0) return SUCCESS; const AutoRestorer save_metadata_objnum(&m_MetadataObjnum); m_MetadataObjnum = 0; m_ObjectStreamMap.clear(); if (!LoadLinearizedAllCrossRefV4(main_xref_offset.ValueOrDie()) && !LoadLinearizedAllCrossRefV5(main_xref_offset.ValueOrDie())) { m_LastXRefOffset = 0; return FORMAT_ERROR; } return SUCCESS; } CPDF_Parser::ObjectType CPDF_Parser::GetObjectTypeFromCrossRefStreamType( uint32_t cross_ref_stream_type) const { switch (cross_ref_stream_type) { case 0: return CPDF_Parser::ObjectType::kFree; case 1: return CPDF_Parser::ObjectType::kNotCompressed; case 2: return CPDF_Parser::ObjectType::kCompressed; default: return CPDF_Parser::ObjectType::kNull; } }