From 9e12f14814722c0c0d46d4968f636b5e1a72a1e7 Mon Sep 17 00:00:00 2001
From: Artem Strygin
Date: Wed, 27 Jun 2018 17:52:40 +0000
Subject: Implement CPDF_CrossRefTable

Change-Id: I5ac61ab323adb5eec2de8660064fff95ee877b5e
Reviewed-on: https://pdfium-review.googlesource.com/35432
Reviewed-by: dsinclair
Commit-Queue: Art Snake
---
 BUILD.gn                                     |   2 +
 core/fpdfapi/parser/cpdf_cross_ref_table.cpp | 129 +++++++++++++++++++++++++++
 core/fpdfapi/parser/cpdf_cross_ref_table.h   |  68 ++++++++++++++
 core/fpdfapi/parser/cpdf_parser.cpp          |  75 ++++++----------
 core/fpdfapi/parser/cpdf_parser.h            |  22 +----
 5 files changed, 228 insertions(+), 68 deletions(-)
 create mode 100644 core/fpdfapi/parser/cpdf_cross_ref_table.cpp
 create mode 100644 core/fpdfapi/parser/cpdf_cross_ref_table.h

diff --git a/BUILD.gn b/BUILD.gn
index 938bfa9f9b..6c26519f6e 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -605,6 +605,8 @@ jumbo_static_library("fpdfapi") {
     "core/fpdfapi/parser/cpdf_boolean.h",
     "core/fpdfapi/parser/cpdf_cross_ref_avail.cpp",
     "core/fpdfapi/parser/cpdf_cross_ref_avail.h",
+    "core/fpdfapi/parser/cpdf_cross_ref_table.cpp",
+    "core/fpdfapi/parser/cpdf_cross_ref_table.h",
     "core/fpdfapi/parser/cpdf_crypto_handler.cpp",
     "core/fpdfapi/parser/cpdf_crypto_handler.h",
     "core/fpdfapi/parser/cpdf_data_avail.cpp",
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
new file mode 100644
index 0000000000..770c483dca
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
@@ -0,0 +1,129 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_cross_ref_table.h"
+
+#include <utility>
+
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+
+CPDF_CrossRefTable::CPDF_CrossRefTable() = default;
+
+CPDF_CrossRefTable::CPDF_CrossRefTable(std::unique_ptr<CPDF_Dictionary> trailer)
+    : trailer_(std::move(trailer)) {}
+
+CPDF_CrossRefTable::~CPDF_CrossRefTable() = default;
+
+void CPDF_CrossRefTable::AddCompressed(uint32_t obj_num,
+                                       uint32_t archive_obj_num) {
+  auto& info = objects_info_[obj_num];
+  if (info.gennum > 0)
+    return;
+
+  if (info.type == ObjectType::kObjStream)
+    return;
+
+  info.type = ObjectType::kCompressed;
+  info.archive_obj_num = archive_obj_num;
+  info.gennum = 0;
+
+  objects_info_[archive_obj_num].type = ObjectType::kObjStream;
+}
+
+void CPDF_CrossRefTable::AddNormal(uint32_t obj_num,
+                                   uint16_t gen_num,
+                                   FX_FILESIZE pos) {
+  auto& info = objects_info_[obj_num];
+  if (info.gennum > gen_num)
+    return;
+
+  if (info.type == ObjectType::kCompressed && gen_num == 0)
+    return;
+
+  if (info.type != ObjectType::kObjStream)
+    info.type = ObjectType::kNormal;
+
+  info.gennum = gen_num;
+  info.pos = pos;
+}
+
+void CPDF_CrossRefTable::SetFree(uint32_t obj_num) {
+  auto& info = objects_info_[obj_num];
+  info.type = ObjectType::kFree;
+  info.gennum = 0xFFFF;
+  info.pos = 0;
+}
+
+void CPDF_CrossRefTable::SetTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
+  trailer_ = std::move(trailer);
+}
+
+const CPDF_CrossRefTable::ObjectInfo* CPDF_CrossRefTable::GetObjectInfo(
+    uint32_t obj_num) const {
+  const auto it = objects_info_.find(obj_num);
+  return it != objects_info_.end() ? &it->second : nullptr;
+}
+
+void CPDF_CrossRefTable::Update(
+    std::unique_ptr<CPDF_CrossRefTable> new_cross_ref) {
+  UpdateInfo(std::move(new_cross_ref->objects_info_));
+  UpdateTrailer(std::move(new_cross_ref->trailer_));
+}
+
+void CPDF_CrossRefTable::ShrinkObjectMap(uint32_t objnum) {
+  if (objnum == 0) {
+    objects_info_.clear();
+    return;
+  }
+
+  objects_info_.erase(objects_info_.lower_bound(objnum), objects_info_.end());
+
+  if (!pdfium::ContainsKey(objects_info_, objnum - 1))
+    objects_info_[objnum - 1].pos = 0;
+}
+
+void CPDF_CrossRefTable::UpdateInfo(
+    std::map<uint32_t, ObjectInfo>&& new_objects_info) {
+  auto cur_it = objects_info_.begin();
+  auto new_it = new_objects_info.begin();
+  while (cur_it != objects_info_.end() && new_it != new_objects_info.end()) {
+    if (cur_it->first == new_it->first) {
+      if (cur_it->second.type == ObjectType::kObjStream &&
+          new_it->second.type == ObjectType::kNormal) {
+        new_it->second.type = ObjectType::kObjStream;
+      }
+      ++cur_it;
+      ++new_it;
+    } else if (cur_it->first < new_it->first) {
+      new_objects_info.insert(new_it, *cur_it);
+      ++cur_it;
+    } else {
+      new_it = new_objects_info.lower_bound(cur_it->first);
+    }
+  }
+  for (; cur_it != objects_info_.end(); ++cur_it) {
+    new_objects_info.insert(new_objects_info.end(), *cur_it);
+  }
+  objects_info_ = std::move(new_objects_info);
+}
+
+void CPDF_CrossRefTable::UpdateTrailer(
+    std::unique_ptr<CPDF_Dictionary> new_trailer) {
+  if (!new_trailer)
+    return;
+
+  if (!trailer_) {
+    trailer_ = std::move(new_trailer);
+    return;
+  }
+
+  new_trailer->SetFor("XRefStm", trailer_->RemoveFor("XRefStm"));
+  new_trailer->SetFor("Prev", trailer_->RemoveFor("Prev"));
+
+  for (auto it = new_trailer->begin(); it != new_trailer->end();) {
+    const ByteString key = it->first;
+    ++it;
+    trailer_->SetFor(key, new_trailer->RemoveFor(key));
+  }
+}
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.h b/core/fpdfapi/parser/cpdf_cross_ref_table.h
new file mode 100644
index 0000000000..ade1b336b2
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.h
@@ -0,0 +1,68 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_TABLE_H_
+#define CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_TABLE_H_
+
+#include <map>
+#include <memory>
+
+#include "core/fxcrt/fx_system.h"
+
+class CPDF_Dictionary;
+
+class CPDF_CrossRefTable {
+ public:
+  enum class ObjectType : uint8_t {
+    kFree = 0x00,
+    kNormal = 0x01,
+    kNotCompressed = kNormal,
+    kCompressed = 0x02,
+    kObjStream = 0xFF,
+    kNull = kObjStream,
+  };
+
+  struct ObjectInfo {
+    ObjectInfo() : pos(0), type(ObjectType::kFree), gennum(0) {}
+    // If type is ObjectType::kCompressed, archive_obj_num should be used.
+    // If type is ObjectType::kNotCompressed, pos should be used.
+    // In other cases they are unused.
+    union {
+      FX_FILESIZE pos;
+      uint32_t archive_obj_num;
+    };
+    ObjectType type;
+    uint16_t gennum;
+  };
+
+  CPDF_CrossRefTable();
+  explicit CPDF_CrossRefTable(std::unique_ptr<CPDF_Dictionary> trailer);
+  ~CPDF_CrossRefTable();
+
+  void AddCompressed(uint32_t obj_num, uint32_t archive_obj_num);
+  void AddNormal(uint32_t obj_num, uint16_t gen_num, FX_FILESIZE pos);
+  void SetFree(uint32_t obj_num);
+
+  const CPDF_Dictionary* trailer() const { return trailer_.get(); }
+  void SetTrailer(std::unique_ptr<CPDF_Dictionary> trailer);
+
+  const ObjectInfo* GetObjectInfo(uint32_t obj_num) const;
+
+  const std::map<uint32_t, ObjectInfo>& objects_info() const {
+    return objects_info_;
+  }
+
+  void Update(std::unique_ptr<CPDF_CrossRefTable> new_cross_ref);
+
+  void ShrinkObjectMap(uint32_t objnum);
+
+ private:
+  void UpdateInfo(std::map<uint32_t, ObjectInfo>&& new_objects_info);
+  void UpdateTrailer(std::unique_ptr<CPDF_Dictionary> new_trailer);
+
+  std::unique_ptr<CPDF_Dictionary> trailer_;
+  std::map<uint32_t, ObjectInfo> objects_info_;
+};
+
+#endif  // CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_TABLE_H_
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 3a7afd0b49..c7a3fe16c8 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -670,6 +670,7 @@ bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
 }
 
 bool CPDF_Parser::RebuildCrossRef() {
+  CPDF_CrossRefTable cross_ref_table;
   m_ObjectInfo.clear();
   m_TrailerData->Clear();
 
@@ -826,23 +827,14 @@ bool CPDF_Parser::RebuildCrossRef() {
                       CPDF_Object* pRoot = pDict->GetObjectFor("Root");
                       if (pRoot && pRoot->GetDict() &&
                           pRoot->GetDict()->GetObjectFor("Pages")) {
-                        m_TrailerData->SetMainTrailer(
-                            ToDictionary(pDict->Clone()));
+                        cross_ref_table.Update(
+                            pdfium::MakeUnique<CPDF_CrossRefTable>(
+                                ToDictionary(pDict->Clone())));
                       }
                     }
                   }
                 }
-                ObjectInfo& info = m_ObjectInfo[objnum];
-                if (pObject || !info.pos) {
-                  info.pos = obj_pos;
-                  info.type = ObjectType::kNotCompressed;
-                  // The newer version of object should be located after old
-                  // version.
-                  // Do not worry about gennum in this case, because we read
-                  // file
-                  // in front order.
-                  info.gennum = gennum;
-                }
+                cross_ref_table.AddNormal(objnum, gennum, obj_pos);
               }
               state = ParserState::kDefault;
               break;
@@ -859,42 +851,21 @@ bool CPDF_Parser::RebuildCrossRef() {
               if (!pObj)
                 m_pSyntax->SetPos(current_char_pos);
 
-              {
-                if (pObj && (pObj->IsDictionary() || pObj->IsStream())) {
-                  if (CPDF_Dictionary* pTrailer =
-                          pObj->IsStream() ? pObj->AsStream()->GetDict()
-                                           : pObj->AsDictionary()) {
-                    if (GetTrailer()) {
-                      CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
-                      CPDF_Reference* pRef = ToReference(pRoot);
-                      if (!pRoot ||
-                          (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
-                           m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
-                        // This is newer version of trailer. Merge it with old.
-                        for (auto it = pTrailer->begin();
-                             it != pTrailer->end();) {
-                          DCHECK(it->second->IsInline());
-                          const ByteString key = it->first;
-                          ++it;
-                          GetTrailer()->SetFor(key, pTrailer->RemoveFor(key));
-                        }
-                      }
-                    } else {
-                      m_TrailerData->SetMainTrailer(
-                          ToDictionary(pObj->IsStream() ? pTrailer->Clone()
-                                                        : std::move(pObj)));
-
-                      FX_FILESIZE dwSavePos = m_pSyntax->GetPos();
-                      ByteString strWord = m_pSyntax->GetKeyword();
-                      if (!strWord.Compare("startxref")) {
-                        bool bNumber;
-                        ByteString bsOffset = m_pSyntax->GetNextWord(&bNumber);
-                        if (bNumber)
-                          m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str());
-                      }
-                      m_pSyntax->SetPos(dwSavePos);
-                    }
-                  }
+              if (pObj) {
+                cross_ref_table.Update(pdfium::MakeUnique<CPDF_CrossRefTable>(
+                    ToDictionary(pObj->IsStream()
+                                     ? pObj->AsStream()->GetDict()->Clone()
+                                     : std::move(pObj))));
+
+                FX_FILESIZE dwSavePos = m_pSyntax->GetPos();
+                ByteString strWord = m_pSyntax->GetKeyword();
+                if (!strWord.Compare("startxref")) {
+                  bool bNumber;
+                  ByteString bsOffset = m_pSyntax->GetNextWord(&bNumber);
+                  if (bNumber)
+                    last_xref = FXSYS_atoi(bsOffset.c_str());
+                } else {
+                  m_pSyntax->SetPos(dwSavePos);
                 }
               }
             }
@@ -972,8 +943,14 @@ bool CPDF_Parser::RebuildCrossRef() {
   else if (last_trailer == -1 || last_xref < last_obj)
     last_trailer = m_pSyntax->m_FileLen;
 
+  if (cross_ref_table.trailer()) {
+    m_TrailerData->SetMainTrailer(
+        ToDictionary(cross_ref_table.trailer()->Clone()));
+    m_ObjectInfo = cross_ref_table.objects_info();
+  }
   // Resore default buffer size.
   m_pSyntax->SetReadBufferSize(CPDF_ModuleMgr::kFileBufSize);
+
   return GetTrailer() && !m_ObjectInfo.empty();
 }
 
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 3151da61c9..f859db5d16 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -13,6 +13,7 @@
 #include
 #include
 
+#include "core/fpdfapi/parser/cpdf_cross_ref_table.h"
 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
 #include "core/fxcrt/fx_string.h"
 #include "core/fxcrt/fx_system.h"
@@ -113,25 +114,8 @@ class CPDF_Parser {
   void SetLinearizedHeader(std::unique_ptr pLinearized);
 
  protected:
-  enum class ObjectType : uint8_t {
-    kFree = 0x00,
-    kNotCompressed = 0x01,
-    kCompressed = 0x02,
-    kNull = 0xFF,
-  };
-
-  struct ObjectInfo {
-    ObjectInfo() : pos(0), type(ObjectType::kFree), gennum(0) {}
-    // if type is ObjectType::kCompressed the archive_obj_num should be used.
-    // if type is ObjectType::kNotCompressed the pos should be used.
-    // In other cases its are unused.
-    union {
-      FX_FILESIZE pos;
-      FX_FILESIZE archive_obj_num;
-    };
-    ObjectType type;
-    uint16_t gennum;
-  };
+  using ObjectType = CPDF_CrossRefTable::ObjectType;
+  using ObjectInfo = CPDF_CrossRefTable::ObjectInfo;
 
   std::unique_ptr m_pSyntax;
   std::map<uint32_t, ObjectInfo> m_ObjectInfo;
-- 
cgit v1.2.3