From 084c31d93439d5166c60dbfb0ccee6db1406d09c Mon Sep 17 00:00:00 2001 From: Artem Strygin Date: Wed, 18 Jul 2018 04:32:08 +0000 Subject: Use CPDF_CrossRefTable within CPDF_Parser Change-Id: I354e8bed12606abdc67427bbc7928e3b1f11e243 Reviewed-on: https://pdfium-review.googlesource.com/35433 Commit-Queue: Art Snake Reviewed-by: Lei Zhang --- core/fpdfapi/parser/cpdf_parser.cpp | 227 +++++++++++++----------------------- 1 file changed, 83 insertions(+), 144 deletions(-) (limited to 'core/fpdfapi/parser/cpdf_parser.cpp') diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp index 8d4d7728ab..54e05245a9 100644 --- a/core/fpdfapi/parser/cpdf_parser.cpp +++ b/core/fpdfapi/parser/cpdf_parser.cpp @@ -56,81 +56,13 @@ class ObjectsHolderStub : public CPDF_Parser::ParsedObjectsHolder { } // namespace -class CPDF_Parser::TrailerData { - public: - TrailerData() {} - ~TrailerData() {} - - CPDF_Dictionary* GetMainTrailer() const { return main_trailer_.get(); } - - std::unique_ptr GetCombinedTrailer() const { - std::unique_ptr result = - ToDictionary(main_trailer_->Clone()); - - // Info is optional. - uint32_t info_obj_num = GetInfoObjNum(); - if (info_obj_num != CPDF_Object::kInvalidObjNum) - result->SetNewFor("Info", nullptr, GetInfoObjNum()); - - // Root is required. - result->SetNewFor("Root", nullptr, GetRootObjNum()); - return result; - } - - void SetMainTrailer(std::unique_ptr trailer) { - ASSERT(trailer); - main_trailer_ = std::move(trailer); - ApplyTrailer(main_trailer_.get()); - } - - void AppendTrailer(std::unique_ptr trailer) { - ASSERT(trailer); - ApplyTrailer(trailer.get()); - } - - void Clear() { - main_trailer_.reset(); - last_info_obj_num_ = CPDF_Object::kInvalidObjNum; - last_root_obj_num_ = CPDF_Object::kInvalidObjNum; - } - - uint32_t GetInfoObjNum() const { - const CPDF_Reference* pRef = ToReference( - GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Info") : nullptr); - return pRef ? pRef->GetRefObjNum() : last_info_obj_num_; - } - - uint32_t GetRootObjNum() const { - const CPDF_Reference* pRef = ToReference( - GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Root") : nullptr); - return pRef ? pRef->GetRefObjNum() : last_root_obj_num_; - } - - private: - void ApplyTrailer(const CPDF_Dictionary* dict) { - // The most recent Info object number contained in last added trailer. - // See PDF 1.7 spec, section 3.4.5 - Incremental Updates. - const auto* pRef = ToReference(dict->GetObjectFor("Info")); - if (pRef) - last_info_obj_num_ = pRef->GetRefObjNum(); - - const auto* pRoot = ToReference(dict->GetObjectFor("Root")); - if (pRoot) - last_root_obj_num_ = pRoot->GetRefObjNum(); - } - - std::unique_ptr main_trailer_; - uint32_t last_info_obj_num_ = CPDF_Object::kInvalidObjNum; - uint32_t last_root_obj_num_ = CPDF_Object::kInvalidObjNum; -}; - CPDF_Parser::CPDF_Parser(ParsedObjectsHolder* holder) : m_pSyntax(pdfium::MakeUnique()), m_pObjectsHolder(holder), m_bHasParsed(false), m_bXRefStream(false), m_FileVersion(0), - m_TrailerData(pdfium::MakeUnique()) { + m_CrossRefTable(pdfium::MakeUnique()) { if (!holder) { m_pOwnedObjectsHolder = pdfium::MakeUnique(); m_pObjectsHolder = m_pOwnedObjectsHolder.get(); @@ -144,28 +76,30 @@ CPDF_Parser::~CPDF_Parser() { } uint32_t CPDF_Parser::GetLastObjNum() const { - return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; + return m_CrossRefTable->objects_info().empty() + ? 0 + : m_CrossRefTable->objects_info().rbegin()->first; } bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const { - return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; + return objnum <= GetLastObjNum(); } FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const { - auto it = m_ObjectInfo.find(objnum); - return it != m_ObjectInfo.end() ? it->second.pos : 0; + const auto* info = m_CrossRefTable->GetObjectInfo(objnum); + return (info && info->type == ObjectType::kNormal) ? info->pos : 0; } CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const { ASSERT(IsValidObjectNumber(objnum)); - auto it = m_ObjectInfo.find(objnum); - return it != m_ObjectInfo.end() ? it->second.type : ObjectType::kFree; + const auto* info = m_CrossRefTable->GetObjectInfo(objnum); + return info ? info->type : ObjectType::kFree; } uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const { ASSERT(IsValidObjectNumber(objnum)); - auto it = m_ObjectInfo.find(objnum); - return it != m_ObjectInfo.end() ? it->second.gennum : 0; + const auto* info = m_CrossRefTable->GetObjectInfo(objnum); + return (info && info->type == ObjectType::kNormal) ? info->gennum : 0; } bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const { @@ -194,19 +128,7 @@ RetainPtr CPDF_Parser::GetFileAccess() const { } void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) { - if (objnum == 0) { - m_ObjectInfo.clear(); - return; - } - - auto it = m_ObjectInfo.lower_bound(objnum); - while (it != m_ObjectInfo.end()) { - auto saved_it = it++; - m_ObjectInfo.erase(saved_it); - } - - if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) - m_ObjectInfo[objnum - 1].pos = 0; + m_CrossRefTable->ShrinkObjectMap(objnum); } bool CPDF_Parser::InitSyntaxParser( @@ -371,26 +293,12 @@ void CPDF_Parser::ReleaseEncryptHandler() { SetEncryptDictionary(nullptr); } -FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const { - if (!IsValidObjectNumber(objnum)) - return 0; - - if (GetObjectType(objnum) == ObjectType::kNotCompressed) - return GetObjectPositionOrZero(objnum); - - if (GetObjectType(objnum) == ObjectType::kCompressed) { - FX_FILESIZE pos = GetObjectPositionOrZero(objnum); - return GetObjectPositionOrZero(pos); - } - return 0; -} - // Ideally, all the cross reference entries should be verified. // In reality, we rarely see well-formed cross references don't match // with the objects. crbug/602650 showed a case where object numbers // in the cross reference table are all off by one. bool CPDF_Parser::VerifyCrossRefV4() { - for (const auto& it : m_ObjectInfo) { + for (const auto& it : m_CrossRefTable->objects_info()) { if (it.second.pos == 0) continue; // Find the first non-zero position. @@ -418,7 +326,7 @@ bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { if (!trailer) return false; - m_TrailerData->SetMainTrailer(std::move(trailer)); + m_CrossRefTable->SetTrailer(std::move(trailer)); int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size"); if (xrefsize > 0 && xrefsize <= kMaxXRefSize) ShrinkObjectMap(xrefsize); @@ -454,7 +362,10 @@ bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { // SLOW ... XRefStreamList.insert(XRefStreamList.begin(), pDict->GetIntegerFor("XRefStm")); - m_TrailerData->AppendTrailer(std::move(pDict)); + + m_CrossRefTable = CPDF_CrossRefTable::MergeUp( + pdfium::MakeUnique(std::move(pDict)), + std::move(m_CrossRefTable)); } for (size_t i = 0; i < CrossRefList.size(); ++i) { @@ -478,7 +389,10 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) { if (!trailer) return false; - m_TrailerData->SetMainTrailer(std::move(trailer)); + m_CrossRefTable = CPDF_CrossRefTable::MergeUp( + pdfium::MakeUnique(std::move(trailer)), + std::move(m_CrossRefTable)); + int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size"); if (xrefsize == 0) return false; @@ -512,7 +426,10 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) { // SLOW ... XRefStreamList.insert(XRefStreamList.begin(), pDict->GetIntegerFor("XRefStm")); - m_TrailerData->AppendTrailer(std::move(pDict)); + + m_CrossRefTable = CPDF_CrossRefTable::MergeUp( + pdfium::MakeUnique(std::move(pDict)), + std::move(m_CrossRefTable)); } for (size_t i = 1; i < CrossRefList.size(); ++i) { @@ -660,7 +577,21 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, bool bSkip) { void CPDF_Parser::MergeCrossRefObjectsData( const std::vector& objects) { for (const auto& obj : objects) { - m_ObjectInfo[obj.obj_num] = obj.info; + switch (obj.info.type) { + case ObjectType::kFree: + if (obj.info.gennum > 0) + m_CrossRefTable->SetFree(obj.obj_num); + break; + case ObjectType::kNormal: + case ObjectType::kObjStream: + m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum, obj.info.pos); + break; + case ObjectType::kCompressed: + m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive_obj_num); + break; + default: + NOTREACHED(); + } } } @@ -684,9 +615,7 @@ bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { } bool CPDF_Parser::RebuildCrossRef() { - CPDF_CrossRefTable cross_ref_table; - m_ObjectInfo.clear(); - m_TrailerData->Clear(); + auto cross_ref_table = pdfium::MakeUnique(); ParserState state = ParserState::kDefault; int32_t inside_index = 0; @@ -841,14 +770,14 @@ bool CPDF_Parser::RebuildCrossRef() { CPDF_Object* pRoot = pDict->GetObjectFor("Root"); if (pRoot && pRoot->GetDict() && pRoot->GetDict()->GetObjectFor("Pages")) { - cross_ref_table.Update( + cross_ref_table->Update( pdfium::MakeUnique( ToDictionary(pDict->Clone()))); } } } } - cross_ref_table.AddNormal(objnum, gennum, obj_pos); + cross_ref_table->AddNormal(objnum, gennum, obj_pos); } state = ParserState::kDefault; break; @@ -866,7 +795,7 @@ bool CPDF_Parser::RebuildCrossRef() { m_pSyntax->SetPos(current_char_pos); if (pObj) { - cross_ref_table.Update(pdfium::MakeUnique( + cross_ref_table->Update(pdfium::MakeUnique( ToDictionary(pObj->IsStream() ? pObj->AsStream()->GetDict()->Clone() : std::move(pObj)))); @@ -957,15 +886,12 @@ bool CPDF_Parser::RebuildCrossRef() { else if (last_trailer == -1 || last_xref < last_obj) last_trailer = m_pSyntax->m_FileLen; - if (cross_ref_table.trailer()) { - m_TrailerData->SetMainTrailer( - ToDictionary(cross_ref_table.trailer()->Clone())); - m_ObjectInfo = cross_ref_table.objects_info(); - } + m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable), + std::move(cross_ref_table)); // Resore default buffer size. m_pSyntax->SetReadBufferSize(CPDF_ModuleMgr::kFileBufSize); - return GetTrailer() && !m_ObjectInfo.empty(); + return GetTrailer() && !m_CrossRefTable->objects_info().empty(); } bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) { @@ -989,12 +915,13 @@ bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) { std::unique_ptr pNewTrailer = ToDictionary(pDict->Clone()); if (bMainXRef) { - m_TrailerData->SetMainTrailer(std::move(pNewTrailer)); - ShrinkObjectMap(size); - for (auto& it : m_ObjectInfo) - it.second.type = ObjectType::kFree; + m_CrossRefTable = + pdfium::MakeUnique(std::move(pNewTrailer)); + m_CrossRefTable->ShrinkObjectMap(size); } else { - m_TrailerData->AppendTrailer(std::move(pNewTrailer)); + m_CrossRefTable = CPDF_CrossRefTable::MergeUp( + pdfium::MakeUnique(std::move(pNewTrailer)), + std::move(m_CrossRefTable)); } std::vector> arrIndex; @@ -1054,7 +981,8 @@ bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) { const uint8_t* segstart = pData + segindex * totalWidth; FX_SAFE_UINT32 dwMaxObjNum = startnum; dwMaxObjNum += count; - uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; + uint32_t dwV5Size = + m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1; if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) continue; @@ -1070,30 +998,27 @@ bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) { if (GetObjectType(startnum + j) == ObjectType::kNull) { FX_FILESIZE offset = GetVarInt(entrystart + WidthArray[0], WidthArray[1]); - m_ObjectInfo[startnum + j].pos = offset; + m_CrossRefTable->AddNormal(startnum + j, 0, offset); continue; } if (GetObjectType(startnum + j) != ObjectType::kFree) continue; - ObjectInfo& info = m_ObjectInfo[startnum + j]; - - info.type = type; if (type == ObjectType::kFree) { - info.pos = 0; + m_CrossRefTable->SetFree(startnum + j); } else { const FX_FILESIZE entry_value = GetVarInt(entrystart + WidthArray[0], WidthArray[1]); if (type == ObjectType::kNotCompressed) { const auto object_offset = entry_value; - info.pos = object_offset; + m_CrossRefTable->AddNormal(startnum + j, 0, object_offset); } else { const auto archive_obj_num = entry_value; - info.archive_obj_num = archive_obj_num; if (archive_obj_num < 0 || !IsValidObjectNumber(archive_obj_num)) return false; - m_ObjectInfo[archive_obj_num].type = ObjectType::kNull; + + m_CrossRefTable->AddCompressed(startnum + j, archive_obj_num); } } } @@ -1113,19 +1038,29 @@ CPDF_Dictionary* CPDF_Parser::GetRoot() const { } const CPDF_Dictionary* CPDF_Parser::GetTrailer() const { - return m_TrailerData->GetMainTrailer(); + return m_CrossRefTable->trailer(); } std::unique_ptr CPDF_Parser::GetCombinedTrailer() const { - return m_TrailerData->GetCombinedTrailer(); + return m_CrossRefTable->trailer() + ? ToDictionary(m_CrossRefTable->trailer()->Clone()) + : std::unique_ptr(); } uint32_t CPDF_Parser::GetInfoObjNum() const { - return m_TrailerData->GetInfoObjNum(); + const CPDF_Reference* pRef = + ToReference(m_CrossRefTable->trailer() + ? m_CrossRefTable->trailer()->GetObjectFor("Info") + : nullptr); + return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum; } uint32_t CPDF_Parser::GetRootObjNum() const { - return m_TrailerData->GetRootObjNum(); + const CPDF_Reference* pRef = + ToReference(m_CrossRefTable->trailer() + ? m_CrossRefTable->trailer()->GetObjectFor("Root") + : nullptr); + return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum; } std::unique_ptr CPDF_Parser::ParseIndirectObject( @@ -1139,7 +1074,7 @@ std::unique_ptr CPDF_Parser::ParseIndirectObject( pdfium::ScopedSetInsertion local_insert(&m_ParsingObjNums, objnum); if (GetObjectType(objnum) == ObjectType::kNotCompressed) { - FX_FILESIZE pos = m_ObjectInfo[objnum].pos; + FX_FILESIZE pos = GetObjectPositionOrZero(objnum); if (pos <= 0) return nullptr; return ParseIndirectObjectAt(pos, objnum); @@ -1148,7 +1083,7 @@ std::unique_ptr CPDF_Parser::ParseIndirectObject( return nullptr; const CPDF_ObjectStream* pObjStream = - GetObjectStream(m_ObjectInfo[objnum].pos); + GetObjectStream(m_CrossRefTable->GetObjectInfo(objnum)->archive_obj_num); if (!pObjStream) return nullptr; @@ -1167,7 +1102,11 @@ const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) { if (it != m_ObjectStreamMap.end()) return it->second.get(); - const FX_FILESIZE object_pos = GetObjectPositionOrZero(object_number); + const auto* info = m_CrossRefTable->GetObjectInfo(object_number); + if (!info || info->type != ObjectType::kObjStream) + return nullptr; + + const FX_FILESIZE object_pos = info->pos; if (object_pos <= 0) return nullptr; @@ -1273,7 +1212,7 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( if (!trailer) return SUCCESS; - m_TrailerData->SetMainTrailer(std::move(trailer)); + m_CrossRefTable->SetTrailer(std::move(trailer)); int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size"); if (xrefsize > 0) ShrinkObjectMap(xrefsize); -- cgit v1.2.3