summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--BUILD.gn2
-rw-r--r--core/fpdfapi/parser/cpdf_cross_ref_table.cpp129
-rw-r--r--core/fpdfapi/parser/cpdf_cross_ref_table.h68
-rw-r--r--core/fpdfapi/parser/cpdf_parser.cpp75
-rw-r--r--core/fpdfapi/parser/cpdf_parser.h22
5 files changed, 228 insertions, 68 deletions
diff --git a/BUILD.gn b/BUILD.gn
index 938bfa9f9b..6c26519f6e 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -605,6 +605,8 @@ jumbo_static_library("fpdfapi") {
"core/fpdfapi/parser/cpdf_boolean.h",
"core/fpdfapi/parser/cpdf_cross_ref_avail.cpp",
"core/fpdfapi/parser/cpdf_cross_ref_avail.h",
+ "core/fpdfapi/parser/cpdf_cross_ref_table.cpp",
+ "core/fpdfapi/parser/cpdf_cross_ref_table.h",
"core/fpdfapi/parser/cpdf_crypto_handler.cpp",
"core/fpdfapi/parser/cpdf_crypto_handler.h",
"core/fpdfapi/parser/cpdf_data_avail.cpp",
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.cpp b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
new file mode 100644
index 0000000000..770c483dca
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.cpp
@@ -0,0 +1,129 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_cross_ref_table.h"
+
+#include <utility>
+
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+
+CPDF_CrossRefTable::CPDF_CrossRefTable() = default;
+
+CPDF_CrossRefTable::CPDF_CrossRefTable(std::unique_ptr<CPDF_Dictionary> trailer)
+ : trailer_(std::move(trailer)) {}
+
+CPDF_CrossRefTable::~CPDF_CrossRefTable() = default;
+
+void CPDF_CrossRefTable::AddCompressed(uint32_t obj_num,
+ uint32_t archive_obj_num) {
+ auto& info = objects_info_[obj_num];
+ if (info.gennum > 0)
+ return;
+
+ if (info.type == ObjectType::kObjStream)
+ return;
+
+ info.type = ObjectType::kCompressed;
+ info.archive_obj_num = archive_obj_num;
+ info.gennum = 0;
+
+ objects_info_[archive_obj_num].type = ObjectType::kObjStream;
+}
+
+void CPDF_CrossRefTable::AddNormal(uint32_t obj_num,
+ uint16_t gen_num,
+ FX_FILESIZE pos) {
+ auto& info = objects_info_[obj_num];
+ if (info.gennum > gen_num)
+ return;
+
+ if (info.type == ObjectType::kCompressed && gen_num == 0)
+ return;
+
+ if (info.type != ObjectType::kObjStream)
+ info.type = ObjectType::kNormal;
+
+ info.gennum = gen_num;
+ info.pos = pos;
+}
+
+void CPDF_CrossRefTable::SetFree(uint32_t obj_num) {
+ auto& info = objects_info_[obj_num];
+ info.type = ObjectType::kFree;
+ info.gennum = 0xFFFF;
+ info.pos = 0;
+}
+
+void CPDF_CrossRefTable::SetTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
+ trailer_ = std::move(trailer);
+}
+
+const CPDF_CrossRefTable::ObjectInfo* CPDF_CrossRefTable::GetObjectInfo(
+ uint32_t obj_num) const {
+ const auto it = objects_info_.find(obj_num);
+ return it != objects_info_.end() ? &it->second : nullptr;
+}
+
+void CPDF_CrossRefTable::Update(
+ std::unique_ptr<CPDF_CrossRefTable> new_cross_ref) {
+ UpdateInfo(std::move(new_cross_ref->objects_info_));
+ UpdateTrailer(std::move(new_cross_ref->trailer_));
+}
+
+void CPDF_CrossRefTable::ShrinkObjectMap(uint32_t objnum) {
+ if (objnum == 0) {
+ objects_info_.clear();
+ return;
+ }
+
+ objects_info_.erase(objects_info_.lower_bound(objnum), objects_info_.end());
+
+ if (!pdfium::ContainsKey(objects_info_, objnum - 1))
+ objects_info_[objnum - 1].pos = 0;
+}
+
+void CPDF_CrossRefTable::UpdateInfo(
+ std::map<uint32_t, ObjectInfo>&& new_objects_info) {
+ auto cur_it = objects_info_.begin();
+ auto new_it = new_objects_info.begin();
+ while (cur_it != objects_info_.end() && new_it != new_objects_info.end()) {
+ if (cur_it->first == new_it->first) {
+ if (cur_it->second.type == ObjectType::kObjStream &&
+ new_it->second.type == ObjectType::kNormal) {
+ new_it->second.type = ObjectType::kObjStream;
+ }
+ ++cur_it;
+ ++new_it;
+ } else if (cur_it->first < new_it->first) {
+ new_objects_info.insert(new_it, *cur_it);
+ ++cur_it;
+ } else {
+ new_it = new_objects_info.lower_bound(cur_it->first);
+ }
+ }
+ for (; cur_it != objects_info_.end(); ++cur_it) {
+ new_objects_info.insert(new_objects_info.end(), *cur_it);
+ }
+ objects_info_ = std::move(new_objects_info);
+}
+
+void CPDF_CrossRefTable::UpdateTrailer(
+ std::unique_ptr<CPDF_Dictionary> new_trailer) {
+ if (!new_trailer)
+ return;
+
+ if (!trailer_) {
+ trailer_ = std::move(new_trailer);
+ return;
+ }
+
+ new_trailer->SetFor("XRefStm", trailer_->RemoveFor("XRefStm"));
+ new_trailer->SetFor("Prev", trailer_->RemoveFor("Prev"));
+
+ for (auto it = new_trailer->begin(); it != new_trailer->end();) {
+ const ByteString key = it->first;
+ ++it;
+ trailer_->SetFor(key, new_trailer->RemoveFor(key));
+ }
+}
diff --git a/core/fpdfapi/parser/cpdf_cross_ref_table.h b/core/fpdfapi/parser/cpdf_cross_ref_table.h
new file mode 100644
index 0000000000..ade1b336b2
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_cross_ref_table.h
@@ -0,0 +1,68 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_TABLE_H_
+#define CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_TABLE_H_
+
+#include <map>
+#include <memory>
+
+#include "core/fxcrt/fx_system.h"
+
+class CPDF_Dictionary;
+
+class CPDF_CrossRefTable {
+ public:
+ enum class ObjectType : uint8_t {
+ kFree = 0x00,
+ kNormal = 0x01,
+ kNotCompressed = kNormal,
+ kCompressed = 0x02,
+ kObjStream = 0xFF,
+ kNull = kObjStream,
+ };
+
+ struct ObjectInfo {
+ ObjectInfo() : pos(0), type(ObjectType::kFree), gennum(0) {}
+ // if type is ObjectType::kCompressed the archive_obj_num should be used.
+ // if type is ObjectType::kNotCompressed the pos should be used.
+ // In other cases they are unused.
+ union {
+ FX_FILESIZE pos;
+ uint32_t archive_obj_num;
+ };
+ ObjectType type;
+ uint16_t gennum;
+ };
+
+ CPDF_CrossRefTable();
+ explicit CPDF_CrossRefTable(std::unique_ptr<CPDF_Dictionary> trailer);
+ ~CPDF_CrossRefTable();
+
+ void AddCompressed(uint32_t obj_num, uint32_t archive_obj_num);
+ void AddNormal(uint32_t obj_num, uint16_t gen_num, FX_FILESIZE pos);
+ void SetFree(uint32_t obj_num);
+
+ const CPDF_Dictionary* trailer() const { return trailer_.get(); }
+ void SetTrailer(std::unique_ptr<CPDF_Dictionary> trailer);
+
+ const ObjectInfo* GetObjectInfo(uint32_t obj_num) const;
+
+ const std::map<uint32_t, ObjectInfo>& objects_info() const {
+ return objects_info_;
+ }
+
+ void Update(std::unique_ptr<CPDF_CrossRefTable> new_cross_ref);
+
+ void ShrinkObjectMap(uint32_t objnum);
+
+ private:
+ void UpdateInfo(std::map<uint32_t, ObjectInfo>&& new_objects_info);
+ void UpdateTrailer(std::unique_ptr<CPDF_Dictionary> new_trailer);
+
+ std::unique_ptr<CPDF_Dictionary> trailer_;
+ std::map<uint32_t, ObjectInfo> objects_info_;
+};
+
+#endif // CORE_FPDFAPI_PARSER_CPDF_CROSS_REF_TABLE_H_
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 3a7afd0b49..c7a3fe16c8 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -670,6 +670,7 @@ bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
}
bool CPDF_Parser::RebuildCrossRef() {
+ CPDF_CrossRefTable cross_ref_table;
m_ObjectInfo.clear();
m_TrailerData->Clear();
@@ -826,23 +827,14 @@ bool CPDF_Parser::RebuildCrossRef() {
CPDF_Object* pRoot = pDict->GetObjectFor("Root");
if (pRoot && pRoot->GetDict() &&
pRoot->GetDict()->GetObjectFor("Pages")) {
- m_TrailerData->SetMainTrailer(
- ToDictionary(pDict->Clone()));
+ cross_ref_table.Update(
+ pdfium::MakeUnique<CPDF_CrossRefTable>(
+ ToDictionary(pDict->Clone())));
}
}
}
}
- ObjectInfo& info = m_ObjectInfo[objnum];
- if (pObject || !info.pos) {
- info.pos = obj_pos;
- info.type = ObjectType::kNotCompressed;
- // The newer version of object should be located after old
- // version.
- // Do not worry about gennum in this case, because we read
- // file
- // in front order.
- info.gennum = gennum;
- }
+ cross_ref_table.AddNormal(objnum, gennum, obj_pos);
}
state = ParserState::kDefault;
break;
@@ -859,42 +851,21 @@ bool CPDF_Parser::RebuildCrossRef() {
if (!pObj)
m_pSyntax->SetPos(current_char_pos);
- {
- if (pObj && (pObj->IsDictionary() || pObj->IsStream())) {
- if (CPDF_Dictionary* pTrailer =
- pObj->IsStream() ? pObj->AsStream()->GetDict()
- : pObj->AsDictionary()) {
- if (GetTrailer()) {
- CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
- CPDF_Reference* pRef = ToReference(pRoot);
- if (!pRoot ||
- (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
- m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
- // This is newer version of trailer. Merge it with old.
- for (auto it = pTrailer->begin();
- it != pTrailer->end();) {
- DCHECK(it->second->IsInline());
- const ByteString key = it->first;
- ++it;
- GetTrailer()->SetFor(key, pTrailer->RemoveFor(key));
- }
- }
- } else {
- m_TrailerData->SetMainTrailer(
- ToDictionary(pObj->IsStream() ? pTrailer->Clone()
- : std::move(pObj)));
-
- FX_FILESIZE dwSavePos = m_pSyntax->GetPos();
- ByteString strWord = m_pSyntax->GetKeyword();
- if (!strWord.Compare("startxref")) {
- bool bNumber;
- ByteString bsOffset = m_pSyntax->GetNextWord(&bNumber);
- if (bNumber)
- m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str());
- }
- m_pSyntax->SetPos(dwSavePos);
- }
- }
+ if (pObj) {
+ cross_ref_table.Update(pdfium::MakeUnique<CPDF_CrossRefTable>(
+ ToDictionary(pObj->IsStream()
+ ? pObj->AsStream()->GetDict()->Clone()
+ : std::move(pObj))));
+
+ FX_FILESIZE dwSavePos = m_pSyntax->GetPos();
+ ByteString strWord = m_pSyntax->GetKeyword();
+ if (!strWord.Compare("startxref")) {
+ bool bNumber;
+ ByteString bsOffset = m_pSyntax->GetNextWord(&bNumber);
+ if (bNumber)
+ last_xref = FXSYS_atoi(bsOffset.c_str());
+ } else {
+ m_pSyntax->SetPos(dwSavePos);
}
}
}
@@ -972,8 +943,14 @@ bool CPDF_Parser::RebuildCrossRef() {
else if (last_trailer == -1 || last_xref < last_obj)
last_trailer = m_pSyntax->m_FileLen;
+ if (cross_ref_table.trailer()) {
+ m_TrailerData->SetMainTrailer(
+ ToDictionary(cross_ref_table.trailer()->Clone()));
+ m_ObjectInfo = cross_ref_table.objects_info();
+ }
// Resore default buffer size.
m_pSyntax->SetReadBufferSize(CPDF_ModuleMgr::kFileBufSize);
+
return GetTrailer() && !m_ObjectInfo.empty();
}
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 3151da61c9..f859db5d16 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -13,6 +13,7 @@
#include <set>
#include <vector>
+#include "core/fpdfapi/parser/cpdf_cross_ref_table.h"
#include "core/fpdfapi/parser/cpdf_syntax_parser.h"
#include "core/fxcrt/fx_string.h"
#include "core/fxcrt/fx_system.h"
@@ -113,25 +114,8 @@ class CPDF_Parser {
void SetLinearizedHeader(std::unique_ptr<CPDF_LinearizedHeader> pLinearized);
protected:
- enum class ObjectType : uint8_t {
- kFree = 0x00,
- kNotCompressed = 0x01,
- kCompressed = 0x02,
- kNull = 0xFF,
- };
-
- struct ObjectInfo {
- ObjectInfo() : pos(0), type(ObjectType::kFree), gennum(0) {}
- // if type is ObjectType::kCompressed the archive_obj_num should be used.
- // if type is ObjectType::kNotCompressed the pos should be used.
- // In other cases its are unused.
- union {
- FX_FILESIZE pos;
- FX_FILESIZE archive_obj_num;
- };
- ObjectType type;
- uint16_t gennum;
- };
+ using ObjectType = CPDF_CrossRefTable::ObjectType;
+ using ObjectInfo = CPDF_CrossRefTable::ObjectInfo;
std::unique_ptr<CPDF_SyntaxParser> m_pSyntax;
std::map<uint32_t, ObjectInfo> m_ObjectInfo;