summaryrefslogtreecommitdiff
path: root/core/fpdfapi/parser
diff options
context:
space:
mode:
Diffstat (limited to 'core/fpdfapi/parser')
-rw-r--r--core/fpdfapi/parser/cpdf_parser.cpp116
-rw-r--r--core/fpdfapi/parser/cpdf_parser.h15
2 files changed, 78 insertions, 53 deletions
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 25d2ac28cf..3184d88943 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -67,14 +67,56 @@ CPDF_Parser::ObjectType GetObjectTypeFromCrossRefStreamType(
} // namespace
+// Holds the trailer state collected while cross-reference data is loaded: it
+// owns the main trailer dictionary and remembers the object number of the
+// "Info" reference found in the most recently applied trailer that had one.
+class CPDF_Parser::TrailerData {
+ public:
+  TrailerData() = default;
+  ~TrailerData() = default;
+
+  // Returns the main trailer dictionary, or nullptr when none is set.
+  CPDF_Dictionary* GetMainTrailer() const { return main_trailer_.get(); }
+
+  // Takes ownership of |trailer| as the main trailer and records its "Info"
+  // object number, if it has one. |trailer| must not be null.
+  void SetMainTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
+    ASSERT(trailer);
+    main_trailer_ = std::move(trailer);
+    ApplyTrailer(main_trailer_.get());
+  }
+
+  // Records the "Info" object number of |trailer|, if it has one. The
+  // dictionary itself is not retained; it is destroyed when this call
+  // returns. |trailer| must not be null.
+  void AppendTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
+    ASSERT(trailer);
+    ApplyTrailer(trailer.get());
+  }
+
+  // Drops the main trailer and forgets the recorded "Info" object number.
+  void Clear() {
+    last_info_obj_num_ = 0;
+    main_trailer_.reset();
+  }
+
+  // Returns the object number referenced by the main trailer's "Info" entry.
+  // When there is no main trailer, or its "Info" entry is not a reference,
+  // falls back to the number recorded from the last applied trailer (0 if
+  // none was ever recorded).
+  uint32_t GetInfoObjNum() const {
+    const CPDF_Dictionary* main_dict = GetMainTrailer();
+    if (main_dict) {
+      const auto* info_ref = ToReference(main_dict->GetObjectFor("Info"));
+      if (info_ref)
+        return info_ref->GetRefObjNum();
+    }
+    return last_info_obj_num_;
+  }
+
+ private:
+  // Remembers the "Info" object number of |dict| when its "Info" entry is a
+  // reference; otherwise leaves the previously recorded number untouched.
+  // The number kept is therefore the one from the last added trailer that
+  // carried an "Info" reference.
+  // See PDF 1.7 spec, section 3.4.5 - Incremental Updates.
+  void ApplyTrailer(const CPDF_Dictionary* dict) {
+    const auto* info_ref = ToReference(dict->GetObjectFor("Info"));
+    if (!info_ref)
+      return;
+    last_info_obj_num_ = info_ref->GetRefObjNum();
+  }
+
+  std::unique_ptr<CPDF_Dictionary> main_trailer_;
+  uint32_t last_info_obj_num_ = 0;
+};
+
CPDF_Parser::CPDF_Parser()
: m_pSyntax(pdfium::MakeUnique<CPDF_SyntaxParser>()),
m_bHasParsed(false),
m_bXRefStream(false),
m_FileVersion(0),
m_pEncryptDict(nullptr),
- m_TrailerPos(CPDF_Parser::kInvalidPos),
- m_dwXrefStartObjNum(0) {}
+ m_TrailerData(pdfium::MakeUnique<TrailerData>()),
+ m_linearized_first_page_cross_ref_start_obj_num(0) {}
CPDF_Parser::~CPDF_Parser() {
ReleaseEncryptHandler();
@@ -344,8 +386,7 @@ bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
if (!trailer)
return false;
- m_Trailers.push_back(std::move(trailer));
- m_TrailerPos = m_Trailers.size() - 1;
+ m_TrailerData->SetMainTrailer(std::move(trailer));
int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
ShrinkObjectMap(xrefsize);
@@ -381,7 +422,7 @@ bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
// SLOW ...
XRefStreamList.insert(XRefStreamList.begin(),
pDict->GetIntegerFor("XRefStm"));
- m_Trailers.push_back(std::move(pDict));
+ m_TrailerData->AppendTrailer(std::move(pDict));
}
for (size_t i = 0; i < CrossRefList.size(); ++i) {
@@ -402,8 +443,7 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
if (!trailer)
return false;
- m_Trailers.push_back(std::move(trailer));
- m_TrailerPos = m_Trailers.size() - 1;
+ m_TrailerData->SetMainTrailer(std::move(trailer));
int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
if (xrefsize == 0)
return false;
@@ -437,7 +477,7 @@ bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
// SLOW ...
XRefStreamList.insert(XRefStreamList.begin(),
pDict->GetIntegerFor("XRefStm"));
- m_Trailers.push_back(std::move(pDict));
+ m_TrailerData->AppendTrailer(std::move(pDict));
}
for (size_t i = 1; i < CrossRefList.size(); ++i) {
@@ -543,8 +583,7 @@ bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
return true;
}
-bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects,
- uint32_t* start_obj_num_at_last_block) {
+bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
if (out_objects)
out_objects->clear();
@@ -567,8 +606,6 @@ bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects,
uint32_t start_objnum = FXSYS_atoui(word.c_str());
if (start_objnum >= kMaxObjectNumber)
return false;
- if (start_obj_num_at_last_block)
- *start_obj_num_at_last_block = start_objnum;
uint32_t count = m_pSyntax->GetDirectNum();
m_pSyntax->ToNextWord();
@@ -589,7 +626,7 @@ bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
bool bSkip) {
m_pSyntax->SetPos(pos);
std::vector<CrossRefObjData> objects;
- if (!ParseCrossRefV4(bSkip ? nullptr : &objects, &m_dwXrefStartObjNum))
+ if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
return false;
MergeCrossRefObjectsData(objects);
@@ -625,8 +662,7 @@ bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
bool CPDF_Parser::RebuildCrossRef() {
m_ObjectInfo.clear();
- m_Trailers.clear();
- m_TrailerPos = CPDF_Parser::kInvalidPos;
+ m_TrailerData->Clear();
ParserState state = ParserState::kDefault;
int32_t inside_index = 0;
@@ -786,8 +822,8 @@ bool CPDF_Parser::RebuildCrossRef() {
CPDF_Object* pRoot = pDict->GetObjectFor("Root");
if (pRoot && pRoot->GetDict() &&
pRoot->GetDict()->GetObjectFor("Pages")) {
- m_Trailers.push_back(ToDictionary(pDict->Clone()));
- m_TrailerPos = m_Trailers.size() - 1;
+ m_TrailerData->SetMainTrailer(
+ ToDictionary(pDict->Clone()));
}
}
}
@@ -862,11 +898,9 @@ bool CPDF_Parser::RebuildCrossRef() {
}
}
} else {
- if (pObj->IsStream())
- m_Trailers.push_back(ToDictionary(pTrailer->Clone()));
- else
- m_Trailers.push_back(ToDictionary(std::move(pObj)));
- m_TrailerPos = m_Trailers.size() - 1;
+ m_TrailerData->SetMainTrailer(
+ ToDictionary(pObj->IsStream() ? pTrailer->Clone()
+ : std::move(pObj)));
FX_FILESIZE dwSavePos = m_pSyntax->GetPos();
CFX_ByteString strWord = m_pSyntax->GetKeyword();
@@ -1005,13 +1039,12 @@ bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
std::unique_ptr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
if (bMainXRef) {
- m_Trailers.push_back(std::move(pNewTrailer));
- m_TrailerPos = m_Trailers.size() - 1;
+ m_TrailerData->SetMainTrailer(std::move(pNewTrailer));
ShrinkObjectMap(size);
for (auto& it : m_ObjectInfo)
it.second.type = ObjectType::kFree;
} else {
- m_Trailers.push_back(std::move(pNewTrailer));
+ m_TrailerData->AppendTrailer(std::move(pNewTrailer));
}
std::vector<std::pair<int32_t, int32_t>> arrIndex;
@@ -1059,7 +1092,6 @@ bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
if (startnum < 0)
continue;
- m_dwXrefStartObjNum = pdfium::base::checked_cast<uint32_t>(startnum);
uint32_t count = pdfium::base::checked_cast<uint32_t>(arrIndex[i].second);
FX_SAFE_UINT32 dwCaculatedSize = segindex;
dwCaculatedSize += count;
@@ -1145,21 +1177,12 @@ uint32_t CPDF_Parser::GetRootObjNum() {
return pRef ? pRef->GetRefObjNum() : 0;
}
+// Returns the main trailer dictionary held by |m_TrailerData|; the result is
+// whatever TrailerData::GetMainTrailer() reports.
+CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
+  CPDF_Dictionary* main_trailer = m_TrailerData->GetMainTrailer();
+  return main_trailer;
+}
+
uint32_t CPDF_Parser::GetInfoObjNum() {
- CPDF_Reference* pRef =
- ToReference(GetTrailer() ? GetTrailer()->GetObjectFor("Info") : nullptr);
- if (pRef)
- return pRef->GetRefObjNum();
-
- // Search trailers array from latest to earliest revision, as we want the
- // most recent Info object number.
- // See PDF 1.7 spec, section 3.4.5 - Incremental Updates.
- for (auto it = m_Trailers.rbegin(); it != m_Trailers.rend(); ++it) {
- pRef = ToReference(it->get()->GetObjectFor("Info"));
- if (pRef)
- return pRef->GetRefObjNum();
- }
- return 0;
+ return m_TrailerData->GetInfoObjNum();
}
std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
@@ -1378,14 +1401,14 @@ CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
bXRefRebuilt = true;
m_LastXRefOffset = 0;
}
-
+ m_linearized_first_page_cross_ref_start_obj_num =
+ m_ObjectInfo.empty() ? 0 : m_ObjectInfo.begin()->first;
if (bLoadV4) {
std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
if (!trailer)
return SUCCESS;
- m_Trailers.push_back(std::move(trailer));
- m_TrailerPos = m_Trailers.size() - 1;
+ m_TrailerData->SetMainTrailer(std::move(trailer));
int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
if (xrefsize > 0)
ShrinkObjectMap(xrefsize);
@@ -1470,7 +1493,12 @@ CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
m_ObjectStreamMap.clear();
m_ObjCache.clear();
- if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
+ // In a linearized document, the main cross-reference table should always
+ // start at object number 0.
+ // Its entry count should equal the first object number of the first-page
+ // cross-reference table.
+ if (!LoadLinearizedAllCrossRefV4(
+ m_LastXRefOffset, m_linearized_first_page_cross_ref_start_obj_num) &&
!LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
m_LastXRefOffset = 0;
m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index f636704162..437f214db6 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -61,10 +61,7 @@ class CPDF_Parser {
void SetPassword(const char* password) { m_Password = password; }
CFX_ByteString GetPassword() { return m_Password; }
- CPDF_Dictionary* GetTrailer() const {
- return m_TrailerPos == kInvalidPos ? nullptr
- : m_Trailers[m_TrailerPos].get();
- }
+ CPDF_Dictionary* GetTrailer() const;
FX_FILESIZE GetLastXRefOffset() const { return m_LastXRefOffset; }
uint32_t GetPermissions() const;
@@ -128,6 +125,8 @@ class CPDF_Parser {
private:
friend class CPDF_DataAvail;
+ class TrailerData;
+
enum class ParserState {
kDefault,
kComment,
@@ -176,8 +175,7 @@ class CPDF_Parser {
uint32_t start_objnum,
uint32_t count,
std::vector<CrossRefObjData>* out_objects);
- bool ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects,
- uint32_t* start_obj_num_at_last_block);
+ bool ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects);
void MergeCrossRefObjectsData(const std::vector<CrossRefObjData>& objects);
std::unique_ptr<CPDF_Object> ParseIndirectObjectAtInternal(
@@ -199,10 +197,9 @@ class CPDF_Parser {
FX_FILESIZE m_LastXRefOffset;
std::unique_ptr<CPDF_SecurityHandler> m_pSecurityHandler;
CFX_ByteString m_Password;
- std::vector<std::unique_ptr<CPDF_Dictionary>> m_Trailers;
- size_t m_TrailerPos;
+ std::unique_ptr<TrailerData> m_TrailerData;
std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
- uint32_t m_dwXrefStartObjNum;
+ uint32_t m_linearized_first_page_cross_ref_start_obj_num;
// A map of object numbers to indirect streams.
std::map<uint32_t, CFX_RetainPtr<CPDF_StreamAcc>> m_ObjectStreamMap;