summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorArtem Strygin <art-snake@yandex-team.ru>2018-06-26 16:01:38 +0000
committerChromium commit bot <commit-bot@chromium.org>2018-06-26 16:01:38 +0000
commite3d3ce04e585c4a6c5596056bdf1ced639c763d7 (patch)
tree55ba6fde26546f818d8ffadc879087ada34502ab
parent08b6819660a69cdc83bd133d1074da5813d9e414 (diff)
downloadpdfium-e3d3ce04e585c4a6c5596056bdf1ced639c763d7.tar.xz
Implement CPDF_ObjectStream.
It allows raw object streams not to be stored within CPDF_Document, which reduces memory usage. Change-Id: I4377bd5119d87314e76f14255171618cf6ee533d Reviewed-on: https://pdfium-review.googlesource.com/35430 Reviewed-by: dsinclair <dsinclair@chromium.org> Reviewed-by: Tom Sepez <tsepez@chromium.org> Commit-Queue: Art Snake <art-snake@yandex-team.ru>
-rw-r--r--BUILD.gn2
-rw-r--r--core/fpdfapi/parser/cpdf_object_stream.cpp136
-rw-r--r--core/fpdfapi/parser/cpdf_object_stream.h52
-rw-r--r--core/fpdfapi/parser/cpdf_parser.cpp70
-rw-r--r--core/fpdfapi/parser/cpdf_parser.h15
-rw-r--r--core/fpdfapi/parser/cpdf_syntax_parser.h2
6 files changed, 221 insertions, 56 deletions
diff --git a/BUILD.gn b/BUILD.gn
index 934601bfd1..938bfa9f9b 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -629,6 +629,8 @@ jumbo_static_library("fpdfapi") {
"core/fpdfapi/parser/cpdf_object.h",
"core/fpdfapi/parser/cpdf_object_avail.cpp",
"core/fpdfapi/parser/cpdf_object_avail.h",
+ "core/fpdfapi/parser/cpdf_object_stream.cpp",
+ "core/fpdfapi/parser/cpdf_object_stream.h",
"core/fpdfapi/parser/cpdf_object_walker.cpp",
"core/fpdfapi/parser/cpdf_object_walker.h",
"core/fpdfapi/parser/cpdf_page_object_avail.cpp",
diff --git a/core/fpdfapi/parser/cpdf_object_stream.cpp b/core/fpdfapi/parser/cpdf_object_stream.cpp
new file mode 100644
index 0000000000..779fbffd83
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_object_stream.cpp
@@ -0,0 +1,136 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/parser/cpdf_object_stream.h"
+
+#include <utility>
+
+#include "core/fpdfapi/parser/cpdf_dictionary.h"
+#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_parser.h"
+#include "core/fpdfapi/parser/cpdf_reference.h"
+#include "core/fpdfapi/parser/cpdf_stream.h"
+#include "core/fpdfapi/parser/cpdf_stream_acc.h"
+#include "core/fxcrt/cfx_memorystream.h"
+#include "third_party/base/stl_util.h"
+
+// static
+bool CPDF_ObjectStream::IsObjectsStreamObject(const CPDF_Object* object) {
+ const CPDF_Stream* stream = ToStream(object);
+ if (!stream)
+ return false;
+
+ const CPDF_Dictionary* stream_dict = stream->GetDict();
+ if (!stream_dict)
+ return false;
+
+ if (stream_dict->GetStringFor("Type") != "ObjStm")
+ return false;
+
+ const CPDF_Number* number_of_objects =
+ ToNumber(stream_dict->GetObjectFor("N"));
+ if (!number_of_objects || !number_of_objects->IsInteger() ||
+ number_of_objects->GetInteger() < 0 ||
+ number_of_objects->GetInteger() >=
+ static_cast<int>(CPDF_Parser::kMaxObjectNumber)) {
+ return false;
+ }
+
+ const CPDF_Number* first_object_offset =
+ ToNumber(stream_dict->GetObjectFor("First"));
+ if (!first_object_offset || !first_object_offset->IsInteger() ||
+ first_object_offset->GetInteger() < 0) {
+ return false;
+ }
+
+ return true;
+}
+
+// static
+std::unique_ptr<CPDF_ObjectStream> CPDF_ObjectStream::Create(
+ const CPDF_Stream* stream) {
+ if (!IsObjectsStreamObject(stream))
+ return nullptr;
+ // The ctor of CPDF_ObjectStream is protected, so use WrapUnique instead of
+ // MakeUnique.
+ return pdfium::WrapUnique(new CPDF_ObjectStream(stream));
+}
+
+CPDF_ObjectStream::CPDF_ObjectStream(const CPDF_Stream* obj_stream)
+ : obj_num_(obj_stream->GetObjNum()),
+ first_object_offset_(obj_stream->GetDict()->GetIntegerFor("First")) {
+ DCHECK(IsObjectsStreamObject(obj_stream));
+ if (const auto* extends_ref =
+ ToReference(obj_stream->GetDict()->GetObjectFor("Extends"))) {
+ extends_obj_num_ = extends_ref->GetRefObjNum();
+ }
+ Init(obj_stream);
+}
+
+CPDF_ObjectStream::~CPDF_ObjectStream() = default;
+
+bool CPDF_ObjectStream::HasObject(uint32_t obj_number) const {
+ return pdfium::ContainsKey(objects_offsets_, obj_number);
+}
+
+std::unique_ptr<CPDF_Object> CPDF_ObjectStream::ParseObject(
+ CPDF_IndirectObjectHolder* pObjList,
+ uint32_t obj_number) const {
+ const auto it = objects_offsets_.find(obj_number);
+ if (it == objects_offsets_.end())
+ return nullptr;
+
+ std::unique_ptr<CPDF_Object> result =
+ ParseObjectAtOffset(pObjList, it->second);
+ if (!result)
+ return nullptr;
+
+ result->SetObjNum(obj_number);
+ return result;
+}
+
+void CPDF_ObjectStream::Init(const CPDF_Stream* stream) {
+ {
+ auto stream_acc = pdfium::MakeRetain<CPDF_StreamAcc>(stream);
+ stream_acc->LoadAllDataFiltered();
+ const uint32_t data_size = stream_acc->GetSize();
+ data_stream_ = pdfium::MakeRetain<CFX_MemoryStream>(
+ stream_acc->DetachData().release(), static_cast<size_t>(data_size),
+ true);
+ }
+
+ CPDF_SyntaxParser syntax;
+ syntax.InitParser(data_stream_, 0);
+
+ const int object_count = stream->GetDict()->GetIntegerFor("N");
+ for (int32_t i = object_count; i > 0; --i) {
+ if (syntax.GetPos() >= data_stream_->GetSize())
+ break;
+
+ const uint32_t obj_num = syntax.GetDirectNum();
+ const uint32_t obj_offset = syntax.GetDirectNum();
+ if (!obj_num)
+ continue;
+
+ objects_offsets_[obj_num] = obj_offset;
+ }
+}
+
+std::unique_ptr<CPDF_Object> CPDF_ObjectStream::ParseObjectAtOffset(
+ CPDF_IndirectObjectHolder* pObjList,
+ uint32_t object_offset) const {
+ FX_SAFE_FILESIZE offset_in_stream = first_object_offset_;
+ offset_in_stream += object_offset;
+
+ if (!offset_in_stream.IsValid())
+ return nullptr;
+
+ if (offset_in_stream.ValueOrDie() >= data_stream_->GetSize())
+ return nullptr;
+
+ CPDF_SyntaxParser syntax;
+ syntax.InitParser(data_stream_, 0);
+ syntax.SetPos(offset_in_stream.ValueOrDie());
+ return syntax.GetObjectBody(pObjList);
+}
diff --git a/core/fpdfapi/parser/cpdf_object_stream.h b/core/fpdfapi/parser/cpdf_object_stream.h
new file mode 100644
index 0000000000..816c1be77e
--- /dev/null
+++ b/core/fpdfapi/parser/cpdf_object_stream.h
@@ -0,0 +1,52 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_PARSER_CPDF_OBJECT_STREAM_H_
+#define CORE_FPDFAPI_PARSER_CPDF_OBJECT_STREAM_H_
+
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "core/fpdfapi/parser/cpdf_object.h"
+#include "core/fxcrt/retain_ptr.h"
+
+class CPDF_IndirectObjectHolder;
+class CPDF_Stream;
+class CPDF_StreamAcc;
+class IFX_SeekableReadStream;
+
+// Implementation of logic of PDF "Object Streams".
+// See "PDF 32000-1:2008" Spec. section 7.5.7.
+class CPDF_ObjectStream {
+ public:
+ static bool IsObjectsStreamObject(const CPDF_Object* object);
+
+ static std::unique_ptr<CPDF_ObjectStream> Create(const CPDF_Stream* stream);
+
+ ~CPDF_ObjectStream();
+
+ uint32_t obj_num() const { return obj_num_; }
+ uint32_t extends_obj_num() const { return extends_obj_num_; }
+
+ bool HasObject(uint32_t obj_number) const;
+ std::unique_ptr<CPDF_Object> ParseObject(CPDF_IndirectObjectHolder* pObjList,
+ uint32_t obj_number) const;
+
+ protected:
+ explicit CPDF_ObjectStream(const CPDF_Stream* stream);
+
+ void Init(const CPDF_Stream* stream);
+ std::unique_ptr<CPDF_Object> ParseObjectAtOffset(
+ CPDF_IndirectObjectHolder* pObjList,
+ uint32_t object_offset) const;
+
+ uint32_t obj_num_ = CPDF_Object::kInvalidObjNum;
+ uint32_t extends_obj_num_ = CPDF_Object::kInvalidObjNum;
+ RetainPtr<IFX_SeekableReadStream> data_stream_;
+ int first_object_offset_ = 0;
+ std::map<uint32_t, uint32_t> objects_offsets_;
+};
+
+#endif // CORE_FPDFAPI_PARSER_CPDF_OBJECT_STREAM_H_
diff --git a/core/fpdfapi/parser/cpdf_parser.cpp b/core/fpdfapi/parser/cpdf_parser.cpp
index 73b4bcdc0c..6cdd6ab789 100644
--- a/core/fpdfapi/parser/cpdf_parser.cpp
+++ b/core/fpdfapi/parser/cpdf_parser.cpp
@@ -16,6 +16,7 @@
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_linearized_header.h"
#include "core/fpdfapi/parser/cpdf_number.h"
+#include "core/fpdfapi/parser/cpdf_object_stream.h"
#include "core/fpdfapi/parser/cpdf_read_validator.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_security_handler.h"
@@ -46,14 +47,6 @@ uint32_t GetVarInt(const uint8_t* p, int32_t n) {
return result;
}
-int32_t GetStreamNCount(const RetainPtr<CPDF_StreamAcc>& pObjStream) {
- return pObjStream->GetDict()->GetIntegerFor("N");
-}
-
-int32_t GetStreamFirst(const RetainPtr<CPDF_StreamAcc>& pObjStream) {
- return pObjStream->GetDict()->GetIntegerFor("First");
-}
-
} // namespace
class CPDF_Parser::TrailerData {
@@ -1198,8 +1191,7 @@ std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
return nullptr;
pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
- if (GetObjectType(objnum) == ObjectType::kNotCompressed ||
- GetObjectType(objnum) == ObjectType::kNull) {
+ if (GetObjectType(objnum) == ObjectType::kNotCompressed) {
FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
if (pos <= 0)
return nullptr;
@@ -1208,52 +1200,43 @@ std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
if (GetObjectType(objnum) != ObjectType::kCompressed)
return nullptr;
- RetainPtr<CPDF_StreamAcc> pObjStream =
- GetObjectStream(m_ObjectInfo[objnum].pos);
+ const CPDF_ObjectStream* pObjStream =
+ GetObjectStream(pObjList, m_ObjectInfo[objnum].pos);
if (!pObjStream)
return nullptr;
- auto file = pdfium::MakeRetain<CFX_MemoryStream>(
- const_cast<uint8_t*>(pObjStream->GetData()),
- static_cast<size_t>(pObjStream->GetSize()), false);
- CPDF_SyntaxParser syntax;
- syntax.InitParser(file, 0);
- const int32_t offset = GetStreamFirst(pObjStream);
-
- // Read object numbers from |pObjStream| into a cache.
- if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
- for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
- uint32_t thisnum = syntax.GetDirectNum();
- uint32_t thisoff = syntax.GetDirectNum();
- m_ObjCache[pObjStream][thisnum] = thisoff;
- }
- }
+ return pObjStream->ParseObject(pObjList, objnum);
+}
- const auto it = m_ObjCache[pObjStream].find(objnum);
- if (it == m_ObjCache[pObjStream].end())
+const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(
+ CPDF_IndirectObjectHolder* pObjList,
+ uint32_t object_number) {
+ // Prevent circular parsing of the same object.
+ if (pdfium::ContainsKey(m_ParsingObjNums, object_number))
return nullptr;
- syntax.SetPos(offset + it->second);
- return syntax.GetObjectBody(pObjList);
-}
+ pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums,
+ object_number);
-RetainPtr<CPDF_StreamAcc> CPDF_Parser::GetObjectStream(uint32_t objnum) {
- auto it = m_ObjectStreamMap.find(objnum);
+ auto it = m_ObjectStreamMap.find(object_number);
if (it != m_ObjectStreamMap.end())
- return it->second;
+ return it->second.get();
- if (!m_pDocument)
+ const FX_FILESIZE object_pos = GetObjectPositionOrZero(object_number);
+ if (object_pos <= 0)
return nullptr;
- const CPDF_Stream* pStream =
- ToStream(m_pDocument->GetOrParseIndirectObject(objnum));
- if (!pStream)
+ std::unique_ptr<CPDF_Object> object =
+ ParseIndirectObjectAt(pObjList, object_pos, object_number);
+ if (!object)
return nullptr;
- auto pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
- pStreamAcc->LoadAllDataFiltered();
- m_ObjectStreamMap[objnum] = pStreamAcc;
- return pStreamAcc;
+ std::unique_ptr<CPDF_ObjectStream> objs_stream =
+ CPDF_ObjectStream::Create(ToStream(object.get()));
+ const CPDF_ObjectStream* result = objs_stream.get();
+ m_ObjectStreamMap[object_number] = std::move(objs_stream);
+
+ return result;
}
std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(
@@ -1437,7 +1420,6 @@ CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
m_MetadataObjnum = 0;
m_ObjectStreamMap.clear();
- m_ObjCache.clear();
if (!LoadLinearizedAllCrossRefV4(main_xref_offset.ValueOrDie()) &&
!LoadLinearizedAllCrossRefV5(main_xref_offset.ValueOrDie())) {
diff --git a/core/fpdfapi/parser/cpdf_parser.h b/core/fpdfapi/parser/cpdf_parser.h
index 5f0a4a1cf3..705fad9bdb 100644
--- a/core/fpdfapi/parser/cpdf_parser.h
+++ b/core/fpdfapi/parser/cpdf_parser.h
@@ -26,9 +26,9 @@ class CPDF_Document;
class CPDF_IndirectObjectHolder;
class CPDF_LinearizedHeader;
class CPDF_Object;
+class CPDF_ObjectStream;
class CPDF_ReadValidator;
class CPDF_SecurityHandler;
-class CPDF_StreamAcc;
class CPDF_SyntaxParser;
class IFX_SeekableReadStream;
@@ -172,7 +172,8 @@ class CPDF_Parser {
bool LoadLinearizedAllCrossRefV4(FX_FILESIZE pos);
bool LoadLinearizedAllCrossRefV5(FX_FILESIZE pos);
Error LoadLinearizedMainXRefTable();
- RetainPtr<CPDF_StreamAcc> GetObjectStream(uint32_t number);
+ const CPDF_ObjectStream* GetObjectStream(CPDF_IndirectObjectHolder* pObjList,
+ uint32_t object_number);
std::unique_ptr<CPDF_LinearizedHeader> ParseLinearizedHeader();
void SetEncryptDictionary(CPDF_Dictionary* pDict);
void ShrinkObjectMap(uint32_t size);
@@ -218,15 +219,7 @@ class CPDF_Parser {
std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
// A map of object numbers to indirect streams.
- std::map<uint32_t, RetainPtr<CPDF_StreamAcc>> m_ObjectStreamMap;
-
- // Mapping of object numbers to offsets. The offsets are relative to the first
- // object in the stream.
- using StreamObjectCache = std::map<uint32_t, uint32_t>;
-
- // Mapping of streams to their object caches. This is valid as long as the
- // streams in |m_ObjectStreamMap| are valid.
- std::map<RetainPtr<CPDF_StreamAcc>, StreamObjectCache> m_ObjCache;
+ std::map<uint32_t, std::unique_ptr<CPDF_ObjectStream>> m_ObjectStreamMap;
// All indirect object numbers that are being parsed.
std::set<uint32_t> m_ParsingObjNums;
diff --git a/core/fpdfapi/parser/cpdf_syntax_parser.h b/core/fpdfapi/parser/cpdf_syntax_parser.h
index 5aad2e463c..ed760934b9 100644
--- a/core/fpdfapi/parser/cpdf_syntax_parser.h
+++ b/core/fpdfapi/parser/cpdf_syntax_parser.h
@@ -60,6 +60,7 @@ class CPDF_SyntaxParser {
const RetainPtr<CPDF_ReadValidator>& GetValidator() const {
return m_pFileAccess;
}
+ uint32_t GetDirectNum();
private:
friend class CPDF_Parser;
@@ -69,7 +70,6 @@ class CPDF_SyntaxParser {
static const int kParserMaxRecursionDepth = 64;
static int s_CurrentRecursionDepth;
- uint32_t GetDirectNum();
bool ReadBlockAt(FX_FILESIZE read_pos);
bool GetNextChar(uint8_t& ch);
bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch);