From 0dcf1f40652edd701d032227a742f6a63e6e3fae Mon Sep 17 00:00:00 2001 From: Henrique Nakashima Date: Thu, 21 Jun 2018 18:51:15 +0000 Subject: Do not save content stream if all page objects were removed from it. Bug: pdfium:1051 Change-Id: Ia990a47eeceb47fd2b15fe4ea7226861507484db Reviewed-on: https://pdfium-review.googlesource.com/35115 Reviewed-by: dsinclair Commit-Queue: Henrique Nakashima --- core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp | 11 +++-- core/fpdfapi/edit/cpdf_pagecontentmanager.cpp | 60 +++++++++++++++++++++++++ core/fpdfapi/edit/cpdf_pagecontentmanager.h | 11 +++++ 3 files changed, 78 insertions(+), 4 deletions(-) (limited to 'core/fpdfapi') diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp index 80b121a572..da6c74e2c7 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp @@ -161,11 +161,14 @@ void CPDF_PageContentGenerator::UpdateContentStreams( page_content_manager.GetStreamByIndex(stream_index); ASSERT(old_stream); - // TODO(pdfium:1051): Remove streams that are now empty. If buf is empty, - // remove this instead of setting the data. - - old_stream->SetData(buf); + // If buf is now empty, remove the stream instead of setting the data. 
+ if (buf->tellp() <= 0) + page_content_manager.ScheduleRemoveStreamByIndex(stream_index); + else + old_stream->SetData(buf); } + + page_content_manager.ExecuteScheduledRemovals(); } ByteString CPDF_PageContentGenerator::RealizeResource( diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp index e9ade27bf8..46033bc0d1 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp @@ -4,6 +4,11 @@ #include "core/fpdfapi/edit/cpdf_pagecontentmanager.h" +#include <map> +#include <numeric> +#include <vector> + +#include "core/fpdfapi/page/cpdf_pageobject.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" @@ -85,3 +90,58 @@ size_t CPDF_PageContentManager::AddStream(std::ostringstream* buf) { contents_stream_ = new_stream; return 0; } + +void CPDF_PageContentManager::ScheduleRemoveStreamByIndex(size_t stream_index) { + streams_to_remove_.insert(stream_index); +} + +void CPDF_PageContentManager::ExecuteScheduledRemovals() { + // This method assumes there are no dirty streams in the + // CPDF_PageObjectHolder. If there were any, their indexes would need to be + // updated. + // Since this is only called by CPDF_PageContentGenerator::GenerateContent(), + // which cleans up the dirty streams first, this should always be true. + ASSERT(obj_holder_->GetDirtyStreams()->empty()); + + if (contents_stream_) { + // Only stream that can be removed is 0. + if (streams_to_remove_.find(0) != streams_to_remove_.end()) { + CPDF_Dictionary* page_dict = obj_holder_->GetDict(); + page_dict->RemoveFor("Contents"); + contents_stream_ = nullptr; + } + } else if (contents_array_) { + // Initialize a vector with the old stream indexes. This will be used to + // build a map from the old to the new indexes. 
+ std::vector<size_t> streams_left(contents_array_->GetCount()); + std::iota(streams_left.begin(), streams_left.end(), 0); + + // In reverse order so as to not change the indexes in the middle of the + // loop, remove the streams. + for (auto it = streams_to_remove_.rbegin(); it != streams_to_remove_.rend(); + ++it) { + size_t stream_index = *it; + contents_array_->RemoveAt(stream_index); + streams_left.erase(streams_left.begin() + stream_index); + } + + // Create a mapping from the old to the new stream indexes, shifted due to + // the deletion of the |streams_to_remove_|. + std::map<int32_t, size_t> stream_index_mapping; + for (size_t i = 0; i < streams_left.size(); ++i) + stream_index_mapping[streams_left[i]] = i; + + // Update the page objects' content stream indexes. + for (const auto& obj : *obj_holder_->GetPageObjectList()) { + int32_t old_stream_index = obj->GetContentStream(); + size_t new_stream_index = stream_index_mapping[old_stream_index]; + obj->SetContentStream(new_stream_index); + } + + // Even if there is a single content stream now, keep the array with a + // single element. It's valid, a second stream might be added soon, and the + // complexity of removing it is not worth it. + } + + streams_to_remove_.clear(); +} diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.h b/core/fpdfapi/edit/cpdf_pagecontentmanager.h index 384405b188..bad8c7a38c 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentmanager.h +++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.h @@ -5,6 +5,7 @@ #ifndef CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_ #define CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_ +#include <set> #include <sstream> #include "core/fxcrt/unowned_ptr.h" @@ -28,11 +29,21 @@ class CPDF_PageContentManager { // if Contents is not an array, but only a single stream. size_t AddStream(std::ostringstream* buf); + // Schedule the removal of the Content stream at a given index. It will be + // removed when ExecuteScheduledRemovals() is called. 
+ void ScheduleRemoveStreamByIndex(size_t stream_index); + + // Remove all Content streams for which ScheduleRemoveStreamByIndex() was + // called. Update the content stream of all page objects with the shifted + // indexes. + void ExecuteScheduledRemovals(); + private: UnownedPtr<CPDF_PageObjectHolder> const obj_holder_; UnownedPtr<CPDF_Document> const doc_; UnownedPtr<CPDF_Array> contents_array_; UnownedPtr<CPDF_Stream> contents_stream_; + std::set<size_t> streams_to_remove_; }; #endif // CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_ -- cgit v1.2.3