From 0dcf1f40652edd701d032227a742f6a63e6e3fae Mon Sep 17 00:00:00 2001 From: Henrique Nakashima Date: Thu, 21 Jun 2018 18:51:15 +0000 Subject: Do not save content stream if all page objects were removed from it. Bug: pdfium:1051 Change-Id: Ia990a47eeceb47fd2b15fe4ea7226861507484db Reviewed-on: https://pdfium-review.googlesource.com/35115 Reviewed-by: dsinclair Commit-Queue: Henrique Nakashima --- core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp | 11 +- core/fpdfapi/edit/cpdf_pagecontentmanager.cpp | 60 +++++ core/fpdfapi/edit/cpdf_pagecontentmanager.h | 11 + fpdfsdk/fpdf_edit_embeddertest.cpp | 344 +++++++++++++++++++++++- 4 files changed, 420 insertions(+), 6 deletions(-) diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp index 80b121a572..da6c74e2c7 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp @@ -161,11 +161,14 @@ void CPDF_PageContentGenerator::UpdateContentStreams( page_content_manager.GetStreamByIndex(stream_index); ASSERT(old_stream); - // TODO(pdfium:1051): Remove streams that are now empty. If buf is empty, - // remove this instead of setting the data. - - old_stream->SetData(buf); + // If buf is now empty, remove the stream instead of setting the data. + if (buf->tellp() <= 0) + page_content_manager.ScheduleRemoveStreamByIndex(stream_index); + else + old_stream->SetData(buf); } + + page_content_manager.ExecuteScheduledRemovals(); } ByteString CPDF_PageContentGenerator::RealizeResource( diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp index e9ade27bf8..46033bc0d1 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp @@ -4,6 +4,11 @@ #include "core/fpdfapi/edit/cpdf_pagecontentmanager.h" +#include +#include +#include + +#include "core/fpdfapi/page/cpdf_pageobject.h" #include "core/fpdfapi/parser/cpdf_array.h" #include "core/fpdfapi/parser/cpdf_dictionary.h" #include "core/fpdfapi/parser/cpdf_document.h" @@ -85,3 +90,58 @@ size_t CPDF_PageContentManager::AddStream(std::ostringstream* buf) { contents_stream_ = new_stream; return 0; } + +void CPDF_PageContentManager::ScheduleRemoveStreamByIndex(size_t stream_index) { + streams_to_remove_.insert(stream_index); +} + +void CPDF_PageContentManager::ExecuteScheduledRemovals() { + // This method assumes there are no dirty streams in the + // CPDF_PageObjectHolder. If there were any, their indexes would need to be + // updated. + // Since this is only called by CPDF_PageContentGenerator::GenerateContent(), + // which cleans up the dirty streams first, this should always be true. + ASSERT(obj_holder_->GetDirtyStreams()->empty()); + + if (contents_stream_) { + // Only stream that can be removed is 0. + if (streams_to_remove_.find(0) != streams_to_remove_.end()) { + CPDF_Dictionary* page_dict = obj_holder_->GetDict(); + page_dict->RemoveFor("Contents"); + contents_stream_ = nullptr; + } + } else if (contents_array_) { + // Initialize a vector with the old stream indexes. This will be used to + // build a map from the old to the new indexes. + std::vector streams_left(contents_array_->GetCount()); + std::iota(streams_left.begin(), streams_left.end(), 0); + + // In reverse order so as to not change the indexes in the middle of the + // loop, remove the streams. + for (auto it = streams_to_remove_.rbegin(); it != streams_to_remove_.rend(); + ++it) { + size_t stream_index = *it; + contents_array_->RemoveAt(stream_index); + streams_left.erase(streams_left.begin() + stream_index); + } + + // Create a mapping from the old to the new stream indexes, shifted due to + // the deletion of the |streams_to_remove_|. + std::map stream_index_mapping; + for (size_t i = 0; i < streams_left.size(); ++i) + stream_index_mapping[streams_left[i]] = i; + + // Update the page objects' content stream indexes. + for (const auto& obj : *obj_holder_->GetPageObjectList()) { + int32_t old_stream_index = obj->GetContentStream(); + size_t new_stream_index = stream_index_mapping[old_stream_index]; + obj->SetContentStream(new_stream_index); + } + + // Even if there is a single content stream now, keep the array with a + // single element. It's valid, a second stream might be added soon, and the + // complexity of removing it is not worth it. + } + + streams_to_remove_.clear(); +} diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.h b/core/fpdfapi/edit/cpdf_pagecontentmanager.h index 384405b188..bad8c7a38c 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentmanager.h +++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.h @@ -5,6 +5,7 @@ #ifndef CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_ #define CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_ +#include #include #include "core/fxcrt/unowned_ptr.h" @@ -28,11 +29,21 @@ class CPDF_PageContentManager { // if Contents is not an array, but only a single stream. size_t AddStream(std::ostringstream* buf); + // Schedule the removal of the Content stream at a given index. It will be + // removed when ExecuteScheduledRemovals() is called. + void ScheduleRemoveStreamByIndex(size_t stream_index); + + // Remove all Content streams for which ScheduleRemoveStreamByIndex() was + // called. Update the content stream of all page objects with the shifted + // indexes. + void ExecuteScheduledRemovals(); + private: UnownedPtr const obj_holder_; UnownedPtr const doc_; UnownedPtr contents_array_; UnownedPtr contents_stream_; + std::set streams_to_remove_; }; #endif // CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_ diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp index c78700ea67..f2690499d1 100644 --- a/fpdfsdk/fpdf_edit_embeddertest.cpp +++ b/fpdfsdk/fpdf_edit_embeddertest.cpp @@ -748,8 +748,6 @@ TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObjectSplitStreamsLonely) { CloseSavedDocument(); } -// TODO(pdfium:1051): Extend this test to remove some elements and verify -// saving works. TEST_F(FPDFEditEmbeddertest, GetContentStream) { // Load document with some text split across streams. EXPECT_TRUE(OpenDocument("split_streams.pdf")); @@ -776,6 +774,348 @@ TEST_F(FPDFEditEmbeddertest, GetContentStream) { UnloadPage(page); } +TEST_F(FPDFEditEmbeddertest, RemoveAllFromStream) { + // Load document with some text split across streams. + EXPECT_TRUE(OpenDocument("split_streams.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Content stream 0: page objects 0-14. + // Content stream 1: page objects 15-17. + // Content stream 2: page object 18. + ASSERT_EQ(19, FPDFPage_CountObjects(page)); + + // Loop backwards because objects will being removed, which shifts the indexes + // after the removed position. + for (int i = 18; i >= 0; i--) { + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i); + ASSERT_TRUE(page_object); + CPDF_PageObject* cpdf_page_object = + CPDFPageObjectFromFPDFPageObject(page_object); + + // Empty content stream 1. + if (cpdf_page_object->GetContentStream() == 1) { + EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object)); + FPDFPageObj_Destroy(page_object); + } + } + + // Content stream 0: page objects 0-14. + // Content stream 2: page object 15. + ASSERT_EQ(16, FPDFPage_CountObjects(page)); + for (int i = 0; i < 16; i++) { + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i); + ASSERT_TRUE(page_object); + CPDF_PageObject* cpdf_page_object = + CPDFPageObjectFromFPDFPageObject(page_object); + if (i < 15) + EXPECT_EQ(0, cpdf_page_object->GetContentStream()) << i; + else + EXPECT_EQ(2, cpdf_page_object->GetContentStream()) << i; + } + + // Generate contents should remove the empty stream and update the page + // objects' contents stream indexes. + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + + // Content stream 0: page objects 0-14. + // Content stream 1: page object 15. + ASSERT_EQ(16, FPDFPage_CountObjects(page)); + for (int i = 0; i < 16; i++) { + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i); + ASSERT_TRUE(page_object); + CPDF_PageObject* cpdf_page_object = + CPDFPageObjectFromFPDFPageObject(page_object); + if (i < 15) + EXPECT_EQ(0, cpdf_page_object->GetContentStream()) << i; + else + EXPECT_EQ(1, cpdf_page_object->GetContentStream()) << i; + } + +#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_ + const char kStream1RemovedMD5[] = "d2e21fbd5a6de563f619feeeb6163331"; +#elif _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_ + const char kStream1RemovedMD5[] = "b4140f203523e38793283a5943d8075b"; +#else + const char kStream1RemovedMD5[] = "e86a3efc160ede6cfcb1f59bcacf1105"; +#endif + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kStream1RemovedMD5); + } + + // Save the file + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + UnloadPage(page); + + // Re-open the file and check the page object count is still 16, and that + // content stream 1 was removed. + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + + // Content stream 0: page objects 0-14. + // Content stream 1: page object 15. + EXPECT_EQ(16, FPDFPage_CountObjects(saved_page)); + for (int i = 0; i < 16; i++) { + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(saved_page, i); + ASSERT_TRUE(page_object); + CPDF_PageObject* cpdf_page_object = + CPDFPageObjectFromFPDFPageObject(page_object); + if (i < 15) + EXPECT_EQ(0, cpdf_page_object->GetContentStream()) << i; + else + EXPECT_EQ(1, cpdf_page_object->GetContentStream()) << i; + } + + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kStream1RemovedMD5); + } + + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + +TEST_F(FPDFEditEmbeddertest, RemoveAllFromSingleStream) { + // Load document with a single stream. + EXPECT_TRUE(OpenDocument("hello_world.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Content stream 0: page objects 0-1. + ASSERT_EQ(2, FPDFPage_CountObjects(page)); + + // Loop backwards because objects will being removed, which shifts the indexes + // after the removed position. + for (int i = 1; i >= 0; i--) { + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i); + ASSERT_TRUE(page_object); + CPDF_PageObject* cpdf_page_object = + CPDFPageObjectFromFPDFPageObject(page_object); + ASSERT_EQ(0, cpdf_page_object->GetContentStream()); + ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object)); + FPDFPageObj_Destroy(page_object); + } + + // No more objects in the stream + ASSERT_EQ(0, FPDFPage_CountObjects(page)); + + // Generate contents should remove the empty stream and update the page + // objects' contents stream indexes. + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + + ASSERT_EQ(0, FPDFPage_CountObjects(page)); + + const char kAllRemovedMD5[] = "eee4600ac08b458ac7ac2320e225674c"; + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kAllRemovedMD5); + } + + // Save the file + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + UnloadPage(page); + + // Re-open the file and check the page object count is still 0. + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + + EXPECT_EQ(0, FPDFPage_CountObjects(saved_page)); + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kAllRemovedMD5); + } + + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + +TEST_F(FPDFEditEmbeddertest, RemoveFirstFromSingleStream) { + // Load document with a single stream. + EXPECT_TRUE(OpenDocument("hello_world.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Content stream 0: page objects 0-1. + ASSERT_EQ(2, FPDFPage_CountObjects(page)); + + // Remove first object. + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0); + ASSERT_TRUE(page_object); + CPDF_PageObject* cpdf_page_object = + CPDFPageObjectFromFPDFPageObject(page_object); + ASSERT_EQ(0, cpdf_page_object->GetContentStream()); + ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object)); + FPDFPageObj_Destroy(page_object); + + // One object left in the stream. + ASSERT_EQ(1, FPDFPage_CountObjects(page)); + page_object = FPDFPage_GetObject(page, 0); + ASSERT_TRUE(page_object); + cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object); + ASSERT_EQ(0, cpdf_page_object->GetContentStream()); + + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + + // Still one object left in the stream. + ASSERT_EQ(1, FPDFPage_CountObjects(page)); + page_object = FPDFPage_GetObject(page, 0); + ASSERT_TRUE(page_object); + cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object); + ASSERT_EQ(0, cpdf_page_object->GetContentStream()); + +#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_ + const char kFirstRemovedMD5[] = "af760c4702467cb1492a57fb8215efaa"; +#elif _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_ + const char kFirstRemovedMD5[] = "72be917349bf7004a5c39661fe1fc433"; +#else + const char kFirstRemovedMD5[] = "b76df015fe88009c3c342395df96abf1"; +#endif + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kFirstRemovedMD5); + } + + // Save the file + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + UnloadPage(page); + + // Re-open the file and check the page object count is still 0. + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + + ASSERT_EQ(1, FPDFPage_CountObjects(saved_page)); + page_object = FPDFPage_GetObject(saved_page, 0); + ASSERT_TRUE(page_object); + cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object); + ASSERT_EQ(0, cpdf_page_object->GetContentStream()); + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kFirstRemovedMD5); + } + + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + +TEST_F(FPDFEditEmbeddertest, RemoveLastFromSingleStream) { + // Load document with a single stream. + EXPECT_TRUE(OpenDocument("hello_world.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Content stream 0: page objects 0-1. + ASSERT_EQ(2, FPDFPage_CountObjects(page)); + + // Remove last object + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 1); + ASSERT_TRUE(page_object); + CPDF_PageObject* cpdf_page_object = + CPDFPageObjectFromFPDFPageObject(page_object); + ASSERT_EQ(0, cpdf_page_object->GetContentStream()); + ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object)); + FPDFPageObj_Destroy(page_object); + + // One object left in the stream. + ASSERT_EQ(1, FPDFPage_CountObjects(page)); + page_object = FPDFPage_GetObject(page, 0); + ASSERT_TRUE(page_object); + cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object); + ASSERT_EQ(0, cpdf_page_object->GetContentStream()); + + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + + // Still one object left in the stream. + ASSERT_EQ(1, FPDFPage_CountObjects(page)); + page_object = FPDFPage_GetObject(page, 0); + ASSERT_TRUE(page_object); + cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object); + ASSERT_EQ(0, cpdf_page_object->GetContentStream()); + +#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_ + const char kLastRemovedMD5[] = "f8fbd14a048b9e2ea8e5f059f22a910e"; +#else + const char kLastRemovedMD5[] = "93dcc09055f87a2792c8e3065af99a1b"; +#endif + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kLastRemovedMD5); + } + + // Save the file + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + UnloadPage(page); + + // Re-open the file and check the page object count is still 0. + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + + ASSERT_EQ(1, FPDFPage_CountObjects(saved_page)); + page_object = FPDFPage_GetObject(saved_page, 0); + ASSERT_TRUE(page_object); + cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object); + ASSERT_EQ(0, cpdf_page_object->GetContentStream()); + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kLastRemovedMD5); + } + + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + +TEST_F(FPDFEditEmbeddertest, RemoveAllFromMultipleStreams) { + // Load document with some text. + EXPECT_TRUE(OpenDocument("hello_world_split_streams.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Content stream 0: page objects 0-1. + // Content stream 1: page object 2. + ASSERT_EQ(3, FPDFPage_CountObjects(page)); + + // Loop backwards because objects will being removed, which shifts the indexes + // after the removed position. + for (int i = 2; i >= 0; i--) { + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i); + ASSERT_TRUE(page_object); + ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object)); + FPDFPageObj_Destroy(page_object); + } + + // No more objects in the page. + ASSERT_EQ(0, FPDFPage_CountObjects(page)); + + // Generate contents should remove the empty streams and update the page + // objects' contents stream indexes. + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + + ASSERT_EQ(0, FPDFPage_CountObjects(page)); + + const char kAllRemovedMD5[] = "eee4600ac08b458ac7ac2320e225674c"; + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kAllRemovedMD5); + } + + // Save the file + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + UnloadPage(page); + + // Re-open the file and check the page object count is still 0. + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + + EXPECT_EQ(0, FPDFPage_CountObjects(saved_page)); + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kAllRemovedMD5); + } + + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + TEST_F(FPDFEditEmbeddertest, InsertPageObjectAndSave) { // Load document with some text. EXPECT_TRUE(OpenDocument("hello_world.pdf")); -- cgit v1.2.3