summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHenrique Nakashima <hnakashima@chromium.org>2018-06-21 18:51:15 +0000
committerChromium commit bot <commit-bot@chromium.org>2018-06-21 18:51:15 +0000
commit0dcf1f40652edd701d032227a742f6a63e6e3fae (patch)
tree5bb3c21d9e502e1b32488535769a4ed90384a391
parentd52a0afaa3e4374dee469e41db4389bf9c61c4a9 (diff)
downloadpdfium-0dcf1f40652edd701d032227a742f6a63e6e3fae.tar.xz
Do not save content stream if all page objects were removed from it.
Bug: pdfium:1051 Change-Id: Ia990a47eeceb47fd2b15fe4ea7226861507484db Reviewed-on: https://pdfium-review.googlesource.com/35115 Reviewed-by: dsinclair <dsinclair@chromium.org> Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
-rw-r--r--core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp11
-rw-r--r--core/fpdfapi/edit/cpdf_pagecontentmanager.cpp60
-rw-r--r--core/fpdfapi/edit/cpdf_pagecontentmanager.h11
-rw-r--r--fpdfsdk/fpdf_edit_embeddertest.cpp344
4 files changed, 420 insertions, 6 deletions
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
index 80b121a572..da6c74e2c7 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
@@ -161,11 +161,14 @@ void CPDF_PageContentGenerator::UpdateContentStreams(
page_content_manager.GetStreamByIndex(stream_index);
ASSERT(old_stream);
- // TODO(pdfium:1051): Remove streams that are now empty. If buf is empty,
- // remove this instead of setting the data.
-
- old_stream->SetData(buf);
+ // If buf is now empty, remove the stream instead of setting the data.
+ if (buf->tellp() <= 0)
+ page_content_manager.ScheduleRemoveStreamByIndex(stream_index);
+ else
+ old_stream->SetData(buf);
}
+
+ page_content_manager.ExecuteScheduledRemovals();
}
ByteString CPDF_PageContentGenerator::RealizeResource(
diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp
index e9ade27bf8..46033bc0d1 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.cpp
@@ -4,6 +4,11 @@
#include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
+#include <map>
+#include <numeric>
+#include <vector>
+
+#include "core/fpdfapi/page/cpdf_pageobject.h"
#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_dictionary.h"
#include "core/fpdfapi/parser/cpdf_document.h"
@@ -85,3 +90,58 @@ size_t CPDF_PageContentManager::AddStream(std::ostringstream* buf) {
contents_stream_ = new_stream;
return 0;
}
+
+void CPDF_PageContentManager::ScheduleRemoveStreamByIndex(size_t stream_index) {
+ streams_to_remove_.insert(stream_index);
+}
+
+void CPDF_PageContentManager::ExecuteScheduledRemovals() {
+ // This method assumes there are no dirty streams in the
+ // CPDF_PageObjectHolder. If there were any, their indexes would need to be
+ // updated.
+ // Since this is only called by CPDF_PageContentGenerator::GenerateContent(),
+ // which cleans up the dirty streams first, this should always be true.
+ ASSERT(obj_holder_->GetDirtyStreams()->empty());
+
+ if (contents_stream_) {
+ // Only stream that can be removed is 0.
+ if (streams_to_remove_.find(0) != streams_to_remove_.end()) {
+ CPDF_Dictionary* page_dict = obj_holder_->GetDict();
+ page_dict->RemoveFor("Contents");
+ contents_stream_ = nullptr;
+ }
+ } else if (contents_array_) {
+ // Initialize a vector with the old stream indexes. This will be used to
+ // build a map from the old to the new indexes.
+ std::vector<size_t> streams_left(contents_array_->GetCount());
+ std::iota(streams_left.begin(), streams_left.end(), 0);
+
+ // In reverse order so as to not change the indexes in the middle of the
+ // loop, remove the streams.
+ for (auto it = streams_to_remove_.rbegin(); it != streams_to_remove_.rend();
+ ++it) {
+ size_t stream_index = *it;
+ contents_array_->RemoveAt(stream_index);
+ streams_left.erase(streams_left.begin() + stream_index);
+ }
+
+ // Create a mapping from the old to the new stream indexes, shifted due to
+ // the deletion of the |streams_to_remove_|.
+ std::map<int32_t, size_t> stream_index_mapping;
+ for (size_t i = 0; i < streams_left.size(); ++i)
+ stream_index_mapping[streams_left[i]] = i;
+
+ // Update the page objects' content stream indexes.
+ for (const auto& obj : *obj_holder_->GetPageObjectList()) {
+ int32_t old_stream_index = obj->GetContentStream();
+ size_t new_stream_index = stream_index_mapping[old_stream_index];
+ obj->SetContentStream(new_stream_index);
+ }
+
+ // Even if there is a single content stream now, keep the array with a
+ // single element. It's valid, a second stream might be added soon, and the
+ // complexity of removing it is not worth it.
+ }
+
+ streams_to_remove_.clear();
+}
diff --git a/core/fpdfapi/edit/cpdf_pagecontentmanager.h b/core/fpdfapi/edit/cpdf_pagecontentmanager.h
index 384405b188..bad8c7a38c 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentmanager.h
+++ b/core/fpdfapi/edit/cpdf_pagecontentmanager.h
@@ -5,6 +5,7 @@
#ifndef CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_
#define CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_
+#include <set>
#include <sstream>
#include "core/fxcrt/unowned_ptr.h"
@@ -28,11 +29,21 @@ class CPDF_PageContentManager {
// if Contents is not an array, but only a single stream.
size_t AddStream(std::ostringstream* buf);
+ // Schedule the removal of the Content stream at a given index. It will be
+ // removed when ExecuteScheduledRemovals() is called.
+ void ScheduleRemoveStreamByIndex(size_t stream_index);
+
+ // Remove all Content streams for which ScheduleRemoveStreamByIndex() was
+ // called. Update the content stream of all page objects with the shifted
+ // indexes.
+ void ExecuteScheduledRemovals();
+
private:
UnownedPtr<CPDF_PageObjectHolder> const obj_holder_;
UnownedPtr<CPDF_Document> const doc_;
UnownedPtr<CPDF_Array> contents_array_;
UnownedPtr<CPDF_Stream> contents_stream_;
+ std::set<size_t> streams_to_remove_;
};
#endif // CORE_FPDFAPI_EDIT_CPDF_PAGECONTENTMANAGER_H_
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp
index c78700ea67..f2690499d1 100644
--- a/fpdfsdk/fpdf_edit_embeddertest.cpp
+++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -748,8 +748,6 @@ TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObjectSplitStreamsLonely) {
CloseSavedDocument();
}
-// TODO(pdfium:1051): Extend this test to remove some elements and verify
-// saving works.
TEST_F(FPDFEditEmbeddertest, GetContentStream) {
// Load document with some text split across streams.
EXPECT_TRUE(OpenDocument("split_streams.pdf"));
@@ -776,6 +774,348 @@ TEST_F(FPDFEditEmbeddertest, GetContentStream) {
UnloadPage(page);
}
+TEST_F(FPDFEditEmbeddertest, RemoveAllFromStream) {
+ // Load document with some text split across streams.
+ EXPECT_TRUE(OpenDocument("split_streams.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ // Content stream 0: page objects 0-14.
+ // Content stream 1: page objects 15-17.
+ // Content stream 2: page object 18.
+ ASSERT_EQ(19, FPDFPage_CountObjects(page));
+
+ // Loop backwards because objects will being removed, which shifts the indexes
+ // after the removed position.
+ for (int i = 18; i >= 0; i--) {
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i);
+ ASSERT_TRUE(page_object);
+ CPDF_PageObject* cpdf_page_object =
+ CPDFPageObjectFromFPDFPageObject(page_object);
+
+ // Empty content stream 1.
+ if (cpdf_page_object->GetContentStream() == 1) {
+ EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object));
+ FPDFPageObj_Destroy(page_object);
+ }
+ }
+
+ // Content stream 0: page objects 0-14.
+ // Content stream 2: page object 15.
+ ASSERT_EQ(16, FPDFPage_CountObjects(page));
+ for (int i = 0; i < 16; i++) {
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i);
+ ASSERT_TRUE(page_object);
+ CPDF_PageObject* cpdf_page_object =
+ CPDFPageObjectFromFPDFPageObject(page_object);
+ if (i < 15)
+ EXPECT_EQ(0, cpdf_page_object->GetContentStream()) << i;
+ else
+ EXPECT_EQ(2, cpdf_page_object->GetContentStream()) << i;
+ }
+
+ // Generate contents should remove the empty stream and update the page
+ // objects' contents stream indexes.
+ EXPECT_TRUE(FPDFPage_GenerateContent(page));
+
+ // Content stream 0: page objects 0-14.
+ // Content stream 1: page object 15.
+ ASSERT_EQ(16, FPDFPage_CountObjects(page));
+ for (int i = 0; i < 16; i++) {
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i);
+ ASSERT_TRUE(page_object);
+ CPDF_PageObject* cpdf_page_object =
+ CPDFPageObjectFromFPDFPageObject(page_object);
+ if (i < 15)
+ EXPECT_EQ(0, cpdf_page_object->GetContentStream()) << i;
+ else
+ EXPECT_EQ(1, cpdf_page_object->GetContentStream()) << i;
+ }
+
+#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
+ const char kStream1RemovedMD5[] = "d2e21fbd5a6de563f619feeeb6163331";
+#elif _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
+ const char kStream1RemovedMD5[] = "b4140f203523e38793283a5943d8075b";
+#else
+ const char kStream1RemovedMD5[] = "e86a3efc160ede6cfcb1f59bcacf1105";
+#endif
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kStream1RemovedMD5);
+ }
+
+ // Save the file
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ UnloadPage(page);
+
+ // Re-open the file and check the page object count is still 16, and that
+ // content stream 1 was removed.
+ OpenSavedDocument();
+ FPDF_PAGE saved_page = LoadSavedPage(0);
+
+ // Content stream 0: page objects 0-14.
+ // Content stream 1: page object 15.
+ EXPECT_EQ(16, FPDFPage_CountObjects(saved_page));
+ for (int i = 0; i < 16; i++) {
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(saved_page, i);
+ ASSERT_TRUE(page_object);
+ CPDF_PageObject* cpdf_page_object =
+ CPDFPageObjectFromFPDFPageObject(page_object);
+ if (i < 15)
+ EXPECT_EQ(0, cpdf_page_object->GetContentStream()) << i;
+ else
+ EXPECT_EQ(1, cpdf_page_object->GetContentStream()) << i;
+ }
+
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kStream1RemovedMD5);
+ }
+
+ CloseSavedPage(saved_page);
+ CloseSavedDocument();
+}
+
+TEST_F(FPDFEditEmbeddertest, RemoveAllFromSingleStream) {
+ // Load document with a single stream.
+ EXPECT_TRUE(OpenDocument("hello_world.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ // Content stream 0: page objects 0-1.
+ ASSERT_EQ(2, FPDFPage_CountObjects(page));
+
+ // Loop backwards because objects will being removed, which shifts the indexes
+ // after the removed position.
+ for (int i = 1; i >= 0; i--) {
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i);
+ ASSERT_TRUE(page_object);
+ CPDF_PageObject* cpdf_page_object =
+ CPDFPageObjectFromFPDFPageObject(page_object);
+ ASSERT_EQ(0, cpdf_page_object->GetContentStream());
+ ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object));
+ FPDFPageObj_Destroy(page_object);
+ }
+
+ // No more objects in the stream
+ ASSERT_EQ(0, FPDFPage_CountObjects(page));
+
+ // Generate contents should remove the empty stream and update the page
+ // objects' contents stream indexes.
+ EXPECT_TRUE(FPDFPage_GenerateContent(page));
+
+ ASSERT_EQ(0, FPDFPage_CountObjects(page));
+
+ const char kAllRemovedMD5[] = "eee4600ac08b458ac7ac2320e225674c";
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kAllRemovedMD5);
+ }
+
+ // Save the file
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ UnloadPage(page);
+
+ // Re-open the file and check the page object count is still 0.
+ OpenSavedDocument();
+ FPDF_PAGE saved_page = LoadSavedPage(0);
+
+ EXPECT_EQ(0, FPDFPage_CountObjects(saved_page));
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kAllRemovedMD5);
+ }
+
+ CloseSavedPage(saved_page);
+ CloseSavedDocument();
+}
+
+TEST_F(FPDFEditEmbeddertest, RemoveFirstFromSingleStream) {
+ // Load document with a single stream.
+ EXPECT_TRUE(OpenDocument("hello_world.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ // Content stream 0: page objects 0-1.
+ ASSERT_EQ(2, FPDFPage_CountObjects(page));
+
+ // Remove first object.
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0);
+ ASSERT_TRUE(page_object);
+ CPDF_PageObject* cpdf_page_object =
+ CPDFPageObjectFromFPDFPageObject(page_object);
+ ASSERT_EQ(0, cpdf_page_object->GetContentStream());
+ ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object));
+ FPDFPageObj_Destroy(page_object);
+
+ // One object left in the stream.
+ ASSERT_EQ(1, FPDFPage_CountObjects(page));
+ page_object = FPDFPage_GetObject(page, 0);
+ ASSERT_TRUE(page_object);
+ cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object);
+ ASSERT_EQ(0, cpdf_page_object->GetContentStream());
+
+ EXPECT_TRUE(FPDFPage_GenerateContent(page));
+
+ // Still one object left in the stream.
+ ASSERT_EQ(1, FPDFPage_CountObjects(page));
+ page_object = FPDFPage_GetObject(page, 0);
+ ASSERT_TRUE(page_object);
+ cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object);
+ ASSERT_EQ(0, cpdf_page_object->GetContentStream());
+
+#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
+ const char kFirstRemovedMD5[] = "af760c4702467cb1492a57fb8215efaa";
+#elif _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
+ const char kFirstRemovedMD5[] = "72be917349bf7004a5c39661fe1fc433";
+#else
+ const char kFirstRemovedMD5[] = "b76df015fe88009c3c342395df96abf1";
+#endif
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kFirstRemovedMD5);
+ }
+
+ // Save the file
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ UnloadPage(page);
+
+ // Re-open the file and check the page object count is still 0.
+ OpenSavedDocument();
+ FPDF_PAGE saved_page = LoadSavedPage(0);
+
+ ASSERT_EQ(1, FPDFPage_CountObjects(saved_page));
+ page_object = FPDFPage_GetObject(saved_page, 0);
+ ASSERT_TRUE(page_object);
+ cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object);
+ ASSERT_EQ(0, cpdf_page_object->GetContentStream());
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kFirstRemovedMD5);
+ }
+
+ CloseSavedPage(saved_page);
+ CloseSavedDocument();
+}
+
+TEST_F(FPDFEditEmbeddertest, RemoveLastFromSingleStream) {
+ // Load document with a single stream.
+ EXPECT_TRUE(OpenDocument("hello_world.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ // Content stream 0: page objects 0-1.
+ ASSERT_EQ(2, FPDFPage_CountObjects(page));
+
+ // Remove last object
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 1);
+ ASSERT_TRUE(page_object);
+ CPDF_PageObject* cpdf_page_object =
+ CPDFPageObjectFromFPDFPageObject(page_object);
+ ASSERT_EQ(0, cpdf_page_object->GetContentStream());
+ ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object));
+ FPDFPageObj_Destroy(page_object);
+
+ // One object left in the stream.
+ ASSERT_EQ(1, FPDFPage_CountObjects(page));
+ page_object = FPDFPage_GetObject(page, 0);
+ ASSERT_TRUE(page_object);
+ cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object);
+ ASSERT_EQ(0, cpdf_page_object->GetContentStream());
+
+ EXPECT_TRUE(FPDFPage_GenerateContent(page));
+
+ // Still one object left in the stream.
+ ASSERT_EQ(1, FPDFPage_CountObjects(page));
+ page_object = FPDFPage_GetObject(page, 0);
+ ASSERT_TRUE(page_object);
+ cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object);
+ ASSERT_EQ(0, cpdf_page_object->GetContentStream());
+
+#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_
+ const char kLastRemovedMD5[] = "f8fbd14a048b9e2ea8e5f059f22a910e";
+#else
+ const char kLastRemovedMD5[] = "93dcc09055f87a2792c8e3065af99a1b";
+#endif
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kLastRemovedMD5);
+ }
+
+ // Save the file
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ UnloadPage(page);
+
+ // Re-open the file and check the page object count is still 0.
+ OpenSavedDocument();
+ FPDF_PAGE saved_page = LoadSavedPage(0);
+
+ ASSERT_EQ(1, FPDFPage_CountObjects(saved_page));
+ page_object = FPDFPage_GetObject(saved_page, 0);
+ ASSERT_TRUE(page_object);
+ cpdf_page_object = CPDFPageObjectFromFPDFPageObject(page_object);
+ ASSERT_EQ(0, cpdf_page_object->GetContentStream());
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kLastRemovedMD5);
+ }
+
+ CloseSavedPage(saved_page);
+ CloseSavedDocument();
+}
+
+TEST_F(FPDFEditEmbeddertest, RemoveAllFromMultipleStreams) {
+ // Load document with some text.
+ EXPECT_TRUE(OpenDocument("hello_world_split_streams.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ // Content stream 0: page objects 0-1.
+ // Content stream 1: page object 2.
+ ASSERT_EQ(3, FPDFPage_CountObjects(page));
+
+ // Loop backwards because objects will being removed, which shifts the indexes
+ // after the removed position.
+ for (int i = 2; i >= 0; i--) {
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, i);
+ ASSERT_TRUE(page_object);
+ ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object));
+ FPDFPageObj_Destroy(page_object);
+ }
+
+ // No more objects in the page.
+ ASSERT_EQ(0, FPDFPage_CountObjects(page));
+
+ // Generate contents should remove the empty streams and update the page
+ // objects' contents stream indexes.
+ EXPECT_TRUE(FPDFPage_GenerateContent(page));
+
+ ASSERT_EQ(0, FPDFPage_CountObjects(page));
+
+ const char kAllRemovedMD5[] = "eee4600ac08b458ac7ac2320e225674c";
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kAllRemovedMD5);
+ }
+
+ // Save the file
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+ UnloadPage(page);
+
+ // Re-open the file and check the page object count is still 0.
+ OpenSavedDocument();
+ FPDF_PAGE saved_page = LoadSavedPage(0);
+
+ EXPECT_EQ(0, FPDFPage_CountObjects(saved_page));
+ {
+ ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0);
+ CompareBitmap(page_bitmap.get(), 200, 200, kAllRemovedMD5);
+ }
+
+ CloseSavedPage(saved_page);
+ CloseSavedDocument();
+}
+
TEST_F(FPDFEditEmbeddertest, InsertPageObjectAndSave) {
// Load document with some text.
EXPECT_TRUE(OpenDocument("hello_world.pdf"));