diff options
author | Henrique Nakashima <hnakashima@chromium.org> | 2018-06-14 16:22:30 +0000 |
---|---|---|
committer | Chromium commit bot <commit-bot@chromium.org> | 2018-06-14 16:22:30 +0000 |
commit | 27cf78d88fdb44bd246cd17bcc712225388e9134 (patch) | |
tree | 437b128710bc065d70d642022483d83b9cc641ef | |
parent | e16ffd4fc3f286ebfaf7820351d4fee680deca88 (diff) | |
download | pdfium-27cf78d88fdb44bd246cd17bcc712225388e9134.tar.xz |
Rewrite content stream regeneration.
Loop through the dirty page objects and streams and regenerate all
streams that are dirty.
Bug: pdfium:1051
Change-Id: I837b5a7cd9542b7777e7c7ae7ac9cc75f69f30b5
Reviewed-on: https://pdfium-review.googlesource.com/34330
Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
Reviewed-by: dsinclair <dsinclair@chromium.org>
-rw-r--r-- | core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp | 105 | ||||
-rw-r--r-- | core/fpdfapi/edit/cpdf_pagecontentgenerator.h | 4 | ||||
-rw-r--r-- | core/fpdfapi/page/cpdf_pageobject.h | 6 | ||||
-rw-r--r-- | fpdfsdk/fpdf_edit_embeddertest.cpp | 133 | ||||
-rw-r--r-- | testing/resources/hello_world_split_streams.in | 63 | ||||
-rw-r--r-- | testing/resources/hello_world_split_streams.pdf | 77 |
6 files changed, 351 insertions, 37 deletions
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp index 19994fa57f..ba8d03d7f9 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp @@ -6,6 +6,9 @@ #include "core/fpdfapi/edit/cpdf_pagecontentgenerator.h" +#include <map> +#include <memory> +#include <set> #include <tuple> #include <utility> @@ -73,33 +76,67 @@ void CPDF_PageContentGenerator::GenerateContent() { std::map<int32_t, std::unique_ptr<std::ostringstream>> CPDF_PageContentGenerator::GenerateModifiedStreams() { - auto buf = pdfium::MakeUnique<std::ostringstream>(); + // Make sure default graphics are created. + (void)GetOrCreateDefaultGraphics(); + // Figure out which streams are dirty. + std::set<int32_t> all_dirty_streams; + for (auto& pPageObj : m_pageObjects) { + if (pPageObj->IsDirty()) + all_dirty_streams.insert(pPageObj->GetContentStream()); + } + const std::set<int32_t>* marked_dirty_streams = + m_pObjHolder->GetDirtyStreams(); + all_dirty_streams.insert(marked_dirty_streams->begin(), + marked_dirty_streams->end()); + + // Start regenerating dirty streams. std::map<int32_t, std::unique_ptr<std::ostringstream>> streams; - if (GenerateStreamWithNewObjects(buf.get())) - streams[CPDF_PageObject::kNoContentStream] = std::move(buf); + std::map<int32_t, bool> stream_is_empty; - // TODO(pdfium:1051): Generate other streams and add to |streams|. + for (int32_t dirty_stream : all_dirty_streams) { + std::unique_ptr<std::ostringstream> buf = + pdfium::MakeUnique<std::ostringstream>(); - return streams; -} + // Set the default graphic state values + *buf << "q\n"; + if (!m_pObjHolder->GetLastCTM().IsIdentity()) + *buf << m_pObjHolder->GetLastCTM().GetInverse() << " cm\n"; -bool CPDF_PageContentGenerator::GenerateStreamWithNewObjects( - std::ostringstream* buf) { - // Set the default graphic state values - *buf << "q\n"; - if (!m_pObjHolder->GetLastCTM().IsIdentity()) - *buf << m_pObjHolder->GetLastCTM().GetInverse() << " cm\n"; - ProcessDefaultGraphics(buf); - - // Process the page objects - if (!ProcessPageObjects(buf)) - return false; + ProcessDefaultGraphics(buf.get()); - // Return graphics to original state - *buf << "Q\n"; + streams[dirty_stream] = std::move(buf); + stream_is_empty[dirty_stream] = true; + } - return true; + // Process the page objects, write into each dirty stream. + for (auto& pPageObj : m_pageObjects) { + int stream_index = pPageObj->GetContentStream(); + auto it = streams.find(stream_index); + if (it == streams.end()) + continue; + + std::ostringstream* buf = it->second.get(); + stream_is_empty[stream_index] = false; + ProcessPageObject(buf, pPageObj.Get()); + } + + // Finish dirty streams. + for (int32_t dirty_stream : all_dirty_streams) { + std::ostringstream* buf = streams[dirty_stream].get(); + if (stream_is_empty[dirty_stream]) { + // Clear to show that this stream needs to be deleted. + buf->str(""); + } else { + // Return graphics to original state + *buf << "Q\n"; + } + } + + // Clear dirty streams in m_pObjHolder + m_pObjHolder->ClearDirtyStreams(); + + return streams; } void CPDF_PageContentGenerator::UpdateContentStreams( @@ -124,6 +161,9 @@ void CPDF_PageContentGenerator::UpdateContentStreams( page_content_manager.GetStreamByIndex(stream_index); ASSERT(old_stream); + // TODO(pdfium:1051): Remove streams that are now empty. If buf is empty, + // remove this instead of setting the data. + old_stream->SetData(buf); } } @@ -162,21 +202,28 @@ bool CPDF_PageContentGenerator::ProcessPageObjects(std::ostringstream* buf) { continue; bDirty = true; - if (CPDF_ImageObject* pImageObject = pPageObj->AsImage()) - ProcessImage(buf, pImageObject); - else if (CPDF_PathObject* pPathObj = pPageObj->AsPath()) - ProcessPath(buf, pPathObj); - else if (CPDF_TextObject* pTextObj = pPageObj->AsText()) - ProcessText(buf, pTextObj); - pPageObj->SetDirty(false); + ProcessPageObject(buf, pPageObj.Get()); } return bDirty; } void CPDF_PageContentGenerator::UpdateStreamlessPageObjects( int new_content_stream_index) { - // TODO(pdfium:1051): Mark page objects that did not have a content stream - // with the new content stream index. + for (auto& pPageObj : m_pageObjects) { + if (pPageObj->GetContentStream() == CPDF_PageObject::kNoContentStream) + pPageObj->SetContentStream(new_content_stream_index); + } +} + +void CPDF_PageContentGenerator::ProcessPageObject(std::ostringstream* buf, + CPDF_PageObject* pPageObj) { + if (CPDF_ImageObject* pImageObject = pPageObj->AsImage()) + ProcessImage(buf, pImageObject); + else if (CPDF_PathObject* pPathObj = pPageObj->AsPath()) + ProcessPath(buf, pPathObj); + else if (CPDF_TextObject* pTextObj = pPageObj->AsText()) + ProcessText(buf, pTextObj); + pPageObj->SetDirty(false); } void CPDF_PageContentGenerator::ProcessImage(std::ostringstream* buf, diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h index 04adf1c1d2..13b8431f18 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h @@ -35,6 +35,7 @@ class CPDF_PageContentGenerator { private: friend class CPDF_PageContentGeneratorTest; + void ProcessPageObject(std::ostringstream* buf, CPDF_PageObject* pPageObj); void ProcessPath(std::ostringstream* buf, CPDF_PathObject* pPathObj); void ProcessImage(std::ostringstream* buf, CPDF_ImageObject* pImageObj); void ProcessGraphics(std::ostringstream* buf, CPDF_PageObject* pPageObj); @@ -49,9 +50,6 @@ class CPDF_PageContentGenerator { std::map<int32_t, std::unique_ptr<std::ostringstream>> GenerateModifiedStreams(); - // Generate new stream data with all dirty page objects. - bool GenerateStreamWithNewObjects(std::ostringstream* buf); - // Add buffer as a stream in page's 'Contents' void UpdateContentStreams( std::map<int32_t, std::unique_ptr<std::ostringstream>>* buf); diff --git a/core/fpdfapi/page/cpdf_pageobject.h b/core/fpdfapi/page/cpdf_pageobject.h index 39e7629541..3fc35aa063 100644 --- a/core/fpdfapi/page/cpdf_pageobject.h +++ b/core/fpdfapi/page/cpdf_pageobject.h @@ -65,10 +65,14 @@ class CPDF_PageObject : public CPDF_GraphicStates { // Get what content stream the object was parsed from in its page. This number // is the index of the content stream in the "Contents" array, or 0 if there // is a single content stream. If the object is newly created, - // kNoContentStream is returned. + // |kNoContentStream| is returned. + // // If the object is spread among more than one content stream, this is the // index of the last stream. int32_t GetContentStream() const { return m_ContentStream; } + void SetContentStream(int32_t new_content_stream) { + m_ContentStream = new_content_stream; + } float m_Left; float m_Right; diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp index 0a119b9577..07879c5054 100644 --- a/fpdfsdk/fpdf_edit_embeddertest.cpp +++ b/fpdfsdk/fpdf_edit_embeddertest.cpp @@ -428,8 +428,7 @@ TEST_F(FPDFEditEmbeddertest, AddPaths) { VerifySavedDocument(612, 792, kLastMD5); } -// Fails due to pdfium:1051. -TEST_F(FPDFEditEmbeddertest, DISABLED_SetText) { +TEST_F(FPDFEditEmbeddertest, SetText) { // Load document with some text. EXPECT_TRUE(OpenDocument("hello_world.pdf")); FPDF_PAGE page = LoadPage(0); @@ -626,8 +625,7 @@ TEST_F(FPDFEditEmbeddertest, RemoveMarkedObjectsPrime) { UnloadPage(page); } -// Fails due to pdfium:1051. -TEST_F(FPDFEditEmbeddertest, DISABLED_RemoveExistingPageObject) { +TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObject) { // Load document with some text. EXPECT_TRUE(OpenDocument("hello_world.pdf")); FPDF_PAGE page = LoadPage(0); @@ -656,6 +654,100 @@ TEST_F(FPDFEditEmbeddertest, DISABLED_RemoveExistingPageObject) { CloseSavedDocument(); } +TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObjectSplitStreamsNotLonely) { + // Load document with some text. + EXPECT_TRUE(OpenDocument("hello_world_split_streams.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Get the "Hello, world!" text object and remove it. There is another object + // in the same stream that says "Goodbye, world!" + ASSERT_EQ(3, FPDFPage_CountObjects(page)); + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0); + ASSERT_TRUE(page_object); + EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object)); + + // Verify the "Hello, world!" text is gone. + ASSERT_EQ(2, FPDFPage_CountObjects(page)); +#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_ + const char kHelloRemovedMD5[] = "e07a62d412728fc4d6e3ff42f2dd0e11"; +#elif _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_ + const char kHelloRemovedMD5[] = "de37b0bb7ff903c1068bae361844be50"; +#else + const char kHelloRemovedMD5[] = "95b92950647a2190e1230911e7a1a0e9"; +#endif + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kHelloRemovedMD5); + } + + // Save the file + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + UnloadPage(page); + FPDFPageObj_Destroy(page_object); + + // Re-open the file and check the page object count is still 2. + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + + EXPECT_EQ(2, FPDFPage_CountObjects(saved_page)); + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kHelloRemovedMD5); + } + + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + +TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObjectSplitStreamsLonely) { + // Load document with some text. + EXPECT_TRUE(OpenDocument("hello_world_split_streams.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Get the "Greetings, world!" text object and remove it. This is the only + // object in the stream. + ASSERT_EQ(3, FPDFPage_CountObjects(page)); + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 2); + ASSERT_TRUE(page_object); + EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object)); + + // Verify the "Greetings, world!" text is gone. + ASSERT_EQ(2, FPDFPage_CountObjects(page)); +#if _FX_PLATFORM_ == _FX_PLATFORM_APPLE_ + const char kGreetingsRemovedMD5[] = "b90475ca64d1348c3bf5e2b77ad9187a"; +#elif _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_ + const char kGreetingsRemovedMD5[] = "e5a6fa28298db07484cd922f3e210c88"; +#else + const char kGreetingsRemovedMD5[] = "2baa4c0e1758deba1b9c908e1fbd04ed"; +#endif + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kGreetingsRemovedMD5); + } + + // Save the file + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + UnloadPage(page); + FPDFPageObj_Destroy(page_object); + + // Re-open the file and check the page object count is still 2. + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + + EXPECT_EQ(2, FPDFPage_CountObjects(saved_page)); + { + ScopedFPDFBitmap page_bitmap = RenderPageWithFlags(saved_page, nullptr, 0); + CompareBitmap(page_bitmap.get(), 200, 200, kGreetingsRemovedMD5); + } + + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + // TODO(pdfium:1051): Extend this test to remove some elements and verify // saving works. TEST_F(FPDFEditEmbeddertest, GetContentStream) { @@ -713,6 +805,39 @@ TEST_F(FPDFEditEmbeddertest, InsertPageObjectAndSave) { CloseSavedDocument(); } +TEST_F(FPDFEditEmbeddertest, InsertPageObjectEditAndSave) { + // Load document with some text. + EXPECT_TRUE(OpenDocument("hello_world.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Add a red rectangle. + ASSERT_EQ(2, FPDFPage_CountObjects(page)); + FPDF_PAGEOBJECT red_rect = FPDFPageObj_CreateNewRect(20, 100, 50, 50); + EXPECT_TRUE(FPDFPath_SetFillColor(red_rect, 255, 100, 100, 255)); + EXPECT_TRUE(FPDFPath_SetDrawMode(red_rect, FPDF_FILLMODE_ALTERNATE, 0)); + FPDFPage_InsertObject(page, red_rect); + + // Verify the red rectangle was added. + ASSERT_EQ(3, FPDFPage_CountObjects(page)); + + // Generate content but change it again + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + EXPECT_TRUE(FPDFPath_SetFillColor(red_rect, 255, 0, 0, 255)); + + // Save the file + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + UnloadPage(page); + + // Re-open the file and check the page object count is still 3. + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + EXPECT_EQ(3, FPDFPage_CountObjects(saved_page)); + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + TEST_F(FPDFEditEmbeddertest, AddAndRemovePaths) { // Start with a blank page. FPDF_PAGE page = FPDFPage_New(CreateNewDocument(), 0, 612, 792); diff --git a/testing/resources/hello_world_split_streams.in b/testing/resources/hello_world_split_streams.in new file mode 100644 index 0000000000..c7a29352f1 --- /dev/null +++ b/testing/resources/hello_world_split_streams.in @@ -0,0 +1,63 @@ +{{header}} +{{object 1 0}} << + /Type /Catalog + /Pages 2 0 R +>> +endobj +{{object 2 0}} << + /Type /Pages + /MediaBox [ 0 0 200 200 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +{{object 3 0}} << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents [6 0 R 7 0 R] +>> +endobj +{{object 4 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +{{object 5 0}} << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +{{object 6 0}} << +>> +stream +BT +20 50 Td +/F1 12 Tf +(Hello, world!) Tj +0 50 Td +/F2 16 Tf +(Goodbye, world!) Tj +ET +endstream +endobj +{{object 7 0}} << +>> +stream +BT +20 50 Td +/F1 12 Tf +(Greetings, world!) Tj +endstream +endobj +{{xref}} +{{trailer}} +{{startxref}} +%%EOF diff --git a/testing/resources/hello_world_split_streams.pdf b/testing/resources/hello_world_split_streams.pdf new file mode 100644 index 0000000000..969fccf7de --- /dev/null +++ b/testing/resources/hello_world_split_streams.pdf @@ -0,0 +1,77 @@ +%PDF-1.7 +% ò¤ô +1 0 obj << + /Type /Catalog + /Pages 2 0 R +>> +endobj +2 0 obj << + /Type /Pages + /MediaBox [ 0 0 200 200 ] + /Count 1 + /Kids [ 3 0 R ] +>> +endobj +3 0 obj << + /Type /Page + /Parent 2 0 R + /Resources << + /Font << + /F1 4 0 R + /F2 5 0 R + >> + >> + /Contents [6 0 R 7 0 R] +>> +endobj +4 0 obj << + /Type /Font + /Subtype /Type1 + /BaseFont /Times-Roman +>> +endobj +5 0 obj << + /Type /Font + /Subtype /Type1 + /BaseFont /Helvetica +>> +endobj +6 0 obj << +>> +stream +BT +20 50 Td +/F1 12 Tf +(Hello, world!) Tj +0 50 Td +/F2 16 Tf +(Goodbye, world!) Tj +ET +endstream +endobj +7 0 obj << +>> +stream +BT +20 50 Td +/F1 12 Tf +(Greetings, world!) Tj +endstream +endobj +xref +0 8 +0000000000 65535 f +0000000015 00000 n +0000000068 00000 n +0000000161 00000 n +0000000311 00000 n +0000000389 00000 n +0000000465 00000 n +0000000586 00000 n +trailer << + /Root 1 0 R + /Size 8 +>> +startxref +669 +%%EOF |