From b4bcf69210719810ca563b9f8c0179719e80d212 Mon Sep 17 00:00:00 2001 From: Henrique Nakashima Date: Wed, 11 Jul 2018 21:19:22 +0000 Subject: Write marked content operators when generating a stream. The marked content operators are BMC, BDC and EMC. In the case of BDC, it is preceded by a direct dict or a property name. Bug: pdfium:1118 Change-Id: I3ee736ff7be3e7d7dde55ef581af3444a325e887 Reviewed-on: https://pdfium-review.googlesource.com/37470 Reviewed-by: Lei Zhang Commit-Queue: Henrique Nakashima --- BUILD.gn | 2 + core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp | 71 +++++++++++++++++++++ core/fpdfapi/edit/cpdf_pagecontentgenerator.h | 6 ++ core/fpdfapi/edit/cpdf_stringarchivestream.cpp | 35 ++++++++++ core/fpdfapi/edit/cpdf_stringarchivestream.h | 26 ++++++++ core/fpdfapi/page/cpdf_contentmark.cpp | 15 +++++ core/fpdfapi/page/cpdf_contentmark.h | 1 + fpdfsdk/fpdf_edit_embeddertest.cpp | 85 ++++++++++++++++++++++++- 8 files changed, 239 insertions(+), 2 deletions(-) create mode 100644 core/fpdfapi/edit/cpdf_stringarchivestream.cpp create mode 100644 core/fpdfapi/edit/cpdf_stringarchivestream.h diff --git a/BUILD.gn b/BUILD.gn index 222784f36e..e571772915 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -474,6 +474,8 @@ jumbo_static_library("fpdfapi") { "core/fpdfapi/edit/cpdf_pagecontentgenerator.h", "core/fpdfapi/edit/cpdf_pagecontentmanager.cpp", "core/fpdfapi/edit/cpdf_pagecontentmanager.h", + "core/fpdfapi/edit/cpdf_stringarchivestream.cpp", + "core/fpdfapi/edit/cpdf_stringarchivestream.h", "core/fpdfapi/font/cfx_cttgsubtable.cpp", "core/fpdfapi/font/cfx_cttgsubtable.h", "core/fpdfapi/font/cfx_stockfontarray.cpp", diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp index 88f14b2ce4..f6a941d200 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp @@ -13,7 +13,9 @@ #include #include "core/fpdfapi/edit/cpdf_pagecontentmanager.h" +#include "core/fpdfapi/edit/cpdf_stringarchivestream.h" #include "core/fpdfapi/font/cpdf_font.h" +#include "core/fpdfapi/page/cpdf_contentmark.h" #include "core/fpdfapi/page/cpdf_docpagedata.h" #include "core/fpdfapi/page/cpdf_image.h" #include "core/fpdfapi/page/cpdf_imageobject.h" @@ -94,6 +96,9 @@ CPDF_PageContentGenerator::GenerateModifiedStreams() { // Start regenerating dirty streams. std::map> streams; std::set empty_streams; + std::unique_ptr empty_content_mark = + pdfium::MakeUnique(); + std::map current_content_mark; for (int32_t dirty_stream : all_dirty_streams) { std::unique_ptr buf = @@ -108,6 +113,7 @@ CPDF_PageContentGenerator::GenerateModifiedStreams() { streams[dirty_stream] = std::move(buf); empty_streams.insert(dirty_stream); + current_content_mark[dirty_stream] = empty_content_mark.get(); } // Process the page objects, write into each dirty stream. @@ -119,6 +125,8 @@ CPDF_PageContentGenerator::GenerateModifiedStreams() { std::ostringstream* buf = it->second.get(); empty_streams.erase(stream_index); + current_content_mark[stream_index] = ProcessContentMarks( + buf, pPageObj.Get(), current_content_mark[stream_index]); ProcessPageObject(buf, pPageObj.Get()); } @@ -129,6 +137,8 @@ CPDF_PageContentGenerator::GenerateModifiedStreams() { // Clear to show that this stream needs to be deleted. buf->str(""); } else { + FinishMarks(buf, current_content_mark[dirty_stream]); + // Return graphics to original state *buf << "Q\n"; } @@ -201,13 +211,19 @@ ByteString CPDF_PageContentGenerator::RealizeResource( bool CPDF_PageContentGenerator::ProcessPageObjects(std::ostringstream* buf) { bool bDirty = false; + std::unique_ptr empty_content_mark = + pdfium::MakeUnique(); + const CPDF_ContentMark* content_mark = empty_content_mark.get(); + for (auto& pPageObj : m_pageObjects) { if (m_pObjHolder->IsPage() && !pPageObj->IsDirty()) continue; bDirty = true; + content_mark = ProcessContentMarks(buf, pPageObj.Get(), content_mark); ProcessPageObject(buf, pPageObj.Get()); } + FinishMarks(buf, content_mark); return bDirty; } @@ -219,6 +235,61 @@ void CPDF_PageContentGenerator::UpdateStreamlessPageObjects( } } +const CPDF_ContentMark* CPDF_PageContentGenerator::ProcessContentMarks( + std::ostringstream* buf, + const CPDF_PageObject* pPageObj, + const CPDF_ContentMark* pPrev) { + const CPDF_ContentMark* pNext = &pPageObj->m_ContentMark; + + size_t first_different = pPrev->FindFirstDifference(pNext); + + // Close all marks that are in prev but not in next. + // Technically we should iterate backwards to close from the top to the + // bottom, but since the EMC operators do not identify which mark they are + // closing, it does not matter. + for (size_t i = first_different; i < pPrev->CountItems(); ++i) + *buf << "EMC\n"; + + // Open all marks that are in next but not in prev. + for (size_t i = first_different; i < pNext->CountItems(); ++i) { + const CPDF_ContentMarkItem* item = pNext->GetItem(i); + + // Write mark tag. + *buf << "/" << item->GetName() << " "; + + // If there are no parameters, write a BMC (begin marked content) operator. + if (item->GetParamType() == CPDF_ContentMarkItem::None) { + *buf << "BMC\n"; + continue; + } + + // If there are parameters, write properties, direct or indirect. + if (item->GetParamType() == CPDF_ContentMarkItem::DirectDict) { + CPDF_StringArchiveStream archive_stream(buf); + item->GetParam()->WriteTo(&archive_stream); + *buf << " "; + } else { + ASSERT(item->GetParamType() == CPDF_ContentMarkItem::PropertiesDict); + *buf << "/" << item->GetPropertyName() << " "; + } + + // Write BDC (begin dictionary content) operator. + *buf << "BDC\n"; + } + + return pNext; +} + +void CPDF_PageContentGenerator::FinishMarks( + std::ostringstream* buf, + const CPDF_ContentMark* pContentMark) { + // Technically we should iterate backwards to close from the top to the + // bottom, but since the EMC operators do not identify which mark they are + // closing, it does not matter. + for (size_t i = 0; i < pContentMark->CountItems(); ++i) + *buf << "EMC\n"; +} + void CPDF_PageContentGenerator::ProcessPageObject(std::ostringstream* buf, CPDF_PageObject* pPageObj) { if (CPDF_ImageObject* pImageObject = pPageObj->AsImage()) diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h index 13b8431f18..029a77935b 100644 --- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h +++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h @@ -16,6 +16,7 @@ #include "core/fxcrt/fx_system.h" #include "core/fxcrt/unowned_ptr.h" +class CPDF_ContentMark; class CPDF_Document; class CPDF_ImageObject; class CPDF_Object; @@ -44,6 +45,11 @@ class CPDF_PageContentGenerator { ByteString GetOrCreateDefaultGraphics() const; ByteString RealizeResource(const CPDF_Object* pResource, const ByteString& bsType) const; + const CPDF_ContentMark* ProcessContentMarks(std::ostringstream* buf, + const CPDF_PageObject* pPageObj, + const CPDF_ContentMark* pPrev); + void FinishMarks(std::ostringstream* buf, + const CPDF_ContentMark* pContentMark); // Returns a map from content stream index to new stream data. Unmodified // streams are not touched. diff --git a/core/fpdfapi/edit/cpdf_stringarchivestream.cpp b/core/fpdfapi/edit/cpdf_stringarchivestream.cpp new file mode 100644 index 0000000000..328d6a217e --- /dev/null +++ b/core/fpdfapi/edit/cpdf_stringarchivestream.cpp @@ -0,0 +1,35 @@ +// Copyright 2018 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "core/fpdfapi/edit/cpdf_stringarchivestream.h" + +CPDF_StringArchiveStream::CPDF_StringArchiveStream(std::ostringstream* stream) + : stream_(stream) {} + +CPDF_StringArchiveStream::~CPDF_StringArchiveStream() {} + +bool CPDF_StringArchiveStream::WriteByte(uint8_t byte) { + NOTREACHED(); + return false; +} + +bool CPDF_StringArchiveStream::WriteDWord(uint32_t i) { + NOTREACHED(); + return false; +} + +FX_FILESIZE CPDF_StringArchiveStream::CurrentOffset() const { + NOTREACHED(); + return false; +} + +bool CPDF_StringArchiveStream::WriteBlock(const void* pData, size_t size) { + stream_->write(static_cast(pData), size); + return true; +} + +bool CPDF_StringArchiveStream::WriteString(const ByteStringView& str) { + stream_->write(str.unterminated_c_str(), str.GetLength()); + return true; +} diff --git a/core/fpdfapi/edit/cpdf_stringarchivestream.h b/core/fpdfapi/edit/cpdf_stringarchivestream.h new file mode 100644 index 0000000000..bb5481e376 --- /dev/null +++ b/core/fpdfapi/edit/cpdf_stringarchivestream.h @@ -0,0 +1,26 @@ +// Copyright 2018 PDFium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CORE_FPDFAPI_EDIT_CPDF_STRINGARCHIVESTREAM_H_ +#define CORE_FPDFAPI_EDIT_CPDF_STRINGARCHIVESTREAM_H_ + +#include "core/fxcrt/fx_stream.h" + +class CPDF_StringArchiveStream : public IFX_ArchiveStream { + public: + explicit CPDF_StringArchiveStream(std::ostringstream* stream); + ~CPDF_StringArchiveStream() override; + + // IFX_ArchiveStream + bool WriteByte(uint8_t byte) override; + bool WriteDWord(uint32_t i) override; + FX_FILESIZE CurrentOffset() const override; + bool WriteBlock(const void* pData, size_t size) override; + bool WriteString(const ByteStringView& str) override; + + private: + std::ostringstream* stream_; +}; + +#endif // CORE_FPDFAPI_EDIT_CPDF_STRINGARCHIVESTREAM_H_ diff --git a/core/fpdfapi/page/cpdf_contentmark.cpp b/core/fpdfapi/page/cpdf_contentmark.cpp index 29d1bba210..1ff567d9e6 100644 --- a/core/fpdfapi/page/cpdf_contentmark.cpp +++ b/core/fpdfapi/page/cpdf_contentmark.cpp @@ -6,6 +6,7 @@ #include "core/fpdfapi/page/cpdf_contentmark.h" +#include #include #include "core/fpdfapi/parser/cpdf_dictionary.h" @@ -79,6 +80,20 @@ void CPDF_ContentMark::DeleteLastMark() { m_pMarkData.Reset(); } +size_t CPDF_ContentMark::FindFirstDifference( + const CPDF_ContentMark* other) const { + if (m_pMarkData == other->m_pMarkData) + return CountItems(); + + size_t min_len = std::min(CountItems(), other->CountItems()); + + for (size_t i = 0; i < min_len; ++i) { + if (GetItem(i) != other->GetItem(i)) + return i; + } + return min_len; +} + CPDF_ContentMark::MarkData::MarkData() {} CPDF_ContentMark::MarkData::MarkData(const MarkData& src) diff --git a/core/fpdfapi/page/cpdf_contentmark.h b/core/fpdfapi/page/cpdf_contentmark.h index 33180333af..8bbae52418 100644 --- a/core/fpdfapi/page/cpdf_contentmark.h +++ b/core/fpdfapi/page/cpdf_contentmark.h @@ -35,6 +35,7 @@ class CPDF_ContentMark { CPDF_Dictionary* pDict, const ByteString& property_name); void DeleteLastMark(); + size_t FindFirstDifference(const CPDF_ContentMark* other) const; private: class MarkData : public Retainable { diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp index e169c46b6d..52cc9fed86 100644 --- a/fpdfsdk/fpdf_edit_embeddertest.cpp +++ b/fpdfsdk/fpdf_edit_embeddertest.cpp @@ -694,6 +694,68 @@ TEST_F(FPDFEditEmbeddertest, RemoveMarkedObjectsPrime) { UnloadPage(page); } +TEST_F(FPDFEditEmbeddertest, MaintainMarkedObjects) { + // Load document with some text. + EXPECT_TRUE(OpenDocument("text_in_page_marked.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Iterate over all objects, counting the number of times each content mark + // name appears. + CheckMarkCounts(page, 1, 19, 8, 4, 9, 1); + + // Remove first page object. + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0); + EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object)); + FPDFPageObj_Destroy(page_object); + + CheckMarkCounts(page, 2, 18, 8, 3, 9, 1); + + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + + UnloadPage(page); + + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + + CheckMarkCounts(saved_page, 2, 18, 8, 3, 9, 1); + + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + +TEST_F(FPDFEditEmbeddertest, MaintainIndirectMarkedObjects) { + // Load document with some text. + EXPECT_TRUE(OpenDocument("text_in_page_marked_indirect.pdf")); + FPDF_PAGE page = LoadPage(0); + ASSERT_TRUE(page); + + // Iterate over all objects, counting the number of times each content mark + // name appears. + CheckMarkCounts(page, 1, 19, 8, 4, 9, 1); + + // Remove first page object. + FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0); + EXPECT_TRUE(FPDFPage_RemoveObject(page, page_object)); + FPDFPageObj_Destroy(page_object); + + CheckMarkCounts(page, 2, 18, 8, 3, 9, 1); + + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + + UnloadPage(page); + + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + + CheckMarkCounts(saved_page, 2, 18, 8, 3, 9, 1); + + CloseSavedPage(saved_page); + CloseSavedDocument(); +} + TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObject) { // Load document with some text. EXPECT_TRUE(OpenDocument("hello_world.pdf")); @@ -2160,10 +2222,29 @@ TEST_F(FPDFEditEmbeddertest, AddMarkedText) { CompareBitmap(page_bitmap.get(), 612, 792, md5); } + // Now save the result. + EXPECT_EQ(1, FPDFPage_CountObjects(page)); + EXPECT_TRUE(FPDFPage_GenerateContent(page)); + EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0)); + FPDF_ClosePage(page); - // TODO(pdfium:1118): Save, then re-open the file and check the changes were - // kept in the saved .pdf. + // Re-open the file and check the changes were kept in the saved .pdf. + OpenSavedDocument(); + FPDF_PAGE saved_page = LoadSavedPage(0); + EXPECT_EQ(1, FPDFPage_CountObjects(saved_page)); + + text_object = FPDFPage_GetObject(saved_page, 0); + EXPECT_TRUE(text_object); + EXPECT_EQ(1, FPDFPageObj_CountMarks(text_object)); + mark = FPDFPageObj_GetMark(text_object, 0); + EXPECT_TRUE(mark); + EXPECT_GT(FPDFPageObjMark_GetName(mark, buffer, 256), 0u); + name = GetPlatformWString(reinterpret_cast(buffer)); + EXPECT_EQ(L"TestMarkName", name); + + CloseSavedPage(saved_page); + CloseSavedDocument(); } TEST_F(FPDFEditEmbeddertest, ExtractImageBitmap) { -- cgit v1.2.3