summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHenrique Nakashima <hnakashima@chromium.org>2018-07-11 21:19:22 +0000
committerChromium commit bot <commit-bot@chromium.org>2018-07-11 21:19:22 +0000
commitb4bcf69210719810ca563b9f8c0179719e80d212 (patch)
tree1a809ee9a2c7b20db29e33918b6eecfa0aae8a2b
parent10a7ddb596f0089ba12d0db29b5752a61919a208 (diff)
downloadpdfium-b4bcf69210719810ca563b9f8c0179719e80d212.tar.xz
Write marked content operators when generating a stream.
The marked content operators are BMC, BDC and EMC. In the case of BDC, it is preceded by a direct dict or a property name. Bug: pdfium:1118 Change-Id: I3ee736ff7be3e7d7dde55ef581af3444a325e887 Reviewed-on: https://pdfium-review.googlesource.com/37470 Reviewed-by: Lei Zhang <thestig@chromium.org> Commit-Queue: Henrique Nakashima <hnakashima@chromium.org>
-rw-r--r--BUILD.gn2
-rw-r--r--core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp71
-rw-r--r--core/fpdfapi/edit/cpdf_pagecontentgenerator.h6
-rw-r--r--core/fpdfapi/edit/cpdf_stringarchivestream.cpp35
-rw-r--r--core/fpdfapi/edit/cpdf_stringarchivestream.h26
-rw-r--r--core/fpdfapi/page/cpdf_contentmark.cpp15
-rw-r--r--core/fpdfapi/page/cpdf_contentmark.h1
-rw-r--r--fpdfsdk/fpdf_edit_embeddertest.cpp85
8 files changed, 239 insertions, 2 deletions
diff --git a/BUILD.gn b/BUILD.gn
index 222784f36e..e571772915 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -474,6 +474,8 @@ jumbo_static_library("fpdfapi") {
"core/fpdfapi/edit/cpdf_pagecontentgenerator.h",
"core/fpdfapi/edit/cpdf_pagecontentmanager.cpp",
"core/fpdfapi/edit/cpdf_pagecontentmanager.h",
+ "core/fpdfapi/edit/cpdf_stringarchivestream.cpp",
+ "core/fpdfapi/edit/cpdf_stringarchivestream.h",
"core/fpdfapi/font/cfx_cttgsubtable.cpp",
"core/fpdfapi/font/cfx_cttgsubtable.h",
"core/fpdfapi/font/cfx_stockfontarray.cpp",
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
index 88f14b2ce4..f6a941d200 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.cpp
@@ -13,7 +13,9 @@
#include <utility>
#include "core/fpdfapi/edit/cpdf_pagecontentmanager.h"
+#include "core/fpdfapi/edit/cpdf_stringarchivestream.h"
#include "core/fpdfapi/font/cpdf_font.h"
+#include "core/fpdfapi/page/cpdf_contentmark.h"
#include "core/fpdfapi/page/cpdf_docpagedata.h"
#include "core/fpdfapi/page/cpdf_image.h"
#include "core/fpdfapi/page/cpdf_imageobject.h"
@@ -94,6 +96,9 @@ CPDF_PageContentGenerator::GenerateModifiedStreams() {
// Start regenerating dirty streams.
std::map<int32_t, std::unique_ptr<std::ostringstream>> streams;
std::set<int32_t> empty_streams;
+ std::unique_ptr<const CPDF_ContentMark> empty_content_mark =
+ pdfium::MakeUnique<CPDF_ContentMark>();
+ std::map<int32_t, const CPDF_ContentMark*> current_content_mark;
for (int32_t dirty_stream : all_dirty_streams) {
std::unique_ptr<std::ostringstream> buf =
@@ -108,6 +113,7 @@ CPDF_PageContentGenerator::GenerateModifiedStreams() {
streams[dirty_stream] = std::move(buf);
empty_streams.insert(dirty_stream);
+ current_content_mark[dirty_stream] = empty_content_mark.get();
}
// Process the page objects, write into each dirty stream.
@@ -119,6 +125,8 @@ CPDF_PageContentGenerator::GenerateModifiedStreams() {
std::ostringstream* buf = it->second.get();
empty_streams.erase(stream_index);
+ current_content_mark[stream_index] = ProcessContentMarks(
+ buf, pPageObj.Get(), current_content_mark[stream_index]);
ProcessPageObject(buf, pPageObj.Get());
}
@@ -129,6 +137,8 @@ CPDF_PageContentGenerator::GenerateModifiedStreams() {
// Clear to show that this stream needs to be deleted.
buf->str("");
} else {
+ FinishMarks(buf, current_content_mark[dirty_stream]);
+
// Return graphics to original state
*buf << "Q\n";
}
@@ -201,13 +211,19 @@ ByteString CPDF_PageContentGenerator::RealizeResource(
+// Writes every page object that needs regenerating into |buf|, preceded by
+// the marked-content operators its content marks require. When the holder is
+// a page, objects that are not dirty are skipped. Returns true if at least
+// one object was written.
bool CPDF_PageContentGenerator::ProcessPageObjects(std::ostringstream* buf) {
bool bDirty = false;
+ // Start from an empty mark so the first object written opens all of its
+ // own marks.
+ std::unique_ptr<const CPDF_ContentMark> empty_content_mark =
+ pdfium::MakeUnique<CPDF_ContentMark>();
+ const CPDF_ContentMark* content_mark = empty_content_mark.get();
+
for (auto& pPageObj : m_pageObjects) {
if (m_pObjHolder->IsPage() && !pPageObj->IsDirty())
continue;
bDirty = true;
+ content_mark = ProcessContentMarks(buf, pPageObj.Get(), content_mark);
ProcessPageObject(buf, pPageObj.Get());
}
+ // Close whatever marks the last written object left open.
+ FinishMarks(buf, content_mark);
return bDirty;
}
@@ -219,6 +235,61 @@ void CPDF_PageContentGenerator::UpdateStreamlessPageObjects(
}
}
+// Emits into |buf| the marked-content operators needed to go from the marks
+// in |pPrev| to the marks of |pPageObj|: one EMC for each item of |pPrev| at
+// or after the first difference, then a BMC or BDC for each item of the
+// object's marks from that index on. Returns the content mark of |pPageObj|
+// so the caller can pass it back as |pPrev| for the next object.
+const CPDF_ContentMark* CPDF_PageContentGenerator::ProcessContentMarks(
+ std::ostringstream* buf,
+ const CPDF_PageObject* pPageObj,
+ const CPDF_ContentMark* pPrev) {
+ const CPDF_ContentMark* pNext = &pPageObj->m_ContentMark;
+
+ // Items before this index are shared by both marks and stay open.
+ size_t first_different = pPrev->FindFirstDifference(pNext);
+
+ // Close all marks that are in prev but not in next.
+ // Technically we should iterate backwards to close from the top to the
+ // bottom, but since the EMC operators do not identify which mark they are
+ // closing, it does not matter.
+ for (size_t i = first_different; i < pPrev->CountItems(); ++i)
+ *buf << "EMC\n";
+
+ // Open all marks that are in next but not in prev.
+ for (size_t i = first_different; i < pNext->CountItems(); ++i) {
+ const CPDF_ContentMarkItem* item = pNext->GetItem(i);
+
+ // Write mark tag.
+ *buf << "/" << item->GetName() << " ";
+
+ // If there are no parameters, write a BMC (begin marked content) operator.
+ if (item->GetParamType() == CPDF_ContentMarkItem::None) {
+ *buf << "BMC\n";
+ continue;
+ }
+
+ // If there are parameters, write properties, direct or indirect.
+ if (item->GetParamType() == CPDF_ContentMarkItem::DirectDict) {
+ // Serialize the inline dictionary straight into |buf|.
+ CPDF_StringArchiveStream archive_stream(buf);
+ item->GetParam()->WriteTo(&archive_stream);
+ *buf << " ";
+ } else {
+ ASSERT(item->GetParamType() == CPDF_ContentMarkItem::PropertiesDict);
+ *buf << "/" << item->GetPropertyName() << " ";
+ }
+
+ // Write BDC (begin marked content with a property list) operator.
+ *buf << "BDC\n";
+ }
+
+ return pNext;
+}
+
+// Writes one EMC operator to |buf| for each item in |pContentMark|. Callers
+// in this file invoke it after the last object of a stream, passing the mark
+// returned by the final ProcessContentMarks() call.
+void CPDF_PageContentGenerator::FinishMarks(
+ std::ostringstream* buf,
+ const CPDF_ContentMark* pContentMark) {
+ // Technically we should iterate backwards to close from the top to the
+ // bottom, but since the EMC operators do not identify which mark they are
+ // closing, it does not matter.
+ for (size_t i = 0; i < pContentMark->CountItems(); ++i)
+ *buf << "EMC\n";
+}
+
void CPDF_PageContentGenerator::ProcessPageObject(std::ostringstream* buf,
CPDF_PageObject* pPageObj) {
if (CPDF_ImageObject* pImageObject = pPageObj->AsImage())
diff --git a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
index 13b8431f18..029a77935b 100644
--- a/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
+++ b/core/fpdfapi/edit/cpdf_pagecontentgenerator.h
@@ -16,6 +16,7 @@
#include "core/fxcrt/fx_system.h"
#include "core/fxcrt/unowned_ptr.h"
+class CPDF_ContentMark;
class CPDF_Document;
class CPDF_ImageObject;
class CPDF_Object;
@@ -44,6 +45,11 @@ class CPDF_PageContentGenerator {
ByteString GetOrCreateDefaultGraphics() const;
ByteString RealizeResource(const CPDF_Object* pResource,
const ByteString& bsType) const;
+ const CPDF_ContentMark* ProcessContentMarks(std::ostringstream* buf,
+ const CPDF_PageObject* pPageObj,
+ const CPDF_ContentMark* pPrev);
+ void FinishMarks(std::ostringstream* buf,
+ const CPDF_ContentMark* pContentMark);
// Returns a map from content stream index to new stream data. Unmodified
// streams are not touched.
diff --git a/core/fpdfapi/edit/cpdf_stringarchivestream.cpp b/core/fpdfapi/edit/cpdf_stringarchivestream.cpp
new file mode 100644
index 0000000000..328d6a217e
--- /dev/null
+++ b/core/fpdfapi/edit/cpdf_stringarchivestream.cpp
@@ -0,0 +1,35 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "core/fpdfapi/edit/cpdf_stringarchivestream.h"
+
+// Stores |stream| as the destination for later writes. The pointer is kept
+// as-is and dereferenced by WriteBlock() and WriteString().
+CPDF_StringArchiveStream::CPDF_StringArchiveStream(std::ostringstream* stream)
+ : stream_(stream) {}
+
+// Nothing to release here, so the compiler-generated destructor is enough.
+CPDF_StringArchiveStream::~CPDF_StringArchiveStream() = default;
+
+// Not supported by this stream: hits NOTREACHED() and reports failure.
+bool CPDF_StringArchiveStream::WriteByte(uint8_t byte) {
+ NOTREACHED();
+ return false;
+}
+
+// Not supported by this stream: hits NOTREACHED() and reports failure.
+bool CPDF_StringArchiveStream::WriteDWord(uint32_t i) {
+ NOTREACHED();
+ return false;
+}
+
+// Not supported by this stream: hits NOTREACHED(). The return type is a file
+// offset, not a bool, so the placeholder value is 0.
+FX_FILESIZE CPDF_StringArchiveStream::CurrentOffset() const {
+ NOTREACHED();
+ return 0;
+}
+
+// Appends |size| bytes starting at |pData| to the wrapped stream. Always
+// returns true; the stream state is not inspected after the write.
+bool CPDF_StringArchiveStream::WriteBlock(const void* pData, size_t size) {
+ stream_->write(static_cast<const char*>(pData), size);
+ return true;
+}
+
+// Appends the characters of |str| to the wrapped stream, using its explicit
+// length rather than relying on a terminator. Always returns true; the
+// stream state is not inspected after the write.
+bool CPDF_StringArchiveStream::WriteString(const ByteStringView& str) {
+ stream_->write(str.unterminated_c_str(), str.GetLength());
+ return true;
+}
diff --git a/core/fpdfapi/edit/cpdf_stringarchivestream.h b/core/fpdfapi/edit/cpdf_stringarchivestream.h
new file mode 100644
index 0000000000..bb5481e376
--- /dev/null
+++ b/core/fpdfapi/edit/cpdf_stringarchivestream.h
@@ -0,0 +1,26 @@
+// Copyright 2018 PDFium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CORE_FPDFAPI_EDIT_CPDF_STRINGARCHIVESTREAM_H_
+#define CORE_FPDFAPI_EDIT_CPDF_STRINGARCHIVESTREAM_H_
+
+#include "core/fxcrt/fx_stream.h"
+
+// IFX_ArchiveStream implementation that forwards block and string writes to
+// a std::ostringstream supplied by the caller.
+// NOTE(review): std::ostringstream is used here without a direct <sstream>
+// include in this header; presumably it arrives through fx_stream.h - confirm.
+class CPDF_StringArchiveStream : public IFX_ArchiveStream {
+ public:
+ explicit CPDF_StringArchiveStream(std::ostringstream* stream);
+ ~CPDF_StringArchiveStream() override;
+
+ // IFX_ArchiveStream
+ bool WriteByte(uint8_t byte) override;
+ bool WriteDWord(uint32_t i) override;
+ FX_FILESIZE CurrentOffset() const override;
+ bool WriteBlock(const void* pData, size_t size) override;
+ bool WriteString(const ByteStringView& str) override;
+
+ private:
+ // Destination stream, exactly as passed to the constructor.
+ std::ostringstream* stream_;
+};
+
+#endif // CORE_FPDFAPI_EDIT_CPDF_STRINGARCHIVESTREAM_H_
diff --git a/core/fpdfapi/page/cpdf_contentmark.cpp b/core/fpdfapi/page/cpdf_contentmark.cpp
index 29d1bba210..1ff567d9e6 100644
--- a/core/fpdfapi/page/cpdf_contentmark.cpp
+++ b/core/fpdfapi/page/cpdf_contentmark.cpp
@@ -6,6 +6,7 @@
#include "core/fpdfapi/page/cpdf_contentmark.h"
+#include <algorithm>
#include <utility>
#include "core/fpdfapi/parser/cpdf_dictionary.h"
@@ -79,6 +80,20 @@ void CPDF_ContentMark::DeleteLastMark() {
m_pMarkData.Reset();
}
+// Returns the index of the first item at which this mark and |other| differ.
+// If both share the same mark data, or one is a prefix of the other, the
+// result is the number of items they have in common.
+// NOTE(review): items are compared through whatever GetItem() returns; callers
+// in this change treat that as a pointer, so this looks like an identity
+// comparison rather than a deep one - confirm.
+size_t CPDF_ContentMark::FindFirstDifference(
+ const CPDF_ContentMark* other) const {
+ // Same underlying data: nothing differs.
+ if (m_pMarkData == other->m_pMarkData)
+ return CountItems();
+
+ size_t min_len = std::min(CountItems(), other->CountItems());
+
+ for (size_t i = 0; i < min_len; ++i) {
+ if (GetItem(i) != other->GetItem(i))
+ return i;
+ }
+ // No mismatch within the shared length.
+ return min_len;
+}
+
CPDF_ContentMark::MarkData::MarkData() {}
CPDF_ContentMark::MarkData::MarkData(const MarkData& src)
diff --git a/core/fpdfapi/page/cpdf_contentmark.h b/core/fpdfapi/page/cpdf_contentmark.h
index 33180333af..8bbae52418 100644
--- a/core/fpdfapi/page/cpdf_contentmark.h
+++ b/core/fpdfapi/page/cpdf_contentmark.h
@@ -35,6 +35,7 @@ class CPDF_ContentMark {
CPDF_Dictionary* pDict,
const ByteString& property_name);
void DeleteLastMark();
+ size_t FindFirstDifference(const CPDF_ContentMark* other) const;
private:
class MarkData : public Retainable {
diff --git a/fpdfsdk/fpdf_edit_embeddertest.cpp b/fpdfsdk/fpdf_edit_embeddertest.cpp
index e169c46b6d..52cc9fed86 100644
--- a/fpdfsdk/fpdf_edit_embeddertest.cpp
+++ b/fpdfsdk/fpdf_edit_embeddertest.cpp
@@ -694,6 +694,68 @@ TEST_F(FPDFEditEmbeddertest, RemoveMarkedObjectsPrime) {
UnloadPage(page);
}
+// Removes one object from a page with marked content, regenerates and saves
+// the page, then checks the saved copy has the same mark counts as the edited
+// in-memory page.
+TEST_F(FPDFEditEmbeddertest, MaintainMarkedObjects) {
+ // Load document with some text. Nothing below is meaningful without an open
+ // document, so a failure here must stop the test.
+ ASSERT_TRUE(OpenDocument("text_in_page_marked.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ // Iterate over all objects, counting the number of times each content mark
+ // name appears.
+ CheckMarkCounts(page, 1, 19, 8, 4, 9, 1);
+
+ // Remove first page object. The object may only be destroyed once the page
+ // has given it up, so a failed removal must stop the test before the
+ // FPDFPageObj_Destroy() call.
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0);
+ ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object));
+ FPDFPageObj_Destroy(page_object);
+
+ CheckMarkCounts(page, 2, 18, 8, 3, 9, 1);
+
+ EXPECT_TRUE(FPDFPage_GenerateContent(page));
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+
+ UnloadPage(page);
+
+ OpenSavedDocument();
+ FPDF_PAGE saved_page = LoadSavedPage(0);
+ // Do not inspect or close a page that failed to load.
+ ASSERT_TRUE(saved_page);
+
+ CheckMarkCounts(saved_page, 2, 18, 8, 3, 9, 1);
+
+ CloseSavedPage(saved_page);
+ CloseSavedDocument();
+}
+
+// Same scenario as the direct-dictionary case, run against
+// text_in_page_marked_indirect.pdf: remove one object, regenerate and save
+// the page, then check the saved copy has the same mark counts as the edited
+// in-memory page.
+TEST_F(FPDFEditEmbeddertest, MaintainIndirectMarkedObjects) {
+ // Load document with some text. Nothing below is meaningful without an open
+ // document, so a failure here must stop the test.
+ ASSERT_TRUE(OpenDocument("text_in_page_marked_indirect.pdf"));
+ FPDF_PAGE page = LoadPage(0);
+ ASSERT_TRUE(page);
+
+ // Iterate over all objects, counting the number of times each content mark
+ // name appears.
+ CheckMarkCounts(page, 1, 19, 8, 4, 9, 1);
+
+ // Remove first page object. The object may only be destroyed once the page
+ // has given it up, so a failed removal must stop the test before the
+ // FPDFPageObj_Destroy() call.
+ FPDF_PAGEOBJECT page_object = FPDFPage_GetObject(page, 0);
+ ASSERT_TRUE(FPDFPage_RemoveObject(page, page_object));
+ FPDFPageObj_Destroy(page_object);
+
+ CheckMarkCounts(page, 2, 18, 8, 3, 9, 1);
+
+ EXPECT_TRUE(FPDFPage_GenerateContent(page));
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+
+ UnloadPage(page);
+
+ OpenSavedDocument();
+ FPDF_PAGE saved_page = LoadSavedPage(0);
+ // Do not inspect or close a page that failed to load.
+ ASSERT_TRUE(saved_page);
+
+ CheckMarkCounts(saved_page, 2, 18, 8, 3, 9, 1);
+
+ CloseSavedPage(saved_page);
+ CloseSavedDocument();
+}
+
TEST_F(FPDFEditEmbeddertest, RemoveExistingPageObject) {
// Load document with some text.
EXPECT_TRUE(OpenDocument("hello_world.pdf"));
@@ -2160,10 +2222,29 @@ TEST_F(FPDFEditEmbeddertest, AddMarkedText) {
CompareBitmap(page_bitmap.get(), 612, 792, md5);
}
+ // Now save the result.
+ EXPECT_EQ(1, FPDFPage_CountObjects(page));
+ EXPECT_TRUE(FPDFPage_GenerateContent(page));
+ EXPECT_TRUE(FPDF_SaveAsCopy(document(), this, 0));
+
FPDF_ClosePage(page);
- // TODO(pdfium:1118): Save, then re-open the file and check the changes were
- // kept in the saved .pdf.
+ // Re-open the file and check the changes were kept in the saved .pdf.
+ OpenSavedDocument();
+ FPDF_PAGE saved_page = LoadSavedPage(0);
+ EXPECT_EQ(1, FPDFPage_CountObjects(saved_page));
+
+ text_object = FPDFPage_GetObject(saved_page, 0);
+ EXPECT_TRUE(text_object);
+ EXPECT_EQ(1, FPDFPageObj_CountMarks(text_object));
+ mark = FPDFPageObj_GetMark(text_object, 0);
+ EXPECT_TRUE(mark);
+ EXPECT_GT(FPDFPageObjMark_GetName(mark, buffer, 256), 0u);
+ name = GetPlatformWString(reinterpret_cast<unsigned short*>(buffer));
+ EXPECT_EQ(L"TestMarkName", name);
+
+ CloseSavedPage(saved_page);
+ CloseSavedDocument();
}
TEST_F(FPDFEditEmbeddertest, ExtractImageBitmap) {