summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/mupdf/pdf/page.h36
-rw-r--r--source/pdf/pdf-clean.c7
-rw-r--r--source/pdf/pdf-write.c2
3 files changed, 37 insertions, 8 deletions
diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h
index ab4a8ce3..857e8994 100644
--- a/include/mupdf/pdf/page.h
+++ b/include/mupdf/pdf/page.h
@@ -72,11 +72,36 @@ void pdf_run_page_with_usage(fz_context *ctx, pdf_document *doc, pdf_page *page,
void pdf_run_page_contents(fz_context *ctx, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie);
/*
- pdf_clean_page_contents: Clean a loaded pages rendering operations.
- This involves filtering the PDF operators used to avoid (some cases
+ pdf_page_contents_process_fn: A function used for processing the
+ cleaned page contents/resources gathered as part of
+ pdf_clean_page_contents.
+
+ arg: An opaque arg specific to the particular function.
+
+ buffer: A buffer holding the page contents.
+
+ res: A pdf_obj holding the page resources.
+*/
+typedef void (pdf_page_contents_process_fn)(void *arg, fz_buffer *buffer, pdf_obj *res);
+
+/*
+ pdf_clean_page_contents: Clean a loaded page's rendering operations,
+ with an optional post-processing step.
+
+ Firstly, this filters the PDF operators used to avoid (some cases
of) repetition, and leaves the page in a balanced state with an
- unchanged top level matrix etc. Just the main page contents without
- the annotations
+ unchanged top level matrix etc. At the same time, the resources
+ used by the page contents are collected.
+
+ Next, the resources themselves are cleaned (as appropriate) in the
+ same way.
+
+ Next, an optional post-processing stage is called.
+
+ Finally, the page contents and resources in the document's page tree
+ are replaced by these processed versions.
+
+ Annotations remain unaffected.
page: A page loaded by pdf_load_page.
@@ -85,7 +110,8 @@ void pdf_run_page_contents(fz_context *ctx, pdf_page *page, fz_device *dev, cons
cookie: A pointer to an optional fz_cookie structure that can be used
to track progress, collect errors etc.
*/
-void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie);
+void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie,
+ pdf_page_contents_process_fn *proc, void *proc_arg);
/*
Presentation interface.
diff --git a/source/pdf/pdf-clean.c b/source/pdf/pdf-clean.c
index 89926832..b88bbb71 100644
--- a/source/pdf/pdf-clean.c
+++ b/source/pdf/pdf-clean.c
@@ -117,7 +117,7 @@ pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_
}
}
-void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie)
+void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg)
{
pdf_process process, process2;
fz_buffer *buffer = fz_new_buffer(ctx, 1024);
@@ -166,7 +166,6 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
num = pdf_to_num(ctx, contents);
pdf_dict_dels(ctx, contents, "Filter");
}
- pdf_update_stream(ctx, doc, num, buffer);
/* Now deal with resources. The spec allows for Type3 fonts and form
* XObjects to omit a resource dictionary and look in the parent.
@@ -260,6 +259,10 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
/* Properties - no cleaning possible. */
+ if (proc_fn)
+ (*proc_fn)(proc_arg, buffer, res);
+
+ pdf_update_stream(ctx, doc, num, buffer);
pdf_drop_obj(ctx, page->resources);
ref = pdf_new_ref(ctx, doc, res);
page->resources = pdf_keep_obj(ctx, ref);
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index 1016e8af..6f5088c6 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -2533,7 +2533,7 @@ static void sanitise(fz_context *ctx, pdf_document *doc)
{
pdf_page *page = pdf_load_page(ctx, doc, i);
- pdf_clean_page_contents(ctx, doc, page, NULL);
+ pdf_clean_page_contents(ctx, doc, page, NULL, NULL, NULL);
fz_drop_page(ctx, &page->super);
}