diff options
-rw-r--r-- | include/mupdf/pdf/page.h | 36 | ||||
-rw-r--r-- | source/pdf/pdf-clean.c | 7 | ||||
-rw-r--r-- | source/pdf/pdf-write.c | 2 |
3 files changed, 37 insertions, 8 deletions
diff --git a/include/mupdf/pdf/page.h b/include/mupdf/pdf/page.h index ab4a8ce3..857e8994 100644 --- a/include/mupdf/pdf/page.h +++ b/include/mupdf/pdf/page.h @@ -72,11 +72,36 @@ void pdf_run_page_with_usage(fz_context *ctx, pdf_document *doc, pdf_page *page, void pdf_run_page_contents(fz_context *ctx, pdf_page *page, fz_device *dev, const fz_matrix *ctm, fz_cookie *cookie); /* - pdf_clean_page_contents: Clean a loaded pages rendering operations. - This involves filtering the PDF operators used to avoid (some cases + pdf_page_contents_process_fn: A function used for processing the + cleaned page contents/resources gathered as part of + pdf_clean_page_contents. + + arg: An opaque arg specific to the particular function. + + buffer: A buffer holding the page contents. + + res: A pdf_obj holding the page resources. +*/ +typedef void (pdf_page_contents_process_fn)(void *arg, fz_buffer *buffer, pdf_obj *res); + +/* + pdf_clean_page_contents: Clean a loaded page's rendering operations, + with an optional post-processing step. + + Firstly, this filters the PDF operators used to avoid (some cases of) repetition, and leaves the page in a balanced state with an - unchanged top level matrix etc. Just the main page contents without - the annotations + unchanged top level matrix etc. At the same time, the resources + used by the page contents are collected. + + Next, the resources themselves are cleaned (as appropriate) in the + same way. + + Next, an optional post-processing stage is called. + + Finally, the page contents and resources in the document's page tree + are replaced by these processed versions. + + Annotations remain unaffected. page: A page loaded by pdf_load_page. @@ -85,7 +110,8 @@ void pdf_run_page_contents(fz_context *ctx, pdf_page *page, fz_device *dev, cons cookie: A pointer to an optional fz_cookie structure that can be used to track progress, collect errors etc. 
*/ -void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie); +void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, + pdf_page_contents_process_fn *proc, void *proc_arg); /* Presentation interface. diff --git a/source/pdf/pdf-clean.c b/source/pdf/pdf-clean.c index 89926832..b88bbb71 100644 --- a/source/pdf/pdf-clean.c +++ b/source/pdf/pdf-clean.c @@ -117,7 +117,7 @@ pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_ } } -void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie) +void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_cookie *cookie, pdf_page_contents_process_fn *proc_fn, void *proc_arg) { pdf_process process, process2; fz_buffer *buffer = fz_new_buffer(ctx, 1024); @@ -166,7 +166,6 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, num = pdf_to_num(ctx, contents); pdf_dict_dels(ctx, contents, "Filter"); } - pdf_update_stream(ctx, doc, num, buffer); /* Now deal with resources. The spec allows for Type3 fonts and form * XObjects to omit a resource dictionary and look in the parent. @@ -260,6 +259,10 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, /* Properties - no cleaning possible. 
*/ + if (proc_fn) + (*proc_fn)(proc_arg, buffer, res); + + pdf_update_stream(ctx, doc, num, buffer); pdf_drop_obj(ctx, page->resources); ref = pdf_new_ref(ctx, doc, res); page->resources = pdf_keep_obj(ctx, ref); diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c index 1016e8af..6f5088c6 100644 --- a/source/pdf/pdf-write.c +++ b/source/pdf/pdf-write.c @@ -2533,7 +2533,7 @@ static void sanitise(fz_context *ctx, pdf_document *doc) { pdf_page *page = pdf_load_page(ctx, doc, i); - pdf_clean_page_contents(ctx, doc, page, NULL); + pdf_clean_page_contents(ctx, doc, page, NULL, NULL, NULL); fz_drop_page(ctx, &page->super); } |