diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2016-07-06 13:15:49 +0200 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2016-07-06 15:49:55 +0200 |
commit | f0eabc17d6ec113c6e765ac3272f19623a6cbd4e (patch) | |
tree | 873d91bfbc610d70e571358550c59d24c8b64ed2 /source/pdf/pdf-clean.c | |
parent | c3944e2e1cfb4ac86a8580829376357e1d5bccda (diff) | |
download | mupdf-f0eabc17d6ec113c6e765ac3272f19623a6cbd4e.tar.xz |
Start slimming pdf_page.
We want to turn pdf_page into a thin wrapper around a pdf_obj, so that
any updates to the underlying PDF objects will be reflected without
having to reload the pdf_page.
Diffstat (limited to 'source/pdf/pdf-clean.c')
-rw-r--r-- | source/pdf/pdf-clean.c | 63 |
1 file changed, 28 insertions, 35 deletions
diff --git a/source/pdf/pdf-clean.c b/source/pdf/pdf-clean.c index d09cc4f7..ff54e6e7 100644 --- a/source/pdf/pdf-clean.c +++ b/source/pdf/pdf-clean.c @@ -132,16 +132,17 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_processor *proc_filter = NULL; pdf_obj *new_obj = NULL; pdf_obj *new_ref = NULL; + pdf_obj *res_ref = NULL; pdf_obj *res = NULL; - pdf_obj *ref = NULL; pdf_obj *obj; pdf_obj *contents; + pdf_obj *resources; fz_buffer *buffer; fz_var(new_obj); fz_var(new_ref); fz_var(res); - fz_var(ref); + fz_var(res_ref); fz_var(proc_buffer); fz_var(proc_filter); @@ -151,20 +152,23 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, { res = pdf_new_dict(ctx, doc, 1); + contents = pdf_page_contents(ctx, page); + resources = pdf_page_resources(ctx, page); + proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii); - proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res); + proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, resources, res); - pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie); + pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie); + + /* Deal with page content stream. */ - contents = page->contents; if (pdf_is_array(ctx, contents)) { /* create a new object to replace the array */ new_obj = pdf_new_dict(ctx, doc, 1); new_ref = pdf_add_object(ctx, doc, new_obj); - pdf_drop_obj(ctx, page->contents); - page->contents = contents = pdf_keep_obj(ctx, new_ref); - pdf_dict_put(ctx, page->me, PDF_NAME_Contents, contents); + contents = new_ref; + pdf_dict_put(ctx, page->obj, PDF_NAME_Contents, contents); } else { @@ -172,6 +176,8 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms); } + pdf_update_stream(ctx, doc, contents, buffer, 0); + /* Now deal with resources. 
The spec allows for Type3 fonts and form * XObjects to omit a resource dictionary and look in the parent. * Avoid that by flattening here as part of the cleaning. This could @@ -187,55 +193,45 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask); - if (!o) continue; o = pdf_dict_get(ctx, o, PDF_NAME_G); if (!o) continue; - /* Transparency group XObject */ - pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1, ascii); + pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, ascii); } } - /* ColorSpace - no cleaning possible */ - /* Pattern */ obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern); if (obj) { int i, l; - l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *pat = pdf_dict_get_val(ctx, obj, i); - if (!pat) continue; if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1) - pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0, ascii); + pdf_clean_stream_object(ctx, doc, pat, resources, cookie, 0, ascii); } } - /* Shading - no cleaning possible */ - /* XObject */ obj = pdf_dict_get(ctx, res, PDF_NAME_XObject); if (obj) { int i, l; - l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i); - - if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype))) + if (!xobj) continue; - - pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1, ascii); + if (pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype))) + pdf_clean_stream_object(ctx, doc, xobj, resources, cookie, 1, ascii); } } @@ -244,34 +240,31 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, if (obj) { int i, l; - l = pdf_dict_len(ctx, obj); for (i = 0; i < l; i++) { pdf_obj *o = pdf_dict_get_val(ctx, obj, i); - + if (!o) + continue; if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, 
PDF_NAME_Subtype))) - { - pdf_clean_type3(ctx, doc, o, page->resources, cookie, ascii); - } + pdf_clean_type3(ctx, doc, o, resources, cookie, ascii); } } /* ProcSet - no cleaning possible. Inherit this from the old dict. */ - obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet); + obj = pdf_dict_get(ctx, resources, PDF_NAME_ProcSet); if (obj) pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj); + /* ColorSpace - no cleaning possible. */ /* Properties - no cleaning possible. */ if (proc_fn) (*proc_fn)(ctx, buffer, res, proc_arg); - pdf_update_stream(ctx, doc, contents, buffer, 0); - pdf_drop_obj(ctx, page->resources); - ref = pdf_add_object(ctx, doc, res); - page->resources = pdf_keep_obj(ctx, ref); - pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref); + /* Update resource dictionary */ + res_ref = pdf_add_object(ctx, doc, res); + pdf_dict_put(ctx, page->obj, PDF_NAME_Resources, res_ref); } fz_always(ctx) { @@ -280,8 +273,8 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, fz_drop_buffer(ctx, buffer); pdf_drop_obj(ctx, new_obj); pdf_drop_obj(ctx, new_ref); + pdf_drop_obj(ctx, res_ref); pdf_drop_obj(ctx, res); - pdf_drop_obj(ctx, ref); } fz_catch(ctx) { |