summary refs log tree commit diff
path: root/source/pdf/pdf-clean.c
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2016-07-06 13:15:49 +0200
committer Tor Andersson <tor.andersson@artifex.com> 2016-07-06 15:49:55 +0200
commit f0eabc17d6ec113c6e765ac3272f19623a6cbd4e (patch)
tree 873d91bfbc610d70e571358550c59d24c8b64ed2 /source/pdf/pdf-clean.c
parent c3944e2e1cfb4ac86a8580829376357e1d5bccda (diff)
download mupdf-f0eabc17d6ec113c6e765ac3272f19623a6cbd4e.tar.xz
Start slimming pdf_page.
We want to turn pdf_page into a thin wrapper around a pdf_obj, so that any updates to the underlying PDF objects will be reflected without having to reload the pdf_page.
Diffstat (limited to 'source/pdf/pdf-clean.c')
-rw-r--r-- source/pdf/pdf-clean.c 63
1 file changed, 28 insertions, 35 deletions
diff --git a/source/pdf/pdf-clean.c b/source/pdf/pdf-clean.c
index d09cc4f7..ff54e6e7 100644
--- a/source/pdf/pdf-clean.c
+++ b/source/pdf/pdf-clean.c
@@ -132,16 +132,17 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
pdf_processor *proc_filter = NULL;
pdf_obj *new_obj = NULL;
pdf_obj *new_ref = NULL;
+ pdf_obj *res_ref = NULL;
pdf_obj *res = NULL;
- pdf_obj *ref = NULL;
pdf_obj *obj;
pdf_obj *contents;
+ pdf_obj *resources;
fz_buffer *buffer;
fz_var(new_obj);
fz_var(new_ref);
fz_var(res);
- fz_var(ref);
+ fz_var(res_ref);
fz_var(proc_buffer);
fz_var(proc_filter);
@@ -151,20 +152,23 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
{
res = pdf_new_dict(ctx, doc, 1);
+ contents = pdf_page_contents(ctx, page);
+ resources = pdf_page_resources(ctx, page);
+
proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
- proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res);
+ proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, resources, res);
- pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie);
+ pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
+
+ /* Deal with page content stream. */
- contents = page->contents;
if (pdf_is_array(ctx, contents))
{
/* create a new object to replace the array */
new_obj = pdf_new_dict(ctx, doc, 1);
new_ref = pdf_add_object(ctx, doc, new_obj);
- pdf_drop_obj(ctx, page->contents);
- page->contents = contents = pdf_keep_obj(ctx, new_ref);
- pdf_dict_put(ctx, page->me, PDF_NAME_Contents, contents);
+ contents = new_ref;
+ pdf_dict_put(ctx, page->obj, PDF_NAME_Contents, contents);
}
else
{
@@ -172,6 +176,8 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms);
}
+ pdf_update_stream(ctx, doc, contents, buffer, 0);
+
/* Now deal with resources. The spec allows for Type3 fonts and form
* XObjects to omit a resource dictionary and look in the parent.
* Avoid that by flattening here as part of the cleaning. This could
@@ -187,55 +193,45 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
for (i = 0; i < l; i++)
{
pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask);
-
if (!o)
continue;
o = pdf_dict_get(ctx, o, PDF_NAME_G);
if (!o)
continue;
-
/* Transparency group XObject */
- pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1, ascii);
+ pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, ascii);
}
}
- /* ColorSpace - no cleaning possible */
-
/* Pattern */
obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern);
if (obj)
{
int i, l;
-
l = pdf_dict_len(ctx, obj);
for (i = 0; i < l; i++)
{
pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
-
if (!pat)
continue;
if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1)
- pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0, ascii);
+ pdf_clean_stream_object(ctx, doc, pat, resources, cookie, 0, ascii);
}
}
- /* Shading - no cleaning possible */
-
/* XObject */
obj = pdf_dict_get(ctx, res, PDF_NAME_XObject);
if (obj)
{
int i, l;
-
l = pdf_dict_len(ctx, obj);
for (i = 0; i < l; i++)
{
pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
-
- if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
+ if (!xobj)
continue;
-
- pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1, ascii);
+ if (pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
+ pdf_clean_stream_object(ctx, doc, xobj, resources, cookie, 1, ascii);
}
}
@@ -244,34 +240,31 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
if (obj)
{
int i, l;
-
l = pdf_dict_len(ctx, obj);
for (i = 0; i < l; i++)
{
pdf_obj *o = pdf_dict_get_val(ctx, obj, i);
-
+ if (!o)
+ continue;
if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype)))
- {
- pdf_clean_type3(ctx, doc, o, page->resources, cookie, ascii);
- }
+ pdf_clean_type3(ctx, doc, o, resources, cookie, ascii);
}
}
/* ProcSet - no cleaning possible. Inherit this from the old dict. */
- obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet);
+ obj = pdf_dict_get(ctx, resources, PDF_NAME_ProcSet);
if (obj)
pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj);
+ /* ColorSpace - no cleaning possible. */
/* Properties - no cleaning possible. */
if (proc_fn)
(*proc_fn)(ctx, buffer, res, proc_arg);
- pdf_update_stream(ctx, doc, contents, buffer, 0);
- pdf_drop_obj(ctx, page->resources);
- ref = pdf_add_object(ctx, doc, res);
- page->resources = pdf_keep_obj(ctx, ref);
- pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref);
+ /* Update resource dictionary */
+ res_ref = pdf_add_object(ctx, doc, res);
+ pdf_dict_put(ctx, page->obj, PDF_NAME_Resources, res_ref);
}
fz_always(ctx)
{
@@ -280,8 +273,8 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
fz_drop_buffer(ctx, buffer);
pdf_drop_obj(ctx, new_obj);
pdf_drop_obj(ctx, new_ref);
+ pdf_drop_obj(ctx, res_ref);
pdf_drop_obj(ctx, res);
- pdf_drop_obj(ctx, ref);
}
fz_catch(ctx)
{