Start slimming pdf_page.

We want to turn pdf_page into a thin wrapper around a pdf_obj, so that any updates to the underlying PDF objects will be reflected without having to reload the pdf_page.
author: Tor Andersson <tor.andersson@artifex.com> 2016-07-06 13:15:49 +0200
committer: Tor Andersson <tor.andersson@artifex.com> 2016-07-06 15:49:55 +0200
commit: f0eabc17d6ec113c6e765ac3272f19623a6cbd4e (patch)
tree: 873d91bfbc610d70e571358550c59d24c8b64ed2 /source/pdf/pdf-clean.c
parent: c3944e2e1cfb4ac86a8580829376357e1d5bccda (diff)
download: mupdf-f0eabc17d6ec113c6e765ac3272f19623a6cbd4e.tar.xz
1 file changed, 28 insertions, 35 deletions
diff --git a/source/pdf/pdf-clean.c b/source/pdf/pdf-clean.c
index d09cc4f7..ff54e6e7 100644
--- a/source/pdf/pdf-clean.c
+++ b/source/pdf/pdf-clean.c
@@ -132,16 +132,17 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
 	pdf_processor *proc_filter = NULL;
 	pdf_obj *new_obj = NULL;
 	pdf_obj *new_ref = NULL;
+	pdf_obj *res_ref = NULL;
 	pdf_obj *res = NULL;
-	pdf_obj *ref = NULL;
 	pdf_obj *obj;
 	pdf_obj *contents;
+	pdf_obj *resources;
 	fz_buffer *buffer;
 
 	fz_var(new_obj);
 	fz_var(new_ref);
 	fz_var(res);
-	fz_var(ref);
+	fz_var(res_ref);
 	fz_var(proc_buffer);
 	fz_var(proc_filter);
 
@@ -151,20 +152,23 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
 	{
 		res = pdf_new_dict(ctx, doc, 1);
 
+		contents = pdf_page_contents(ctx, page);
+		resources = pdf_page_resources(ctx, page);
+
 		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
-		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, page->resources, res);
+		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, resources, res);
 
-		pdf_process_contents(ctx, proc_filter, doc, page->resources, page->contents, cookie);
+		pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
+
+		/* Deal with page content stream. */
 
-		contents = page->contents;
 		if (pdf_is_array(ctx, contents))
 		{
 			/* create a new object to replace the array */
 			new_obj = pdf_new_dict(ctx, doc, 1);
 			new_ref = pdf_add_object(ctx, doc, new_obj);
-			pdf_drop_obj(ctx, page->contents);
-			page->contents = contents = pdf_keep_obj(ctx, new_ref);
-			pdf_dict_put(ctx, page->me, PDF_NAME_Contents, contents);
+			contents = new_ref;
+			pdf_dict_put(ctx, page->obj, PDF_NAME_Contents, contents);
 		}
 		else
 		{
@@ -172,6 +176,8 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
 			pdf_dict_del(ctx, contents, PDF_NAME_DecodeParms);
 		}
 
+		pdf_update_stream(ctx, doc, contents, buffer, 0);
+
 		/* Now deal with resources. The spec allows for Type3 fonts and form
 		 * XObjects to omit a resource dictionary and look in the parent.
 		 * Avoid that by flattening here as part of the cleaning. This could
@@ -187,55 +193,45 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
 			for (i = 0; i < l; i++)
 			{
 				pdf_obj *o = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME_SMask);
-
 				if (!o)
 					continue;
 				o = pdf_dict_get(ctx, o, PDF_NAME_G);
 				if (!o)
 					continue;
-
 				/* Transparency group XObject */
-				pdf_clean_stream_object(ctx, doc, o, page->resources, cookie, 1, ascii);
+				pdf_clean_stream_object(ctx, doc, o, resources, cookie, 1, ascii);
 			}
 		}
 
-		/* ColorSpace - no cleaning possible */
-
 		/* Pattern */
 		obj = pdf_dict_get(ctx, res, PDF_NAME_Pattern);
 		if (obj)
 		{
 			int i, l;
-
 			l = pdf_dict_len(ctx, obj);
 			for (i = 0; i < l; i++)
 			{
 				pdf_obj *pat = pdf_dict_get_val(ctx, obj, i);
-
 				if (!pat)
 					continue;
 				if (pdf_to_int(ctx, pdf_dict_get(ctx, pat, PDF_NAME_PatternType)) == 1)
-					pdf_clean_stream_object(ctx, doc, pat, page->resources, cookie, 0, ascii);
+					pdf_clean_stream_object(ctx, doc, pat, resources, cookie, 0, ascii);
 			}
 		}
 
-		/* Shading - no cleaning possible */
-
 		/* XObject */
 		obj = pdf_dict_get(ctx, res, PDF_NAME_XObject);
 		if (obj)
 		{
 			int i, l;
-
 			l = pdf_dict_len(ctx, obj);
 			for (i = 0; i < l; i++)
 			{
 				pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i);
-
-				if (!pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
+				if (!xobj)
 					continue;
-
-				pdf_clean_stream_object(ctx, doc, xobj, page->resources, cookie, 1, ascii);
+				if (pdf_name_eq(ctx, PDF_NAME_Form, pdf_dict_get(ctx, xobj, PDF_NAME_Subtype)))
+					pdf_clean_stream_object(ctx, doc, xobj, resources, cookie, 1, ascii);
 			}
 		}
 
@@ -244,34 +240,31 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
 		if (obj)
 		{
 			int i, l;
-
 			l = pdf_dict_len(ctx, obj);
 			for (i = 0; i < l; i++)
 			{
 				pdf_obj *o = pdf_dict_get_val(ctx, obj, i);
-
+				if (!o)
+					continue;
 				if (pdf_name_eq(ctx, PDF_NAME_Type3, pdf_dict_get(ctx, o, PDF_NAME_Subtype)))
-				{
-					pdf_clean_type3(ctx, doc, o, page->resources, cookie, ascii);
-				}
+					pdf_clean_type3(ctx, doc, o, resources, cookie, ascii);
 			}
 		}
 
 		/* ProcSet - no cleaning possible. Inherit this from the old dict. */
-		obj = pdf_dict_get(ctx, page->resources, PDF_NAME_ProcSet);
+		obj = pdf_dict_get(ctx, resources, PDF_NAME_ProcSet);
 		if (obj)
 			pdf_dict_put(ctx, res, PDF_NAME_ProcSet, obj);
 
+		/* ColorSpace - no cleaning possible. */
 		/* Properties - no cleaning possible. */
 
 		if (proc_fn)
 			(*proc_fn)(ctx, buffer, res, proc_arg);
 
-		pdf_update_stream(ctx, doc, contents, buffer, 0);
-		pdf_drop_obj(ctx, page->resources);
-		ref = pdf_add_object(ctx, doc, res);
-		page->resources = pdf_keep_obj(ctx, ref);
-		pdf_dict_put(ctx, page->me, PDF_NAME_Resources, ref);
+		/* Update resource dictionary */
+		res_ref = pdf_add_object(ctx, doc, res);
+		pdf_dict_put(ctx, page->obj, PDF_NAME_Resources, res_ref);
 	}
 	fz_always(ctx)
 	{
@@ -280,8 +273,8 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
 		fz_drop_buffer(ctx, buffer);
 		pdf_drop_obj(ctx, new_obj);
 		pdf_drop_obj(ctx, new_ref);
+		pdf_drop_obj(ctx, res_ref);
 		pdf_drop_obj(ctx, res);
-		pdf_drop_obj(ctx, ref);
 	}
 	fz_catch(ctx)
 	{
author	Tor Andersson <tor.andersson@artifex.com>	2016-07-06 13:15:49 +0200
committer	Tor Andersson <tor.andersson@artifex.com>	2016-07-06 15:49:55 +0200
commit	f0eabc17d6ec113c6e765ac3272f19623a6cbd4e (patch)
tree	873d91bfbc610d70e571358550c59d24c8b64ed2 /source/pdf/pdf-clean.c
parent	c3944e2e1cfb4ac86a8580829376357e1d5bccda (diff)
download	mupdf-f0eabc17d6ec113c6e765ac3272f19623a6cbd4e.tar.xz