Add documentation for pdf_processors.

Expose pdf_new_output_processor. Remove pdf_document argument to pdf_new_filter_processor. It is only ever used when copying resources from the old resource dictionary to the new one, whereupon it must agree with the bound pdf_document in the old resource dictionary.
author: Robin Watts <Robin.Watts@artifex.com> 2017-06-02 08:36:39 -0700
committer: Robin Watts <Robin.Watts@artifex.com> 2017-06-03 06:29:26 -0700
commit: 565624dd5c042de4ddf95be251d4cd011e9460d0 (patch)
tree: 468d731a472fce42add1b55156560ebe996c5e50
parent: 6f3972535746487bf552eab2fa0d0096b60aab19 (diff)
download: mupdf-565624dd5c042de4ddf95be251d4cd011e9460d0.tar.xz
4 files changed, 85 insertions, 9 deletions
diff --git a/include/mupdf/pdf/interpret.h b/include/mupdf/pdf/interpret.h
index b7b85fff..ad4807ec 100644
--- a/include/mupdf/pdf/interpret.h
+++ b/include/mupdf/pdf/interpret.h
@@ -155,9 +155,87 @@ struct pdf_csi_s
 };
 
 /* Functions to set up pdf_process structures */
+
+/*
+	pdf_new_run_processor: Create a new "run" processor. This maps
+	from PDF operators to fz_device level calls.
+
+	dev: The device to which the resulting device calls are to be
+	sent.
+
+	ctm: The initial transformation matrix to use.
+
+	usage: A NULL terminated string that describes the 'usage' of
+	this interpretation. Typically 'View', though 'Print' is also
+	defined within the PDF reference manual, and others are possible.
+
+	gstate: The initial graphics state.
+
+	nested: The nesting depth of this interpreter. This should be
+	0 for an initial call, and will be incremented in nested calls
+	due to Type 3 fonts.
+*/
 pdf_processor *pdf_new_run_processor(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, const char *usage, pdf_gstate *gstate, int nested);
+
+/*
+	pdf_new_buffer_processor: Create a buffer processor. This
+	collects the incoming PDF operator stream into an fz_buffer.
+
+	buffer: The (possibly empty) buffer to which operators will be
+	appended.
+
+	ahxencode: If 0, then image streams will be sent as binary,
+	otherwise they will be asciihexencoded.
+*/
 pdf_processor *pdf_new_buffer_processor(fz_context *ctx, fz_buffer *buffer, int ahxencode);
-pdf_processor *pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_document *doc, pdf_obj *old_res, pdf_obj *new_res);
+
+/*
+	pdf_new_output_processor: Create an output processor. This
+	sends the incoming PDF operator stream to an fz_output stream.
+
+	out: The output stream to which operators will be sent.
+
+	ahxencode: If 0, then image streams will be sent as binary,
+	otherwise they will be asciihexencoded.
+*/
+pdf_processor *pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode);
+
+/*
+	pdf_new_filter_processor: Create a filter processor. This
+	filters the PDF operators it is fed, and passes them down
+	(with some changes) to the child processor.
+
+	The changes made by the filter are:
+
+	* No operations are allowed to change the top level gstate.
+	Additional q/Q operators are inserted to prevent this.
+
+	* Repeated/unnecessary colour operators are removed (so,
+	for example, "0 0 0 rg 0 1 rg 0.5 g" would be sanitised to
+	"0.5 g")
+
+	The intention of these changes is to provide a simpler,
+	but equivalent stream, repairing problems with mismatched
+	operators, maintaining structure (such as BMC, EMC calls)
+	and leaving the graphics state in a known (default) state
+	so that subsequent operations (such as synthesising new
+	operators to be appended to the stream) are easier.
+
+	The net graphical effect of the filtered operator stream
+	should be identical to the incoming operator stream.
+
+	chain: The child processor to which the filtered operators
+	will be fed.
+
+	old_res: The incoming resource dictionary.
+
+	new_res: An (initially empty) resource dictionary that will
+	be populated by copying entries from the old dictionary to
+	the new one as they are used. At the end therefore, this
+	contains exactly those resource objects actually required.
+
+*/
+pdf_processor *pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_obj *old_res, pdf_obj *new_res);
 
 /* Functions to actually process annotations, glyphs and general stream objects */
 void pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *obj, pdf_obj *res, fz_cookie *cookie);
diff --git a/source/pdf/pdf-clean.c b/source/pdf/pdf-clean.c
index f74a3359..b8fa96c7 100644
--- a/source/pdf/pdf-clean.c
+++ b/source/pdf/pdf-clean.c
@@ -32,7 +32,7 @@ pdf_clean_stream_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_ob
 		res = pdf_new_dict(ctx, doc, 1);
 
 		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
-		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res);
+		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, orig_res, res);
 
 		pdf_process_contents(ctx, proc_filter, doc, orig_res, obj, cookie);
 		pdf_close_processor(ctx, proc_filter);
@@ -94,7 +94,7 @@ pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_
 			fz_try(ctx)
 			{
 				proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
-				proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res);
+				proc_filter = pdf_new_filter_processor(ctx, proc_buffer, orig_res, res);
 
 				pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
 				pdf_close_processor(ctx, proc_filter);
@@ -161,7 +161,7 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
 		resources = pdf_page_resources(ctx, page);
 
 		proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
-		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, resources, res);
+		proc_filter = pdf_new_filter_processor(ctx, proc_buffer, resources, res);
 
 		pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
 		pdf_close_processor(ctx, proc_filter);
diff --git a/source/pdf/pdf-op-buffer.c b/source/pdf/pdf-op-buffer.c
index 6ae7f8fc..d5585766 100644
--- a/source/pdf/pdf-op-buffer.c
+++ b/source/pdf/pdf-op-buffer.c
@@ -775,7 +775,7 @@ pdf_drop_output_processor(fz_context *ctx, pdf_processor *proc)
 	fz_drop_output(ctx, out);
 }
 
-static pdf_processor *
+pdf_processor *
 pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode)
 {
 	pdf_output_processor *proc = pdf_new_processor(ctx, sizeof *proc);
diff --git a/source/pdf/pdf-op-filter.c b/source/pdf/pdf-op-filter.c
index 731cabf2..ad80d3cd 100644
--- a/source/pdf/pdf-op-filter.c
+++ b/source/pdf/pdf-op-filter.c
@@ -49,7 +49,6 @@ typedef struct pdf_filter_processor_s
 	pdf_processor super;
 	pdf_processor *chain;
 	filter_gstate *gstate;
-	pdf_document *doc;
 	pdf_obj *old_rdb, *new_rdb;
 } pdf_filter_processor;
 
@@ -68,7 +67,7 @@ copy_resource(fz_context *ctx, pdf_filter_processor *p, pdf_obj *key, const char
 		res = pdf_dict_get(ctx, p->new_rdb, key);
 		if (!res)
 		{
-			res = pdf_new_dict(ctx, p->doc, 1);
+			res = pdf_new_dict(ctx, pdf_get_bound_document(ctx, p->new_rdb), 1);
 			pdf_dict_put_drop(ctx, p->new_rdb, key, res);
 		}
 		pdf_dict_putp(ctx, res, name, obj);
@@ -1123,7 +1122,7 @@ pdf_drop_filter_processor(fz_context *ctx, pdf_processor *proc)
 }
 
 pdf_processor *
-pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_document *doc, pdf_obj *old_rdb, pdf_obj *new_rdb)
+pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_obj *old_rdb, pdf_obj *new_rdb)
 {
 	pdf_filter_processor *proc = pdf_new_processor(ctx, sizeof *proc);
 	{
@@ -1243,7 +1242,6 @@ pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_document *do
 	}
 
 	proc->chain = chain;
-	proc->doc = doc;
 	proc->old_rdb = old_rdb;
 	proc->new_rdb = new_rdb;
author	Robin Watts <Robin.Watts@artifex.com>	2017-06-02 08:36:39 -0700
committer	Robin Watts <Robin.Watts@artifex.com>	2017-06-03 06:29:26 -0700
commit	565624dd5c042de4ddf95be251d4cd011e9460d0 (patch)
tree	468d731a472fce42add1b55156560ebe996c5e50
parent	6f3972535746487bf552eab2fa0d0096b60aab19 (diff)
download	mupdf-565624dd5c042de4ddf95be251d4cd011e9460d0.tar.xz