summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Watts <Robin.Watts@artifex.com>2017-06-02 08:36:39 -0700
committerRobin Watts <Robin.Watts@artifex.com>2017-06-03 06:29:26 -0700
commit565624dd5c042de4ddf95be251d4cd011e9460d0 (patch)
tree468d731a472fce42add1b55156560ebe996c5e50
parent6f3972535746487bf552eab2fa0d0096b60aab19 (diff)
downloadmupdf-565624dd5c042de4ddf95be251d4cd011e9460d0.tar.xz
Add documentation for pdf_processors.
Expose pdf_new_output_processor. Remove pdf_document argument to pdf_new_filter_processor. It is only ever used when copying resources from the old resource dictionary to the new one, whereupon it must agree with the bound pdf_document in the old resource dictionary.
-rw-r--r--include/mupdf/pdf/interpret.h80
-rw-r--r--source/pdf/pdf-clean.c6
-rw-r--r--source/pdf/pdf-op-buffer.c2
-rw-r--r--source/pdf/pdf-op-filter.c6
4 files changed, 85 insertions, 9 deletions
diff --git a/include/mupdf/pdf/interpret.h b/include/mupdf/pdf/interpret.h
index b7b85fff..ad4807ec 100644
--- a/include/mupdf/pdf/interpret.h
+++ b/include/mupdf/pdf/interpret.h
@@ -155,9 +155,87 @@ struct pdf_csi_s
};
/* Functions to set up pdf_process structures */
+
+/*
+ pdf_new_run_processor: Create a new "run" processor. This maps
+ from PDF operators to fz_device level calls.
+
+ dev: The device to which the resulting device calls are to be
+ sent.
+
+ ctm: The initial transformation matrix to use.
+
+ usage: A null-terminated string that describes the 'usage' of
+ this interpretation. Typically 'View', though 'Print' is also
+ defined within the PDF reference manual, and others are possible.
+
+ gstate: The initial graphics state.
+
+ nested: The nesting depth of this interpreter. This should be
+ 0 for an initial call, and will be incremented in nested calls
+ due to Type 3 fonts.
+*/
pdf_processor *pdf_new_run_processor(fz_context *ctx, fz_device *dev, const fz_matrix *ctm, const char *usage, pdf_gstate *gstate, int nested);
+
+/*
+ pdf_new_buffer_processor: Create a buffer processor. This
+ collects the incoming PDF operator stream into an fz_buffer.
+
+ buffer: The (possibly empty) buffer to which operators will be
+ appended.
+
+ ahxencode: If 0, then image streams will be sent as binary,
+ otherwise they will be ASCIIHex encoded.
+*/
pdf_processor *pdf_new_buffer_processor(fz_context *ctx, fz_buffer *buffer, int ahxencode);
-pdf_processor *pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_document *doc, pdf_obj *old_res, pdf_obj *new_res);
+
+/*
+ pdf_new_output_processor: Create an output processor. This
+ sends the incoming PDF operator stream to an fz_output stream.
+
+ out: The output stream to which operators will be sent.
+
+ ahxencode: If 0, then image streams will be sent as binary,
+ otherwise they will be ASCIIHex encoded.
+*/
+pdf_processor *pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode);
+
+/*
+ pdf_new_filter_processor: Create a filter processor. This
+ filters the PDF operators it is fed, and passes them down
+ (with some changes) to the child processor.
+
+ The changes made by the filter are:
+
+ * No operations are allowed to change the top level gstate.
+ Additional q/Q operators are inserted to prevent this.
+
+ * Repeated/unnecessary colour operators are removed (so,
+ for example, "0 0 0 rg 0 1 rg 0.5 g" would be sanitised to
+ "0.5 g")
+
+ The intention of these changes is to provide a simpler,
+ but equivalent stream, repairing problems with mismatched
+ operators, maintaining structure (such as BMC, EMC calls)
+ and leaving the graphics state in a known (default) state
+ so that subsequent operations (such as synthesising new
+ operators to be appended to the stream) are easier.
+
+ The net graphical effect of the filtered operator stream
+ should be identical to the incoming operator stream.
+
+ chain: The child processor to which the filtered operators
+ will be fed.
+
+ old_res: The incoming resource dictionary.
+
+ new_res: An (initially empty) resource dictionary that will
+ be populated by copying entries from the old dictionary to
+ the new one as they are used. At the end therefore, this
+ contains exactly those resource objects actually required.
+
+*/
+pdf_processor *pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_obj *old_res, pdf_obj *new_res);
/* Functions to actually process annotations, glyphs and general stream objects */
void pdf_process_contents(fz_context *ctx, pdf_processor *proc, pdf_document *doc, pdf_obj *obj, pdf_obj *res, fz_cookie *cookie);
diff --git a/source/pdf/pdf-clean.c b/source/pdf/pdf-clean.c
index f74a3359..b8fa96c7 100644
--- a/source/pdf/pdf-clean.c
+++ b/source/pdf/pdf-clean.c
@@ -32,7 +32,7 @@ pdf_clean_stream_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_ob
res = pdf_new_dict(ctx, doc, 1);
proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
- proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res);
+ proc_filter = pdf_new_filter_processor(ctx, proc_buffer, orig_res, res);
pdf_process_contents(ctx, proc_filter, doc, orig_res, obj, cookie);
pdf_close_processor(ctx, proc_filter);
@@ -94,7 +94,7 @@ pdf_clean_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *orig_
fz_try(ctx)
{
proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
- proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, orig_res, res);
+ proc_filter = pdf_new_filter_processor(ctx, proc_buffer, orig_res, res);
pdf_process_contents(ctx, proc_filter, doc, orig_res, val, cookie);
pdf_close_processor(ctx, proc_filter);
@@ -161,7 +161,7 @@ void pdf_clean_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page,
resources = pdf_page_resources(ctx, page);
proc_buffer = pdf_new_buffer_processor(ctx, buffer, ascii);
- proc_filter = pdf_new_filter_processor(ctx, proc_buffer, doc, resources, res);
+ proc_filter = pdf_new_filter_processor(ctx, proc_buffer, resources, res);
pdf_process_contents(ctx, proc_filter, doc, resources, contents, cookie);
pdf_close_processor(ctx, proc_filter);
diff --git a/source/pdf/pdf-op-buffer.c b/source/pdf/pdf-op-buffer.c
index 6ae7f8fc..d5585766 100644
--- a/source/pdf/pdf-op-buffer.c
+++ b/source/pdf/pdf-op-buffer.c
@@ -775,7 +775,7 @@ pdf_drop_output_processor(fz_context *ctx, pdf_processor *proc)
fz_drop_output(ctx, out);
}
-static pdf_processor *
+pdf_processor *
pdf_new_output_processor(fz_context *ctx, fz_output *out, int ahxencode)
{
pdf_output_processor *proc = pdf_new_processor(ctx, sizeof *proc);
diff --git a/source/pdf/pdf-op-filter.c b/source/pdf/pdf-op-filter.c
index 731cabf2..ad80d3cd 100644
--- a/source/pdf/pdf-op-filter.c
+++ b/source/pdf/pdf-op-filter.c
@@ -49,7 +49,6 @@ typedef struct pdf_filter_processor_s
pdf_processor super;
pdf_processor *chain;
filter_gstate *gstate;
- pdf_document *doc;
pdf_obj *old_rdb, *new_rdb;
} pdf_filter_processor;
@@ -68,7 +67,7 @@ copy_resource(fz_context *ctx, pdf_filter_processor *p, pdf_obj *key, const char
res = pdf_dict_get(ctx, p->new_rdb, key);
if (!res)
{
- res = pdf_new_dict(ctx, p->doc, 1);
+ res = pdf_new_dict(ctx, pdf_get_bound_document(ctx, p->new_rdb), 1);
pdf_dict_put_drop(ctx, p->new_rdb, key, res);
}
pdf_dict_putp(ctx, res, name, obj);
@@ -1123,7 +1122,7 @@ pdf_drop_filter_processor(fz_context *ctx, pdf_processor *proc)
}
pdf_processor *
-pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_document *doc, pdf_obj *old_rdb, pdf_obj *new_rdb)
+pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_obj *old_rdb, pdf_obj *new_rdb)
{
pdf_filter_processor *proc = pdf_new_processor(ctx, sizeof *proc);
{
@@ -1243,7 +1242,6 @@ pdf_new_filter_processor(fz_context *ctx, pdf_processor *chain, pdf_document *do
}
proc->chain = chain;
- proc->doc = doc;
proc->old_rdb = old_rdb;
proc->new_rdb = new_rdb;