diff options
author | Robin Watts <robin.watts@artifex.com> | 2015-02-27 14:39:21 +0000 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2015-02-27 17:20:32 +0000 |
commit | 8ff8784def2fbf49a303a86259919ff143050c5f (patch) | |
tree | 0bef30f78a42529ff56c99172da0cb2b3b3c25d6 | |
parent | 060ae5d3483fb4f060ecbbf2d706c1159b760114 (diff) | |
download | mupdf-8ff8784def2fbf49a303a86259919ff143050c5f.tar.xz |
Bug 695853: Fix pdf clean operation with invalid refs in input file.
MuPDF (and other PDF readers) treat invalid references as 'null'
objects. For instance, in the supplied file, object 239 is supposedly
free, but a reference is made to it.
When cleaning (or linearising) a file, we renumber objects; such
illegal refs then end up pointing somewhere else.
The workaround here is simply to spot the invalid refs during the
mark phase, and to replace each referencing dict value or array entry with null.
-rw-r--r-- | include/mupdf/pdf/object.h | 2 | ||||
-rw-r--r-- | source/pdf/pdf-object.c | 26 | ||||
-rw-r--r-- | source/pdf/pdf-write.c | 47 |
3 files changed, 64 insertions, 11 deletions
diff --git a/include/mupdf/pdf/object.h b/include/mupdf/pdf/object.h index bcb4ab56..642a52d2 100644 --- a/include/mupdf/pdf/object.h +++ b/include/mupdf/pdf/object.h @@ -75,6 +75,7 @@ int pdf_to_gen(fz_context *ctx, pdf_obj *obj); int pdf_array_len(fz_context *ctx, pdf_obj *array); pdf_obj *pdf_array_get(fz_context *ctx, pdf_obj *array, int i); void pdf_array_put(fz_context *ctx, pdf_obj *array, int i, pdf_obj *obj); +void pdf_array_put_drop(fz_context *ctx, pdf_obj *array, int i, pdf_obj *obj); void pdf_array_push(fz_context *ctx, pdf_obj *array, pdf_obj *obj); void pdf_array_push_drop(fz_context *ctx, pdf_obj *array, pdf_obj *obj); void pdf_array_insert(fz_context *ctx, pdf_obj *array, pdf_obj *obj, int index); @@ -85,6 +86,7 @@ int pdf_array_contains(fz_context *ctx, pdf_obj *array, pdf_obj *obj); int pdf_dict_len(fz_context *ctx, pdf_obj *dict); pdf_obj *pdf_dict_get_key(fz_context *ctx, pdf_obj *dict, int idx); pdf_obj *pdf_dict_get_val(fz_context *ctx, pdf_obj *dict, int idx); +void pdf_dict_put_val_drop(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *new_obj); pdf_obj *pdf_dict_get(fz_context *ctx, pdf_obj *dict, pdf_obj *key); pdf_obj *pdf_dict_gets(fz_context *ctx, pdf_obj *dict, const char *key); pdf_obj *pdf_dict_getp(fz_context *ctx, pdf_obj *dict, const char *key); diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c index d4b008fb..1a0b9abf 100644 --- a/source/pdf/pdf-object.c +++ b/source/pdf/pdf-object.c @@ -566,6 +566,13 @@ pdf_array_put(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *item) } void +pdf_array_put_drop(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *item) +{ + pdf_array_put(ctx, obj, i, item); + pdf_drop_obj(ctx, item); +} + +void pdf_array_push(fz_context *ctx, pdf_obj *obj, pdf_obj *item) { RESOLVE(obj); @@ -880,6 +887,25 @@ pdf_dict_get_val(fz_context *ctx, pdf_obj *obj, int i) return obj->u.d.items[i].v; } +void +pdf_dict_put_val_drop(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *new_obj) +{ + RESOLVE(obj); + if (!obj || 
obj->kind != PDF_DICT) + { + pdf_drop_obj(ctx, new_obj); + return; + } + if (i < 0 || i >= obj->u.d.len) + { + /* FIXME: Should probably extend the dict here */ + pdf_drop_obj(ctx, new_obj); + return; + } + pdf_drop_obj(ctx, obj->u.d.items[i].v); + obj->u.d.items[i].v = new_obj; +} + static int pdf_dict_finds(fz_context *ctx, pdf_obj *obj, const char *key, int *location) { diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c index 6f5088c6..dc063762 100644 --- a/source/pdf/pdf-write.c +++ b/source/pdf/pdf-write.c @@ -504,13 +504,22 @@ objects_dump(fz_context *ctx, pdf_document *doc, pdf_write_options *opts) * Garbage collect objects not reachable from the trailer. */ -static pdf_obj *sweepref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj) +/* Mark a reference. If it's been marked already, return NULL (as no further + * processing is required). If it's not, return the resolved object so + * that we can continue our recursive marking. If it's a duff reference + * return the fact so that we can remove the reference at source. + */ +static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj, int *duff) { int num = pdf_to_num(ctx, obj); int gen = pdf_to_gen(ctx, obj); if (num <= 0 || num >= pdf_xref_len(ctx, doc)) + { + *duff = 1; return NULL; + } + *duff = 0; if (opts->use_list[num]) return NULL; @@ -536,29 +545,47 @@ static pdf_obj *sweepref(fz_context *ctx, pdf_document *doc, pdf_write_options * /* Leave broken */ } - return pdf_resolve_indirect(ctx, obj); + obj = pdf_resolve_indirect(ctx, obj); + if (obj == NULL || pdf_is_null(ctx, obj)) + { + *duff = 1; + opts->use_list[num] = 0; + } + + return obj; } -static void sweepobj(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj) +/* Recursively mark an object. If any references found are duff, then + * replace them with nulls. 
*/ +static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj) { int i; if (pdf_is_indirect(ctx, obj)) - obj = sweepref(ctx, doc, opts, obj); + { + int duff; + obj = markref(ctx, doc, opts, obj, &duff); + if (duff) + return 1; + } if (pdf_is_dict(ctx, obj)) { int n = pdf_dict_len(ctx, obj); for (i = 0; i < n; i++) - sweepobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)); + if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i))) + pdf_dict_put_val_drop(ctx, obj, i, pdf_new_null(ctx, doc)); } else if (pdf_is_array(ctx, obj)) { int n = pdf_array_len(ctx, obj); for (i = 0; i < n; i++) - sweepobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)); + if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i))) + pdf_array_put_drop(ctx, obj, i, pdf_new_null(ctx, doc)); } + + return 0; } /* @@ -2626,8 +2653,8 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w } /* Sweep & mark objects from the trailer */ - if (opts.do_garbage >= 1) - sweepobj(ctx, doc, &opts, pdf_trailer(ctx, doc)); + if (opts.do_garbage >= 1 || opts.do_linear) + (void)markobj(ctx, doc, &opts, pdf_trailer(ctx, doc)); else for (num = 0; num < xref_len; num++) opts.use_list[num] = 1; @@ -2645,14 +2672,12 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w renumberobjs(ctx, doc, &opts); /* Truncate the xref after compacting and renumbering */ - if (opts.do_garbage >= 2 && !opts.do_incremental) + if ((opts.do_garbage >= 2 || opts.do_linear) && !opts.do_incremental) while (xref_len > 0 && !opts.use_list[xref_len-1]) xref_len--; if (opts.do_linear) - { linearize(ctx, doc, &opts); - } writeobjects(ctx, doc, &opts, 0); |