diff options
author | Robin Watts <robin.watts@artifex.com> | 2015-02-27 14:39:21 +0000 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2015-02-27 17:20:32 +0000 |
commit | 8ff8784def2fbf49a303a86259919ff143050c5f (patch) | |
tree | 0bef30f78a42529ff56c99172da0cb2b3b3c25d6 /source | |
parent | 060ae5d3483fb4f060ecbbf2d706c1159b760114 (diff) | |
download | mupdf-8ff8784def2fbf49a303a86259919ff143050c5f.tar.xz |
Bug 695853: Fix pdf clean operation with invalid refs in input file.
MuPDF (and other PDF readers) treat invalid references as 'null'
objects. For instance, in the supplied file, object 239 is supposedly
free, but a reference is made to it.
When cleaning (or linearising) a file, we renumber objects; such
illegal refs then end up pointing somewhere else.
The workaround here is simply to spot the invalid refs during the
mark phase, and to replace each such reference with null.
Diffstat (limited to 'source')
-rw-r--r-- | source/pdf/pdf-object.c | 26 | ||||
-rw-r--r-- | source/pdf/pdf-write.c | 47 |
2 files changed, 62 insertions, 11 deletions
diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c index d4b008fb..1a0b9abf 100644 --- a/source/pdf/pdf-object.c +++ b/source/pdf/pdf-object.c @@ -566,6 +566,13 @@ pdf_array_put(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *item) } void +pdf_array_put_drop(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *item) +{ + pdf_array_put(ctx, obj, i, item); + pdf_drop_obj(ctx, item); +} + +void pdf_array_push(fz_context *ctx, pdf_obj *obj, pdf_obj *item) { RESOLVE(obj); @@ -880,6 +887,25 @@ pdf_dict_get_val(fz_context *ctx, pdf_obj *obj, int i) return obj->u.d.items[i].v; } +void +pdf_dict_put_val_drop(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *new_obj) +{ + RESOLVE(obj); + if (!obj || obj->kind != PDF_DICT) + { + pdf_drop_obj(ctx, new_obj); + return; + } + if (i < 0 || i >= obj->u.d.len) + { + /* FIXME: Should probably extend the dict here */ + pdf_drop_obj(ctx, new_obj); + return; + } + pdf_drop_obj(ctx, obj->u.d.items[i].v); + obj->u.d.items[i].v = new_obj; +} + static int pdf_dict_finds(fz_context *ctx, pdf_obj *obj, const char *key, int *location) { diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c index 6f5088c6..dc063762 100644 --- a/source/pdf/pdf-write.c +++ b/source/pdf/pdf-write.c @@ -504,13 +504,22 @@ objects_dump(fz_context *ctx, pdf_document *doc, pdf_write_options *opts) * Garbage collect objects not reachable from the trailer. */ -static pdf_obj *sweepref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj) +/* Mark a reference. If it's been marked already, return NULL (as no further + * processing is required). If it's not, return the resolved object so + * that we can continue our recursive marking. If it's a duff reference + * return the fact so that we can remove the reference at source. 
+ */ +static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj, int *duff) { int num = pdf_to_num(ctx, obj); int gen = pdf_to_gen(ctx, obj); if (num <= 0 || num >= pdf_xref_len(ctx, doc)) + { + *duff = 1; return NULL; + } + *duff = 0; if (opts->use_list[num]) return NULL; @@ -536,29 +545,47 @@ static pdf_obj *sweepref(fz_context *ctx, pdf_document *doc, pdf_write_options * /* Leave broken */ } - return pdf_resolve_indirect(ctx, obj); + obj = pdf_resolve_indirect(ctx, obj); + if (obj == NULL || pdf_is_null(ctx, obj)) + { + *duff = 1; + opts->use_list[num] = 0; + } + + return obj; } -static void sweepobj(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj) +/* Recursively mark an object. If any references found are duff, then + * replace them with nulls. */ +static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj) { int i; if (pdf_is_indirect(ctx, obj)) - obj = sweepref(ctx, doc, opts, obj); + { + int duff; + obj = markref(ctx, doc, opts, obj, &duff); + if (duff) + return 1; + } if (pdf_is_dict(ctx, obj)) { int n = pdf_dict_len(ctx, obj); for (i = 0; i < n; i++) - sweepobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)); + if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i))) + pdf_dict_put_val_drop(ctx, obj, i, pdf_new_null(ctx, doc)); } else if (pdf_is_array(ctx, obj)) { int n = pdf_array_len(ctx, obj); for (i = 0; i < n; i++) - sweepobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)); + if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i))) + pdf_array_put_drop(ctx, obj, i, pdf_new_null(ctx, doc)); } + + return 0; } /* @@ -2626,8 +2653,8 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w } /* Sweep & mark objects from the trailer */ - if (opts.do_garbage >= 1) - sweepobj(ctx, doc, &opts, pdf_trailer(ctx, doc)); + if (opts.do_garbage >= 1 || opts.do_linear) + (void)markobj(ctx, doc, &opts, pdf_trailer(ctx, doc)); else for (num = 0; 
num < xref_len; num++) opts.use_list[num] = 1; @@ -2645,14 +2672,12 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w renumberobjs(ctx, doc, &opts); /* Truncate the xref after compacting and renumbering */ - if (opts.do_garbage >= 2 && !opts.do_incremental) + if ((opts.do_garbage >= 2 || opts.do_linear) && !opts.do_incremental) while (xref_len > 0 && !opts.use_list[xref_len-1]) xref_len--; if (opts.do_linear) - { linearize(ctx, doc, &opts); - } writeobjects(ctx, doc, &opts, 0); |