summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
author Robin Watts <robin.watts@artifex.com> 2015-02-27 14:39:21 +0000
committer Robin Watts <robin.watts@artifex.com> 2015-02-27 17:20:32 +0000
commit 8ff8784def2fbf49a303a86259919ff143050c5f (patch)
tree 0bef30f78a42529ff56c99172da0cb2b3b3c25d6 /source
parent 060ae5d3483fb4f060ecbbf2d706c1159b760114 (diff)
download mupdf-8ff8784def2fbf49a303a86259919ff143050c5f.tar.xz
Bug 695853: Fix pdf clean operation with invalid refs in input file.
MuPDF (and other PDF readers) treat invalid references as 'null' objects. For instance, in the supplied file, object 239 is supposedly free, but a reference is made to it. When cleaning (or linearising) a file, we renumber objects; such illegal refs then end up pointing somewhere else. The workaround here is simply to spot the invalid refs during the mark phase, and to replace each one with a null object in the dictionary or array that holds it.
Diffstat (limited to 'source')
-rw-r--r-- source/pdf/pdf-object.c 26
-rw-r--r-- source/pdf/pdf-write.c 47
2 files changed, 62 insertions, 11 deletions
diff --git a/source/pdf/pdf-object.c b/source/pdf/pdf-object.c
index d4b008fb..1a0b9abf 100644
--- a/source/pdf/pdf-object.c
+++ b/source/pdf/pdf-object.c
@@ -566,6 +566,13 @@ pdf_array_put(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *item)
}
void
+pdf_array_put_drop(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *item)
+{
+ pdf_array_put(ctx, obj, i, item);
+ pdf_drop_obj(ctx, item);
+}
+
+void
pdf_array_push(fz_context *ctx, pdf_obj *obj, pdf_obj *item)
{
RESOLVE(obj);
@@ -880,6 +887,25 @@ pdf_dict_get_val(fz_context *ctx, pdf_obj *obj, int i)
return obj->u.d.items[i].v;
}
+void
+pdf_dict_put_val_drop(fz_context *ctx, pdf_obj *obj, int i, pdf_obj *new_obj)
+{
+ RESOLVE(obj);
+ if (!obj || obj->kind != PDF_DICT)
+ {
+ pdf_drop_obj(ctx, new_obj);
+ return;
+ }
+ if (i < 0 || i >= obj->u.d.len)
+ {
+ /* FIXME: Should probably extend the dict here */
+ pdf_drop_obj(ctx, new_obj);
+ return;
+ }
+ pdf_drop_obj(ctx, obj->u.d.items[i].v);
+ obj->u.d.items[i].v = new_obj;
+}
+
static int
pdf_dict_finds(fz_context *ctx, pdf_obj *obj, const char *key, int *location)
{
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index 6f5088c6..dc063762 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -504,13 +504,22 @@ objects_dump(fz_context *ctx, pdf_document *doc, pdf_write_options *opts)
* Garbage collect objects not reachable from the trailer.
*/
-static pdf_obj *sweepref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj)
+/* Mark a reference. If it's been marked already, return NULL (as no further
+ * processing is required). If it's not, return the resolved object so
+ * that we can continue our recursive marking. If it's a duff reference
+ * return the fact so that we can remove the reference at source.
+ */
+static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj, int *duff)
{
int num = pdf_to_num(ctx, obj);
int gen = pdf_to_gen(ctx, obj);
if (num <= 0 || num >= pdf_xref_len(ctx, doc))
+ {
+ *duff = 1;
return NULL;
+ }
+ *duff = 0;
if (opts->use_list[num])
return NULL;
@@ -536,29 +545,47 @@ static pdf_obj *sweepref(fz_context *ctx, pdf_document *doc, pdf_write_options *
/* Leave broken */
}
- return pdf_resolve_indirect(ctx, obj);
+ obj = pdf_resolve_indirect(ctx, obj);
+ if (obj == NULL || pdf_is_null(ctx, obj))
+ {
+ *duff = 1;
+ opts->use_list[num] = 0;
+ }
+
+ return obj;
}
-static void sweepobj(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj)
+/* Recursively mark an object. If any references found are duff, then
+ * replace them with nulls. */
+static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj)
{
int i;
if (pdf_is_indirect(ctx, obj))
- obj = sweepref(ctx, doc, opts, obj);
+ {
+ int duff;
+ obj = markref(ctx, doc, opts, obj, &duff);
+ if (duff)
+ return 1;
+ }
if (pdf_is_dict(ctx, obj))
{
int n = pdf_dict_len(ctx, obj);
for (i = 0; i < n; i++)
- sweepobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i));
+ if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)))
+ pdf_dict_put_val_drop(ctx, obj, i, pdf_new_null(ctx, doc));
}
else if (pdf_is_array(ctx, obj))
{
int n = pdf_array_len(ctx, obj);
for (i = 0; i < n; i++)
- sweepobj(ctx, doc, opts, pdf_array_get(ctx, obj, i));
+ if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)))
+ pdf_array_put_drop(ctx, obj, i, pdf_new_null(ctx, doc));
}
+
+ return 0;
}
/*
@@ -2626,8 +2653,8 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w
}
/* Sweep & mark objects from the trailer */
- if (opts.do_garbage >= 1)
- sweepobj(ctx, doc, &opts, pdf_trailer(ctx, doc));
+ if (opts.do_garbage >= 1 || opts.do_linear)
+ (void)markobj(ctx, doc, &opts, pdf_trailer(ctx, doc));
else
for (num = 0; num < xref_len; num++)
opts.use_list[num] = 1;
@@ -2645,14 +2672,12 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w
renumberobjs(ctx, doc, &opts);
/* Truncate the xref after compacting and renumbering */
- if (opts.do_garbage >= 2 && !opts.do_incremental)
+ if ((opts.do_garbage >= 2 || opts.do_linear) && !opts.do_incremental)
while (xref_len > 0 && !opts.use_list[xref_len-1])
xref_len--;
if (opts.do_linear)
- {
linearize(ctx, doc, &opts);
- }
writeobjects(ctx, doc, &opts, 0);