summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Robin Watts <robin.watts@artifex.com> 2012-05-09 19:13:14 +0100
committer: Robin Watts <robin.watts@artifex.com> 2012-05-10 13:38:26 +0100
commit: 90a289b18e2936bd2e585265964474df31c0dd5f (patch)
tree: ae9678288dd715bdf7c052f6c0ad4dc10a683507
parent: b0beab3eecdb1681f5fb8a163278803aad852b81 (diff)
download: mupdf-90a289b18e2936bd2e585265964474df31c0dd5f.tar.xz
mupdfclean - update to allow renumbering of encrypted objects
mupdfclean (or, more correctly, the pdf_write function) currently has a limitation: objects cannot be renumbered when encryption is in use. This is because the object/generation number is used to seed the encryption of each stream, so renumbering an object makes its stream unreadable. The solution used here is to provide extended functions that take both the new object/generation number and the original object/generation number. The original numbers are used only for setting up the encryption. pdf_write now keeps track of the original object/generation number for each object. This fix is important if we ever want to output linearized PDF, as that requires us to be able to renumber objects into a very specific order. We also make a fix in removeduplicateobjs that should only matter in the case where we fail to read an object correctly.
-rw-r--r-- pdf/mupdf-internal.h 7
-rw-r--r-- pdf/pdf_image.c 4
-rw-r--r-- pdf/pdf_stream.c 58
-rw-r--r-- pdf/pdf_write.c 56
4 files changed, 88 insertions, 37 deletions
diff --git a/pdf/mupdf-internal.h b/pdf/mupdf-internal.h
index 3396bb8a..e947cbbd 100644
--- a/pdf/mupdf-internal.h
+++ b/pdf/mupdf-internal.h
@@ -194,11 +194,14 @@ struct pdf_document_s
void pdf_cache_object(pdf_document *doc, int num, int gen);
fz_stream *pdf_open_inline_stream(pdf_document *doc, pdf_obj *stmobj, int length, fz_stream *chain, pdf_image_params *params);
-fz_buffer *pdf_load_image_stream(pdf_document *doc, int num, int gen, pdf_image_params *params);
-fz_stream *pdf_open_image_stream(pdf_document *doc, int num, int gen, pdf_image_params *params);
+fz_buffer *pdf_load_image_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params);
+fz_stream *pdf_open_image_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params);
fz_stream *pdf_open_stream_with_offset(pdf_document *doc, int num, int gen, pdf_obj *dict, int stm_ofs);
fz_stream *pdf_open_image_decomp_stream(fz_context *ctx, fz_buffer *, pdf_image_params *params, int *factor);
fz_stream *pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj);
+fz_buffer *pdf_load_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
+fz_buffer *pdf_load_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
+fz_stream *pdf_open_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf);
void pdf_repair_obj_stms(pdf_document *doc);
diff --git a/pdf/pdf_image.c b/pdf/pdf_image.c
index f4ddbc72..b4571bbe 100644
--- a/pdf/pdf_image.c
+++ b/pdf/pdf_image.c
@@ -430,7 +430,9 @@ pdf_load_image_imp(pdf_document *xref, pdf_obj *rdb, pdf_obj *dict, fz_stream *c
{
/* Just load the compressed image data now and we can
* decode it on demand. */
- image->buffer = pdf_load_image_stream(xref, pdf_to_num(dict), pdf_to_gen(dict), &image->params);
+ int num = pdf_to_num(dict);
+ int gen = pdf_to_gen(dict);
+ image->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &image->params);
break; /* Out of fz_try */
}
diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c
index 3086fbc9..8cc755f2 100644
--- a/pdf/pdf_stream.c
+++ b/pdf/pdf_stream.c
@@ -222,8 +222,11 @@ build_filter_chain(fz_stream *chain, pdf_document *xref, pdf_obj *fs, pdf_obj *p
/*
* Build a filter for reading raw stream data.
- * This is a null filter to constrain reading to the
- * stream length, followed by a decryption filter.
+ * This is a null filter to constrain reading to the stream length (and to
+ * allow for other people accessing the file), followed by a decryption
+ * filter.
+ *
+ * num and gen are used purely to seed the encryption.
*/
static fz_stream *
pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int num, int gen, int offset)
@@ -302,14 +305,17 @@ pdf_open_inline_stream(pdf_document *xref, pdf_obj *stmobj, int length, fz_strea
/*
* Open a stream for reading the raw (compressed but decrypted) data.
- * Using xref->file while this is open is a bad idea.
*/
fz_stream *
pdf_open_raw_stream(pdf_document *xref, int num, int gen)
{
- pdf_xref_entry *x;
+ return pdf_open_raw_renumbered_stream(xref, num, gen, num, gen);
+}
- fz_var(x);
+fz_stream *
+pdf_open_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
+{
+ pdf_xref_entry *x;
if (num < 0 || num >= xref->len)
fz_throw(xref->ctx, "object id out of range (%d %d R)", num, gen);
@@ -322,7 +328,7 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen)
if (x->stm_ofs == 0)
fz_throw(xref->ctx, "object is not a stream");
- return pdf_open_raw_filter(xref->file, xref, x->obj, num, gen, x->stm_ofs);
+ return pdf_open_raw_filter(xref->file, xref, x->obj, orig_num, orig_gen, x->stm_ofs);
}
/*
@@ -333,11 +339,11 @@ pdf_open_raw_stream(pdf_document *xref, int num, int gen)
fz_stream *
pdf_open_stream(pdf_document *xref, int num, int gen)
{
- return pdf_open_image_stream(xref, num, gen, NULL);
+ return pdf_open_image_stream(xref, num, gen, num, gen, NULL);
}
fz_stream *
-pdf_open_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *params)
+pdf_open_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params)
{
pdf_xref_entry *x;
@@ -352,7 +358,7 @@ pdf_open_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa
if (x->stm_ofs == 0)
fz_throw(xref->ctx, "object is not a stream");
- return pdf_open_filter(xref->file, xref, x->obj, num, gen, x->stm_ofs, params);
+ return pdf_open_filter(xref->file, xref, x->obj, orig_num, orig_gen, x->stm_ofs, params);
}
fz_stream *
@@ -414,6 +420,12 @@ pdf_open_stream_with_offset(pdf_document *xref, int num, int gen, pdf_obj *dict,
fz_buffer *
pdf_load_raw_stream(pdf_document *xref, int num, int gen)
{
+ return pdf_load_raw_renumbered_stream(xref, num, gen, num, gen);
+}
+
+fz_buffer *
+pdf_load_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
+{
fz_stream *stm;
pdf_obj *dict;
int len;
@@ -426,7 +438,7 @@ pdf_load_raw_stream(pdf_document *xref, int num, int gen)
pdf_drop_obj(dict);
- stm = pdf_open_raw_stream(xref, num, gen);
+ stm = pdf_open_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen);
/* RJW: "cannot open raw stream (%d %d R)", num, gen */
buf = fz_read_all(stm, len);
@@ -458,11 +470,17 @@ pdf_guess_filter_length(int len, char *filter)
fz_buffer *
pdf_load_stream(pdf_document *xref, int num, int gen)
{
- return pdf_load_image_stream(xref, num, gen, NULL);
+ return pdf_load_image_stream(xref, num, gen, num, gen, NULL);
}
fz_buffer *
-pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *params)
+pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
+{
+ return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL);
+}
+
+fz_buffer *
+pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, pdf_image_params *params)
{
fz_context *ctx = xref->ctx;
fz_stream *stm = NULL;
@@ -484,7 +502,7 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, pdf_image_params *pa
pdf_drop_obj(dict);
- stm = pdf_open_image_stream(xref, num, gen, params);
+ stm = pdf_open_image_stream(xref, num, gen, orig_num, orig_gen, params);
/* RJW: "cannot open stream (%d %d R)", num, gen */
fz_try(ctx)
@@ -535,16 +553,16 @@ fz_stream *
pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj)
{
fz_context *ctx = xref->ctx;
+ int num, gen;
if (pdf_is_array(obj))
- {
return pdf_open_object_array(xref, obj);
- }
- else if (pdf_is_stream(xref, pdf_to_num(obj), pdf_to_gen(obj)))
- {
- return pdf_open_image_stream(xref, pdf_to_num(obj), pdf_to_gen(obj), NULL);
- }
- fz_warn(ctx, "pdf object stream missing (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj));
+ num = pdf_to_num(obj);
+ gen = pdf_to_gen(obj);
+ if (pdf_is_stream(xref, num, gen))
+ return pdf_open_image_stream(xref, num, gen, num, gen, NULL);
+
+ fz_warn(ctx, "pdf object stream missing (%d %d R)", num, gen);
return NULL;
}
diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c
index e14cfa28..e2086e5c 100644
--- a/pdf/pdf_write.c
+++ b/pdf/pdf_write.c
@@ -13,6 +13,8 @@ struct pdf_write_options_s
int *ofslist;
int *genlist;
int *renumbermap;
+ int *revrenumbermap;
+ int *revgenlist;
};
/*
@@ -91,7 +93,7 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
for (other = 1; other < num; other++)
{
pdf_obj *a, *b;
- int match;
+ int differ, newnum;
if (num == other || !opts->uselist[num] || !opts->uselist[other])
continue;
@@ -104,14 +106,14 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
*/
fz_try(ctx)
{
- match = (pdf_is_stream(xref, num, 0) || pdf_is_stream(xref, other, 0));
+ differ = (pdf_is_stream(xref, num, 0) || pdf_is_stream(xref, other, 0));
}
fz_catch(ctx)
{
/* Assume different */
- match = 0;
+ differ = 1;
}
- if (match)
+ if (differ)
continue;
a = xref->table[num].obj;
@@ -124,8 +126,10 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
continue;
/* Keep the lowest numbered object */
- opts->renumbermap[num] = MIN(num, other);
- opts->renumbermap[other] = MIN(num, other);
+ newnum = MIN(num, other);
+ opts->renumbermap[num] = newnum;
+ opts->renumbermap[other] = newnum;
+ opts->revrenumbermap[newnum] = num; /* Either will do */
opts->uselist[MAX(num, other)] = 0;
/* One duplicate was found, do not look for another */
@@ -136,6 +140,8 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts)
/*
* Renumber objects sequentially so the xref is more compact
+ *
+ * This code assumes that any opts->renumbermap[n] <= n for all n.
*/
static void compactxref(pdf_document *xref, pdf_write_options *opts)
@@ -152,10 +158,25 @@ static void compactxref(pdf_document *xref, pdf_write_options *opts)
newnum = 1;
for (num = 1; num < xref->len; num++)
{
- if (opts->uselist[num] && opts->renumbermap[num] == num)
+ /* If it's not used, map it to zero */
+ if (!opts->uselist[num])
+ {
+ opts->renumbermap[num] = 0;
+ }
+ /* If it's not moved, compact it. */
+ else if (opts->renumbermap[num] == num)
+ {
+ opts->revrenumbermap[newnum] = opts->revrenumbermap[num];
+ opts->revgenlist[newnum] = opts->revgenlist[num];
opts->renumbermap[num] = newnum++;
- else if (opts->renumbermap[num] != num)
+ }
+ /* Otherwise it's used, and moved. We know that it must have
+ * moved down, so the place it's moved to will be in the right
+ * place already. */
+ else
+ {
opts->renumbermap[num] = opts->renumbermap[opts->renumbermap[num]];
+ }
}
}
@@ -384,8 +405,10 @@ static void copystream(pdf_document *xref, pdf_write_options *opts, pdf_obj *obj
fz_buffer *buf, *tmp;
pdf_obj *newlen;
fz_context *ctx = xref->ctx;
+ int orig_num = opts->revrenumbermap[num];
+ int orig_gen = opts->revgenlist[num];
- buf = pdf_load_raw_stream(xref, num, gen);
+ buf = pdf_load_raw_renumbered_stream(xref, num, gen, orig_num, orig_gen);
if (opts->doascii && isbinarystream(buf))
{
@@ -414,8 +437,10 @@ static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *o
fz_buffer *buf, *tmp;
pdf_obj *newlen;
fz_context *ctx = xref->ctx;
+ int orig_num = opts->revrenumbermap[num];
+ int orig_gen = opts->revgenlist[num];
- buf = pdf_load_stream(xref, num, gen);
+ buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen);
pdf_dict_dels(obj, "Filter");
pdf_dict_dels(obj, "DecodeParms");
@@ -580,6 +605,8 @@ void pdf_write(pdf_document *xref, char *filename, fz_write_options *fz_opts)
opts.ofslist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
opts.genlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
opts.renumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+ opts.revrenumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
+ opts.revgenlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int));
fprintf(opts.out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10);
fprintf(opts.out, "%%\316\274\341\277\246\n\n");
@@ -589,6 +616,8 @@ void pdf_write(pdf_document *xref, char *filename, fz_write_options *fz_opts)
opts.uselist[num] = 0;
opts.ofslist[num] = 0;
opts.renumbermap[num] = num;
+ opts.revrenumbermap[num] = num;
+ opts.revgenlist[num] = xref->table[num].gen;
}
/* Make sure any objects hidden in compressed streams have been loaded */
@@ -607,10 +636,7 @@ void pdf_write(pdf_document *xref, char *filename, fz_write_options *fz_opts)
compactxref(xref, &opts);
/* Make renumbering affect all indirect references and update xref */
- /* Do not renumber objects if encryption is in use, as the object
- * numbers are baked into the streams/strings, and we can't currently
- * cope with moving them. See bug 692627. */
- if (opts.dogarbage >= 2 && !xref->crypt)
+ if (opts.dogarbage >= 2)
renumberobjs(xref, &opts);
for (num = 0; num < xref->len; num++)
@@ -653,6 +679,8 @@ void pdf_write(pdf_document *xref, char *filename, fz_write_options *fz_opts)
fz_free(ctx, opts.ofslist);
fz_free(ctx, opts.genlist);
fz_free(ctx, opts.renumbermap);
+ fz_free(ctx, opts.revrenumbermap);
+ fz_free(ctx, opts.revgenlist);
fclose(opts.out);
}
fz_catch(ctx)