diff options
author | Robin Watts <robin.watts@artifex.com> | 2012-12-24 14:23:57 +0000 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2013-01-03 13:05:17 +0000 |
commit | 511a2bf0411f29e4620d0a23dea8771976cd1a37 (patch) | |
tree | 9913d2df9b79bbd12db79d14f5a3ae9c9d93d831 /pdf | |
parent | 98cc01d82be792e600e13e88de9712fffa3240d5 (diff) | |
download | mupdf-511a2bf0411f29e4620d0a23dea8771976cd1a37.tar.xz |
Improve mutool clean behaviour on broken streams.
When cleaning a file with a corrupt stream in it, historically mupdf
would give up when it encountered such a stream. This is often not
what is desired, as information can be lost.
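The changes herein allow us to use our best efforts when reading
a stream: when cleaning with the continue-on-error option set, the
stream is loaded with a best-effort read, and a truncated read is
counted as an error rather than ending the clean, so that broken
streams are reproduced in the output cleaned file.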
Problem found in a test file, pdf_001/2599.pdf.asan.58.1778, supplied
by Mateusz "j00ru" Jurczyk and Gynvael Coldwind of the Google
Security Team using Address Sanitizer. Many thanks!
Diffstat (limited to 'pdf')
-rw-r--r-- | pdf/mupdf-internal.h | 2 | +1 -1 |
-rw-r--r-- | pdf/pdf_stream.c | 17 | +10 -7 |
-rw-r--r-- | pdf/pdf_write.c | 5 | +4 -1 |
3 files changed, 15 insertions, 9 deletions
```diff
diff --git a/pdf/mupdf-internal.h b/pdf/mupdf-internal.h
index a9ee53e3..26f78202 100644
--- a/pdf/mupdf-internal.h
+++ b/pdf/mupdf-internal.h
@@ -188,7 +188,7 @@ fz_stream *pdf_open_stream_with_offset(pdf_document *doc, int num, int gen, pdf_
 fz_stream *pdf_open_compressed_stream(fz_context *ctx, fz_compressed_buffer *);
 fz_stream *pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj);
 fz_buffer *pdf_load_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
-fz_buffer *pdf_load_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
+fz_buffer *pdf_load_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen, int *truncated);
 fz_stream *pdf_open_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
 
 void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf);
diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c
index 1efe116c..77bf7c64 100644
--- a/pdf/pdf_stream.c
+++ b/pdf/pdf_stream.c
@@ -415,8 +415,8 @@ pdf_guess_filter_length(int len, char *filter)
 	return len;
 }
 
-fz_buffer *
-pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params)
+static fz_buffer *
+pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params, int *truncated)
 {
 	fz_context *ctx = xref->ctx;
 	fz_stream *stm = NULL;
@@ -444,7 +444,10 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int or
 
 	fz_try(ctx)
 	{
-		buf = fz_read_all(stm, len);
+		if (truncated)
+			buf = fz_read_best(stm, len, truncated);
+		else
+			buf = fz_read_all(stm, len);
 	}
 	fz_always(ctx)
 	{
@@ -464,13 +467,13 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int or
 fz_buffer *
 pdf_load_stream(pdf_document *xref, int num, int gen)
 {
-	return pdf_load_image_stream(xref, num, gen, num, gen, NULL);
+	return pdf_load_image_stream(xref, num, gen, num, gen, NULL, NULL);
 }
 
 fz_buffer *
-pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
+pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, int *truncated)
 {
-	return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL);
+	return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL, truncated);
 }
 
 fz_compressed_buffer *
@@ -481,7 +484,7 @@ pdf_load_compressed_stream(pdf_document *xref, int num, int gen)
 
 	fz_try(ctx)
 	{
-		bc->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &bc->params);
+		bc->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &bc->params, NULL);
 	}
 	fz_catch(ctx)
 	{
diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c
index a94bd044..c3eda9d8 100644
--- a/pdf/pdf_write.c
+++ b/pdf/pdf_write.c
@@ -1511,8 +1511,11 @@ static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *o
 	fz_context *ctx = xref->ctx;
 	int orig_num = opts->rev_renumber_map[num];
 	int orig_gen = opts->rev_gen_list[num];
+	int truncated = 0;
 
-	buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen);
+	buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen, (opts->continue_on_error ? &truncated : NULL));
+	if (truncated && opts->errors)
+		(*opts->errors)++;
 
 	obj = pdf_copy_dict(ctx, obj_orig);
 	pdf_dict_dels(obj, "Filter");
```