From 511a2bf0411f29e4620d0a23dea8771976cd1a37 Mon Sep 17 00:00:00 2001
From: Robin Watts
Date: Mon, 24 Dec 2012 14:23:57 +0000
Subject: Improve mutool clean behaviour on broken streams.

When cleaning a file with a corrupt stream in it, historically mupdf
would give up when it encountered such a stream. This is often not
what is desired, as information can be lost.

The changes herein allow us to use our best efforts when reading
a stream, so that broken streams are reproduced in the output
cleaned file.

Problem found in a test file, pdf_001/2599.pdf.asan.58.1778 supplied
by Mateusz "j00ru" Jurczyk and Gynvael Coldwind of the Google Security
Team using Address Sanitizer. Many thanks!
---
 pdf/mupdf-internal.h |  2 +-
 pdf/pdf_stream.c     | 17 ++++++++++-------
 pdf/pdf_write.c      |  5 ++++-
 3 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'pdf')

diff --git a/pdf/mupdf-internal.h b/pdf/mupdf-internal.h
index a9ee53e3..26f78202 100644
--- a/pdf/mupdf-internal.h
+++ b/pdf/mupdf-internal.h
@@ -188,7 +188,7 @@ fz_stream *pdf_open_stream_with_offset(pdf_document *doc, int num, int gen, pdf_
 fz_stream *pdf_open_compressed_stream(fz_context *ctx, fz_compressed_buffer *);
 fz_stream *pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj);
 fz_buffer *pdf_load_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
-fz_buffer *pdf_load_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
+fz_buffer *pdf_load_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen, int *truncated);
 fz_stream *pdf_open_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
 
 void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf);
diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c
index 1efe116c..77bf7c64 100644
--- a/pdf/pdf_stream.c
+++ b/pdf/pdf_stream.c
@@ -415,8 +415,8 @@ pdf_guess_filter_length(int len, char *filter)
 	return len;
 }
 
-fz_buffer *
-pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params)
+static fz_buffer *
+pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params, int *truncated)
 {
 	fz_context *ctx = xref->ctx;
 	fz_stream *stm = NULL;
@@ -444,7 +444,10 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int or
 
 	fz_try(ctx)
 	{
-		buf = fz_read_all(stm, len);
+		if (truncated)
+			buf = fz_read_best(stm, len, truncated);
+		else
+			buf = fz_read_all(stm, len);
 	}
 	fz_always(ctx)
 	{
@@ -464,13 +467,13 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int or
 fz_buffer *
 pdf_load_stream(pdf_document *xref, int num, int gen)
 {
-	return pdf_load_image_stream(xref, num, gen, num, gen, NULL);
+	return pdf_load_image_stream(xref, num, gen, num, gen, NULL, NULL);
 }
 
 fz_buffer *
-pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
+pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, int *truncated)
 {
-	return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL);
+	return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL, truncated);
 }
 
 fz_compressed_buffer *
@@ -481,7 +484,7 @@ pdf_load_compressed_stream(pdf_document *xref, int num, int gen)
 
 	fz_try(ctx)
 	{
-		bc->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &bc->params);
+		bc->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &bc->params, NULL);
 	}
 	fz_catch(ctx)
 	{
diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c
index a94bd044..c3eda9d8 100644
--- a/pdf/pdf_write.c
+++ b/pdf/pdf_write.c
@@ -1511,8 +1511,11 @@ static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *o
 	fz_context *ctx = xref->ctx;
 	int orig_num = opts->rev_renumber_map[num];
 	int orig_gen = opts->rev_gen_list[num];
+	int truncated = 0;
 
-	buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen);
+	buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen, (opts->continue_on_error ? &truncated : NULL));
+	if (truncated && opts->errors)
+		(*opts->errors)++;
 
 	obj = pdf_copy_dict(ctx, obj_orig);
 	pdf_dict_dels(obj, "Filter");
--
cgit v1.2.3