summary | refs | log | tree | commit | diff
path: root/pdf
diff options
context:
space:
mode:
author: Robin Watts <robin.watts@artifex.com>, 2012-12-24 14:23:57 +0000
committer: Robin Watts <robin.watts@artifex.com>, 2013-01-03 13:05:17 +0000
commit: 511a2bf0411f29e4620d0a23dea8771976cd1a37 (patch)
tree: 9913d2df9b79bbd12db79d14f5a3ae9c9d93d831 /pdf
parent: 98cc01d82be792e600e13e88de9712fffa3240d5 (diff)
download: mupdf-511a2bf0411f29e4620d0a23dea8771976cd1a37.tar.xz
Improve mutool clean behaviour on broken streams.
When cleaning a file with a corrupt stream in it, historically mupdf would give up when it encountered such a stream. This is often not what is desired, as information can be lost. The changes herein allow us to use our best efforts when reading a stream, so that broken streams are reproduced in the output cleaned file. Problem found in a test file, pdf_001/2599.pdf.asan.58.1778, supplied by Mateusz "j00ru" Jurczyk and Gynvael Coldwind of the Google Security Team using Address Sanitizer. Many thanks!
Diffstat (limited to 'pdf')
-rw-r--r-- pdf/mupdf-internal.h | 2
-rw-r--r-- pdf/pdf_stream.c | 17
-rw-r--r-- pdf/pdf_write.c | 5
3 files changed, 15 insertions, 9 deletions
diff --git a/pdf/mupdf-internal.h b/pdf/mupdf-internal.h
index a9ee53e3..26f78202 100644
--- a/pdf/mupdf-internal.h
+++ b/pdf/mupdf-internal.h
@@ -188,7 +188,7 @@ fz_stream *pdf_open_stream_with_offset(pdf_document *doc, int num, int gen, pdf_
fz_stream *pdf_open_compressed_stream(fz_context *ctx, fz_compressed_buffer *);
fz_stream *pdf_open_contents_stream(pdf_document *xref, pdf_obj *obj);
fz_buffer *pdf_load_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
-fz_buffer *pdf_load_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
+fz_buffer *pdf_load_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen, int *truncated);
fz_stream *pdf_open_raw_renumbered_stream(pdf_document *doc, int num, int gen, int orig_num, int orig_gen);
void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf);
diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c
index 1efe116c..77bf7c64 100644
--- a/pdf/pdf_stream.c
+++ b/pdf/pdf_stream.c
@@ -415,8 +415,8 @@ pdf_guess_filter_length(int len, char *filter)
return len;
}
-fz_buffer *
-pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params)
+static fz_buffer *
+pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, fz_compression_params *params, int *truncated)
{
fz_context *ctx = xref->ctx;
fz_stream *stm = NULL;
@@ -444,7 +444,10 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int or
fz_try(ctx)
{
- buf = fz_read_all(stm, len);
+ if (truncated)
+ buf = fz_read_best(stm, len, truncated);
+ else
+ buf = fz_read_all(stm, len);
}
fz_always(ctx)
{
@@ -464,13 +467,13 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int or
fz_buffer *
pdf_load_stream(pdf_document *xref, int num, int gen)
{
- return pdf_load_image_stream(xref, num, gen, num, gen, NULL);
+ return pdf_load_image_stream(xref, num, gen, num, gen, NULL, NULL);
}
fz_buffer *
-pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen)
+pdf_load_renumbered_stream(pdf_document *xref, int num, int gen, int orig_num, int orig_gen, int *truncated)
{
- return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL);
+ return pdf_load_image_stream(xref, num, gen, orig_num, orig_gen, NULL, truncated);
}
fz_compressed_buffer *
@@ -481,7 +484,7 @@ pdf_load_compressed_stream(pdf_document *xref, int num, int gen)
fz_try(ctx)
{
- bc->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &bc->params);
+ bc->buffer = pdf_load_image_stream(xref, num, gen, num, gen, &bc->params, NULL);
}
fz_catch(ctx)
{
diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c
index a94bd044..c3eda9d8 100644
--- a/pdf/pdf_write.c
+++ b/pdf/pdf_write.c
@@ -1511,8 +1511,11 @@ static void expandstream(pdf_document *xref, pdf_write_options *opts, pdf_obj *o
fz_context *ctx = xref->ctx;
int orig_num = opts->rev_renumber_map[num];
int orig_gen = opts->rev_gen_list[num];
+ int truncated = 0;
- buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen);
+ buf = pdf_load_renumbered_stream(xref, num, gen, orig_num, orig_gen, (opts->continue_on_error ? &truncated : NULL));
+ if (truncated && opts->errors)
+ (*opts->errors)++;
obj = pdf_copy_dict(ctx, obj_orig);
pdf_dict_dels(obj, "Filter");