diff options
author | Robin Watts <robin.watts@artifex.com> | 2015-10-01 18:05:46 +0100 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2015-10-02 15:53:19 +0100 |
commit | 1acaaf2b40614401378aa697de47093be9f390fe (patch) | |
tree | 884597d2a9345e93aa55f19aba40b9d2f40f8100 /source | |
parent | 1cf9a6c5092adce610f3e6b33219e09b4383249f (diff) | |
download | mupdf-1acaaf2b40614401378aa697de47093be9f390fe.tar.xz |
Bug 696129: Be more forgiving of broken files.
The PDF spec says that old format xrefs should start with:
xref\n<start> <len>
The example file in question has:
xref <start> <len>
which confuses our parsing code. Update the parse code to avoid
using fz_read_line, and to instead work on a char level.
Also, downgrade the error given when the first object is not free
to be a warning. Now we do 'just in time' repair, we are probably
better able to cope.
Diffstat (limited to 'source')
-rw-r--r-- | source/pdf/pdf-xref.c | 37 |
1 files changed, 32 insertions, 5 deletions
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index 727dcda7..7c722c9c 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -587,6 +587,31 @@ pdf_read_start_xref(fz_context *ctx, pdf_document *doc) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find startxref"); } +static void +fz_skip_space(fz_context *ctx, fz_stream *stm) +{ + do + { + int c = fz_peek_byte(ctx, stm); + if (c > 32 && c != EOF) + return; + (void)fz_read_byte(ctx, stm); + } + while (1); +} + +static int fz_skip_string(fz_context *ctx, fz_stream *stm, const char *str) +{ + while (*str) + { + int c = fz_peek_byte(ctx, stm); + if (c == EOF || c != *str++) + return 1; + (void)fz_read_byte(ctx, stm); + } + return 0; +} + /* * trailer dictionary */ @@ -608,9 +633,10 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b /* Record the current file read offset so that we can reinstate it */ ofs = fz_tell(ctx, doc->file); - fz_read_line(ctx, doc->file, buf->scratch, buf->size); - if (strncmp(buf->scratch, "xref", 4) != 0) + fz_skip_space(ctx, doc->file); + if (fz_skip_string(ctx, doc->file, "xref")) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker"); + fz_skip_space(ctx, doc->file); while (1) { @@ -752,9 +778,10 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf) int xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf); pdf_xref_entry *table; - fz_read_line(ctx, file, buf->scratch, buf->size); - if (strncmp(buf->scratch, "xref", 4) != 0) + fz_skip_space(ctx, doc->file); + if (fz_skip_string(ctx, doc->file, "xref")) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker"); + fz_skip_space(ctx, doc->file); while (1) { @@ -1151,7 +1178,7 @@ pdf_load_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf) } /* broken pdfs where first object is not free */ else if (entry->type != 'f') - fz_throw(ctx, FZ_ERROR_GENERIC, "first object in xref is not free"); + fz_warn(ctx, "first object in xref is not free"); /* broken pdfs where object offsets are out of range */ xref_len = pdf_xref_len(ctx, doc); |