diff options
author | Robin Watts <robin.watts@artifex.com> | 2016-10-04 13:59:43 +0100 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2016-10-06 11:49:43 +0100 |
commit | bcd4c4b4b8379df2997f32776cd34b8ad0ff2da1 (patch) | |
tree | ef089b8b89ca664c76c5216b432ca113d84128c3 /source/pdf | |
parent | a5abafc7583ecda91323a49e58706ac2f99c931f (diff) | |
download | mupdf-bcd4c4b4b8379df2997f32776cd34b8ad0ff2da1.tar.xz |
Update Xref reading code to cope with 19 byte entries.
The spec says entries should be 20 bytes long. In practice we
see 19-byte entries more often than we would like. This is due to
the use of a single EOL character rather than the two-character EOL the spec requires.
The PCLm files I've seen use 19-byte entries, so update the code
to cope with these.
Diffstat (limited to 'source/pdf')
-rw-r--r-- | source/pdf/pdf-xref.c | 35 |
1 files changed, 31 insertions, 4 deletions
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index a7dc3514..af2fec33 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -635,6 +635,7 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b int size; fz_off_t ofs; pdf_obj *trailer = NULL; + int n; fz_var(trailer); @@ -668,10 +669,24 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b t = fz_tell(ctx, doc->file); if (t < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file"); - if (len > (FZ_OFF_MAX - t) / 20) + + /* Spec says xref entries should be 20 bytes, but it's not infrequent + * to see 19, in particular for some PCLm drivers. Cope. */ + if (len > 0) + { + n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, 20); + if (n < 19) + fz_throw(ctx, FZ_ERROR_GENERIC, "malformed xref table"); + if (n == 20 && buf->scratch[19] > 32) + n = 19; + } + else + n = 20; + + if (len > (FZ_OFF_MAX - t) / n) fz_throw(ctx, FZ_ERROR_GENERIC, "xref has too many entries"); - fz_seek(ctx, doc->file, t + 20 * len, SEEK_SET); + fz_seek(ctx, doc->file, t + n * len, SEEK_SET); } fz_try(ctx) @@ -776,6 +791,7 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf) int c; int xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf); pdf_xref_entry *table; + int carried; fz_skip_space(ctx, doc->file); if (fz_skip_string(ctx, doc->file, "xref")) @@ -811,12 +827,17 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf) table = pdf_xref_find_subsection(ctx, doc, ofs, len); + /* Xref entries SHOULD be 20 bytes long, but we see 19 byte + * ones more frequently than we'd like (e.g. PCLm drivers). + * Cope with this by 'carrying' data forward. 
*/ + carried = 0; for (i = ofs; i < ofs + len; i++) { pdf_xref_entry *entry = &table[i-ofs]; - n = fz_read(ctx, file, (unsigned char *) buf->scratch, 20); - if (n != 20) + n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, 20-carried); + if (n != 20-carried) fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected EOF in xref table"); + n += carried; if (!entry->type) { s = buf->scratch; @@ -831,8 +852,14 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf) entry->type = s[17]; if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o') fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], entry->num, entry->gen); + /* If the last byte of our buffer isn't an EOL (or space), carry one byte forward */ + carried = s[19] > 32; + if (carried) + s[0] = s[19]; } } + if (carried) + fz_unread_byte(ctx, file); } tok = pdf_lex(ctx, file, buf); |