summaryrefslogtreecommitdiff
path: root/source/pdf
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2016-10-04 13:59:43 +0100
committerRobin Watts <robin.watts@artifex.com>2016-10-06 11:49:43 +0100
commitbcd4c4b4b8379df2997f32776cd34b8ad0ff2da1 (patch)
treeef089b8b89ca664c76c5216b432ca113d84128c3 /source/pdf
parenta5abafc7583ecda91323a49e58706ac2f99c931f (diff)
downloadmupdf-bcd4c4b4b8379df2997f32776cd34b8ad0ff2da1.tar.xz
Update Xref reading code to cope with 19 byte entries.
The spec says entries should be 20 bytes long. In practice, we see 19-byte entries more often than we would like. This is due to the use of a single EOL character rather than two. The PCLm files I've seen use 19-byte entries, so update the code to cope with these.
Diffstat (limited to 'source/pdf')
-rw-r--r--source/pdf/pdf-xref.c35
1 files changed, 31 insertions, 4 deletions
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index a7dc3514..af2fec33 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -635,6 +635,7 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
int size;
fz_off_t ofs;
pdf_obj *trailer = NULL;
+ int n;
fz_var(trailer);
@@ -668,10 +669,24 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
t = fz_tell(ctx, doc->file);
if (t < 0)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
- if (len > (FZ_OFF_MAX - t) / 20)
+
+ /* Spec says xref entries should be 20 bytes, but it's not infrequent
+ * to see 19, in particular for some PCLm drivers. Cope. */
+ if (len > 0)
+ {
+ n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, 20);
+ if (n < 19)
+ fz_throw(ctx, FZ_ERROR_GENERIC, "malformed xref table");
+ if (n == 20 && buf->scratch[19] > 32)
+ n = 19;
+ }
+ else
+ n = 20;
+
+ if (len > (FZ_OFF_MAX - t) / n)
fz_throw(ctx, FZ_ERROR_GENERIC, "xref has too many entries");
- fz_seek(ctx, doc->file, t + 20 * len, SEEK_SET);
+ fz_seek(ctx, doc->file, t + n * len, SEEK_SET);
}
fz_try(ctx)
@@ -776,6 +791,7 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
int c;
int xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf);
pdf_xref_entry *table;
+ int carried;
fz_skip_space(ctx, doc->file);
if (fz_skip_string(ctx, doc->file, "xref"))
@@ -811,12 +827,17 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
table = pdf_xref_find_subsection(ctx, doc, ofs, len);
+ /* Xref entries SHOULD be 20 bytes long, but we see 19 byte
+ * ones more frequently than we'd like (e.g. PCLm drivers).
+ * Cope with this by 'carrying' data forward. */
+ carried = 0;
for (i = ofs; i < ofs + len; i++)
{
pdf_xref_entry *entry = &table[i-ofs];
- n = fz_read(ctx, file, (unsigned char *) buf->scratch, 20);
- if (n != 20)
+ n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, 20-carried);
+ if (n != 20-carried)
fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected EOF in xref table");
+ n += carried;
if (!entry->type)
{
s = buf->scratch;
@@ -831,8 +852,14 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
entry->type = s[17];
if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], entry->num, entry->gen);
+ /* If the last byte of our buffer isn't an EOL (or space), carry one byte forward */
+ carried = s[19] > 32;
+ if (carried)
+ s[0] = s[19];
}
}
+ if (carried)
+ fz_unread_byte(ctx, file);
}
tok = pdf_lex(ctx, file, buf);