Update Xref reading code to cope with 19 byte entries.

The spec says entries should be 20 bytes long. In practice we see 19-byte entries more often than we would like. This is due to the use of a single EOL character rather than two. The PCLm files I've seen use 19-byte entries, so update the code to cope with these.
author: Robin Watts <robin.watts@artifex.com> 2016-10-04 13:59:43 +0100
committer: Robin Watts <robin.watts@artifex.com> 2016-10-06 11:49:43 +0100
commit: bcd4c4b4b8379df2997f32776cd34b8ad0ff2da1 (patch)
tree: ef089b8b89ca664c76c5216b432ca113d84128c3 /source/pdf
parent: a5abafc7583ecda91323a49e58706ac2f99c931f (diff)
download: mupdf-bcd4c4b4b8379df2997f32776cd34b8ad0ff2da1.tar.xz
1 files changed, 31 insertions, 4 deletions
diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c
index a7dc3514..af2fec33 100644
--- a/source/pdf/pdf-xref.c
+++ b/source/pdf/pdf-xref.c
@@ -635,6 +635,7 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
 	int size;
 	fz_off_t ofs;
 	pdf_obj *trailer = NULL;
+	int n;
 
 	fz_var(trailer);
 
@@ -668,10 +669,24 @@ pdf_xref_size_from_old_trailer(fz_context *ctx, pdf_document *doc, pdf_lexbuf *b
 		t = fz_tell(ctx, doc->file);
 		if (t < 0)
 			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
-		if (len > (FZ_OFF_MAX - t) / 20)
+
+		/* Spec says xref entries should be 20 bytes, but it's not infrequent
+		 * to see 19, in particular for some PCLm drivers. Cope. */
+		if (len > 0)
+		{
+			n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, 20);
+			if (n < 19)
+				fz_throw(ctx, FZ_ERROR_GENERIC, "malformed xref table");
+			if (n == 20 && buf->scratch[19] > 32)
+				n = 19;
+		}
+		else
+			n = 20;
+
+		if (len > (FZ_OFF_MAX - t) / n)
 			fz_throw(ctx, FZ_ERROR_GENERIC, "xref has too many entries");
 
-		fz_seek(ctx, doc->file, t + 20 * len, SEEK_SET);
+		fz_seek(ctx, doc->file, t + n * len, SEEK_SET);
 	}
 
 	fz_try(ctx)
@@ -776,6 +791,7 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 	int c;
 	int xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf);
 	pdf_xref_entry *table;
+	int carried;
 
 	fz_skip_space(ctx, doc->file);
 	if (fz_skip_string(ctx, doc->file, "xref"))
@@ -811,12 +827,17 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 
 		table = pdf_xref_find_subsection(ctx, doc, ofs, len);
 
+		/* Xref entries SHOULD be 20 bytes long, but we see 19 byte
+		 * ones more frequently than we'd like (e.g. PCLm drivers).
+		 * Cope with this by 'carrying' data forward. */
+		carried = 0;
 		for (i = ofs; i < ofs + len; i++)
 		{
 			pdf_xref_entry *entry = &table[i-ofs];
-			n = fz_read(ctx, file, (unsigned char *) buf->scratch, 20);
-			if (n != 20)
+			n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, 20-carried);
+			if (n != 20-carried)
 				fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected EOF in xref table");
+			n += carried;
 			if (!entry->type)
 			{
 				s = buf->scratch;
@@ -831,8 +852,14 @@ pdf_read_old_xref(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf)
 				entry->type = s[17];
 				if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o')
 					fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: %#x (%d %d R)", s[17], entry->num, entry->gen);
+				/* If the last byte of our buffer isn't an EOL (or space), carry one byte forward */
+				carried = s[19] > 32;
+				if (carried)
+					s[0] = s[19];
 			}
 		}
+		if (carried)
+			fz_unread_byte(ctx, file);
 	}
 
 	tok = pdf_lex(ctx, file, buf);
author	Robin Watts <robin.watts@artifex.com>	2016-10-04 13:59:43 +0100
committer	Robin Watts <robin.watts@artifex.com>	2016-10-06 11:49:43 +0100
commit	bcd4c4b4b8379df2997f32776cd34b8ad0ff2da1 (patch)
tree	ef089b8b89ca664c76c5216b432ca113d84128c3 /source/pdf
parent	a5abafc7583ecda91323a49e58706ac2f99c931f (diff)
download	mupdf-bcd4c4b4b8379df2997f32776cd34b8ad0ff2da1.tar.xz