Bug 692424: make repair cope better with missing endobj

Previously, when parsing an object with a missing endobj, the code would consume the header of the following object. We now amend the code to give up searching for an endobj as soon as the lexer returns an integer token (presumed to be the start of the next object); we then backtrack over that integer and carry on.
author: Robin Watts <robin.watts@artifex.com> 2011-11-15 20:20:05 +0000
committer: Robin Watts <robin.watts@artifex.com> 2011-11-17 18:32:08 +0000
commit: 84037901d3838b85f7c4d61b191376ce2e32b909 (patch)
tree: b6017104a1980dcbc7936eec8aa5b5ba9d7b9e33 /pdf/pdf_repair.c
parent: 8f8794063127610db433c5dd0f6e7f5241308c6f (diff)
download: mupdf-84037901d3838b85f7c4d61b191376ce2e32b909.tar.xz
1 files changed, 8 insertions, 2 deletions
diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c
index 067fe2cf..08adcb99 100644
--- a/pdf/pdf_repair.c
+++ b/pdf/pdf_repair.c
@@ -68,14 +68,20 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
 	while ( tok != PDF_TOK_STREAM &&
 		tok != PDF_TOK_ENDOBJ &&
 		tok != PDF_TOK_ERROR &&
-		tok != PDF_TOK_EOF )
+		tok != PDF_TOK_EOF &&
+		tok != PDF_TOK_INT )
 	{
 		error = pdf_lex(&tok, file, buf, cap, &len);
 		if (error)
 			return fz_rethrow(error, "cannot scan for endobj or stream token");
 	}
 
-	if (tok == PDF_TOK_STREAM)
+	if (tok == PDF_TOK_INT)
+	{
+		while (len-- > 0)
+			fz_unread_byte(file);
+	}
+	else if (tok == PDF_TOK_STREAM)
 	{
 		int c = fz_read_byte(file);
 		if (c == '\r') {
author	Robin Watts <robin.watts@artifex.com>	2011-11-15 20:20:05 +0000
committer	Robin Watts <robin.watts@artifex.com>	2011-11-17 18:32:08 +0000
commit	84037901d3838b85f7c4d61b191376ce2e32b909 (patch)
tree	b6017104a1980dcbc7936eec8aa5b5ba9d7b9e33 /pdf/pdf_repair.c
parent	8f8794063127610db433c5dd0f6e7f5241308c6f (diff)
download	mupdf-84037901d3838b85f7c4d61b191376ce2e32b909.tar.xz