From 03dee17a4f84ee521f64778bb7441accb42e35b9 Mon Sep 17 00:00:00 2001
From: Tor Andersson <tor.andersson@artifex.com>
Date: Tue, 23 Jan 2018 12:29:41 +0100
Subject: Return PDF_TOK_ERROR when encountering isolated '>' and ')'
 characters.

Also return PDF_TOK_ERROR instead of silently swallowing string delimiters
('(', ')' and a lone '<') in pdf_lex_no_string.

Also fix the repair code to not skip an extra byte whenever it scans an error
token.
---
 source/pdf/pdf-lex.c    | 47 ++++++++++++++---------------------------------
 source/pdf/pdf-repair.c |  5 +++--
 2 files changed, 17 insertions(+), 35 deletions(-)

(limited to 'source/pdf')

diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c
index fc439d17..1301c64a 100644
--- a/source/pdf/pdf-lex.c
+++ b/source/pdf/pdf-lex.c
@@ -521,32 +521,21 @@ pdf_lex(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
 		case '(':
 			return lex_string(ctx, f, buf);
 		case ')':
-			fz_warn(ctx, "lexical error (unexpected ')')");
-			continue;
+			return PDF_TOK_ERROR;
 		case '<':
 			c = fz_read_byte(ctx, f);
 			if (c == '<')
-			{
 				return PDF_TOK_OPEN_DICT;
-			}
-			else
-			{
+			if (c != EOF)
 				fz_unread_byte(ctx, f);
-				return lex_hex_string(ctx, f, buf);
-			}
+			return lex_hex_string(ctx, f, buf);
 		case '>':
 			c = fz_read_byte(ctx, f);
 			if (c == '>')
-			{
 				return PDF_TOK_CLOSE_DICT;
-			}
-			fz_warn(ctx, "lexical error (unexpected '>')");
-			if (c == EOF)
-			{
-				return PDF_TOK_EOF;
-			}
-			fz_unread_byte(ctx, f);
-			continue;
+			if (c != EOF)
+				fz_unread_byte(ctx, f);
+			return PDF_TOK_ERROR;
 		case '[':
 			return PDF_TOK_OPEN_ARRAY;
 		case ']':
@@ -585,31 +574,23 @@ pdf_lex_no_string(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf)
 			lex_name(ctx, f, buf);
 			return PDF_TOK_NAME;
 		case '(':
-			continue;
+			return PDF_TOK_ERROR; /* no strings allowed */
 		case ')':
-			continue;
+			return PDF_TOK_ERROR; /* no strings allowed */
 		case '<':
 			c = fz_read_byte(ctx, f);
 			if (c == '<')
-			{
 				return PDF_TOK_OPEN_DICT;
-			}
-			else
-			{
-				continue;
-			}
+			if (c != EOF)
+				fz_unread_byte(ctx, f);
+			return PDF_TOK_ERROR; /* no strings allowed */
 		case '>':
 			c = fz_read_byte(ctx, f);
 			if (c == '>')
-			{
 				return PDF_TOK_CLOSE_DICT;
-			}
-			if (c == EOF)
-			{
-				return PDF_TOK_EOF;
-			}
-			fz_unread_byte(ctx, f);
-			continue;
+			if (c != EOF)
+				fz_unread_byte(ctx, f);
+			return PDF_TOK_ERROR;
 		case '[':
 			return PDF_TOK_OPEN_ARRAY;
 		case ']':
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c
index 0c29758e..78a928f9 100644
--- a/source/pdf/pdf-repair.c
+++ b/source/pdf/pdf-repair.c
@@ -515,11 +515,12 @@ pdf_repair_xref(fz_context *ctx, pdf_document *doc)
 			}
 
 			else if (tok == PDF_TOK_EOF)
+			{
 				break;
+			}
+
 			else
 			{
-				if (tok == PDF_TOK_ERROR)
-					fz_read_byte(ctx, doc->file);
 				num = 0;
 				gen = 0;
 			}
-- 
cgit v1.2.3