Fix 698785: Catch malformed numbers in PDF lexical scanner.

Return error tokens when parsing numbers with trailing garbage, rather than silently ignoring the extra characters. Also handle error tokens more gracefully in array and dictionary parsing: instead of throwing a syntax error, treat an error token as the 'null' keyword and continue parsing.
author: Tor Andersson <tor.andersson@artifex.com> 2017-12-01 16:07:23 +0100
committer: Tor Andersson <tor.andersson@artifex.com> 2017-12-13 15:01:05 +0100
commit: fa9cd085533f68367c299e058ab3fbb7ad8a2dc6 (patch)
tree: 23444296b2d499d3ebd69a8aa85539562600025f /source/pdf
parent: 5722ebc5823381ee57c525cbc0d4dc627009979d (diff)
download: mupdf-fa9cd085533f68367c299e058ab3fbb7ad8a2dc6.tar.xz
2 files changed, 28 insertions, 9 deletions
diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c
index 44c68557..fc439d17 100644
--- a/source/pdf/pdf-lex.c
+++ b/source/pdf/pdf-lex.c
@@ -151,12 +151,21 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
 	char *e = buf->scratch + buf->size - 1; /* leave space for zero terminator */
 	char *isreal = (c == '.' ? s : NULL);
 	int neg = (c == '-');
+	int isbad = 0;
 
 	*s++ = c;
 
+	c = fz_read_byte(ctx, f);
+
+	/* skip extra '-' signs at start of number */
+	if (neg)
+	{
+		while (c == '-')
+			c = fz_read_byte(ctx, f);
+	}
+
 	while (s < e)
 	{
-		c = fz_read_byte(ctx, f);
 		switch (c)
 		{
 		case IS_WHITE:
@@ -165,21 +174,27 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
 			goto end;
 		case EOF:
 			goto end;
-		case '-':
-			neg++;
-			*s++ = c;
-			break;
 		case '.':
+			if (isreal)
+				isbad = 1;
 			isreal = s;
-			/* Fall through */
+			*s++ = c;
+			break;
+		case RANGE_0_9:
+			*s++ = c;
+			break;
 		default:
+			isbad = 1;
 			*s++ = c;
 			break;
 		}
+		c = fz_read_byte(ctx, f);
 	}
 
 end:
 	*s = '\0';
+	if (isbad)
+		return PDF_TOK_ERROR;
 	if (isreal)
 	{
 		/* We'd like to use the fastest possible atof
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index 451d9e1d..ff741dcb 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -457,7 +457,8 @@ pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf
 				break;
 
 			default:
-				fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot parse token in array");
+				pdf_array_push_drop(ctx, ary, pdf_new_null(ctx, doc));
+				break;
 			}
 		}
 end:
@@ -547,10 +548,13 @@ pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *
 						break;
 					}
 				}
-				fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid indirect reference in dict");
+				fz_warn(ctx, "invalid indirect reference in dict");
+				val = pdf_new_null(ctx, doc);
+				break;
 
 			default:
-				fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown token in dict");
+				val = pdf_new_null(ctx, doc);
+				break;
 			}
 
 			pdf_dict_put(ctx, dict, key, val);
author	Tor Andersson <tor.andersson@artifex.com>	2017-12-01 16:07:23 +0100
committer	Tor Andersson <tor.andersson@artifex.com>	2017-12-13 15:01:05 +0100
commit	fa9cd085533f68367c299e058ab3fbb7ad8a2dc6 (patch)
tree	23444296b2d499d3ebd69a8aa85539562600025f /source/pdf
parent	5722ebc5823381ee57c525cbc0d4dc627009979d (diff)
download	mupdf-fa9cd085533f68367c299e058ab3fbb7ad8a2dc6.tar.xz