From fa9cd085533f68367c299e058ab3fbb7ad8a2dc6 Mon Sep 17 00:00:00 2001
From: Tor Andersson
Date: Fri, 1 Dec 2017 16:07:23 +0100
Subject: Fix 698785: Catch malformed numbers in PDF lexical scanner.

Return error tokens when parsing numbers with trailing garbage rather
than ignoring the extra characters.

Also handle error tokens more gracefully in array and dictionary
parsing. Treat error tokens as the 'null' keyword and continue parsing.
---
 source/pdf/pdf-lex.c   | 27 +++++++++++++++++++++------
 source/pdf/pdf-parse.c | 10 +++++++---
 2 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'source')

diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c
index 44c68557..fc439d17 100644
--- a/source/pdf/pdf-lex.c
+++ b/source/pdf/pdf-lex.c
@@ -151,12 +151,21 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
 	char *e = buf->scratch + buf->size - 1; /* leave space for zero terminator */
 	char *isreal = (c == '.' ? s : NULL);
 	int neg = (c == '-');
+	int isbad = 0;
 
 	*s++ = c;
 
+	c = fz_read_byte(ctx, f);
+
+	/* skip extra '-' signs at start of number */
+	if (neg)
+	{
+		while (c == '-')
+			c = fz_read_byte(ctx, f);
+	}
+
 	while (s < e)
 	{
-		c = fz_read_byte(ctx, f);
 		switch (c)
 		{
 		case IS_WHITE:
@@ -165,21 +174,27 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
 			goto end;
 		case EOF:
 			goto end;
-		case '-':
-			neg++;
-			*s++ = c;
-			break;
 		case '.':
+			if (isreal)
+				isbad = 1;
 			isreal = s;
-			/* Fall through */
+			*s++ = c;
+			break;
+		case RANGE_0_9:
+			*s++ = c;
+			break;
 		default:
+			isbad = 1;
 			*s++ = c;
 			break;
 		}
+		c = fz_read_byte(ctx, f);
 	}
 
 end:
 	*s = '\0';
+	if (isbad)
+		return PDF_TOK_ERROR;
 	if (isreal)
 	{
 		/* We'd like to use the fastest possible atof
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index 451d9e1d..ff741dcb 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -457,7 +457,8 @@ pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf
 				break;
 
 			default:
-				fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot parse token in array");
+				pdf_array_push_drop(ctx, ary, pdf_new_null(ctx, doc));
+				break;
 			}
 		}
 end:
@@ -547,10 +548,13 @@ pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *
 						break;
 					}
 				}
-				fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid indirect reference in dict");
+				fz_warn(ctx, "invalid indirect reference in dict");
+				val = pdf_new_null(ctx, doc);
+				break;
 
 			default:
-				fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown token in dict");
+				val = pdf_new_null(ctx, doc);
+				break;
 			}
 
 			pdf_dict_put(ctx, dict, key, val);
-- 
cgit v1.2.3