diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2017-12-01 16:07:23 +0100 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2017-12-13 15:01:05 +0100 |
commit | fa9cd085533f68367c299e058ab3fbb7ad8a2dc6 (patch) | |
tree | 23444296b2d499d3ebd69a8aa85539562600025f /source/pdf | |
parent | 5722ebc5823381ee57c525cbc0d4dc627009979d (diff) | |
download | mupdf-fa9cd085533f68367c299e058ab3fbb7ad8a2dc6.tar.xz |
Fix bug 698785: Catch malformed numbers in the PDF lexical scanner.
Return error tokens when parsing numbers with trailing garbage rather than
ignoring the extra characters.
Also handle error tokens more gracefully in array and dictionary parsing:
treat them as the 'null' keyword and continue parsing instead of throwing a syntax error.
Diffstat (limited to 'source/pdf')
-rw-r--r-- | source/pdf/pdf-lex.c | 27 | ||||
-rw-r--r-- | source/pdf/pdf-parse.c | 10 |
2 files changed, 28 insertions, 9 deletions
diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c index 44c68557..fc439d17 100644 --- a/source/pdf/pdf-lex.c +++ b/source/pdf/pdf-lex.c @@ -151,12 +151,21 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c) char *e = buf->scratch + buf->size - 1; /* leave space for zero terminator */ char *isreal = (c == '.' ? s : NULL); int neg = (c == '-'); + int isbad = 0; *s++ = c; + c = fz_read_byte(ctx, f); + + /* skip extra '-' signs at start of number */ + if (neg) + { + while (c == '-') + c = fz_read_byte(ctx, f); + } + while (s < e) { - c = fz_read_byte(ctx, f); switch (c) { case IS_WHITE: @@ -165,21 +174,27 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c) goto end; case EOF: goto end; - case '-': - neg++; - *s++ = c; - break; case '.': + if (isreal) + isbad = 1; isreal = s; - /* Fall through */ + *s++ = c; + break; + case RANGE_0_9: + *s++ = c; + break; default: + isbad = 1; *s++ = c; break; } + c = fz_read_byte(ctx, f); } end: *s = '\0'; + if (isbad) + return PDF_TOK_ERROR; if (isreal) { /* We'd like to use the fastest possible atof diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c index 451d9e1d..ff741dcb 100644 --- a/source/pdf/pdf-parse.c +++ b/source/pdf/pdf-parse.c @@ -457,7 +457,8 @@ pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf break; default: - fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot parse token in array"); + pdf_array_push_drop(ctx, ary, pdf_new_null(ctx, doc)); + break; } } end: @@ -547,10 +548,13 @@ pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf * break; } } - fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid indirect reference in dict"); + fz_warn(ctx, "invalid indirect reference in dict"); + val = pdf_new_null(ctx, doc); + break; default: - fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown token in dict"); + val = pdf_new_null(ctx, doc); + break; } pdf_dict_put(ctx, dict, key, val); |