summary | refs | log | tree | commit | diff
path: root/source/pdf
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2017-12-01 16:07:23 +0100
committer: Tor Andersson <tor.andersson@artifex.com> 2017-12-13 15:01:05 +0100
commit: fa9cd085533f68367c299e058ab3fbb7ad8a2dc6 (patch)
tree: 23444296b2d499d3ebd69a8aa85539562600025f /source/pdf
parent: 5722ebc5823381ee57c525cbc0d4dc627009979d (diff)
download: mupdf-fa9cd085533f68367c299e058ab3fbb7ad8a2dc6.tar.xz
Fix 698785: Catch malformed numbers in PDF lexical scanner.
Return error tokens when parsing numbers with trailing garbage rather than ignoring the extra characters. Also handle error tokens more gracefully in array and dictionary parsing. Treat error tokens as the 'null' keyword and continue parsing.
Diffstat (limited to 'source/pdf')
-rw-r--r-- source/pdf/pdf-lex.c 27
-rw-r--r-- source/pdf/pdf-parse.c 10
2 files changed, 28 insertions, 9 deletions
diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c
index 44c68557..fc439d17 100644
--- a/source/pdf/pdf-lex.c
+++ b/source/pdf/pdf-lex.c
@@ -151,12 +151,21 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
char *e = buf->scratch + buf->size - 1; /* leave space for zero terminator */
char *isreal = (c == '.' ? s : NULL);
int neg = (c == '-');
+ int isbad = 0;
*s++ = c;
+ c = fz_read_byte(ctx, f);
+
+ /* skip extra '-' signs at start of number */
+ if (neg)
+ {
+ while (c == '-')
+ c = fz_read_byte(ctx, f);
+ }
+
while (s < e)
{
- c = fz_read_byte(ctx, f);
switch (c)
{
case IS_WHITE:
@@ -165,21 +174,27 @@ lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c)
goto end;
case EOF:
goto end;
- case '-':
- neg++;
- *s++ = c;
- break;
case '.':
+ if (isreal)
+ isbad = 1;
isreal = s;
- /* Fall through */
+ *s++ = c;
+ break;
+ case RANGE_0_9:
+ *s++ = c;
+ break;
default:
+ isbad = 1;
*s++ = c;
break;
}
+ c = fz_read_byte(ctx, f);
}
end:
*s = '\0';
+ if (isbad)
+ return PDF_TOK_ERROR;
if (isreal)
{
/* We'd like to use the fastest possible atof
diff --git a/source/pdf/pdf-parse.c b/source/pdf/pdf-parse.c
index 451d9e1d..ff741dcb 100644
--- a/source/pdf/pdf-parse.c
+++ b/source/pdf/pdf-parse.c
@@ -457,7 +457,8 @@ pdf_parse_array(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf
break;
default:
- fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot parse token in array");
+ pdf_array_push_drop(ctx, ary, pdf_new_null(ctx, doc));
+ break;
}
}
end:
@@ -547,10 +548,13 @@ pdf_parse_dict(fz_context *ctx, pdf_document *doc, fz_stream *file, pdf_lexbuf *
break;
}
}
- fz_throw(ctx, FZ_ERROR_SYNTAX, "invalid indirect reference in dict");
+ fz_warn(ctx, "invalid indirect reference in dict");
+ val = pdf_new_null(ctx, doc);
+ break;
default:
- fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown token in dict");
+ val = pdf_new_null(ctx, doc);
+ break;
}
pdf_dict_put(ctx, dict, key, val);