diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2016-01-15 12:28:00 +0100 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2016-01-15 12:28:00 +0100 |
commit | 9bc886e621a20aa3dd667be31bf2481e4cfad50d (patch) | |
tree | 94b70e43d61d7f97dc14b0aa26c60991f38b11fa | |
parent | ad8936bf2bcf54c7042bdec20c49c96657649b34 (diff) | |
download | mupdf-9bc886e621a20aa3dd667be31bf2481e4cfad50d.tar.xz |
pdf: Consume entire token before lexing numbers.
"0.00-70" should be parsed as a single token, not as two tokens as we did previously.
-rw-r--r-- | source/pdf/pdf-lex.c | 173 |
1 files changed, 93 insertions, 80 deletions
diff --git a/source/pdf/pdf-lex.c b/source/pdf/pdf-lex.c index c2381e8d..99bf3ffd 100644 --- a/source/pdf/pdf-lex.c +++ b/source/pdf/pdf-lex.c @@ -63,107 +63,120 @@ lex_comment(fz_context *ctx, fz_stream *f) } while ((c != '\012') && (c != '\015') && (c != EOF)); } -static int -lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c) +/* Fast but inaccurate strtof, with Adobe overflow handling. */ +static float fast_atof(char *s) { int neg = 0; - fz_off_t i = 0; - int n; - int d; - float v; - float fd; - - /* Initially we might have +, -, . or a digit */ - switch (c) + int i = 0; + + while (*s == '-') { - case '.': - goto loop_after_dot; - case '-': neg = 1; - break; - case '+': - break; - default: /* Must be a digit */ - i = c - '0'; - break; + ++s; + } + while (*s == '+') + { + ++s; } - while (1) + while (*s >= '0' && *s <= '9') { - c = fz_read_byte(ctx, f); - switch (c) - { - case '.': - goto loop_after_dot; - case RANGE_0_9: - /* We deliberately ignore overflow here. We tried - * code that returned INT_MIN/MAX as appropriate, - * but this causes loss of data (see Bug695950.pdf - * for an example). Tests show that Acrobat handles - * overflows in exactly the same way we do (i.e. - * 123450000000000000000678 is read as 678). */ - i = 10*i + c - '0'; - break; - default: - fz_unread_byte(ctx, f); - /* Fallthrough */ - case EOF: - if (neg) - i = -i; - buf->i = i; - return PDF_TOK_INT; - } + /* We deliberately ignore overflow here. + * Tests show that Acrobat handles * overflows in exactly the same way we do: + * 123450000000000000000678 is read as 678. 
+ */ + i = i * 10 + (*s - '0'); + ++s; } - /* In here, we've seen a dot, so can accept just digits */ -loop_after_dot: - n = 0; - d = 1; - while (1) + if (*s == '.') { - c = fz_read_byte(ctx, f); - switch (c) + float v = i; + float n = 0; + float d = 1; + ++s; + while (*s >= '0' && *s <= '9') { - case RANGE_0_9: - if (d >= INT_MAX/10) - goto underflow; - n = n*10 + (c - '0'); - d *= 10; - break; - default: - fz_unread_byte(ctx, f); - /* Fallthrough */ - case EOF: - v = (float)i + ((float)n / (float)d); - if (neg) - v = -v; - buf->f = v; - return PDF_TOK_REAL; + n = 10 * n + (*s - '0'); + d = 10 * d; + ++s; } + v += n / d; + return neg ? -v : v; } + else + { + return neg ? -i : i; + } +} -underflow: - fd = 1 / (float)d; - v = (float)i + ((float)n * fd); - while (1) +/* Fast but inaccurate atoi. */ +static int fast_atoi(char *s) +{ + int neg = 0; + int i = 0; + + while (*s == '-') { - c = fz_read_byte(ctx, f); + neg = 1; + ++s; + } + while (*s == '+') + { + ++s; + } + + while (*s >= '0' && *s <= '9') + { + /* We deliberately ignore overflow here. */ + i = i * 10 + (*s - '0'); + ++s; + } + + return neg ? -i : i; +} + +static int +lex_number(fz_context *ctx, fz_stream *f, pdf_lexbuf *buf, int c) +{ + char *s = buf->scratch; + char *e = buf->scratch + buf->size - 1; /* leave space for zero terminator */ + int isreal = (c == '.'); + + *s++ = c; + + while (s < e) + { + int c = fz_read_byte(ctx, f); switch (c) { - case RANGE_0_9: - fd /= 10; - v += (c - '0') * fd; - break; - default: + case IS_WHITE: + case IS_DELIM: fz_unread_byte(ctx, f); - /* Fallthrough */ + goto end; case EOF: - if (neg) - v = -v; - buf->f = v; - return PDF_TOK_REAL; + goto end; + case '.': + isreal = 1; + /* Fall through */ + default: + *s++ = c; + break; } } + +end: + *s = '\0'; + if (isreal) + { + buf->f = fast_atof(buf->scratch); + return PDF_TOK_REAL; + } + else + { + buf->i = fast_atoi(buf->scratch); + return PDF_TOK_INT; + } } static void |