summary | refs | log | tree | commit | diff
path: root/pdf/pdf_lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'pdf/pdf_lex.c')
-rw-r--r-- pdf/pdf_lex.c 121
1 files changed, 67 insertions, 54 deletions
diff --git a/pdf/pdf_lex.c b/pdf/pdf_lex.c
index 24828412..6774167a 100644
--- a/pdf/pdf_lex.c
+++ b/pdf/pdf_lex.c
@@ -1,5 +1,5 @@
-#include "fitz.h"
-#include "mupdf.h"
+#include "fitz-internal.h"
+#include "mupdf-internal.h"
#define IS_NUMBER \
'+':case'-':case'.':case'0':case'1':case'2':case'3':\
@@ -63,87 +63,106 @@ lex_comment(fz_stream *f)
}
static int
-lex_number(fz_stream *f, char *s, int n, int *tok)
+lex_number(fz_stream *f, pdf_lexbuf *buf, int c)
{
- char *buf = s;
- *tok = PDF_TOK_INT;
+ int neg = 0;
+ int i = 0;
+ int n;
+ int d;
+ float v;
/* Initially we might have +, -, . or a digit */
- if (n > 1)
+ switch (c)
+ {
+ case '.':
+ goto loop_after_dot;
+ case '-':
+ neg = 1;
+ break;
+ case '+':
+ break;
+ default: /* Must be a digit */
+ i = c - '0';
+ break;
+ }
+
+ while (1)
{
int c = fz_read_byte(f);
switch (c)
{
case '.':
- *tok = PDF_TOK_REAL;
- *s++ = c;
- n--;
goto loop_after_dot;
- case '+':
- case '-':
case RANGE_0_9:
- *s++ = c;
- n--;
- goto loop_after_sign;
+ i = 10*i + c - '0';
+ /* FIXME: Need overflow check here; do we care? */
+ break;
default:
fz_unread_byte(f);
- goto end;
+ /* Fallthrough */
case EOF:
- goto end;
+ if (neg)
+ i = -i;
+ buf->i = i;
+ return PDF_TOK_INT;
}
}
- /* We can't accept a sign from here on in, just . or a digit */
-loop_after_sign:
- while (n > 1)
+ /* In here, we've seen a dot, so can accept just digits */
+loop_after_dot:
+ n = 0;
+ d = 1;
+ while (1)
{
int c = fz_read_byte(f);
switch (c)
{
- case '.':
- *tok = PDF_TOK_REAL;
- *s++ = c;
- n--;
- goto loop_after_dot;
case RANGE_0_9:
- *s++ = c;
+ if (d >= INT_MAX/10)
+ goto underflow;
+ n = n*10 + (c - '0');
+ d *= 10;
break;
default:
fz_unread_byte(f);
- goto end;
+ /* Fallthrough */
case EOF:
- goto end;
+ v = (float)i + ((float)n / (float)d);
+ if (neg)
+ v = -v;
+ buf->f = v;
+ return PDF_TOK_REAL;
}
- n--;
}
- /* In here, we've seen a dot, so can accept just digits */
-loop_after_dot:
- while (n > 1)
+underflow:
+ /* Ignore any digits after here, because they are too small */
+ while (1)
{
int c = fz_read_byte(f);
switch (c)
{
case RANGE_0_9:
- *s++ = c;
break;
default:
fz_unread_byte(f);
- goto end;
+ /* Fallthrough */
case EOF:
- goto end;
+ v = (float)i + ((float)n / (float)d);
+ if (neg)
+ v = -v;
+ buf->f = v;
+ return PDF_TOK_REAL;
}
- n--;
}
-
-end:
- *s = '\0';
- return s-buf;
}
static void
-lex_name(fz_stream *f, char *s, int n)
+lex_name(fz_stream *f, pdf_lexbuf *buf)
{
+ char *s = buf->scratch;
+ int n = buf->size;
+
while (n > 1)
{
int c = fz_read_byte(f);
@@ -208,6 +227,7 @@ lex_name(fz_stream *f, char *s, int n)
}
end:
*s = '\0';
+ buf->len = s - buf->scratch;
}
static int
@@ -380,7 +400,7 @@ pdf_token_from_keyword(char *key)
}
int
-pdf_lex(fz_stream *f, char *buf, int n, int *sl)
+pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
while (1)
{
@@ -396,11 +416,10 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
lex_comment(f);
break;
case '/':
- lex_name(f, buf, n);
- *sl = strlen(buf);
+ lex_name(f, buf);
return PDF_TOK_NAME;
case '(':
- *sl = lex_string(f, buf, n);
+ buf->len = lex_string(f, buf->scratch, buf->size);
return PDF_TOK_STRING;
case ')':
fz_warn(f->ctx, "lexical error (unexpected ')')");
@@ -414,7 +433,7 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
else
{
fz_unread_byte(f);
- *sl = lex_hex_string(f, buf, n);
+ buf->len = lex_hex_string(f, buf->scratch, buf->size);
return PDF_TOK_STRING;
}
case '>':
@@ -434,17 +453,11 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
case '}':
return PDF_TOK_CLOSE_BRACE;
case IS_NUMBER:
- {
- int tok;
- fz_unread_byte(f);
- *sl = lex_number(f, buf, n, &tok);
- return tok;
- }
+ return lex_number(f, buf, c);
default: /* isregular: !isdelim && !iswhite && c != EOF */
fz_unread_byte(f);
- lex_name(f, buf, n);
- *sl = strlen(buf);
- return pdf_token_from_keyword(buf);
+ lex_name(f, buf);
+ return pdf_token_from_keyword(buf->scratch);
}
}
}