1 files changed, 67 insertions, 54 deletions
diff --git a/pdf/pdf_lex.c b/pdf/pdf_lex.c
index 24828412..6774167a 100644
--- a/pdf/pdf_lex.c
+++ b/pdf/pdf_lex.c
@@ -1,5 +1,5 @@
-#include "fitz.h"
-#include "mupdf.h"
+#include "fitz-internal.h"
+#include "mupdf-internal.h"
 
 #define IS_NUMBER \
 	'+':case'-':case'.':case'0':case'1':case'2':case'3':\
@@ -63,87 +63,106 @@ lex_comment(fz_stream *f)
 }
 
 static int
-lex_number(fz_stream *f, char *s, int n, int *tok)
+lex_number(fz_stream *f, pdf_lexbuf *buf, int c)
 {
-	char *buf = s;
-	*tok = PDF_TOK_INT;
+	int neg = 0;
+	int i = 0;
+	int n;
+	int d;
+	float v;
 
 	/* Initially we might have +, -, . or a digit */
-	if (n > 1)
+	switch (c)
+	{
+	case '.':
+		goto loop_after_dot;
+	case '-':
+		neg = 1;
+		break;
+	case '+':
+		break;
+	default: /* Must be a digit */
+		i = c - '0';
+		break;
+	}
+
+	while (1)
 	{
 		int c = fz_read_byte(f);
 		switch (c)
 		{
 		case '.':
-			*tok = PDF_TOK_REAL;
-			*s++ = c;
-			n--;
 			goto loop_after_dot;
-		case '+':
-		case '-':
 		case RANGE_0_9:
-			*s++ = c;
-			n--;
-			goto loop_after_sign;
+			i = 10*i + c - '0';
+			/* FIXME: Need overflow check here; do we care? */
+			break;
 		default:
 			fz_unread_byte(f);
-			goto end;
+			/* Fallthrough */
 		case EOF:
-			goto end;
+			if (neg)
+				i = -i;
+			buf->i = i;
+			return PDF_TOK_INT;
 		}
 	}
 
-	/* We can't accept a sign from here on in, just . or a digit */
-loop_after_sign:
-	while (n > 1)
+	/* In here, we've seen a dot, so can accept just digits */
+loop_after_dot:
+	n = 0;
+	d = 1;
+	while (1)
 	{
 		int c = fz_read_byte(f);
 		switch (c)
 		{
-		case '.':
-			*tok = PDF_TOK_REAL;
-			*s++ = c;
-			n--;
-			goto loop_after_dot;
 		case RANGE_0_9:
-			*s++ = c;
+			if (d >= INT_MAX/10)
+				goto underflow;
+			n = n*10 + (c - '0');
+			d *= 10;
 			break;
 		default:
 			fz_unread_byte(f);
-			goto end;
+			/* Fallthrough */
 		case EOF:
-			goto end;
+			v = (float)i + ((float)n / (float)d);
+			if (neg)
+				v = -v;
+			buf->f = v;
+			return PDF_TOK_REAL;
 		}
-		n--;
 	}
 
-	/* In here, we've seen a dot, so can accept just digits */
-loop_after_dot:
-	while (n > 1)
+underflow:
+	/* Ignore any digits after here, because they are too small */
+	while (1)
 	{
 		int c = fz_read_byte(f);
 		switch (c)
 		{
 		case RANGE_0_9:
-			*s++ = c;
 			break;
 		default:
 			fz_unread_byte(f);
-			goto end;
+			/* Fallthrough */
 		case EOF:
-			goto end;
+			v = (float)i + ((float)n / (float)d);
+			if (neg)
+				v = -v;
+			buf->f = v;
+			return PDF_TOK_REAL;
 		}
-		n--;
 	}
-
-end:
-	*s = '\0';
-	return s-buf;
 }
 
 static void
-lex_name(fz_stream *f, char *s, int n)
+lex_name(fz_stream *f, pdf_lexbuf *buf)
 {
+	char *s = buf->scratch;
+	int n = buf->size;
+
 	while (n > 1)
 	{
 		int c = fz_read_byte(f);
@@ -208,6 +227,7 @@ lex_name(fz_stream *f, char *s, int n)
 	}
 end:
 	*s = '\0';
+	buf->len = s - buf->scratch;
 }
 
 static int
@@ -380,7 +400,7 @@ pdf_token_from_keyword(char *key)
 }
 
 int
-pdf_lex(fz_stream *f, char *buf, int n, int *sl)
+pdf_lex(fz_stream *f, pdf_lexbuf *buf)
 {
 	while (1)
 	{
@@ -396,11 +416,10 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
 			lex_comment(f);
 			break;
 		case '/':
-			lex_name(f, buf, n);
-			*sl = strlen(buf);
+			lex_name(f, buf);
 			return PDF_TOK_NAME;
 		case '(':
-			*sl = lex_string(f, buf, n);
+			buf->len = lex_string(f, buf->scratch, buf->size);
 			return PDF_TOK_STRING;
 		case ')':
 			fz_warn(f->ctx, "lexical error (unexpected ')')");
@@ -414,7 +433,7 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
 			else
 			{
 				fz_unread_byte(f);
-				*sl = lex_hex_string(f, buf, n);
+				buf->len = lex_hex_string(f, buf->scratch, buf->size);
 				return PDF_TOK_STRING;
 			}
 		case '>':
@@ -434,17 +453,11 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
 		case '}':
 			return PDF_TOK_CLOSE_BRACE;
 		case IS_NUMBER:
-			{
-				int tok;
-				fz_unread_byte(f);
-				*sl = lex_number(f, buf, n, &tok);
-				return tok;
-			}
+			return lex_number(f, buf, c);
 		default: /* isregular: !isdelim && !iswhite && c != EOF */
 			fz_unread_byte(f);
-			lex_name(f, buf, n);
-			*sl = strlen(buf);
-			return pdf_token_from_keyword(buf);
+			lex_name(f, buf);
+			return pdf_token_from_keyword(buf->scratch);
 		}
 	}
 }