8 files changed, 337 insertions, 292 deletions
diff --git a/pdf/mupdf.h b/pdf/mupdf.h
index b233288f..15a96541 100644
--- a/pdf/mupdf.h
+++ b/pdf/mupdf.h
@@ -102,12 +102,40 @@ enum
 	PDF_NUM_TOKENS
 };
 
-int pdf_lex(fz_stream *f, char *buf, int n, int *len);
+enum	/* scratch capacities, in bytes, for the two lexer buffer variants */
+{
+	PDF_LEXBUF_SMALL = 256,	/* size of pdf_lexbuf.scratch */
+	PDF_LEXBUF_LARGE = 65536	/* size value used with pdf_lexbuf_large */
+};
+
+    
+
+typedef struct pdf_lexbuf_s pdf_lexbuf;
+typedef struct pdf_lexbuf_large_s pdf_lexbuf_large;
+
+struct pdf_lexbuf_s	/* token buffer handed to pdf_lex and the pdf_parse_* functions */
+{
+	int size;	/* capacity the lexer may use in scratch; callers set it before lexing */
+	int len;	/* byte length of the last name, keyword or string token */
+	int i;	/* value of the last PDF_TOK_INT token */
+	float f;	/* value of the last PDF_TOK_REAL token */
+	char scratch[PDF_LEXBUF_SMALL];	/* text of the last name, keyword or string token */
+};
+
+struct pdf_lexbuf_large_s	/* a pdf_lexbuf followed by additional scratch space */
+{
+	pdf_lexbuf base;	/* the part handed to the lexer: callers pass &x.base as the pdf_lexbuf * */
+	char scratch[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL];	/* NOTE(review): presumably expected to follow base.scratch without padding so base.size can be PDF_LEXBUF_LARGE - confirm */
+};
+
+    
+
+int pdf_lex(fz_stream *f, pdf_lexbuf *lexbuf);
 
-fz_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, char *buf, int cap);
-fz_obj *pdf_parse_dict(pdf_document *doc, fz_stream *f, char *buf, int cap);
-fz_obj *pdf_parse_stm_obj(pdf_document *doc, fz_stream *f, char *buf, int cap);
-fz_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, char *buf, int cap, int *num, int *gen, int *stm_ofs);
+fz_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
+fz_obj *pdf_parse_dict(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
+fz_obj *pdf_parse_stm_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
+fz_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf, int *num, int *gen, int *stm_ofs);
 
 fz_rect pdf_to_rect(fz_context *ctx, fz_obj *array);
 fz_matrix pdf_to_matrix(fz_context *ctx, fz_obj *array);
@@ -170,7 +198,7 @@ struct pdf_document_s
 	fz_obj **page_objs;
 	fz_obj **page_refs;
 
-	char scratch[65536];
+	pdf_lexbuf_large lexbuf;
 };
 
 fz_obj *pdf_resolve_indirect(fz_obj *ref);
@@ -194,7 +222,7 @@ pdf_document *pdf_open_document(fz_context *ctx, const char *filename);
 void pdf_close_document(pdf_document *doc);
 
 /* private */
-void pdf_repair_xref(pdf_document *doc, char *buf, int bufsize);
+void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf);
 void pdf_repair_obj_stms(pdf_document *doc);
 void pdf_debug_xref(pdf_document *);
 void pdf_resize_xref(pdf_document *doc, int newcap);
diff --git a/pdf/pdf_cmap_parse.c b/pdf/pdf_cmap_parse.c
index fb37c4a9..5c21393e 100644
--- a/pdf/pdf_cmap_parse.c
+++ b/pdf/pdf_cmap_parse.c
@@ -49,14 +49,14 @@ pdf_code_from_string(char *buf, int len)
 }
 
 static int
-pdf_lex_cmap(fz_stream *file, char *buf, int n, int *sl)
+pdf_lex_cmap(fz_stream *file, pdf_lexbuf *buf)	/* pdf_lex wrapper: keyword tokens are remapped via pdf_cmap_token_from_keyword */
 {
-	int tok = pdf_lex(file, buf, n, sl);
+	int tok = pdf_lex(file, buf);	/* token text/value is left in *buf */
 
 	/* RJW: Lost debugging here: "cannot parse cmap token" */
 
 	if (tok == PDF_TOK_KEYWORD)
-		tok = pdf_cmap_token_from_keyword(buf);
+		tok = pdf_cmap_token_from_keyword(buf->scratch);	/* keyword text was lexed into buf->scratch */
 
 	return tok;
 }
@@ -64,15 +64,15 @@ pdf_lex_cmap(fz_stream *file, char *buf, int n, int *sl)
 static void
 pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 {
-	char buf[256];
+	pdf_lexbuf buf;	/* only size is set here; the lexer fills in the token fields */
 	int tok;
-	int len;
 
-	tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+	buf.size = PDF_LEXBUF_SMALL;	/* capacity of buf.scratch */
+	tok = pdf_lex_cmap(file, &buf);	/* expect the name that follows CMapName */
 	/* RJW: Lost debugging: "syntaxerror in cmap" */
 
 	if (tok == PDF_TOK_NAME)
-		fz_strlcpy(cmap->cmap_name, buf, sizeof(cmap->cmap_name));
+		fz_strlcpy(cmap->cmap_name, buf.scratch, sizeof(cmap->cmap_name));	/* name text is in buf.scratch */
 	else
 		fz_warn(ctx, "expected name after CMapName in cmap");
 }
@@ -80,15 +80,15 @@ pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 static void
 pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 {
-	char buf[256];
+	pdf_lexbuf buf;	/* only size is set here; the lexer fills in the token fields */
 	int tok;
-	int len;
 
-	tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+	buf.size = PDF_LEXBUF_SMALL;	/* capacity of buf.scratch */
+	tok = pdf_lex_cmap(file, &buf);	/* expect the integer that follows WMode */
 	/* RJW: Lost debugging: "syntaxerror in cmap" */
 
 	if (tok == PDF_TOK_INT)
-		pdf_set_wmode(ctx, cmap, atoi(buf));
+		pdf_set_wmode(ctx, cmap, buf.i);	/* integer value was stored in buf.i by the lexer */
 	else
 		fz_warn(ctx, "expected integer after WMode in cmap");
 }
@@ -96,14 +96,14 @@ pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 static void
 pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 {
-	char buf[256];
+	pdf_lexbuf buf;
 	int tok;
-	int len;
 	int lo, hi;
 
+        buf.size = PDF_LEXBUF_SMALL;
 	while (1)
 	{
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: Lost debugging: "syntaxerror in cmap" */
 
 		if (tok == TOK_END_CODESPACE_RANGE)
@@ -111,13 +111,13 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 
 		else if (tok == PDF_TOK_STRING)
 		{
-			lo = pdf_code_from_string(buf, len);
-			tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+			lo = pdf_code_from_string(buf.scratch, buf.len);
+			tok = pdf_lex_cmap(file, &buf);
 			/* RJW: Lost debugging: "syntaxerror in cmap" */
 			if (tok == PDF_TOK_STRING)
 			{
-				hi = pdf_code_from_string(buf, len);
-				pdf_add_codespace(ctx, cmap, lo, hi, len);
+				hi = pdf_code_from_string(buf.scratch, buf.len);
+				pdf_add_codespace(ctx, cmap, lo, hi, buf.len);
 			}
 			else break;
 		}
@@ -131,14 +131,14 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 static void
 pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 {
-	char buf[256];
+	pdf_lexbuf buf;
 	int tok;
-	int len;
 	int lo, hi, dst;
 
+        buf.size = PDF_LEXBUF_SMALL;
 	while (1)
 	{
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: Lost debugging: "syntaxerror in cmap" */
 
 		if (tok == TOK_END_CID_RANGE)
@@ -147,21 +147,21 @@ pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 		else if (tok != PDF_TOK_STRING)
 			fz_throw(ctx, "expected string or endcidrange");
 
-		lo = pdf_code_from_string(buf, len);
+		lo = pdf_code_from_string(buf.scratch, buf.len);
 
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: Lost debugging: "syntaxerror in cmap" */
 		if (tok != PDF_TOK_STRING)
 			fz_throw(ctx, "expected string");
 
-		hi = pdf_code_from_string(buf, len);
+		hi = pdf_code_from_string(buf.scratch, buf.len);
 
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: Lost debugging: "syntaxerror in cmap" */
 		if (tok != PDF_TOK_INT)
 			fz_throw(ctx, "expected integer");
 
-		dst = atoi(buf);
+		dst = buf.i;
 
 		pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
 	}
@@ -170,14 +170,14 @@ pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 static void
 pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 {
-	char buf[256];
+	pdf_lexbuf buf;
 	int tok;
-	int len;
 	int src, dst;
 
+        buf.size = PDF_LEXBUF_SMALL;
 	while (1)
 	{
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: "syntaxerror in cmap" */
 
 		if (tok == TOK_END_CID_CHAR)
@@ -186,15 +186,15 @@ pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 		else if (tok != PDF_TOK_STRING)
 			fz_throw(ctx, "expected string or endcidchar");
 
-		src = pdf_code_from_string(buf, len);
+		src = pdf_code_from_string(buf.scratch, buf.len);
 
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: "syntaxerror in cmap" */
 
 		if (tok != PDF_TOK_INT)
 			fz_throw(ctx, "expected integer");
 
-		dst = atoi(buf);
+		dst = buf.i;
 
 		pdf_map_range_to_range(ctx, cmap, src, src, dst);
 	}
@@ -203,15 +203,15 @@ pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 static void
 pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int lo, int hi)
 {
-	char buf[256];
+	pdf_lexbuf buf;
 	int tok;
-	int len;
 	int dst[256];
 	int i;
 
+        buf.size = PDF_LEXBUF_SMALL;
 	while (1)
 	{
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: "syntaxerror in cmap" */
 
 		if (tok == PDF_TOK_CLOSE_ARRAY)
@@ -221,12 +221,12 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l
 		else if (tok != PDF_TOK_STRING)
 			fz_throw(ctx, "expected string or ]");
 
-		if (len / 2)
+		if (buf.len / 2)
 		{
-			for (i = 0; i < len / 2; i++)
-				dst[i] = pdf_code_from_string(buf + i * 2, 2);
+			for (i = 0; i < buf.len / 2; i++)
+				dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
 
-			pdf_map_one_to_many(ctx, cmap, lo, dst, len / 2);
+			pdf_map_one_to_many(ctx, cmap, lo, dst, buf.len / 2);
 		}
 
 		lo ++;
@@ -236,14 +236,14 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l
 static void
 pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 {
-	char buf[256];
+	pdf_lexbuf buf;
 	int tok;
-	int len;
 	int lo, hi, dst;
 
+        buf.size = PDF_LEXBUF_SMALL;
 	while (1)
 	{
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: "syntaxerror in cmap" */
 
 		if (tok == TOK_END_BF_RANGE)
@@ -252,23 +252,23 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 		else if (tok != PDF_TOK_STRING)
 			fz_throw(ctx, "expected string or endbfrange");
 
-		lo = pdf_code_from_string(buf, len);
+		lo = pdf_code_from_string(buf.scratch, buf.len);
 
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: "syntaxerror in cmap" */
 		if (tok != PDF_TOK_STRING)
 			fz_throw(ctx, "expected string");
 
-		hi = pdf_code_from_string(buf, len);
+		hi = pdf_code_from_string(buf.scratch, buf.len);
 
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: "syntaxerror in cmap" */
 
 		if (tok == PDF_TOK_STRING)
 		{
-			if (len == 2)
+			if (buf.len == 2)
 			{
-				dst = pdf_code_from_string(buf, len);
+				dst = pdf_code_from_string(buf.scratch, buf.len);
 				pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
 			}
 			else
@@ -276,10 +276,10 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 				int dststr[256];
 				int i;
 
-				if (len / 2)
+				if (buf.len / 2)
 				{
-					for (i = 0; i < len / 2; i++)
-						dststr[i] = pdf_code_from_string(buf + i * 2, 2);
+					for (i = 0; i < buf.len / 2; i++)
+						dststr[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
 
 					while (lo <= hi)
 					{
@@ -307,16 +307,16 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 static void
 pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 {
-	char buf[256];
+	pdf_lexbuf buf;
 	int tok;
-	int len;
 	int dst[256];
 	int src;
 	int i;
 
+        buf.size = PDF_LEXBUF_SMALL;
 	while (1)
 	{
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: "syntaxerror in cmap" */
 
 		if (tok == TOK_END_BF_CHAR)
@@ -325,18 +325,18 @@ pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
 		else if (tok != PDF_TOK_STRING)
 			fz_throw(ctx, "expected string or endbfchar");
 
-		src = pdf_code_from_string(buf, len);
+		src = pdf_code_from_string(buf.scratch, buf.len);
 
-		tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+		tok = pdf_lex_cmap(file, &buf);
 		/* RJW: "syntaxerror in cmap" */
 		/* Note: does not handle /dstName */
 		if (tok != PDF_TOK_STRING)
 			fz_throw(ctx, "expected string");
 
-		if (len / 2)
+		if (buf.len / 2)
 		{
-			for (i = 0; i < len / 2; i++)
-				dst[i] = pdf_code_from_string(buf + i * 2, 2);
+			for (i = 0; i < buf.len / 2; i++)
+				dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
 			pdf_map_one_to_many(ctx, cmap, src, dst, i);
 		}
 	}
@@ -347,11 +347,11 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
 {
 	pdf_cmap *cmap;
 	char key[64];
-	char buf[256];
+	pdf_lexbuf buf;
 	int tok;
-	int len;
 	const char *where;
 
+        buf.size = PDF_LEXBUF_SMALL;
 	cmap = pdf_new_cmap(ctx);
 
 	strcpy(key, ".notdef");
@@ -363,25 +363,25 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
 		while (1)
 		{
 			where = "";
-			tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+			tok = pdf_lex_cmap(file, &buf);
 
 			if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP)
 				break;
 
 			else if (tok == PDF_TOK_NAME)
 			{
-				if (!strcmp(buf, "CMapName"))
+				if (!strcmp(buf.scratch, "CMapName"))
 				{
 					where = " after CMapName";
 					pdf_parse_cmap_name(ctx, cmap, file);
 				}
-				else if (!strcmp(buf, "WMode"))
+				else if (!strcmp(buf.scratch, "WMode"))
 				{
 					where = " after WMode";
 					pdf_parse_wmode(ctx, cmap, file);
 				}
 				else
-					fz_strlcpy(key, buf, sizeof key);
+					fz_strlcpy(key, buf.scratch, sizeof key);
 			}
 
 			else if (tok == TOK_USECMAP)
diff --git a/pdf/pdf_function.c b/pdf/pdf_function.c
index 4478827c..17373f42 100644
--- a/pdf/pdf_function.c
+++ b/pdf/pdf_function.c
@@ -683,18 +683,18 @@ resize_code(fz_context *ctx, pdf_function *func, int newsize)
 static void
 parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 {
-	char buf[64];
-	int len;
+	pdf_lexbuf buf;
 	int tok;
 	int opptr, elseptr, ifptr;
 	int a, b, mid, cmp;
 	fz_context *ctx = stream->ctx;
 
-	memset(buf, 0, sizeof(buf));
+        buf.size = PDF_LEXBUF_SMALL;
+	memset(buf.scratch, 0, sizeof(buf.scratch));
 
 	while (1)
 	{
-		tok = pdf_lex(stream, buf, sizeof buf, &len);
+		tok = pdf_lex(stream, &buf);
 		/* RJW: "calculator function lexical error" */
 
 		switch(tok)
@@ -705,7 +705,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 		case PDF_TOK_INT:
 			resize_code(ctx, func, *codeptr);
 			func->u.p.code[*codeptr].type = PS_INT;
-			func->u.p.code[*codeptr].u.i = atoi(buf);
+			func->u.p.code[*codeptr].u.i = buf.i;
 			++*codeptr;
 			break;
 
@@ -726,7 +726,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 		case PDF_TOK_REAL:
 			resize_code(ctx, func, *codeptr);
 			func->u.p.code[*codeptr].type = PS_REAL;
-			func->u.p.code[*codeptr].u.f = fz_atof(buf);
+			func->u.p.code[*codeptr].u.f = buf.f;
 			++*codeptr;
 			break;
 
@@ -740,7 +740,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 			parse_code(func, stream, codeptr);
 			/* RJW: "error in 'if' branch" */
 
-			tok = pdf_lex(stream, buf, sizeof buf, &len);
+			tok = pdf_lex(stream, &buf);
 			/* RJW: "calculator function syntax error" */
 
 			if (tok == PDF_TOK_OPEN_BRACE)
@@ -749,7 +749,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 				parse_code(func, stream, codeptr);
 				/* RJW: "error in 'else' branch" */
 
-				tok = pdf_lex(stream, buf, sizeof buf, &len);
+				tok = pdf_lex(stream, &buf);
 				/* RJW: "calculator function syntax error" */
 			}
 			else
@@ -760,7 +760,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 			if (tok != PDF_TOK_KEYWORD)
 				fz_throw(ctx, "missing keyword in 'if-else' context");
 
-			if (!strcmp(buf, "if"))
+			if (!strcmp(buf.scratch, "if"))
 			{
 				if (elseptr >= 0)
 					fz_throw(ctx, "too many branches for 'if'");
@@ -771,7 +771,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 				func->u.p.code[opptr+3].type = PS_BLOCK;
 				func->u.p.code[opptr+3].u.block = *codeptr;
 			}
-			else if (!strcmp(buf, "ifelse"))
+			else if (!strcmp(buf.scratch, "ifelse"))
 			{
 				if (elseptr < 0)
 					fz_throw(ctx, "not enough branches for 'ifelse'");
@@ -786,7 +786,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 			}
 			else
 			{
-				fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf);
+				fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf.scratch);
 			}
 			break;
 
@@ -804,7 +804,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 			while (b - a > 1)
 			{
 				mid = (a + b) / 2;
-				cmp = strcmp(buf, ps_op_names[mid]);
+				cmp = strcmp(buf.scratch, ps_op_names[mid]);
 				if (cmp > 0)
 					a = mid;
 				else if (cmp < 0)
@@ -813,7 +813,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
 					a = b = mid;
 			}
 			if (cmp != 0)
-				fz_throw(ctx, "unknown operator: '%s'", buf);
+				fz_throw(ctx, "unknown operator: '%s'", buf.scratch);
 
 			resize_code(ctx, func, *codeptr);
 			func->u.p.code[*codeptr].type = PS_OPERATOR;
@@ -832,12 +832,13 @@ load_postscript_func(pdf_function *func, pdf_document *xref, fz_obj *dict, int n
 {
 	fz_stream *stream = NULL;
 	int codeptr;
-	char buf[64];
+	pdf_lexbuf buf;
 	int tok;
-	int len;
 	fz_context *ctx = xref->ctx;
 	int locked = 0;
 
+	buf.size = PDF_LEXBUF_SMALL;
+
 	fz_var(stream);
 	fz_var(locked);
 
@@ -846,7 +847,7 @@ load_postscript_func(pdf_function *func, pdf_document *xref, fz_obj *dict, int n
 		stream = pdf_open_stream(xref, num, gen);
 		/* RJW: "cannot open calculator function stream" */
 
-		tok = pdf_lex(stream, buf, sizeof buf, &len);
+		tok = pdf_lex(stream, &buf);
 		if (tok != PDF_TOK_OPEN_BRACE)
 		{
 			fz_throw(ctx, "stream is not a calculator function");
diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c
index effea657..1cfe6a96 100644
--- a/pdf/pdf_interpret.c
+++ b/pdf/pdf_interpret.c
@@ -819,6 +819,7 @@ pdf_show_string(pdf_csi *csi, unsigned char *buf, int len)
 	{
 		int w = pdf_decode_cmap(fontdesc->encoding, buf, &cpt);
 		buf += w;
+                
 		cid = pdf_lookup_cmap(fontdesc->encoding, cpt);
 		if (cid >= 0)
 			pdf_show_char(csi, cid);
@@ -1625,12 +1626,10 @@ static void pdf_run_BI(pdf_csi *csi, fz_obj *rdb, fz_stream *file)
 {
 	fz_context *ctx = csi->dev->ctx;
 	int ch;
-	char *buf = csi->xref->scratch;
-	int buflen = sizeof(csi->xref->scratch);
 	fz_image *img;
 	fz_obj *obj;
 
-	obj = pdf_parse_dict(csi->xref, file, buf, buflen);
+	obj = pdf_parse_dict(csi->xref, file, &csi->xref->lexbuf.base);
 	/* RJW: "cannot parse inline image dictionary" */
 
 	/* read whitespace after ID keyword */
@@ -2523,10 +2522,10 @@ pdf_run_keyword(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf)
 }
 
 static void
-pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen)
+pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, pdf_lexbuf *buf)
 {
 	fz_context *ctx = csi->dev->ctx;
-	int tok, len, in_array;
+	int tok, in_array;
 
 	/* make sure we have a clean slate if we come here from flush_text */
 	pdf_clear_stack(csi);
@@ -2551,7 +2550,7 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen
 			csi->cookie->progress++;
 		}
 
-		tok = pdf_lex(file, buf, buflen, &len);
+		tok = pdf_lex(file, buf);
 		/* RJW: "lexical error in content stream" */
 
 		if (in_array)
@@ -2560,19 +2559,24 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen
 			{
 				in_array = 0;
 			}
-			else if (tok == PDF_TOK_INT || tok == PDF_TOK_REAL)
+			else if (tok == PDF_TOK_REAL)
 			{
 				pdf_gstate *gstate = csi->gstate + csi->gtop;
-				pdf_show_space(csi, -fz_atof(buf) * gstate->size * 0.001f);
+				pdf_show_space(csi, -buf->f * gstate->size * 0.001f);
+			}
+			else if (tok == PDF_TOK_INT)
+			{
+				pdf_gstate *gstate = csi->gstate + csi->gtop;
+				pdf_show_space(csi, -buf->i * gstate->size * 0.001f);
 			}
 			else if (tok == PDF_TOK_STRING)
 			{
-				pdf_show_string(csi, (unsigned char *)buf, len);
+				pdf_show_string(csi, (unsigned char *)buf->scratch, buf->len);
 			}
 			else if (tok == PDF_TOK_KEYWORD)
 			{
-				if (!strcmp(buf, "Tw") || !strcmp(buf, "Tc"))
-					fz_warn(ctx, "ignoring keyword '%s' inside array", buf);
+				if (!strcmp(buf->scratch, "Tw") || !strcmp(buf->scratch, "Tc"))
+					fz_warn(ctx, "ignoring keyword '%s' inside array", buf->scratch);
 				else
 					fz_throw(ctx, "syntax error in array");
 			}
@@ -2591,7 +2595,7 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen
 		case PDF_TOK_OPEN_ARRAY:
 			if (!csi->in_text)
 			{
-				csi->obj = pdf_parse_array(csi->xref, file, buf, buflen);
+				csi->obj = pdf_parse_array(csi->xref, file, buf);
 				/* RJW: "cannot parse array" */
 			}
 			else
@@ -2601,38 +2605,38 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen
 			break;
 
 		case PDF_TOK_OPEN_DICT:
-			csi->obj = pdf_parse_dict(csi->xref, file, buf, buflen);
+			csi->obj = pdf_parse_dict(csi->xref, file, buf);
 			/* RJW: "cannot parse dictionary" */
 			break;
 
 		case PDF_TOK_NAME:
-			fz_strlcpy(csi->name, buf, sizeof(csi->name));
+			fz_strlcpy(csi->name, buf->scratch, sizeof(csi->name));
 			break;
 
 		case PDF_TOK_INT:
-			csi->stack[csi->top] = atoi(buf);
+			csi->stack[csi->top] = buf->i;
 			csi->top ++;
 			break;
 
 		case PDF_TOK_REAL:
-			csi->stack[csi->top] = fz_atof(buf);
+			csi->stack[csi->top] = buf->f;
 			csi->top ++;
 			break;
 
 		case PDF_TOK_STRING:
-			if (len <= sizeof(csi->string))
+			if (buf->len <= sizeof(csi->string))
 			{
-				memcpy(csi->string, buf, len);
-				csi->string_len = len;
+				memcpy(csi->string, buf->scratch, buf->len);
+				csi->string_len = buf->len;
 			}
 			else
 			{
-				csi->obj = fz_new_string(ctx, buf, len);
+				csi->obj = fz_new_string(ctx, buf->scratch, buf->len);
 			}
 			break;
 
 		case PDF_TOK_KEYWORD:
-			pdf_run_keyword(csi, rdb, file, buf);
+			pdf_run_keyword(csi, rdb, file, buf->scratch);
 			/* RJW: "cannot run keyword" */
 			pdf_clear_stack(csi);
 			break;
@@ -2651,8 +2655,7 @@ static void
 pdf_run_buffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents)
 {
 	fz_context *ctx = csi->dev->ctx;
-	int len = sizeof csi->xref->scratch;
-	char *buf = NULL;
+	pdf_lexbuf_large *buf = NULL;	/* must start NULL: the cleanup block frees it even if fz_malloc throws */
 	fz_stream * file = NULL;
 	int save_in_text;
 
@@ -2664,13 +2667,14 @@ pdf_run_buffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents)
 
 	fz_try(ctx)
 	{
-		buf = fz_malloc(ctx, len); /* we must be re-entrant for type3 fonts */
+		buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */
+		buf->base.size = PDF_LEXBUF_LARGE;	/* size value for the large buffer variant */
 		file = fz_open_buffer(ctx, contents);
 		save_in_text = csi->in_text;
 		csi->in_text = 0;
 		fz_try(ctx)
 		{
-			pdf_run_stream(csi, rdb, file, buf, len);
+			pdf_run_stream(csi, rdb, file, &buf->base);	/* the lexer API takes the embedded pdf_lexbuf */
 		}
 		fz_catch(ctx)
 		{
@@ -2678,14 +2682,15 @@ pdf_run_buffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents)
 		}
 		csi->in_text = save_in_text;
 	}
-	fz_catch(ctx)
-	{
+	fz_always(ctx)	/* cleanup moved here from the catch block and the function tail */
+	{
 		fz_close(file);
 		fz_free(ctx, buf);
+	}
+	fz_catch(ctx)
+	{
 		fz_throw(ctx, "cannot parse context stream");
 	}
-	fz_close(file);
-	fz_free(ctx, buf);
 }
 
 void
diff --git a/pdf/pdf_lex.c b/pdf/pdf_lex.c
index 24828412..322d945c 100644
--- a/pdf/pdf_lex.c
+++ b/pdf/pdf_lex.c
@@ -63,87 +63,106 @@ lex_comment(fz_stream *f)
 }
 
 static int
-lex_number(fz_stream *f, char *s, int n, int *tok)
+lex_number(fz_stream *f, pdf_lexbuf *buf, int c)	/* c: first character, already consumed by the caller; returns PDF_TOK_INT (buf->i) or PDF_TOK_REAL (buf->f) */
 {
-	char *buf = s;
-	*tok = PDF_TOK_INT;
+	int neg = 0;	/* set when the number starts with '-' */
+	int i = 0;	/* integer part, accumulated as a non-negative value */
+	int n;	/* fraction digits read so far, as an integer */
+	int d;	/* power of ten matching the number of digits held in n */
+	float v;
 
 	/* Initially we might have +, -, . or a digit */
-	if (n > 1)
+	switch (c)
+	{
+	case '.':
+		goto loop_after_dot;
+	case '-':
+		neg = 1;
+		break;
+	case '+':
+		break;
+	default: /* Must be a digit */
+		i = c - '0';
+		break;
+	}
+
+	while (1)
 	{
 		int c = fz_read_byte(f);
 		switch (c)
 		{
 		case '.':
-			*tok = PDF_TOK_REAL;
-			*s++ = c;
-			n--;
 			goto loop_after_dot;
-		case '+':
-		case '-':
 		case RANGE_0_9:
-			*s++ = c;
-			n--;
-			goto loop_after_sign;
+			/* Saturate at INT_MAX: signed overflow would be undefined behaviour */
+			i = (i > (INT_MAX - (c - '0')) / 10) ? INT_MAX : 10*i + (c - '0');
+			break;
 		default:
 			fz_unread_byte(f);
-			goto end;
+			/* Fallthrough */
 		case EOF:
-			goto end;
+			if (neg)
+				i = -i;
+			buf->i = i;
+			return PDF_TOK_INT;
 		}
 	}
 
-	/* We can't accept a sign from here on in, just . or a digit */
-loop_after_sign:
-	while (n > 1)
+	/* In here, we've seen a dot, so can accept just digits */
+loop_after_dot:
+	n = 0;
+	d = 1;
+	while (1)
 	{
 		int c = fz_read_byte(f);
 		switch (c)
 		{
-		case '.':
-			*tok = PDF_TOK_REAL;
-			*s++ = c;
-			n--;
-			goto loop_after_dot;
 		case RANGE_0_9:
-			*s++ = c;
+			if (d >= INT_MAX/10)	/* another digit would overflow d (and n) */
+				goto underflow;
+			n = n*10 + (c - '0');
+			d *= 10;
 			break;
 		default:
 			fz_unread_byte(f);
-			goto end;
+			/* Fallthrough */
 		case EOF:
-			goto end;
+			v = (float)i + ((float)n / (float)d);	/* integer part plus fraction n/d */
+			if (neg)
+				v = -v;
+			buf->f = v;
+			return PDF_TOK_REAL;
 		}
-		n--;
 	}
 
-	/* In here, we've seen a dot, so can accept just digits */
-loop_after_dot:
-	while (n > 1)
+underflow:
+	/* Ignore any digits after here, because they are too small */
+	while (1)
 	{
 		int c = fz_read_byte(f);
 		switch (c)
 		{
 		case RANGE_0_9:
-			*s++ = c;
 			break;
 		default:
 			fz_unread_byte(f);
-			goto end;
+			/* Fallthrough */
 		case EOF:
-			goto end;
+			v = (float)i + ((float)n / (float)d);	/* same result computation as above */
+			if (neg)
+				v = -v;
+			buf->f = v;
+			return PDF_TOK_REAL;
 		}
-		n--;
 	}
-
-end:
-	*s = '\0';
-	return s-buf;
 }
 
 static void
-lex_name(fz_stream *f, char *s, int n)
+lex_name(fz_stream *f, pdf_lexbuf *buf)
 {
+	char *s = buf->scratch;
+	int n = buf->size;
+
 	while (n > 1)
 	{
 		int c = fz_read_byte(f);
@@ -208,6 +227,7 @@ lex_name(fz_stream *f, char *s, int n)
 	}
 end:
 	*s = '\0';
+	buf->len = s - buf->scratch;
 }
 
 static int
@@ -380,7 +400,7 @@ pdf_token_from_keyword(char *key)
 }
 
 int
-pdf_lex(fz_stream *f, char *buf, int n, int *sl)
+pdf_lex(fz_stream *f, pdf_lexbuf *buf)
 {
 	while (1)
 	{
@@ -396,11 +416,10 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
 			lex_comment(f);
 			break;
 		case '/':
-			lex_name(f, buf, n);
-			*sl = strlen(buf);
+			lex_name(f, buf);
 			return PDF_TOK_NAME;
 		case '(':
-			*sl = lex_string(f, buf, n);
+			buf->len = lex_string(f, buf->scratch, buf->size);
 			return PDF_TOK_STRING;
 		case ')':
 			fz_warn(f->ctx, "lexical error (unexpected ')')");
@@ -414,7 +433,7 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
 			else
 			{
 				fz_unread_byte(f);
-				*sl = lex_hex_string(f, buf, n);
+				buf->len = lex_hex_string(f, buf->scratch, buf->size);
 				return PDF_TOK_STRING;
 			}
 		case '>':
@@ -434,17 +453,11 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
 		case '}':
 			return PDF_TOK_CLOSE_BRACE;
 		case IS_NUMBER:
-			{
-				int tok;
-				fz_unread_byte(f);
-				*sl = lex_number(f, buf, n, &tok);
-				return tok;
-			}
+			return lex_number(f, buf, c);
 		default: /* isregular: !isdelim && !iswhite && c != EOF */
 			fz_unread_byte(f);
-			lex_name(f, buf, n);
-			*sl = strlen(buf);
-			return pdf_token_from_keyword(buf);
+			lex_name(f, buf);
+			return pdf_token_from_keyword(buf->scratch);
 		}
 	}
 }
diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c
index 220eb30c..fb6cb7ef 100644
--- a/pdf/pdf_parse.c
+++ b/pdf/pdf_parse.c
@@ -171,13 +171,12 @@ pdf_to_utf8_name(fz_context *ctx, fz_obj *src)
 }
 
 fz_obj *
-pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap)
+pdf_parse_array(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
 {
 	fz_obj *ary = NULL;
 	fz_obj *obj = NULL;
 	int a = 0, b = 0, n = 0;
 	int tok;
-	int len;
 	fz_context *ctx = file->ctx;
 	fz_obj *op;
 
@@ -189,7 +188,7 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap)
 	{
 		while (1)
 		{
-			tok = pdf_lex(file, buf, cap, &len);
+			tok = pdf_lex(file, buf);
 
 			if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
 			{
@@ -228,9 +227,9 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap)
 
 			case PDF_TOK_INT:
 				if (n == 0)
-					a = atoi(buf);
+					a = buf->i;
 				if (n == 1)
-					b = atoi(buf);
+					b = buf->i;
 				n ++;
 				break;
 
@@ -245,33 +244,33 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap)
 				break;
 
 			case PDF_TOK_OPEN_ARRAY:
-				obj = pdf_parse_array(xref, file, buf, cap);
+				obj = pdf_parse_array(xref, file, buf);
 				fz_array_push(ary, obj);
 				fz_drop_obj(obj);
 				obj = NULL;
 				break;
 
 			case PDF_TOK_OPEN_DICT:
-				obj = pdf_parse_dict(xref, file, buf, cap);
+				obj = pdf_parse_dict(xref, file, buf);
 				fz_array_push(ary, obj);
 				fz_drop_obj(obj);
 				obj = NULL;
 				break;
 
 			case PDF_TOK_NAME:
-				obj = fz_new_name(ctx, buf);
+				obj = fz_new_name(ctx, buf->scratch);
 				fz_array_push(ary, obj);
 				fz_drop_obj(obj);
 				obj = NULL;
 				break;
 			case PDF_TOK_REAL:
-				obj = fz_new_real(ctx, fz_atof(buf));
+				obj = fz_new_real(ctx, buf->f);
 				fz_array_push(ary, obj);
 				fz_drop_obj(obj);
 				obj = NULL;
 				break;
 			case PDF_TOK_STRING:
-				obj = fz_new_string(ctx, buf, len);
+				obj = fz_new_string(ctx, buf->scratch, buf->len);
 				fz_array_push(ary, obj);
 				fz_drop_obj(obj);
 				obj = NULL;
@@ -312,13 +311,12 @@ end:
 }
 
 fz_obj *
-pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap)
+pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
 {
 	fz_obj *dict = NULL;
 	fz_obj *key = NULL;
 	fz_obj *val = NULL;
 	int tok;
-	int len;
 	int a, b;
 	fz_context *ctx = file->ctx;
 
@@ -332,45 +330,45 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap)
 	{
 		while (1)
 		{
-			tok = pdf_lex(file, buf, cap, &len);
+			tok = pdf_lex(file, buf);
 	skip:
 			if (tok == PDF_TOK_CLOSE_DICT)
 				break;
 
 			/* for BI .. ID .. EI in content streams */
-			if (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID"))
+			if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))
 				break;
 
 			if (tok != PDF_TOK_NAME)
 				fz_throw(ctx, "invalid key in dict");
 
-			key = fz_new_name(ctx, buf);
+			key = fz_new_name(ctx, buf->scratch);
 
-			tok = pdf_lex(file, buf, cap, &len);
+			tok = pdf_lex(file, buf);
 
 			switch (tok)
 			{
 			case PDF_TOK_OPEN_ARRAY:
-				val = pdf_parse_array(xref, file, buf, cap);
+				val = pdf_parse_array(xref, file, buf);
 				break;
 
 			case PDF_TOK_OPEN_DICT:
-				val = pdf_parse_dict(xref, file, buf, cap);
+				val = pdf_parse_dict(xref, file, buf);
 				break;
 
-			case PDF_TOK_NAME: val = fz_new_name(ctx, buf); break;
-			case PDF_TOK_REAL: val = fz_new_real(ctx, fz_atof(buf)); break;
-			case PDF_TOK_STRING: val = fz_new_string(ctx, buf, len); break;
+			case PDF_TOK_NAME: val = fz_new_name(ctx, buf->scratch); break;
+			case PDF_TOK_REAL: val = fz_new_real(ctx, buf->f); break;
+			case PDF_TOK_STRING: val = fz_new_string(ctx, buf->scratch, buf->len); break;
 			case PDF_TOK_TRUE: val = fz_new_bool(ctx, 1); break;
 			case PDF_TOK_FALSE: val = fz_new_bool(ctx, 0); break;
 			case PDF_TOK_NULL: val = fz_new_null(ctx); break;
 
 			case PDF_TOK_INT:
-				/* 64-bit to allow for numbers > INT_MAX and overflow */
+				/* value was already converted to int by the lexer (buf->i) */
-				a = (int) strtoll(buf, 0, 10);
-				tok = pdf_lex(file, buf, cap, &len);
+				a = buf->i;
+				tok = pdf_lex(file, buf);
 				if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
-					(tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
+					(tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
 				{
 					val = fz_new_int(ctx, a);
 					fz_dict_put(dict, key, val);
@@ -382,8 +380,8 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap)
 				}
 				if (tok == PDF_TOK_INT)
 				{
-					b = atoi(buf);
-					tok = pdf_lex(file, buf, cap, &len);
+					b = buf->i;
+					tok = pdf_lex(file, buf);
 					if (tok == PDF_TOK_R)
 					{
 						val = fz_new_indirect(ctx, a, b, xref);
@@ -414,30 +412,29 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap)
 }
 
 fz_obj *
-pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, char *buf, int cap)
+pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
 {
 	int tok;
-	int len;
 	fz_context *ctx = file->ctx;
 
-	tok = pdf_lex(file, buf, cap, &len);
+	tok = pdf_lex(file, buf);
 	/* RJW: "cannot parse token in object stream") */
 
 	switch (tok)
 	{
 	case PDF_TOK_OPEN_ARRAY:
-		return pdf_parse_array(xref, file, buf, cap);
+		return pdf_parse_array(xref, file, buf);
 		/* RJW: "cannot parse object stream" */
 	case PDF_TOK_OPEN_DICT:
-		return pdf_parse_dict(xref, file, buf, cap);
+		return pdf_parse_dict(xref, file, buf);
 		/* RJW: "cannot parse object stream" */
-	case PDF_TOK_NAME: return fz_new_name(ctx, buf); break;
-	case PDF_TOK_REAL: return fz_new_real(ctx, fz_atof(buf)); break;
-	case PDF_TOK_STRING: return fz_new_string(ctx, buf, len); break;
+	case PDF_TOK_NAME: return fz_new_name(ctx, buf->scratch); break;
+	case PDF_TOK_REAL: return fz_new_real(ctx, buf->f); break;
+	case PDF_TOK_STRING: return fz_new_string(ctx, buf->scratch, buf->len); break;
 	case PDF_TOK_TRUE: return fz_new_bool(ctx, 1); break;
 	case PDF_TOK_FALSE: return fz_new_bool(ctx, 0); break;
 	case PDF_TOK_NULL: return fz_new_null(ctx); break;
-	case PDF_TOK_INT: return fz_new_int(ctx, atoi(buf)); break;
+	case PDF_TOK_INT: return fz_new_int(ctx, buf->i); break;
 	default: fz_throw(ctx, "unknown token in object stream");
 	}
 	return NULL; /* Stupid MSVC */
@@ -445,60 +442,59 @@ pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, char *buf, int cap)
 
 fz_obj *
 pdf_parse_ind_obj(pdf_document *xref,
-	fz_stream *file, char *buf, int cap,
+	fz_stream *file, pdf_lexbuf *buf,
 	int *onum, int *ogen, int *ostmofs)
 {
 	fz_obj *obj = NULL;
 	int num = 0, gen = 0, stm_ofs;
 	int tok;
-	int len;
 	int a, b;
 	fz_context *ctx = file->ctx;
 
 	fz_var(obj);
 
-	tok = pdf_lex(file, buf, cap, &len);
+	tok = pdf_lex(file, buf);
 	/* RJW: cannot parse indirect object (%d %d R)", num, gen */
 	if (tok != PDF_TOK_INT)
 		fz_throw(ctx, "expected object number (%d %d R)", num, gen);
-	num = atoi(buf);
+	num = buf->i;
 
-	tok = pdf_lex(file, buf, cap, &len);
+	tok = pdf_lex(file, buf);
 	/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
 	if (tok != PDF_TOK_INT)
 		fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
-	gen = atoi(buf);
+	gen = buf->i;
 
-	tok = pdf_lex(file, buf, cap, &len);
+	tok = pdf_lex(file, buf);
 	/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
 	if (tok != PDF_TOK_OBJ)
 		fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);
 
-	tok = pdf_lex(file, buf, cap, &len);
+	tok = pdf_lex(file, buf);
 	/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
 
 	switch (tok)
 	{
 	case PDF_TOK_OPEN_ARRAY:
-		obj = pdf_parse_array(xref, file, buf, cap);
+		obj = pdf_parse_array(xref, file, buf);
 		/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
 		break;
 
 	case PDF_TOK_OPEN_DICT:
-		obj = pdf_parse_dict(xref, file, buf, cap);
+		obj = pdf_parse_dict(xref, file, buf);
 		/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
 		break;
 
-	case PDF_TOK_NAME: obj = fz_new_name(ctx, buf); break;
-	case PDF_TOK_REAL: obj = fz_new_real(ctx, fz_atof(buf)); break;
-	case PDF_TOK_STRING: obj = fz_new_string(ctx, buf, len); break;
+	case PDF_TOK_NAME: obj = fz_new_name(ctx, buf->scratch); break;
+	case PDF_TOK_REAL: obj = fz_new_real(ctx, buf->f); break;
+	case PDF_TOK_STRING: obj = fz_new_string(ctx, buf->scratch, buf->len); break;
 	case PDF_TOK_TRUE: obj = fz_new_bool(ctx, 1); break;
 	case PDF_TOK_FALSE: obj = fz_new_bool(ctx, 0); break;
 	case PDF_TOK_NULL: obj = fz_new_null(ctx); break;
 
 	case PDF_TOK_INT:
-		a = atoi(buf);
-		tok = pdf_lex(file, buf, cap, &len);
+		a = buf->i;
+		tok = pdf_lex(file, buf);
 		/* "cannot parse indirect object (%d %d R)", num, gen */
 		if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
 		{
@@ -507,8 +503,8 @@ pdf_parse_ind_obj(pdf_document *xref,
 		}
 		if (tok == PDF_TOK_INT)
 		{
-			b = atoi(buf);
-			tok = pdf_lex(file, buf, cap, &len);
+			b = buf->i;
+			tok = pdf_lex(file, buf);
 			/* RJW: "cannot parse indirect object (%d %d R)", num, gen); */
 			if (tok == PDF_TOK_R)
 			{
@@ -528,7 +524,7 @@ pdf_parse_ind_obj(pdf_document *xref,
 
 	fz_try(ctx)
 	{
-		tok = pdf_lex(file, buf, cap, &len);
+		tok = pdf_lex(file, buf);
 	}
 	fz_catch(ctx)
 	{
diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c
index 0dc0e132..c70df3e2 100644
--- a/pdf/pdf_repair.c
+++ b/pdf/pdf_repair.c
@@ -13,11 +13,10 @@ struct entry
 };
 
 static void
-pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
+pdf_repair_obj(fz_stream *file, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
 {
 	int tok;
 	int stm_len;
-	int len;
 	int n;
 	fz_context *ctx = file->ctx;
 
@@ -26,7 +25,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
 
 	stm_len = 0;
 
-	tok = pdf_lex(file, buf, cap, &len);
+	tok = pdf_lex(file, buf);
 	/* RJW: "cannot parse object" */
 	if (tok == PDF_TOK_OPEN_DICT)
 	{
@@ -35,7 +34,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
 		/* Send NULL xref so we don't try to resolve references */
 		fz_try(ctx)
 		{
-			dict = pdf_parse_dict(NULL, file, buf, cap);
+			dict = pdf_parse_dict(NULL, file, buf);
 		}
 		fz_catch(ctx)
 		{
@@ -79,13 +78,13 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
 		tok != PDF_TOK_EOF &&
 		tok != PDF_TOK_INT )
 	{
-		tok = pdf_lex(file, buf, cap, &len);
+		tok = pdf_lex(file, buf);
 		/* RJW: "cannot scan for endobj or stream token" */
 	}
 
 	if (tok == PDF_TOK_INT)
 	{
-		while (len-- > 0)
+		while (buf->len-- > 0)
 			fz_unread_byte(file);
 	}
 	else if (tok == PDF_TOK_STREAM)
@@ -106,7 +105,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
 			fz_seek(file, *stmofsp + stm_len, 0);
 			fz_try(ctx)
 			{
-				tok = pdf_lex(file, buf, cap, &len);
+				tok = pdf_lex(file, buf);
 			}
 			fz_catch(ctx)
 			{
@@ -117,23 +116,23 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
 			fz_seek(file, *stmofsp, 0);
 		}
 
-		n = fz_read(file, (unsigned char *) buf, 9);
+		n = fz_read(file, (unsigned char *) buf->scratch, 9);
 		if (n < 0)
 			fz_throw(ctx, "cannot read from file");
 
-		while (memcmp(buf, "endstream", 9) != 0)
+		while (memcmp(buf->scratch, "endstream", 9) != 0)
 		{
 			c = fz_read_byte(file);
 			if (c == EOF)
 				break;
-			memmove(buf, buf + 1, 8);
-			buf[8] = c;
+			memmove(&buf->scratch[0], &buf->scratch[1], 8);
+			buf->scratch[8] = c;
 		}
 
 		*stmlenp = fz_tell(file) - *stmofsp - 9;
 
 atobjend:
-		tok = pdf_lex(file, buf, cap, &len);
+		tok = pdf_lex(file, buf);
 		/* RJW: "cannot scan for endobj token" */
 		if (tok != PDF_TOK_ENDOBJ)
 			fz_warn(ctx, "object missing 'endobj' token");
@@ -147,11 +146,13 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
 	fz_stream *stm = NULL;
 	int tok;
 	int i, n, count;
-	char buf[256];
 	fz_context *ctx = xref->ctx;
+	pdf_lexbuf buf;
 
 	fz_var(stm);
 
+	buf.size = PDF_LEXBUF_SMALL;
+
 	fz_try(ctx)
 	{
 		obj = pdf_load_object(xref, num, gen);
@@ -164,11 +165,11 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
 
 		for (i = 0; i < count; i++)
 		{
-			tok = pdf_lex(stm, buf, sizeof buf, &n);
+			tok = pdf_lex(stm, &buf);
 			if (tok != PDF_TOK_INT)
 				fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
 
-			n = atoi(buf);
+			n = buf.i;
 			if (n >= xref->len)
 				pdf_resize_xref(xref, n + 1);
 
@@ -179,7 +180,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
 			xref->table[n].obj = NULL;
 			xref->table[n].type = 'o';
 
-			tok = pdf_lex(stm, buf, sizeof buf, &n);
+			tok = pdf_lex(stm, &buf);
 			if (tok != PDF_TOK_INT)
 				fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
 		}
@@ -195,7 +196,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
 }
 
 void
-pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
+pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf)
 {
 	fz_obj *dict, *obj;
 	fz_obj *length;
@@ -234,14 +235,14 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
 		list = fz_malloc_array(ctx, listcap, sizeof(struct entry));
 
 		/* look for '%PDF' version marker within first kilobyte of file */
-		n = fz_read(xref->file, (unsigned char *)buf, MIN(bufsize, 1024));
+		n = fz_read(xref->file, (unsigned char *)buf->scratch, MIN(buf->size, 1024));
 		if (n < 0)
 			fz_throw(ctx, "cannot read from file");
 
 		fz_seek(xref->file, 0, 0);
 		for (i = 0; i < n - 4; i++)
 		{
-			if (memcmp(buf + i, "%PDF", 4) == 0)
+			if (memcmp(&buf->scratch[i], "%PDF", 4) == 0)
 			{
 				fz_seek(xref->file, i + 8, 0); /* skip "%PDF-X.Y" */
 				break;
@@ -263,7 +264,7 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
 
 			fz_try(ctx)
 			{
-				tok = pdf_lex(xref->file, buf, bufsize, &n);
+				tok = pdf_lex(xref->file, buf);
 			}
 			fz_catch(ctx)
 			{
@@ -276,14 +277,14 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
 				numofs = genofs;
 				num = gen;
 				genofs = tmpofs;
-				gen = atoi(buf);
+				gen = buf->i;
 			}
 
 			else if (tok == PDF_TOK_OBJ)
 			{
 				fz_try(ctx)
 				{
-					pdf_repair_obj(xref->file, buf, bufsize, &stm_ofs, &stm_len, &encrypt, &id);
+					pdf_repair_obj(xref->file, buf, &stm_ofs, &stm_len, &encrypt, &id);
 				}
 				fz_catch(ctx)
 				{
@@ -318,7 +319,7 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
 			{
 				fz_try(ctx)
 				{
-					dict = pdf_parse_dict(xref, xref->file, buf, bufsize);
+					dict = pdf_parse_dict(xref, xref->file, buf);
 				}
 				fz_catch(ctx)
 				{
diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c
index 7500ded3..383747a7 100644
--- a/pdf/pdf_xref.c
+++ b/pdf/pdf_xref.c
@@ -51,6 +51,7 @@ pdf_read_start_xref(pdf_document *xref)
 			while (iswhite(buf[i]) && i < n)
 				i ++;
 			xref->startxref = atoi((char*)(buf + i));
+
 			return;
 		}
 	}
@@ -63,17 +64,16 @@ pdf_read_start_xref(pdf_document *xref)
  */
 
 static void
-pdf_read_old_trailer(pdf_document *xref, char *buf, int cap)
+pdf_read_old_trailer(pdf_document *xref, pdf_lexbuf *buf)
 {
 	int len;
 	char *s;
-	int n;
 	int t;
 	int tok;
 	int c;
 
-	fz_read_line(xref->file, buf, cap);
-	if (strncmp(buf, "xref", 4) != 0)
+	fz_read_line(xref->file, buf->scratch, buf->size);
+	if (strncmp(buf->scratch, "xref", 4) != 0)
 		fz_throw(xref->ctx, "cannot find xref marker");
 
 	while (1)
@@ -82,8 +82,8 @@ pdf_read_old_trailer(pdf_document *xref, char *buf, int cap)
 		if (!(c >= '0' && c <= '9'))
 			break;
 
-		fz_read_line(xref->file, buf, cap);
-		s = buf;
+		fz_read_line(xref->file, buf->scratch, buf->size);
+		s = buf->scratch;
 		fz_strsep(&s, " "); /* ignore ofs */
 		if (!s)
 			fz_throw(xref->ctx, "invalid range marker in xref");
@@ -102,15 +102,15 @@ pdf_read_old_trailer(pdf_document *xref, char *buf, int cap)
 
 	fz_try(xref->ctx)
 	{
-		tok = pdf_lex(xref->file, buf, cap, &n);
+		tok = pdf_lex(xref->file, buf);
 		if (tok != PDF_TOK_TRAILER)
 			fz_throw(xref->ctx, "expected trailer marker");
 
-		tok = pdf_lex(xref->file, buf, cap, &n);
+		tok = pdf_lex(xref->file, buf);
 		if (tok != PDF_TOK_OPEN_DICT)
 			fz_throw(xref->ctx, "expected trailer dictionary");
 
-		xref->trailer = pdf_parse_dict(xref, xref->file, buf, cap);
+		xref->trailer = pdf_parse_dict(xref, xref->file, buf);
 	}
 	fz_catch(xref->ctx)
 	{
@@ -119,11 +119,11 @@ pdf_read_old_trailer(pdf_document *xref, char *buf, int cap)
 }
 
 static void
-pdf_read_new_trailer(pdf_document *xref, char *buf, int cap)
+pdf_read_new_trailer(pdf_document *xref, pdf_lexbuf *buf)
 {
 	fz_try(xref->ctx)
 	{
-		xref->trailer = pdf_parse_ind_obj(xref, xref->file, buf, cap, NULL, NULL, NULL);
+		xref->trailer = pdf_parse_ind_obj(xref, xref->file, buf, NULL, NULL, NULL);
 	}
 	fz_catch(xref->ctx)
 	{
@@ -132,7 +132,7 @@ pdf_read_new_trailer(pdf_document *xref, char *buf, int cap)
 }
 
 static void
-pdf_read_trailer(pdf_document *xref, char *buf, int cap)
+pdf_read_trailer(pdf_document *xref, pdf_lexbuf *buf)
 {
 	int c;
 
@@ -145,9 +145,9 @@ pdf_read_trailer(pdf_document *xref, char *buf, int cap)
 	{
 		c = fz_peek_byte(xref->file);
 		if (c == 'x')
-			pdf_read_old_trailer(xref, buf, cap);
+			pdf_read_old_trailer(xref, buf);
 		else if (c >= '0' && c <= '9')
-			pdf_read_new_trailer(xref, buf, cap);
+			pdf_read_new_trailer(xref, buf);
 		else
 			fz_throw(xref->ctx, "cannot recognize xref format: '%c'", c);
 	}
@@ -179,7 +179,7 @@ pdf_resize_xref(pdf_document *xref, int newlen)
 }
 
 static fz_obj *
-pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
+pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf)
 {
 	int ofs, len;
 	char *s;
@@ -189,8 +189,8 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
 	int c;
 	fz_obj *trailer;
 
-	fz_read_line(xref->file, buf, cap);
-	if (strncmp(buf, "xref", 4) != 0)
+	fz_read_line(xref->file, buf->scratch, buf->size);
+	if (strncmp(buf->scratch, "xref", 4) != 0)
 		fz_throw(xref->ctx, "cannot find xref marker");
 
 	while (1)
@@ -199,8 +199,8 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
 		if (!(c >= '0' && c <= '9'))
 			break;
 
-		fz_read_line(xref->file, buf, cap);
-		s = buf;
+		fz_read_line(xref->file, buf->scratch, buf->size);
+		s = buf->scratch;
 		ofs = atoi(fz_strsep(&s, " "));
 		len = atoi(fz_strsep(&s, " "));
 
@@ -220,12 +220,12 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
 
 		for (i = ofs; i < ofs + len; i++)
 		{
-			n = fz_read(xref->file, (unsigned char *) buf, 20);
+			n = fz_read(xref->file, (unsigned char *) buf->scratch, 20);
 			if (n < 0)
 				fz_throw(xref->ctx, "cannot read xref table");
 			if (!xref->table[i].type)
 			{
-				s = buf;
+				s = buf->scratch;
 
 				/* broken pdfs where line start with white space */
 				while (*s != '\0' && iswhite(*s))
@@ -242,15 +242,15 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
 
 	fz_try(xref->ctx)
 	{
-		tok = pdf_lex(xref->file, buf, cap, &n);
+		tok = pdf_lex(xref->file, buf);
 		if (tok != PDF_TOK_TRAILER)
 			fz_throw(xref->ctx, "expected trailer marker");
 
-		tok = pdf_lex(xref->file, buf, cap, &n);
+		tok = pdf_lex(xref->file, buf);
 		if (tok != PDF_TOK_OPEN_DICT)
 			fz_throw(xref->ctx, "expected trailer dictionary");
 
-		trailer = pdf_parse_dict(xref, xref->file, buf, cap);
+		trailer = pdf_parse_dict(xref, xref->file, buf);
 	}
 	fz_catch(xref->ctx)
 	{
@@ -296,7 +296,7 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in
 /* Entered with file locked. Drops the lock in the middle, but then picks
  * it up again before exiting. */
 static fz_obj *
-pdf_read_new_xref(pdf_document *xref, char *buf, int cap)
+pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf)
 {
 	fz_stream *stm = NULL;
 	fz_obj *trailer = NULL;
@@ -312,7 +312,7 @@ pdf_read_new_xref(pdf_document *xref, char *buf, int cap)
 
 	fz_try(ctx)
 	{
-		trailer = pdf_parse_ind_obj(xref, xref->file, buf, cap, &num, &gen, &stm_ofs);
+		trailer = pdf_parse_ind_obj(xref, xref->file, buf, &num, &gen, &stm_ofs);
 	}
 	fz_catch(ctx)
 	{
@@ -378,7 +378,7 @@ pdf_read_new_xref(pdf_document *xref, char *buf, int cap)
 
 /* File is locked on entry, and exit (but may be dropped in the middle) */
 static fz_obj *
-pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap)
+pdf_read_xref(pdf_document *xref, int ofs, pdf_lexbuf *buf)
 {
 	int c;
 	fz_context *ctx = xref->ctx;
@@ -393,9 +393,9 @@ pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap)
 	{
 		c = fz_peek_byte(xref->file);
 		if (c == 'x')
-			trailer = pdf_read_old_xref(xref, buf, cap);
+			trailer = pdf_read_old_xref(xref, buf);
 		else if (c >= '0' && c <= '9')
-			trailer = pdf_read_new_xref(xref, buf, cap);
+			trailer = pdf_read_new_xref(xref, buf);
 		else
 			fz_throw(ctx, "cannot recognize xref format");
 	}
@@ -407,7 +407,7 @@ pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap)
 }
 
 static void
-pdf_read_xref_sections(pdf_document *xref, int ofs, char *buf, int cap)
+pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf)
 {
 	fz_obj *trailer = NULL;
 	fz_obj *xrefstm = NULL;
@@ -416,16 +416,16 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, char *buf, int cap)
 
 	fz_try(ctx)
 	{
-		trailer = pdf_read_xref(xref, ofs, buf, cap);
+		trailer = pdf_read_xref(xref, ofs, buf);
 
 		/* FIXME: do we overwrite free entries properly? */
 		xrefstm = fz_dict_gets(trailer, "XRefStm");
 		if (xrefstm)
-			pdf_read_xref_sections(xref, fz_to_int(xrefstm), buf, cap);
+			pdf_read_xref_sections(xref, fz_to_int(xrefstm), buf);
 
 		prev = fz_dict_gets(trailer, "Prev");
 		if (prev)
-			pdf_read_xref_sections(xref, fz_to_int(prev), buf, cap);
+			pdf_read_xref_sections(xref, fz_to_int(prev), buf);
 	}
 	fz_catch(ctx)
 	{
@@ -441,7 +441,7 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, char *buf, int cap)
  */
 
 static void
-pdf_load_xref(pdf_document *xref, char *buf, int bufsize)
+pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf)
 {
 	fz_obj *size;
 	int i;
@@ -451,7 +451,7 @@ pdf_load_xref(pdf_document *xref, char *buf, int bufsize)
 
 	pdf_read_start_xref(xref);
 
-	pdf_read_trailer(xref, buf, bufsize);
+	pdf_read_trailer(xref, buf);
 
 	size = fz_dict_gets(xref->trailer, "Size");
 	if (!size)
@@ -459,7 +459,7 @@ pdf_load_xref(pdf_document *xref, char *buf, int bufsize)
 
 	pdf_resize_xref(xref, fz_to_int(size));
 
-	pdf_read_xref_sections(xref, xref->startxref, buf, bufsize);
+	pdf_read_xref_sections(xref, xref->startxref, buf);
 
 	/* broken pdfs where first object is not free */
 	if (xref->table[0].type != 'f')
@@ -672,6 +672,7 @@ pdf_open_document_with_stream(fz_stream *file)
 
 	xref = fz_malloc_struct(ctx, pdf_document);
 	pdf_init_document(xref);
+	xref->lexbuf.base.size = PDF_LEXBUF_LARGE;
 
 	xref->file = fz_keep_stream(file);
 	xref->ctx = ctx;
@@ -681,7 +682,7 @@ pdf_open_document_with_stream(fz_stream *file)
 
 	fz_try(ctx)
 	{
-		pdf_load_xref(xref, xref->scratch, sizeof xref->scratch);
+		pdf_load_xref(xref, &xref->lexbuf.base);
 	}
 	fz_catch(ctx)
 	{
@@ -705,7 +706,7 @@ pdf_open_document_with_stream(fz_stream *file)
 		int hasroot, hasinfo;
 
 		if (repaired)
-			pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch);
+			pdf_repair_xref(xref, &xref->lexbuf.base);
 
 		fz_unlock(ctx, FZ_LOCK_FILE);
 		locked = 0;
@@ -864,7 +865,7 @@ pdf_debug_xref(pdf_document *xref)
  */
 
 static void
-pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap)
+pdf_load_obj_stm(pdf_document *xref, int num, int gen, pdf_lexbuf *buf)
 {
 	fz_stream *stm = NULL;
 	fz_obj *objstm = NULL;
@@ -874,7 +875,7 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap)
 	fz_obj *obj;
 	int first;
 	int count;
-	int i, n;
+	int i;
 	int tok;
 	fz_context *ctx = xref->ctx;
 
@@ -896,15 +897,15 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap)
 		stm = pdf_open_stream(xref, num, gen);
 		for (i = 0; i < count; i++)
 		{
-			tok = pdf_lex(stm, buf, cap, &n);
+			tok = pdf_lex(stm, buf);
 			if (tok != PDF_TOK_INT)
 				fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
-			numbuf[i] = atoi(buf);
+			numbuf[i] = buf->i;
 
-			tok = pdf_lex(stm, buf, cap, &n);
+			tok = pdf_lex(stm, buf);
 			if (tok != PDF_TOK_INT)
 				fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
-			ofsbuf[i] = atoi(buf);
+			ofsbuf[i] = buf->i;
 		}
 
 		fz_seek(stm, first, 0);
@@ -913,7 +914,7 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap)
 		{
 			fz_seek(stm, first + ofsbuf[i], 0);
 
-			obj = pdf_parse_stm_obj(xref, stm, buf, cap);
+			obj = pdf_parse_stm_obj(xref, stm, buf);
 			/* RJW: Ensure above does fz_throw(ctx, "cannot parse object %d in stream (%d %d R)", i, num, gen); */
 
 			if (numbuf[i] < 1 || numbuf[i] >= xref->len)
@@ -978,7 +979,7 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
 
 		fz_try(ctx)
 		{
-			x->obj = pdf_parse_ind_obj(xref, xref->file, xref->scratch, sizeof xref->scratch,
+			x->obj = pdf_parse_ind_obj(xref, xref->file, &xref->lexbuf.base,
 					&rnum, &rgen, &x->stm_ofs);
 		}
 		fz_catch(ctx)
@@ -1005,7 +1006,7 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
 		{
 			fz_try(ctx)
 			{
-				pdf_load_obj_stm(xref, x->ofs, 0, xref->scratch, sizeof xref->scratch);
+				pdf_load_obj_stm(xref, x->ofs, 0, &xref->lexbuf.base);
 			}
 			fz_catch(ctx)
 			{