summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2012-06-22 13:26:35 +0100
committerRobin Watts <robin.watts@artifex.com>2012-06-22 16:42:09 +0100
commitb4157f3585c0f7a07305ac324e7ab9d47e7d04ae (patch)
treed993f31a9fb04e9b833ca042e2aa8088e393999c
parent34fc2fb23d9f0a9b2a68896e608ea149d8bc38e2 (diff)
downloadmupdf-b4157f3585c0f7a07305ac324e7ab9d47e7d04ae.tar.xz
Rework pdf_lexbuf to allow for dynamic parsing buffers.
Currently pdf_lexbufs use a static scratch buffer for parsing. In the main case this is 64K in size, but in other cases it can be just 256 bytes; this causes problems when parsing long strings. Even the 64K limit is an implementation limit of Acrobat, not an architectural limit of PDF. This commit changes the code to allow dynamic buffers. This means a slightly more complex setup and destruction for each buffer and, more importantly, requires correct cleanup on errors. To avoid having to insert lots more try/catch clauses, this commit includes various changes to the code so that we reuse pdf_lexbufs where possible. This keeps the speed up.
-rw-r--r--pdf/mupdf-internal.h11
-rw-r--r--pdf/pdf_cmap_parse.c134
-rw-r--r--pdf/pdf_form.c12
-rw-r--r--pdf/pdf_function.c35
-rw-r--r--pdf/pdf_interpret.c7
-rw-r--r--pdf/pdf_lex.c69
-rw-r--r--pdf/pdf_repair.c3
-rw-r--r--pdf/pdf_xref.c4
8 files changed, 156 insertions, 119 deletions
diff --git a/pdf/mupdf-internal.h b/pdf/mupdf-internal.h
index 6785cabc..69702ef3 100644
--- a/pdf/mupdf-internal.h
+++ b/pdf/mupdf-internal.h
@@ -114,19 +114,26 @@ typedef struct pdf_lexbuf_large_s pdf_lexbuf_large;
struct pdf_lexbuf_s /* Lexer state plus token storage; set up by pdf_lexbuf_init and released by pdf_lexbuf_fin. */
{
+ fz_context *ctx; /* stored by pdf_lexbuf_init; used by pdf_lexbuf_grow and pdf_lexbuf_fin to allocate and free */
int size; /* current capacity of scratch in bytes; doubled by pdf_lexbuf_grow */
+ int base_size; /* capacity given to pdf_lexbuf_init; size == base_size means scratch has not been grown */
int len; /* length of the last string token (written by lex_string / lex_hex_string) */
int i; /* integer value; callers read it after a PDF_TOK_INT token */
float f; /* real value; callers read it after a PDF_TOK_REAL token */
- char scratch[PDF_LEXBUF_SMALL];
+ char *scratch; /* token text; points at buffer after init, at a heap block once grown */
+ char buffer[PDF_LEXBUF_SMALL]; /* embedded initial storage that scratch starts out pointing to */
};
struct pdf_lexbuf_large_s /* A pdf_lexbuf followed by extra storage, for use with PDF_LEXBUF_LARGE. */
{
pdf_lexbuf base; /* callers pass &x.base wherever a pdf_lexbuf * is expected */
- char scratch[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL];
+ char buffer[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL]; /* remainder of the large buffer; NOTE(review): presumably must lie directly after base.buffer, since init points scratch at base.buffer with size PDF_LEXBUF_LARGE — confirm */
};
+void pdf_lexbuf_init(fz_context *ctx, pdf_lexbuf *lexbuf, int size); /* set lexbuf up to use its embedded buffer with a capacity of size bytes */
+void pdf_lexbuf_fin(pdf_lexbuf *lexbuf); /* free the heap block if the lexbuf has been grown; accepts NULL */
+ptrdiff_t pdf_lexbuf_grow(pdf_lexbuf *lexbuf); /* double the capacity; returns new scratch minus old scratch so callers can rebase pointers */
+
int pdf_lex(fz_stream *f, pdf_lexbuf *lexbuf);
pdf_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
diff --git a/pdf/pdf_cmap_parse.c b/pdf/pdf_cmap_parse.c
index 93dd97c9..b78a36ec 100644
--- a/pdf/pdf_cmap_parse.c
+++ b/pdf/pdf_cmap_parse.c
@@ -62,48 +62,42 @@ pdf_lex_cmap(fz_stream *file, pdf_lexbuf *buf)
}
static void
-pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
+pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
{ /* Lex the token following CMapName into the caller-supplied buf and store a name token in cmap->cmap_name. */
- pdf_lexbuf buf;
int tok;
- buf.size = PDF_LEXBUF_SMALL;
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == PDF_TOK_NAME)
- fz_strlcpy(cmap->cmap_name, buf.scratch, sizeof(cmap->cmap_name));
+ fz_strlcpy(cmap->cmap_name, buf->scratch, sizeof(cmap->cmap_name));
else
fz_warn(ctx, "expected name after CMapName in cmap"); /* any other token only warns; cmap_name is left untouched */
}
static void
-pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
+pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
{ /* Lex the token following WMode into the caller-supplied buf and apply an integer token via pdf_set_cmap_wmode. */
- pdf_lexbuf buf;
int tok;
- buf.size = PDF_LEXBUF_SMALL;
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == PDF_TOK_INT)
- pdf_set_cmap_wmode(ctx, cmap, buf.i);
+ pdf_set_cmap_wmode(ctx, cmap, buf->i);
else
fz_warn(ctx, "expected integer after WMode in cmap"); /* any other token only warns; the wmode is not set */
}
static void
-pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
+pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
{
- pdf_lexbuf buf;
int tok;
int lo, hi;
- buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == TOK_END_CODESPACE_RANGE)
@@ -111,13 +105,13 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok == PDF_TOK_STRING)
{
- lo = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, &buf);
+ lo = pdf_code_from_string(buf->scratch, buf->len);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == PDF_TOK_STRING)
{
- hi = pdf_code_from_string(buf.scratch, buf.len);
- pdf_add_codespace(ctx, cmap, lo, hi, buf.len);
+ hi = pdf_code_from_string(buf->scratch, buf->len);
+ pdf_add_codespace(ctx, cmap, lo, hi, buf->len);
}
else break;
}
@@ -129,16 +123,14 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
}
static void
-pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
+pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
{
- pdf_lexbuf buf;
int tok;
int lo, hi, dst;
- buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == TOK_END_CID_RANGE)
@@ -147,37 +139,35 @@ pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endcidrange");
- lo = pdf_code_from_string(buf.scratch, buf.len);
+ lo = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string");
- hi = pdf_code_from_string(buf.scratch, buf.len);
+ hi = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok != PDF_TOK_INT)
fz_throw(ctx, "expected integer");
- dst = buf.i;
+ dst = buf->i;
pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
}
}
static void
-pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
+pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
{
- pdf_lexbuf buf;
int tok;
int src, dst;
- buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: "syntaxerror in cmap" */
if (tok == TOK_END_CID_CHAR)
@@ -186,32 +176,30 @@ pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endcidchar");
- src = pdf_code_from_string(buf.scratch, buf.len);
+ src = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: "syntaxerror in cmap" */
if (tok != PDF_TOK_INT)
fz_throw(ctx, "expected integer");
- dst = buf.i;
+ dst = buf->i;
pdf_map_range_to_range(ctx, cmap, src, src, dst);
}
}
static void
-pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int lo, int hi)
+pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf, int lo, int hi)
{
- pdf_lexbuf buf;
int tok;
int dst[256];
int i;
- buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: "syntaxerror in cmap" */
if (tok == PDF_TOK_CLOSE_ARRAY)
@@ -221,12 +209,12 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or ]");
- if (buf.len / 2)
+ if (buf->len / 2)
{
- for (i = 0; i < buf.len / 2; i++)
- dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
+ for (i = 0; i < buf->len / 2; i++)
+ dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
- pdf_map_one_to_many(ctx, cmap, lo, dst, buf.len / 2);
+ pdf_map_one_to_many(ctx, cmap, lo, dst, buf->len / 2);
}
lo ++;
@@ -234,16 +222,14 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l
}
static void
-pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
+pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
{
- pdf_lexbuf buf;
int tok;
int lo, hi, dst;
- buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: "syntaxerror in cmap" */
if (tok == TOK_END_BF_RANGE)
@@ -252,23 +238,23 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endbfrange");
- lo = pdf_code_from_string(buf.scratch, buf.len);
+ lo = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: "syntaxerror in cmap" */
if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string");
- hi = pdf_code_from_string(buf.scratch, buf.len);
+ hi = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: "syntaxerror in cmap" */
if (tok == PDF_TOK_STRING)
{
- if (buf.len == 2)
+ if (buf->len == 2)
{
- dst = pdf_code_from_string(buf.scratch, buf.len);
+ dst = pdf_code_from_string(buf->scratch, buf->len);
pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
}
else
@@ -276,10 +262,10 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
int dststr[256];
int i;
- if (buf.len / 2)
+ if (buf->len / 2)
{
- for (i = 0; i < buf.len / 2; i++)
- dststr[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
+ for (i = 0; i < buf->len / 2; i++)
+ dststr[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
while (lo <= hi)
{
@@ -293,7 +279,7 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok == PDF_TOK_OPEN_ARRAY)
{
- pdf_parse_bf_range_array(ctx, cmap, file, lo, hi);
+ pdf_parse_bf_range_array(ctx, cmap, file, buf, lo, hi);
/* RJW: "cannot map bfrange" */
}
@@ -305,18 +291,16 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
}
static void
-pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
+pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
{
- pdf_lexbuf buf;
int tok;
int dst[256];
int src;
int i;
- buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: "syntaxerror in cmap" */
if (tok == TOK_END_BF_CHAR)
@@ -325,18 +309,18 @@ pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endbfchar");
- src = pdf_code_from_string(buf.scratch, buf.len);
+ src = pdf_code_from_string(buf->scratch, buf->len);
- tok = pdf_lex_cmap(file, &buf);
+ tok = pdf_lex_cmap(file, buf);
/* RJW: "syntaxerror in cmap" */
/* Note: does not handle /dstName */
if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string");
- if (buf.len / 2)
+ if (buf->len / 2)
{
- for (i = 0; i < buf.len / 2; i++)
- dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
+ for (i = 0; i < buf->len / 2; i++)
+ dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
pdf_map_one_to_many(ctx, cmap, src, dst, i);
}
}
@@ -351,7 +335,7 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
int tok;
const char *where;
- buf.size = PDF_LEXBUF_SMALL;
+ pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
cmap = pdf_new_cmap(ctx);
strcpy(key, ".notdef");
@@ -373,12 +357,12 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
if (!strcmp(buf.scratch, "CMapName"))
{
where = " after CMapName";
- pdf_parse_cmap_name(ctx, cmap, file);
+ pdf_parse_cmap_name(ctx, cmap, file, &buf);
}
else if (!strcmp(buf.scratch, "WMode"))
{
where = " after WMode";
- pdf_parse_wmode(ctx, cmap, file);
+ pdf_parse_wmode(ctx, cmap, file, &buf);
}
else
fz_strlcpy(key, buf.scratch, sizeof key);
@@ -392,31 +376,31 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
else if (tok == TOK_BEGIN_CODESPACE_RANGE)
{
where = " codespacerange";
- pdf_parse_codespace_range(ctx, cmap, file);
+ pdf_parse_codespace_range(ctx, cmap, file, &buf);
}
else if (tok == TOK_BEGIN_BF_CHAR)
{
where = " bfchar";
- pdf_parse_bf_char(ctx, cmap, file);
+ pdf_parse_bf_char(ctx, cmap, file, &buf);
}
else if (tok == TOK_BEGIN_CID_CHAR)
{
where = " cidchar";
- pdf_parse_cid_char(ctx, cmap, file);
+ pdf_parse_cid_char(ctx, cmap, file, &buf);
}
else if (tok == TOK_BEGIN_BF_RANGE)
{
where = " bfrange";
- pdf_parse_bf_range(ctx, cmap, file);
+ pdf_parse_bf_range(ctx, cmap, file, &buf);
}
else if (tok == TOK_BEGIN_CID_RANGE)
{
where = "cidrange";
- pdf_parse_cid_range(ctx, cmap, file);
+ pdf_parse_cid_range(ctx, cmap, file, &buf);
}
/* ignore everything else */
@@ -424,6 +408,10 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
pdf_sort_cmap(ctx, cmap);
}
+ fz_always(ctx)
+ {
+ pdf_lexbuf_fin(&buf);
+ }
fz_catch(ctx)
{
pdf_drop_cmap(ctx, cmap);
diff --git a/pdf/pdf_form.c b/pdf/pdf_form.c
index 45510aac..68db8327 100644
--- a/pdf/pdf_form.c
+++ b/pdf/pdf_form.c
@@ -293,8 +293,7 @@ static void parse_da(fz_context *ctx, char *da, da_info *di)
pdf_lexbuf lbuf;
fz_stream *str = fz_open_memory(ctx, (unsigned char *)da, strlen(da));
- memset(lbuf.scratch, 0, sizeof(lbuf.scratch));
- lbuf.size = sizeof(lbuf.scratch);
+ pdf_lexbuf_init(ctx, &lbuf, PDF_LEXBUF_SMALL);
fz_var(str);
fz_var(name);
@@ -347,6 +346,7 @@ static void parse_da(fz_context *ctx, char *da, da_info *di)
{
fz_free(ctx, name);
fz_close(str);
+ pdf_lexbuf_fin(&lbuf);
}
fz_catch(ctx)
{
@@ -852,8 +852,7 @@ static void update_marked_content(pdf_document *doc, pdf_xobject *form, fz_buffe
int len;
fz_buffer *newbuf = NULL;
- memset(lbuf.scratch, 0, sizeof(lbuf.scratch));
- lbuf.size = sizeof(lbuf.scratch);
+ pdf_lexbuf_init(ctx, &lbuf, PDF_LEXBUF_SMALL);
fz_var(str_outer);
fz_var(str_inner);
@@ -922,6 +921,7 @@ static void update_marked_content(pdf_document *doc, pdf_xobject *form, fz_buffe
fz_close(str_outer);
fz_close(str_inner);
fz_drop_buffer(ctx, newbuf);
+ pdf_lexbuf_fin(&lbuf);
}
fz_catch(ctx)
{
@@ -938,8 +938,7 @@ static int get_matrix(pdf_document *doc, pdf_xobject *form, int q, fz_matrix *mt
str = pdf_open_stream(doc, pdf_to_num(form->contents), pdf_to_gen(form->contents));
- memset(lbuf.scratch, 0, sizeof(lbuf.scratch));
- lbuf.size = sizeof(lbuf.scratch);
+ pdf_lexbuf_init(ctx, &lbuf, PDF_LEXBUF_SMALL);
fz_try(ctx)
{
@@ -1004,6 +1003,7 @@ static int get_matrix(pdf_document *doc, pdf_xobject *form, int q, fz_matrix *mt
fz_always(ctx)
{
fz_close(str);
+ pdf_lexbuf_fin(&lbuf);
}
fz_catch(ctx)
{
diff --git a/pdf/pdf_function.c b/pdf/pdf_function.c
index 67c34836..9a3bf1c2 100644
--- a/pdf/pdf_function.c
+++ b/pdf/pdf_function.c
@@ -699,20 +699,16 @@ resize_code(fz_context *ctx, pdf_function *func, int newsize)
}
static void
-parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
+parse_code(pdf_function *func, fz_stream *stream, int *codeptr, pdf_lexbuf *buf)
{
- pdf_lexbuf buf;
int tok;
int opptr, elseptr, ifptr;
int a, b, mid, cmp;
fz_context *ctx = stream->ctx;
- buf.size = PDF_LEXBUF_SMALL;
- memset(buf.scratch, 0, sizeof(buf.scratch));
-
while (1)
{
- tok = pdf_lex(stream, &buf);
+ tok = pdf_lex(stream, buf);
/* RJW: "calculator function lexical error" */
switch(tok)
@@ -723,7 +719,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
case PDF_TOK_INT:
resize_code(ctx, func, *codeptr);
func->u.p.code[*codeptr].type = PS_INT;
- func->u.p.code[*codeptr].u.i = buf.i;
+ func->u.p.code[*codeptr].u.i = buf->i;
++*codeptr;
break;
@@ -744,7 +740,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
case PDF_TOK_REAL:
resize_code(ctx, func, *codeptr);
func->u.p.code[*codeptr].type = PS_REAL;
- func->u.p.code[*codeptr].u.f = buf.f;
+ func->u.p.code[*codeptr].u.f = buf->f;
++*codeptr;
break;
@@ -755,19 +751,19 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
resize_code(ctx, func, *codeptr);
ifptr = *codeptr;
- parse_code(func, stream, codeptr);
+ parse_code(func, stream, codeptr, buf);
/* RJW: "error in 'if' branch" */
- tok = pdf_lex(stream, &buf);
+ tok = pdf_lex(stream, buf);
/* RJW: "calculator function syntax error" */
if (tok == PDF_TOK_OPEN_BRACE)
{
elseptr = *codeptr;
- parse_code(func, stream, codeptr);
+ parse_code(func, stream, codeptr, buf);
/* RJW: "error in 'else' branch" */
- tok = pdf_lex(stream, &buf);
+ tok = pdf_lex(stream, buf);
/* RJW: "calculator function syntax error" */
}
else
@@ -778,7 +774,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
if (tok != PDF_TOK_KEYWORD)
fz_throw(ctx, "missing keyword in 'if-else' context");
- if (!strcmp(buf.scratch, "if"))
+ if (!strcmp(buf->scratch, "if"))
{
if (elseptr >= 0)
fz_throw(ctx, "too many branches for 'if'");
@@ -789,7 +785,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
func->u.p.code[opptr+3].type = PS_BLOCK;
func->u.p.code[opptr+3].u.block = *codeptr;
}
- else if (!strcmp(buf.scratch, "ifelse"))
+ else if (!strcmp(buf->scratch, "ifelse"))
{
if (elseptr < 0)
fz_throw(ctx, "not enough branches for 'ifelse'");
@@ -804,7 +800,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
}
else
{
- fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf.scratch);
+ fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf->scratch);
}
break;
@@ -822,7 +818,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
while (b - a > 1)
{
mid = (a + b) / 2;
- cmp = strcmp(buf.scratch, ps_op_names[mid]);
+ cmp = strcmp(buf->scratch, ps_op_names[mid]);
if (cmp > 0)
a = mid;
else if (cmp < 0)
@@ -831,7 +827,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
a = b = mid;
}
if (cmp != 0)
- fz_throw(ctx, "unknown operator: '%s'", buf.scratch);
+ fz_throw(ctx, "unknown operator: '%s'", buf->scratch);
resize_code(ctx, func, *codeptr);
func->u.p.code[*codeptr].type = PS_OPERATOR;
@@ -855,7 +851,7 @@ load_postscript_func(pdf_function *func, pdf_document *xref, pdf_obj *dict, int
fz_context *ctx = xref->ctx;
int locked = 0;
- buf.size = PDF_LEXBUF_SMALL;
+ pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
fz_var(stream);
fz_var(locked);
@@ -875,11 +871,12 @@ load_postscript_func(pdf_function *func, pdf_document *xref, pdf_obj *dict, int
func->u.p.cap = 0;
codeptr = 0;
- parse_code(func, stream, &codeptr);
+ parse_code(func, stream, &codeptr, &buf);
}
fz_always(ctx)
{
fz_close(stream);
+ pdf_lexbuf_fin(&buf);
}
fz_catch(ctx)
{
diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c
index 050417e8..a2fdacb9 100644
--- a/pdf/pdf_interpret.c
+++ b/pdf/pdf_interpret.c
@@ -2697,7 +2697,7 @@ static void
pdf_run_contents_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file)
{
fz_context *ctx = csi->dev->ctx;
- pdf_lexbuf_large *buf;
+ pdf_lexbuf *buf;
int save_in_text;
fz_var(buf);
@@ -2706,18 +2706,19 @@ pdf_run_contents_stream(pdf_csi *csi, pdf_obj *rdb, fz_stream *file)
return;
buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */
- buf->base.size = PDF_LEXBUF_LARGE;
+ pdf_lexbuf_init(ctx, buf, PDF_LEXBUF_SMALL);
save_in_text = csi->in_text;
csi->in_text = 0;
fz_try(ctx)
{
- pdf_run_stream(csi, rdb, file, &buf->base);
+ pdf_run_stream(csi, rdb, file, buf);
}
fz_catch(ctx)
{
fz_warn(ctx, "Content stream parsing error - rendering truncated");
}
csi->in_text = save_in_text;
+ pdf_lexbuf_fin(buf);
fz_free(ctx, buf);
}
diff --git a/pdf/pdf_lex.c b/pdf/pdf_lex.c
index c6ab6604..967b6dcc 100644
--- a/pdf/pdf_lex.c
+++ b/pdf/pdf_lex.c
@@ -231,16 +231,21 @@ end:
}
static int
-lex_string(fz_stream *f, char *buf, int n)
+lex_string(fz_stream *f, pdf_lexbuf *lb)
{
- char *s = buf;
- char *e = buf + n;
+ char *s = lb->scratch;
+ char *e = s + lb->size;
int bal = 1;
int oct;
int c;
- while (s < e)
+ while (1)
{
+ if (s == e)
+ {
+ s += pdf_lexbuf_grow(lb);
+ e = lb->scratch + lb->size;
+ }
c = fz_read_byte(f);
switch (c)
{
@@ -319,19 +324,25 @@ lex_string(fz_stream *f, char *buf, int n)
}
}
end:
- return s - buf;
+ lb->len = s - lb->scratch;
+ return PDF_TOK_STRING;
}
static int
-lex_hex_string(fz_stream *f, char *buf, int n)
+lex_hex_string(fz_stream *f, pdf_lexbuf *lb)
{
- char *s = buf;
- char *e = buf + n;
+ char *s = lb->scratch;
+ char *e = s + lb->size;
int a = 0, x = 0;
int c;
- while (s < e)
+ while (1)
{
+ if (s == e)
+ {
+ s += pdf_lexbuf_grow(lb);
+ e = lb->scratch + lb->size;
+ }
c = fz_read_byte(f);
switch (c)
{
@@ -357,7 +368,8 @@ lex_hex_string(fz_stream *f, char *buf, int n)
}
}
end:
- return s - buf;
+ lb->len = s - lb->scratch;
+ return PDF_TOK_STRING;
}
static int
@@ -399,6 +411,37 @@ pdf_token_from_keyword(char *key)
return PDF_TOK_KEYWORD;
}
+void pdf_lexbuf_init(fz_context *ctx, pdf_lexbuf *lb, int size) /* Prepare lb for lexing with an initial capacity of size bytes. */
+{
+ lb->size = lb->base_size = size; /* equal values mark the buffer as not yet grown (tested in pdf_lexbuf_fin and pdf_lexbuf_grow) */
+ lb->len = 0;
+ lb->ctx = ctx; /* kept so grow/fin can allocate and free without taking a ctx argument */
+ lb->scratch = &lb->buffer[0]; /* start in the embedded buffer; NOTE(review): size is not checked against the storage actually available — confirm callers pass a matching size */
+}
+
+void pdf_lexbuf_fin(pdf_lexbuf *lb) /* Release the heap block owned by lb, if any; a NULL lb is accepted. */
+{
+ if (lb && lb->size != lb->base_size) /* the sizes differ only after pdf_lexbuf_grow has replaced scratch with a heap block */
+ fz_free(lb->ctx, lb->scratch); /* NOTE(review): scratch and size are not reset here, so a second fin on a grown lexbuf would free again — confirm callers fin once */
+}
+
+ptrdiff_t pdf_lexbuf_grow(pdf_lexbuf *lb) /* Double lb's capacity; returns new scratch minus old scratch (lex_string adds it to its write pointer). */
+{
+ char *old = lb->scratch;
+ int newsize = lb->size * 2; /* NOTE(review): no overflow guard on the doubling */
+ if (lb->size == lb->base_size) /* first growth: scratch still points at the embedded buffer */
+ {
+ lb->scratch = fz_malloc(lb->ctx, newsize);
+ memcpy(lb->scratch, lb->buffer, lb->size); /* carry the bytes lexed so far over to the new heap block */
+ }
+ else
+ {
+ lb->scratch = fz_resize_array(lb->ctx, lb->scratch, newsize, 1); /* already on the heap: resize the existing block to newsize elements of 1 byte */
+ }
+ lb->size = newsize;
+ return lb->scratch - old; /* NOTE(review): subtracts pointers into different allocations, which ISO C leaves undefined — callers depend on this offset */
+}
+
int
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
@@ -419,8 +462,7 @@ pdf_lex(fz_stream *f, pdf_lexbuf *buf)
lex_name(f, buf);
return PDF_TOK_NAME;
case '(':
- buf->len = lex_string(f, buf->scratch, buf->size);
- return PDF_TOK_STRING;
+ return lex_string(f, buf);
case ')':
fz_warn(f->ctx, "lexical error (unexpected ')')");
continue;
@@ -433,8 +475,7 @@ pdf_lex(fz_stream *f, pdf_lexbuf *buf)
else
{
fz_unread_byte(f);
- buf->len = lex_hex_string(f, buf->scratch, buf->size);
- return PDF_TOK_STRING;
+ return lex_hex_string(f, buf);
}
case '>':
c = fz_read_byte(f);
diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c
index 27846855..85709219 100644
--- a/pdf/pdf_repair.c
+++ b/pdf/pdf_repair.c
@@ -151,7 +151,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
fz_var(stm);
- buf.size = PDF_LEXBUF_SMALL;
+ pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
fz_try(ctx)
{
@@ -188,6 +188,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
fz_always(ctx)
{
fz_close(stm);
+ pdf_lexbuf_fin(&buf);
}
fz_catch(ctx)
{
diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c
index 8f51c46b..4da0577c 100644
--- a/pdf/pdf_xref.c
+++ b/pdf/pdf_xref.c
@@ -840,6 +840,8 @@ pdf_close_document(pdf_document *xref)
fz_empty_store(ctx);
+ pdf_lexbuf_fin(&xref->lexbuf.base);
+
fz_free(ctx, xref);
}
@@ -1270,7 +1272,7 @@ pdf_new_document(fz_stream *file)
doc->super.meta = (void*)pdf_meta;
doc->super.interact = (void*)pdf_interact;
- doc->lexbuf.base.size = PDF_LEXBUF_LARGE;
+ pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE);
doc->file = fz_keep_stream(file);
doc->ctx = ctx;