summary | refs | log | tree | commit | diff
path: root/pdf/pdf_function.c
diff options
context:
space:
mode:
author: Robin Watts <robin.watts@artifex.com> 2012-06-22 13:26:35 +0100
committer: Robin Watts <robin.watts@artifex.com> 2012-06-22 16:40:57 +0100
commit: 01cd1e4dfdc6ab53e79d0a9e548415494851b5ea (patch)
tree: 409664f785d1795c5796de81d91b420539aab31c /pdf/pdf_function.c
parent: f4eb518ea74189d75f6603f49e29bc49068b73b2 (diff)
download: mupdf-01cd1e4dfdc6ab53e79d0a9e548415494851b5ea.tar.xz
Rework pdf_lexbuf to allow for dynamic parsing buffers.
Currently pdf_lexbufs use a static scratch buffer for parsing. In the main case this is 64K in size, but in other cases it can be just 256 bytes; this causes problems when parsing long strings. Even the 64K limit is an implementation limit of Acrobat, not an architectural limit of PDF. This commit changes pdf_lexbuf to allow dynamic buffers. This means a slightly more complex setup and destruction for each buffer but, more importantly, it requires correct cleanup on errors. To avoid having to insert many more try/catch clauses, this commit includes various changes to the code so that we reuse pdf_lexbufs where possible. This also keeps the speed up.
Diffstat (limited to 'pdf/pdf_function.c')
-rw-r--r-- pdf/pdf_function.c 35
1 files changed, 16 insertions, 19 deletions
diff --git a/pdf/pdf_function.c b/pdf/pdf_function.c
index 67c34836..9a3bf1c2 100644
--- a/pdf/pdf_function.c
+++ b/pdf/pdf_function.c
@@ -699,20 +699,16 @@ resize_code(fz_context *ctx, pdf_function *func, int newsize)
}
static void
-parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
+parse_code(pdf_function *func, fz_stream *stream, int *codeptr, pdf_lexbuf *buf)
{
- pdf_lexbuf buf;
int tok;
int opptr, elseptr, ifptr;
int a, b, mid, cmp;
fz_context *ctx = stream->ctx;
- buf.size = PDF_LEXBUF_SMALL;
- memset(buf.scratch, 0, sizeof(buf.scratch));
-
while (1)
{
- tok = pdf_lex(stream, &buf);
+ tok = pdf_lex(stream, buf);
/* RJW: "calculator function lexical error" */
switch(tok)
@@ -723,7 +719,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
case PDF_TOK_INT:
resize_code(ctx, func, *codeptr);
func->u.p.code[*codeptr].type = PS_INT;
- func->u.p.code[*codeptr].u.i = buf.i;
+ func->u.p.code[*codeptr].u.i = buf->i;
++*codeptr;
break;
@@ -744,7 +740,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
case PDF_TOK_REAL:
resize_code(ctx, func, *codeptr);
func->u.p.code[*codeptr].type = PS_REAL;
- func->u.p.code[*codeptr].u.f = buf.f;
+ func->u.p.code[*codeptr].u.f = buf->f;
++*codeptr;
break;
@@ -755,19 +751,19 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
resize_code(ctx, func, *codeptr);
ifptr = *codeptr;
- parse_code(func, stream, codeptr);
+ parse_code(func, stream, codeptr, buf);
/* RJW: "error in 'if' branch" */
- tok = pdf_lex(stream, &buf);
+ tok = pdf_lex(stream, buf);
/* RJW: "calculator function syntax error" */
if (tok == PDF_TOK_OPEN_BRACE)
{
elseptr = *codeptr;
- parse_code(func, stream, codeptr);
+ parse_code(func, stream, codeptr, buf);
/* RJW: "error in 'else' branch" */
- tok = pdf_lex(stream, &buf);
+ tok = pdf_lex(stream, buf);
/* RJW: "calculator function syntax error" */
}
else
@@ -778,7 +774,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
if (tok != PDF_TOK_KEYWORD)
fz_throw(ctx, "missing keyword in 'if-else' context");
- if (!strcmp(buf.scratch, "if"))
+ if (!strcmp(buf->scratch, "if"))
{
if (elseptr >= 0)
fz_throw(ctx, "too many branches for 'if'");
@@ -789,7 +785,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
func->u.p.code[opptr+3].type = PS_BLOCK;
func->u.p.code[opptr+3].u.block = *codeptr;
}
- else if (!strcmp(buf.scratch, "ifelse"))
+ else if (!strcmp(buf->scratch, "ifelse"))
{
if (elseptr < 0)
fz_throw(ctx, "not enough branches for 'ifelse'");
@@ -804,7 +800,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
}
else
{
- fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf.scratch);
+ fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf->scratch);
}
break;
@@ -822,7 +818,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
while (b - a > 1)
{
mid = (a + b) / 2;
- cmp = strcmp(buf.scratch, ps_op_names[mid]);
+ cmp = strcmp(buf->scratch, ps_op_names[mid]);
if (cmp > 0)
a = mid;
else if (cmp < 0)
@@ -831,7 +827,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
a = b = mid;
}
if (cmp != 0)
- fz_throw(ctx, "unknown operator: '%s'", buf.scratch);
+ fz_throw(ctx, "unknown operator: '%s'", buf->scratch);
resize_code(ctx, func, *codeptr);
func->u.p.code[*codeptr].type = PS_OPERATOR;
@@ -855,7 +851,7 @@ load_postscript_func(pdf_function *func, pdf_document *xref, pdf_obj *dict, int
fz_context *ctx = xref->ctx;
int locked = 0;
- buf.size = PDF_LEXBUF_SMALL;
+ pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
fz_var(stream);
fz_var(locked);
@@ -875,11 +871,12 @@ load_postscript_func(pdf_function *func, pdf_document *xref, pdf_obj *dict, int
func->u.p.cap = 0;
codeptr = 0;
- parse_code(func, stream, &codeptr);
+ parse_code(func, stream, &codeptr, &buf);
}
fz_always(ctx)
{
fz_close(stream);
+ pdf_lexbuf_fin(&buf);
}
fz_catch(ctx)
{