summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- pdf/mupdf.h 42
-rw-r--r-- pdf/pdf_cmap_parse.c 130
-rw-r--r-- pdf/pdf_function.c 33
-rw-r--r-- pdf/pdf_interpret.c 63
-rw-r--r-- pdf/pdf_lex.c 117
-rw-r--r-- pdf/pdf_parse.c 102
-rw-r--r-- pdf/pdf_repair.c 47
-rw-r--r-- pdf/pdf_xref.c 95
8 files changed, 337 insertions, 292 deletions
diff --git a/pdf/mupdf.h b/pdf/mupdf.h
index b233288f..15a96541 100644
--- a/pdf/mupdf.h
+++ b/pdf/mupdf.h
@@ -102,12 +102,40 @@ enum
PDF_NUM_TOKENS
};
-int pdf_lex(fz_stream *f, char *buf, int n, int *len);
+enum
+{
+ PDF_LEXBUF_SMALL = 256, /* bytes of scratch inside every pdf_lexbuf; the size used by on-stack buffers */
+ PDF_LEXBUF_LARGE = 65536 /* total scratch bytes provided by a pdf_lexbuf_large (same size as the old char scratch[65536]) */
+};
+
+
+
+typedef struct pdf_lexbuf_s pdf_lexbuf;
+typedef struct pdf_lexbuf_large_s pdf_lexbuf_large;
+
+struct pdf_lexbuf_s /* Lexer state handed to pdf_lex: scratch space for token text plus parsed numeric values. */
+{
+ int size; /* capacity in bytes the lexer may use in scratch; callers set PDF_LEXBUF_SMALL or PDF_LEXBUF_LARGE before lexing */
+ int len; /* length of the text left in scratch; set for name, keyword and string tokens */
+ int i; /* value of the last PDF_TOK_INT */
+ float f; /* value of the last PDF_TOK_REAL */
+ char scratch[PDF_LEXBUF_SMALL]; /* token text (names and keywords are NUL-terminated by lex_name) */
+};
+
+struct pdf_lexbuf_large_s /* A pdf_lexbuf with PDF_LEXBUF_LARGE bytes of scratch in total; pass &x.base to the lexer. */
+{
+ pdf_lexbuf base; /* must stay first; base.size is set to PDF_LEXBUF_LARGE by users of this type */
+ char scratch[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL]; /* overflow area for base.scratch; NOTE(review): relies on no padding between base.scratch and this array - confirm */
+};
+
+
+
+int pdf_lex(fz_stream *f, pdf_lexbuf *lexbuf);
-fz_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, char *buf, int cap);
-fz_obj *pdf_parse_dict(pdf_document *doc, fz_stream *f, char *buf, int cap);
-fz_obj *pdf_parse_stm_obj(pdf_document *doc, fz_stream *f, char *buf, int cap);
-fz_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, char *buf, int cap, int *num, int *gen, int *stm_ofs);
+fz_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
+fz_obj *pdf_parse_dict(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
+fz_obj *pdf_parse_stm_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf);
+fz_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf, int *num, int *gen, int *stm_ofs);
fz_rect pdf_to_rect(fz_context *ctx, fz_obj *array);
fz_matrix pdf_to_matrix(fz_context *ctx, fz_obj *array);
@@ -170,7 +198,7 @@ struct pdf_document_s
fz_obj **page_objs;
fz_obj **page_refs;
- char scratch[65536];
+ pdf_lexbuf_large lexbuf;
};
fz_obj *pdf_resolve_indirect(fz_obj *ref);
@@ -194,7 +222,7 @@ pdf_document *pdf_open_document(fz_context *ctx, const char *filename);
void pdf_close_document(pdf_document *doc);
/* private */
-void pdf_repair_xref(pdf_document *doc, char *buf, int bufsize);
+void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf);
void pdf_repair_obj_stms(pdf_document *doc);
void pdf_debug_xref(pdf_document *);
void pdf_resize_xref(pdf_document *doc, int newcap);
diff --git a/pdf/pdf_cmap_parse.c b/pdf/pdf_cmap_parse.c
index fb37c4a9..5c21393e 100644
--- a/pdf/pdf_cmap_parse.c
+++ b/pdf/pdf_cmap_parse.c
@@ -49,14 +49,14 @@ pdf_code_from_string(char *buf, int len)
}
static int
-pdf_lex_cmap(fz_stream *file, char *buf, int n, int *sl)
+pdf_lex_cmap(fz_stream *file, pdf_lexbuf *buf) /* Lex one token with pdf_lex; keyword tokens are remapped through pdf_cmap_token_from_keyword. */
{
- int tok = pdf_lex(file, buf, n, sl);
+ int tok = pdf_lex(file, buf); /* token text and values are left in *buf */
/* RJW: Lost debugging here: "cannot parse cmap token" */
if (tok == PDF_TOK_KEYWORD)
- tok = pdf_cmap_token_from_keyword(buf);
+ tok = pdf_cmap_token_from_keyword(buf->scratch); /* keyword text is the NUL-terminated string in scratch */
return tok;
}
@@ -64,15 +64,15 @@ pdf_lex_cmap(fz_stream *file, char *buf, int n, int *sl)
static void
pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf; /* small on-stack lexer buffer; only size is initialised before lexing */
int tok;
- int len;
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ buf.size = PDF_LEXBUF_SMALL; /* equals sizeof(buf.scratch) */
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == PDF_TOK_NAME)
- fz_strlcpy(cmap->cmap_name, buf, sizeof(cmap->cmap_name));
+ fz_strlcpy(cmap->cmap_name, buf.scratch, sizeof(cmap->cmap_name)); /* name text is in scratch; the copy is bounded by the destination size */
else
fz_warn(ctx, "expected name after CMapName in cmap");
}
@@ -80,15 +80,15 @@ pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf; /* small on-stack lexer buffer; only size is initialised before lexing */
int tok;
- int len;
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ buf.size = PDF_LEXBUF_SMALL; /* equals sizeof(buf.scratch) */
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == PDF_TOK_INT)
- pdf_set_wmode(ctx, cmap, atoi(buf));
+ pdf_set_wmode(ctx, cmap, buf.i); /* the lexer stores the integer value in buf.i, so no atoi on the text is needed */
else
fz_warn(ctx, "expected integer after WMode in cmap");
}
@@ -96,14 +96,14 @@ pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int lo, hi;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == TOK_END_CODESPACE_RANGE)
@@ -111,13 +111,13 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok == PDF_TOK_STRING)
{
- lo = pdf_code_from_string(buf, len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ lo = pdf_code_from_string(buf.scratch, buf.len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == PDF_TOK_STRING)
{
- hi = pdf_code_from_string(buf, len);
- pdf_add_codespace(ctx, cmap, lo, hi, len);
+ hi = pdf_code_from_string(buf.scratch, buf.len);
+ pdf_add_codespace(ctx, cmap, lo, hi, buf.len);
}
else break;
}
@@ -131,14 +131,14 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int lo, hi, dst;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok == TOK_END_CID_RANGE)
@@ -147,21 +147,21 @@ pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endcidrange");
- lo = pdf_code_from_string(buf, len);
+ lo = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string");
- hi = pdf_code_from_string(buf, len);
+ hi = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: Lost debugging: "syntaxerror in cmap" */
if (tok != PDF_TOK_INT)
fz_throw(ctx, "expected integer");
- dst = atoi(buf);
+ dst = buf.i;
pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
}
@@ -170,14 +170,14 @@ pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int src, dst;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == TOK_END_CID_CHAR)
@@ -186,15 +186,15 @@ pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endcidchar");
- src = pdf_code_from_string(buf, len);
+ src = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok != PDF_TOK_INT)
fz_throw(ctx, "expected integer");
- dst = atoi(buf);
+ dst = buf.i;
pdf_map_range_to_range(ctx, cmap, src, src, dst);
}
@@ -203,15 +203,15 @@ pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int lo, int hi)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int dst[256];
int i;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == PDF_TOK_CLOSE_ARRAY)
@@ -221,12 +221,12 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or ]");
- if (len / 2)
+ if (buf.len / 2)
{
- for (i = 0; i < len / 2; i++)
- dst[i] = pdf_code_from_string(buf + i * 2, 2);
+ for (i = 0; i < buf.len / 2; i++)
+ dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
- pdf_map_one_to_many(ctx, cmap, lo, dst, len / 2);
+ pdf_map_one_to_many(ctx, cmap, lo, dst, buf.len / 2);
}
lo ++;
@@ -236,14 +236,14 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l
static void
pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int lo, hi, dst;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == TOK_END_BF_RANGE)
@@ -252,23 +252,23 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endbfrange");
- lo = pdf_code_from_string(buf, len);
+ lo = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string");
- hi = pdf_code_from_string(buf, len);
+ hi = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == PDF_TOK_STRING)
{
- if (len == 2)
+ if (buf.len == 2)
{
- dst = pdf_code_from_string(buf, len);
+ dst = pdf_code_from_string(buf.scratch, buf.len);
pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
}
else
@@ -276,10 +276,10 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
int dststr[256];
int i;
- if (len / 2)
+ if (buf.len / 2)
{
- for (i = 0; i < len / 2; i++)
- dststr[i] = pdf_code_from_string(buf + i * 2, 2);
+ for (i = 0; i < buf.len / 2; i++)
+ dststr[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
while (lo <= hi)
{
@@ -307,16 +307,16 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
static void
pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
{
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
int dst[256];
int src;
int i;
+ buf.size = PDF_LEXBUF_SMALL;
while (1)
{
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
if (tok == TOK_END_BF_CHAR)
@@ -325,18 +325,18 @@ pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file)
else if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string or endbfchar");
- src = pdf_code_from_string(buf, len);
+ src = pdf_code_from_string(buf.scratch, buf.len);
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
/* RJW: "syntaxerror in cmap" */
/* Note: does not handle /dstName */
if (tok != PDF_TOK_STRING)
fz_throw(ctx, "expected string");
- if (len / 2)
+ if (buf.len / 2)
{
- for (i = 0; i < len / 2; i++)
- dst[i] = pdf_code_from_string(buf + i * 2, 2);
+ for (i = 0; i < buf.len / 2; i++)
+ dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2);
pdf_map_one_to_many(ctx, cmap, src, dst, i);
}
}
@@ -347,11 +347,11 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
{
pdf_cmap *cmap;
char key[64];
- char buf[256];
+ pdf_lexbuf buf;
int tok;
- int len;
const char *where;
+ buf.size = PDF_LEXBUF_SMALL;
cmap = pdf_new_cmap(ctx);
strcpy(key, ".notdef");
@@ -363,25 +363,25 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file)
while (1)
{
where = "";
- tok = pdf_lex_cmap(file, buf, sizeof buf, &len);
+ tok = pdf_lex_cmap(file, &buf);
if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP)
break;
else if (tok == PDF_TOK_NAME)
{
- if (!strcmp(buf, "CMapName"))
+ if (!strcmp(buf.scratch, "CMapName"))
{
where = " after CMapName";
pdf_parse_cmap_name(ctx, cmap, file);
}
- else if (!strcmp(buf, "WMode"))
+ else if (!strcmp(buf.scratch, "WMode"))
{
where = " after WMode";
pdf_parse_wmode(ctx, cmap, file);
}
else
- fz_strlcpy(key, buf, sizeof key);
+ fz_strlcpy(key, buf.scratch, sizeof key);
}
else if (tok == TOK_USECMAP)
diff --git a/pdf/pdf_function.c b/pdf/pdf_function.c
index 4478827c..17373f42 100644
--- a/pdf/pdf_function.c
+++ b/pdf/pdf_function.c
@@ -683,18 +683,18 @@ resize_code(fz_context *ctx, pdf_function *func, int newsize)
static void
parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
{
- char buf[64];
- int len;
+ pdf_lexbuf buf;
int tok;
int opptr, elseptr, ifptr;
int a, b, mid, cmp;
fz_context *ctx = stream->ctx;
- memset(buf, 0, sizeof(buf));
+ buf.size = PDF_LEXBUF_SMALL;
+ memset(buf.scratch, 0, sizeof(buf.scratch));
while (1)
{
- tok = pdf_lex(stream, buf, sizeof buf, &len);
+ tok = pdf_lex(stream, &buf);
/* RJW: "calculator function lexical error" */
switch(tok)
@@ -705,7 +705,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
case PDF_TOK_INT:
resize_code(ctx, func, *codeptr);
func->u.p.code[*codeptr].type = PS_INT;
- func->u.p.code[*codeptr].u.i = atoi(buf);
+ func->u.p.code[*codeptr].u.i = buf.i;
++*codeptr;
break;
@@ -726,7 +726,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
case PDF_TOK_REAL:
resize_code(ctx, func, *codeptr);
func->u.p.code[*codeptr].type = PS_REAL;
- func->u.p.code[*codeptr].u.f = fz_atof(buf);
+ func->u.p.code[*codeptr].u.f = buf.f;
++*codeptr;
break;
@@ -740,7 +740,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
parse_code(func, stream, codeptr);
/* RJW: "error in 'if' branch" */
- tok = pdf_lex(stream, buf, sizeof buf, &len);
+ tok = pdf_lex(stream, &buf);
/* RJW: "calculator function syntax error" */
if (tok == PDF_TOK_OPEN_BRACE)
@@ -749,7 +749,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
parse_code(func, stream, codeptr);
/* RJW: "error in 'else' branch" */
- tok = pdf_lex(stream, buf, sizeof buf, &len);
+ tok = pdf_lex(stream, &buf);
/* RJW: "calculator function syntax error" */
}
else
@@ -760,7 +760,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
if (tok != PDF_TOK_KEYWORD)
fz_throw(ctx, "missing keyword in 'if-else' context");
- if (!strcmp(buf, "if"))
+ if (!strcmp(buf.scratch, "if"))
{
if (elseptr >= 0)
fz_throw(ctx, "too many branches for 'if'");
@@ -771,7 +771,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
func->u.p.code[opptr+3].type = PS_BLOCK;
func->u.p.code[opptr+3].u.block = *codeptr;
}
- else if (!strcmp(buf, "ifelse"))
+ else if (!strcmp(buf.scratch, "ifelse"))
{
if (elseptr < 0)
fz_throw(ctx, "not enough branches for 'ifelse'");
@@ -786,7 +786,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
}
else
{
- fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf);
+ fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf.scratch);
}
break;
@@ -804,7 +804,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
while (b - a > 1)
{
mid = (a + b) / 2;
- cmp = strcmp(buf, ps_op_names[mid]);
+ cmp = strcmp(buf.scratch, ps_op_names[mid]);
if (cmp > 0)
a = mid;
else if (cmp < 0)
@@ -813,7 +813,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr)
a = b = mid;
}
if (cmp != 0)
- fz_throw(ctx, "unknown operator: '%s'", buf);
+ fz_throw(ctx, "unknown operator: '%s'", buf.scratch);
resize_code(ctx, func, *codeptr);
func->u.p.code[*codeptr].type = PS_OPERATOR;
@@ -832,12 +832,13 @@ load_postscript_func(pdf_function *func, pdf_document *xref, fz_obj *dict, int n
{
fz_stream *stream = NULL;
int codeptr;
- char buf[64];
+ pdf_lexbuf buf;
int tok;
- int len;
fz_context *ctx = xref->ctx;
int locked = 0;
+ buf.size = PDF_LEXBUF_SMALL;
+
fz_var(stream);
fz_var(locked);
@@ -846,7 +847,7 @@ load_postscript_func(pdf_function *func, pdf_document *xref, fz_obj *dict, int n
stream = pdf_open_stream(xref, num, gen);
/* RJW: "cannot open calculator function stream" */
- tok = pdf_lex(stream, buf, sizeof buf, &len);
+ tok = pdf_lex(stream, &buf);
if (tok != PDF_TOK_OPEN_BRACE)
{
fz_throw(ctx, "stream is not a calculator function");
diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c
index effea657..1cfe6a96 100644
--- a/pdf/pdf_interpret.c
+++ b/pdf/pdf_interpret.c
@@ -819,6 +819,7 @@ pdf_show_string(pdf_csi *csi, unsigned char *buf, int len)
{
int w = pdf_decode_cmap(fontdesc->encoding, buf, &cpt);
buf += w;
+
cid = pdf_lookup_cmap(fontdesc->encoding, cpt);
if (cid >= 0)
pdf_show_char(csi, cid);
@@ -1625,12 +1626,10 @@ static void pdf_run_BI(pdf_csi *csi, fz_obj *rdb, fz_stream *file)
{
fz_context *ctx = csi->dev->ctx;
int ch;
- char *buf = csi->xref->scratch;
- int buflen = sizeof(csi->xref->scratch);
fz_image *img;
fz_obj *obj;
- obj = pdf_parse_dict(csi->xref, file, buf, buflen);
+ obj = pdf_parse_dict(csi->xref, file, &csi->xref->lexbuf.base);
/* RJW: "cannot parse inline image dictionary" */
/* read whitespace after ID keyword */
@@ -2523,10 +2522,10 @@ pdf_run_keyword(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf)
}
static void
-pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen)
+pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, pdf_lexbuf *buf)
{
fz_context *ctx = csi->dev->ctx;
- int tok, len, in_array;
+ int tok, in_array;
/* make sure we have a clean slate if we come here from flush_text */
pdf_clear_stack(csi);
@@ -2551,7 +2550,7 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen
csi->cookie->progress++;
}
- tok = pdf_lex(file, buf, buflen, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "lexical error in content stream" */
if (in_array)
@@ -2560,19 +2559,24 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen
{
in_array = 0;
}
- else if (tok == PDF_TOK_INT || tok == PDF_TOK_REAL)
+ else if (tok == PDF_TOK_REAL)
{
pdf_gstate *gstate = csi->gstate + csi->gtop;
- pdf_show_space(csi, -fz_atof(buf) * gstate->size * 0.001f);
+ pdf_show_space(csi, -buf->f * gstate->size * 0.001f);
+ }
+ else if (tok == PDF_TOK_INT)
+ {
+ pdf_gstate *gstate = csi->gstate + csi->gtop;
+ pdf_show_space(csi, -buf->i * gstate->size * 0.001f);
}
else if (tok == PDF_TOK_STRING)
{
- pdf_show_string(csi, (unsigned char *)buf, len);
+ pdf_show_string(csi, (unsigned char *)buf->scratch, buf->len);
}
else if (tok == PDF_TOK_KEYWORD)
{
- if (!strcmp(buf, "Tw") || !strcmp(buf, "Tc"))
- fz_warn(ctx, "ignoring keyword '%s' inside array", buf);
+ if (!strcmp(buf->scratch, "Tw") || !strcmp(buf->scratch, "Tc"))
+ fz_warn(ctx, "ignoring keyword '%s' inside array", buf->scratch);
else
fz_throw(ctx, "syntax error in array");
}
@@ -2591,7 +2595,7 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen
case PDF_TOK_OPEN_ARRAY:
if (!csi->in_text)
{
- csi->obj = pdf_parse_array(csi->xref, file, buf, buflen);
+ csi->obj = pdf_parse_array(csi->xref, file, buf);
/* RJW: "cannot parse array" */
}
else
@@ -2601,38 +2605,38 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen
break;
case PDF_TOK_OPEN_DICT:
- csi->obj = pdf_parse_dict(csi->xref, file, buf, buflen);
+ csi->obj = pdf_parse_dict(csi->xref, file, buf);
/* RJW: "cannot parse dictionary" */
break;
case PDF_TOK_NAME:
- fz_strlcpy(csi->name, buf, sizeof(csi->name));
+ fz_strlcpy(csi->name, buf->scratch, sizeof(csi->name));
break;
case PDF_TOK_INT:
- csi->stack[csi->top] = atoi(buf);
+ csi->stack[csi->top] = buf->i;
csi->top ++;
break;
case PDF_TOK_REAL:
- csi->stack[csi->top] = fz_atof(buf);
+ csi->stack[csi->top] = buf->f;
csi->top ++;
break;
case PDF_TOK_STRING:
- if (len <= sizeof(csi->string))
+ if (buf->len <= sizeof(csi->string))
{
- memcpy(csi->string, buf, len);
- csi->string_len = len;
+ memcpy(csi->string, buf->scratch, buf->len);
+ csi->string_len = buf->len;
}
else
{
- csi->obj = fz_new_string(ctx, buf, len);
+ csi->obj = fz_new_string(ctx, buf->scratch, buf->len);
}
break;
case PDF_TOK_KEYWORD:
- pdf_run_keyword(csi, rdb, file, buf);
+ pdf_run_keyword(csi, rdb, file, buf->scratch);
/* RJW: "cannot run keyword" */
pdf_clear_stack(csi);
break;
+/*
+ * Run the content stream held in 'contents' through pdf_run_stream.
+ * A private pdf_lexbuf_large is heap-allocated for each call (the function
+ * must be re-entrant for type3 fonts). csi->in_text is cleared for the
+ * run and restored once pdf_run_stream has finished. The stream and the
+ * lex buffer are released in fz_always; any error reaching the outer
+ * fz_catch is rethrown as "cannot parse context stream".
+ */
static void
pdf_run_buffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents)
{
fz_context *ctx = csi->dev->ctx;
- int len = sizeof csi->xref->scratch;
- char *buf = NULL;
+ /* Must start out NULL: fz_always frees buf even when fz_malloc itself throws. */
+ pdf_lexbuf_large *buf = NULL;
fz_stream * file = NULL;
int save_in_text;
@@ -2664,13 +2667,14 @@ pdf_run_buffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents)
fz_try(ctx)
{
- buf = fz_malloc(ctx, len); /* we must be re-entrant for type3 fonts */
+ buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */
+ buf->base.size = PDF_LEXBUF_LARGE; /* the lexer may use base.scratch plus the trailing scratch array */
file = fz_open_buffer(ctx, contents);
save_in_text = csi->in_text;
csi->in_text = 0;
fz_try(ctx)
{
- pdf_run_stream(csi, rdb, file, buf, len);
+ pdf_run_stream(csi, rdb, file, &buf->base);
}
fz_catch(ctx)
{
@@ -2678,14 +2682,15 @@ pdf_run_buffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents)
}
csi->in_text = save_in_text;
}
- fz_catch(ctx)
- {
+ fz_always(ctx)
+ {
fz_close(file);
fz_free(ctx, buf);
+ }
+ fz_catch(ctx)
+ {
fz_throw(ctx, "cannot parse context stream");
}
- fz_close(file);
- fz_free(ctx, buf);
}
void
diff --git a/pdf/pdf_lex.c b/pdf/pdf_lex.c
index 24828412..322d945c 100644
--- a/pdf/pdf_lex.c
+++ b/pdf/pdf_lex.c
@@ -63,87 +63,106 @@ lex_comment(fz_stream *f)
}
+/*
+ * Lex the rest of a number whose first byte, c ('+', '-', '.' or a digit),
+ * has already been read by pdf_lex.
+ *
+ * Returns PDF_TOK_INT with the value in buf->i, or, once a '.' has been
+ * seen, PDF_TOK_REAL with the value in buf->f. The byte that terminates
+ * the number is pushed back onto the stream unless it is EOF.
+ *
+ * buf->len is set to the number of bytes consumed, including c. The old
+ * text-based lexer reported this through *sl, and pdf_repair_obj still
+ * unreads buf->len bytes after a PDF_TOK_INT, so it must not be left stale.
+ */
static int
-lex_number(fz_stream *f, char *s, int n, int *tok)
+lex_number(fz_stream *f, pdf_lexbuf *buf, int c)
{
- char *buf = s;
- *tok = PDF_TOK_INT;
+ int neg = 0;
+ int i = 0; /* integer part */
+ int n; /* fractional digits accumulated as an integer */
+ int d; /* power of ten matching the digits held in n */
+ float v;
+ int len = 1; /* bytes consumed so far; c is the first */
/* Initially we might have +, -, . or a digit */
- if (n > 1)
+ switch (c)
+ {
+ case '.':
+ goto loop_after_dot;
+ case '-':
+ neg = 1;
+ break;
+ case '+':
+ break;
+ default: /* Must be a digit */
+ i = c - '0';
+ break;
+ }
+
+ while (1)
{
int c = fz_read_byte(f);
switch (c)
{
case '.':
- *tok = PDF_TOK_REAL;
- *s++ = c;
- n--;
+ len++;
goto loop_after_dot;
- case '+':
- case '-':
case RANGE_0_9:
- *s++ = c;
- n--;
- goto loop_after_sign;
+ len++;
+ i = 10*i + c - '0';
+ /* FIXME: Need overflow check here; do we care? */
+ break;
default:
fz_unread_byte(f);
- goto end;
+ /* Fallthrough */
case EOF:
- goto end;
+ if (neg)
+ i = -i;
+ buf->i = i;
+ buf->len = len;
+ return PDF_TOK_INT;
}
}
- /* We can't accept a sign from here on in, just . or a digit */
-loop_after_sign:
- while (n > 1)
+ /* In here, we've seen a dot, so can accept just digits */
+loop_after_dot:
+ n = 0;
+ d = 1;
+ while (1)
{
int c = fz_read_byte(f);
switch (c)
{
- case '.':
- *tok = PDF_TOK_REAL;
- *s++ = c;
- n--;
- goto loop_after_dot;
case RANGE_0_9:
- *s++ = c;
+ len++; /* the digit is consumed even if it is too small to keep */
+ if (d >= INT_MAX/10)
+ goto underflow;
+ n = n*10 + (c - '0');
+ d *= 10;
break;
default:
fz_unread_byte(f);
- goto end;
+ /* Fallthrough */
case EOF:
- goto end;
+ v = (float)i + ((float)n / (float)d);
+ if (neg)
+ v = -v;
+ buf->f = v;
+ buf->len = len;
+ return PDF_TOK_REAL;
}
- n--;
}
- /* In here, we've seen a dot, so can accept just digits */
-loop_after_dot:
- while (n > 1)
+underflow:
+ /* Ignore any digits after here, because they are too small */
+ while (1)
{
int c = fz_read_byte(f);
switch (c)
{
case RANGE_0_9:
- *s++ = c;
+ len++;
break;
default:
fz_unread_byte(f);
- goto end;
+ /* Fallthrough */
case EOF:
- goto end;
+ v = (float)i + ((float)n / (float)d);
+ if (neg)
+ v = -v;
+ buf->f = v;
+ buf->len = len;
+ return PDF_TOK_REAL;
}
- n--;
}
-
-end:
- *s = '\0';
- return s-buf;
}
static void
-lex_name(fz_stream *f, char *s, int n)
+lex_name(fz_stream *f, pdf_lexbuf *buf)
{
+ char *s = buf->scratch;
+ int n = buf->size;
+
while (n > 1)
{
int c = fz_read_byte(f);
@@ -208,6 +227,7 @@ lex_name(fz_stream *f, char *s, int n)
}
end:
*s = '\0';
+ buf->len = s - buf->scratch;
}
static int
@@ -380,7 +400,7 @@ pdf_token_from_keyword(char *key)
}
int
-pdf_lex(fz_stream *f, char *buf, int n, int *sl)
+pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
while (1)
{
@@ -396,11 +416,10 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
lex_comment(f);
break;
case '/':
- lex_name(f, buf, n);
- *sl = strlen(buf);
+ lex_name(f, buf);
return PDF_TOK_NAME;
case '(':
- *sl = lex_string(f, buf, n);
+ buf->len = lex_string(f, buf->scratch, buf->size);
return PDF_TOK_STRING;
case ')':
fz_warn(f->ctx, "lexical error (unexpected ')')");
@@ -414,7 +433,7 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
else
{
fz_unread_byte(f);
- *sl = lex_hex_string(f, buf, n);
+ buf->len = lex_hex_string(f, buf->scratch, buf->size);
return PDF_TOK_STRING;
}
case '>':
@@ -434,17 +453,11 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl)
case '}':
return PDF_TOK_CLOSE_BRACE;
case IS_NUMBER:
- {
- int tok;
- fz_unread_byte(f);
- *sl = lex_number(f, buf, n, &tok);
- return tok;
- }
+ return lex_number(f, buf, c);
default: /* isregular: !isdelim && !iswhite && c != EOF */
fz_unread_byte(f);
- lex_name(f, buf, n);
- *sl = strlen(buf);
- return pdf_token_from_keyword(buf);
+ lex_name(f, buf);
+ return pdf_token_from_keyword(buf->scratch);
}
}
}
diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c
index 220eb30c..fb6cb7ef 100644
--- a/pdf/pdf_parse.c
+++ b/pdf/pdf_parse.c
@@ -171,13 +171,12 @@ pdf_to_utf8_name(fz_context *ctx, fz_obj *src)
}
fz_obj *
-pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap)
+pdf_parse_array(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
fz_obj *ary = NULL;
fz_obj *obj = NULL;
int a = 0, b = 0, n = 0;
int tok;
- int len;
fz_context *ctx = file->ctx;
fz_obj *op;
@@ -189,7 +188,7 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap)
{
while (1)
{
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
if (tok != PDF_TOK_INT && tok != PDF_TOK_R)
{
@@ -228,9 +227,9 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap)
case PDF_TOK_INT:
if (n == 0)
- a = atoi(buf);
+ a = buf->i;
if (n == 1)
- b = atoi(buf);
+ b = buf->i;
n ++;
break;
@@ -245,33 +244,33 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap)
break;
case PDF_TOK_OPEN_ARRAY:
- obj = pdf_parse_array(xref, file, buf, cap);
+ obj = pdf_parse_array(xref, file, buf);
fz_array_push(ary, obj);
fz_drop_obj(obj);
obj = NULL;
break;
case PDF_TOK_OPEN_DICT:
- obj = pdf_parse_dict(xref, file, buf, cap);
+ obj = pdf_parse_dict(xref, file, buf);
fz_array_push(ary, obj);
fz_drop_obj(obj);
obj = NULL;
break;
case PDF_TOK_NAME:
- obj = fz_new_name(ctx, buf);
+ obj = fz_new_name(ctx, buf->scratch);
fz_array_push(ary, obj);
fz_drop_obj(obj);
obj = NULL;
break;
case PDF_TOK_REAL:
- obj = fz_new_real(ctx, fz_atof(buf));
+ obj = fz_new_real(ctx, buf->f);
fz_array_push(ary, obj);
fz_drop_obj(obj);
obj = NULL;
break;
case PDF_TOK_STRING:
- obj = fz_new_string(ctx, buf, len);
+ obj = fz_new_string(ctx, buf->scratch, buf->len);
fz_array_push(ary, obj);
fz_drop_obj(obj);
obj = NULL;
@@ -312,13 +311,12 @@ end:
}
fz_obj *
-pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap)
+pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
fz_obj *dict = NULL;
fz_obj *key = NULL;
fz_obj *val = NULL;
int tok;
- int len;
int a, b;
fz_context *ctx = file->ctx;
@@ -332,45 +330,45 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap)
{
while (1)
{
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
skip:
if (tok == PDF_TOK_CLOSE_DICT)
break;
/* for BI .. ID .. EI in content streams */
- if (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID"))
+ if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))
break;
if (tok != PDF_TOK_NAME)
fz_throw(ctx, "invalid key in dict");
- key = fz_new_name(ctx, buf);
+ key = fz_new_name(ctx, buf->scratch);
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
switch (tok)
{
case PDF_TOK_OPEN_ARRAY:
- val = pdf_parse_array(xref, file, buf, cap);
+ val = pdf_parse_array(xref, file, buf);
break;
case PDF_TOK_OPEN_DICT:
- val = pdf_parse_dict(xref, file, buf, cap);
+ val = pdf_parse_dict(xref, file, buf);
break;
- case PDF_TOK_NAME: val = fz_new_name(ctx, buf); break;
- case PDF_TOK_REAL: val = fz_new_real(ctx, fz_atof(buf)); break;
- case PDF_TOK_STRING: val = fz_new_string(ctx, buf, len); break;
+ case PDF_TOK_NAME: val = fz_new_name(ctx, buf->scratch); break;
+ case PDF_TOK_REAL: val = fz_new_real(ctx, buf->f); break;
+ case PDF_TOK_STRING: val = fz_new_string(ctx, buf->scratch, buf->len); break;
case PDF_TOK_TRUE: val = fz_new_bool(ctx, 1); break;
case PDF_TOK_FALSE: val = fz_new_bool(ctx, 0); break;
case PDF_TOK_NULL: val = fz_new_null(ctx); break;
case PDF_TOK_INT:
/* 64-bit to allow for numbers > INT_MAX and overflow */
- a = (int) strtoll(buf, 0, 10);
- tok = pdf_lex(file, buf, cap, &len);
+ a = buf->i;
+ tok = pdf_lex(file, buf);
if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME ||
- (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID")))
+ (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")))
{
val = fz_new_int(ctx, a);
fz_dict_put(dict, key, val);
@@ -382,8 +380,8 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap)
}
if (tok == PDF_TOK_INT)
{
- b = atoi(buf);
- tok = pdf_lex(file, buf, cap, &len);
+ b = buf->i;
+ tok = pdf_lex(file, buf);
if (tok == PDF_TOK_R)
{
val = fz_new_indirect(ctx, a, b, xref);
@@ -414,30 +412,29 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap)
}
fz_obj *
-pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, char *buf, int cap)
+pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf)
{
int tok;
- int len;
fz_context *ctx = file->ctx;
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot parse token in object stream") */
switch (tok)
{
case PDF_TOK_OPEN_ARRAY:
- return pdf_parse_array(xref, file, buf, cap);
+ return pdf_parse_array(xref, file, buf);
/* RJW: "cannot parse object stream" */
case PDF_TOK_OPEN_DICT:
- return pdf_parse_dict(xref, file, buf, cap);
+ return pdf_parse_dict(xref, file, buf);
/* RJW: "cannot parse object stream" */
- case PDF_TOK_NAME: return fz_new_name(ctx, buf); break;
- case PDF_TOK_REAL: return fz_new_real(ctx, fz_atof(buf)); break;
- case PDF_TOK_STRING: return fz_new_string(ctx, buf, len); break;
+ case PDF_TOK_NAME: return fz_new_name(ctx, buf->scratch); break;
+ case PDF_TOK_REAL: return fz_new_real(ctx, buf->f); break;
+ case PDF_TOK_STRING: return fz_new_string(ctx, buf->scratch, buf->len); break;
case PDF_TOK_TRUE: return fz_new_bool(ctx, 1); break;
case PDF_TOK_FALSE: return fz_new_bool(ctx, 0); break;
case PDF_TOK_NULL: return fz_new_null(ctx); break;
- case PDF_TOK_INT: return fz_new_int(ctx, atoi(buf)); break;
+ case PDF_TOK_INT: return fz_new_int(ctx, buf->i); break;
default: fz_throw(ctx, "unknown token in object stream");
}
return NULL; /* Stupid MSVC */
@@ -445,60 +442,59 @@ pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, char *buf, int cap)
fz_obj *
pdf_parse_ind_obj(pdf_document *xref,
- fz_stream *file, char *buf, int cap,
+ fz_stream *file, pdf_lexbuf *buf,
int *onum, int *ogen, int *ostmofs)
{
fz_obj *obj = NULL;
int num = 0, gen = 0, stm_ofs;
int tok;
- int len;
int a, b;
fz_context *ctx = file->ctx;
fz_var(obj);
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: cannot parse indirect object (%d %d R)", num, gen */
if (tok != PDF_TOK_INT)
fz_throw(ctx, "expected object number (%d %d R)", num, gen);
- num = atoi(buf);
+ num = buf->i;
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
if (tok != PDF_TOK_INT)
fz_throw(ctx, "expected generation number (%d %d R)", num, gen);
- gen = atoi(buf);
+ gen = buf->i;
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
if (tok != PDF_TOK_OBJ)
fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen);
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
switch (tok)
{
case PDF_TOK_OPEN_ARRAY:
- obj = pdf_parse_array(xref, file, buf, cap);
+ obj = pdf_parse_array(xref, file, buf);
/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
break;
case PDF_TOK_OPEN_DICT:
- obj = pdf_parse_dict(xref, file, buf, cap);
+ obj = pdf_parse_dict(xref, file, buf);
/* RJW: "cannot parse indirect object (%d %d R)", num, gen */
break;
- case PDF_TOK_NAME: obj = fz_new_name(ctx, buf); break;
- case PDF_TOK_REAL: obj = fz_new_real(ctx, fz_atof(buf)); break;
- case PDF_TOK_STRING: obj = fz_new_string(ctx, buf, len); break;
+ case PDF_TOK_NAME: obj = fz_new_name(ctx, buf->scratch); break;
+ case PDF_TOK_REAL: obj = fz_new_real(ctx, buf->f); break;
+ case PDF_TOK_STRING: obj = fz_new_string(ctx, buf->scratch, buf->len); break;
case PDF_TOK_TRUE: obj = fz_new_bool(ctx, 1); break;
case PDF_TOK_FALSE: obj = fz_new_bool(ctx, 0); break;
case PDF_TOK_NULL: obj = fz_new_null(ctx); break;
case PDF_TOK_INT:
- a = atoi(buf);
- tok = pdf_lex(file, buf, cap, &len);
+ a = buf->i;
+ tok = pdf_lex(file, buf);
/* "cannot parse indirect object (%d %d R)", num, gen */
if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ)
{
@@ -507,8 +503,8 @@ pdf_parse_ind_obj(pdf_document *xref,
}
if (tok == PDF_TOK_INT)
{
- b = atoi(buf);
- tok = pdf_lex(file, buf, cap, &len);
+ b = buf->i;
+ tok = pdf_lex(file, buf);
/* RJW: "cannot parse indirect object (%d %d R)", num, gen); */
if (tok == PDF_TOK_R)
{
@@ -528,7 +524,7 @@ pdf_parse_ind_obj(pdf_document *xref,
fz_try(ctx)
{
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
}
fz_catch(ctx)
{
diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c
index 0dc0e132..c70df3e2 100644
--- a/pdf/pdf_repair.c
+++ b/pdf/pdf_repair.c
@@ -13,11 +13,10 @@ struct entry
};
static void
-pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
+pdf_repair_obj(fz_stream *file, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id)
{
int tok;
int stm_len;
- int len;
int n;
fz_context *ctx = file->ctx;
@@ -26,7 +25,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
stm_len = 0;
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot parse object" */
if (tok == PDF_TOK_OPEN_DICT)
{
@@ -35,7 +34,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
/* Send NULL xref so we don't try to resolve references */
fz_try(ctx)
{
- dict = pdf_parse_dict(NULL, file, buf, cap);
+ dict = pdf_parse_dict(NULL, file, buf);
}
fz_catch(ctx)
{
@@ -79,13 +78,13 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
tok != PDF_TOK_EOF &&
tok != PDF_TOK_INT )
{
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot scan for endobj or stream token" */
}
if (tok == PDF_TOK_INT)
{
- while (len-- > 0)
+ while (buf->len-- > 0)
fz_unread_byte(file);
}
else if (tok == PDF_TOK_STREAM)
@@ -106,7 +105,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
fz_seek(file, *stmofsp + stm_len, 0);
fz_try(ctx)
{
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
}
fz_catch(ctx)
{
@@ -117,23 +116,23 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp,
fz_seek(file, *stmofsp, 0);
}
- n = fz_read(file, (unsigned char *) buf, 9);
+ n = fz_read(file, (unsigned char *) buf->scratch, 9);
if (n < 0)
fz_throw(ctx, "cannot read from file");
- while (memcmp(buf, "endstream", 9) != 0)
+ while (memcmp(buf->scratch, "endstream", 9) != 0)
{
c = fz_read_byte(file);
if (c == EOF)
break;
- memmove(buf, buf + 1, 8);
- buf[8] = c;
+ memmove(&buf->scratch[0], &buf->scratch[1], 8);
+ buf->scratch[8] = c;
}
*stmlenp = fz_tell(file) - *stmofsp - 9;
atobjend:
- tok = pdf_lex(file, buf, cap, &len);
+ tok = pdf_lex(file, buf);
/* RJW: "cannot scan for endobj token" */
if (tok != PDF_TOK_ENDOBJ)
fz_warn(ctx, "object missing 'endobj' token");
@@ -147,11 +146,13 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
fz_stream *stm = NULL;
int tok;
int i, n, count;
- char buf[256];
fz_context *ctx = xref->ctx;
+ pdf_lexbuf buf;
fz_var(stm);
+ buf.size = PDF_LEXBUF_SMALL;
+
fz_try(ctx)
{
obj = pdf_load_object(xref, num, gen);
@@ -164,11 +165,11 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
for (i = 0; i < count; i++)
{
- tok = pdf_lex(stm, buf, sizeof buf, &n);
+ tok = pdf_lex(stm, &buf);
if (tok != PDF_TOK_INT)
fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
- n = atoi(buf);
+ n = buf.i;
if (n >= xref->len)
pdf_resize_xref(xref, n + 1);
@@ -179,7 +180,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
xref->table[n].obj = NULL;
xref->table[n].type = 'o';
- tok = pdf_lex(stm, buf, sizeof buf, &n);
+ tok = pdf_lex(stm, &buf);
if (tok != PDF_TOK_INT)
fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
}
@@ -195,7 +196,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen)
}
void
-pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
+pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf)
{
fz_obj *dict, *obj;
fz_obj *length;
@@ -234,14 +235,14 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
list = fz_malloc_array(ctx, listcap, sizeof(struct entry));
/* look for '%PDF' version marker within first kilobyte of file */
- n = fz_read(xref->file, (unsigned char *)buf, MIN(bufsize, 1024));
+ n = fz_read(xref->file, (unsigned char *)buf->scratch, MIN(buf->size, 1024));
if (n < 0)
fz_throw(ctx, "cannot read from file");
fz_seek(xref->file, 0, 0);
for (i = 0; i < n - 4; i++)
{
- if (memcmp(buf + i, "%PDF", 4) == 0)
+ if (memcmp(&buf->scratch[i], "%PDF", 4) == 0)
{
fz_seek(xref->file, i + 8, 0); /* skip "%PDF-X.Y" */
break;
@@ -263,7 +264,7 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
fz_try(ctx)
{
- tok = pdf_lex(xref->file, buf, bufsize, &n);
+ tok = pdf_lex(xref->file, buf);
}
fz_catch(ctx)
{
@@ -276,14 +277,14 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
numofs = genofs;
num = gen;
genofs = tmpofs;
- gen = atoi(buf);
+ gen = buf->i;
}
else if (tok == PDF_TOK_OBJ)
{
fz_try(ctx)
{
- pdf_repair_obj(xref->file, buf, bufsize, &stm_ofs, &stm_len, &encrypt, &id);
+ pdf_repair_obj(xref->file, buf, &stm_ofs, &stm_len, &encrypt, &id);
}
fz_catch(ctx)
{
@@ -318,7 +319,7 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize)
{
fz_try(ctx)
{
- dict = pdf_parse_dict(xref, xref->file, buf, bufsize);
+ dict = pdf_parse_dict(xref, xref->file, buf);
}
fz_catch(ctx)
{
diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c
index 7500ded3..383747a7 100644
--- a/pdf/pdf_xref.c
+++ b/pdf/pdf_xref.c
@@ -51,6 +51,7 @@ pdf_read_start_xref(pdf_document *xref)
while (iswhite(buf[i]) && i < n)
i ++;
xref->startxref = atoi((char*)(buf + i));
+
return;
}
}
@@ -63,17 +64,16 @@ pdf_read_start_xref(pdf_document *xref)
*/
static void
-pdf_read_old_trailer(pdf_document *xref, char *buf, int cap)
+pdf_read_old_trailer(pdf_document *xref, pdf_lexbuf *buf)
{
int len;
char *s;
- int n;
int t;
int tok;
int c;
- fz_read_line(xref->file, buf, cap);
- if (strncmp(buf, "xref", 4) != 0)
+ fz_read_line(xref->file, buf->scratch, buf->size);
+ if (strncmp(buf->scratch, "xref", 4) != 0)
fz_throw(xref->ctx, "cannot find xref marker");
while (1)
@@ -82,8 +82,8 @@ pdf_read_old_trailer(pdf_document *xref, char *buf, int cap)
if (!(c >= '0' && c <= '9'))
break;
- fz_read_line(xref->file, buf, cap);
- s = buf;
+ fz_read_line(xref->file, buf->scratch, buf->size);
+ s = buf->scratch;
fz_strsep(&s, " "); /* ignore ofs */
if (!s)
fz_throw(xref->ctx, "invalid range marker in xref");
@@ -102,15 +102,15 @@ pdf_read_old_trailer(pdf_document *xref, char *buf, int cap)
fz_try(xref->ctx)
{
- tok = pdf_lex(xref->file, buf, cap, &n);
+ tok = pdf_lex(xref->file, buf);
if (tok != PDF_TOK_TRAILER)
fz_throw(xref->ctx, "expected trailer marker");
- tok = pdf_lex(xref->file, buf, cap, &n);
+ tok = pdf_lex(xref->file, buf);
if (tok != PDF_TOK_OPEN_DICT)
fz_throw(xref->ctx, "expected trailer dictionary");
- xref->trailer = pdf_parse_dict(xref, xref->file, buf, cap);
+ xref->trailer = pdf_parse_dict(xref, xref->file, buf);
}
fz_catch(xref->ctx)
{
@@ -119,11 +119,11 @@ pdf_read_old_trailer(pdf_document *xref, char *buf, int cap)
}
static void
-pdf_read_new_trailer(pdf_document *xref, char *buf, int cap)
+pdf_read_new_trailer(pdf_document *xref, pdf_lexbuf *buf)
{
fz_try(xref->ctx)
{
- xref->trailer = pdf_parse_ind_obj(xref, xref->file, buf, cap, NULL, NULL, NULL);
+ xref->trailer = pdf_parse_ind_obj(xref, xref->file, buf, NULL, NULL, NULL);
}
fz_catch(xref->ctx)
{
@@ -132,7 +132,7 @@ pdf_read_new_trailer(pdf_document *xref, char *buf, int cap)
}
static void
-pdf_read_trailer(pdf_document *xref, char *buf, int cap)
+pdf_read_trailer(pdf_document *xref, pdf_lexbuf *buf)
{
int c;
@@ -145,9 +145,9 @@ pdf_read_trailer(pdf_document *xref, char *buf, int cap)
{
c = fz_peek_byte(xref->file);
if (c == 'x')
- pdf_read_old_trailer(xref, buf, cap);
+ pdf_read_old_trailer(xref, buf);
else if (c >= '0' && c <= '9')
- pdf_read_new_trailer(xref, buf, cap);
+ pdf_read_new_trailer(xref, buf);
else
fz_throw(xref->ctx, "cannot recognize xref format: '%c'", c);
}
@@ -179,7 +179,7 @@ pdf_resize_xref(pdf_document *xref, int newlen)
}
static fz_obj *
-pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
+pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf)
{
int ofs, len;
char *s;
@@ -189,8 +189,8 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
int c;
fz_obj *trailer;
- fz_read_line(xref->file, buf, cap);
- if (strncmp(buf, "xref", 4) != 0)
+ fz_read_line(xref->file, buf->scratch, buf->size);
+ if (strncmp(buf->scratch, "xref", 4) != 0)
fz_throw(xref->ctx, "cannot find xref marker");
while (1)
@@ -199,8 +199,8 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
if (!(c >= '0' && c <= '9'))
break;
- fz_read_line(xref->file, buf, cap);
- s = buf;
+ fz_read_line(xref->file, buf->scratch, buf->size);
+ s = buf->scratch;
ofs = atoi(fz_strsep(&s, " "));
len = atoi(fz_strsep(&s, " "));
@@ -220,12 +220,12 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
for (i = ofs; i < ofs + len; i++)
{
- n = fz_read(xref->file, (unsigned char *) buf, 20);
+ n = fz_read(xref->file, (unsigned char *) buf->scratch, 20);
if (n < 0)
fz_throw(xref->ctx, "cannot read xref table");
if (!xref->table[i].type)
{
- s = buf;
+ s = buf->scratch;
/* broken pdfs where line start with white space */
while (*s != '\0' && iswhite(*s))
@@ -242,15 +242,15 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap)
fz_try(xref->ctx)
{
- tok = pdf_lex(xref->file, buf, cap, &n);
+ tok = pdf_lex(xref->file, buf);
if (tok != PDF_TOK_TRAILER)
fz_throw(xref->ctx, "expected trailer marker");
- tok = pdf_lex(xref->file, buf, cap, &n);
+ tok = pdf_lex(xref->file, buf);
if (tok != PDF_TOK_OPEN_DICT)
fz_throw(xref->ctx, "expected trailer dictionary");
- trailer = pdf_parse_dict(xref, xref->file, buf, cap);
+ trailer = pdf_parse_dict(xref, xref->file, buf);
}
fz_catch(xref->ctx)
{
@@ -296,7 +296,7 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in
/* Entered with file locked. Drops the lock in the middle, but then picks
* it up again before exiting. */
static fz_obj *
-pdf_read_new_xref(pdf_document *xref, char *buf, int cap)
+pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf)
{
fz_stream *stm = NULL;
fz_obj *trailer = NULL;
@@ -312,7 +312,7 @@ pdf_read_new_xref(pdf_document *xref, char *buf, int cap)
fz_try(ctx)
{
- trailer = pdf_parse_ind_obj(xref, xref->file, buf, cap, &num, &gen, &stm_ofs);
+ trailer = pdf_parse_ind_obj(xref, xref->file, buf, &num, &gen, &stm_ofs);
}
fz_catch(ctx)
{
@@ -378,7 +378,7 @@ pdf_read_new_xref(pdf_document *xref, char *buf, int cap)
/* File is locked on entry, and exit (but may be dropped in the middle) */
static fz_obj *
-pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap)
+pdf_read_xref(pdf_document *xref, int ofs, pdf_lexbuf *buf)
{
int c;
fz_context *ctx = xref->ctx;
@@ -393,9 +393,9 @@ pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap)
{
c = fz_peek_byte(xref->file);
if (c == 'x')
- trailer = pdf_read_old_xref(xref, buf, cap);
+ trailer = pdf_read_old_xref(xref, buf);
else if (c >= '0' && c <= '9')
- trailer = pdf_read_new_xref(xref, buf, cap);
+ trailer = pdf_read_new_xref(xref, buf);
else
fz_throw(ctx, "cannot recognize xref format");
}
@@ -407,7 +407,7 @@ pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap)
}
static void
-pdf_read_xref_sections(pdf_document *xref, int ofs, char *buf, int cap)
+pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf)
{
fz_obj *trailer = NULL;
fz_obj *xrefstm = NULL;
@@ -416,16 +416,16 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, char *buf, int cap)
fz_try(ctx)
{
- trailer = pdf_read_xref(xref, ofs, buf, cap);
+ trailer = pdf_read_xref(xref, ofs, buf);
/* FIXME: do we overwrite free entries properly? */
xrefstm = fz_dict_gets(trailer, "XRefStm");
if (xrefstm)
- pdf_read_xref_sections(xref, fz_to_int(xrefstm), buf, cap);
+ pdf_read_xref_sections(xref, fz_to_int(xrefstm), buf);
prev = fz_dict_gets(trailer, "Prev");
if (prev)
- pdf_read_xref_sections(xref, fz_to_int(prev), buf, cap);
+ pdf_read_xref_sections(xref, fz_to_int(prev), buf);
}
fz_catch(ctx)
{
@@ -441,7 +441,7 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, char *buf, int cap)
*/
static void
-pdf_load_xref(pdf_document *xref, char *buf, int bufsize)
+pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf)
{
fz_obj *size;
int i;
@@ -451,7 +451,7 @@ pdf_load_xref(pdf_document *xref, char *buf, int bufsize)
pdf_read_start_xref(xref);
- pdf_read_trailer(xref, buf, bufsize);
+ pdf_read_trailer(xref, buf);
size = fz_dict_gets(xref->trailer, "Size");
if (!size)
@@ -459,7 +459,7 @@ pdf_load_xref(pdf_document *xref, char *buf, int bufsize)
pdf_resize_xref(xref, fz_to_int(size));
- pdf_read_xref_sections(xref, xref->startxref, buf, bufsize);
+ pdf_read_xref_sections(xref, xref->startxref, buf);
/* broken pdfs where first object is not free */
if (xref->table[0].type != 'f')
@@ -672,6 +672,7 @@ pdf_open_document_with_stream(fz_stream *file)
xref = fz_malloc_struct(ctx, pdf_document);
pdf_init_document(xref);
+ xref->lexbuf.base.size = PDF_LEXBUF_LARGE;
xref->file = fz_keep_stream(file);
xref->ctx = ctx;
@@ -681,7 +682,7 @@ pdf_open_document_with_stream(fz_stream *file)
fz_try(ctx)
{
- pdf_load_xref(xref, xref->scratch, sizeof xref->scratch);
+ pdf_load_xref(xref, &xref->lexbuf.base);
}
fz_catch(ctx)
{
@@ -705,7 +706,7 @@ pdf_open_document_with_stream(fz_stream *file)
int hasroot, hasinfo;
if (repaired)
- pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch);
+ pdf_repair_xref(xref, &xref->lexbuf.base);
fz_unlock(ctx, FZ_LOCK_FILE);
locked = 0;
@@ -864,7 +865,7 @@ pdf_debug_xref(pdf_document *xref)
*/
static void
-pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap)
+pdf_load_obj_stm(pdf_document *xref, int num, int gen, pdf_lexbuf *buf)
{
fz_stream *stm = NULL;
fz_obj *objstm = NULL;
@@ -874,7 +875,7 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap)
fz_obj *obj;
int first;
int count;
- int i, n;
+ int i;
int tok;
fz_context *ctx = xref->ctx;
@@ -896,15 +897,15 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap)
stm = pdf_open_stream(xref, num, gen);
for (i = 0; i < count; i++)
{
- tok = pdf_lex(stm, buf, cap, &n);
+ tok = pdf_lex(stm, buf);
if (tok != PDF_TOK_INT)
fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
- numbuf[i] = atoi(buf);
+ numbuf[i] = buf->i;
- tok = pdf_lex(stm, buf, cap, &n);
+ tok = pdf_lex(stm, buf);
if (tok != PDF_TOK_INT)
fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen);
- ofsbuf[i] = atoi(buf);
+ ofsbuf[i] = buf->i;
}
fz_seek(stm, first, 0);
@@ -913,7 +914,7 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap)
{
fz_seek(stm, first + ofsbuf[i], 0);
- obj = pdf_parse_stm_obj(xref, stm, buf, cap);
+ obj = pdf_parse_stm_obj(xref, stm, buf);
/* RJW: Ensure above does fz_throw(ctx, "cannot parse object %d in stream (%d %d R)", i, num, gen); */
if (numbuf[i] < 1 || numbuf[i] >= xref->len)
@@ -978,7 +979,7 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
fz_try(ctx)
{
- x->obj = pdf_parse_ind_obj(xref, xref->file, xref->scratch, sizeof xref->scratch,
+ x->obj = pdf_parse_ind_obj(xref, xref->file, &xref->lexbuf.base,
&rnum, &rgen, &x->stm_ofs);
}
fz_catch(ctx)
@@ -1005,7 +1006,7 @@ pdf_cache_object(pdf_document *xref, int num, int gen)
{
fz_try(ctx)
{
- pdf_load_obj_stm(xref, x->ofs, 0, xref->scratch, sizeof xref->scratch);
+ pdf_load_obj_stm(xref, x->ofs, 0, &xref->lexbuf.base);
}
fz_catch(ctx)
{