diff options
-rw-r--r-- | pdf/mupdf.h | 42 | ||||
-rw-r--r-- | pdf/pdf_cmap_parse.c | 130 | ||||
-rw-r--r-- | pdf/pdf_function.c | 33 | ||||
-rw-r--r-- | pdf/pdf_interpret.c | 63 | ||||
-rw-r--r-- | pdf/pdf_lex.c | 117 | ||||
-rw-r--r-- | pdf/pdf_parse.c | 102 | ||||
-rw-r--r-- | pdf/pdf_repair.c | 47 | ||||
-rw-r--r-- | pdf/pdf_xref.c | 95 |
8 files changed, 337 insertions, 292 deletions
diff --git a/pdf/mupdf.h b/pdf/mupdf.h index b233288f..15a96541 100644 --- a/pdf/mupdf.h +++ b/pdf/mupdf.h @@ -102,12 +102,40 @@ enum PDF_NUM_TOKENS }; -int pdf_lex(fz_stream *f, char *buf, int n, int *len); +enum +{ + PDF_LEXBUF_SMALL = 256, + PDF_LEXBUF_LARGE = 65536 +}; + + + +typedef struct pdf_lexbuf_s pdf_lexbuf; +typedef struct pdf_lexbuf_large_s pdf_lexbuf_large; + +struct pdf_lexbuf_s +{ + int size; + int len; + int i; + float f; + char scratch[PDF_LEXBUF_SMALL]; +}; + +struct pdf_lexbuf_large_s +{ + pdf_lexbuf base; + char scratch[PDF_LEXBUF_LARGE - PDF_LEXBUF_SMALL]; +}; + + + +int pdf_lex(fz_stream *f, pdf_lexbuf *lexbuf); -fz_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, char *buf, int cap); -fz_obj *pdf_parse_dict(pdf_document *doc, fz_stream *f, char *buf, int cap); -fz_obj *pdf_parse_stm_obj(pdf_document *doc, fz_stream *f, char *buf, int cap); -fz_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, char *buf, int cap, int *num, int *gen, int *stm_ofs); +fz_obj *pdf_parse_array(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf); +fz_obj *pdf_parse_dict(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf); +fz_obj *pdf_parse_stm_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf); +fz_obj *pdf_parse_ind_obj(pdf_document *doc, fz_stream *f, pdf_lexbuf *buf, int *num, int *gen, int *stm_ofs); fz_rect pdf_to_rect(fz_context *ctx, fz_obj *array); fz_matrix pdf_to_matrix(fz_context *ctx, fz_obj *array); @@ -170,7 +198,7 @@ struct pdf_document_s fz_obj **page_objs; fz_obj **page_refs; - char scratch[65536]; + pdf_lexbuf_large lexbuf; }; fz_obj *pdf_resolve_indirect(fz_obj *ref); @@ -194,7 +222,7 @@ pdf_document *pdf_open_document(fz_context *ctx, const char *filename); void pdf_close_document(pdf_document *doc); /* private */ -void pdf_repair_xref(pdf_document *doc, char *buf, int bufsize); +void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf); void pdf_repair_obj_stms(pdf_document *doc); void pdf_debug_xref(pdf_document *); void pdf_resize_xref(pdf_document *doc, int newcap); diff --git a/pdf/pdf_cmap_parse.c b/pdf/pdf_cmap_parse.c index fb37c4a9..5c21393e 100644 --- a/pdf/pdf_cmap_parse.c +++ b/pdf/pdf_cmap_parse.c @@ -49,14 +49,14 @@ pdf_code_from_string(char *buf, int len) } static int -pdf_lex_cmap(fz_stream *file, char *buf, int n, int *sl) +pdf_lex_cmap(fz_stream *file, pdf_lexbuf *buf) { - int tok = pdf_lex(file, buf, n, sl); + int tok = pdf_lex(file, buf); /* RJW: Lost debugging here: "cannot parse cmap token" */ if (tok == PDF_TOK_KEYWORD) - tok = pdf_cmap_token_from_keyword(buf); + tok = pdf_cmap_token_from_keyword(buf->scratch); return tok; } @@ -64,15 +64,15 @@ pdf_lex_cmap(fz_stream *file, char *buf, int n, int *sl) static void pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) { - char buf[256]; + pdf_lexbuf buf; int tok; - int len; - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + buf.size = PDF_LEXBUF_SMALL; + tok = pdf_lex_cmap(file, &buf); /* RJW: Lost debugging: "syntaxerror in cmap" */ if (tok == PDF_TOK_NAME) - fz_strlcpy(cmap->cmap_name, buf, sizeof(cmap->cmap_name)); + fz_strlcpy(cmap->cmap_name, buf.scratch, sizeof(cmap->cmap_name)); else fz_warn(ctx, "expected name after CMapName in cmap"); } @@ -80,15 +80,15 @@ pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) static void pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) { - char buf[256]; + pdf_lexbuf buf; int tok; - int len; - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + buf.size = PDF_LEXBUF_SMALL; + tok = pdf_lex_cmap(file, &buf); /* RJW: Lost debugging: "syntaxerror in cmap" */ if (tok == PDF_TOK_INT) - pdf_set_wmode(ctx, cmap, atoi(buf)); + pdf_set_wmode(ctx, cmap, buf.i); else fz_warn(ctx, "expected integer after WMode in cmap"); } @@ -96,14 +96,14 @@ pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) static void pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) { - char buf[256]; + pdf_lexbuf buf; int tok; - int len; int lo, hi; + buf.size = PDF_LEXBUF_SMALL; while (1) { - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: Lost debugging: "syntaxerror in cmap" */ if (tok == TOK_END_CODESPACE_RANGE) @@ -111,13 +111,13 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) else if (tok == PDF_TOK_STRING) { - lo = pdf_code_from_string(buf, len); - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + lo = pdf_code_from_string(buf.scratch, buf.len); + tok = pdf_lex_cmap(file, &buf); /* RJW: Lost debugging: "syntaxerror in cmap" */ if (tok == PDF_TOK_STRING) { - hi = pdf_code_from_string(buf, len); - pdf_add_codespace(ctx, cmap, lo, hi, len); + hi = pdf_code_from_string(buf.scratch, buf.len); + pdf_add_codespace(ctx, cmap, lo, hi, buf.len); } else break; } @@ -131,14 +131,14 @@ pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) static void pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) { - char buf[256]; + pdf_lexbuf buf; int tok; - int len; int lo, hi, dst; + buf.size = PDF_LEXBUF_SMALL; while (1) { - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: Lost debugging: "syntaxerror in cmap" */ if (tok == TOK_END_CID_RANGE) @@ -147,21 +147,21 @@ pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) else if (tok != PDF_TOK_STRING) fz_throw(ctx, "expected string or endcidrange"); - lo = pdf_code_from_string(buf, len); + lo = pdf_code_from_string(buf.scratch, buf.len); - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: Lost debugging: "syntaxerror in cmap" */ if (tok != PDF_TOK_STRING) fz_throw(ctx, "expected string"); - hi = pdf_code_from_string(buf, len); + hi = pdf_code_from_string(buf.scratch, buf.len); - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: Lost debugging: "syntaxerror in cmap" */ if (tok != PDF_TOK_INT) fz_throw(ctx, "expected integer"); - dst = atoi(buf); + dst = buf.i; pdf_map_range_to_range(ctx, cmap, lo, hi, dst); } @@ -170,14 +170,14 @@ pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) static void pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) { - char buf[256]; + pdf_lexbuf buf; int tok; - int len; int src, dst; + buf.size = PDF_LEXBUF_SMALL; while (1) { - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: "syntaxerror in cmap" */ if (tok == TOK_END_CID_CHAR) @@ -186,15 +186,15 @@ pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) else if (tok != PDF_TOK_STRING) fz_throw(ctx, "expected string or endcidchar"); - src = pdf_code_from_string(buf, len); + src = pdf_code_from_string(buf.scratch, buf.len); - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: "syntaxerror in cmap" */ if (tok != PDF_TOK_INT) fz_throw(ctx, "expected integer"); - dst = atoi(buf); + dst = buf.i; pdf_map_range_to_range(ctx, cmap, src, src, dst); } @@ -203,15 +203,15 @@ pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) static void pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int lo, int hi) { - char buf[256]; + pdf_lexbuf buf; int tok; - int len; int dst[256]; int i; + buf.size = PDF_LEXBUF_SMALL; while (1) { - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: "syntaxerror in cmap" */ if (tok == PDF_TOK_CLOSE_ARRAY) @@ -221,12 +221,12 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l else if (tok != PDF_TOK_STRING) fz_throw(ctx, "expected string or ]"); - if (len / 2) + if (buf.len / 2) { - for (i = 0; i < len / 2; i++) - dst[i] = pdf_code_from_string(buf + i * 2, 2); + for (i = 0; i < buf.len / 2; i++) + dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2); - pdf_map_one_to_many(ctx, cmap, lo, dst, len / 2); + pdf_map_one_to_many(ctx, cmap, lo, dst, buf.len / 2); } lo ++; @@ -236,14 +236,14 @@ pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, int l static void pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) { - char buf[256]; + pdf_lexbuf buf; int tok; - int len; int lo, hi, dst; + buf.size = PDF_LEXBUF_SMALL; while (1) { - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: "syntaxerror in cmap" */ if (tok == TOK_END_BF_RANGE) @@ -252,23 +252,23 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) else if (tok != PDF_TOK_STRING) fz_throw(ctx, "expected string or endbfrange"); - lo = pdf_code_from_string(buf, len); + lo = pdf_code_from_string(buf.scratch, buf.len); - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: "syntaxerror in cmap" */ if (tok != PDF_TOK_STRING) fz_throw(ctx, "expected string"); - hi = pdf_code_from_string(buf, len); + hi = pdf_code_from_string(buf.scratch, buf.len); - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: "syntaxerror in cmap" */ if (tok == PDF_TOK_STRING) { - if (len == 2) + if (buf.len == 2) { - dst = pdf_code_from_string(buf, len); + dst = pdf_code_from_string(buf.scratch, buf.len); pdf_map_range_to_range(ctx, cmap, lo, hi, dst); } else @@ -276,10 +276,10 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) int dststr[256]; int i; - if (len / 2) + if (buf.len / 2) { - for (i = 0; i < len / 2; i++) - dststr[i] = pdf_code_from_string(buf + i * 2, 2); + for (i = 0; i < buf.len / 2; i++) + dststr[i] = pdf_code_from_string(&buf.scratch[i * 2], 2); while (lo <= hi) { @@ -307,16 +307,16 @@ pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) static void pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) { - char buf[256]; + pdf_lexbuf buf; int tok; - int len; int dst[256]; int src; int i; + buf.size = PDF_LEXBUF_SMALL; while (1) { - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: "syntaxerror in cmap" */ if (tok == TOK_END_BF_CHAR) @@ -325,18 +325,18 @@ pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file) else if (tok != PDF_TOK_STRING) fz_throw(ctx, "expected string or endbfchar"); - src = pdf_code_from_string(buf, len); + src = pdf_code_from_string(buf.scratch, buf.len); - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); /* RJW: "syntaxerror in cmap" */ /* Note: does not handle /dstName */ if (tok != PDF_TOK_STRING) fz_throw(ctx, "expected string"); - if (len / 2) + if (buf.len / 2) { - for (i = 0; i < len / 2; i++) - dst[i] = pdf_code_from_string(buf + i * 2, 2); + for (i = 0; i < buf.len / 2; i++) + dst[i] = pdf_code_from_string(&buf.scratch[i * 2], 2); pdf_map_one_to_many(ctx, cmap, src, dst, i); } } @@ -347,11 +347,11 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file) { pdf_cmap *cmap; char key[64]; - char buf[256]; + pdf_lexbuf buf; int tok; - int len; const char *where; + buf.size = PDF_LEXBUF_SMALL; cmap = pdf_new_cmap(ctx); strcpy(key, ".notdef"); @@ -363,25 +363,25 @@ pdf_load_cmap(fz_context *ctx, fz_stream *file) while (1) { where = ""; - tok = pdf_lex_cmap(file, buf, sizeof buf, &len); + tok = pdf_lex_cmap(file, &buf); if (tok == PDF_TOK_EOF || tok == TOK_END_CMAP) break; else if (tok == PDF_TOK_NAME) { - if (!strcmp(buf, "CMapName")) + if (!strcmp(buf.scratch, "CMapName")) { where = " after CMapName"; pdf_parse_cmap_name(ctx, cmap, file); } - else if (!strcmp(buf, "WMode")) + else if (!strcmp(buf.scratch, "WMode")) { where = " after WMode"; pdf_parse_wmode(ctx, cmap, file); } else - fz_strlcpy(key, buf, sizeof key); + fz_strlcpy(key, buf.scratch, sizeof key); } else if (tok == TOK_USECMAP) diff --git a/pdf/pdf_function.c b/pdf/pdf_function.c index 4478827c..17373f42 100644 --- a/pdf/pdf_function.c +++ b/pdf/pdf_function.c @@ -683,18 +683,18 @@ resize_code(fz_context *ctx, pdf_function *func, int newsize) static void parse_code(pdf_function *func, fz_stream *stream, int *codeptr) { - char buf[64]; - int len; + pdf_lexbuf buf; int tok; int opptr, elseptr, ifptr; int a, b, mid, cmp; fz_context *ctx = stream->ctx; - memset(buf, 0, sizeof(buf)); + buf.size = PDF_LEXBUF_SMALL; + memset(buf.scratch, 0, sizeof(buf.scratch)); while (1) { - tok = pdf_lex(stream, buf, sizeof buf, &len); + tok = pdf_lex(stream, &buf); /* RJW: "calculator function lexical error" */ switch(tok) @@ -705,7 +705,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr) case PDF_TOK_INT: resize_code(ctx, func, *codeptr); func->u.p.code[*codeptr].type = PS_INT; - func->u.p.code[*codeptr].u.i = atoi(buf); + func->u.p.code[*codeptr].u.i = buf.i; ++*codeptr; break; @@ -726,7 +726,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr) case PDF_TOK_REAL: resize_code(ctx, func, *codeptr); func->u.p.code[*codeptr].type = PS_REAL; - func->u.p.code[*codeptr].u.f = fz_atof(buf); + func->u.p.code[*codeptr].u.f = buf.f; ++*codeptr; break; @@ -740,7 +740,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr) parse_code(func, stream, codeptr); /* RJW: "error in 'if' branch" */ - tok = pdf_lex(stream, buf, sizeof buf, &len); + tok = pdf_lex(stream, &buf); /* RJW: "calculator function syntax error" */ if (tok == PDF_TOK_OPEN_BRACE) @@ -749,7 +749,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr) parse_code(func, stream, codeptr); /* RJW: "error in 'else' branch" */ - tok = pdf_lex(stream, buf, sizeof buf, &len); + tok = pdf_lex(stream, &buf); /* RJW: "calculator function syntax error" */ } else @@ -760,7 +760,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr) if (tok != PDF_TOK_KEYWORD) fz_throw(ctx, "missing keyword in 'if-else' context"); - if (!strcmp(buf, "if")) + if (!strcmp(buf.scratch, "if")) { if (elseptr >= 0) fz_throw(ctx, "too many branches for 'if'"); @@ -771,7 +771,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr) func->u.p.code[opptr+3].type = PS_BLOCK; func->u.p.code[opptr+3].u.block = *codeptr; } - else if (!strcmp(buf, "ifelse")) + else if (!strcmp(buf.scratch, "ifelse")) { if (elseptr < 0) fz_throw(ctx, "not enough branches for 'ifelse'"); @@ -786,7 +786,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr) } else { - fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf); + fz_throw(ctx, "unknown keyword in 'if-else' context: '%s'", buf.scratch); } break; @@ -804,7 +804,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr) while (b - a > 1) { mid = (a + b) / 2; - cmp = strcmp(buf, ps_op_names[mid]); + cmp = strcmp(buf.scratch, ps_op_names[mid]); if (cmp > 0) a = mid; else if (cmp < 0) @@ -813,7 +813,7 @@ parse_code(pdf_function *func, fz_stream *stream, int *codeptr) a = b = mid; } if (cmp != 0) - fz_throw(ctx, "unknown operator: '%s'", buf); + fz_throw(ctx, "unknown operator: '%s'", buf.scratch); resize_code(ctx, func, *codeptr); func->u.p.code[*codeptr].type = PS_OPERATOR; @@ -832,12 +832,13 @@ load_postscript_func(pdf_function *func, pdf_document *xref, fz_obj *dict, int n { fz_stream *stream = NULL; int codeptr; - char buf[64]; + pdf_lexbuf buf; int tok; - int len; fz_context *ctx = xref->ctx; int locked = 0; + buf.size = PDF_LEXBUF_SMALL; + fz_var(stream); fz_var(locked); @@ -846,7 +847,7 @@ load_postscript_func(pdf_function *func, pdf_document *xref, fz_obj *dict, int n stream = pdf_open_stream(xref, num, gen); /* RJW: "cannot open calculator function stream" */ - tok = pdf_lex(stream, buf, sizeof buf, &len); + tok = pdf_lex(stream, &buf); if (tok != PDF_TOK_OPEN_BRACE) { fz_throw(ctx, "stream is not a calculator function"); diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c index effea657..1cfe6a96 100644 --- a/pdf/pdf_interpret.c +++ b/pdf/pdf_interpret.c @@ -819,6 +819,7 @@ pdf_show_string(pdf_csi *csi, unsigned char *buf, int len) { int w = pdf_decode_cmap(fontdesc->encoding, buf, &cpt); buf += w; + cid = pdf_lookup_cmap(fontdesc->encoding, cpt); if (cid >= 0) pdf_show_char(csi, cid); @@ -1625,12 +1626,10 @@ static void pdf_run_BI(pdf_csi *csi, fz_obj *rdb, fz_stream *file) { fz_context *ctx = csi->dev->ctx; int ch; - char *buf = csi->xref->scratch; - int buflen = sizeof(csi->xref->scratch); fz_image *img; fz_obj *obj; - obj = pdf_parse_dict(csi->xref, file, buf, buflen); + obj = pdf_parse_dict(csi->xref, file, &csi->xref->lexbuf.base); /* RJW: "cannot parse inline image dictionary" */ /* read whitespace after ID keyword */ @@ -2523,10 +2522,10 @@ pdf_run_keyword(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf) } static void -pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen) +pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, pdf_lexbuf *buf) { fz_context *ctx = csi->dev->ctx; - int tok, len, in_array; + int tok, in_array; /* make sure we have a clean slate if we come here from flush_text */ pdf_clear_stack(csi); @@ -2551,7 +2550,7 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen csi->cookie->progress++; } - tok = pdf_lex(file, buf, buflen, &len); + tok = pdf_lex(file, buf); /* RJW: "lexical error in content stream" */ if (in_array) @@ -2560,19 +2559,24 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen { in_array = 0; } - else if (tok == PDF_TOK_INT || tok == PDF_TOK_REAL) + else if (tok == PDF_TOK_REAL) { pdf_gstate *gstate = csi->gstate + csi->gtop; - pdf_show_space(csi, -fz_atof(buf) * gstate->size * 0.001f); + pdf_show_space(csi, -buf->f * gstate->size * 0.001f); + } + else if (tok == PDF_TOK_INT) + { + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_show_space(csi, -buf->i * gstate->size * 0.001f); } else if (tok == PDF_TOK_STRING) { - pdf_show_string(csi, (unsigned char *)buf, len); + pdf_show_string(csi, (unsigned char *)buf->scratch, buf->len); } else if (tok == PDF_TOK_KEYWORD) { - if (!strcmp(buf, "Tw") || !strcmp(buf, "Tc")) - fz_warn(ctx, "ignoring keyword '%s' inside array", buf); + if (!strcmp(buf->scratch, "Tw") || !strcmp(buf->scratch, "Tc")) + fz_warn(ctx, "ignoring keyword '%s' inside array", buf->scratch); else fz_throw(ctx, "syntax error in array"); } @@ -2591,7 +2595,7 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen case PDF_TOK_OPEN_ARRAY: if (!csi->in_text) { - csi->obj = pdf_parse_array(csi->xref, file, buf, buflen); + csi->obj = pdf_parse_array(csi->xref, file, buf); /* RJW: "cannot parse array" */ } else @@ -2601,38 +2605,38 @@ pdf_run_stream(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen break; case PDF_TOK_OPEN_DICT: - csi->obj = pdf_parse_dict(csi->xref, file, buf, buflen); + csi->obj = pdf_parse_dict(csi->xref, file, buf); /* RJW: "cannot parse dictionary" */ break; case PDF_TOK_NAME: - fz_strlcpy(csi->name, buf, sizeof(csi->name)); + fz_strlcpy(csi->name, buf->scratch, sizeof(csi->name)); break; case PDF_TOK_INT: - csi->stack[csi->top] = atoi(buf); + csi->stack[csi->top] = buf->i; csi->top ++; break; case PDF_TOK_REAL: - csi->stack[csi->top] = fz_atof(buf); + csi->stack[csi->top] = buf->f; csi->top ++; break; case PDF_TOK_STRING: - if (len <= sizeof(csi->string)) + if (buf->len <= sizeof(csi->string)) { - memcpy(csi->string, buf, len); - csi->string_len = len; + memcpy(csi->string, buf->scratch, buf->len); + csi->string_len = buf->len; } else { - csi->obj = fz_new_string(ctx, buf, len); + csi->obj = fz_new_string(ctx, buf->scratch, buf->len); } break; case PDF_TOK_KEYWORD: - pdf_run_keyword(csi, rdb, file, buf); + pdf_run_keyword(csi, rdb, file, buf->scratch); /* RJW: "cannot run keyword" */ pdf_clear_stack(csi); break; @@ -2651,8 +2655,7 @@ static void pdf_run_buffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents) { fz_context *ctx = csi->dev->ctx; - int len = sizeof csi->xref->scratch; - char *buf = NULL; + pdf_lexbuf_large *buf; fz_stream * file = NULL; int save_in_text; @@ -2664,13 +2667,14 @@ pdf_run_buffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents) fz_try(ctx) { - buf = fz_malloc(ctx, len); /* we must be re-entrant for type3 fonts */ + buf = fz_malloc(ctx, sizeof(*buf)); /* we must be re-entrant for type3 fonts */ + buf->base.size = PDF_LEXBUF_LARGE; file = fz_open_buffer(ctx, contents); save_in_text = csi->in_text; csi->in_text = 0; fz_try(ctx) { - pdf_run_stream(csi, rdb, file, buf, len); + pdf_run_stream(csi, rdb, file, &buf->base); } fz_catch(ctx) { @@ -2678,14 +2682,15 @@ pdf_run_buffer(pdf_csi *csi, fz_obj *rdb, fz_buffer *contents) } csi->in_text = save_in_text; } - fz_catch(ctx) - { + fz_always(ctx) + { fz_close(file); fz_free(ctx, buf); + } + fz_catch(ctx) + { fz_throw(ctx, "cannot parse context stream"); } - fz_close(file); - fz_free(ctx, buf); } void diff --git a/pdf/pdf_lex.c b/pdf/pdf_lex.c index 24828412..322d945c 100644 --- a/pdf/pdf_lex.c +++ b/pdf/pdf_lex.c @@ -63,87 +63,106 @@ lex_comment(fz_stream *f) } static int -lex_number(fz_stream *f, char *s, int n, int *tok) +lex_number(fz_stream *f, pdf_lexbuf *buf, int c) { - char *buf = s; - *tok = PDF_TOK_INT; + int neg = 0; + int i = 0; + int n; + int d; + float v; /* Initially we might have +, -, . or a digit */ - if (n > 1) + switch (c) + { + case '.': + goto loop_after_dot; + case '-': + neg = 1; + break; + case '+': + break; + default: /* Must be a digit */ + i = c - '0'; + break; + } + + while (1) { int c = fz_read_byte(f); switch (c) { case '.': - *tok = PDF_TOK_REAL; - *s++ = c; - n--; goto loop_after_dot; - case '+': - case '-': case RANGE_0_9: - *s++ = c; - n--; - goto loop_after_sign; + i = 10*i + c - '0'; + /* FIXME: Need overflow check here; do we care? */ + break; default: fz_unread_byte(f); - goto end; + /* Fallthrough */ case EOF: - goto end; + if (neg) + i = -i; + buf->i = i; + return PDF_TOK_INT; } } - /* We can't accept a sign from here on in, just . or a digit */ -loop_after_sign: - while (n > 1) + /* In here, we've seen a dot, so can accept just digits */ +loop_after_dot: + n = 0; + d = 1; + while (1) { int c = fz_read_byte(f); switch (c) { - case '.': - *tok = PDF_TOK_REAL; - *s++ = c; - n--; - goto loop_after_dot; case RANGE_0_9: - *s++ = c; + if (d >= INT_MAX/10) + goto underflow; + n = n*10 + (c - '0'); + d *= 10; break; default: fz_unread_byte(f); - goto end; + /* Fallthrough */ case EOF: - goto end; + v = (float)i + ((float)n / (float)d); + if (neg) + v = -v; + buf->f = v; + return PDF_TOK_REAL; } - n--; } - /* In here, we've seen a dot, so can accept just digits */ -loop_after_dot: - while (n > 1) +underflow: + /* Ignore any digits after here, because they are too small */ + while (1) { int c = fz_read_byte(f); switch (c) { case RANGE_0_9: - *s++ = c; break; default: fz_unread_byte(f); - goto end; + /* Fallthrough */ case EOF: - goto end; + v = (float)i + ((float)n / (float)d); + if (neg) + v = -v; + buf->f = v; + return PDF_TOK_REAL; } - n--; } - -end: - *s = '\0'; - return s-buf; } static void -lex_name(fz_stream *f, char *s, int n) +lex_name(fz_stream *f, pdf_lexbuf *buf) { + char *s = buf->scratch; + int n = buf->size; + while (n > 1) { int c = fz_read_byte(f); @@ -208,6 +227,7 @@ lex_name(fz_stream *f, char *s, int n) } end: *s = '\0'; + buf->len = s - buf->scratch; } static int @@ -380,7 +400,7 @@ pdf_token_from_keyword(char *key) } int -pdf_lex(fz_stream *f, char *buf, int n, int *sl) +pdf_lex(fz_stream *f, pdf_lexbuf *buf) { while (1) { @@ -396,11 +416,10 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl) lex_comment(f); break; case '/': - lex_name(f, buf, n); - *sl = strlen(buf); + lex_name(f, buf); return PDF_TOK_NAME; case '(': - *sl = lex_string(f, buf, n); + buf->len = lex_string(f, buf->scratch, buf->size); return PDF_TOK_STRING; case ')': fz_warn(f->ctx, "lexical error (unexpected ')')"); @@ -414,7 +433,7 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl) else { fz_unread_byte(f); - *sl = lex_hex_string(f, buf, n); + buf->len = lex_hex_string(f, buf->scratch, buf->size); return PDF_TOK_STRING; } case '>': @@ -434,17 +453,11 @@ pdf_lex(fz_stream *f, char *buf, int n, int *sl) case '}': return PDF_TOK_CLOSE_BRACE; case IS_NUMBER: - { - int tok; - fz_unread_byte(f); - *sl = lex_number(f, buf, n, &tok); - return tok; - } + return lex_number(f, buf, c); default: /* isregular: !isdelim && !iswhite && c != EOF */ fz_unread_byte(f); - lex_name(f, buf, n); - *sl = strlen(buf); - return pdf_token_from_keyword(buf); + lex_name(f, buf); + return pdf_token_from_keyword(buf->scratch); } } } diff --git a/pdf/pdf_parse.c b/pdf/pdf_parse.c index 220eb30c..fb6cb7ef 100644 --- a/pdf/pdf_parse.c +++ b/pdf/pdf_parse.c @@ -171,13 +171,12 @@ pdf_to_utf8_name(fz_context *ctx, fz_obj *src) } fz_obj * -pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap) +pdf_parse_array(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf) { fz_obj *ary = NULL; fz_obj *obj = NULL; int a = 0, b = 0, n = 0; int tok; - int len; fz_context *ctx = file->ctx; fz_obj *op; @@ -189,7 +188,7 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap) { while (1) { - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); if (tok != PDF_TOK_INT && tok != PDF_TOK_R) { @@ -228,9 +227,9 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap) case PDF_TOK_INT: if (n == 0) - a = atoi(buf); + a = buf->i; if (n == 1) - b = atoi(buf); + b = buf->i; n ++; break; @@ -245,33 +244,33 @@ pdf_parse_array(pdf_document *xref, fz_stream *file, char *buf, int cap) break; case PDF_TOK_OPEN_ARRAY: - obj = pdf_parse_array(xref, file, buf, cap); + obj = pdf_parse_array(xref, file, buf); fz_array_push(ary, obj); fz_drop_obj(obj); obj = NULL; break; case PDF_TOK_OPEN_DICT: - obj = pdf_parse_dict(xref, file, buf, cap); + obj = pdf_parse_dict(xref, file, buf); fz_array_push(ary, obj); fz_drop_obj(obj); obj = NULL; break; case PDF_TOK_NAME: - obj = fz_new_name(ctx, buf); + obj = fz_new_name(ctx, buf->scratch); fz_array_push(ary, obj); fz_drop_obj(obj); obj = NULL; break; case PDF_TOK_REAL: - obj = fz_new_real(ctx, fz_atof(buf)); + obj = fz_new_real(ctx, buf->f); fz_array_push(ary, obj); fz_drop_obj(obj); obj = NULL; break; case PDF_TOK_STRING: - obj = fz_new_string(ctx, buf, len); + obj = fz_new_string(ctx, buf->scratch, buf->len); fz_array_push(ary, obj); fz_drop_obj(obj); obj = NULL; @@ -312,13 +311,12 @@ end: } fz_obj * -pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap) +pdf_parse_dict(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf) { fz_obj *dict = NULL; fz_obj *key = NULL; fz_obj *val = NULL; int tok; - int len; int a, b; fz_context *ctx = file->ctx; @@ -332,45 +330,45 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap) { while (1) { - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); skip: if (tok == PDF_TOK_CLOSE_DICT) break; /* for BI .. ID .. EI in content streams */ - if (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID")) + if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID")) break; if (tok != PDF_TOK_NAME) fz_throw(ctx, "invalid key in dict"); - key = fz_new_name(ctx, buf); + key = fz_new_name(ctx, buf->scratch); - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); switch (tok) { case PDF_TOK_OPEN_ARRAY: - val = pdf_parse_array(xref, file, buf, cap); + val = pdf_parse_array(xref, file, buf); break; case PDF_TOK_OPEN_DICT: - val = pdf_parse_dict(xref, file, buf, cap); + val = pdf_parse_dict(xref, file, buf); break; - case PDF_TOK_NAME: val = fz_new_name(ctx, buf); break; - case PDF_TOK_REAL: val = fz_new_real(ctx, fz_atof(buf)); break; - case PDF_TOK_STRING: val = fz_new_string(ctx, buf, len); break; + case PDF_TOK_NAME: val = fz_new_name(ctx, buf->scratch); break; + case PDF_TOK_REAL: val = fz_new_real(ctx, buf->f); break; + case PDF_TOK_STRING: val = fz_new_string(ctx, buf->scratch, buf->len); break; case PDF_TOK_TRUE: val = fz_new_bool(ctx, 1); break; case PDF_TOK_FALSE: val = fz_new_bool(ctx, 0); break; case PDF_TOK_NULL: val = fz_new_null(ctx); break; case PDF_TOK_INT: /* 64-bit to allow for numbers > INT_MAX and overflow */ - a = (int) strtoll(buf, 0, 10); - tok = pdf_lex(file, buf, cap, &len); + a = buf->i; + tok = pdf_lex(file, buf); if (tok == PDF_TOK_CLOSE_DICT || tok == PDF_TOK_NAME || - (tok == PDF_TOK_KEYWORD && !strcmp(buf, "ID"))) + (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "ID"))) { val = fz_new_int(ctx, a); fz_dict_put(dict, key, val); @@ -382,8 +380,8 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap) } if (tok == PDF_TOK_INT) { - b = atoi(buf); - tok = pdf_lex(file, buf, cap, &len); + b = buf->i; + tok = pdf_lex(file, buf); if (tok == PDF_TOK_R) { val = fz_new_indirect(ctx, a, b, xref); @@ -414,30 +412,29 @@ pdf_parse_dict(pdf_document *xref, fz_stream *file, char *buf, int cap) } fz_obj * -pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, char *buf, int cap) +pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, pdf_lexbuf *buf) { int tok; - int len; fz_context *ctx = file->ctx; - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); /* RJW: "cannot parse token in object stream") */ switch (tok) { case PDF_TOK_OPEN_ARRAY: - return pdf_parse_array(xref, file, buf, cap); + return pdf_parse_array(xref, file, buf); /* RJW: "cannot parse object stream" */ case PDF_TOK_OPEN_DICT: - return pdf_parse_dict(xref, file, buf, cap); + return pdf_parse_dict(xref, file, buf); /* RJW: "cannot parse object stream" */ - case PDF_TOK_NAME: return fz_new_name(ctx, buf); break; - case PDF_TOK_REAL: return fz_new_real(ctx, fz_atof(buf)); break; - case PDF_TOK_STRING: return fz_new_string(ctx, buf, len); break; + case PDF_TOK_NAME: return fz_new_name(ctx, buf->scratch); break; + case PDF_TOK_REAL: return fz_new_real(ctx, buf->f); break; + case PDF_TOK_STRING: return fz_new_string(ctx, buf->scratch, buf->len); break; case PDF_TOK_TRUE: return fz_new_bool(ctx, 1); break; case PDF_TOK_FALSE: return fz_new_bool(ctx, 0); break; case PDF_TOK_NULL: return fz_new_null(ctx); break; - case PDF_TOK_INT: return fz_new_int(ctx, atoi(buf)); break; + case PDF_TOK_INT: return fz_new_int(ctx, buf->i); break; default: fz_throw(ctx, "unknown token in object stream"); } return NULL; /* Stupid MSVC */ @@ -445,60 +442,59 @@ pdf_parse_stm_obj(pdf_document *xref, fz_stream *file, char *buf, int cap) fz_obj * pdf_parse_ind_obj(pdf_document *xref, - fz_stream *file, char *buf, int cap, + fz_stream *file, pdf_lexbuf *buf, int *onum, int *ogen, int *ostmofs) { fz_obj *obj = NULL; int num = 0, gen = 0, stm_ofs; int tok; - int len; int a, b; fz_context *ctx = file->ctx; fz_var(obj); - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); /* RJW: cannot parse indirect object (%d %d R)", num, gen */ if (tok != PDF_TOK_INT) fz_throw(ctx, "expected object number (%d %d R)", num, gen); - num = atoi(buf); + num = buf->i; - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); /* RJW: "cannot parse indirect object (%d %d R)", num, gen */ if (tok != PDF_TOK_INT) fz_throw(ctx, "expected generation number (%d %d R)", num, gen); - gen = atoi(buf); + gen = buf->i; - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); /* RJW: "cannot parse indirect object (%d %d R)", num, gen */ if (tok != PDF_TOK_OBJ) fz_throw(ctx, "expected 'obj' keyword (%d %d R)", num, gen); - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); /* RJW: "cannot parse indirect object (%d %d R)", num, gen */ switch (tok) { case PDF_TOK_OPEN_ARRAY: - obj = pdf_parse_array(xref, file, buf, cap); + obj = pdf_parse_array(xref, file, buf); /* RJW: "cannot parse indirect object (%d %d R)", num, gen */ break; case PDF_TOK_OPEN_DICT: - obj = pdf_parse_dict(xref, file, buf, cap); + obj = pdf_parse_dict(xref, file, buf); /* RJW: "cannot parse indirect object (%d %d R)", num, gen */ break; - case PDF_TOK_NAME: obj = fz_new_name(ctx, buf); break; - case PDF_TOK_REAL: obj = fz_new_real(ctx, fz_atof(buf)); break; - case PDF_TOK_STRING: obj = fz_new_string(ctx, buf, len); break; + case PDF_TOK_NAME: obj = fz_new_name(ctx, buf->scratch); break; + case PDF_TOK_REAL: obj = fz_new_real(ctx, buf->f); break; + case PDF_TOK_STRING: obj = fz_new_string(ctx, buf->scratch, buf->len); break; case PDF_TOK_TRUE: obj = fz_new_bool(ctx, 1); break; case PDF_TOK_FALSE: obj = fz_new_bool(ctx, 0); break; case PDF_TOK_NULL: obj = fz_new_null(ctx); break; case PDF_TOK_INT: - a = atoi(buf); - tok = pdf_lex(file, buf, cap, &len); + a = buf->i; + tok = pdf_lex(file, buf); /* "cannot parse indirect object (%d %d R)", num, gen */ if (tok == PDF_TOK_STREAM || tok == PDF_TOK_ENDOBJ) { @@ -507,8 +503,8 @@ pdf_parse_ind_obj(pdf_document *xref, } if (tok == PDF_TOK_INT) { - b = atoi(buf); - tok = pdf_lex(file, buf, cap, &len); + b = buf->i; + tok = pdf_lex(file, buf); /* RJW: "cannot parse indirect object (%d %d R)", num, gen); */ if (tok == PDF_TOK_R) { @@ -528,7 +524,7 @@ pdf_parse_ind_obj(pdf_document *xref, fz_try(ctx) { - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); } fz_catch(ctx) { diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c index 0dc0e132..c70df3e2 100644 --- a/pdf/pdf_repair.c +++ b/pdf/pdf_repair.c @@ -13,11 +13,10 @@ struct entry }; static void -pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id) +pdf_repair_obj(fz_stream *file, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, fz_obj **encrypt, fz_obj **id) { int tok; int stm_len; - int len; int n; fz_context *ctx = file->ctx; @@ -26,7 +25,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, stm_len = 0; - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); /* RJW: "cannot parse object" */ if (tok == PDF_TOK_OPEN_DICT) { @@ -35,7 +34,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, /* Send NULL xref so we don't try to resolve references */ fz_try(ctx) { - dict = pdf_parse_dict(NULL, file, buf, cap); + dict = pdf_parse_dict(NULL, file, buf); } fz_catch(ctx) { @@ -79,13 +78,13 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, tok != PDF_TOK_EOF && tok != PDF_TOK_INT ) { - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); /* RJW: "cannot scan for endobj or stream token" */ } if (tok == PDF_TOK_INT) { - while (len-- > 0) + while (buf->len-- > 0) fz_unread_byte(file); } else if (tok == PDF_TOK_STREAM) @@ -106,7 +105,7 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_seek(file, *stmofsp + stm_len, 0); fz_try(ctx) { - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); } fz_catch(ctx) { @@ -117,23 +116,23 @@ pdf_repair_obj(fz_stream *file, char *buf, int cap, int *stmofsp, int *stmlenp, fz_seek(file, *stmofsp, 0); } - n = fz_read(file, (unsigned char *) buf, 9); + n = fz_read(file, (unsigned char *) buf->scratch, 9); if (n < 0) fz_throw(ctx, "cannot read from file"); - while (memcmp(buf, "endstream", 9) != 0) + while (memcmp(buf->scratch, "endstream", 9) != 0) { c = fz_read_byte(file); if (c == EOF) break; - memmove(buf, buf + 1, 8); - buf[8] = c; + memmove(&buf->scratch[0], &buf->scratch[1], 8); + buf->scratch[8] = c; } *stmlenp = fz_tell(file) - *stmofsp - 9; atobjend: - tok = pdf_lex(file, buf, cap, &len); + tok = pdf_lex(file, buf); /* RJW: "cannot scan for endobj token" */ if (tok != PDF_TOK_ENDOBJ) fz_warn(ctx, "object missing 'endobj' token"); @@ -147,11 +146,13 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen) fz_stream *stm = NULL; int tok; int i, n, count; - char buf[256]; fz_context *ctx = xref->ctx; + pdf_lexbuf buf; fz_var(stm); + buf.size = PDF_LEXBUF_SMALL; + fz_try(ctx) { obj = pdf_load_object(xref, num, gen); @@ -164,11 +165,11 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen) for (i = 0; i < count; i++) { - tok = pdf_lex(stm, buf, sizeof buf, &n); + tok = pdf_lex(stm, &buf); if (tok != PDF_TOK_INT) fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen); - n = atoi(buf); + n = buf.i; if (n >= xref->len) pdf_resize_xref(xref, n + 1); @@ -179,7 +180,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen) xref->table[n].obj = NULL; xref->table[n].type = 'o'; - tok = pdf_lex(stm, buf, sizeof buf, &n); + tok = pdf_lex(stm, &buf); if (tok != PDF_TOK_INT) fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen); } @@ -195,7 +196,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen) } void -pdf_repair_xref(pdf_document *xref, char *buf, int bufsize) +pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) { fz_obj *dict, *obj; fz_obj *length; @@ -234,14 +235,14 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize) list = fz_malloc_array(ctx, listcap, sizeof(struct entry)); /* look for '%PDF' version marker within first kilobyte of file */ - n = fz_read(xref->file, (unsigned char *)buf, MIN(bufsize, 1024)); + n = fz_read(xref->file, (unsigned char *)buf->scratch, MIN(buf->size, 1024)); if (n < 0) fz_throw(ctx, "cannot read from file"); fz_seek(xref->file, 0, 0); for (i = 0; i < n - 4; i++) { - if (memcmp(buf + i, "%PDF", 4) == 0) + if (memcmp(&buf->scratch[i], "%PDF", 4) == 0) { fz_seek(xref->file, i + 8, 0); /* skip "%PDF-X.Y" */ break; @@ -263,7 +264,7 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize) fz_try(ctx) { - tok = pdf_lex(xref->file, buf, bufsize, &n); + tok = pdf_lex(xref->file, buf); } fz_catch(ctx) { @@ -276,14 +277,14 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize) numofs = genofs; num = gen; genofs = tmpofs; - gen = atoi(buf); + gen = buf->i; } else if (tok == PDF_TOK_OBJ) { fz_try(ctx) { - pdf_repair_obj(xref->file, buf, bufsize, &stm_ofs, &stm_len, &encrypt, &id); + pdf_repair_obj(xref->file, buf, &stm_ofs, &stm_len, &encrypt, &id); } fz_catch(ctx) { @@ -318,7 +319,7 @@ pdf_repair_xref(pdf_document *xref, char *buf, int bufsize) { fz_try(ctx) { - dict = pdf_parse_dict(xref, xref->file, buf, bufsize); + dict = pdf_parse_dict(xref, xref->file, buf); } fz_catch(ctx) { diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c index 7500ded3..383747a7 100644 --- a/pdf/pdf_xref.c +++ b/pdf/pdf_xref.c @@ -51,6 +51,7 @@ pdf_read_start_xref(pdf_document *xref) while (iswhite(buf[i]) && i < n) i ++; xref->startxref = atoi((char*)(buf + i)); + return; } } @@ -63,17 +64,16 @@ pdf_read_start_xref(pdf_document *xref) */ static void -pdf_read_old_trailer(pdf_document *xref, char *buf, int cap) +pdf_read_old_trailer(pdf_document *xref, pdf_lexbuf *buf) { int len; char *s; - int n; int t; int tok; int c; - fz_read_line(xref->file, buf, cap); - if (strncmp(buf, "xref", 4) != 0) + fz_read_line(xref->file, buf->scratch, buf->size); + if (strncmp(buf->scratch, "xref", 4) != 0) fz_throw(xref->ctx, "cannot find xref marker"); while (1) @@ -82,8 +82,8 @@ pdf_read_old_trailer(pdf_document *xref, char *buf, int cap) if (!(c >= '0' && c <= '9')) break; - fz_read_line(xref->file, buf, cap); - s = buf; + fz_read_line(xref->file, buf->scratch, buf->size); + s = buf->scratch; fz_strsep(&s, " "); /* ignore ofs */ if (!s) fz_throw(xref->ctx, "invalid range marker in xref"); @@ -102,15 +102,15 @@ pdf_read_old_trailer(pdf_document *xref, char *buf, int cap) fz_try(xref->ctx) { - tok = pdf_lex(xref->file, buf, cap, &n); + tok = pdf_lex(xref->file, buf); if (tok != PDF_TOK_TRAILER) fz_throw(xref->ctx, "expected trailer marker"); - tok = pdf_lex(xref->file, buf, cap, &n); + tok = pdf_lex(xref->file, buf); if (tok != PDF_TOK_OPEN_DICT) fz_throw(xref->ctx, "expected trailer dictionary"); - xref->trailer = pdf_parse_dict(xref, xref->file, buf, cap); + xref->trailer = pdf_parse_dict(xref, xref->file, buf); } fz_catch(xref->ctx) { @@ -119,11 +119,11 @@ pdf_read_old_trailer(pdf_document *xref, char *buf, int cap) } static void -pdf_read_new_trailer(pdf_document *xref, char *buf, int cap) +pdf_read_new_trailer(pdf_document *xref, pdf_lexbuf *buf) { fz_try(xref->ctx) { - xref->trailer = pdf_parse_ind_obj(xref, xref->file, buf, cap, NULL, NULL, NULL); + xref->trailer = pdf_parse_ind_obj(xref, xref->file, buf, NULL, NULL, NULL); } fz_catch(xref->ctx) { @@ -132,7 +132,7 @@ pdf_read_new_trailer(pdf_document *xref, char *buf, int cap) } static void -pdf_read_trailer(pdf_document *xref, char *buf, int cap) +pdf_read_trailer(pdf_document *xref, pdf_lexbuf *buf) { int c; @@ -145,9 +145,9 @@ pdf_read_trailer(pdf_document *xref, char *buf, int cap) { c = fz_peek_byte(xref->file); if (c == 'x') - pdf_read_old_trailer(xref, buf, cap); + pdf_read_old_trailer(xref, buf); else if (c >= '0' && c <= '9') - pdf_read_new_trailer(xref, buf, cap); + pdf_read_new_trailer(xref, buf); else fz_throw(xref->ctx, "cannot recognize xref format: '%c'", c); } @@ -179,7 +179,7 @@ pdf_resize_xref(pdf_document *xref, int newlen) } static fz_obj * -pdf_read_old_xref(pdf_document *xref, char *buf, int cap) +pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf) { int ofs, len; char *s; @@ -189,8 +189,8 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap) int c; fz_obj *trailer; - fz_read_line(xref->file, buf, cap); - if (strncmp(buf, "xref", 4) != 0) + fz_read_line(xref->file, buf->scratch, buf->size); + if (strncmp(buf->scratch, "xref", 4) != 0) fz_throw(xref->ctx, "cannot find xref marker"); while (1) @@ -199,8 +199,8 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap) if (!(c >= '0' && c <= '9')) break; - fz_read_line(xref->file, buf, cap); - s = buf; + fz_read_line(xref->file, buf->scratch, buf->size); + s = buf->scratch; ofs = atoi(fz_strsep(&s, " ")); len = atoi(fz_strsep(&s, " ")); @@ -220,12 +220,12 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap) for (i = ofs; i < ofs + len; i++) { - n = fz_read(xref->file, (unsigned char *) buf, 20); + n = fz_read(xref->file, (unsigned char *) buf->scratch, 20); if (n < 0) fz_throw(xref->ctx, "cannot read xref table"); if (!xref->table[i].type) { - s = buf; + s = buf->scratch; /* broken pdfs where line start with white space */ while (*s != '\0' && iswhite(*s)) @@ -242,15 +242,15 @@ pdf_read_old_xref(pdf_document *xref, char *buf, int cap) fz_try(xref->ctx) { - tok = pdf_lex(xref->file, buf, cap, &n); + tok = pdf_lex(xref->file, buf); if (tok != PDF_TOK_TRAILER) fz_throw(xref->ctx, "expected trailer marker"); - tok = pdf_lex(xref->file, buf, cap, &n); + tok = pdf_lex(xref->file, buf); if (tok != PDF_TOK_OPEN_DICT) fz_throw(xref->ctx, "expected trailer dictionary"); - trailer = pdf_parse_dict(xref, xref->file, buf, cap); + trailer = pdf_parse_dict(xref, xref->file, buf); } fz_catch(xref->ctx) { @@ -296,7 +296,7 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in /* Entered with file locked. Drops the lock in the middle, but then picks * it up again before exiting. */ static fz_obj * -pdf_read_new_xref(pdf_document *xref, char *buf, int cap) +pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) { fz_stream *stm = NULL; fz_obj *trailer = NULL; @@ -312,7 +312,7 @@ pdf_read_new_xref(pdf_document *xref, char *buf, int cap) fz_try(ctx) { - trailer = pdf_parse_ind_obj(xref, xref->file, buf, cap, &num, &gen, &stm_ofs); + trailer = pdf_parse_ind_obj(xref, xref->file, buf, &num, &gen, &stm_ofs); } fz_catch(ctx) { @@ -378,7 +378,7 @@ pdf_read_new_xref(pdf_document *xref, char *buf, int cap) /* File is locked on entry, and exit (but may be dropped in the middle) */ static fz_obj * -pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap) +pdf_read_xref(pdf_document *xref, int ofs, pdf_lexbuf *buf) { int c; fz_context *ctx = xref->ctx; @@ -393,9 +393,9 @@ pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap) { c = fz_peek_byte(xref->file); if (c == 'x') - trailer = pdf_read_old_xref(xref, buf, cap); + trailer = pdf_read_old_xref(xref, buf); else if (c >= '0' && c <= '9') - trailer = pdf_read_new_xref(xref, buf, cap); + trailer = pdf_read_new_xref(xref, buf); else fz_throw(ctx, "cannot recognize xref format"); } @@ -407,7 +407,7 @@ pdf_read_xref(pdf_document *xref, int ofs, char *buf, int cap) } static void -pdf_read_xref_sections(pdf_document *xref, int ofs, char *buf, int cap) +pdf_read_xref_sections(pdf_document *xref, int ofs, pdf_lexbuf *buf) { fz_obj *trailer = NULL; fz_obj *xrefstm = NULL; @@ -416,16 +416,16 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, char *buf, int cap) fz_try(ctx) { - trailer = pdf_read_xref(xref, ofs, buf, cap); + trailer = pdf_read_xref(xref, ofs, buf); /* FIXME: do we overwrite free entries properly? */ xrefstm = fz_dict_gets(trailer, "XRefStm"); if (xrefstm) - pdf_read_xref_sections(xref, fz_to_int(xrefstm), buf, cap); + pdf_read_xref_sections(xref, fz_to_int(xrefstm), buf); prev = fz_dict_gets(trailer, "Prev"); if (prev) - pdf_read_xref_sections(xref, fz_to_int(prev), buf, cap); + pdf_read_xref_sections(xref, fz_to_int(prev), buf); } fz_catch(ctx) { @@ -441,7 +441,7 @@ pdf_read_xref_sections(pdf_document *xref, int ofs, char *buf, int cap) */ static void -pdf_load_xref(pdf_document *xref, char *buf, int bufsize) +pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf) { fz_obj *size; int i; @@ -451,7 +451,7 @@ pdf_load_xref(pdf_document *xref, char *buf, int bufsize) pdf_read_start_xref(xref); - pdf_read_trailer(xref, buf, bufsize); + pdf_read_trailer(xref, buf); size = fz_dict_gets(xref->trailer, "Size"); if (!size) @@ -459,7 +459,7 @@ pdf_load_xref(pdf_document *xref, char *buf, int bufsize) pdf_resize_xref(xref, fz_to_int(size)); - pdf_read_xref_sections(xref, xref->startxref, buf, bufsize); + pdf_read_xref_sections(xref, xref->startxref, buf); /* broken pdfs where first object is not free */ if (xref->table[0].type != 'f') @@ -672,6 +672,7 @@ pdf_open_document_with_stream(fz_stream *file) xref = fz_malloc_struct(ctx, pdf_document); pdf_init_document(xref); + xref->lexbuf.base.size = PDF_LEXBUF_LARGE; xref->file = fz_keep_stream(file); xref->ctx = ctx; @@ -681,7 +682,7 @@ pdf_open_document_with_stream(fz_stream *file) fz_try(ctx) { - pdf_load_xref(xref, xref->scratch, sizeof xref->scratch); + pdf_load_xref(xref, &xref->lexbuf.base); } fz_catch(ctx) { @@ -705,7 +706,7 @@ pdf_open_document_with_stream(fz_stream *file) int hasroot, hasinfo; if (repaired) - pdf_repair_xref(xref, xref->scratch, sizeof xref->scratch); + pdf_repair_xref(xref, &xref->lexbuf.base); fz_unlock(ctx, FZ_LOCK_FILE); locked = 0; @@ -864,7 +865,7 @@ pdf_debug_xref(pdf_document *xref) */ static void -pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap) +pdf_load_obj_stm(pdf_document *xref, int num, int gen, pdf_lexbuf *buf) { fz_stream *stm = NULL; fz_obj *objstm = NULL; @@ -874,7 +875,7 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap) fz_obj *obj; int first; int count; - int i, n; + int i; int tok; fz_context *ctx = xref->ctx; @@ -896,15 +897,15 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap) stm = pdf_open_stream(xref, num, gen); for (i = 0; i < count; i++) { - tok = pdf_lex(stm, buf, cap, &n); + tok = pdf_lex(stm, buf); if (tok != PDF_TOK_INT) fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen); - numbuf[i] = atoi(buf); + numbuf[i] = buf->i; - tok = pdf_lex(stm, buf, cap, &n); + tok = pdf_lex(stm, buf); if (tok != PDF_TOK_INT) fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen); - ofsbuf[i] = atoi(buf); + ofsbuf[i] = buf->i; } fz_seek(stm, first, 0); @@ -913,7 +914,7 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, char *buf, int cap) { fz_seek(stm, first + ofsbuf[i], 0); - obj = pdf_parse_stm_obj(xref, stm, buf, cap); + obj = pdf_parse_stm_obj(xref, stm, buf); /* RJW: Ensure above does fz_throw(ctx, "cannot parse object %d in stream (%d %d R)", i, num, gen); */ if (numbuf[i] < 1 || numbuf[i] >= xref->len) @@ -978,7 +979,7 @@ pdf_cache_object(pdf_document *xref, int num, int gen) fz_try(ctx) { - x->obj = pdf_parse_ind_obj(xref, xref->file, xref->scratch, sizeof xref->scratch, + x->obj = pdf_parse_ind_obj(xref, xref->file, &xref->lexbuf.base, &rnum, &rgen, &x->stm_ofs); } fz_catch(ctx) @@ -1005,7 +1006,7 @@ pdf_cache_object(pdf_document *xref, int num, int gen) { fz_try(ctx) { - pdf_load_obj_stm(xref, x->ofs, 0, xref->scratch, sizeof xref->scratch); + pdf_load_obj_stm(xref, x->ofs, 0, &xref->lexbuf.base); } fz_catch(ctx) { |