From 2e7007ca39bcea9b7688f6cc5cfd6bb43eb8630c Mon Sep 17 00:00:00 2001 From: Paul Gardiner Date: Wed, 29 May 2013 12:57:41 +0100 Subject: Access the xref table via an interface Avoid all direct access to the xref table so that the implementation can be altered to add new features --- apps/pdfshow.c | 3 +- pdf/mupdf-internal.h | 5 +- pdf/pdf_repair.c | 76 ++++++++------ pdf/pdf_stream.c | 39 ++++--- pdf/pdf_write.c | 124 ++++++++++++----------- pdf/pdf_xref.c | 280 +++++++++++++++++++++++++++++---------------------- 6 files changed, 304 insertions(+), 223 deletions(-) diff --git a/apps/pdfshow.c b/apps/pdfshow.c index 7ccfd50d..02cf6dbb 100644 --- a/apps/pdfshow.c +++ b/apps/pdfshow.c @@ -161,7 +161,8 @@ static void showgrep(char *filename) len = pdf_count_objects(doc); for (i = 0; i < len; i++) { - if (doc->table[i].type == 'n' || doc->table[i].type == 'o') + pdf_xref_entry *entry = pdf_get_xref_entry(doc, i); + if (entry->type == 'n' || entry->type == 'o') { fz_try(ctx) { diff --git a/pdf/mupdf-internal.h b/pdf/mupdf-internal.h index 5b84bed7..d1b923c3 100644 --- a/pdf/mupdf-internal.h +++ b/pdf/mupdf-internal.h @@ -175,9 +175,12 @@ fz_stream *pdf_open_raw_renumbered_stream(pdf_document *doc, int num, int gen, i pdf_obj *pdf_trailer(pdf_document *doc); void pdf_set_xref_trailer(pdf_document *doc, pdf_obj *trailer); +int pdf_xref_len(pdf_document *doc); +pdf_xref_entry *pdf_get_xref_entry(pdf_document *doc, int i); +void pdf_replace_xref(pdf_document *doc, pdf_xref_entry *entries, int n); + void pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf); void pdf_repair_obj_stms(pdf_document *doc); -void pdf_resize_xref(pdf_document *doc, int newcap); pdf_obj *pdf_new_ref(pdf_document *doc, pdf_obj *obj); void pdf_print_xref(pdf_document *); diff --git a/pdf/pdf_repair.c b/pdf/pdf_repair.c index e26baefa..1127fe1f 100644 --- a/pdf/pdf_repair.c +++ b/pdf/pdf_repair.c @@ -164,6 +164,8 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen) for (i = 0; i < count; i++) { + 
pdf_xref_entry *entry; + tok = pdf_lex(stm, &buf); if (tok != PDF_TOK_INT) fz_throw(ctx, "corrupt object stream (%d %d R)", num, gen); @@ -179,15 +181,14 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen) fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i); continue; } - if (n >= xref->len) - pdf_resize_xref(xref, n + 1); - xref->table[n].ofs = num; - xref->table[n].gen = i; - xref->table[n].stm_ofs = 0; - pdf_drop_obj(xref->table[n].obj); - xref->table[n].obj = NULL; - xref->table[n].type = 'o'; + entry = pdf_get_xref_entry(xref, n); + entry->ofs = num; + entry->gen = i; + entry->stm_ofs = 0; + pdf_drop_obj(entry->obj); + entry->obj = NULL; + entry->type = 'o'; tok = pdf_lex(stm, &buf); if (tok != PDF_TOK_INT) @@ -209,7 +210,7 @@ pdf_repair_obj_stm(pdf_document *xref, int num, int gen) void pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) { - pdf_obj *dict, *obj; + pdf_obj *dict, *obj = NULL; pdf_obj *length; pdf_obj *encrypt = NULL; @@ -236,6 +237,7 @@ pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) fz_var(root); fz_var(info); fz_var(list); + fz_var(obj); xref->dirty = 1; @@ -243,6 +245,7 @@ pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) fz_try(ctx) { + pdf_xref_entry *entry; listlen = 0; listcap = 1024; list = fz_malloc_array(ctx, listcap, sizeof(struct entry)); @@ -398,15 +401,20 @@ pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) /* make xref reasonable */ - pdf_resize_xref(xref, maxnum + 1); + /* + Dummy access to entry to assure sufficient space in the xref table + and avoid repeated reallocs in the loop + */ + (void)pdf_get_xref_entry(xref, maxnum); for (i = 0; i < listlen; i++) { - xref->table[list[i].num].type = 'n'; - xref->table[list[i].num].ofs = list[i].ofs; - xref->table[list[i].num].gen = list[i].gen; + entry = pdf_get_xref_entry(xref, list[i].num); + entry->type = 'n'; + entry->ofs = list[i].ofs; + entry->gen = list[i].gen; - xref->table[list[i].num].stm_ofs = list[i].stm_ofs; + entry->stm_ofs = 
list[i].stm_ofs; /* correct stream length for unencrypted documents */ if (!encrypt && list[i].stm_len >= 0) @@ -421,20 +429,22 @@ pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) } } - xref->table[0].type = 'f'; - xref->table[0].ofs = 0; - xref->table[0].gen = 65535; - xref->table[0].stm_ofs = 0; - xref->table[0].obj = NULL; + entry = pdf_get_xref_entry(xref, 0); + entry->type = 'f'; + entry->ofs = 0; + entry->gen = 65535; + entry->stm_ofs = 0; + entry->obj = NULL; next = 0; - for (i = xref->len - 1; i >= 0; i--) + for (i = pdf_xref_len(xref) - 1; i >= 0; i--) { - if (xref->table[i].type == 'f') + entry = pdf_get_xref_entry(xref, i); + if (entry->type == 'f') { - xref->table[i].ofs = next; - if (xref->table[i].gen < 65535) - xref->table[i].gen ++; + entry->ofs = next; + if (entry->gen < 65535) + entry->gen ++; next = i; } } @@ -499,6 +509,7 @@ pdf_repair_xref(pdf_document *xref, pdf_lexbuf *buf) pdf_drop_obj(encrypt); pdf_drop_obj(id); pdf_drop_obj(root); + pdf_drop_obj(obj); pdf_drop_obj(info); fz_free(ctx, list); fz_rethrow(ctx); @@ -511,10 +522,13 @@ pdf_repair_obj_stms(pdf_document *xref) fz_context *ctx = xref->ctx; pdf_obj *dict; int i; + int xref_len = pdf_xref_len(xref); - for (i = 0; i < xref->len; i++) + for (i = 0; i < xref_len; i++) { - if (xref->table[i].stm_ofs) + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + + if (entry->stm_ofs) { dict = pdf_load_object(xref, i, 0); fz_try(ctx) @@ -534,7 +548,11 @@ pdf_repair_obj_stms(pdf_document *xref) } /* Ensure that streamed objects reside inside a known non-streamed object */ - for (i = 0; i < xref->len; i++) - if (xref->table[i].type == 'o' && xref->table[xref->table[i].ofs].type != 'n') - fz_throw(xref->ctx, "invalid reference to non-object-stream: %d (%d 0 R)", xref->table[i].ofs, i); + for (i = 0; i < xref_len; i++) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + + if (entry->type == 'o' && pdf_get_xref_entry(xref, entry->ofs)->type != 'n') + fz_throw(xref->ctx, "invalid 
reference to non-object-stream: %d (%d 0 R)", entry->ofs, i); + } } diff --git a/pdf/pdf_stream.c b/pdf/pdf_stream.c index 0af1ddcf..50cb78ee 100644 --- a/pdf/pdf_stream.c +++ b/pdf/pdf_stream.c @@ -7,12 +7,15 @@ int pdf_is_stream(pdf_document *xref, int num, int gen) { - if (num < 0 || num >= xref->len) + pdf_xref_entry *entry; + + if (num < 0 || num >= pdf_xref_len(xref)) return 0; pdf_cache_object(xref, num, gen); - return xref->table[num].stm_ofs != 0 || xref->table[num].stm_buf; + entry = pdf_get_xref_entry(xref, num); + return entry->stm_ofs != 0 || entry->stm_buf; } /* @@ -229,8 +232,12 @@ pdf_open_raw_filter(fz_stream *chain, pdf_document *xref, pdf_obj *stmobj, int n int hascrypt; int len; - if (num > 0 && num < xref->len && xref->table[num].stm_buf) - return fz_open_buffer(ctx, xref->table[num].stm_buf); + if (num > 0 && num < pdf_xref_len(xref)) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, num); + if (entry->stm_buf) + return fz_open_buffer(ctx, entry->stm_buf); + } /* don't close chain when we close this filter */ fz_keep_stream(chain); @@ -322,10 +329,10 @@ pdf_open_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_nu { pdf_xref_entry *x; - if (num < 0 || num >= xref->len) + if (num < 0 || num >= pdf_xref_len(xref)) fz_throw(xref->ctx, "object id out of range (%d %d R)", num, gen); - x = xref->table + num; + x = pdf_get_xref_entry(xref, num); pdf_cache_object(xref, num, gen); @@ -340,10 +347,10 @@ pdf_open_image_stream(pdf_document *xref, int num, int gen, int orig_num, int or { pdf_xref_entry *x; - if (num < 0 || num >= xref->len) + if (num < 0 || num >= pdf_xref_len(xref)) fz_throw(xref->ctx, "object id out of range (%d %d R)", num, gen); - x = xref->table + num; + x = pdf_get_xref_entry(xref, num); pdf_cache_object(xref, num, gen); @@ -390,8 +397,12 @@ pdf_load_raw_renumbered_stream(pdf_document *xref, int num, int gen, int orig_nu int len; fz_buffer *buf; - if (num > 0 && num < xref->len && xref->table[num].stm_buf) - 
return fz_keep_buffer(xref->ctx, xref->table[num].stm_buf); + if (num > 0 && num < pdf_xref_len(xref)) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, num); + if (entry->stm_buf) + return fz_keep_buffer(xref->ctx, entry->stm_buf); + } dict = pdf_load_object(xref, num, gen); @@ -434,8 +445,12 @@ pdf_load_image_stream(pdf_document *xref, int num, int gen, int orig_num, int or fz_var(buf); - if (num > 0 && num < xref->len && xref->table[num].stm_buf) - return fz_keep_buffer(xref->ctx, xref->table[num].stm_buf); + if (num > 0 && num < pdf_xref_len(xref)) + { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, num); + if (entry->stm_buf) + return fz_keep_buffer(xref->ctx, entry->stm_buf); + } dict = pdf_load_object(xref, num, gen); diff --git a/pdf/pdf_write.c b/pdf/pdf_write.c index 5e21e7f8..7906413f 100644 --- a/pdf/pdf_write.c +++ b/pdf/pdf_write.c @@ -492,7 +492,7 @@ objects_dump(pdf_document *xref, pdf_write_options *opts) { int i; - for (i=0; i < xref->len; i++) + for (i=0; i < pdf_xref_len(xref); i++) { fprintf(stderr, "Object %d use=%x offset=%d\n", i, opts->use_list[i], opts->ofs_list[i]); } @@ -509,7 +509,7 @@ static pdf_obj *sweepref(pdf_document *xref, pdf_write_options *opts, pdf_obj *o int gen = pdf_to_gen(obj); fz_context *ctx = xref->ctx; - if (num < 0 || num >= xref->len) + if (num < 0 || num >= pdf_xref_len(xref)) return NULL; if (opts->use_list[num]) return NULL; @@ -568,8 +568,9 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts) { int num, other; fz_context *ctx = xref->ctx; + int xref_len = pdf_xref_len(xref); - for (num = 1; num < xref->len; num++) + for (num = 1; num < xref_len; num++) { /* Only compare an object to objects preceding it */ for (other = 1; other < num; other++) @@ -602,8 +603,8 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts) if (differ) continue; - a = xref->table[num].obj; - b = xref->table[other].obj; + a = pdf_get_xref_entry(xref, num)->obj; + b = 
pdf_get_xref_entry(xref, other)->obj; a = pdf_resolve_indirect(a); b = pdf_resolve_indirect(b); @@ -667,6 +668,7 @@ static void removeduplicateobjs(pdf_document *xref, pdf_write_options *opts) static void compactxref(pdf_document *xref, pdf_write_options *opts) { int num, newnum; + int xref_len = pdf_xref_len(xref); /* * Update renumber_map in-place, clustering all used @@ -676,7 +678,7 @@ static void compactxref(pdf_document *xref, pdf_write_options *opts) */ newnum = 1; - for (num = 1; num < xref->len; num++) + for (num = 1; num < xref_len; num++) { /* If it's not used, map it to zero */ if (!opts->use_list[opts->renumber_map[num]]) @@ -752,21 +754,23 @@ static void renumberobj(pdf_document *xref, pdf_write_options *opts, pdf_obj *ob static void renumberobjs(pdf_document *xref, pdf_write_options *opts) { - pdf_xref_entry *oldxref; + pdf_xref_entry *newxref = NULL; int newlen; int num; fz_context *ctx = xref->ctx; int *new_use_list; + int xref_len = pdf_xref_len(xref); - new_use_list = fz_calloc(ctx, xref->len+3, sizeof(int)); + new_use_list = fz_calloc(ctx, pdf_xref_len(xref)+3, sizeof(int)); + fz_var(newxref); fz_try(ctx) { /* Apply renumber map to indirect references in all objects in xref */ renumberobj(xref, opts, pdf_trailer(xref)); - for (num = 0; num < xref->len; num++) + for (num = 0; num < xref_len; num++) { - pdf_obj *obj = xref->table[num].obj; + pdf_obj *obj = pdf_get_xref_entry(xref, num)->obj; if (pdf_is_indirect(obj)) { @@ -781,40 +785,39 @@ static void renumberobjs(pdf_document *xref, pdf_write_options *opts) } /* Create new table for the reordered, compacted xref */ - oldxref = xref->table; - xref->table = fz_malloc_array(ctx, xref->len + 3, sizeof(pdf_xref_entry)); - xref->table[0] = oldxref[0]; + newxref = fz_malloc_array(ctx, xref_len + 3, sizeof(pdf_xref_entry)); + newxref[0] = *pdf_get_xref_entry(xref, 0); /* Move used objects into the new compacted xref */ newlen = 0; - for (num = 1; num < xref->len; num++) + for (num = 1; num < xref_len; 
num++) { if (opts->use_list[num]) { if (newlen < opts->renumber_map[num]) newlen = opts->renumber_map[num]; - xref->table[opts->renumber_map[num]] = oldxref[num]; + newxref[opts->renumber_map[num]] = *pdf_get_xref_entry(xref, num); new_use_list[opts->renumber_map[num]] = opts->use_list[num]; } else { - pdf_drop_obj(oldxref[num].obj); + pdf_drop_obj(pdf_get_xref_entry(xref, num)->obj); } } + + pdf_replace_xref(xref, newxref, newlen + 1); + newxref = NULL; } fz_catch(ctx) { + fz_free(ctx, newxref); fz_free(ctx, new_use_list); fz_rethrow(ctx); } - fz_free(ctx, oldxref); fz_free(ctx, opts->use_list); opts->use_list = new_use_list; - /* Update the used objects count in compacted xref */ - xref->len = newlen + 1; - - for (num = 1; num < xref->len; num++) + for (num = 1; num < xref_len; num++) { opts->renumber_map[num] = num; } @@ -1112,7 +1115,7 @@ add_linearization_objs(pdf_document *xref, pdf_write_options *opts) pdf_dict_puts_drop(hint_obj, "Filter", pdf_new_name(ctx, "FlateDecode")); opts->hints_length = pdf_new_int(ctx, INT_MIN); pdf_dict_puts(hint_obj, "Length", opts->hints_length); - xref->table[hint_num].stm_ofs = -1; + pdf_get_xref_entry(xref, hint_num)->stm_ofs = -1; } fz_always(ctx) { @@ -1310,7 +1313,7 @@ static void linearize(pdf_document *xref, pdf_write_options *opts) { int i; - int n = xref->len + 2; + int n = pdf_xref_len(xref) + 2; int *reorder; int *rev_renumber_map; int *rev_gen_list; @@ -1333,7 +1336,7 @@ linearize(pdf_document *xref, pdf_write_options *opts) #ifdef DEBUG_WRITING fprintf(stderr, "Usage calculated:\n"); - for (i=0; i < xref->len; i++) + for (i=0; i < pdf_xref_len(xref); i++) { fprintf(stderr, "%d: use=%d\n", i, opts->use_list[i]); } @@ -1353,7 +1356,7 @@ linearize(pdf_document *xref, pdf_write_options *opts) #ifdef DEBUG_WRITING fprintf(stderr, "Reordered:\n"); - for (i=1; i < xref->len; i++) + for (i=1; i < pdf_xref_len(xref); i++) { fprintf(stderr, "%d: use=%d\n", i, opts->use_list[reorder[i]]); } @@ -1389,10 +1392,10 @@ 
update_linearization_params(pdf_document *xref, pdf_write_options *opts) int offset; pdf_set_int(opts->linear_l, opts->file_len); /* Primary hint stream offset (of object, not stream!) */ - pdf_set_int(opts->linear_h0, opts->ofs_list[xref->len-1]); + pdf_set_int(opts->linear_h0, opts->ofs_list[pdf_xref_len(xref)-1]); /* Primary hint stream length (of object, not stream!) */ offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len); - pdf_set_int(opts->linear_h1, offset - opts->ofs_list[xref->len-1]); + pdf_set_int(opts->linear_h1, offset - opts->ofs_list[pdf_xref_len(xref)-1]); /* Object number of first pages page object (the first object of page 0) */ pdf_set_int(opts->linear_o, opts->page_object_lists->page[0]->object[0]); /* Offset of end of first page (first page is followed by primary @@ -1419,10 +1422,11 @@ static void preloadobjstms(pdf_document *xref) { pdf_obj *obj; int num; + int xref_len = pdf_xref_len(xref); - for (num = 0; num < xref->len; num++) + for (num = 0; num < xref_len; num++) { - if (xref->table[num].type == 'o') + if (pdf_get_xref_entry(xref, num)->type == 'o') { obj = pdf_load_object(xref, num, 0); pdf_drop_obj(obj); @@ -1628,6 +1632,7 @@ static int filter_implies_image(pdf_document *xref, pdf_obj *o) static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, int gen) { + pdf_xref_entry *entry; pdf_obj *obj; pdf_obj *type; fz_context *ctx = xref->ctx; @@ -1668,13 +1673,14 @@ static void writeobject(pdf_document *xref, pdf_write_options *opts, int num, in } } + entry = pdf_get_xref_entry(xref, num); if (!pdf_is_stream(xref, num, gen)) { fprintf(opts->out, "%d %d obj\n", num, gen); pdf_fprint_obj(opts->out, obj, opts->do_expand == 0); fprintf(opts->out, "endobj\n\n"); } - else if (xref->table[num].stm_ofs < 0 && xref->table[num].stm_buf == NULL) + else if (entry->stm_ofs < 0 && entry->stm_buf == NULL) { fprintf(opts->out, "%d %d obj\n", num, gen); pdf_fprint_obj(opts->out, obj, 
opts->do_expand == 0); @@ -1823,11 +1829,12 @@ padto(FILE *file, int target) static void dowriteobject(pdf_document *xref, pdf_write_options *opts, int num, int pass) { - if (xref->table[num].type == 'f') - opts->gen_list[num] = xref->table[num].gen; - if (xref->table[num].type == 'n') - opts->gen_list[num] = xref->table[num].gen; - if (xref->table[num].type == 'o') + pdf_xref_entry *entry = pdf_get_xref_entry(xref, num); + if (entry->type == 'f') + opts->gen_list[num] = entry->gen; + if (entry->type == 'n') + opts->gen_list[num] = entry->gen; + if (entry->type == 'o') opts->gen_list[num] = 0; /* If we are renumbering, then make sure all generation numbers are @@ -1841,7 +1848,7 @@ dowriteobject(pdf_document *xref, pdf_write_options *opts, int num, int pass) if (opts->do_garbage && !opts->use_list[num]) return; - if (xref->table[num].type == 'n' || xref->table[num].type == 'o') + if (entry->type == 'n' || entry->type == 'o') { if (pass > 0) padto(opts->out, opts->ofs_list[num]); @@ -1856,6 +1863,7 @@ static void writeobjects(pdf_document *xref, pdf_write_options *opts, int pass) { int num; + int xref_len = pdf_xref_len(xref); fprintf(opts->out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10); fprintf(opts->out, "%%\316\274\341\277\246\n\n"); @@ -1869,10 +1877,10 @@ writeobjects(pdf_document *xref, pdf_write_options *opts, int pass) opts->first_xref_offset = ftell(opts->out); else padto(opts->out, opts->first_xref_offset); - writexref(xref, opts, opts->start, xref->len, 1, opts->main_xref_offset, 0); + writexref(xref, opts, opts->start, pdf_xref_len(xref), 1, opts->main_xref_offset, 0); } - for (num = opts->start+1; num < xref->len; num++) + for (num = opts->start+1; num < xref_len; num++) dowriteobject(xref, opts, num, pass); if (opts->do_linear && pass == 1) { @@ -1918,19 +1926,20 @@ make_page_offset_hints(pdf_document *xref, pdf_write_options *opts, fz_buffer *b page_objects **pop = &opts->page_object_lists->page[0]; int page_len_bits, 
shared_object_bits, shared_object_id_bits; int shared_length_bits; + int xref_len = pdf_xref_len(xref); - min_shared_object = xref->len; + min_shared_object = pdf_xref_len(xref); max_shared_object = 1; min_shared_length = opts->file_len; max_shared_length = 0; - for (i=1; i < xref->len; i++) + for (i=1; i < xref_len; i++) { int min, max, page; min = opts->ofs_list[i]; - if (i == opts->start-1 || (opts->start == 1 && i == xref->len-1)) + if (i == opts->start-1 || (opts->start == 1 && i == xref_len-1)) max = opts->main_xref_offset; - else if (i == xref->len-1) + else if (i == xref_len-1) max = opts->ofs_list[1]; else max = opts->ofs_list[i+1]; @@ -2141,7 +2150,7 @@ make_page_offset_hints(pdf_document *xref, pdf_write_options *opts, fz_buffer *b min = opts->ofs_list[o]; if (o == opts->start-1) max = opts->main_xref_offset; - else if (o < xref->len-1) + else if (o < xref_len-1) max = opts->ofs_list[o+1]; else max = opts->ofs_list[1]; @@ -2155,7 +2164,7 @@ make_page_offset_hints(pdf_document *xref, pdf_write_options *opts, fz_buffer *b min = opts->ofs_list[i]; if (i == opts->start-1) max = opts->main_xref_offset; - else if (i < xref->len-1) + else if (i < xref_len-1) max = opts->ofs_list[i+1]; else max = opts->ofs_list[1]; @@ -2183,7 +2192,7 @@ make_hint_stream(pdf_document *xref, pdf_write_options *opts) fz_try(ctx) { make_page_offset_hints(xref, opts, buf); - pdf_update_stream(xref, xref->len-1, buf); + pdf_update_stream(xref, pdf_xref_len(xref)-1, buf); opts->hintstream_len = buf->len; fz_drop_buffer(ctx, buf); } @@ -2199,7 +2208,7 @@ static void dump_object_details(pdf_document *xref, pdf_write_options *opts) { int i; - for (i = 0; i < xref->len; i++) + for (i = 0; i < pdf_xref_len(xref); i++) { fprintf(stderr, "%d@%d: use=%d\n", i, opts->ofs_list[i], opts->use_list[i]); } @@ -2212,6 +2221,7 @@ void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz int num; pdf_write_options opts = { 0 }; fz_context *ctx; + int xref_len = 
pdf_xref_len(xref); if (!xref) return; @@ -2233,22 +2243,22 @@ void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz /* We deliberately make these arrays long enough to cope with * 1 to n access rather than 0..n-1, and add space for 2 new * extra entries that may be required for linearization. */ - opts.use_list = fz_malloc_array(ctx, xref->len + 3, sizeof(int)); - opts.ofs_list = fz_malloc_array(ctx, xref->len + 3, sizeof(int)); - opts.gen_list = fz_calloc(ctx, xref->len + 3, sizeof(int)); - opts.renumber_map = fz_malloc_array(ctx, xref->len + 3, sizeof(int)); - opts.rev_renumber_map = fz_malloc_array(ctx, xref->len + 3, sizeof(int)); - opts.rev_gen_list = fz_malloc_array(ctx, xref->len + 3, sizeof(int)); + opts.use_list = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.ofs_list = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.gen_list = fz_calloc(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.renumber_map = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.rev_renumber_map = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); + opts.rev_gen_list = fz_malloc_array(ctx, pdf_xref_len(xref) + 3, sizeof(int)); opts.continue_on_error = fz_opts->continue_on_error; opts.errors = fz_opts->errors; - for (num = 0; num < xref->len; num++) + for (num = 0; num < xref_len; num++) { opts.use_list[num] = 0; opts.ofs_list[num] = 0; opts.renumber_map[num] = num; opts.rev_renumber_map[num] = num; - opts.rev_gen_list[num] = xref->table[num].gen; + opts.rev_gen_list[num] = pdf_get_xref_entry(xref, num)->gen; } /* Make sure any objects hidden in compressed streams have been loaded */ @@ -2258,7 +2268,7 @@ void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz if (opts.do_garbage >= 1) sweepobj(xref, &opts, pdf_trailer(xref)); else - for (num = 0; num < xref->len; num++) + for (num = 0; num < xref_len; num++) opts.use_list[num] = 1; /* Coalesce and renumber duplicate 
objects */ @@ -2286,7 +2296,7 @@ void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz /* Construct linked list of free object slots */ lastfree = 0; - for (num = 0; num < xref->len; num++) + for (num = 0; num < xref_len; num++) { if (!opts.use_list[num]) { @@ -2315,7 +2325,7 @@ void pdf_write_document(pdf_document *xref, char *filename, fz_write_options *fz else { opts.first_xref_offset = ftell(opts.out); - writexref(xref, &opts, 0, xref->len, 1, 0, opts.first_xref_offset); + writexref(xref, &opts, 0, xref_len, 1, 0, opts.first_xref_offset); } xref->dirty = 0; diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c index 254c0a3f..1f324fff 100644 --- a/pdf/pdf_xref.c +++ b/pdf/pdf_xref.c @@ -9,9 +9,26 @@ static inline int iswhite(int ch) } /* - * magic version tag and startxref + * xref tables */ +static void pdf_resize_xref(pdf_document *xref, int newlen) +{ + int i; + + xref->table = fz_resize_array(xref->ctx, xref->table, newlen, sizeof(pdf_xref_entry)); + for (i = xref->len; i < newlen; i++) + { + xref->table[i].type = 0; + xref->table[i].ofs = 0; + xref->table[i].gen = 0; + xref->table[i].stm_ofs = 0; + xref->table[i].stm_buf = NULL; + xref->table[i].obj = NULL; + } + xref->len = newlen; +} + pdf_obj *pdf_trailer(pdf_document *doc) { return doc->trailer; @@ -23,6 +40,30 @@ void pdf_set_xref_trailer(pdf_document *doc, pdf_obj *trailer) doc->trailer = pdf_keep_obj(trailer); } +int pdf_xref_len(pdf_document *doc) +{ + return doc->len; +} + +pdf_xref_entry *pdf_get_xref_entry(pdf_document *doc, int i) +{ + if (i >= doc->len) + pdf_resize_xref(doc, i+1); + + return &doc->table[i]; +} + +void pdf_replace_xref(pdf_document *doc, pdf_xref_entry *entries, int n) +{ + fz_free(doc->ctx, doc->table); + doc->table = entries; + doc->len = n; +} + +/* + * magic version tag and startxref + */ + static void pdf_load_version(pdf_document *xref) { @@ -137,20 +178,20 @@ pdf_read_new_trailer(pdf_document *xref, pdf_lexbuf *buf) { fz_try(xref->ctx) { + 
pdf_xref_entry *entry; pdf_obj *trailer; int num, gen, stm_ofs, ofs; ofs = fz_tell(xref->file); trailer = pdf_parse_ind_obj(xref, xref->file, buf, &num, &gen, &stm_ofs); pdf_set_xref_trailer(xref, trailer); pdf_drop_obj(trailer); - if (num >= xref->len) - pdf_resize_xref(xref, num+1); - xref->table[num].ofs = ofs; - xref->table[num].gen = gen; - xref->table[num].stm_ofs = stm_ofs; - pdf_drop_obj(xref->table[num].obj); - xref->table[num].obj = pdf_keep_obj(pdf_trailer(xref)); - xref->table[num].type = 'n'; + entry = pdf_get_xref_entry(xref, num); + entry->ofs = ofs; + entry->gen = gen; + entry->stm_ofs = stm_ofs; + pdf_drop_obj(entry->obj); + entry->obj = pdf_keep_obj(pdf_trailer(xref)); + entry->type = 'n'; } fz_catch(xref->ctx) { @@ -184,28 +225,6 @@ pdf_read_trailer(pdf_document *xref, pdf_lexbuf *buf) } } -/* - * xref tables - */ - -void -pdf_resize_xref(pdf_document *xref, int newlen) -{ - int i; - - xref->table = fz_resize_array(xref->ctx, xref->table, newlen, sizeof(pdf_xref_entry)); - for (i = xref->len; i < newlen; i++) - { - xref->table[i].type = 0; - xref->table[i].ofs = 0; - xref->table[i].gen = 0; - xref->table[i].stm_ofs = 0; - xref->table[i].stm_buf = NULL; - xref->table[i].obj = NULL; - } - xref->len = newlen; -} - pdf_obj * pdf_new_ref(pdf_document *xref, pdf_obj *obj) { @@ -224,6 +243,7 @@ pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf) int i; int c; pdf_obj *trailer; + int xref_len = pdf_xref_len(xref); fz_read_line(xref->file, buf->scratch, buf->size); if (strncmp(buf->scratch, "xref", 4) != 0) @@ -251,18 +271,20 @@ pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf) fz_throw(xref->ctx, "out of range object num in xref: %d", ofs); /* broken pdfs where size in trailer undershoots entries in xref sections */ - if (ofs + len > xref->len) + if (ofs + len > xref_len) { fz_warn(xref->ctx, "broken xref section, proceeding anyway."); - pdf_resize_xref(xref, ofs + len); + /* Access last entry to ensure size */ + (void)pdf_get_xref_entry(xref, 
ofs + len - 1); } for (i = ofs; i < ofs + len; i++) { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); n = fz_read(xref->file, (unsigned char *) buf->scratch, 20); if (n < 0) fz_throw(xref->ctx, "cannot read xref table"); - if (!xref->table[i].type) + if (!entry->type) { s = buf->scratch; @@ -270,11 +292,11 @@ pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf) while (*s != '\0' && iswhite(*s)) s++; - xref->table[i].ofs = atoi(s); - xref->table[i].gen = atoi(s + 11); - xref->table[i].type = s[17]; + entry->ofs = atoi(s); + entry->gen = atoi(s + 11); + entry->type = s[17]; if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o') - fz_throw(xref->ctx, "unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen); + fz_throw(xref->ctx, "unexpected xref type: %#x (%d %d R)", s[17], i, entry->gen); } } } @@ -305,11 +327,12 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in if (i0 < 0 || i1 < 0) fz_throw(xref->ctx, "negative xref stream entry index"); - if (i0 + i1 > xref->len) + if (i0 + i1 > pdf_xref_len(xref)) fz_throw(xref->ctx, "xref stream has too many entries"); for (i = i0; i < i0 + i1; i++) { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); int a = 0; int b = 0; int c = 0; @@ -324,12 +347,12 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in for (n = 0; n < w2; n++) c = (c << 8) + fz_read_byte(stm); - if (!xref->table[i].type) + if (!entry->type) { int t = w0 ? a : 1; - xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; - xref->table[i].ofs = w1 ? b : 0; - xref->table[i].gen = w2 ? c : 0; + entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; + entry->ofs = w1 ? b : 0; + entry->gen = w2 ? 
c : 0; } } } @@ -352,16 +375,16 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) fz_try(ctx) { + pdf_xref_entry *entry; int ofs = fz_tell(xref->file); trailer = pdf_parse_ind_obj(xref, xref->file, buf, &num, &gen, &stm_ofs); - if (num >= xref->len) - pdf_resize_xref(xref, num+1); - xref->table[num].ofs = ofs; - xref->table[num].gen = gen; - xref->table[num].stm_ofs = stm_ofs; - pdf_drop_obj(xref->table[num].obj); - xref->table[num].obj = pdf_keep_obj(trailer); - xref->table[num].type = 'n'; + entry = pdf_get_xref_entry(xref, num); + entry->ofs = ofs; + entry->gen = gen; + entry->stm_ofs = stm_ofs; + pdf_drop_obj(entry->obj); + entry->obj = pdf_keep_obj(trailer); + entry->type = 'n'; } fz_catch(ctx) { @@ -375,11 +398,11 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) fz_throw(ctx, "xref stream missing Size entry (%d %d R)", num, gen); size = pdf_to_int(obj); - if (size >= xref->len) - pdf_resize_xref(xref, size); + /* Access xref entry to assure table size */ + (void)pdf_get_xref_entry(xref, size-1); - if (num < 0 || num >= xref->len) - fz_throw(ctx, "object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1); + if (num < 0 || num >= pdf_xref_len(xref)) + fz_throw(ctx, "object id (%d %d R) out of range (0..%d)", num, gen, pdf_xref_len(xref) - 1); obj = pdf_dict_gets(trailer, "W"); if (!obj) @@ -570,6 +593,7 @@ pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf) { int size; int i; + int xref_len; fz_context *ctx = xref->ctx; pdf_load_version(xref); @@ -582,30 +606,32 @@ pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf) if (!size) fz_throw(ctx, "trailer missing Size entry"); - if (size >= xref->len) - pdf_resize_xref(xref, size); + /* access entry to ensure xref table size */ + (void)pdf_get_xref_entry(xref, size-1); pdf_read_xref_sections(xref, xref->startxref, buf); /* broken pdfs where first object is not free */ - if (xref->table[0].type != 'f') + if (pdf_get_xref_entry(xref, 0)->type != 'f') fz_throw(ctx, "first object in xref is not 
free"); /* broken pdfs where object offsets are out of range */ - for (i = 0; i < xref->len; i++) + xref_len = pdf_xref_len(xref); + for (i = 0; i < xref_len; i++) { - if (xref->table[i].type == 'n') + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + if (entry->type == 'n') { /* Special case code: "0000000000 * n" means free, * according to some producers (inc Quartz) */ - if (xref->table[i].ofs == 0) - xref->table[i].type = 'f'; - else if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->file_size) - fz_throw(ctx, "object offset out of range: %d (%d 0 R)", xref->table[i].ofs, i); + if (entry->ofs == 0) + entry->type = 'f'; + else if (entry->ofs <= 0 || entry->ofs >= xref->file_size) + fz_throw(ctx, "object offset out of range: %d (%d 0 R)", entry->ofs, i); } - if (xref->table[i].type == 'o') - if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->len || xref->table[xref->table[i].ofs].type != 'n') - fz_throw(ctx, "invalid reference to an objstm that does not exist: %d (%d 0 R)", xref->table[i].ofs, i); + if (entry->type == 'o') + if (entry->ofs <= 0 || entry->ofs >= xref_len || pdf_get_xref_entry(xref, entry->ofs)->type != 'n') + fz_throw(ctx, "invalid reference to an objstm that does not exist: %d (%d 0 R)", entry->ofs, i); } } @@ -792,12 +818,7 @@ pdf_init_document(pdf_document *xref) } fz_catch(ctx) { - if (xref->table) - { - fz_free(xref->ctx, xref->table); - xref->table = NULL; - xref->len = 0; - } + pdf_replace_xref(xref, NULL, 0); pdf_set_xref_trailer(xref, NULL); fz_warn(xref->ctx, "trying to repair broken xref"); repaired = 1; @@ -820,14 +841,16 @@ pdf_init_document(pdf_document *xref) if (repaired) { + int xref_len = pdf_xref_len(xref); pdf_repair_obj_stms(xref); hasroot = (pdf_dict_gets(pdf_trailer(xref), "Root") != NULL); hasinfo = (pdf_dict_gets(pdf_trailer(xref), "Info") != NULL); - for (i = 1; i < xref->len; i++) + for (i = 1; i < xref_len; i++) { - if (xref->table[i].type == 0 || xref->table[i].type == 'f') + pdf_xref_entry *entry 
= pdf_get_xref_entry(xref, i); + if (entry->type == 0 || entry->type == 'f') continue; fz_try(ctx) @@ -891,7 +914,7 @@ pdf_init_document(pdf_document *xref) void pdf_close_document(pdf_document *xref) { - int i; + int i, xref_len; fz_context *ctx; if (!xref) @@ -900,19 +923,18 @@ pdf_close_document(pdf_document *xref) pdf_drop_js(xref->js); - if (xref->table) + xref_len = pdf_xref_len(xref); + for (i = 0; i < xref_len; i++) { - for (i = 0; i < xref->len; i++) + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + if (entry->obj) { - if (xref->table[i].obj) - { - pdf_drop_obj(xref->table[i].obj); - xref->table[i].obj = NULL; - fz_drop_buffer(ctx, xref->table[i].stm_buf); - } + pdf_drop_obj(entry->obj); + entry->obj = NULL; + fz_drop_buffer(ctx, entry->stm_buf); } - fz_free(xref->ctx, xref->table); } + pdf_replace_xref(xref, NULL, 0); if (xref->page_objs) { @@ -949,15 +971,17 @@ void pdf_print_xref(pdf_document *xref) { int i; - printf("xref\n0 %d\n", xref->len); - for (i = 0; i < xref->len; i++) + int xref_len = pdf_xref_len(xref); + printf("xref\n0 %d\n", pdf_xref_len(xref)); + for (i = 0; i < xref_len; i++) { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); printf("%05d: %010d %05d %c (stm_ofs=%d; stm_buf=%p)\n", i, - xref->table[i].ofs, - xref->table[i].gen, - xref->table[i].type ? xref->table[i].type : '-', - xref->table[i].stm_ofs, - xref->table[i].stm_buf); + entry->ofs, + entry->gen, + entry->type ? 
entry->type : '-', + entry->stm_ofs, + entry->stm_buf); } } @@ -1018,17 +1042,21 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, pdf_lexbuf *buf) for (i = 0; i < count; i++) { + int xref_len = pdf_xref_len(xref); + pdf_xref_entry *entry; fz_seek(stm, first + ofsbuf[i], 0); obj = pdf_parse_stm_obj(xref, stm, buf); - if (numbuf[i] < 1 || numbuf[i] >= xref->len) + if (numbuf[i] < 1 || numbuf[i] >= xref_len) { pdf_drop_obj(obj); - fz_throw(ctx, "object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1); + fz_throw(ctx, "object id (%d 0 R) out of range (0..%d)", numbuf[i], xref_len - 1); } - if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num) + entry = pdf_get_xref_entry(xref, numbuf[i]); + + if (entry->type == 'o' && entry->ofs == num) { /* If we already have an entry for this object, * we'd like to drop it and use the new one - @@ -1036,12 +1064,12 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, pdf_lexbuf *buf) * a pointer to the old one will be left with a * stale pointer. Instead, we drop the new one * and trust that the old one is correct. 
*/ - if (xref->table[numbuf[i]].obj) { - if (pdf_objcmp(xref->table[numbuf[i]].obj, obj)) + if (entry->obj) { + if (pdf_objcmp(entry->obj, obj)) fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]); pdf_drop_obj(obj); } else - xref->table[numbuf[i]].obj = obj; + entry->obj = obj; } else { @@ -1073,10 +1101,10 @@ pdf_cache_object(pdf_document *xref, int num, int gen) int rnum, rgen; fz_context *ctx = xref->ctx; - if (num < 0 || num >= xref->len) - fz_throw(ctx, "object out of range (%d %d R); xref size %d", num, gen, xref->len); + if (num < 0 || num >= pdf_xref_len(xref)) + fz_throw(ctx, "object out of range (%d %d R); xref size %d", num, gen, pdf_xref_len(xref)); - x = &xref->table[num]; + x = pdf_get_xref_entry(xref, num); if (x->obj) return; @@ -1136,6 +1164,7 @@ pdf_obj * pdf_load_object(pdf_document *xref, int num, int gen) { fz_context *ctx = xref->ctx; + pdf_xref_entry *entry; fz_try(ctx) { @@ -1146,9 +1175,11 @@ pdf_load_object(pdf_document *xref, int num, int gen) fz_throw(ctx, "cannot load object (%d %d R) into cache", num, gen); } - assert(xref->table[num].obj); + entry = pdf_get_xref_entry(xref, num); + + assert(entry->obj); - return pdf_keep_obj(xref->table[num].obj); + return pdf_keep_obj(entry->obj); } pdf_obj * @@ -1159,6 +1190,7 @@ pdf_resolve_indirect(pdf_obj *ref) int gen; fz_context *ctx = NULL; /* Avoid warning for stupid compilers */ pdf_document *xref; + pdf_xref_entry *entry; while (pdf_is_indirect(ref)) { @@ -1182,9 +1214,10 @@ pdf_resolve_indirect(pdf_obj *ref) fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen); return NULL; } - if (!xref->table[num].obj) + entry = pdf_get_xref_entry(xref, num); + if (!entry->obj) return NULL; - ref = xref->table[num].obj; + ref = entry->obj; } return ref; @@ -1193,21 +1226,22 @@ pdf_resolve_indirect(pdf_obj *ref) int pdf_count_objects(pdf_document *doc) { - return doc->len; + return pdf_xref_len(doc); } int pdf_create_object(pdf_document *xref) { /* 
TODO: reuse free object slots by properly linking free object chains in the ofs field */ - int num = xref->len; - pdf_resize_xref(xref, num + 1); - xref->table[num].type = 'f'; - xref->table[num].ofs = -1; - xref->table[num].gen = 0; - xref->table[num].stm_ofs = 0; - xref->table[num].stm_buf = NULL; - xref->table[num].obj = NULL; + pdf_xref_entry *entry; + int num = pdf_xref_len(xref); + entry = pdf_get_xref_entry(xref, num); + entry->type = 'f'; + entry->ofs = -1; + entry->gen = 0; + entry->stm_ofs = 0; + entry->stm_buf = NULL; + entry->obj = NULL; return num; } @@ -1216,13 +1250,13 @@ pdf_delete_object(pdf_document *xref, int num) { pdf_xref_entry *x; - if (num < 0 || num >= xref->len) + if (num < 0 || num >= pdf_xref_len(xref)) { - fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(xref)); return; } - x = &xref->table[num]; + x = pdf_get_xref_entry(xref, num); fz_drop_buffer(xref->ctx, x->stm_buf); pdf_drop_obj(x->obj); @@ -1240,13 +1274,13 @@ pdf_update_object(pdf_document *xref, int num, pdf_obj *newobj) { pdf_xref_entry *x; - if (num < 0 || num >= xref->len) + if (num < 0 || num >= pdf_xref_len(xref)) { - fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(xref)); return; } - x = &xref->table[num]; + x = pdf_get_xref_entry(xref, num); pdf_drop_obj(x->obj); @@ -1260,13 +1294,13 @@ pdf_update_stream(pdf_document *xref, int num, fz_buffer *newbuf) { pdf_xref_entry *x; - if (num < 0 || num >= xref->len) + if (num < 0 || num >= pdf_xref_len(xref)) { - fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(xref)); return; } - x = &xref->table[num]; + x = pdf_get_xref_entry(xref, num); fz_drop_buffer(xref->ctx, x->stm_buf); 
x->stm_buf = fz_keep_buffer(xref->ctx, newbuf); -- cgit v1.2.3