diff options
Diffstat (limited to 'pdf/pdf_xref.c')
-rw-r--r-- | pdf/pdf_xref.c | 280 |
1 files changed, 157 insertions, 123 deletions
diff --git a/pdf/pdf_xref.c b/pdf/pdf_xref.c index 254c0a3f..1f324fff 100644 --- a/pdf/pdf_xref.c +++ b/pdf/pdf_xref.c @@ -9,9 +9,26 @@ static inline int iswhite(int ch) } /* - * magic version tag and startxref + * xref tables */ +static void pdf_resize_xref(pdf_document *xref, int newlen) +{ + int i; + + xref->table = fz_resize_array(xref->ctx, xref->table, newlen, sizeof(pdf_xref_entry)); + for (i = xref->len; i < newlen; i++) + { + xref->table[i].type = 0; + xref->table[i].ofs = 0; + xref->table[i].gen = 0; + xref->table[i].stm_ofs = 0; + xref->table[i].stm_buf = NULL; + xref->table[i].obj = NULL; + } + xref->len = newlen; +} + pdf_obj *pdf_trailer(pdf_document *doc) { return doc->trailer; @@ -23,6 +40,30 @@ void pdf_set_xref_trailer(pdf_document *doc, pdf_obj *trailer) doc->trailer = pdf_keep_obj(trailer); } +int pdf_xref_len(pdf_document *doc) +{ + return doc->len; +} + +pdf_xref_entry *pdf_get_xref_entry(pdf_document *doc, int i) +{ + if (i >= doc->len) + pdf_resize_xref(doc, i+1); + + return &doc->table[i]; +} + +void pdf_replace_xref(pdf_document *doc, pdf_xref_entry *entries, int n) +{ + fz_free(doc->ctx, doc->table); + doc->table = entries; + doc->len = n; +} + +/* + * magic version tag and startxref + */ + static void pdf_load_version(pdf_document *xref) { @@ -137,20 +178,20 @@ pdf_read_new_trailer(pdf_document *xref, pdf_lexbuf *buf) { fz_try(xref->ctx) { + pdf_xref_entry *entry; pdf_obj *trailer; int num, gen, stm_ofs, ofs; ofs = fz_tell(xref->file); trailer = pdf_parse_ind_obj(xref, xref->file, buf, &num, &gen, &stm_ofs); pdf_set_xref_trailer(xref, trailer); pdf_drop_obj(trailer); - if (num >= xref->len) - pdf_resize_xref(xref, num+1); - xref->table[num].ofs = ofs; - xref->table[num].gen = gen; - xref->table[num].stm_ofs = stm_ofs; - pdf_drop_obj(xref->table[num].obj); - xref->table[num].obj = pdf_keep_obj(pdf_trailer(xref)); - xref->table[num].type = 'n'; + entry = pdf_get_xref_entry(xref, num); + entry->ofs = ofs; + entry->gen = gen; + entry->stm_ofs = stm_ofs; + pdf_drop_obj(entry->obj); + entry->obj = pdf_keep_obj(pdf_trailer(xref)); + entry->type = 'n'; } fz_catch(xref->ctx) { @@ -184,28 +225,6 @@ pdf_read_trailer(pdf_document *xref, pdf_lexbuf *buf) } } -/* - * xref tables - */ - -void -pdf_resize_xref(pdf_document *xref, int newlen) -{ - int i; - - xref->table = fz_resize_array(xref->ctx, xref->table, newlen, sizeof(pdf_xref_entry)); - for (i = xref->len; i < newlen; i++) - { - xref->table[i].type = 0; - xref->table[i].ofs = 0; - xref->table[i].gen = 0; - xref->table[i].stm_ofs = 0; - xref->table[i].stm_buf = NULL; - xref->table[i].obj = NULL; - } - xref->len = newlen; -} - pdf_obj * pdf_new_ref(pdf_document *xref, pdf_obj *obj) { @@ -224,6 +243,7 @@ pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf) int i; int c; pdf_obj *trailer; + int xref_len = pdf_xref_len(xref); fz_read_line(xref->file, buf->scratch, buf->size); if (strncmp(buf->scratch, "xref", 4) != 0) @@ -251,18 +271,20 @@ pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf) fz_throw(xref->ctx, "out of range object num in xref: %d", ofs); /* broken pdfs where size in trailer undershoots entries in xref sections */ - if (ofs + len > xref->len) + if (ofs + len > xref_len) { fz_warn(xref->ctx, "broken xref section, proceeding anyway."); - pdf_resize_xref(xref, ofs + len); + /* Access last entry to ensure size */ + (void)pdf_get_xref_entry(xref, ofs + len - 1); } for (i = ofs; i < ofs + len; i++) { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); n = fz_read(xref->file, (unsigned char *) buf->scratch, 20); if (n < 0) fz_throw(xref->ctx, "cannot read xref table"); - if (!xref->table[i].type) + if (!entry->type) { s = buf->scratch; @@ -270,11 +292,11 @@ pdf_read_old_xref(pdf_document *xref, pdf_lexbuf *buf) while (*s != '\0' && iswhite(*s)) s++; - xref->table[i].ofs = atoi(s); - xref->table[i].gen = atoi(s + 11); - xref->table[i].type = s[17]; + entry->ofs = atoi(s); + entry->gen = atoi(s + 11); + entry->type = s[17]; if (s[17] != 'f' && s[17] != 'n' && s[17] != 'o') - fz_throw(xref->ctx, "unexpected xref type: %#x (%d %d R)", s[17], i, xref->table[i].gen); + fz_throw(xref->ctx, "unexpected xref type: %#x (%d %d R)", s[17], i, entry->gen); } } } @@ -305,11 +327,12 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in if (i0 < 0 || i1 < 0) fz_throw(xref->ctx, "negative xref stream entry index"); - if (i0 + i1 > xref->len) + if (i0 + i1 > pdf_xref_len(xref)) fz_throw(xref->ctx, "xref stream has too many entries"); for (i = i0; i < i0 + i1; i++) { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); int a = 0; int b = 0; int c = 0; @@ -324,12 +347,12 @@ pdf_read_new_xref_section(pdf_document *xref, fz_stream *stm, int i0, int i1, in for (n = 0; n < w2; n++) c = (c << 8) + fz_read_byte(stm); - if (!xref->table[i].type) + if (!entry->type) { int t = w0 ? a : 1; - xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; - xref->table[i].ofs = w1 ? b : 0; - xref->table[i].gen = w2 ? c : 0; + entry->type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; + entry->ofs = w1 ? b : 0; + entry->gen = w2 ? c : 0; } } } @@ -352,16 +375,16 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) fz_try(ctx) { + pdf_xref_entry *entry; int ofs = fz_tell(xref->file); trailer = pdf_parse_ind_obj(xref, xref->file, buf, &num, &gen, &stm_ofs); - if (num >= xref->len) - pdf_resize_xref(xref, num+1); - xref->table[num].ofs = ofs; - xref->table[num].gen = gen; - xref->table[num].stm_ofs = stm_ofs; - pdf_drop_obj(xref->table[num].obj); - xref->table[num].obj = pdf_keep_obj(trailer); - xref->table[num].type = 'n'; + entry = pdf_get_xref_entry(xref, num); + entry->ofs = ofs; + entry->gen = gen; + entry->stm_ofs = stm_ofs; + pdf_drop_obj(entry->obj); + entry->obj = pdf_keep_obj(trailer); + entry->type = 'n'; } fz_catch(ctx) { @@ -375,11 +398,11 @@ pdf_read_new_xref(pdf_document *xref, pdf_lexbuf *buf) fz_throw(ctx, "xref stream missing Size entry (%d %d R)", num, gen); size = pdf_to_int(obj); - if (size >= xref->len) - pdf_resize_xref(xref, size); + /* Access xref entry to assure table size */ + (void)pdf_get_xref_entry(xref, size-1); - if (num < 0 || num >= xref->len) - fz_throw(ctx, "object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1); + if (num < 0 || num >= pdf_xref_len(xref)) + fz_throw(ctx, "object id (%d %d R) out of range (0..%d)", num, gen, pdf_xref_len(xref) - 1); obj = pdf_dict_gets(trailer, "W"); if (!obj) @@ -570,6 +593,7 @@ pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf) { int size; int i; + int xref_len; fz_context *ctx = xref->ctx; pdf_load_version(xref); @@ -582,30 +606,32 @@ pdf_load_xref(pdf_document *xref, pdf_lexbuf *buf) if (!size) fz_throw(ctx, "trailer missing Size entry"); - if (size >= xref->len) - pdf_resize_xref(xref, size); + /* access entry to ensure xref table size */ + (void)pdf_get_xref_entry(xref, size-1); pdf_read_xref_sections(xref, xref->startxref, buf); /* broken pdfs where first object is not free */ - if (xref->table[0].type != 'f') + if (pdf_get_xref_entry(xref, 0)->type != 'f') fz_throw(ctx, "first object in xref is not free"); /* broken pdfs where object offsets are out of range */ - for (i = 0; i < xref->len; i++) + xref_len = pdf_xref_len(xref); + for (i = 0; i < xref_len; i++) { - if (xref->table[i].type == 'n') + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + if (entry->type == 'n') { /* Special case code: "0000000000 * n" means free, * according to some producers (inc Quartz) */ - if (xref->table[i].ofs == 0) - xref->table[i].type = 'f'; - else if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->file_size) - fz_throw(ctx, "object offset out of range: %d (%d 0 R)", xref->table[i].ofs, i); + if (entry->ofs == 0) + entry->type = 'f'; + else if (entry->ofs <= 0 || entry->ofs >= xref->file_size) + fz_throw(ctx, "object offset out of range: %d (%d 0 R)", entry->ofs, i); } - if (xref->table[i].type == 'o') - if (xref->table[i].ofs <= 0 || xref->table[i].ofs >= xref->len || xref->table[xref->table[i].ofs].type != 'n') - fz_throw(ctx, "invalid reference to an objstm that does not exist: %d (%d 0 R)", xref->table[i].ofs, i); + if (entry->type == 'o') + if (entry->ofs <= 0 || entry->ofs >= xref_len || pdf_get_xref_entry(xref, entry->ofs)->type != 'n') + fz_throw(ctx, "invalid reference to an objstm that does not exist: %d (%d 0 R)", entry->ofs, i); } } @@ -792,12 +818,7 @@ pdf_init_document(pdf_document *xref) } fz_catch(ctx) { - if (xref->table) - { - fz_free(xref->ctx, xref->table); - xref->table = NULL; - xref->len = 0; - } + pdf_replace_xref(xref, NULL, 0); pdf_set_xref_trailer(xref, NULL); fz_warn(xref->ctx, "trying to repair broken xref"); repaired = 1; @@ -820,14 +841,16 @@ pdf_init_document(pdf_document *xref) if (repaired) { + int xref_len = pdf_xref_len(xref); pdf_repair_obj_stms(xref); hasroot = (pdf_dict_gets(pdf_trailer(xref), "Root") != NULL); hasinfo = (pdf_dict_gets(pdf_trailer(xref), "Info") != NULL); - for (i = 1; i < xref->len; i++) + for (i = 1; i < xref_len; i++) { - if (xref->table[i].type == 0 || xref->table[i].type == 'f') + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + if (entry->type == 0 || entry->type == 'f') continue; fz_try(ctx) @@ -891,7 +914,7 @@ pdf_init_document(pdf_document *xref) void pdf_close_document(pdf_document *xref) { - int i; + int i, xref_len; fz_context *ctx; if (!xref) @@ -900,19 +923,18 @@ pdf_close_document(pdf_document *xref) pdf_drop_js(xref->js); - if (xref->table) + xref_len = pdf_xref_len(xref); + for (i = 0; i < xref_len; i++) { - for (i = 0; i < xref->len; i++) + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); + if (entry->obj) { - if (xref->table[i].obj) - { - pdf_drop_obj(xref->table[i].obj); - xref->table[i].obj = NULL; - fz_drop_buffer(ctx, xref->table[i].stm_buf); - } + pdf_drop_obj(entry->obj); + entry->obj = NULL; + fz_drop_buffer(ctx, entry->stm_buf); } - fz_free(xref->ctx, xref->table); } + pdf_replace_xref(xref, NULL, 0); if (xref->page_objs) { @@ -949,15 +971,17 @@ void pdf_print_xref(pdf_document *xref) { int i; - printf("xref\n0 %d\n", xref->len); - for (i = 0; i < xref->len; i++) + int xref_len = pdf_xref_len(xref); + printf("xref\n0 %d\n", pdf_xref_len(xref)); + for (i = 0; i < xref_len; i++) { + pdf_xref_entry *entry = pdf_get_xref_entry(xref, i); printf("%05d: %010d %05d %c (stm_ofs=%d; stm_buf=%p)\n", i, - xref->table[i].ofs, - xref->table[i].gen, - xref->table[i].type ? xref->table[i].type : '-', - xref->table[i].stm_ofs, - xref->table[i].stm_buf); + entry->ofs, + entry->gen, + entry->type ? entry->type : '-', + entry->stm_ofs, + entry->stm_buf); } } @@ -1018,17 +1042,21 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, pdf_lexbuf *buf) for (i = 0; i < count; i++) { + int xref_len = pdf_xref_len(xref); + pdf_xref_entry *entry; fz_seek(stm, first + ofsbuf[i], 0); obj = pdf_parse_stm_obj(xref, stm, buf); - if (numbuf[i] < 1 || numbuf[i] >= xref->len) + if (numbuf[i] < 1 || numbuf[i] >= xref_len) { pdf_drop_obj(obj); - fz_throw(ctx, "object id (%d 0 R) out of range (0..%d)", numbuf[i], xref->len - 1); + fz_throw(ctx, "object id (%d 0 R) out of range (0..%d)", numbuf[i], xref_len - 1); } - if (xref->table[numbuf[i]].type == 'o' && xref->table[numbuf[i]].ofs == num) + entry = pdf_get_xref_entry(xref, numbuf[i]); + + if (entry->type == 'o' && entry->ofs == num) { /* If we already have an entry for this object, * we'd like to drop it and use the new one - @@ -1036,12 +1064,12 @@ pdf_load_obj_stm(pdf_document *xref, int num, int gen, pdf_lexbuf *buf) * a pointer to the old one will be left with a * stale pointer. Instead, we drop the new one * and trust that the old one is correct. */ - if (xref->table[numbuf[i]].obj) { - if (pdf_objcmp(xref->table[numbuf[i]].obj, obj)) + if (entry->obj) { + if (pdf_objcmp(entry->obj, obj)) fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]); pdf_drop_obj(obj); } else - xref->table[numbuf[i]].obj = obj; + entry->obj = obj; } else { @@ -1073,10 +1101,10 @@ pdf_cache_object(pdf_document *xref, int num, int gen) int rnum, rgen; fz_context *ctx = xref->ctx; - if (num < 0 || num >= xref->len) - fz_throw(ctx, "object out of range (%d %d R); xref size %d", num, gen, xref->len); + if (num < 0 || num >= pdf_xref_len(xref)) + fz_throw(ctx, "object out of range (%d %d R); xref size %d", num, gen, pdf_xref_len(xref)); - x = &xref->table[num]; + x = pdf_get_xref_entry(xref, num); if (x->obj) return; @@ -1136,6 +1164,7 @@ pdf_obj * pdf_load_object(pdf_document *xref, int num, int gen) { fz_context *ctx = xref->ctx; + pdf_xref_entry *entry; fz_try(ctx) { @@ -1146,9 +1175,11 @@ pdf_load_object(pdf_document *xref, int num, int gen) fz_throw(ctx, "cannot load object (%d %d R) into cache", num, gen); } - assert(xref->table[num].obj); + entry = pdf_get_xref_entry(xref, num); + + assert(entry->obj); - return pdf_keep_obj(xref->table[num].obj); + return pdf_keep_obj(entry->obj); } pdf_obj * @@ -1159,6 +1190,7 @@ pdf_resolve_indirect(pdf_obj *ref) int gen; fz_context *ctx = NULL; /* Avoid warning for stupid compilers */ pdf_document *xref; + pdf_xref_entry *entry; while (pdf_is_indirect(ref)) { @@ -1182,9 +1214,10 @@ pdf_resolve_indirect(pdf_obj *ref) fz_warn(ctx, "cannot load object (%d %d R) into cache", num, gen); return NULL; } - if (!xref->table[num].obj) + entry = pdf_get_xref_entry(xref, num); + if (!entry->obj) return NULL; - ref = xref->table[num].obj; + ref = entry->obj; } return ref; @@ -1193,21 +1226,22 @@ pdf_resolve_indirect(pdf_obj *ref) int pdf_count_objects(pdf_document *doc) { - return doc->len; + return pdf_xref_len(doc); } int pdf_create_object(pdf_document *xref) { /* TODO: reuse free object slots by properly linking free object chains in the ofs field */ - int num = xref->len; - pdf_resize_xref(xref, num + 1); - xref->table[num].type = 'f'; - xref->table[num].ofs = -1; - xref->table[num].gen = 0; - xref->table[num].stm_ofs = 0; - xref->table[num].stm_buf = NULL; - xref->table[num].obj = NULL; + pdf_xref_entry *entry; + int num = pdf_xref_len(xref); + entry = pdf_get_xref_entry(xref, num); + entry->type = 'f'; + entry->ofs = -1; + entry->gen = 0; + entry->stm_ofs = 0; + entry->stm_buf = NULL; + entry->obj = NULL; return num; } @@ -1216,13 +1250,13 @@ pdf_delete_object(pdf_document *xref, int num) { pdf_xref_entry *x; - if (num < 0 || num >= xref->len) + if (num < 0 || num >= pdf_xref_len(xref)) { - fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(xref)); return; } - x = &xref->table[num]; + x = pdf_get_xref_entry(xref, num); fz_drop_buffer(xref->ctx, x->stm_buf); pdf_drop_obj(x->obj); @@ -1240,13 +1274,13 @@ pdf_update_object(pdf_document *xref, int num, pdf_obj *newobj) { pdf_xref_entry *x; - if (num < 0 || num >= xref->len) + if (num < 0 || num >= pdf_xref_len(xref)) { - fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(xref)); return; } - x = &xref->table[num]; + x = pdf_get_xref_entry(xref, num); pdf_drop_obj(x->obj); @@ -1260,13 +1294,13 @@ pdf_update_stream(pdf_document *xref, int num, fz_buffer *newbuf) { pdf_xref_entry *x; - if (num < 0 || num >= xref->len) + if (num < 0 || num >= pdf_xref_len(xref)) { - fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, xref->len); + fz_warn(xref->ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(xref)); return; } - x = &xref->table[num]; + x = pdf_get_xref_entry(xref, num); fz_drop_buffer(xref->ctx, x->stm_buf); x->stm_buf = fz_keep_buffer(xref->ctx, newbuf); |