diff options
Diffstat (limited to 'source/pdf/pdf-repair.c')
-rw-r--r-- | source/pdf/pdf-repair.c | 249 |
1 files changed, 123 insertions, 126 deletions
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c index 7e35c5fb..0c1c504c 100644 --- a/source/pdf/pdf-repair.c +++ b/source/pdf/pdf-repair.c @@ -15,12 +15,11 @@ struct entry }; int -pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int *tmpofs) +pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int *tmpofs) { + fz_stream *file = doc->file; pdf_token tok; int stm_len; - fz_stream *file = doc->file; - fz_context *ctx = file->ctx; *stmofsp = 0; if (stmlenp) @@ -32,7 +31,7 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p * '<int> <int> obj'. We expect the next thing we see to be a * pdf object. Regardless of the type of thing we meet next * we only need to fully parse it if it is a dictionary. */ - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok == PDF_TOK_OPEN_DICT) { @@ -41,7 +40,7 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p /* Send NULL xref so we don't try to resolve references */ fz_try(ctx) { - dict = pdf_parse_dict(doc, file, buf); + dict = pdf_parse_dict(ctx, doc, file, buf); } fz_catch(ctx) { @@ -50,45 +49,45 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p if (file->eof) fz_rethrow_message(ctx, "broken object at EOF ignored"); /* Silently swallow the error */ - dict = pdf_new_dict(doc, 2); + dict = pdf_new_dict(ctx, doc, 2); } if (encrypt && id) { - obj = pdf_dict_gets(dict, "Type"); - if (pdf_is_name(obj) && !strcmp(pdf_to_name(obj), "XRef")) + obj = pdf_dict_gets(ctx, dict, "Type"); + if (pdf_is_name(ctx, obj) && !strcmp(pdf_to_name(ctx, obj), "XRef")) { - obj = pdf_dict_gets(dict, "Encrypt"); + obj = pdf_dict_gets(ctx, dict, "Encrypt"); if (obj) { - pdf_drop_obj(*encrypt); - *encrypt = pdf_keep_obj(obj); + pdf_drop_obj(ctx, *encrypt); + *encrypt = pdf_keep_obj(ctx, obj); } - obj = pdf_dict_gets(dict, "ID"); + obj = pdf_dict_gets(ctx, dict, "ID"); if (obj) { - pdf_drop_obj(*id); - *id = pdf_keep_obj(obj); + pdf_drop_obj(ctx, *id); + *id = pdf_keep_obj(ctx, obj); } } } - obj = pdf_dict_gets(dict, "Length"); - if (!pdf_is_indirect(obj) && pdf_is_int(obj)) - stm_len = pdf_to_int(obj); + obj = pdf_dict_gets(ctx, dict, "Length"); + if (!pdf_is_indirect(ctx, obj) && pdf_is_int(ctx, obj)) + stm_len = pdf_to_int(ctx, obj); if (doc->file_reading_linearly && page) { - obj = pdf_dict_gets(dict, "Type"); - if (!strcmp(pdf_to_name(obj), "Page")) + obj = pdf_dict_gets(ctx, dict, "Type"); + if (!strcmp(pdf_to_name(ctx, obj), "Page")) { - pdf_drop_obj(*page); - *page = pdf_keep_obj(dict); + pdf_drop_obj(ctx, *page); + *page = pdf_keep_obj(ctx, dict); } } - pdf_drop_obj(dict); + pdf_drop_obj(ctx, dict); } while ( tok != PDF_TOK_STREAM && @@ -97,31 +96,31 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p tok != PDF_TOK_EOF && tok != PDF_TOK_INT ) { - *tmpofs = fz_tell(file); + *tmpofs = fz_tell(ctx, file); if (*tmpofs < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file"); - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); } if (tok == PDF_TOK_STREAM) { - int c = fz_read_byte(file); + int c = fz_read_byte(ctx, file); if (c == '\r') { - c = fz_peek_byte(file); + c = fz_peek_byte(ctx, file); if (c == '\n') - fz_read_byte(file); + fz_read_byte(ctx, file); } - *stmofsp = fz_tell(file); + *stmofsp = fz_tell(ctx, file); if (*stmofsp < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot seek in file"); if (stm_len > 0) { - fz_seek(file, *stmofsp + stm_len, 0); + fz_seek(ctx, file, *stmofsp + stm_len, 0); fz_try(ctx) { - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); } fz_catch(ctx) { @@ -130,14 +129,14 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p } if (tok == PDF_TOK_ENDSTREAM) goto atobjend; - fz_seek(file, *stmofsp, 0); + fz_seek(ctx, file, *stmofsp, 0); } - (void)fz_read(file, (unsigned char *) buf->scratch, 9); + (void)fz_read(ctx, file, (unsigned char *) buf->scratch, 9); while (memcmp(buf->scratch, "endstream", 9) != 0) { - c = fz_read_byte(file); + c = fz_read_byte(ctx, file); if (c == EOF) break; memmove(&buf->scratch[0], &buf->scratch[1], 8); @@ -145,35 +144,34 @@ pdf_repair_obj(pdf_document *doc, pdf_lexbuf *buf, int *stmofsp, int *stmlenp, p } if (stmlenp) - *stmlenp = fz_tell(file) - *stmofsp - 9; + *stmlenp = fz_tell(ctx, file) - *stmofsp - 9; atobjend: - *tmpofs = fz_tell(file); + *tmpofs = fz_tell(ctx, file); if (*tmpofs < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file"); - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); if (tok != PDF_TOK_ENDOBJ) fz_warn(ctx, "object missing 'endobj' token"); else { /* Read another token as we always return the next one */ - *tmpofs = fz_tell(file); + *tmpofs = fz_tell(ctx, file); if (*tmpofs < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file"); - tok = pdf_lex(file, buf); + tok = pdf_lex(ctx, file, buf); } } return tok; } static void -pdf_repair_obj_stm(pdf_document *doc, int num, int gen) +pdf_repair_obj_stm(fz_context *ctx, pdf_document *doc, int num, int gen) { pdf_obj *obj; fz_stream *stm = NULL; pdf_token tok; int i, n, count; - fz_context *ctx = doc->ctx; pdf_lexbuf buf; fz_var(stm); @@ -182,19 +180,19 @@ pdf_repair_obj_stm(pdf_document *doc, int num, int gen) fz_try(ctx) { - obj = pdf_load_object(doc, num, gen); + obj = pdf_load_object(ctx, doc, num, gen); - count = pdf_to_int(pdf_dict_gets(obj, "N")); + count = pdf_to_int(ctx, pdf_dict_gets(ctx, obj, "N")); - pdf_drop_obj(obj); + pdf_drop_obj(ctx, obj); - stm = pdf_open_stream(doc, num, gen); + stm = pdf_open_stream(ctx, doc, num, gen); for (i = 0; i < count; i++) { pdf_xref_entry *entry; - tok = pdf_lex(stm, &buf); + tok = pdf_lex(ctx, stm, &buf); if (tok != PDF_TOK_INT) fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen); @@ -204,29 +202,29 @@ pdf_repair_obj_stm(pdf_document *doc, int num, int gen) fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i); continue; } - else if (n >= pdf_xref_len(doc)) + else if (n >= pdf_xref_len(ctx, doc)) { fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i); continue; } - entry = pdf_get_populating_xref_entry(doc, n); + entry = pdf_get_populating_xref_entry(ctx, doc, n); entry->ofs = num; entry->gen = i; entry->stm_ofs = 0; - pdf_drop_obj(entry->obj); + pdf_drop_obj(ctx, entry->obj); entry->obj = NULL; entry->type = 'o'; - tok = pdf_lex(stm, &buf); + tok = pdf_lex(ctx, stm, &buf); if (tok != PDF_TOK_INT) fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d %d R)", num, gen); } } fz_always(ctx) { - fz_close(stm); - pdf_lexbuf_fin(&buf); + fz_drop_stream(ctx, stm); + pdf_lexbuf_fin(ctx, &buf); } fz_catch(ctx) { @@ -235,7 +233,7 @@ pdf_repair_obj_stm(pdf_document *doc, int num, int gen) } void -pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) +pdf_repair_xref(fz_context *ctx, pdf_document *doc) { pdf_obj *dict, *obj = NULL; pdf_obj *length; @@ -257,7 +255,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) pdf_token tok; int next; int i, n, c; - fz_context *ctx = doc->ctx; + pdf_lexbuf *buf = &doc->lexbuf.base; fz_var(encrypt); fz_var(id); @@ -267,14 +265,14 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) fz_var(obj); if (doc->repair_attempted) - fz_throw(doc->ctx, FZ_ERROR_GENERIC, "Repair failed already - not trying again"); + fz_throw(ctx, FZ_ERROR_GENERIC, "Repair failed already - not trying again"); doc->repair_attempted = 1; doc->dirty = 1; /* Can't support incremental update after repair */ doc->freeze_updates = 1; - fz_seek(doc->file, 0, 0); + fz_seek(ctx, doc->file, 0, 0); fz_try(ctx) { @@ -284,34 +282,34 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) list = fz_malloc_array(ctx, listcap, sizeof(struct entry)); /* look for '%PDF' version marker within first kilobyte of file */ - n = fz_read(doc->file, (unsigned char *)buf->scratch, fz_mini(buf->size, 1024)); + n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, fz_mini(buf->size, 1024)); - fz_seek(doc->file, 0, 0); + fz_seek(ctx, doc->file, 0, 0); for (i = 0; i < n - 4; i++) { if (memcmp(&buf->scratch[i], "%PDF", 4) == 0) { - fz_seek(doc->file, i + 8, 0); /* skip "%PDF-X.Y" */ + fz_seek(ctx, doc->file, i + 8, 0); /* skip "%PDF-X.Y" */ break; } } /* skip comment line after version marker since some generators * forget to terminate the comment with a newline */ - c = fz_read_byte(doc->file); + c = fz_read_byte(ctx, doc->file); while (c >= 0 && (c == ' ' || c == '%')) - c = fz_read_byte(doc->file); - fz_unread_byte(doc->file); + c = fz_read_byte(ctx, doc->file); + fz_unread_byte(ctx, doc->file); while (1) { - tmpofs = fz_tell(doc->file); + tmpofs = fz_tell(ctx, doc->file); if (tmpofs < 0) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file"); fz_try(ctx) { - tok = pdf_lex_no_string(doc->file, buf); + tok = pdf_lex_no_string(ctx, doc->file, buf); } fz_catch(ctx) { @@ -345,7 +343,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) { stm_len = 0; stm_ofs = 0; - tok = pdf_repair_obj(doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs); + tok = pdf_repair_obj(ctx, doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs); } fz_catch(ctx) { @@ -393,7 +391,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) { fz_try(ctx) { - dict = pdf_parse_dict(doc, doc->file, buf); + dict = pdf_parse_dict(ctx, doc, doc->file, buf); } fz_catch(ctx) { @@ -405,35 +403,35 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) continue; } - obj = pdf_dict_gets(dict, "Encrypt"); + obj = pdf_dict_gets(ctx, dict, "Encrypt"); if (obj) { - pdf_drop_obj(encrypt); - encrypt = pdf_keep_obj(obj); + pdf_drop_obj(ctx, encrypt); + encrypt = pdf_keep_obj(ctx, obj); } - obj = pdf_dict_gets(dict, "ID"); - if (obj && (!id || !encrypt || pdf_dict_gets(dict, "Encrypt"))) + obj = pdf_dict_gets(ctx, dict, "ID"); + if (obj && (!id || !encrypt || pdf_dict_gets(ctx, dict, "Encrypt"))) { - pdf_drop_obj(id); - id = pdf_keep_obj(obj); + pdf_drop_obj(ctx, id); + id = pdf_keep_obj(ctx, obj); } - obj = pdf_dict_gets(dict, "Root"); + obj = pdf_dict_gets(ctx, dict, "Root"); if (obj) { - pdf_drop_obj(root); - root = pdf_keep_obj(obj); + pdf_drop_obj(ctx, root); + root = pdf_keep_obj(ctx, obj); } - obj = pdf_dict_gets(dict, "Info"); + obj = pdf_dict_gets(ctx, dict, "Info"); if (obj) { - pdf_drop_obj(info); - info = pdf_keep_obj(obj); + pdf_drop_obj(ctx, info); + info = pdf_keep_obj(ctx, obj); } - pdf_drop_obj(dict); + pdf_drop_obj(ctx, dict); obj = NULL; } @@ -442,7 +440,7 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) else { if (tok == PDF_TOK_ERROR) - fz_read_byte(doc->file); + fz_read_byte(ctx, doc->file); num = 0; gen = 0; } @@ -457,11 +455,11 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) */ /* Ensure that the first xref table is a 'solid' one from * 0 to maxnum. */ - pdf_ensure_solid_xref(doc, maxnum); + pdf_ensure_solid_xref(ctx, doc, maxnum); for (i = 0; i < listlen; i++) { - entry = pdf_get_populating_xref_entry(doc, list[i].num); + entry = pdf_get_populating_xref_entry(ctx, doc, list[i].num); entry->type = 'n'; entry->ofs = list[i].ofs; entry->gen = list[i].gen; @@ -471,26 +469,26 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) /* correct stream length for unencrypted documents */ if (!encrypt && list[i].stm_len >= 0) { - dict = pdf_load_object(doc, list[i].num, list[i].gen); + dict = pdf_load_object(ctx, doc, list[i].num, list[i].gen); - length = pdf_new_int(doc, list[i].stm_len); - pdf_dict_puts(dict, "Length", length); - pdf_drop_obj(length); + length = pdf_new_int(ctx, doc, list[i].stm_len); + pdf_dict_puts(ctx, dict, "Length", length); + pdf_drop_obj(ctx, length); - pdf_drop_obj(dict); + pdf_drop_obj(ctx, dict); } } - entry = pdf_get_populating_xref_entry(doc, 0); + entry = pdf_get_populating_xref_entry(ctx, doc, 0); entry->type = 'f'; entry->ofs = 0; entry->gen = 65535; entry->stm_ofs = 0; next = 0; - for (i = pdf_xref_len(doc) - 1; i >= 0; i--) + for (i = pdf_xref_len(ctx, doc) - 1; i >= 0; i--) { - entry = pdf_get_populating_xref_entry(doc, i); + entry = pdf_get_populating_xref_entry(ctx, doc, i); if (entry->type == 'f') { entry->ofs = next; @@ -502,57 +500,57 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) /* create a repaired trailer, Root will be added later */ - obj = pdf_new_dict(doc, 5); + obj = pdf_new_dict(ctx, doc, 5); /* During repair there is only a single xref section */ - pdf_set_populating_xref_trailer(doc, obj); - pdf_drop_obj(obj); + pdf_set_populating_xref_trailer(ctx, doc, obj); + pdf_drop_obj(ctx, obj); obj = NULL; - obj = pdf_new_int(doc, maxnum + 1); - pdf_dict_puts(pdf_trailer(doc), "Size", obj); - pdf_drop_obj(obj); + obj = pdf_new_int(ctx, doc, maxnum + 1); + pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "Size", obj); + pdf_drop_obj(ctx, obj); obj = NULL; if (root) { - pdf_dict_puts(pdf_trailer(doc), "Root", root); - pdf_drop_obj(root); + pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "Root", root); + pdf_drop_obj(ctx, root); root = NULL; } if (info) { - pdf_dict_puts(pdf_trailer(doc), "Info", info); - pdf_drop_obj(info); + pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "Info", info); + pdf_drop_obj(ctx, info); info = NULL; } if (encrypt) { - if (pdf_is_indirect(encrypt)) + if (pdf_is_indirect(ctx, encrypt)) { /* create new reference with non-NULL xref pointer */ - obj = pdf_new_indirect(doc, pdf_to_num(encrypt), pdf_to_gen(encrypt)); - pdf_drop_obj(encrypt); + obj = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, encrypt), pdf_to_gen(ctx, encrypt)); + pdf_drop_obj(ctx, encrypt); encrypt = obj; obj = NULL; } - pdf_dict_puts(pdf_trailer(doc), "Encrypt", encrypt); - pdf_drop_obj(encrypt); + pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "Encrypt", encrypt); + pdf_drop_obj(ctx, encrypt); encrypt = NULL; } if (id) { - if (pdf_is_indirect(id)) + if (pdf_is_indirect(ctx, id)) { /* create new reference with non-NULL xref pointer */ - obj = pdf_new_indirect(doc, pdf_to_num(id), pdf_to_gen(id)); - pdf_drop_obj(id); + obj = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, id), pdf_to_gen(ctx, id)); + pdf_drop_obj(ctx, id); id = obj; obj = NULL; } - pdf_dict_puts(pdf_trailer(doc), "ID", id); - pdf_drop_obj(id); + pdf_dict_puts(ctx, pdf_trailer(ctx, doc), "ID", id); + pdf_drop_obj(ctx, id); id = NULL; } @@ -560,50 +558,49 @@ pdf_repair_xref(pdf_document *doc, pdf_lexbuf *buf) } fz_catch(ctx) { - pdf_drop_obj(encrypt); - pdf_drop_obj(id); - pdf_drop_obj(root); - pdf_drop_obj(obj); - pdf_drop_obj(info); + pdf_drop_obj(ctx, encrypt); + pdf_drop_obj(ctx, id); + pdf_drop_obj(ctx, root); + pdf_drop_obj(ctx, obj); + pdf_drop_obj(ctx, info); fz_free(ctx, list); fz_rethrow(ctx); } } void -pdf_repair_obj_stms(pdf_document *doc) +pdf_repair_obj_stms(fz_context *ctx, pdf_document *doc) { - fz_context *ctx = doc->ctx; pdf_obj *dict; int i; - int xref_len = pdf_xref_len(doc); + int xref_len = pdf_xref_len(ctx, doc); for (i = 0; i < xref_len; i++) { - pdf_xref_entry *entry = pdf_get_populating_xref_entry(doc, i); + pdf_xref_entry *entry = pdf_get_populating_xref_entry(ctx, doc, i); if (entry->stm_ofs) { - dict = pdf_load_object(doc, i, 0); + dict = pdf_load_object(ctx, doc, i, 0); fz_try(ctx) { - if (!strcmp(pdf_to_name(pdf_dict_gets(dict, "Type")), "ObjStm")) - pdf_repair_obj_stm(doc, i, 0); + if (!strcmp(pdf_to_name(ctx, pdf_dict_gets(ctx, dict, "Type")), "ObjStm")) + pdf_repair_obj_stm(ctx, doc, i, 0); } fz_catch(ctx) { fz_warn(ctx, "ignoring broken object stream (%d 0 R)", i); } - pdf_drop_obj(dict); + pdf_drop_obj(ctx, dict); } } /* Ensure that streamed objects reside inside a known non-streamed object */ for (i = 0; i < xref_len; i++) { - pdf_xref_entry *entry = pdf_get_populating_xref_entry(doc, i); + pdf_xref_entry *entry = pdf_get_populating_xref_entry(ctx, doc, i); - if (entry->type == 'o' && pdf_get_populating_xref_entry(doc, entry->ofs)->type != 'n') - fz_throw(doc->ctx, FZ_ERROR_GENERIC, "invalid reference to non-object-stream: %d (%d 0 R)", entry->ofs, i); + if (entry->type == 'o' && pdf_get_populating_xref_entry(ctx, doc, entry->ofs)->type != 'n') + fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to non-object-stream: %d (%d 0 R)", entry->ofs, i); } } |