diff options
Diffstat (limited to 'source/pdf')
-rw-r--r-- | source/pdf/pdf-repair.c | 89 | ||||
-rw-r--r-- | source/pdf/pdf-xref.c | 4 |
2 files changed, 65 insertions, 28 deletions
diff --git a/source/pdf/pdf-repair.c b/source/pdf/pdf-repair.c index 6e62bf00..fdd46483 100644 --- a/source/pdf/pdf-repair.c +++ b/source/pdf/pdf-repair.c @@ -14,8 +14,21 @@ struct entry int stm_len; }; +static void add_root(fz_context *ctx, pdf_obj *obj, pdf_obj ***roots, int *num_roots, int *max_roots) +{ + if (*num_roots == *max_roots) + { + int new_max_roots = *max_roots * 2; + if (new_max_roots == 0) + new_max_roots = 4; + *roots = fz_resize_array(ctx, *roots, new_max_roots, sizeof(**roots)); + *max_roots = new_max_roots; + } + (*roots)[(*num_roots)++] = pdf_keep_obj(ctx, obj); +} + int -pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, fz_off_t *tmpofs) +pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, fz_off_t *tmpofs, pdf_obj **root) { fz_stream *file = doc->file; pdf_token tok; @@ -37,10 +50,9 @@ pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *st { pdf_obj *dict, *obj; - /* Send NULL xref so we don't try to resolve references */ fz_try(ctx) { - dict = pdf_parse_dict(ctx, NULL, file, buf); + dict = pdf_parse_dict(ctx, doc, file, buf); } fz_catch(ctx) { @@ -52,24 +64,39 @@ pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *st dict = pdf_new_dict(ctx, NULL, 2); } - if (encrypt && id) + /* We must be careful not to try to resolve any indirections + * here. We have just read dict, so we know it to be a non + * indirected dictionary. Before we look at any values that + * we get back from looking up in it, we need to check they + * aren't indirected. */ + + if (encrypt || id || root) { obj = pdf_dict_get(ctx, dict, PDF_NAME_Type); - if (pdf_name_eq(ctx, obj, PDF_NAME_XRef)) + if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_XRef)) { - obj = pdf_dict_get(ctx, dict, PDF_NAME_Encrypt); - if (obj) + if (encrypt) { - pdf_drop_obj(ctx, *encrypt); - *encrypt = pdf_keep_obj(ctx, obj); + obj = pdf_dict_get(ctx, dict, PDF_NAME_Encrypt); + if (obj) + { + pdf_drop_obj(ctx, *encrypt); + *encrypt = pdf_keep_obj(ctx, obj); + } } - obj = pdf_dict_get(ctx, dict, PDF_NAME_ID); - if (obj) + if (id) { - pdf_drop_obj(ctx, *id); - *id = pdf_keep_obj(ctx, obj); + obj = pdf_dict_get(ctx, dict, PDF_NAME_ID); + if (obj) + { + pdf_drop_obj(ctx, *id); + *id = pdf_keep_obj(ctx, obj); + } } + + if (root) + *root = pdf_keep_obj(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Root)); } } @@ -80,7 +107,7 @@ pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *st if (doc->file_reading_linearly && page) { obj = pdf_dict_get(ctx, dict, PDF_NAME_Type); - if (pdf_name_eq(ctx, obj, PDF_NAME_Page)) + if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_Page)) { pdf_drop_obj(ctx, *page); *page = pdf_keep_obj(ctx, dict); @@ -343,11 +370,19 @@ pdf_repair_xref(fz_context *ctx, pdf_document *doc) else if (tok == PDF_TOK_OBJ) { + pdf_obj *root = NULL; + fz_try(ctx) { stm_len = 0; stm_ofs = 0; - tok = pdf_repair_obj(ctx, doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs); + tok = pdf_repair_obj(ctx, doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs, &root); + if (root) + add_root(ctx, root, &roots, &num_roots, &max_roots); + } + fz_always(ctx) + { + pdf_drop_obj(ctx, root); } fz_catch(ctx) { @@ -423,17 +458,7 @@ pdf_repair_xref(fz_context *ctx, pdf_document *doc) obj = pdf_dict_get(ctx, dict, PDF_NAME_Root); if (obj) - { - if (num_roots == max_roots) - { - int new_max_roots = max_roots * 2; - if (new_max_roots == 0) - new_max_roots = 4; - roots = fz_resize_array(ctx, roots, new_max_roots, sizeof(*roots)); - max_roots = new_max_roots; - } - roots[num_roots++] = pdf_keep_obj(ctx, obj); - } + add_root(ctx, obj, &roots, &num_roots, &max_roots); obj = pdf_dict_get(ctx, dict, PDF_NAME_Info); if (obj) @@ -471,6 +496,18 @@ pdf_repair_xref(fz_context *ctx, pdf_document *doc) * 0 to maxnum. */ pdf_ensure_solid_xref(ctx, doc, maxnum); + for (i = 1; i < maxnum; i++) + { + entry = pdf_get_populating_xref_entry(ctx, doc, i); + if (entry->obj != NULL) + continue; + entry->type = 'f'; + entry->ofs = 0; + entry->gen = 0; + + entry->stm_ofs = 0; + } + for (i = 0; i < listlen; i++) { entry = pdf_get_populating_xref_entry(ctx, doc, list[i].num); diff --git a/source/pdf/pdf-xref.c b/source/pdf/pdf-xref.c index 3a9f70df..727dcda7 100644 --- a/source/pdf/pdf-xref.c +++ b/source/pdf/pdf-xref.c @@ -1807,7 +1807,7 @@ pdf_obj_read(fz_context *ctx, pdf_document *doc, fz_off_t *offset, int *nump, pd * whenever we read an object it should just go into the * previous xref. */ - tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs); + tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs, NULL); do /* So we can break out of it */ { @@ -2600,7 +2600,7 @@ pdf_load_hint_object(fz_context *ctx, pdf_document *doc) tok = pdf_lex(ctx, doc->file, buf); if (tok != PDF_TOK_OBJ) break; - (void)pdf_repair_obj(ctx, doc, buf, &tmpofs, NULL, NULL, NULL, &page, &tmpofs); + (void)pdf_repair_obj(ctx, doc, buf, &tmpofs, NULL, NULL, NULL, &page, &tmpofs, NULL); pdf_load_hints(ctx, doc, num, gen); } } |