diff options
Diffstat (limited to 'source')
-rw-r--r-- | source/tools/pdfclean.c | 68 |
1 files changed, 62 insertions, 6 deletions
diff --git a/source/tools/pdfclean.c b/source/tools/pdfclean.c index 446c778a..74b70394 100644 --- a/source/tools/pdfclean.c +++ b/source/tools/pdfclean.c @@ -34,15 +34,32 @@ static void usage(void) exit(1); } +static int +string_in_names_list(pdf_obj *p, pdf_obj *names_list) +{ + int n = pdf_array_len(names_list); + int i; + char *str = pdf_to_str_buf(p); + + for (i = 0; i < n ; i += 2) + { + if (!strcmp(pdf_to_str_buf(pdf_array_get(names_list, i)), str)) + return 1; + } + return 0; +} + /* * Recreate page tree to only retain specified pages. */ - static void retainpages(globals *glo, int argc, char **argv) { pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; pdf_document *doc = glo->doc; int argidx = 0; + pdf_obj *names_list = NULL; + int pagecount; + int i; /* Keep only pages/type and (reduced) dest entries to avoid * references to unretained pages */ @@ -65,7 +82,7 @@ static void retainpages(globals *glo, int argc, char **argv) /* Retain pages specified */ while (argc - argidx) { - int page, spage, epage, pagecount; + int page, spage, epage; char *spec, *dash; char *pagelist = argv[argidx]; @@ -123,26 +140,26 @@ static void retainpages(globals *glo, int argc, char **argv) /* Also preserve the (partial) Dests name tree */ if (olddests) { - int i; pdf_obj *names = pdf_new_dict(doc, 1); pdf_obj *dests = pdf_new_dict(doc, 1); - pdf_obj *names_list = pdf_new_array(doc, 32); int len = pdf_dict_len(olddests); + names_list = pdf_new_array(doc, 32); + for (i = 0; i < len; i++) { pdf_obj *key = pdf_dict_get_key(olddests, i); pdf_obj *val = pdf_dict_get_val(olddests, i); - pdf_obj *key_str = pdf_new_string(doc, pdf_to_name(key), strlen(pdf_to_name(key))); pdf_obj *dest = pdf_dict_gets(val, "D"); dest = pdf_array_get(dest ? dest : val, 0); if (pdf_array_contains(pdf_dict_gets(pages, "Kids"), dest)) { + pdf_obj *key_str = pdf_new_string(doc, pdf_to_name(key), strlen(pdf_to_name(key))); pdf_array_push(names_list, key_str); pdf_array_push(names_list, val); + pdf_drop_obj(key_str); } - pdf_drop_obj(key_str); } root = pdf_dict_gets(pdf_trailer(doc), "Root"); @@ -155,6 +172,45 @@ static void retainpages(globals *glo, int argc, char **argv) pdf_drop_obj(names_list); pdf_drop_obj(olddests); } + + /* Force the next call to pdf_count_pages to recount */ + glo->doc->page_count = 0; + + /* Edit each pages /Annot list to remove any links that point to + * nowhere. */ + pagecount = pdf_count_pages(doc); + for (i = 0; i < pagecount; i++) + { + pdf_obj *pageref = pdf_lookup_page_obj(doc, i); + pdf_obj *pageobj = pdf_resolve_indirect(pageref); + + pdf_obj *annots = pdf_dict_gets(pageobj, "Annots"); + + int len = pdf_array_len(annots); + int j; + + for (j = 0; j < len; j++) + { + pdf_obj *o = pdf_array_get(annots, j); + pdf_obj *p; + + if (strcmp(pdf_to_name(pdf_dict_gets(o, "Subtype")), "Link")) + continue; + + p = pdf_dict_gets(o, "A"); + if (strcmp(pdf_to_name(pdf_dict_gets(p, "S")), "GoTo")) + continue; + + if (string_in_names_list(pdf_dict_gets(p, "D"), names_list)) + continue; + + /* FIXME: Should probably look at Next too */ + + /* Remove this annotation */ + pdf_array_delete(annots, j); + j--; + } + } } void pdfclean_clean(fz_context *ctx, char *infile, char *outfile, char *password, fz_write_options *opts, char *argv[], int argc) |