diff options
author | Robin Watts <robin.watts@artifex.com> | 2014-03-19 19:04:14 +0000 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2014-03-19 19:04:14 +0000 |
commit | 441954b6fb378e3af72500653be5636c7ade29ee (patch) | |
tree | e77dd3f1a26dc4e765854514b2fbb073b3ea8638 | |
parent | 92e94b0343c25c31e1b3d950e7029ec4395d66ae (diff) | |
download | mupdf-441954b6fb378e3af72500653be5636c7ade29ee.tar.xz |
Make mutool clean sanitise the Dests lists when subsetting.
When you use mutool clean to subset pages out of a PDF, we already
remove the Name tree entries for named locations that aren't in the
target file. We have henceforth failed to remove references to these
removed names though. This can cause errors (really warnings) on
reading the file back.
-rw-r--r-- | source/tools/pdfclean.c | 68 |
1 files changed, 62 insertions, 6 deletions
diff --git a/source/tools/pdfclean.c b/source/tools/pdfclean.c index 446c778a..74b70394 100644 --- a/source/tools/pdfclean.c +++ b/source/tools/pdfclean.c @@ -34,15 +34,32 @@ static void usage(void) exit(1); } +static int +string_in_names_list(pdf_obj *p, pdf_obj *names_list) +{ + int n = pdf_array_len(names_list); + int i; + char *str = pdf_to_str_buf(p); + + for (i = 0; i < n ; i += 2) + { + if (!strcmp(pdf_to_str_buf(pdf_array_get(names_list, i)), str)) + return 1; + } + return 0; +} + /* * Recreate page tree to only retain specified pages. */ - static void retainpages(globals *glo, int argc, char **argv) { pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; pdf_document *doc = glo->doc; int argidx = 0; + pdf_obj *names_list = NULL; + int pagecount; + int i; /* Keep only pages/type and (reduced) dest entries to avoid * references to unretained pages */ @@ -65,7 +82,7 @@ static void retainpages(globals *glo, int argc, char **argv) /* Retain pages specified */ while (argc - argidx) { - int page, spage, epage, pagecount; + int page, spage, epage; char *spec, *dash; char *pagelist = argv[argidx]; @@ -123,26 +140,26 @@ static void retainpages(globals *glo, int argc, char **argv) /* Also preserve the (partial) Dests name tree */ if (olddests) { - int i; pdf_obj *names = pdf_new_dict(doc, 1); pdf_obj *dests = pdf_new_dict(doc, 1); - pdf_obj *names_list = pdf_new_array(doc, 32); int len = pdf_dict_len(olddests); + names_list = pdf_new_array(doc, 32); + for (i = 0; i < len; i++) { pdf_obj *key = pdf_dict_get_key(olddests, i); pdf_obj *val = pdf_dict_get_val(olddests, i); - pdf_obj *key_str = pdf_new_string(doc, pdf_to_name(key), strlen(pdf_to_name(key))); pdf_obj *dest = pdf_dict_gets(val, "D"); dest = pdf_array_get(dest ? dest : val, 0); if (pdf_array_contains(pdf_dict_gets(pages, "Kids"), dest)) { + pdf_obj *key_str = pdf_new_string(doc, pdf_to_name(key), strlen(pdf_to_name(key))); pdf_array_push(names_list, key_str); pdf_array_push(names_list, val); + pdf_drop_obj(key_str); } - pdf_drop_obj(key_str); } root = pdf_dict_gets(pdf_trailer(doc), "Root"); @@ -155,6 +172,45 @@ static void retainpages(globals *glo, int argc, char **argv) pdf_drop_obj(names_list); pdf_drop_obj(olddests); } + + /* Force the next call to pdf_count_pages to recount */ + glo->doc->page_count = 0; + + /* Edit each pages /Annot list to remove any links that point to + * nowhere. */ + pagecount = pdf_count_pages(doc); + for (i = 0; i < pagecount; i++) + { + pdf_obj *pageref = pdf_lookup_page_obj(doc, i); + pdf_obj *pageobj = pdf_resolve_indirect(pageref); + + pdf_obj *annots = pdf_dict_gets(pageobj, "Annots"); + + int len = pdf_array_len(annots); + int j; + + for (j = 0; j < len; j++) + { + pdf_obj *o = pdf_array_get(annots, j); + pdf_obj *p; + + if (strcmp(pdf_to_name(pdf_dict_gets(o, "Subtype")), "Link")) + continue; + + p = pdf_dict_gets(o, "A"); + if (strcmp(pdf_to_name(pdf_dict_gets(p, "S")), "GoTo")) + continue; + + if (string_in_names_list(pdf_dict_gets(p, "D"), names_list)) + continue; + + /* FIXME: Should probably look at Next too */ + + /* Remove this annotation */ + pdf_array_delete(annots, j); + j--; + } + } } void pdfclean_clean(fz_context *ctx, char *infile, char *outfile, char *password, fz_write_options *opts, char *argv[], int argc) |