diff options
author | Sebastian Rasmussen <sebras@hotmail.com> | 2010-04-24 23:58:50 +0200 |
---|---|---|
committer | Sebastian Rasmussen <sebras@hotmail.com> | 2010-04-24 23:58:50 +0200 |
commit | c8ee25ca3c114ccd4bceae041835bfc55a993e85 (patch) | |
tree | 0f04ddc0c4c9c476c8be54d93222750d8a62738e /apps/pdfclean.c | |
parent | 41af20e73f952c0581c70267f767b8a1d2871693 (diff) | |
download | mupdf-c8ee25ca3c114ccd4bceae041835bfc55a993e85.tar.xz |
Add support to retain only the specified pages when using pdfclean.
Diffstat (limited to 'apps/pdfclean.c')
-rw-r--r-- | apps/pdfclean.c | 99 |
1 files changed, 96 insertions, 3 deletions
diff --git a/apps/pdfclean.c b/apps/pdfclean.c index 755cb39c..5fce8154 100644 --- a/apps/pdfclean.c +++ b/apps/pdfclean.c @@ -255,7 +255,7 @@ static void savexref(void) static void cleanusage(void) { fprintf(stderr, - "usage: pdfclean [options] input.pdf [outfile.pdf]\n" + "usage: pdfclean [options] input.pdf [outfile.pdf] [pages]\n" " -p -\tpassword for decryption\n" " -g \tgarbage collect unused objects\n" " -x \texpand compressed streams\n"); @@ -270,6 +270,7 @@ int main(int argc, char **argv) fz_error error; int c, oid; int lastfree; + int subset; while ((c = fz_getopt(argc, argv, "gxp:")) != -1) { @@ -286,8 +287,16 @@ int main(int argc, char **argv) cleanusage(); infile = argv[fz_optind++]; - if (argc - fz_optind > 0) + + if (argc - fz_optind > 0 && + (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF"))) + { outfile = argv[fz_optind++]; + } + + subset = 0; + if (argc - fz_optind > 0) + subset = 1; openxref(infile, password, 0); @@ -312,6 +321,90 @@ int main(int argc, char **argv) /* Make sure any objects hidden in compressed streams have been loaded */ preloadobjstms(); + /* Only retain the specified subset of the pages */ + if (subset) + { + fz_obj *root, *pages, *kids; + int count; + + /* Snatch pages entry from root dict */ + root = fz_dictgets(xref->trailer, "Root"); + pages = fz_keepobj(fz_dictgets(root, "Pages")); + + /* Then empty the root dict */ + while (fz_dictlen(root) > 0) + { + fz_obj *key = fz_dictgetkey(root, 0); + fz_dictdel(root, key); + } + + /* And only retain pages and type entries */ + fz_dictputs(root, "Pages", pages); + fz_dictputs(root, "Type", fz_newname("Catalog")); + fz_dropobj(pages); + + /* Create a new kids array too add into pages dict + since each element must be replaced to point to + a retained page */ + kids = fz_newarray(1); + count = 0; + + /* Retain pages specified */ + while (argc - fz_optind) + { + int page, spage, epage; + char *spec, *dash; + char *pagelist = argv[fz_optind]; + + spec = fz_strsep(&pagelist, ","); + while (spec) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = 1; + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + epage = atoi(dash + 1); + else + epage = pagecount; + } + + if (spage > epage) + page = spage, spage = epage, epage = page; + + if (spage < 1) + spage = 1; + if (epage > pagecount) + epage = pagecount; + + for (page = spage; page <= epage; page++) + { + fz_obj *pageobj = pdf_getpageobject(xref, page); + + /* Update parent reference */ + fz_dictputs(pageobj, "Parent", pages); + + /* Store page object in new kids array */ + fz_arraypush(kids, pageobj); + count++; + } + + spec = fz_strsep(&pagelist, ","); + } + + fz_optind++; + } + + /* Update page count and kids array */ + fz_dictputs(pages, "Count", fz_newint(count)); + fz_dictputs(pages, "Kids", kids); + } + /* Sweep & mark objects from the trailer */ error = sweepobj(xref, xref->trailer); if (error) @@ -339,7 +432,7 @@ int main(int argc, char **argv) } } - /* construct linked list of free object slots */ + /* Construct linked list of free object slots */ lastfree = 0; for (oid = 0; oid < xref->len; oid++) { |