Add support to retain only the specified pages when using pdfclean.

author: Sebastian Rasmussen <sebras@hotmail.com> 2010-04-24 23:58:50 +0200
committer: Sebastian Rasmussen <sebras@hotmail.com> 2010-04-24 23:58:50 +0200
commit: c8ee25ca3c114ccd4bceae041835bfc55a993e85 (patch)
tree: 0f04ddc0c4c9c476c8be54d93222750d8a62738e /apps/pdfclean.c
parent: 41af20e73f952c0581c70267f767b8a1d2871693 (diff)
download: mupdf-c8ee25ca3c114ccd4bceae041835bfc55a993e85.tar.xz
1 files changed, 96 insertions, 3 deletions
diff --git a/apps/pdfclean.c b/apps/pdfclean.c
index 755cb39c..5fce8154 100644
--- a/apps/pdfclean.c
+++ b/apps/pdfclean.c
@@ -255,7 +255,7 @@ static void savexref(void)
 static void cleanusage(void)
 {
 	fprintf(stderr,
-		"usage: pdfclean [options] input.pdf [outfile.pdf]\n"
+		"usage: pdfclean [options] input.pdf [outfile.pdf] [pages]\n"
 		"  -p -\tpassword for decryption\n"
 		"  -g  \tgarbage collect unused objects\n"
 		"  -x  \texpand compressed streams\n");
@@ -270,6 +270,7 @@ int main(int argc, char **argv)
 	fz_error error;
 	int c, oid;
 	int lastfree;
+	int subset;
 
 	while ((c = fz_getopt(argc, argv, "gxp:")) != -1)
 	{
@@ -286,8 +287,16 @@ int main(int argc, char **argv)
 		cleanusage();
 
 	infile = argv[fz_optind++];
-	if (argc - fz_optind > 0)
+
+	if (argc - fz_optind > 0 &&
+		(strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF")))
+	{
 		outfile = argv[fz_optind++];
+	}
+
+	subset = 0;
+	if (argc - fz_optind > 0)
+		subset = 1;
 
 	openxref(infile, password, 0);
 
@@ -312,6 +321,90 @@ int main(int argc, char **argv)
 	/* Make sure any objects hidden in compressed streams have been loaded */
 	preloadobjstms();
 
+	/* Only retain the specified subset of the pages */
+	if (subset)
+	{
+		fz_obj *root, *pages, *kids;
+		int count;
+
+		/* Snatch pages entry from root dict */
+		root = fz_dictgets(xref->trailer, "Root");
+		pages = fz_keepobj(fz_dictgets(root, "Pages"));
+
+		/* Then empty the root dict */
+		while (fz_dictlen(root) > 0)
+		{
+			fz_obj *key = fz_dictgetkey(root, 0);
+			fz_dictdel(root, key);
+		}
+
+		/* And only retain pages and type entries */
+		fz_dictputs(root, "Pages", pages);
+		fz_dictputs(root, "Type", fz_newname("Catalog"));
+		fz_dropobj(pages);
+
+		/* Create a new kids array to add into pages dict
+		   since each element must be replaced to point to
+		   a retained page */
+		kids = fz_newarray(1);
+		count = 0;
+
+		/* Retain pages specified */
+		while (argc - fz_optind)
+		{
+			int page, spage, epage;
+			char *spec, *dash;
+			char *pagelist = argv[fz_optind];
+
+			spec = fz_strsep(&pagelist, ",");
+			while (spec)
+			{
+				dash = strchr(spec, '-');
+
+				if (dash == spec)
+					spage = epage = 1;
+				else
+					spage = epage = atoi(spec);
+
+				if (dash)
+				{
+					if (strlen(dash) > 1)
+						epage = atoi(dash + 1);
+					else
+						epage = pagecount;
+				}
+
+				if (spage > epage)
+					page = spage, spage = epage, epage = page;
+
+				if (spage < 1)
+					spage = 1;
+				if (epage > pagecount)
+					epage = pagecount;
+
+				for (page = spage; page <= epage; page++)
+				{
+					fz_obj *pageobj = pdf_getpageobject(xref, page);
+
+					/* Update parent reference */
+					fz_dictputs(pageobj, "Parent", pages);
+
+					/* Store page object in new kids array */
+					fz_arraypush(kids, pageobj);
+					count++;
+				}
+
+				spec = fz_strsep(&pagelist, ",");
+			}
+
+			fz_optind++;
+		}
+
+		/* Update page count and kids array */
+		fz_dictputs(pages, "Count", fz_newint(count));
+		fz_dictputs(pages, "Kids", kids);
+	}
+
 	/* Sweep & mark objects from the trailer */
 	error = sweepobj(xref, xref->trailer);
 	if (error)
@@ -339,7 +432,7 @@ int main(int argc, char **argv)
 		}
 	}
 
-	/* construct linked list of free object slots */
+	/* Construct linked list of free object slots */
 	lastfree = 0;
 	for (oid = 0; oid < xref->len; oid++)
 	{
author	Sebastian Rasmussen <sebras@hotmail.com>	2010-04-24 23:58:50 +0200
committer	Sebastian Rasmussen <sebras@hotmail.com>	2010-04-24 23:58:50 +0200
commit	c8ee25ca3c114ccd4bceae041835bfc55a993e85 (patch)
tree	0f04ddc0c4c9c476c8be54d93222750d8a62738e /apps/pdfclean.c
parent	41af20e73f952c0581c70267f767b8a1d2871693 (diff)
download	mupdf-c8ee25ca3c114ccd4bceae041835bfc55a993e85.tar.xz