From e4d36ce68e0467ac4702f717386934a44970f4e5 Mon Sep 17 00:00:00 2001
From: Robin Watts <robin.watts@artifex.com>
Date: Wed, 19 Mar 2014 19:04:50 +0000
Subject: Add routine to clean pdf content streams for pages.

New routine to filter the content streams for pages, xobjects,
type3 charprocs, patterns etc. The filtered streams are guaranteed
to have properly matched q/Q operators, and to leave the top-level
CTM unchanged. Additionally we remove (some) repeated settings of
colors etc. This filtering can be extended to be smarter later.

The idea is both to repair the streams after editing and to leave
them in a form that can be easily appended to.

This is preparatory to work on Bates numbering and Watermarking.

Currently the streams produced are uncompressed.
---
 source/tools/pdfclean.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'source/tools')

diff --git a/source/tools/pdfclean.c b/source/tools/pdfclean.c
index 74b70394..d2ded0b7 100644
--- a/source/tools/pdfclean.c
+++ b/source/tools/pdfclean.c
@@ -25,6 +25,7 @@ static void usage(void)
 		"\t-g\tgarbage collect unused objects\n"
 		"\t-gg\tin addition to -g compact xref table\n"
 		"\t-ggg\tin addition to -gg merge duplicate objects\n"
+		"\t-s\tclean content streams\n"
 		"\t-d\tdecompress all streams\n"
 		"\t-l\tlinearize PDF\n"
 		"\t-i\ttoggle decompression of image streams\n"
@@ -260,8 +261,9 @@ int pdfclean_main(int argc, char **argv)
 	opts.do_linear = 0;
 	opts.continue_on_error = 1;
 	opts.errors = &errors;
+	opts.do_clean = 0;
 
-	while ((c = fz_getopt(argc, argv, "adfgilp:")) != -1)
+	while ((c = fz_getopt(argc, argv, "adfgilp:s")) != -1)
 	{
 		switch (c)
 		{
@@ -272,6 +274,7 @@ int pdfclean_main(int argc, char **argv)
 		case 'i': opts.do_expand ^= fz_expand_images; break;
 		case 'l': opts.do_linear ++; break;
 		case 'a': opts.do_ascii ++; break;
+		case 's': opts.do_clean ++; break;
 		default: usage(); break;
 		}
 	}
-- 
cgit v1.2.3