summary refs log tree commit diff
path: root/source
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2015-04-16 15:50:46 +0200
committer Tor Andersson <tor.andersson@artifex.com> 2015-04-16 15:50:46 +0200
commit 102c55759d2404d3e578a2456bc487eddec998fa (patch)
tree 9a38a4450067bbb9d6a14f61cb852e642207b1fd /source
parent dbf69ce5890cd0ebe08a75d8f511a2e2e54dd3f2 (diff)
download mupdf-102c55759d2404d3e578a2456bc487eddec998fa.tar.xz
mutool clean -z option to compress streams.
Diffstat (limited to 'source')
-rw-r--r-- source/pdf/pdf-write.c 53
-rw-r--r-- source/tools/pdfclean.c 5
2 files changed, 52 insertions, 6 deletions
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index 6ad308f1..54f0ac00 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -1,5 +1,7 @@
#include "mupdf/pdf.h"
+#include <zlib.h>
+
/* #define DEBUG_LINEARIZATION */
/* #define DEBUG_HEAP_SORT */
/* #define DEBUG_WRITING */
@@ -47,8 +49,10 @@ struct pdf_write_options_s
{
FILE *out;
int do_incremental;
+ int do_tight;
int do_ascii;
int do_expand;
+ int do_deflate;
int do_garbage;
int do_linear;
int do_clean;
@@ -1565,6 +1569,24 @@ static void addhexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
pdf_drop_obj(ctx, newdp);
}
+static fz_buffer *deflatebuf(fz_context *ctx, unsigned char *p, int n) /* zlib-compress the n bytes at p into a new fz_buffer and return it; throws FZ_ERROR_GENERIC on failure */
+{
+ fz_buffer *buf;
+ uLongf csize; /* in/out for compress(): output capacity going in, compressed length coming out */
+ int t; /* zlib status code returned by compress() */
+
+ buf = fz_new_buffer(ctx, compressBound(n)); /* size the output for zlib's worst-case compressed length of n input bytes */
+ csize = buf->cap;
+ t = compress(buf->data, &csize, p, n);
+ if (t != Z_OK)
+ {
+ fz_drop_buffer(ctx, buf); /* release the output buffer before throwing so it is not leaked */
+ fz_throw(ctx, FZ_ERROR_GENERIC, "cannot deflate buffer");
+ }
+ buf->len = csize; /* record how many bytes compress() actually produced */
+ return buf;
+}
+
static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen)
{
fz_buffer *buf, *tmp;
@@ -1576,6 +1598,16 @@ static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_options *op
buf = pdf_load_raw_renumbered_stream(ctx, doc, num, gen, orig_num, orig_gen);
obj = pdf_copy_dict(ctx, obj_orig);
+
+ if (opts->do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME_Filter))
+ {
+ pdf_dict_put(ctx, obj, PDF_NAME_Filter, PDF_NAME_FlateDecode);
+
+ tmp = deflatebuf(ctx, buf->data, buf->len);
+ fz_drop_buffer(ctx, buf);
+ buf = tmp;
+ }
+
if (opts->do_ascii && isbinarystream(buf))
{
tmp = hexbuf(ctx, buf->data, buf->len);
@@ -1590,7 +1622,7 @@ static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_options *op
}
fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0);
+ pdf_fprint_obj(ctx, opts->out, obj, opts->do_tight);
fputs("stream\n", opts->out);
fwrite(buf->data, 1, buf->len, opts->out);
fputs("endstream\nendobj\n\n", opts->out);
@@ -1616,6 +1648,15 @@ static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_options *
pdf_dict_del(ctx, obj, PDF_NAME_Filter);
pdf_dict_del(ctx, obj, PDF_NAME_DecodeParms);
+ if (opts->do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME_Filter))
+ {
+ pdf_dict_put(ctx, obj, PDF_NAME_Filter, PDF_NAME_FlateDecode);
+
+ tmp = deflatebuf(ctx, buf->data, buf->len);
+ fz_drop_buffer(ctx, buf);
+ buf = tmp;
+ }
+
if (opts->do_ascii && isbinarystream(buf))
{
tmp = hexbuf(ctx, buf->data, buf->len);
@@ -1630,7 +1671,7 @@ static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_options *
pdf_drop_obj(ctx, newlen);
fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0);
+ pdf_fprint_obj(ctx, opts->out, obj, opts->do_tight);
fputs("stream\n", opts->out);
fwrite(buf->data, 1, buf->len, opts->out);
fputs("endstream\nendobj\n\n", opts->out);
@@ -1714,13 +1755,13 @@ static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_options *o
if (!pdf_is_stream(ctx, doc, num, gen))
{
fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0);
+ pdf_fprint_obj(ctx, opts->out, obj, opts->do_tight);
fputs("endobj\n\n", opts->out);
}
else if (entry->stm_ofs < 0 && entry->stm_buf == NULL)
{
fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen);
- pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0);
+ pdf_fprint_obj(ctx, opts->out, obj, opts->do_tight);
fputs("stream\nendstream\nendobj\n\n", opts->out);
}
else
@@ -1884,7 +1925,7 @@ static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_options *opt
}
fputs("trailer\n", opts->out);
- pdf_fprint_obj(ctx, opts->out, trailer, opts->do_expand == 0);
+ pdf_fprint_obj(ctx, opts->out, trailer, opts->do_tight);
fputs("\n", opts->out);
pdf_drop_obj(ctx, trailer);
@@ -2610,9 +2651,11 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w
fz_try(ctx)
{
opts.do_incremental = fz_opts->do_incremental;
+ opts.do_tight = (fz_opts->do_expand == 0) || fz_opts->do_deflate;
opts.do_expand = fz_opts->do_expand;
opts.do_garbage = fz_opts->do_garbage;
opts.do_ascii = fz_opts->do_ascii;
+ opts.do_deflate = fz_opts->do_deflate;
opts.do_linear = fz_opts->do_linear;
opts.do_clean = fz_opts->do_clean;
opts.start = 0;
diff --git a/source/tools/pdfclean.c b/source/tools/pdfclean.c
index 9d38d552..7a7f68bc 100644
--- a/source/tools/pdfclean.c
+++ b/source/tools/pdfclean.c
@@ -25,6 +25,7 @@ static void usage(void)
"\t-i\ttoggle decompression of image streams\n"
"\t-f\ttoggle decompression of font streams\n"
"\t-a\tascii hex encode binary streams\n"
+ "\t-z\tdeflate uncompressed streams\n"
"\tpages\tcomma separated list of page numbers and ranges\n"
);
exit(1);
@@ -44,12 +45,13 @@ int pdfclean_main(int argc, char **argv)
opts.do_garbage = 0;
opts.do_expand = 0;
opts.do_ascii = 0;
+ opts.do_deflate = 0;
opts.do_linear = 0;
opts.continue_on_error = 1;
opts.errors = &errors;
opts.do_clean = 0;
- while ((c = fz_getopt(argc, argv, "adfgilp:s")) != -1)
+ while ((c = fz_getopt(argc, argv, "adfgilp:sz")) != -1)
{
switch (c)
{
@@ -60,6 +62,7 @@ int pdfclean_main(int argc, char **argv)
case 'i': opts.do_expand ^= fz_expand_images; break;
case 'l': opts.do_linear ++; break;
case 'a': opts.do_ascii ++; break;
+ case 'z': opts.do_deflate ++; break;
case 's': opts.do_clean ++; break;
default: usage(); break;
}