From 102c55759d2404d3e578a2456bc487eddec998fa Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Thu, 16 Apr 2015 15:50:46 +0200 Subject: mutool clean -z option to compress streams. --- source/pdf/pdf-write.c | 53 ++++++++++++++++++++++++++++++++++++++++++++----- source/tools/pdfclean.c | 5 ++++- 2 files changed, 52 insertions(+), 6 deletions(-) (limited to 'source') diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c index 6ad308f1..54f0ac00 100644 --- a/source/pdf/pdf-write.c +++ b/source/pdf/pdf-write.c @@ -1,5 +1,7 @@ #include "mupdf/pdf.h" +#include <zlib.h> + /* #define DEBUG_LINEARIZATION */ /* #define DEBUG_HEAP_SORT */ /* #define DEBUG_WRITING */ @@ -47,8 +49,10 @@ struct pdf_write_options_s { FILE *out; int do_incremental; + int do_tight; int do_ascii; int do_expand; + int do_deflate; int do_garbage; int do_linear; int do_clean; @@ -1565,6 +1569,24 @@ static void addhexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict) pdf_drop_obj(ctx, newdp); } +static fz_buffer *deflatebuf(fz_context *ctx, unsigned char *p, int n) +{ + fz_buffer *buf; + uLongf csize; + int t; + + buf = fz_new_buffer(ctx, compressBound(n)); + csize = buf->cap; + t = compress(buf->data, &csize, p, n); + if (t != Z_OK) + { + fz_drop_buffer(ctx, buf); + fz_throw(ctx, FZ_ERROR_GENERIC, "cannot deflate buffer"); + } + buf->len = csize; + return buf; +} + static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_options *opts, pdf_obj *obj_orig, int num, int gen) { fz_buffer *buf, *tmp; @@ -1576,6 +1598,16 @@ static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_options *op buf = pdf_load_raw_renumbered_stream(ctx, doc, num, gen, orig_num, orig_gen); obj = pdf_copy_dict(ctx, obj_orig); + + if (opts->do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME_Filter)) + { + pdf_dict_put(ctx, obj, PDF_NAME_Filter, PDF_NAME_FlateDecode); + + tmp = deflatebuf(ctx, buf->data, buf->len); + fz_drop_buffer(ctx, buf); + buf = tmp; + } + if (opts->do_ascii && isbinarystream(buf)) { 
tmp = hexbuf(ctx, buf->data, buf->len); @@ -1590,7 +1622,7 @@ static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_options *op } fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen); - pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0); + pdf_fprint_obj(ctx, opts->out, obj, opts->do_tight); fputs("stream\n", opts->out); fwrite(buf->data, 1, buf->len, opts->out); fputs("endstream\nendobj\n\n", opts->out); @@ -1616,6 +1648,15 @@ static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_options * pdf_dict_del(ctx, obj, PDF_NAME_Filter); pdf_dict_del(ctx, obj, PDF_NAME_DecodeParms); + if (opts->do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME_Filter)) + { + pdf_dict_put(ctx, obj, PDF_NAME_Filter, PDF_NAME_FlateDecode); + + tmp = deflatebuf(ctx, buf->data, buf->len); + fz_drop_buffer(ctx, buf); + buf = tmp; + } + if (opts->do_ascii && isbinarystream(buf)) { tmp = hexbuf(ctx, buf->data, buf->len); @@ -1630,7 +1671,7 @@ static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_options * pdf_drop_obj(ctx, newlen); fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen); - pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0); + pdf_fprint_obj(ctx, opts->out, obj, opts->do_tight); fputs("stream\n", opts->out); fwrite(buf->data, 1, buf->len, opts->out); fputs("endstream\nendobj\n\n", opts->out); @@ -1714,13 +1755,13 @@ static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_options *o if (!pdf_is_stream(ctx, doc, num, gen)) { fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen); - pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0); + pdf_fprint_obj(ctx, opts->out, obj, opts->do_tight); fputs("endobj\n\n", opts->out); } else if (entry->stm_ofs < 0 && entry->stm_buf == NULL) { fz_fprintf(ctx, opts->out, "%d %d obj\n", num, gen); - pdf_fprint_obj(ctx, opts->out, obj, opts->do_expand == 0); + pdf_fprint_obj(ctx, opts->out, obj, opts->do_tight); fputs("stream\nendstream\nendobj\n\n", opts->out); } else @@ -1884,7 
+1925,7 @@ static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_options *opt } fputs("trailer\n", opts->out); - pdf_fprint_obj(ctx, opts->out, trailer, opts->do_expand == 0); + pdf_fprint_obj(ctx, opts->out, trailer, opts->do_tight); fputs("\n", opts->out); pdf_drop_obj(ctx, trailer); @@ -2610,9 +2651,11 @@ void pdf_write_document(fz_context *ctx, pdf_document *doc, char *filename, fz_w fz_try(ctx) { opts.do_incremental = fz_opts->do_incremental; + opts.do_tight = (fz_opts->do_expand == 0) || fz_opts->do_deflate; opts.do_expand = fz_opts->do_expand; opts.do_garbage = fz_opts->do_garbage; opts.do_ascii = fz_opts->do_ascii; + opts.do_deflate = fz_opts->do_deflate; opts.do_linear = fz_opts->do_linear; opts.do_clean = fz_opts->do_clean; opts.start = 0; diff --git a/source/tools/pdfclean.c b/source/tools/pdfclean.c index 9d38d552..7a7f68bc 100644 --- a/source/tools/pdfclean.c +++ b/source/tools/pdfclean.c @@ -25,6 +25,7 @@ static void usage(void) "\t-i\ttoggle decompression of image streams\n" "\t-f\ttoggle decompression of font streams\n" "\t-a\tascii hex encode binary streams\n" + "\t-z\tdeflate uncompressed streams\n" "\tpages\tcomma separated list of page numbers and ranges\n" ); exit(1); @@ -44,12 +45,13 @@ int pdfclean_main(int argc, char **argv) opts.do_garbage = 0; opts.do_expand = 0; opts.do_ascii = 0; + opts.do_deflate = 0; opts.do_linear = 0; opts.continue_on_error = 1; opts.errors = &errors; opts.do_clean = 0; - while ((c = fz_getopt(argc, argv, "adfgilp:s")) != -1) + while ((c = fz_getopt(argc, argv, "adfgilp:sz")) != -1) { switch (c) { @@ -60,6 +62,7 @@ int pdfclean_main(int argc, char **argv) case 'i': opts.do_expand ^= fz_expand_images; break; case 'l': opts.do_linear ++; break; case 'a': opts.do_ascii ++; break; + case 'z': opts.do_deflate ++; break; case 's': opts.do_clean ++; break; default: usage(); break; } -- cgit v1.2.3