summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- docs/mutool/examples/pdf-create.js | 2
-rw-r--r-- docs/mutool/examples/pdf-merge.js | 2
-rw-r--r-- docs/mutool/run.html | 13
-rw-r--r-- include/mupdf/pdf/document.h | 44
-rw-r--r-- source/pdf/pdf-write.c | 122
-rw-r--r-- source/tools/murun.c | 9
-rw-r--r-- source/tools/pdfclean.c | 28
-rw-r--r-- source/tools/pdfcreate.c | 19
-rw-r--r-- source/tools/pdfmerge.c | 22
9 files changed, 156 insertions, 105 deletions
diff --git a/docs/mutool/examples/pdf-create.js b/docs/mutool/examples/pdf-create.js
index 38c193a3..131e72b3 100644
--- a/docs/mutool/examples/pdf-create.js
+++ b/docs/mutool/examples/pdf-create.js
@@ -32,4 +32,4 @@ var page = pdf.addPage([0,0,300,350], 0, resources, contents)
pdf.insertPage(-1, page)
// Save the document to file.
-pdf.save("out.pdf")
+pdf.save("out.pdf", "paif")
diff --git a/docs/mutool/examples/pdf-merge.js b/docs/mutool/examples/pdf-merge.js
index a468738a..6f5d4692 100644
--- a/docs/mutool/examples/pdf-merge.js
+++ b/docs/mutool/examples/pdf-merge.js
@@ -57,7 +57,7 @@ function pdfmerge() {
srcDoc = new PDFDocument(argv[i])
copyAllPages(dstDoc, srcDoc)
}
- dstDoc.save(argv[1])
+ dstDoc.save(argv[1], "z")
}
if (argv.length < 3)
diff --git a/docs/mutool/run.html b/docs/mutool/run.html
index 0b8e86d1..f9278b2b 100644
--- a/docs/mutool/run.html
+++ b/docs/mutool/run.html
@@ -481,8 +481,18 @@ using low level access to the objects and streams contained in a PDF file.
<dd>Get access to the raw PDFDocument from a Document; returns null if the document is not a PDF.
<dt>PDFDocument#toDocument()
<dd>Cast the PDF document to a Document.
-<dt>PDFDocument#save(fileName)
+<dt>PDFDocument#save(fileName, options)
<dd>Write the PDF document to file.
+The write options are a string of flag characters:
+<br>l: linearize,
+<br>g: garbage collect, gg: ...and compact, ggg: ...and de-duplicate,
+<br>p: pretty-print objects,
+<br>a: ascii hex encode streams,
+<br>f: compress fonts,
+<br>i: compress images,
+<br>z: compress all streams,
+<br>d: decompress all streams (except fonts or images if 'f' or 'i'),
+<br>s: sanitize content streams.
</dl>
<h3>
@@ -631,7 +641,6 @@ There are several areas in MuPDF that still need bindings to access from JavaScr
<ul>
<li>Shadings
<li>PDFDocument#graftObject()
-<li>PDFDocument#save() -- write options
<li>PDFWriteDevice
<li>DocumentWriter
</ul>
diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h
index b598cbe6..51c5427b 100644
--- a/include/mupdf/pdf/document.h
+++ b/include/mupdf/pdf/document.h
@@ -282,14 +282,6 @@ int pdf_recognize(fz_context *doc, const char *magic);
typedef struct pdf_write_options_s pdf_write_options;
-/* An enumeration of bitflags to use in the 'do_expand' field of the options struct. */
-enum
-{
- PDF_EXPAND_IMAGES = 1,
- PDF_EXPAND_FONTS = 2,
- PDF_EXPAND_ALL = -1
-};
-
/*
In calls to fz_save_document, the following options structure can be used
to control aspects of the writing process. This structure may grow
@@ -297,23 +289,33 @@ enum
*/
struct pdf_write_options_s
{
- int do_incremental; /* Write just the changed objects */
- int do_ascii; /* If non-zero then attempt (where possible) to make
- the output ascii. */
- int do_deflate; /* If non-zero then attempt to compress streams. */
- int do_expand; /* Bitflags; each non zero bit indicates an aspect
- of the file that should be 'expanded' on
- writing. */
- int do_garbage; /* If non-zero then attempt (where possible) to
- garbage collect the file before writing. */
- int do_linear; /* If non-zero then write linearised. */
- int do_clean; /* If non-zero then clean contents */
- int continue_on_error; /* If non-zero, errors are (optionally)
- counted and writing continues. */
+ int do_incremental; /* Write just the changed objects. */
+ int do_pretty; /* Pretty-print dictionaries and arrays. */
+ int do_ascii; /* ASCII hex encode binary streams. */
+ int do_compress; /* Compress streams. */
+ int do_compress_images; /* Compress (or leave compressed) image streams. */
+ int do_compress_fonts; /* Compress (or leave compressed) font streams. */
+ int do_decompress; /* Decompress streams (except when compressing images/fonts). */
+ int do_garbage; /* Garbage collect objects before saving; 1=gc, 2=re-number, 3=de-duplicate. */
+ int do_linear; /* Write linearised. */
+ int do_clean; /* Sanitize content streams. */
+ int continue_on_error; /* If set, errors are (optionally) counted and writing continues. */
int *errors; /* Pointer to a place to store a count of errors */
};
/*
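+ Parse option string into a pdf_write_options struct.
+ Matches the options to 'mutool clean', plus 'p' to pretty-print:
+ g: garbage collect (gg: ...and compact, ggg: ...and de-duplicate)
+ d: decompress streams (except fonts or images if 'f' or 'i')
+ f, i, z: compress fonts, images, all streams
+ p, a: pretty-print objects, ascii hex encode streams
+ l: linearize
+ s: sanitize content streams
+*/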
+void pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args);
+
+/*
pdf_save_document: Write out the document to a file with all changes finalised.
*/
void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, pdf_write_options *opts);
diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c
index 4e236748..b50e5db3 100644
--- a/source/pdf/pdf-write.c
+++ b/source/pdf/pdf-write.c
@@ -53,7 +53,9 @@ struct pdf_write_state_s
int do_tight;
int do_ascii;
int do_expand;
- int do_deflate;
+ int do_compress;
+ int do_compress_images;
+ int do_compress_fonts;
int do_garbage;
int do_linear;
int do_clean;
@@ -1630,7 +1632,7 @@ static fz_buffer *deflatebuf(fz_context *ctx, unsigned char *p, int n)
return buf;
}
-static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen)
+static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate)
{
fz_buffer *buf, *tmp;
pdf_obj *newlen;
@@ -1642,7 +1644,7 @@ static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts
obj = pdf_copy_dict(ctx, obj_orig);
- if (opts->do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME_Filter))
+ if (do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME_Filter))
{
pdf_dict_put(ctx, obj, PDF_NAME_Filter, PDF_NAME_FlateDecode);
@@ -1676,7 +1678,7 @@ static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts
pdf_drop_obj(ctx, obj);
}
-static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen)
+static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate)
{
fz_buffer *buf, *tmp;
pdf_obj *newlen;
@@ -1693,7 +1695,7 @@ static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *op
pdf_dict_del(ctx, obj, PDF_NAME_Filter);
pdf_dict_del(ctx, obj, PDF_NAME_DecodeParms);
- if (opts->do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME_Filter))
+ if (do_deflate)
{
pdf_dict_put(ctx, obj, PDF_NAME_Filter, PDF_NAME_FlateDecode);
@@ -1738,7 +1740,7 @@ static int is_image_filter(char *s)
return 0;
}
-static int filter_implies_image(fz_context *ctx, pdf_document *doc, pdf_obj *o)
+static int filter_implies_image(fz_context *ctx, pdf_obj *o)
{
if (!o)
return 0;
@@ -1755,6 +1757,39 @@ static int filter_implies_image(fz_context *ctx, pdf_document *doc, pdf_obj *o)
return 0;
}
+static int is_image_stream(fz_context *ctx, pdf_obj *obj)
+{
+ pdf_obj *o;
+ if ((o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_XObject)))
+ if ((o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_Image)))
+ return 1;
+ if (o = pdf_dict_get(ctx, obj, PDF_NAME_Filter), filter_implies_image(ctx, o))
+ return 1;
+ if (pdf_dict_get(ctx, obj, PDF_NAME_Width) != NULL && pdf_dict_get(ctx, obj, PDF_NAME_Height) != NULL)
+ return 1;
+ return 0;
+}
+
+static int is_font_stream(fz_context *ctx, pdf_obj *obj)
+{
+ pdf_obj *o;
+ if (o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_Font))
+ return 1;
+ if (o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_FontDescriptor))
+ return 1;
+ if (pdf_dict_get(ctx, obj, PDF_NAME_Length1) != NULL)
+ return 1;
+ if (pdf_dict_get(ctx, obj, PDF_NAME_Length2) != NULL)
+ return 1;
+ if (pdf_dict_get(ctx, obj, PDF_NAME_Length3) != NULL)
+ return 1;
+ if (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_Type1C))
+ return 1;
+ if (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_CIDFontType0C))
+ return 1;
+ return 0;
+}
+
static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs)
{
pdf_xref_entry *entry;
@@ -1813,39 +1848,18 @@ static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opt
}
else
{
- int dontexpand = 0;
- if (opts->do_expand != 0 && opts->do_expand != PDF_EXPAND_ALL)
- {
- pdf_obj *o;
-
- if ((o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_XObject)) &&
- (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_Image)))
- dontexpand = !(opts->do_expand & PDF_EXPAND_IMAGES);
- if (o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_Font))
- dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS);
- if (o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_FontDescriptor))
- dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS);
- if (pdf_dict_get(ctx, obj, PDF_NAME_Length1) != NULL)
- dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS);
- if (pdf_dict_get(ctx, obj, PDF_NAME_Length2) != NULL)
- dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS);
- if (pdf_dict_get(ctx, obj, PDF_NAME_Length3) != NULL)
- dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS);
- if (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_Type1C))
- dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS);
- if (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_CIDFontType0C))
- dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS);
- if (o = pdf_dict_get(ctx, obj, PDF_NAME_Filter), filter_implies_image(ctx, doc, o))
- dontexpand = !(opts->do_expand & PDF_EXPAND_IMAGES);
- if (pdf_dict_get(ctx, obj, PDF_NAME_Width) != NULL && pdf_dict_get(ctx, obj, PDF_NAME_Height) != NULL)
- dontexpand = !(opts->do_expand & PDF_EXPAND_IMAGES);
- }
fz_try(ctx)
{
- if (opts->do_expand && !dontexpand && !pdf_is_jpx_image(ctx, obj))
- expandstream(ctx, doc, opts, obj, num, gen);
+ int do_deflate = opts->do_compress;
+ int do_expand = opts->do_expand;
+ if (opts->do_compress_images && is_image_stream(ctx, obj))
+ do_deflate = 1, do_expand = 0;
+ if (opts->do_compress_fonts && is_font_stream(ctx, obj))
+ do_deflate = 1, do_expand = 0;
+ if (do_expand)
+ expandstream(ctx, doc, opts, obj, num, gen, do_deflate);
else
- copystream(ctx, doc, opts, obj, num, gen);
+ copystream(ctx, doc, opts, obj, num, gen, do_deflate);
}
fz_catch(ctx)
{
@@ -2677,15 +2691,19 @@ static void initialise_write_state(fz_context *ctx, pdf_document *doc, const pdf
int xref_len = pdf_xref_len(ctx, doc);
opts->do_incremental = in_opts->do_incremental;
- opts->do_tight = (in_opts->do_expand == 0) || in_opts->do_deflate;
- opts->do_expand = in_opts->do_expand;
- opts->do_garbage = in_opts->do_garbage;
opts->do_ascii = in_opts->do_ascii;
- opts->do_deflate = in_opts->do_deflate;
+ opts->do_tight = !in_opts->do_pretty;
+ opts->do_expand = in_opts->do_decompress;
+ opts->do_compress = in_opts->do_compress;
+ opts->do_compress_images = in_opts->do_compress_images;
+ opts->do_compress_fonts = in_opts->do_compress_fonts;
+
+ opts->do_garbage = in_opts->do_garbage;
opts->do_linear = in_opts->do_linear;
opts->do_clean = in_opts->do_clean;
opts->start = 0;
opts->main_xref_offset = INT_MIN;
+
/* We deliberately make these arrays long enough to cope with
* 1 to n access rather than 0..n-1, and add space for 2 new
* extra entries that may be required for linearization. */
@@ -2730,6 +2748,30 @@ static void finalise_write_state(fz_context *ctx, pdf_write_state *opts)
fz_drop_output(ctx, opts->out);
}
+void pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args)
+{
+ int c;
+
+ memset(opts, 0, sizeof *opts);
+
+ while ((c = *args++))
+ {
+ switch (c)
+ {
+ case 'd': opts->do_decompress += 1; break;
+ case 'z': opts->do_compress += 1; break;
+ case 'f': opts->do_compress_fonts += 1; break;
+ case 'i': opts->do_compress_images += 1; break;
+ case 'p': opts->do_pretty += 1; break;
+ case 'a': opts->do_ascii += 1; break;
+ case 'g': opts->do_garbage += 1; break;
+ case 'l': opts->do_linear += 1; break;
+ case 's': opts->do_clean += 1; break;
+ default: fz_warn(ctx, "unrecognized pdf-write option: '%c'", c);
+ }
+ }
+}
+
void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, pdf_write_options *in_opts)
{
pdf_write_options opts_defaults = { 0 };
diff --git a/source/tools/murun.c b/source/tools/murun.c
index 88d429cc..c2a9f0bc 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -2513,10 +2513,13 @@ static void ffi_PDFDocument_save(js_State *J)
fz_context *ctx = js_getcontext(J);
pdf_document *pdf = js_touserdata(J, 0, "pdf_document");
const char *filename = js_tostring(J, 1);
+ const char *options = js_isdefined(J, 2) ? js_tostring(J, 2) : "ga";
+ pdf_write_options pwo;
- fz_try(ctx)
- pdf_save_document(ctx, pdf, filename, NULL);
- fz_catch(ctx)
+ fz_try(ctx) {
+ pdf_parse_write_options(ctx, &pwo, options);
+ pdf_save_document(ctx, pdf, filename, &pwo);
+ } fz_catch(ctx)
rethrow(J);
}
diff --git a/source/tools/pdfclean.c b/source/tools/pdfclean.c
index f703b155..22937186 100644
--- a/source/tools/pdfclean.c
+++ b/source/tools/pdfclean.c
@@ -19,13 +19,13 @@ static void usage(void)
"\t-g\tgarbage collect unused objects\n"
"\t-gg\tin addition to -g compact xref table\n"
"\t-ggg\tin addition to -gg merge duplicate objects\n"
- "\t-s\tclean content streams\n"
- "\t-d\tdecompress all streams\n"
"\t-l\tlinearize PDF\n"
- "\t-i\ttoggle decompression of image streams\n"
- "\t-f\ttoggle decompression of font streams\n"
"\t-a\tascii hex encode binary streams\n"
+ "\t-d\tdecompress streams\n"
"\t-z\tdeflate uncompressed streams\n"
+ "\t-f\tcompress font streams\n"
+ "\t-i\tcompress image streams\n"
+ "\t-s\tclean content streams\n"
"\tpages\tcomma separated list of page numbers and ranges\n"
);
exit(1);
@@ -49,18 +49,22 @@ int pdfclean_main(int argc, char **argv)
switch (c)
{
case 'p': password = fz_optarg; break;
- case 'g': opts.do_garbage ++; break;
- case 'd': opts.do_expand ^= PDF_EXPAND_ALL; break;
- case 'f': opts.do_expand ^= PDF_EXPAND_FONTS; break;
- case 'i': opts.do_expand ^= PDF_EXPAND_IMAGES; break;
- case 'l': opts.do_linear ++; break;
- case 'a': opts.do_ascii ++; break;
- case 'z': opts.do_deflate ++; break;
- case 's': opts.do_clean ++; break;
+
+ case 'd': opts.do_decompress += 1; break;
+ case 'z': opts.do_compress += 1; break;
+ case 'f': opts.do_compress_fonts += 1; break;
+ case 'i': opts.do_compress_images += 1; break;
+ case 'a': opts.do_ascii += 1; break;
+ case 'g': opts.do_garbage += 1; break;
+ case 'l': opts.do_linear += 1; break;
+ case 's': opts.do_clean += 1; break;
default: usage(); break;
}
}
+ if ((opts.do_ascii || opts.do_decompress) && !opts.do_compress)
+ opts.do_pretty = 1;
+
if (argc - fz_optind < 1)
usage();
diff --git a/source/tools/pdfcreate.c b/source/tools/pdfcreate.c
index 6f116c6a..839e493e 100644
--- a/source/tools/pdfcreate.c
+++ b/source/tools/pdfcreate.c
@@ -9,13 +9,9 @@
static void usage(void)
{
fprintf(stderr,
- "usage: mutool create [-o output.pdf] [-adlsz] page.txt [page2.txt ...]\n"
+ "usage: mutool create [-o output.pdf] [-O options] page.txt [page2.txt ...]\n"
"\t-o\tname of PDF file to create\n"
- "\t-a\tascii hex encode binary streams\n"
- "\t-d\tdecompress all streams\n"
- "\t-l\tlinearize PDF\n"
- "\t-s\tclean content streams\n"
- "\t-z\tdeflate uncompressed streams\n"
+ "\t-O\tPDF write options\n"
"\tpage.txt file defines page size, fonts, images and contents\n"
);
exit(1);
@@ -145,18 +141,15 @@ int pdfcreate_main(int argc, char **argv)
{
pdf_write_options opts = { 0 };
char *output = "out.pdf";
+ char *flags = "z";
int i, c;
- while ((c = fz_getopt(argc, argv, "adlszo:")) != -1)
+ while ((c = fz_getopt(argc, argv, "o:O:")) != -1)
{
switch (c)
{
case 'o': output = fz_optarg; break;
- case 'a': opts.do_ascii ++; break;
- case 'd': opts.do_expand ^= PDF_EXPAND_ALL; break;
- case 'l': opts.do_linear ++; break;
- case 's': opts.do_clean ++; break;
- case 'z': opts.do_deflate ++; break;
+ case 'O': flags = fz_optarg; break;
default: usage(); break;
}
}
@@ -171,6 +164,8 @@ int pdfcreate_main(int argc, char **argv)
exit(1);
}
+ pdf_parse_write_options(ctx, &opts, flags);
+
doc = pdf_create_document(ctx);
for (i = fz_optind; i < argc; ++i)
diff --git a/source/tools/pdfmerge.c b/source/tools/pdfmerge.c
index 13905460..f63a90c7 100644
--- a/source/tools/pdfmerge.c
+++ b/source/tools/pdfmerge.c
@@ -9,14 +9,11 @@
static void usage(void)
{
fprintf(stderr,
- "usage: mutool merge [-o output.pdf] [-adlsz] input.pdf [pages] [input2.pdf] [pages2] ...\n"
+ "usage: mutool merge [-o output.pdf] [-O options] input.pdf [pages] [input2.pdf] [pages2] ...\n"
"\t-o\tname of PDF file to create\n"
- "\t-a\tascii hex encode binary streams\n"
- "\t-d\tdecompress all streams\n"
- "\t-l\tlinearize PDF\n"
- "\t-s\tclean content streams\n"
- "\t-z\tdeflate uncompressed streams\n"
- "\tinput.pdf name of first PDF file from which we are copying pages\n"
+ "\t-O\tPDF write options\n"
+ "\tinput.pdf\tname of first PDF file from which we are copying pages\n"
+ "\tpages: comma separated list of page ranges (for example: 1-5,6,10-)\n"
);
exit(1);
}
@@ -149,19 +146,16 @@ int pdfmerge_main(int argc, char **argv)
{
pdf_write_options opts = { 0 };
char *output = "out.pdf";
+ char *flags = "";
char *input;
int c;
- while ((c = fz_getopt(argc, argv, "adlszo:")) != -1)
+ while ((c = fz_getopt(argc, argv, "o:O:")) != -1)
{
switch (c)
{
case 'o': output = fz_optarg; break;
- case 'a': opts.do_ascii ++; break;
- case 'd': opts.do_expand ^= PDF_EXPAND_ALL; break;
- case 'l': opts.do_linear ++; break;
- case 's': opts.do_clean ++; break;
- case 'z': opts.do_deflate ++; break;
+ case 'O': flags = fz_optarg; break;
default: usage(); break;
}
}
@@ -176,6 +170,8 @@ int pdfmerge_main(int argc, char **argv)
exit(1);
}
+ pdf_parse_write_options(ctx, &opts, flags);
+
fz_try(ctx)
{
doc_des = pdf_create_document(ctx);