diff options
-rw-r--r-- | docs/mutool/examples/pdf-create.js | 2 | ||||
-rw-r--r-- | docs/mutool/examples/pdf-merge.js | 2 | ||||
-rw-r--r-- | docs/mutool/run.html | 13 | ||||
-rw-r--r-- | include/mupdf/pdf/document.h | 44 | ||||
-rw-r--r-- | source/pdf/pdf-write.c | 122 | ||||
-rw-r--r-- | source/tools/murun.c | 9 | ||||
-rw-r--r-- | source/tools/pdfclean.c | 28 | ||||
-rw-r--r-- | source/tools/pdfcreate.c | 19 | ||||
-rw-r--r-- | source/tools/pdfmerge.c | 22 |
9 files changed, 156 insertions, 105 deletions
diff --git a/docs/mutool/examples/pdf-create.js b/docs/mutool/examples/pdf-create.js index 38c193a3..131e72b3 100644 --- a/docs/mutool/examples/pdf-create.js +++ b/docs/mutool/examples/pdf-create.js @@ -32,4 +32,4 @@ var page = pdf.addPage([0,0,300,350], 0, resources, contents) pdf.insertPage(-1, page) // Save the document to file. -pdf.save("out.pdf") +pdf.save("out.pdf", "paif") diff --git a/docs/mutool/examples/pdf-merge.js b/docs/mutool/examples/pdf-merge.js index a468738a..6f5d4692 100644 --- a/docs/mutool/examples/pdf-merge.js +++ b/docs/mutool/examples/pdf-merge.js @@ -57,7 +57,7 @@ function pdfmerge() { srcDoc = new PDFDocument(argv[i]) copyAllPages(dstDoc, srcDoc) } - dstDoc.save(argv[1]) + dstDoc.save(argv[1], "z") } if (argv.length < 3) diff --git a/docs/mutool/run.html b/docs/mutool/run.html index 0b8e86d1..f9278b2b 100644 --- a/docs/mutool/run.html +++ b/docs/mutool/run.html @@ -481,8 +481,18 @@ using low level access to the objects and streams contained in a PDF file. <dd>Get access to the raw PDFDocument from a Document; returns null if the document is not a PDF. <dt>PDFDocument#toDocument() <dd>Cast the PDF document to a Document. -<dt>PDFDocument#save(fileName) +<dt>PDFDocument#save(fileName, options) <dd>Write the PDF document to file. +The write options are a string of flag characters: +<br>l: linearize, +<br>g: garbage collect, gg: ...and compact, ggg: ...and de-duplicate, +<br>p: pretty-print objects, +<br>a: ascii hex encode streams, +<br>f: compress fonts, +<br>i: compress images, +<br>z: compress all streams, +<br>d: decompress all streams (except fonts or images if 'f' or 'i'), +<br>s: sanitize content streams. 
</dl> <h3> @@ -631,7 +641,6 @@ There are several areas in MuPDF that still need bindings to access from JavaScr <ul> <li>Shadings <li>PDFDocument#graftObject() -<li>PDFDocument#save() -- write options <li>PDFWriteDevice <li>DocumentWriter </ul> diff --git a/include/mupdf/pdf/document.h b/include/mupdf/pdf/document.h index b598cbe6..51c5427b 100644 --- a/include/mupdf/pdf/document.h +++ b/include/mupdf/pdf/document.h @@ -282,14 +282,6 @@ int pdf_recognize(fz_context *doc, const char *magic); typedef struct pdf_write_options_s pdf_write_options; -/* An enumeration of bitflags to use in the 'do_expand' field of the options struct. */ -enum -{ - PDF_EXPAND_IMAGES = 1, - PDF_EXPAND_FONTS = 2, - PDF_EXPAND_ALL = -1 -}; - /* In calls to fz_save_document, the following options structure can be used to control aspects of the writing process. This structure may grow @@ -297,23 +289,33 @@ enum */ struct pdf_write_options_s { - int do_incremental; /* Write just the changed objects */ - int do_ascii; /* If non-zero then attempt (where possible) to make - the output ascii. */ - int do_deflate; /* If non-zero then attempt to compress streams. */ - int do_expand; /* Bitflags; each non zero bit indicates an aspect - of the file that should be 'expanded' on - writing. */ - int do_garbage; /* If non-zero then attempt (where possible) to - garbage collect the file before writing. */ - int do_linear; /* If non-zero then write linearised. */ - int do_clean; /* If non-zero then clean contents */ - int continue_on_error; /* If non-zero, errors are (optionally) - counted and writing continues. */ + int do_incremental; /* Write just the changed objects. */ + int do_pretty; /* Pretty-print dictionaries and arrays. */ + int do_ascii; /* ASCII hex encode binary streams. */ + int do_compress; /* Compress streams. */ + int do_compress_images; /* Compress (or leave compressed) image streams. */ + int do_compress_fonts; /* Compress (or leave compressed) font streams. 
*/ + int do_decompress; /* Decompress streams (except when compressing images/fonts). */ + int do_garbage; /* Garbage collect objects before saving; 1=gc, 2=re-number, 3=de-duplicate. */ + int do_linear; /* Write linearised. */ + int do_clean; /* Sanitize content streams. */ + int continue_on_error; /* If set, errors are (optionally) counted and writing continues. */ int *errors; /* Pointer to a place to store a count of errors */ }; /* + Parse option string into a pdf_write_options struct. + Each character sets one flag; repeat 'g' for stronger garbage collection. + Mirrors the command line options to 'mutool clean', except for 'p' + (which is the password option there): + g: garbage collect (gg: also compact, ggg: also de-duplicate) + d: decompress all streams (except fonts or images if 'f' or 'i') + f: compress fonts + i: compress images + p: pretty-print objects + l: linearize + a: ascii hex encode streams + z: compress all streams + s: sanitize content streams +*/ +void pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args); + +/* pdf_save_document: Write out the document to a file with all changes finalised. */ void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, pdf_write_options *opts); diff --git a/source/pdf/pdf-write.c b/source/pdf/pdf-write.c index 4e236748..b50e5db3 100644 --- a/source/pdf/pdf-write.c +++ b/source/pdf/pdf-write.c @@ -53,7 +53,9 @@ struct pdf_write_state_s int do_tight; int do_ascii; int do_expand; - int do_deflate; + int do_compress; + int do_compress_images; + int do_compress_fonts; int do_garbage; int do_linear; int do_clean; @@ -1630,7 +1632,7 @@ static fz_buffer *deflatebuf(fz_context *ctx, unsigned char *p, int n) return buf; } -static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen) +static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate) { fz_buffer *buf, *tmp; pdf_obj *newlen; @@ -1642,7 +1644,7 @@ static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts obj = pdf_copy_dict(ctx, obj_orig); - if (opts->do_deflate && !pdf_dict_get(ctx, 
obj, PDF_NAME_Filter)) { pdf_dict_put(ctx, obj, PDF_NAME_Filter, PDF_NAME_FlateDecode); @@ -1676,7 +1678,7 @@ static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts pdf_drop_obj(ctx, obj); } -static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen) +static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate) { fz_buffer *buf, *tmp; pdf_obj *newlen; @@ -1693,7 +1695,7 @@ static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *op pdf_dict_del(ctx, obj, PDF_NAME_Filter); pdf_dict_del(ctx, obj, PDF_NAME_DecodeParms); - if (opts->do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME_Filter)) + if (do_deflate) { pdf_dict_put(ctx, obj, PDF_NAME_Filter, PDF_NAME_FlateDecode); @@ -1738,7 +1740,7 @@ static int is_image_filter(char *s) return 0; } -static int filter_implies_image(fz_context *ctx, pdf_document *doc, pdf_obj *o) +static int filter_implies_image(fz_context *ctx, pdf_obj *o) { if (!o) return 0; @@ -1755,6 +1757,39 @@ static int filter_implies_image(fz_context *ctx, pdf_document *doc, pdf_obj *o) return 0; } +static int is_image_stream(fz_context *ctx, pdf_obj *obj) +{ + pdf_obj *o; + if ((o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_XObject))) + if ((o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_Image))) + return 1; + if (o = pdf_dict_get(ctx, obj, PDF_NAME_Filter), filter_implies_image(ctx, o)) + return 1; + if (pdf_dict_get(ctx, obj, PDF_NAME_Width) != NULL && pdf_dict_get(ctx, obj, PDF_NAME_Height) != NULL) + return 1; + return 0; +} + +static int is_font_stream(fz_context *ctx, pdf_obj *obj) +{ + pdf_obj *o; + if (o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_Font)) + return 1; + if (o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_FontDescriptor)) + return 1; + if 
(pdf_dict_get(ctx, obj, PDF_NAME_Length1) != NULL) + return 1; + if (pdf_dict_get(ctx, obj, PDF_NAME_Length2) != NULL) + return 1; + if (pdf_dict_get(ctx, obj, PDF_NAME_Length3) != NULL) + return 1; + if (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_Type1C)) + return 1; + if (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_CIDFontType0C)) + return 1; + return 0; +} + static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs) { pdf_xref_entry *entry; @@ -1813,39 +1848,18 @@ static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opt } else { - int dontexpand = 0; - if (opts->do_expand != 0 && opts->do_expand != PDF_EXPAND_ALL) - { - pdf_obj *o; - - if ((o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_XObject)) && - (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_Image))) - dontexpand = !(opts->do_expand & PDF_EXPAND_IMAGES); - if (o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_Font)) - dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS); - if (o = pdf_dict_get(ctx, obj, PDF_NAME_Type), pdf_name_eq(ctx, o, PDF_NAME_FontDescriptor)) - dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS); - if (pdf_dict_get(ctx, obj, PDF_NAME_Length1) != NULL) - dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS); - if (pdf_dict_get(ctx, obj, PDF_NAME_Length2) != NULL) - dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS); - if (pdf_dict_get(ctx, obj, PDF_NAME_Length3) != NULL) - dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS); - if (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_Type1C)) - dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS); - if (o = pdf_dict_get(ctx, obj, PDF_NAME_Subtype), pdf_name_eq(ctx, o, PDF_NAME_CIDFontType0C)) - dontexpand = !(opts->do_expand & PDF_EXPAND_FONTS); - if (o = pdf_dict_get(ctx, obj, PDF_NAME_Filter), 
filter_implies_image(ctx, doc, o)) - dontexpand = !(opts->do_expand & PDF_EXPAND_IMAGES); - if (pdf_dict_get(ctx, obj, PDF_NAME_Width) != NULL && pdf_dict_get(ctx, obj, PDF_NAME_Height) != NULL) - dontexpand = !(opts->do_expand & PDF_EXPAND_IMAGES); - } fz_try(ctx) { - if (opts->do_expand && !dontexpand && !pdf_is_jpx_image(ctx, obj)) - expandstream(ctx, doc, opts, obj, num, gen); + int do_deflate = opts->do_compress; + int do_expand = opts->do_expand; + if (opts->do_compress_images && is_image_stream(ctx, obj)) + do_deflate = 1, do_expand = 0; + if (opts->do_compress_fonts && is_font_stream(ctx, obj)) + do_deflate = 1, do_expand = 0; + if (do_expand) + expandstream(ctx, doc, opts, obj, num, gen, do_deflate); else - copystream(ctx, doc, opts, obj, num, gen); + copystream(ctx, doc, opts, obj, num, gen, do_deflate); } fz_catch(ctx) { @@ -2677,15 +2691,19 @@ static void initialise_write_state(fz_context *ctx, pdf_document *doc, const pdf int xref_len = pdf_xref_len(ctx, doc); opts->do_incremental = in_opts->do_incremental; - opts->do_tight = (in_opts->do_expand == 0) || in_opts->do_deflate; - opts->do_expand = in_opts->do_expand; - opts->do_garbage = in_opts->do_garbage; opts->do_ascii = in_opts->do_ascii; - opts->do_deflate = in_opts->do_deflate; + opts->do_tight = !in_opts->do_pretty; + opts->do_expand = in_opts->do_decompress; + opts->do_compress = in_opts->do_compress; + opts->do_compress_images = in_opts->do_compress_images; + opts->do_compress_fonts = in_opts->do_compress_fonts; + + opts->do_garbage = in_opts->do_garbage; opts->do_linear = in_opts->do_linear; opts->do_clean = in_opts->do_clean; opts->start = 0; opts->main_xref_offset = INT_MIN; + /* We deliberately make these arrays long enough to cope with * 1 to n access rather than 0..n-1, and add space for 2 new * extra entries that may be required for linearization. 
*/ @@ -2730,6 +2748,30 @@ static void finalise_write_state(fz_context *ctx, pdf_write_state *opts) fz_drop_output(ctx, opts->out); } +void pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args) +{ + int c; + + memset(opts, 0, sizeof *opts); + + while ((c = *args++)) + { + switch (c) + { + case 'd': opts->do_decompress += 1; break; + case 'z': opts->do_compress += 1; break; + case 'f': opts->do_compress_fonts += 1; break; + case 'i': opts->do_compress_images += 1; break; + case 'p': opts->do_pretty += 1; break; + case 'a': opts->do_ascii += 1; break; + case 'g': opts->do_garbage += 1; break; + case 'l': opts->do_linear += 1; break; + case 's': opts->do_clean += 1; break; + default: fz_warn(ctx, "unrecognized pdf-write option: '%c'", c); + } + } +} + void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, pdf_write_options *in_opts) { pdf_write_options opts_defaults = { 0 }; diff --git a/source/tools/murun.c b/source/tools/murun.c index 88d429cc..c2a9f0bc 100644 --- a/source/tools/murun.c +++ b/source/tools/murun.c @@ -2513,10 +2513,13 @@ static void ffi_PDFDocument_save(js_State *J) fz_context *ctx = js_getcontext(J); pdf_document *pdf = js_touserdata(J, 0, "pdf_document"); const char *filename = js_tostring(J, 1); + const char *options = js_isdefined(J, 2) ? 
js_tostring(J, 2) : "ga"; + pdf_write_options pwo; - fz_try(ctx) - pdf_save_document(ctx, pdf, filename, NULL); - fz_catch(ctx) + fz_try(ctx) { + pdf_parse_write_options(ctx, &pwo, options); + pdf_save_document(ctx, pdf, filename, &pwo); + } fz_catch(ctx) rethrow(J); } diff --git a/source/tools/pdfclean.c b/source/tools/pdfclean.c index f703b155..22937186 100644 --- a/source/tools/pdfclean.c +++ b/source/tools/pdfclean.c @@ -19,13 +19,13 @@ static void usage(void) "\t-g\tgarbage collect unused objects\n" "\t-gg\tin addition to -g compact xref table\n" "\t-ggg\tin addition to -gg merge duplicate objects\n" - "\t-s\tclean content streams\n" - "\t-d\tdecompress all streams\n" "\t-l\tlinearize PDF\n" - "\t-i\ttoggle decompression of image streams\n" - "\t-f\ttoggle decompression of font streams\n" "\t-a\tascii hex encode binary streams\n" + "\t-d\tdecompress streams\n" "\t-z\tdeflate uncompressed streams\n" + "\t-f\tcompress font streams\n" + "\t-i\tcompress image streams\n" + "\t-s\tclean content streams\n" "\tpages\tcomma separated list of page numbers and ranges\n" ); exit(1); @@ -49,18 +49,22 @@ int pdfclean_main(int argc, char **argv) switch (c) { case 'p': password = fz_optarg; break; - case 'g': opts.do_garbage ++; break; - case 'd': opts.do_expand ^= PDF_EXPAND_ALL; break; - case 'f': opts.do_expand ^= PDF_EXPAND_FONTS; break; - case 'i': opts.do_expand ^= PDF_EXPAND_IMAGES; break; - case 'l': opts.do_linear ++; break; - case 'a': opts.do_ascii ++; break; - case 'z': opts.do_deflate ++; break; - case 's': opts.do_clean ++; break; + + case 'd': opts.do_decompress += 1; break; + case 'z': opts.do_compress += 1; break; + case 'f': opts.do_compress_fonts += 1; break; + case 'i': opts.do_compress_images += 1; break; + case 'a': opts.do_ascii += 1; break; + case 'g': opts.do_garbage += 1; break; + case 'l': opts.do_linear += 1; break; + case 's': opts.do_clean += 1; break; default: usage(); break; } } + if ((opts.do_ascii || opts.do_decompress) && !opts.do_compress) 
+ opts.do_pretty = 1; + if (argc - fz_optind < 1) usage(); diff --git a/source/tools/pdfcreate.c b/source/tools/pdfcreate.c index 6f116c6a..839e493e 100644 --- a/source/tools/pdfcreate.c +++ b/source/tools/pdfcreate.c @@ -9,13 +9,9 @@ static void usage(void) { fprintf(stderr, - "usage: mutool create [-o output.pdf] [-adlsz] page.txt [page2.txt ...]\n" + "usage: mutool create [-o output.pdf] [-O options] page.txt [page2.txt ...]\n" "\t-o\tname of PDF file to create\n" - "\t-a\tascii hex encode binary streams\n" - "\t-d\tdecompress all streams\n" - "\t-l\tlinearize PDF\n" - "\t-s\tclean content streams\n" - "\t-z\tdeflate uncompressed streams\n" + "\t-O\tPDF write options\n" "\tpage.txt file defines page size, fonts, images and contents\n" ); exit(1); @@ -145,18 +141,15 @@ int pdfcreate_main(int argc, char **argv) { pdf_write_options opts = { 0 }; char *output = "out.pdf"; + char *flags = "z"; int i, c; - while ((c = fz_getopt(argc, argv, "adlszo:")) != -1) + while ((c = fz_getopt(argc, argv, "o:O:")) != -1) { switch (c) { case 'o': output = fz_optarg; break; - case 'a': opts.do_ascii ++; break; - case 'd': opts.do_expand ^= PDF_EXPAND_ALL; break; - case 'l': opts.do_linear ++; break; - case 's': opts.do_clean ++; break; - case 'z': opts.do_deflate ++; break; + case 'O': flags = fz_optarg; break; default: usage(); break; } } @@ -171,6 +164,8 @@ int pdfcreate_main(int argc, char **argv) exit(1); } + pdf_parse_write_options(ctx, &opts, flags); + doc = pdf_create_document(ctx); for (i = fz_optind; i < argc; ++i) diff --git a/source/tools/pdfmerge.c b/source/tools/pdfmerge.c index 13905460..f63a90c7 100644 --- a/source/tools/pdfmerge.c +++ b/source/tools/pdfmerge.c @@ -9,14 +9,11 @@ static void usage(void) { fprintf(stderr, - "usage: mutool merge [-o output.pdf] [-adlsz] input.pdf [pages] [input2.pdf] [pages2] ...\n" + "usage: mutool merge [-o output.pdf] [-O options] input.pdf [pages] [input2.pdf] [pages2] ...\n" "\t-o\tname of PDF file to create\n" - "\t-a\tascii hex 
encode binary streams\n" - "\t-d\tdecompress all streams\n" - "\t-l\tlinearize PDF\n" - "\t-s\tclean content streams\n" - "\t-z\tdeflate uncompressed streams\n" - "\tinput.pdf name of first PDF file from which we are copying pages\n" + "\t-O\tPDF write options\n" + "\tinput.pdf\tname of first PDF file from which we are copying pages\n" + "\tpages: comma separated list of page ranges (for example: 1-5,6,10-)\n" ); exit(1); } @@ -149,19 +146,16 @@ int pdfmerge_main(int argc, char **argv) { pdf_write_options opts = { 0 }; char *output = "out.pdf"; + char *flags = ""; char *input; int c; - while ((c = fz_getopt(argc, argv, "adlszo:")) != -1) + while ((c = fz_getopt(argc, argv, "o:O:")) != -1) { switch (c) { case 'o': output = fz_optarg; break; - case 'a': opts.do_ascii ++; break; - case 'd': opts.do_expand ^= PDF_EXPAND_ALL; break; - case 'l': opts.do_linear ++; break; - case 's': opts.do_clean ++; break; - case 'z': opts.do_deflate ++; break; + case 'O': flags = fz_optarg; break; default: usage(); break; } } @@ -176,6 +170,8 @@ int pdfmerge_main(int argc, char **argv) exit(1); } + pdf_parse_write_options(ctx, &opts, flags); + fz_try(ctx) { doc_des = pdf_create_document(ctx); |