From 78e29456b051f41073d706ac7d3eb76bfa08b0ab Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Fri, 27 Jan 2012 13:34:32 +0000 Subject: Rename pdfdraw to mupdfdraw etc. This a) improves our branding, and b) avoids conflicts with other pdf tools out there (pdfinfo etc). --- apps/man/mupdf.1 | 10 +- apps/man/mupdfclean.1 | 39 ++ apps/man/mupdfdraw.1 | 78 ++++ apps/man/mupdfshow.1 | 42 ++ apps/man/pdfclean.1 | 39 -- apps/man/pdfdraw.1 | 78 ---- apps/man/pdfshow.1 | 42 -- apps/mubusy.c | 13 + apps/mubusy_pdfclean.c | 2 +- apps/mubusy_pdfdraw.c | 2 +- apps/mubusy_pdfextract.c | 2 +- apps/mubusy_pdfinfo.c | 2 +- apps/mubusy_pdfshow.c | 2 +- apps/mubusy_xpsdraw.c | 2 +- apps/mupdfclean.c | 842 ++++++++++++++++++++++++++++++++++++++ apps/mupdfdraw.c | 495 ++++++++++++++++++++++ apps/mupdfextract.c | 215 ++++++++++ apps/mupdfinfo.c | 1020 ++++++++++++++++++++++++++++++++++++++++++++++ apps/mupdfshow.c | 239 +++++++++++ apps/muxpsdraw.c | 379 +++++++++++++++++ apps/pdfclean.c | 842 -------------------------------------- apps/pdfdraw.c | 495 ---------------------- apps/pdfextract.c | 215 ---------- apps/pdfinfo.c | 1020 ---------------------------------------------- apps/pdfshow.c | 239 ----------- apps/xpsdraw.c | 379 ----------------- 26 files changed, 3373 insertions(+), 3360 deletions(-) create mode 100644 apps/man/mupdfclean.1 create mode 100644 apps/man/mupdfdraw.1 create mode 100644 apps/man/mupdfshow.1 delete mode 100644 apps/man/pdfclean.1 delete mode 100644 apps/man/pdfdraw.1 delete mode 100644 apps/man/pdfshow.1 create mode 100644 apps/mupdfclean.c create mode 100644 apps/mupdfdraw.c create mode 100644 apps/mupdfextract.c create mode 100644 apps/mupdfinfo.c create mode 100644 apps/mupdfshow.c create mode 100644 apps/muxpsdraw.c delete mode 100644 apps/pdfclean.c delete mode 100644 apps/pdfdraw.c delete mode 100644 apps/pdfextract.c delete mode 100644 apps/pdfinfo.c delete mode 100644 apps/pdfshow.c delete mode 100644 apps/xpsdraw.c (limited to 'apps') diff --git 
a/apps/man/mupdf.1 b/apps/man/mupdf.1 index a4162a6f..e22d0ba9 100644 --- a/apps/man/mupdf.1 +++ b/apps/man/mupdf.1 @@ -1,4 +1,4 @@ -.TH MUPDF 1 "March 15, 2010" +.TH MUPDF 1 "January 27, 2012" .\" Please adjust this date whenever revising the manpage. .SH NAME mupdf \- MuPDF is a lightweight PDF viewer written in portable C @@ -76,11 +76,11 @@ Find the next/previous search result. .B c Toggle between color and grayscale rendering. .SH SEE ALSO -.BR pdfclean (1), -.BR pdfdraw (1), -.BR pdfshow (1). +.BR mupdfclean (1), +.BR mupdfdraw (1), +.BR mupdfshow (1). .SH AUTHOR MuPDF was written by Tor Andersson . -MuPDF is Copyright 2006-2010 Artifex Software, Inc. +MuPDF is Copyright 2006-2012 Artifex Software, Inc. .PP This manual page was written by Sebastian Rasmussen . diff --git a/apps/man/mupdfclean.1 b/apps/man/mupdfclean.1 new file mode 100644 index 00000000..91e796ac --- /dev/null +++ b/apps/man/mupdfclean.1 @@ -0,0 +1,39 @@ +.TH MUPDFCLEAN 1 "January 27, 2012" +.\" Please adjust this date whenever revising the manpage. +.SH NAME +mupdfclean \- pretty print, decompress and garbage collect PDF files +.SH SYNOPSIS +.B mupdfclean +.RI [ options ] +.RI input.pdf +.RI [ output.pdf ] +.RI [ pages ] +.SH DESCRIPTION +.B mupdfclean +pretty prints and rewrites the contents of a PDF file. +If no output file is specified, the new file will be written to "out.pdf" in +the current directory. +.PP +.SH OPTIONS +.TP +.B \-p password +Use the specified password if the file is encrypted. +.TP +.B \-g +Garbage collect objects that have no references from other objects. +Give the option twice to renumber all objects and compact the cross reference table. +Give it three times to merge and reuse duplicate objects. +.TP +.B \-d +Decompress streams. This will make the output file larger, but provides +easy access for reading and editing the contents with a text editor. +.TP +.B pages +Comma separated list of ranges to clean. +.SH SEE ALSO +.BR mupdf (1), +.BR mupdfdraw (1),
+.BR mupdfshow (1). +.SH AUTHOR +MuPDF was written by Tor Andersson . +MuPDF is Copyright 2006-2012 Artifex Software, Inc. diff --git a/apps/man/mupdfdraw.1 b/apps/man/mupdfdraw.1 new file mode 100644 index 00000000..ea58d570 --- /dev/null +++ b/apps/man/mupdfdraw.1 @@ -0,0 +1,78 @@ +.TH MUPDFDRAW 1 "January 27, 2012" +.\" Please adjust this date whenever revising the manpage. +.SH NAME +mupdfdraw \- render PDF documents +.SH SYNOPSIS +.B mupdfdraw +.RI [ options ] +.RI input.pdf +.RI [ pages ] +.SH DESCRIPTION +.B mupdfdraw +will render a PDF document to image files. +The supported image formats are: pgm, ppm, pam and png. +Select the pages to be rendered by specifying a comma +separated list of ranges and individual page numbers (for example: 1,5,10-15). +If no pages are specified, all the pages will be rendered. +.SH OPTIONS +.TP +.B \-o output +The image format is deduced from the output file name. +Embed %d in the name to indicate the page number (for example: "page%d.png"). +.TP +.B \-p password +Use the specified password if the file is encrypted. +.TP +.B \-r resolution +Render the page at the specified resolution. +The default resolution is 72 dpi. +.TP +.B \-R angle +Rotate clockwise by given number of degrees. +.TP +.B \-a +Save the alpha channel. +The default behavior is to render each page with a white background. +With this option, the page background is transparent. +Only supported for pam and png output formats. +.TP +.B \-g +Render in grayscale. +The default is to render a full color RGB image. +If the output format is pgm or ppm this option is ignored. +.TP +.B \-m +Show timing information. +Take the time it takes for each page to render and print +a summary at the end. +.TP +.B \-5 +Print an MD5 checksum of the rendered image data for each page. +.TP +.B \-t +Print the text contents of each page in UTF-8 encoding. +Give the option twice to print detailed information +about the location of each character in XML format.
+.TP +.B \-x +Print the display list used to render each page. +.TP +.B \-A +Disable the use of accelerated functions. +.TP +.B \-G gamma +Gamma correct the output image. +Some typical values are 0.7 or 1.4 to thin or darken text rendering. +.TP +.B \-I +Invert the output image colors. +.TP +.B pages +Comma separated list of ranges to render. +.SH SEE ALSO +.BR mupdf (1), +.BR mupdfclean (1), +.BR mupdfshow (1). +.SH AUTHOR +MuPDF was written by Tor Andersson . +MuPDF is Copyright 2006-2012 Artifex Software, Inc. diff --git a/apps/man/mupdfshow.1 b/apps/man/mupdfshow.1 new file mode 100644 index 00000000..451dac6b --- /dev/null +++ b/apps/man/mupdfshow.1 @@ -0,0 +1,42 @@ +.TH MUPDFSHOW 1 "January 27, 2012" +.\" Please adjust this date whenever revising the manpage. +.SH NAME +mupdfshow \- show objects and streams that make up a PDF document +.SH SYNOPSIS +.B mupdfshow +.RI [ options ] +.RI file.pdf +.RI [ xref ] +.RI [ trailer ] +.RI [ pages ] +.RI [ grep ] +.RI [ object-number... ] +.SH DESCRIPTION +.B mupdfshow +pretty prints the objects and streams specified on the command line. +Streams are decoded and non-printable characters are represented +with a period. +Specify objects with their number. +The special names xref, trailer and pages will +respectively print the cross reference, trailer, +and the object numbers for all pages. +The special name grep will print all objects in the file +in a compact one-line format suitable for piping to grep. +.PP +.SH OPTIONS +.TP +.B \-b +Print streams as binary data and omit the object header. +.TP +.B \-e +Print streams in their original encoded form. +.TP +.B \-p password +Use the specified password if the file is encrypted. +.SH SEE ALSO +.BR mupdf (1), +.BR mupdfclean (1), +.BR mupdfdraw (1). +.SH AUTHOR +MuPDF was written by Tor Andersson . +MuPDF is Copyright 2006-2012 Artifex Software, Inc.
diff --git a/apps/man/pdfclean.1 b/apps/man/pdfclean.1 deleted file mode 100644 index aa9047af..00000000 --- a/apps/man/pdfclean.1 +++ /dev/null @@ -1,39 +0,0 @@ -.TH PDFCLEAN 1 "September 4, 2011" -.\" Please adjust this date whenever revising the manpage. -.SH NAME -pdfclean \- pretty print, decompress and garbage collect PDF files -.SH SYNOPSIS -.B pdfclean -.RI [ options ] -.RI input.pdf -.RI [ output.pdf ] -.RI [ pages ] -.SH DESCRIPTION -.B pdfclean -pretty prints and rewrites the contents of a PDF file. -If no output file is specified, the new file will be written to "out.pdf" in -the current directory. -.PP -.SH OPTIONS -.TP -.B \-p password -Use the specified password if the file is encrypted. -.TP -.B \-g -Garbage collect objects that have no references from other objects. -Give the option twice to renumber all objects and compact the cross reference table. -Give it three times to merge and reuse duplicate objects. -.TP -.B \-d -Decompress streams. This will make the output file larger, but provides -easy access for reading and editing the contents with a text editor. -.TP -.B pages -Comma separated list of ranges to clean. -.SH SEE ALSO -.BR mupdf (1), -.BR pdfdraw (1). -.BR pdfshow (1). -.SH AUTHOR -MuPDF was written by Tor Andersson . -MuPDF is Copyright 2006-2010 Artifex Software, Inc. diff --git a/apps/man/pdfdraw.1 b/apps/man/pdfdraw.1 deleted file mode 100644 index 7fa0e81c..00000000 --- a/apps/man/pdfdraw.1 +++ /dev/null @@ -1,78 +0,0 @@ -.TH PDFDRAW 1 "September 4, 2011" -.\" Please adjust this date whenever revising the manpage. -.SH NAME -pdfdraw \- render PDF documents -.SH SYNOPSIS -.B pdfdraw -.RI [ options ] -.RI input.pdf -.RI [ pages] -.SH DESCRIPTION -.B pdfdraw -will render a PDF document to image files. -The supported image formats are: pgm, ppm, pam and png. -Select the pages to be rendered by specifying a comma -separated list of ranges and individual page numbers (for example: 1,5,10-15). 
-In no pages are specified all the pages will be rendered. -.SH OPTIONS -.TP -.B \-o output -The image format is deduced from the output file name. -Embed %d in the name to indicate the page number (for example: "page%d.png"). -.TP -.B \-p password -Use the specified password if the file is encrypted. -.TP -.B \-r resolution -Render the page at the specified resolution. -The default resolution is 72 dpi. -.TP -.B \-R angle -Rotate clockwise by given number of degrees. -.TP -.B \-a -Save the alpha channel. -The default behavior is to render each page with a white background. -With this option, the page background is transparent. -Only supported for pam and png output formats. -.TP -.B \-g -Render in grayscale. -The default is to render a full color RGB image. -If the output format is pgm or ppm this option is ignored. -.TP -.B \-m -Show timing information. -Take the time it takes for each page to render and print -a summary at the end. -.TP -.B \-5 -Print an MD5 checksum of the rendered image data for each page. -.TP -.B \-t -Print the text contents of each page in UTF-8 encoding. -Give the option twice to print detailed information -about the location of each character in XML format. -.TP -.B \-x -Print the display list used to render each page. -.TP -.B \-A -Disable the use of accelerated functions. -.TP -.B \-G gamma -Gamma correct the output image. -Some typical values are 0.7 or 1.4 to thin or darken text rendering. -.TP -.B \-I -Invert the output image colors. -.TP -.B pages -Comma separated list of ranges to render. -.SH SEE ALSO -.BR mupdf (1), -.BR pdfclean (1). -.BR pdfshow (1). -.SH AUTHOR -MuPDF was written by Tor Andersson . -MuPDF is Copyright 2006-2010 Artifex Software, Inc. diff --git a/apps/man/pdfshow.1 b/apps/man/pdfshow.1 deleted file mode 100644 index 7574f297..00000000 --- a/apps/man/pdfshow.1 +++ /dev/null @@ -1,42 +0,0 @@ -.TH PDFSHOW 1 "July 19, 2010" -.\" Please adjust this date whenever revising the manpage. 
-.SH NAME -pdfshow \- show objects and streams that make up a PDF document -.SH SYNOPSIS -.B pdfshow -.RI [ options ] -.RI file.pdf -.RI [ xref ] -.RI [ trailer ] -.RI [ pages ] -.RI [ grep ] -.RI [ object-number... ] -.SH DESCRIPTION -.B pdfshow -pretty prints the objects and streams specified on the command line. -Streams are decoded and non-printable characters are represented -with a period. -Specify objects with their number. -The special names xref, trailer and pages will -respectively print the cross reference, trailer, -and the object numbers for all pages. -The special name grep will print all objects in the file -in a compact one-line format suitable for piping to grep. -.PP -.SH OPTIONS -.TP -.B \-b -Print streams as binary data and omit the object header. -.TP -.B \-e -Print streams in their original encoded form. -.TP -.B \-p password -Use the specified password if the file is encrypted. -.SH SEE ALSO -.BR mupdf (1), -.BR pdfclean (1). -.BR pdfdraw (1). -.SH AUTHOR -MuPDF was written by Tor Andersson . -MuPDF is copyright 2006-2010 Artifex Software, Inc. 
diff --git a/apps/mubusy.c b/apps/mubusy.c index 8e215959..0e12d53c 100644 --- a/apps/mubusy.c +++ b/apps/mubusy.c @@ -33,6 +33,19 @@ int main(int argc, char **argv) end++; if ((end-4 >= start) && (end[-4] == '.') && (end[-3] == 'e') && (end[-2] == 'x') && (end[-1] == 'e')) end = end-4; + if (namematch(end, start, "mupdfdraw", 9)) + return pdfdraw_main(argc, argv); + if (namematch(end, start, "mupdfclean", 10)) + return pdfclean_main(argc, argv); + if (namematch(end, start, "mupdfextract", 12)) + return pdfextract_main(argc, argv); + if (namematch(end, start, "mupdfshow", 9)) + return pdfshow_main(argc, argv); + if (namematch(end, start, "mupdfinfo", 9)) + return pdfinfo_main(argc, argv); + if (namematch(end, start, "muxpsdraw", 9)) + return xpsdraw_main(argc, argv); + /* And include old names for backward compatibility */ if (namematch(end, start, "pdfdraw", 7)) return pdfdraw_main(argc, argv); if (namematch(end, start, "pdfclean", 8)) diff --git a/apps/mubusy_pdfclean.c b/apps/mubusy_pdfclean.c index 225f30c8..bc3456d1 100644 --- a/apps/mubusy_pdfclean.c +++ b/apps/mubusy_pdfclean.c @@ -1,2 +1,2 @@ #define MUPDF_COMBINED_EXE -#include "pdfclean.c" +#include "mupdfclean.c" diff --git a/apps/mubusy_pdfdraw.c b/apps/mubusy_pdfdraw.c index 638b0206..aadde1c9 100644 --- a/apps/mubusy_pdfdraw.c +++ b/apps/mubusy_pdfdraw.c @@ -1,2 +1,2 @@ #define MUPDF_COMBINED_EXE -#include "pdfdraw.c" +#include "mupdfdraw.c" diff --git a/apps/mubusy_pdfextract.c b/apps/mubusy_pdfextract.c index aa07f78f..30f661a3 100644 --- a/apps/mubusy_pdfextract.c +++ b/apps/mubusy_pdfextract.c @@ -1,2 +1,2 @@ #define MUPDF_COMBINED_EXE -#include "pdfextract.c" +#include "mupdfextract.c" diff --git a/apps/mubusy_pdfinfo.c b/apps/mubusy_pdfinfo.c index 0c9eaa97..df947543 100644 --- a/apps/mubusy_pdfinfo.c +++ b/apps/mubusy_pdfinfo.c @@ -1,2 +1,2 @@ #define MUPDF_COMBINED_EXE -#include "pdfinfo.c" +#include "mupdfinfo.c" diff --git a/apps/mubusy_pdfshow.c b/apps/mubusy_pdfshow.c index 
e1a6d573..320b93aa 100644 --- a/apps/mubusy_pdfshow.c +++ b/apps/mubusy_pdfshow.c @@ -1,2 +1,2 @@ #define MUPDF_COMBINED_EXE -#include "pdfshow.c" +#include "mupdfshow.c" diff --git a/apps/mubusy_xpsdraw.c b/apps/mubusy_xpsdraw.c index 76a3f7df..9e2f8b87 100644 --- a/apps/mubusy_xpsdraw.c +++ b/apps/mubusy_xpsdraw.c @@ -1,2 +1,2 @@ #define MUPDF_COMBINED_EXE -#include "xpsdraw.c" +#include "muxpsdraw.c" diff --git a/apps/mupdfclean.c b/apps/mupdfclean.c new file mode 100644 index 00000000..8a163502 --- /dev/null +++ b/apps/mupdfclean.c @@ -0,0 +1,842 @@ +/* + * PDF cleaning tool: general purpose pdf syntax washer. + * + * Rewrite PDF with pretty printed objects. + * Garbage collect unreachable objects. + * Inflate compressed streams. + * Create subset documents. + * + * TODO: linearize document for fast web view + */ + +#include "fitz.h" +#include "mupdf.h" + +static FILE *out = NULL; + +enum +{ + expand_images = 1, + expand_fonts = 2, + expand_all = -1 +}; + +static char *uselist = NULL; +static int *ofslist = NULL; +static int *genlist = NULL; +static int *renumbermap = NULL; + +static int dogarbage = 0; +static int doexpand = 0; +static int doascii = 0; + +static pdf_document *xref = NULL; +static fz_context *ctx = NULL; + +static void usage(void) +{ + fprintf(stderr, + "usage: pdfclean [options] input.pdf [output.pdf] [pages]\n" + "\t-p -\tpassword\n" + "\t-g\tgarbage collect unused objects\n" + "\t-gg\tin addition to -g compact xref table\n" + "\t-ggg\tin addition to -gg merge duplicate objects\n" + "\t-d\tdecompress all streams\n" + "\t-i\ttoggle decompression of image streams\n" + "\t-f\ttoggle decompression of font streams\n" + "\t-a\tascii hex encode binary streams\n" + "\tpages\tcomma separated list of ranges\n"); + exit(1); +} + +/* + * Garbage collect objects not reachable from the trailer. 
+ */ + +static void sweepref(fz_obj *ref); + +static void sweepobj(fz_obj *obj) +{ + int i; + + if (fz_is_indirect(obj)) + sweepref(obj); + + else if (fz_is_dict(obj)) + { + int n = fz_dict_len(obj); + for (i = 0; i < n; i++) + sweepobj(fz_dict_get_val(obj, i)); + } + + else if (fz_is_array(obj)) + { + int n = fz_array_len(obj); + for (i = 0; i < n; i++) + sweepobj(fz_array_get(obj, i)); + } +} + +static void sweepref(fz_obj *obj) +{ + int num = fz_to_num(obj); + int gen = fz_to_gen(obj); + + if (num < 0 || num >= xref->len) + return; + if (uselist[num]) + return; + + uselist[num] = 1; + + /* Bake in /Length in stream objects */ + if (pdf_is_stream(xref, num, gen)) + { + fz_obj *len = fz_dict_gets(obj, "Length"); + if (fz_is_indirect(len)) + { + uselist[fz_to_num(len)] = 0; + len = fz_resolve_indirect(len); + fz_dict_puts(obj, "Length", len); + } + } + + sweepobj(fz_resolve_indirect(obj)); +} + +/* + * Scan for and remove duplicate objects (slow) + */ + +static void removeduplicateobjs(void) +{ + int num, other; + + for (num = 1; num < xref->len; num++) + { + /* Only compare an object to objects preceding it */ + for (other = 1; other < num; other++) + { + fz_obj *a, *b; + + if (num == other || !uselist[num] || !uselist[other]) + continue; + + /* + * Comparing stream objects data contents would take too long. + * + * pdf_is_stream calls pdf_cache_object and ensures + * that the xref table has the objects loaded. 
+ */ + if (pdf_is_stream(xref, num, 0) || pdf_is_stream(xref, other, 0)) + continue; + + a = xref->table[num].obj; + b = xref->table[other].obj; + + a = fz_resolve_indirect(a); + b = fz_resolve_indirect(b); + + if (fz_objcmp(a, b)) + continue; + + /* Keep the lowest numbered object */ + renumbermap[num] = MIN(num, other); + renumbermap[other] = MIN(num, other); + uselist[MAX(num, other)] = 0; + + /* One duplicate was found, do not look for another */ + break; + } + } +} + +/* + * Renumber objects sequentially so the xref is more compact + */ + +static void compactxref(void) +{ + int num, newnum; + + /* + * Update renumbermap in-place, clustering all used + * objects together at low object ids. Objects that + * already should be renumbered will have their new + * object ids be updated to reflect the compaction. + */ + + newnum = 1; + for (num = 1; num < xref->len; num++) + { + if (uselist[num] && renumbermap[num] == num) + renumbermap[num] = newnum++; + else if (renumbermap[num] != num) + renumbermap[num] = renumbermap[renumbermap[num]]; + } +} + +/* + * Update indirect objects according to renumbering established when + * removing duplicate objects and compacting the xref. 
+ */ + +static void renumberobj(fz_obj *obj) +{ + int i; + fz_context *ctx = xref->ctx; + + if (fz_is_dict(obj)) + { + int n = fz_dict_len(obj); + for (i = 0; i < n; i++) + { + fz_obj *key = fz_dict_get_key(obj, i); + fz_obj *val = fz_dict_get_val(obj, i); + if (fz_is_indirect(val)) + { + val = fz_new_indirect(ctx, renumbermap[fz_to_num(val)], 0, xref); + fz_dict_put(obj, key, val); + fz_drop_obj(val); + } + else + { + renumberobj(val); + } + } + } + + else if (fz_is_array(obj)) + { + int n = fz_array_len(obj); + for (i = 0; i < n; i++) + { + fz_obj *val = fz_array_get(obj, i); + if (fz_is_indirect(val)) + { + val = fz_new_indirect(ctx, renumbermap[fz_to_num(val)], 0, xref); + fz_array_put(obj, i, val); + fz_drop_obj(val); + } + else + { + renumberobj(val); + } + } + } +} + +static void renumberobjs(void) +{ + pdf_xref_entry *oldxref; + int newlen; + int num; + + /* Apply renumber map to indirect references in all objects in xref */ + renumberobj(xref->trailer); + for (num = 0; num < xref->len; num++) + { + fz_obj *obj = xref->table[num].obj; + + if (fz_is_indirect(obj)) + { + obj = fz_new_indirect(ctx, renumbermap[fz_to_num(obj)], 0, xref); + pdf_update_object(xref, num, 0, obj); + fz_drop_obj(obj); + } + else + { + renumberobj(obj); + } + } + + /* Create new table for the reordered, compacted xref */ + oldxref = xref->table; + xref->table = fz_malloc_array(xref->ctx, xref->len, sizeof(pdf_xref_entry)); + xref->table[0] = oldxref[0]; + + /* Move used objects into the new compacted xref */ + newlen = 0; + for (num = 1; num < xref->len; num++) + { + if (uselist[num]) + { + if (newlen < renumbermap[num]) + newlen = renumbermap[num]; + xref->table[renumbermap[num]] = oldxref[num]; + } + else + { + if (oldxref[num].obj) + fz_drop_obj(oldxref[num].obj); + } + } + + fz_free(xref->ctx, oldxref); + + /* Update the used objects count in compacted xref */ + xref->len = newlen + 1; + + /* Update list of used objects to fit with compacted xref */ + for (num = 1; num < 
xref->len; num++) + uselist[num] = 1; +} + +/* + * Recreate page tree to only retain specified pages. + */ + +static void retainpages(int argc, char **argv) +{ + fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; + + /* Keep only pages/type and (reduced) dest entries to avoid + * references to unretained pages */ + oldroot = fz_dict_gets(xref->trailer, "Root"); + pages = fz_dict_gets(oldroot, "Pages"); + olddests = pdf_load_name_tree(xref, "Dests"); + + root = fz_new_dict(ctx, 2); + fz_dict_puts(root, "Type", fz_dict_gets(oldroot, "Type")); + fz_dict_puts(root, "Pages", fz_dict_gets(oldroot, "Pages")); + + pdf_update_object(xref, fz_to_num(oldroot), fz_to_gen(oldroot), root); + + fz_drop_obj(root); + + /* Create a new kids array with only the pages we want to keep */ + parent = fz_new_indirect(ctx, fz_to_num(pages), fz_to_gen(pages), xref); + kids = fz_new_array(ctx, 1); + + /* Retain pages specified */ + while (argc - fz_optind) + { + int page, spage, epage; + char *spec, *dash; + char *pagelist = argv[fz_optind]; + + spec = fz_strsep(&pagelist, ","); + while (spec) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = pdf_count_pages(xref); + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + epage = atoi(dash + 1); + else + epage = pdf_count_pages(xref); + } + + if (spage > epage) + page = spage, spage = epage, epage = page; + + if (spage < 1) + spage = 1; + if (epage > pdf_count_pages(xref)) + epage = pdf_count_pages(xref); + + for (page = spage; page <= epage; page++) + { + fz_obj *pageobj = xref->page_objs[page-1]; + fz_obj *pageref = xref->page_refs[page-1]; + + fz_dict_puts(pageobj, "Parent", parent); + + /* Store page object in new kids array */ + fz_array_push(kids, pageref); + } + + spec = fz_strsep(&pagelist, ","); + } + + fz_optind++; + } + + fz_drop_obj(parent); + + /* Update page count and kids array */ + countobj = fz_new_int(ctx, fz_array_len(kids)); + fz_dict_puts(pages, "Count", 
countobj); + fz_drop_obj(countobj); + fz_dict_puts(pages, "Kids", kids); + fz_drop_obj(kids); + + /* Also preserve the (partial) Dests name tree */ + if (olddests) + { + int i; + fz_obj *names = fz_new_dict(ctx, 1); + fz_obj *dests = fz_new_dict(ctx, 1); + fz_obj *names_list = fz_new_array(ctx, 32); + + for (i = 0; i < fz_dict_len(olddests); i++) + { + fz_obj *key = fz_dict_get_key(olddests, i); + fz_obj *val = fz_dict_get_val(olddests, i); + fz_obj *key_str = fz_new_string(ctx, fz_to_name(key), strlen(fz_to_name(key))); + fz_obj *dest = fz_dict_gets(val, "D"); + + dest = fz_array_get(dest ? dest : val, 0); + if (fz_array_contains(fz_dict_gets(pages, "Kids"), dest)) + { + fz_array_push(names_list, key_str); + fz_array_push(names_list, val); + } + fz_drop_obj(key_str); + } + + root = fz_dict_gets(xref->trailer, "Root"); + fz_dict_puts(dests, "Names", names_list); + fz_dict_puts(names, "Dests", dests); + fz_dict_puts(root, "Names", names); + + fz_drop_obj(names); + fz_drop_obj(dests); + fz_drop_obj(names_list); + fz_drop_obj(olddests); + } +} + +/* + * Make sure we have loaded objects from object streams. 
+ */ + +static void preloadobjstms(void) +{ + fz_obj *obj; + int num; + + for (num = 0; num < xref->len; num++) + { + if (xref->table[num].type == 'o') + { + obj = pdf_load_object(xref, num, 0); + fz_drop_obj(obj); + } + } +} + +/* + * Save streams and objects to the output + */ + +static inline int isbinary(int c) +{ + if (c == '\n' || c == '\r' || c == '\t') + return 0; + return c < 32 || c > 127; +} + +static int isbinarystream(fz_buffer *buf) +{ + int i; + for (i = 0; i < buf->len; i++) + if (isbinary(buf->data[i])) + return 1; + return 0; +} + +static fz_buffer *hexbuf(unsigned char *p, int n) +{ + static const char hex[16] = "0123456789abcdef"; + fz_buffer *buf; + int x = 0; + + buf = fz_new_buffer(ctx, n * 2 + (n / 32) + 2); + + while (n--) + { + buf->data[buf->len++] = hex[*p >> 4]; + buf->data[buf->len++] = hex[*p & 15]; + if (++x == 32) + { + buf->data[buf->len++] = '\n'; + x = 0; + } + p++; + } + + buf->data[buf->len++] = '>'; + buf->data[buf->len++] = '\n'; + + return buf; +} + +static void addhexfilter(fz_obj *dict) +{ + fz_obj *f, *dp, *newf, *newdp; + fz_obj *ahx, *nullobj; + + ahx = fz_new_name(ctx, "ASCIIHexDecode"); + nullobj = fz_new_null(ctx); + newf = newdp = NULL; + + f = fz_dict_gets(dict, "Filter"); + dp = fz_dict_gets(dict, "DecodeParms"); + + if (fz_is_name(f)) + { + newf = fz_new_array(ctx, 2); + fz_array_push(newf, ahx); + fz_array_push(newf, f); + f = newf; + if (fz_is_dict(dp)) + { + newdp = fz_new_array(ctx, 2); + fz_array_push(newdp, nullobj); + fz_array_push(newdp, dp); + dp = newdp; + } + } + else if (fz_is_array(f)) + { + fz_array_insert(f, ahx); + if (fz_is_array(dp)) + fz_array_insert(dp, nullobj); + } + else + f = ahx; + + fz_dict_puts(dict, "Filter", f); + if (dp) + fz_dict_puts(dict, "DecodeParms", dp); + + fz_drop_obj(ahx); + fz_drop_obj(nullobj); + if (newf) + fz_drop_obj(newf); + if (newdp) + fz_drop_obj(newdp); +} + +static void copystream(fz_obj *obj, int num, int gen) +{ + fz_buffer *buf, *tmp; + fz_obj *newlen; + + buf 
= pdf_load_raw_stream(xref, num, gen); + + if (doascii && isbinarystream(buf)) + { + tmp = hexbuf(buf->data, buf->len); + fz_drop_buffer(ctx, buf); + buf = tmp; + + addhexfilter(obj); + + newlen = fz_new_int(ctx, buf->len); + fz_dict_puts(obj, "Length", newlen); + fz_drop_obj(newlen); + } + + fprintf(out, "%d %d obj\n", num, gen); + fz_fprint_obj(out, obj, doexpand == 0); + fprintf(out, "stream\n"); + fwrite(buf->data, 1, buf->len, out); + fprintf(out, "endstream\nendobj\n\n"); + + fz_drop_buffer(ctx, buf); +} + +static void expandstream(fz_obj *obj, int num, int gen) +{ + fz_buffer *buf, *tmp; + fz_obj *newlen; + + buf = pdf_load_stream(xref, num, gen); + + fz_dict_dels(obj, "Filter"); + fz_dict_dels(obj, "DecodeParms"); + + if (doascii && isbinarystream(buf)) + { + tmp = hexbuf(buf->data, buf->len); + fz_drop_buffer(ctx, buf); + buf = tmp; + + addhexfilter(obj); + } + + newlen = fz_new_int(ctx, buf->len); + fz_dict_puts(obj, "Length", newlen); + fz_drop_obj(newlen); + + fprintf(out, "%d %d obj\n", num, gen); + fz_fprint_obj(out, obj, doexpand == 0); + fprintf(out, "stream\n"); + fwrite(buf->data, 1, buf->len, out); + fprintf(out, "endstream\nendobj\n\n"); + + fz_drop_buffer(ctx, buf); +} + +static void writeobject(int num, int gen) +{ + fz_obj *obj; + fz_obj *type; + + obj = pdf_load_object(xref, num, gen); + + /* skip ObjStm and XRef objects */ + if (fz_is_dict(obj)) + { + type = fz_dict_gets(obj, "Type"); + if (fz_is_name(type) && !strcmp(fz_to_name(type), "ObjStm")) + { + uselist[num] = 0; + fz_drop_obj(obj); + return; + } + if (fz_is_name(type) && !strcmp(fz_to_name(type), "XRef")) + { + uselist[num] = 0; + fz_drop_obj(obj); + return; + } + } + + if (!pdf_is_stream(xref, num, gen)) + { + fprintf(out, "%d %d obj\n", num, gen); + fz_fprint_obj(out, obj, doexpand == 0); + fprintf(out, "endobj\n\n"); + } + else + { + int dontexpand = 0; + if (doexpand != 0 && doexpand != expand_all) + { + fz_obj *o; + + if ((o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), 
"XObject")) && + (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "Image"))) + dontexpand = !(doexpand & expand_images); + if (o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "Font")) + dontexpand = !(doexpand & expand_fonts); + if (o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "FontDescriptor")) + dontexpand = !(doexpand & expand_fonts); + if ((o = fz_dict_gets(obj, "Length1")) != NULL) + dontexpand = !(doexpand & expand_fonts); + if ((o = fz_dict_gets(obj, "Length2")) != NULL) + dontexpand = !(doexpand & expand_fonts); + if ((o = fz_dict_gets(obj, "Length3")) != NULL) + dontexpand = !(doexpand & expand_fonts); + if (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "Type1C")) + dontexpand = !(doexpand & expand_fonts); + if (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "CIDFontType0C")) + dontexpand = !(doexpand & expand_fonts); + } + if (doexpand && !dontexpand && !pdf_is_jpx_image(ctx, obj)) + expandstream(obj, num, gen); + else + copystream(obj, num, gen); + } + + fz_drop_obj(obj); +} + +static void writexref(void) +{ + fz_obj *trailer; + fz_obj *obj; + int startxref; + int num; + + startxref = ftell(out); + + fprintf(out, "xref\n0 %d\n", xref->len); + for (num = 0; num < xref->len; num++) + { + if (uselist[num]) + fprintf(out, "%010d %05d n \n", ofslist[num], genlist[num]); + else + fprintf(out, "%010d %05d f \n", ofslist[num], genlist[num]); + } + fprintf(out, "\n"); + + trailer = fz_new_dict(ctx, 5); + + obj = fz_new_int(ctx, xref->len); + fz_dict_puts(trailer, "Size", obj); + fz_drop_obj(obj); + + obj = fz_dict_gets(xref->trailer, "Info"); + if (obj) + fz_dict_puts(trailer, "Info", obj); + + obj = fz_dict_gets(xref->trailer, "Root"); + if (obj) + fz_dict_puts(trailer, "Root", obj); + + obj = fz_dict_gets(xref->trailer, "ID"); + if (obj) + fz_dict_puts(trailer, "ID", obj); + + fprintf(out, "trailer\n"); + fz_fprint_obj(out, trailer, doexpand == 0); + fprintf(out, "\n"); + + fz_drop_obj(trailer); + + fprintf(out, 
"startxref\n%d\n%%%%EOF\n", startxref); +} + +static void writepdf(void) +{ + int lastfree; + int num; + + for (num = 0; num < xref->len; num++) + { + if (xref->table[num].type == 'f') + genlist[num] = xref->table[num].gen; + if (xref->table[num].type == 'n') + genlist[num] = xref->table[num].gen; + if (xref->table[num].type == 'o') + genlist[num] = 0; + + if (dogarbage && !uselist[num]) + continue; + + if (xref->table[num].type == 'n' || xref->table[num].type == 'o') + { + uselist[num] = 1; + ofslist[num] = ftell(out); + writeobject(num, genlist[num]); + } + } + + /* Construct linked list of free object slots */ + lastfree = 0; + for (num = 0; num < xref->len; num++) + { + if (!uselist[num]) + { + genlist[num]++; + ofslist[lastfree] = num; + lastfree = num; + } + } + + writexref(); +} + +#ifdef MUPDF_COMBINED_EXE +int pdfclean_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif +{ + char *infile; + char *outfile = "out.pdf"; + char *password = ""; + int c, num; + int subset; + + while ((c = fz_getopt(argc, argv, "adfgip:")) != -1) + { + switch (c) + { + case 'p': password = fz_optarg; break; + case 'g': dogarbage ++; break; + case 'd': doexpand ^= expand_all; break; + case 'f': doexpand ^= expand_fonts; break; + case 'i': doexpand ^= expand_images; break; + case 'a': doascii ++; break; + default: usage(); break; + } + } + + if (argc - fz_optind < 1) + usage(); + + infile = argv[fz_optind++]; + + if (argc - fz_optind > 0 && + (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF"))) + { + outfile = argv[fz_optind++]; + } + + subset = 0; + if (argc - fz_optind > 0) + subset = 1; + + ctx = fz_new_context(NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + xref = pdf_open_document(ctx, infile); + if (pdf_needs_password(xref)) + if (!pdf_authenticate_password(xref, password)) + fz_throw(ctx, "cannot authenticate password: %s\n", infile); + + out = fopen(outfile, "wb"); + if 
(!out) + fz_throw(ctx, "cannot open output file '%s'", outfile); + + fprintf(out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10); + fprintf(out, "%%\316\274\341\277\246\n\n"); + + uselist = fz_malloc_array(ctx, xref->len + 1, sizeof(char)); + ofslist = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); + genlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); + renumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); + + for (num = 0; num < xref->len; num++) + { + uselist[num] = 0; + ofslist[num] = 0; + genlist[num] = 0; + renumbermap[num] = num; + } + + /* Make sure any objects hidden in compressed streams have been loaded */ + preloadobjstms(); + + /* Only retain the specified subset of the pages */ + if (subset) + retainpages(argc, argv); + + /* Sweep & mark objects from the trailer */ + if (dogarbage >= 1) + sweepobj(xref->trailer); + + /* Coalesce and renumber duplicate objects */ + if (dogarbage >= 3) + removeduplicateobjs(); + + /* Compact xref by renumbering and removing unused objects */ + if (dogarbage >= 2) + compactxref(); + + /* Make renumbering affect all indirect references and update xref */ + /* Do not renumber objects if encryption is in use, as the object + * numbers are baked into the streams/strings, and we can't currently + * cope with moving them. See bug 692627. 
*/ + if (dogarbage >= 2 && !xref->crypt) + renumberobjs(); + + writepdf(); + + if (fclose(out)) + fz_throw(ctx, "cannot close output file '%s'", outfile); + + fz_free(xref->ctx, uselist); + fz_free(xref->ctx, ofslist); + fz_free(xref->ctx, genlist); + fz_free(xref->ctx, renumbermap); + + pdf_close_document(xref); + fz_free_context(ctx); + return 0; +} diff --git a/apps/mupdfdraw.c b/apps/mupdfdraw.c new file mode 100644 index 00000000..552fe31a --- /dev/null +++ b/apps/mupdfdraw.c @@ -0,0 +1,495 @@ +/* + * pdfdraw -- command line tool for drawing pdf documents + */ + +#include "fitz.h" +#include "mupdf.h" + +#ifdef _MSC_VER +#include +#else +#include +#endif + +static char *output = NULL; +static float resolution = 72; +static float rotation = 0; + +static int showxml = 0; +static int showtext = 0; +static int showtime = 0; +static int showmd5 = 0; +static int showoutline = 0; +static int savealpha = 0; +static int uselist = 1; +static int alphabits = 8; +static float gamma_value = 1; +static int invert = 0; + +static fz_colorspace *colorspace; +static char *filename; + +static struct { + int count, total; + int min, max; + int minpage, maxpage; +} timing; + +static void usage(void) +{ + fprintf(stderr, + "usage: pdfdraw [options] input.pdf [pages]\n" + "\t-o -\toutput filename (%%d for page number)\n" + "\t\tsupported formats: pgm, ppm, pam, png, pbm\n" + "\t-p -\tpassword\n" + "\t-r -\tresolution in dpi (default: 72)\n" + "\t-A\tdisable accelerated functions\n" + "\t-a\tsave alpha channel (only pam and png)\n" + "\t-b -\tnumber of bits of antialiasing (0 to 8)\n" + "\t-g\trender in grayscale\n" + "\t-m\tshow timing information\n" + "\t-t\tshow text (-tt for xml)\n" + "\t-x\tshow display list\n" + "\t-d\tdisable use of display list\n" + "\t-5\tshow md5 checksums\n" + "\t-R -\trotate clockwise by given number of degrees\n" + "\t-G gamma\tgamma correct output\n" + "\t-I\tinvert output\n" + "\t-l\tprint outline\n" + "\tpages\tcomma separated list of ranges\n"); + 
exit(1); +} + +static int gettime(void) +{ + static struct timeval first; + static int once = 1; + struct timeval now; + if (once) + { + gettimeofday(&first, NULL); + once = 0; + } + gettimeofday(&now, NULL); + return (now.tv_sec - first.tv_sec) * 1000 + (now.tv_usec - first.tv_usec) / 1000; +} + +static int isrange(char *s) +{ + while (*s) + { + if ((*s < '0' || *s > '9') && *s != '-' && *s != ',') + return 0; + s++; + } + return 1; +} + +static void drawpage(pdf_document *doc, int pagenum) +{ + pdf_page *page; + fz_display_list *list = NULL; + fz_device *dev = NULL; + int start; + fz_context *ctx = doc->ctx; + + fz_var(list); + fz_var(dev); + + if (showtime) + { + start = gettime(); + } + + fz_try(ctx) + { + page = pdf_load_page(doc, pagenum - 1); + } + fz_catch(ctx) + { + fz_throw(ctx, "cannot load page %d in file '%s'", pagenum, filename); + } + + if (uselist) + { + fz_try(ctx) + { + list = fz_new_display_list(ctx); + dev = fz_new_list_device(ctx, list); + pdf_run_page(doc, page, dev, fz_identity, NULL); + } + fz_catch(ctx) + { + fz_free_device(dev); + fz_free_display_list(ctx, list); + pdf_free_page(ctx, page); + fz_throw(ctx, "cannot draw page %d in file '%s'", pagenum, filename); + } + fz_free_device(dev); + dev = NULL; + } + + if (showxml) + { + fz_try(ctx) + { + dev = fz_new_trace_device(ctx); + printf("\n", pagenum); + if (list) + fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL); + else + pdf_run_page(doc, page, dev, fz_identity, NULL); + printf("\n"); + } + fz_catch(ctx) + { + fz_free_device(dev); + fz_free_display_list(ctx, list); + pdf_free_page(ctx, page); + fz_rethrow(ctx); + } + fz_free_device(dev); + dev = NULL; + } + + if (showtext) + { + fz_text_span *text = NULL; + + fz_var(text); + + fz_try(ctx) + { + text = fz_new_text_span(ctx); + dev = fz_new_text_device(ctx, text); + if (list) + fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL); + else + pdf_run_page(doc, page, dev, fz_identity, NULL); + 
fz_free_device(dev); + dev = NULL; + printf("[Page %d]\n", pagenum); + if (showtext > 1) + fz_debug_text_span_xml(text); + else + fz_debug_text_span(text); + printf("\n"); + } + fz_catch(ctx) + { + fz_free_device(dev); + fz_free_text_span(ctx, text); + fz_free_display_list(ctx, list); + pdf_free_page(ctx, page); + fz_rethrow(ctx); + } + fz_free_text_span(ctx, text); + } + + if (showmd5 || showtime) + printf("page %s %d", filename, pagenum); + + if (output || showmd5 || showtime) + { + float zoom; + fz_matrix ctm; + fz_rect bounds; + fz_bbox bbox; + fz_pixmap *pix = NULL; + + fz_var(pix); + + bounds = pdf_bound_page(doc, page); + zoom = resolution / 72; + ctm = fz_scale(zoom, zoom); + ctm = fz_concat(ctm, fz_rotate(rotation)); + bbox = fz_round_rect(fz_transform_rect(ctm, bounds)); + + /* TODO: banded rendering and multi-page ppm */ + + fz_try(ctx) + { + pix = fz_new_pixmap_with_rect(ctx, colorspace, bbox); + + if (savealpha) + fz_clear_pixmap(pix); + else + fz_clear_pixmap_with_color(pix, 255); + + dev = fz_new_draw_device(ctx, pix); + if (list) + fz_execute_display_list(list, dev, ctm, bbox, NULL); + else + pdf_run_page(doc, page, dev, ctm, NULL); + fz_free_device(dev); + dev = NULL; + + if (invert) + fz_invert_pixmap(pix); + if (gamma_value != 1) + fz_gamma_pixmap(pix, gamma_value); + + if (savealpha) + fz_unmultiply_pixmap(pix); + + if (output) + { + char buf[512]; + sprintf(buf, output, pagenum); + if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm")) + fz_write_pnm(ctx, pix, buf); + else if (strstr(output, ".pam")) + fz_write_pam(ctx, pix, buf, savealpha); + else if (strstr(output, ".png")) + fz_write_png(ctx, pix, buf, savealpha); + else if (strstr(output, ".pbm")) { + fz_halftone *ht = fz_get_default_halftone(ctx, 1); + fz_bitmap *bit = fz_halftone_pixmap(ctx, pix, ht); + fz_write_pbm(ctx, bit, buf); + fz_drop_bitmap(ctx, bit); + fz_drop_halftone(ctx, ht); + } + } + + if (showmd5) + { + fz_md5 md5; + unsigned char digest[16]; + int 
i; + + fz_md5_init(&md5); + fz_md5_update(&md5, pix->samples, pix->w * pix->h * pix->n); + fz_md5_final(&md5, digest); + + printf(" "); + for (i = 0; i < 16; i++) + printf("%02x", digest[i]); + } + + fz_drop_pixmap(ctx, pix); + } + fz_catch(ctx) + { + fz_free_device(dev); + fz_drop_pixmap(ctx, pix); + fz_free_display_list(ctx, list); + pdf_free_page(ctx, page); + fz_rethrow(ctx); + } + } + + if (list) + fz_free_display_list(ctx, list); + + pdf_free_page(ctx, page); + + if (showtime) + { + int end = gettime(); + int diff = end - start; + + if (diff < timing.min) + { + timing.min = diff; + timing.minpage = pagenum; + } + if (diff > timing.max) + { + timing.max = diff; + timing.maxpage = pagenum; + } + timing.total += diff; + timing.count ++; + + printf(" %dms", diff); + } + + if (showmd5 || showtime) + printf("\n"); + + fz_flush_warnings(ctx); +} + +static void drawrange(pdf_document *doc, char *range) +{ + int page, spage, epage; + char *spec, *dash; + + spec = fz_strsep(&range, ","); + while (spec) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = pdf_count_pages(doc); + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + epage = atoi(dash + 1); + else + epage = pdf_count_pages(doc); + } + + spage = CLAMP(spage, 1, pdf_count_pages(doc)); + epage = CLAMP(epage, 1, pdf_count_pages(doc)); + + if (spage < epage) + for (page = spage; page <= epage; page++) + drawpage(doc, page); + else + for (page = spage; page >= epage; page--) + drawpage(doc, page); + + spec = fz_strsep(&range, ","); + } +} + +static void drawoutline(pdf_document *doc) +{ + fz_outline *outline = pdf_load_outline(doc); + if (showoutline > 1) + fz_debug_outline_xml(outline, 0); + else + fz_debug_outline(outline, 0); + fz_free_outline(outline); +} + +#ifdef MUPDF_COMBINED_EXE +int pdfdraw_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif +{ + char *password = ""; + int grayscale = 0; + int accelerate = 1; + pdf_document *doc = 
NULL; + int c; + fz_context *ctx; + + fz_var(doc); + + while ((c = fz_getopt(argc, argv, "lo:p:r:R:Aab:dgmtx5G:I")) != -1) + { + switch (c) + { + case 'o': output = fz_optarg; break; + case 'p': password = fz_optarg; break; + case 'r': resolution = atof(fz_optarg); break; + case 'R': rotation = atof(fz_optarg); break; + case 'A': accelerate = 0; break; + case 'a': savealpha = 1; break; + case 'b': alphabits = atoi(fz_optarg); break; + case 'l': showoutline++; break; + case 'm': showtime++; break; + case 't': showtext++; break; + case 'x': showxml++; break; + case '5': showmd5++; break; + case 'g': grayscale++; break; + case 'd': uselist = 0; break; + case 'G': gamma_value = atof(fz_optarg); break; + case 'I': invert++; break; + default: usage(); break; + } + } + + if (fz_optind == argc) + usage(); + + if (!showtext && !showxml && !showtime && !showmd5 && !showoutline && !output) + { + printf("nothing to do\n"); + exit(0); + } + + if (accelerate) + fz_accelerate(); + + ctx = fz_new_context(NULL, FZ_STORE_DEFAULT); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + fz_set_aa_level(ctx, alphabits); + + colorspace = fz_device_rgb; + if (grayscale) + colorspace = fz_device_gray; + if (output && strstr(output, ".pgm")) + colorspace = fz_device_gray; + if (output && strstr(output, ".ppm")) + colorspace = fz_device_rgb; + if (output && strstr(output, ".pbm")) + colorspace = fz_device_gray; + + timing.count = 0; + timing.total = 0; + timing.min = 1 << 30; + timing.max = 0; + timing.minpage = 0; + timing.maxpage = 0; + + if (showxml) + printf("\n"); + + fz_try(ctx) + { + while (fz_optind < argc) + { + filename = argv[fz_optind++]; + + fz_try(ctx) + { + doc = pdf_open_document(ctx, filename); + } + fz_catch(ctx) + { + fz_throw(ctx, "cannot open document: %s", filename); + } + + if (pdf_needs_password(doc)) + if (!pdf_authenticate_password(doc, password)) + fz_throw(ctx, "cannot authenticate password: %s", filename); + + if (showxml) + 
printf("\n", filename); + + if (showoutline) + drawoutline(doc); + + if (showtext || showxml || showtime || showmd5 || output) + { + if (fz_optind == argc || !isrange(argv[fz_optind])) + drawrange(doc, "1-"); + if (fz_optind < argc && isrange(argv[fz_optind])) + drawrange(doc, argv[fz_optind++]); + } + + if (showxml) + printf("\n"); + + pdf_close_document(doc); + doc = NULL; + } + } + fz_catch(ctx) + { + pdf_close_document(doc); + } + + if (showtime) + { + printf("total %dms / %d pages for an average of %dms\n", + timing.total, timing.count, timing.total / timing.count); + printf("fastest page %d: %dms\n", timing.minpage, timing.min); + printf("slowest page %d: %dms\n", timing.maxpage, timing.max); + } + + fz_free_context(ctx); + return 0; +} diff --git a/apps/mupdfextract.c b/apps/mupdfextract.c new file mode 100644 index 00000000..1407f7f3 --- /dev/null +++ b/apps/mupdfextract.c @@ -0,0 +1,215 @@ +/* + * pdfextract -- the ultimate way to extract images and fonts from pdfs + */ + +#include "fitz.h" +#include "mupdf.h" + +static pdf_document *doc = NULL; +static fz_context *ctx = NULL; +static int dorgb = 0; + +static void usage(void) +{ + fprintf(stderr, "usage: pdfextract [options] file.pdf [object numbers]\n"); + fprintf(stderr, "\t-p\tpassword\n"); + fprintf(stderr, "\t-r\tconvert images to rgb\n"); + exit(1); +} + +static int isimage(fz_obj *obj) +{ + fz_obj *type = fz_dict_gets(obj, "Subtype"); + return fz_is_name(type) && !strcmp(fz_to_name(type), "Image"); +} + +static int isfontdesc(fz_obj *obj) +{ + fz_obj *type = fz_dict_gets(obj, "Type"); + return fz_is_name(type) && !strcmp(fz_to_name(type), "FontDescriptor"); +} + +static void saveimage(int num) +{ + fz_pixmap *img; + fz_obj *ref; + char name[1024]; + + ref = fz_new_indirect(ctx, num, 0, doc); + + /* TODO: detect DCTD and save as jpeg */ + + img = pdf_load_image(doc, ref); + + if (dorgb && img->colorspace && img->colorspace != fz_device_rgb) + { + fz_pixmap *temp; + temp = fz_new_pixmap_with_rect(ctx, 
fz_device_rgb, fz_bound_pixmap(img)); + fz_convert_pixmap(ctx, img, temp); + fz_drop_pixmap(ctx, img); + img = temp; + } + + if (img->n <= 4) + { + sprintf(name, "img-%04d.png", num); + printf("extracting image %s\n", name); + fz_write_png(ctx, img, name, 0); + } + else + { + sprintf(name, "img-%04d.pam", num); + printf("extracting image %s\n", name); + fz_write_pam(ctx, img, name, 0); + } + + fz_drop_pixmap(ctx, img); + fz_drop_obj(ref); +} + +static void savefont(fz_obj *dict, int num) +{ + char name[1024]; + char *subtype; + fz_buffer *buf; + fz_obj *stream = NULL; + fz_obj *obj; + char *ext = ""; + FILE *f; + char *fontname = "font"; + int n; + + obj = fz_dict_gets(dict, "FontName"); + if (obj) + fontname = fz_to_name(obj); + + obj = fz_dict_gets(dict, "FontFile"); + if (obj) + { + stream = obj; + ext = "pfa"; + } + + obj = fz_dict_gets(dict, "FontFile2"); + if (obj) + { + stream = obj; + ext = "ttf"; + } + + obj = fz_dict_gets(dict, "FontFile3"); + if (obj) + { + stream = obj; + + obj = fz_dict_gets(obj, "Subtype"); + if (obj && !fz_is_name(obj)) + fz_throw(ctx, "Invalid font descriptor subtype"); + + subtype = fz_to_name(obj); + if (!strcmp(subtype, "Type1C")) + ext = "cff"; + else if (!strcmp(subtype, "CIDFontType0C")) + ext = "cid"; + else + fz_throw(ctx, "Unhandled font type '%s'", subtype); + } + + if (!stream) + { + fz_warn(ctx, "Unhandled font type"); + return; + } + + buf = pdf_load_stream(doc, fz_to_num(stream), fz_to_gen(stream)); + + sprintf(name, "%s-%04d.%s", fontname, num, ext); + printf("extracting font %s\n", name); + + f = fopen(name, "wb"); + if (!f) + fz_throw(ctx, "Error creating font file"); + + n = fwrite(buf->data, 1, buf->len, f); + if (n < buf->len) + fz_throw(ctx, "Error writing font file"); + + if (fclose(f) < 0) + fz_throw(ctx, "Error closing font file"); + + fz_drop_buffer(ctx, buf); +} + +static void showobject(int num) +{ + fz_obj *obj; + + if (!doc) + fz_throw(ctx, "no file specified"); + + obj = pdf_load_object(doc, num, 0); + 
+ if (isimage(obj)) + saveimage(num); + else if (isfontdesc(obj)) + savefont(obj, num); + + fz_drop_obj(obj); +} + +#ifdef MUPDF_COMBINED_EXE +int pdfextract_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif +{ + char *infile; + char *password = ""; + int c, o; + + while ((c = fz_getopt(argc, argv, "p:r")) != -1) + { + switch (c) + { + case 'p': password = fz_optarg; break; + case 'r': dorgb++; break; + default: usage(); break; + } + } + + if (fz_optind == argc) + usage(); + + infile = argv[fz_optind++]; + + ctx = fz_new_context(NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + doc = pdf_open_document(ctx, infile); + if (pdf_needs_password(doc)) + if (!pdf_authenticate_password(doc, password)) + fz_throw(ctx, "cannot authenticate password: %s\n", infile); + + if (fz_optind == argc) + { + for (o = 0; o < doc->len; o++) + showobject(o); + } + else + { + while (fz_optind < argc) + { + showobject(atoi(argv[fz_optind])); + fz_optind++; + } + } + + pdf_close_document(doc); + fz_flush_warnings(ctx); + fz_free_context(ctx); + return 0; +} diff --git a/apps/mupdfinfo.c b/apps/mupdfinfo.c new file mode 100644 index 00000000..c6c6b35c --- /dev/null +++ b/apps/mupdfinfo.c @@ -0,0 +1,1020 @@ +/* + * Information tool. + * Print information about the input pdf. 
+ */ + +#include "fitz.h" +#include "mupdf.h" + +pdf_document *xref; +fz_context *ctx; +int pagecount; + +void closexref(void); + +void openxref(char *filename, char *password, int dieonbadpass, int loadpages); + +enum +{ + DIMENSIONS = 0x01, + FONTS = 0x02, + IMAGES = 0x04, + SHADINGS = 0x08, + PATTERNS = 0x10, + XOBJS = 0x20, + ALL = DIMENSIONS | FONTS | IMAGES | SHADINGS | PATTERNS | XOBJS +}; + +struct info +{ + int page; + fz_obj *pageref; + fz_obj *pageobj; + union { + struct { + fz_obj *obj; + } info; + struct { + fz_obj *obj; + } crypt; + struct { + fz_obj *obj; + fz_rect *bbox; + } dim; + struct { + fz_obj *obj; + fz_obj *subtype; + fz_obj *name; + } font; + struct { + fz_obj *obj; + fz_obj *width; + fz_obj *height; + fz_obj *bpc; + fz_obj *filter; + fz_obj *cs; + fz_obj *altcs; + } image; + struct { + fz_obj *obj; + fz_obj *type; + } shading; + struct { + fz_obj *obj; + fz_obj *type; + fz_obj *paint; + fz_obj *tiling; + fz_obj *shading; + } pattern; + struct { + fz_obj *obj; + fz_obj *groupsubtype; + fz_obj *reference; + } form; + } u; +}; + +static struct info *dim = NULL; +static int dims = 0; +static struct info *font = NULL; +static int fonts = 0; +static struct info *image = NULL; +static int images = 0; +static struct info *shading = NULL; +static int shadings = 0; +static struct info *pattern = NULL; +static int patterns = 0; +static struct info *form = NULL; +static int forms = 0; +static struct info *psobj = NULL; +static int psobjs = 0; + +void closexref(void) +{ + int i; + if (xref) + { + pdf_close_document(xref); + xref = NULL; + } + + if (dim) + { + for (i = 0; i < dims; i++) + fz_free(ctx, dim[i].u.dim.bbox); + fz_free(ctx, dim); + dim = NULL; + dims = 0; + } + + if (font) + { + fz_free(ctx, font); + font = NULL; + fonts = 0; + } + + if (image) + { + fz_free(ctx, image); + image = NULL; + images = 0; + } + + if (shading) + { + fz_free(ctx, shading); + shading = NULL; + shadings = 0; + } + + if (pattern) + { + fz_free(ctx, pattern); + pattern 
= NULL; + patterns = 0; + } + + if (form) + { + fz_free(ctx, form); + form = NULL; + forms = 0; + } + + if (psobj) + { + fz_free(ctx, psobj); + psobj = NULL; + psobjs = 0; + } +} + +static void +infousage(void) +{ + fprintf(stderr, + "usage: pdfinfo [options] [file.pdf ... ]\n" + "\t-d -\tpassword for decryption\n" + "\t-f\tlist fonts\n" + "\t-i\tlist images\n" + "\t-m\tlist dimensions\n" + "\t-p\tlist patterns\n" + "\t-s\tlist shadings\n" + "\t-x\tlist form and postscript xobjects\n"); + exit(1); +} + +static void +showglobalinfo(void) +{ + fz_obj *obj; + + printf("\nPDF-%d.%d\n", xref->version / 10, xref->version % 10); + + obj = fz_dict_gets(xref->trailer, "Info"); + if (obj) + { + printf("Info object (%d %d R):\n", fz_to_num(obj), fz_to_gen(obj)); + fz_debug_obj(fz_resolve_indirect(obj)); + } + + obj = fz_dict_gets(xref->trailer, "Encrypt"); + if (obj) + { + printf("\nEncryption object (%d %d R):\n", fz_to_num(obj), fz_to_gen(obj)); + fz_debug_obj(fz_resolve_indirect(obj)); + } + + printf("\nPages: %d\n\n", pagecount); +} + +static void +gatherdimensions(int page, fz_obj *pageref, fz_obj *pageobj) +{ + fz_rect bbox; + fz_obj *obj; + int j; + + obj = fz_dict_gets(pageobj, "MediaBox"); + if (!fz_is_array(obj)) + return; + + bbox = pdf_to_rect(ctx, obj); + + for (j = 0; j < dims; j++) + if (!memcmp(dim[j].u.dim.bbox, &bbox, sizeof (fz_rect))) + break; + + if (j < dims) + return; + + dims++; + + dim = fz_resize_array(ctx, dim, dims, sizeof(struct info)); + dim[dims - 1].page = page; + dim[dims - 1].pageref = pageref; + dim[dims - 1].pageobj = pageobj; + dim[dims - 1].u.dim.bbox = fz_malloc(ctx, sizeof(fz_rect)); + memcpy(dim[dims - 1].u.dim.bbox, &bbox, sizeof (fz_rect)); + + return; +} + +static void +gatherfonts(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) +{ + int i, n; + + n = fz_dict_len(dict); + for (i = 0; i < n; i++) + { + fz_obj *fontdict = NULL; + fz_obj *subtype = NULL; + fz_obj *basefont = NULL; + fz_obj *name = NULL; + int k; + + fontdict 
= fz_dict_get_val(dict, i); + if (!fz_is_dict(fontdict)) + { + fz_warn(ctx, "not a font dict (%d %d R)", fz_to_num(fontdict), fz_to_gen(fontdict)); + continue; + } + + subtype = fz_dict_gets(fontdict, "Subtype"); + basefont = fz_dict_gets(fontdict, "BaseFont"); + if (!basefont || fz_is_null(basefont)) + name = fz_dict_gets(fontdict, "Name"); + + for (k = 0; k < fonts; k++) + if (!fz_objcmp(font[k].u.font.obj, fontdict)) + break; + + if (k < fonts) + continue; + + fonts++; + + font = fz_resize_array(ctx, font, fonts, sizeof(struct info)); + font[fonts - 1].page = page; + font[fonts - 1].pageref = pageref; + font[fonts - 1].pageobj = pageobj; + font[fonts - 1].u.font.obj = fontdict; + font[fonts - 1].u.font.subtype = subtype; + font[fonts - 1].u.font.name = basefont ? basefont : name; + } +} + +static void +gatherimages(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) +{ + int i, n; + + n = fz_dict_len(dict); + for (i = 0; i < n; i++) + { + fz_obj *imagedict; + fz_obj *type; + fz_obj *width; + fz_obj *height; + fz_obj *bpc = NULL; + fz_obj *filter = NULL; + fz_obj *cs = NULL; + fz_obj *altcs; + int k; + + imagedict = fz_dict_get_val(dict, i); + if (!fz_is_dict(imagedict)) + { + fz_warn(ctx, "not an image dict (%d %d R)", fz_to_num(imagedict), fz_to_gen(imagedict)); + continue; + } + + type = fz_dict_gets(imagedict, "Subtype"); + if (strcmp(fz_to_name(type), "Image")) + continue; + + filter = fz_dict_gets(imagedict, "Filter"); + + altcs = NULL; + cs = fz_dict_gets(imagedict, "ColorSpace"); + if (fz_is_array(cs)) + { + fz_obj *cses = cs; + + cs = fz_array_get(cses, 0); + if (fz_is_name(cs) && (!strcmp(fz_to_name(cs), "DeviceN") || !strcmp(fz_to_name(cs), "Separation"))) + { + altcs = fz_array_get(cses, 2); + if (fz_is_array(altcs)) + altcs = fz_array_get(altcs, 0); + } + } + + width = fz_dict_gets(imagedict, "Width"); + height = fz_dict_gets(imagedict, "Height"); + bpc = fz_dict_gets(imagedict, "BitsPerComponent"); + + for (k = 0; k < images; k++) + if 
(!fz_objcmp(image[k].u.image.obj, imagedict)) + break; + + if (k < images) + continue; + + images++; + + image = fz_resize_array(ctx, image, images, sizeof(struct info)); + image[images - 1].page = page; + image[images - 1].pageref = pageref; + image[images - 1].pageobj = pageobj; + image[images - 1].u.image.obj = imagedict; + image[images - 1].u.image.width = width; + image[images - 1].u.image.height = height; + image[images - 1].u.image.bpc = bpc; + image[images - 1].u.image.filter = filter; + image[images - 1].u.image.cs = cs; + image[images - 1].u.image.altcs = altcs; + } +} + +static void +gatherforms(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) +{ + int i, n; + + n = fz_dict_len(dict); + for (i = 0; i < n; i++) + { + fz_obj *xobjdict; + fz_obj *type; + fz_obj *subtype; + fz_obj *group; + fz_obj *groupsubtype; + fz_obj *reference; + int k; + + xobjdict = fz_dict_get_val(dict, i); + if (!fz_is_dict(xobjdict)) + { + fz_warn(ctx, "not a xobject dict (%d %d R)", fz_to_num(xobjdict), fz_to_gen(xobjdict)); + continue; + } + + type = fz_dict_gets(xobjdict, "Subtype"); + if (strcmp(fz_to_name(type), "Form")) + continue; + + subtype = fz_dict_gets(xobjdict, "Subtype2"); + if (!strcmp(fz_to_name(subtype), "PS")) + continue; + + group = fz_dict_gets(xobjdict, "Group"); + groupsubtype = fz_dict_gets(group, "S"); + reference = fz_dict_gets(xobjdict, "Ref"); + + for (k = 0; k < forms; k++) + if (!fz_objcmp(form[k].u.form.obj, xobjdict)) + break; + + if (k < forms) + continue; + + forms++; + + form = fz_resize_array(ctx, form, forms, sizeof(struct info)); + form[forms - 1].page = page; + form[forms - 1].pageref = pageref; + form[forms - 1].pageobj = pageobj; + form[forms - 1].u.form.obj = xobjdict; + form[forms - 1].u.form.groupsubtype = groupsubtype; + form[forms - 1].u.form.reference = reference; + } +} + +static void +gatherpsobjs(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) +{ + int i, n; + + n = fz_dict_len(dict); + for (i = 0; i < n; i++) + { 
+ fz_obj *xobjdict; + fz_obj *type; + fz_obj *subtype; + int k; + + xobjdict = fz_dict_get_val(dict, i); + if (!fz_is_dict(xobjdict)) + { + fz_warn(ctx, "not a xobject dict (%d %d R)", fz_to_num(xobjdict), fz_to_gen(xobjdict)); + continue; + } + + type = fz_dict_gets(xobjdict, "Subtype"); + subtype = fz_dict_gets(xobjdict, "Subtype2"); + if (strcmp(fz_to_name(type), "PS") && + (strcmp(fz_to_name(type), "Form") || strcmp(fz_to_name(subtype), "PS"))) + continue; + + for (k = 0; k < psobjs; k++) + if (!fz_objcmp(psobj[k].u.form.obj, xobjdict)) + break; + + if (k < psobjs) + continue; + + psobjs++; + + psobj = fz_resize_array(ctx, psobj, psobjs, sizeof(struct info)); + psobj[psobjs - 1].page = page; + psobj[psobjs - 1].pageref = pageref; + psobj[psobjs - 1].pageobj = pageobj; + psobj[psobjs - 1].u.form.obj = xobjdict; + } +} + +static void +gathershadings(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) +{ + int i, n; + + n = fz_dict_len(dict); + for (i = 0; i < n; i++) + { + fz_obj *shade; + fz_obj *type; + int k; + + shade = fz_dict_get_val(dict, i); + if (!fz_is_dict(shade)) + { + fz_warn(ctx, "not a shading dict (%d %d R)", fz_to_num(shade), fz_to_gen(shade)); + continue; + } + + type = fz_dict_gets(shade, "ShadingType"); + if (!fz_is_int(type) || fz_to_int(type) < 1 || fz_to_int(type) > 7) + { + fz_warn(ctx, "not a shading type (%d %d R)", fz_to_num(shade), fz_to_gen(shade)); + type = NULL; + } + + for (k = 0; k < shadings; k++) + if (!fz_objcmp(shading[k].u.shading.obj, shade)) + break; + + if (k < shadings) + continue; + + shadings++; + + shading = fz_resize_array(ctx, shading, shadings, sizeof(struct info)); + shading[shadings - 1].page = page; + shading[shadings - 1].pageref = pageref; + shading[shadings - 1].pageobj = pageobj; + shading[shadings - 1].u.shading.obj = shade; + shading[shadings - 1].u.shading.type = type; + } +} + +static void +gatherpatterns(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) +{ + int i, n; + + n = 
fz_dict_len(dict); + for (i = 0; i < n; i++) + { + fz_obj *patterndict; + fz_obj *type; + fz_obj *paint = NULL; + fz_obj *tiling = NULL; + fz_obj *shading = NULL; + int k; + + patterndict = fz_dict_get_val(dict, i); + if (!fz_is_dict(patterndict)) + { + fz_warn(ctx, "not a pattern dict (%d %d R)", fz_to_num(patterndict), fz_to_gen(patterndict)); + continue; + } + + type = fz_dict_gets(patterndict, "PatternType"); + if (!fz_is_int(type) || fz_to_int(type) < 1 || fz_to_int(type) > 2) + { + fz_warn(ctx, "not a pattern type (%d %d R)", fz_to_num(patterndict), fz_to_gen(patterndict)); + type = NULL; + } + + if (fz_to_int(type) == 1) + { + paint = fz_dict_gets(patterndict, "PaintType"); + if (!fz_is_int(paint) || fz_to_int(paint) < 1 || fz_to_int(paint) > 2) + { + fz_warn(ctx, "not a pattern paint type (%d %d R)", fz_to_num(patterndict), fz_to_gen(patterndict)); + paint = NULL; + } + + tiling = fz_dict_gets(patterndict, "TilingType"); + if (!fz_is_int(tiling) || fz_to_int(tiling) < 1 || fz_to_int(tiling) > 3) + { + fz_warn(ctx, "not a pattern tiling type (%d %d R)", fz_to_num(patterndict), fz_to_gen(patterndict)); + tiling = NULL; + } + } + else + { + shading = fz_dict_gets(patterndict, "Shading"); + } + + for (k = 0; k < patterns; k++) + if (!fz_objcmp(pattern[k].u.pattern.obj, patterndict)) + break; + + if (k < patterns) + continue; + + patterns++; + + pattern = fz_resize_array(ctx, pattern, patterns, sizeof(struct info)); + pattern[patterns - 1].page = page; + pattern[patterns - 1].pageref = pageref; + pattern[patterns - 1].pageobj = pageobj; + pattern[patterns - 1].u.pattern.obj = patterndict; + pattern[patterns - 1].u.pattern.type = type; + pattern[patterns - 1].u.pattern.paint = paint; + pattern[patterns - 1].u.pattern.tiling = tiling; + pattern[patterns - 1].u.pattern.shading = shading; + } +} + +static void +gatherresourceinfo(int page, fz_obj *rsrc) +{ + fz_obj *pageobj; + fz_obj *pageref; + fz_obj *font; + fz_obj *xobj; + fz_obj *shade; + fz_obj *pattern; + 
fz_obj *subrsrc; + int i; + + pageobj = xref->page_objs[page-1]; + pageref = xref->page_refs[page-1]; + + if (!pageobj) + fz_throw(ctx, "cannot retrieve info from page %d", page); + + font = fz_dict_gets(rsrc, "Font"); + if (font) + { + int n; + + gatherfonts(page, pageref, pageobj, font); + n = fz_dict_len(font); + for (i = 0; i < n; i++) + { + fz_obj *obj = fz_dict_get_val(font, i); + + subrsrc = fz_dict_gets(obj, "Resources"); + if (subrsrc && fz_objcmp(rsrc, subrsrc)) + gatherresourceinfo(page, subrsrc); + } + } + + xobj = fz_dict_gets(rsrc, "XObject"); + if (xobj) + { + int n; + + gatherimages(page, pageref, pageobj, xobj); + gatherforms(page, pageref, pageobj, xobj); + gatherpsobjs(page, pageref, pageobj, xobj); + n = fz_dict_len(xobj); + for (i = 0; i < n; i++) + { + fz_obj *obj = fz_dict_get_val(xobj, i); + subrsrc = fz_dict_gets(obj, "Resources"); + if (subrsrc && fz_objcmp(rsrc, subrsrc)) + gatherresourceinfo(page, subrsrc); + } + } + + shade = fz_dict_gets(rsrc, "Shading"); + if (shade) + gathershadings(page, pageref, pageobj, shade); + + pattern = fz_dict_gets(rsrc, "Pattern"); + if (pattern) + { + int n; + gatherpatterns(page, pageref, pageobj, pattern); + n = fz_dict_len(pattern); + for (i = 0; i < n; i++) + { + fz_obj *obj = fz_dict_get_val(pattern, i); + subrsrc = fz_dict_gets(obj, "Resources"); + if (subrsrc && fz_objcmp(rsrc, subrsrc)) + gatherresourceinfo(page, subrsrc); + } + } +} + +static void +gatherpageinfo(int page) +{ + fz_obj *pageobj; + fz_obj *pageref; + fz_obj *rsrc; + + pageobj = xref->page_objs[page-1]; + pageref = xref->page_refs[page-1]; + + if (!pageobj) + fz_throw(ctx, "cannot retrieve info from page %d", page); + + gatherdimensions(page, pageref, pageobj); + + rsrc = fz_dict_gets(pageobj, "Resources"); + gatherresourceinfo(page, rsrc); +} + +static void +printinfo(char *filename, int show, int page) +{ + int i; + int j; + +#define PAGE_FMT "\t% 5d (% 7d %1d R): " + + if (show & DIMENSIONS && dims > 0) + { + printf("Mediaboxes 
(%d):\n", dims); + for (i = 0; i < dims; i++) + { + printf(PAGE_FMT "[ %g %g %g %g ]\n", + dim[i].page, + fz_to_num(dim[i].pageref), fz_to_gen(dim[i].pageref), + dim[i].u.dim.bbox->x0, + dim[i].u.dim.bbox->y0, + dim[i].u.dim.bbox->x1, + dim[i].u.dim.bbox->y1); + } + printf("\n"); + } + + if (show & FONTS && fonts > 0) + { + printf("Fonts (%d):\n", fonts); + for (i = 0; i < fonts; i++) + { + printf(PAGE_FMT "%s '%s' (%d %d R)\n", + font[i].page, + fz_to_num(font[i].pageref), fz_to_gen(font[i].pageref), + fz_to_name(font[i].u.font.subtype), + fz_to_name(font[i].u.font.name), + fz_to_num(font[i].u.font.obj), fz_to_gen(font[i].u.font.obj)); + } + printf("\n"); + } + + if (show & IMAGES && images > 0) + { + printf("Images (%d):\n", images); + for (i = 0; i < images; i++) + { + char *cs = NULL; + char *altcs = NULL; + + printf(PAGE_FMT "[ ", + image[i].page, + fz_to_num(image[i].pageref), fz_to_gen(image[i].pageref)); + + if (fz_is_array(image[i].u.image.filter)) + { + int n = fz_array_len(image[i].u.image.filter); + for (j = 0; j < n; j++) + { + fz_obj *obj = fz_array_get(image[i].u.image.filter, j); + char *filter = fz_strdup(ctx, fz_to_name(obj)); + + if (strstr(filter, "Decode")) + *(strstr(filter, "Decode")) = '\0'; + + printf("%s%s", + filter, + j == fz_array_len(image[i].u.image.filter) - 1 ? 
"" : " "); + fz_free(ctx, filter); + } + } + else if (image[i].u.image.filter) + { + fz_obj *obj = image[i].u.image.filter; + char *filter = fz_strdup(ctx, fz_to_name(obj)); + + if (strstr(filter, "Decode")) + *(strstr(filter, "Decode")) = '\0'; + + printf("%s", filter); + fz_free(ctx, filter); + } + else + printf("Raw"); + + if (image[i].u.image.cs) + { + cs = fz_strdup(ctx, fz_to_name(image[i].u.image.cs)); + + if (!strncmp(cs, "Device", 6)) + { + int len = strlen(cs + 6); + memmove(cs + 3, cs + 6, len + 1); + cs[3 + len + 1] = '\0'; + } + if (strstr(cs, "ICC")) + fz_strlcpy(cs, "ICC", 4); + if (strstr(cs, "Indexed")) + fz_strlcpy(cs, "Idx", 4); + if (strstr(cs, "Pattern")) + fz_strlcpy(cs, "Pat", 4); + if (strstr(cs, "Separation")) + fz_strlcpy(cs, "Sep", 4); + } + if (image[i].u.image.altcs) + { + altcs = fz_strdup(ctx, fz_to_name(image[i].u.image.altcs)); + + if (!strncmp(altcs, "Device", 6)) + { + int len = strlen(altcs + 6); + memmove(altcs + 3, altcs + 6, len + 1); + altcs[3 + len + 1] = '\0'; + } + if (strstr(altcs, "ICC")) + fz_strlcpy(altcs, "ICC", 4); + if (strstr(altcs, "Indexed")) + fz_strlcpy(altcs, "Idx", 4); + if (strstr(altcs, "Pattern")) + fz_strlcpy(altcs, "Pat", 4); + if (strstr(altcs, "Separation")) + fz_strlcpy(altcs, "Sep", 4); + } + + printf(" ] %dx%d %dbpc %s%s%s (%d %d R)\n", + fz_to_int(image[i].u.image.width), + fz_to_int(image[i].u.image.height), + image[i].u.image.bpc ? fz_to_int(image[i].u.image.bpc) : 1, + image[i].u.image.cs ? cs : "ImageMask", + image[i].u.image.altcs ? " " : "", + image[i].u.image.altcs ? 
altcs : "", + fz_to_num(image[i].u.image.obj), fz_to_gen(image[i].u.image.obj)); + + fz_free(ctx, cs); + fz_free(ctx, altcs); + } + printf("\n"); + } + + if (show & SHADINGS && shadings > 0) + { + printf("Shading patterns (%d):\n", shadings); + for (i = 0; i < shadings; i++) + { + char *shadingtype[] = + { + "", + "Function", + "Axial", + "Radial", + "Triangle mesh", + "Lattice", + "Coons patch", + "Tensor patch", + }; + + printf(PAGE_FMT "%s (%d %d R)\n", + shading[i].page, + fz_to_num(shading[i].pageref), fz_to_gen(shading[i].pageref), + shadingtype[fz_to_int(shading[i].u.shading.type)], + fz_to_num(shading[i].u.shading.obj), fz_to_gen(shading[i].u.shading.obj)); + } + printf("\n"); + } + + if (show & PATTERNS && patterns > 0) + { + printf("Patterns (%d):\n", patterns); + for (i = 0; i < patterns; i++) + { + if (fz_to_int(pattern[i].u.pattern.type) == 1) + { + char *painttype[] = + { + "", + "Colored", + "Uncolored", + }; + char *tilingtype[] = + { + "", + "Constant", + "No distortion", + "Constant/fast tiling", + }; + + printf(PAGE_FMT "Tiling %s %s (%d %d R)\n", + pattern[i].page, + fz_to_num(pattern[i].pageref), fz_to_gen(pattern[i].pageref), + painttype[fz_to_int(pattern[i].u.pattern.paint)], + tilingtype[fz_to_int(pattern[i].u.pattern.tiling)], + fz_to_num(pattern[i].u.pattern.obj), fz_to_gen(pattern[i].u.pattern.obj)); + } + else + { + printf(PAGE_FMT "Shading %d %d R (%d %d R)\n", + pattern[i].page, + fz_to_num(pattern[i].pageref), fz_to_gen(pattern[i].pageref), + fz_to_num(pattern[i].u.pattern.shading), fz_to_gen(pattern[i].u.pattern.shading), + fz_to_num(pattern[i].u.pattern.obj), fz_to_gen(pattern[i].u.pattern.obj)); + } + } + printf("\n"); + } + + if (show & XOBJS && forms > 0) + { + printf("Form xobjects (%d):\n", forms); + for (i = 0; i < forms; i++) + { + printf(PAGE_FMT "Form%s%s%s%s (%d %d R)\n", + form[i].page, + fz_to_num(form[i].pageref), fz_to_gen(form[i].pageref), + form[i].u.form.groupsubtype ? " " : "", + form[i].u.form.groupsubtype ? 
fz_to_name(form[i].u.form.groupsubtype) : "", + form[i].u.form.groupsubtype ? " Group" : "", + form[i].u.form.reference ? " Reference" : "", + fz_to_num(form[i].u.form.obj), fz_to_gen(form[i].u.form.obj)); + } + printf("\n"); + } + + if (show & XOBJS && psobjs > 0) + { + printf("Postscript xobjects (%d):\n", psobjs); + for (i = 0; i < psobjs; i++) + { + printf(PAGE_FMT "(%d %d R)\n", + psobj[i].page, + fz_to_num(psobj[i].pageref), fz_to_gen(psobj[i].pageref), + fz_to_num(psobj[i].u.form.obj), fz_to_gen(psobj[i].u.form.obj)); + } + printf("\n"); + } +} + +static void +showinfo(char *filename, int show, char *pagelist) +{ + int page, spage, epage; + char *spec, *dash; + int allpages; + + if (!xref) + infousage(); + + allpages = !strcmp(pagelist, "1-"); + + spec = fz_strsep(&pagelist, ","); + while (spec) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = pagecount; + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + epage = atoi(dash + 1); + else + epage = pagecount; + } + + if (spage > epage) + page = spage, spage = epage, epage = page; + + if (spage < 1) + spage = 1; + if (epage > pagecount) + epage = pagecount; + if (spage > pagecount) + spage = pagecount; + + if (allpages) + printf("Retrieving info from pages %d-%d...\n", spage, epage); + if (spage >= 1) + { + for (page = spage; page <= epage; page++) + { + gatherpageinfo(page); + if (!allpages) + { + printf("Page %d:\n", page); + printinfo(filename, show, page); + printf("\n"); + } + } + } + + spec = fz_strsep(&pagelist, ","); + } + + if (allpages) + printinfo(filename, show, -1); +} + +#ifdef MUPDF_COMBINED_EXE +int pdfinfo_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif +{ + enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state; + char *filename = ""; + char *password = ""; + int show = ALL; + int c; + + while ((c = fz_getopt(argc, argv, "mfispxd:")) != -1) + { + switch (c) + { + case 'm': if (show == ALL) show = DIMENSIONS; 
else show |= DIMENSIONS; break; + case 'f': if (show == ALL) show = FONTS; else show |= FONTS; break; + case 'i': if (show == ALL) show = IMAGES; else show |= IMAGES; break; + case 's': if (show == ALL) show = SHADINGS; else show |= SHADINGS; break; + case 'p': if (show == ALL) show = PATTERNS; else show |= PATTERNS; break; + case 'x': if (show == ALL) show = XOBJS; else show |= XOBJS; break; + case 'd': password = fz_optarg; break; + default: + infousage(); + break; + } + } + + if (fz_optind == argc) + infousage(); + + ctx = fz_new_context(NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + state = NO_FILE_OPENED; + while (fz_optind < argc) + { + if (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF")) + { + if (state == NO_INFO_GATHERED) + { + showinfo(filename, show, "1-"); + closexref(); + } + + closexref(); + + filename = argv[fz_optind]; + printf("%s:\n", filename); + xref = pdf_open_document(ctx, filename); + if (pdf_needs_password(xref)) + if (!pdf_authenticate_password(xref, password)) + fz_throw(ctx, "cannot authenticate password: %s\n", filename); + pagecount = pdf_count_pages(xref); + + showglobalinfo(); + state = NO_INFO_GATHERED; + } + else + { + showinfo(filename, show, argv[fz_optind]); + state = INFO_SHOWN; + } + + fz_optind++; + } + + if (state == NO_INFO_GATHERED) + showinfo(filename, show, "1-"); + + closexref(); + fz_free_context(ctx); + return 0; +} diff --git a/apps/mupdfshow.c b/apps/mupdfshow.c new file mode 100644 index 00000000..53578fd7 --- /dev/null +++ b/apps/mupdfshow.c @@ -0,0 +1,239 @@ +/* + * pdfshow -- the ultimate pdf debugging tool + */ + +#include "fitz.h" +#include "mupdf.h" + +static pdf_document *doc = NULL; +static fz_context *ctx = NULL; +static int showbinary = 0; +static int showdecode = 1; +static int showcolumn; + +static void usage(void) +{ + fprintf(stderr, "usage: pdfshow [options] file.pdf [grepable] [xref] [trailer] [pagetree] [object 
numbers]\n"); + fprintf(stderr, "\t-b\tprint streams as binary data\n"); + fprintf(stderr, "\t-e\tprint encoded streams (don't decode)\n"); + fprintf(stderr, "\t-p\tpassword\n"); + exit(1); +} + +static void showtrailer(void) +{ + if (!doc) + fz_throw(ctx, "no file specified"); + printf("trailer\n"); + fz_debug_obj(doc->trailer); + printf("\n"); +} + +static void showxref(void) +{ + if (!doc) + fz_throw(ctx, "no file specified"); + pdf_debug_xref(doc); + printf("\n"); +} + +static void showpagetree(void) +{ + fz_obj *ref; + int count; + int i; + + if (!doc) + fz_throw(ctx, "no file specified"); + + count = pdf_count_pages(doc); + for (i = 0; i < count; i++) + { + ref = doc->page_refs[i]; + printf("page %d = %d %d R\n", i + 1, fz_to_num(ref), fz_to_gen(ref)); + } + printf("\n"); +} + +static void showsafe(unsigned char *buf, int n) +{ + int i; + for (i = 0; i < n; i++) { + if (buf[i] == '\r' || buf[i] == '\n') { + putchar('\n'); + showcolumn = 0; + } + else if (buf[i] < 32 || buf[i] > 126) { + putchar('.'); + showcolumn ++; + } + else { + putchar(buf[i]); + showcolumn ++; + } + if (showcolumn == 79) { + putchar('\n'); + showcolumn = 0; + } + } +} + +static void showstream(int num, int gen) +{ + fz_stream *stm; + unsigned char buf[2048]; + int n; + + showcolumn = 0; + + if (showdecode) + stm = pdf_open_stream(doc, num, gen); + else + stm = pdf_open_raw_stream(doc, num, gen); + + while (1) + { + n = fz_read(stm, buf, sizeof buf); + if (n == 0) + break; + if (showbinary) + fwrite(buf, 1, n, stdout); + else + showsafe(buf, n); + } + + fz_close(stm); +} + +static void showobject(int num, int gen) +{ + fz_obj *obj; + + if (!doc) + fz_throw(ctx, "no file specified"); + + obj = pdf_load_object(doc, num, gen); + + if (pdf_is_stream(doc, num, gen)) + { + if (showbinary) + { + showstream(num, gen); + } + else + { + printf("%d %d obj\n", num, gen); + fz_debug_obj(obj); + printf("stream\n"); + showstream(num, gen); + printf("endstream\n"); + printf("endobj\n\n"); + } + } + else 
+ { + printf("%d %d obj\n", num, gen); + fz_debug_obj(obj); + printf("endobj\n\n"); + } + + fz_drop_obj(obj); +} + +static void showgrep(char *filename) +{ + fz_obj *obj; + int i; + + for (i = 0; i < doc->len; i++) + { + if (doc->table[i].type == 'n' || doc->table[i].type == 'o') + { + fz_try(ctx) + { + obj = pdf_load_object(doc, i, 0); + } + fz_catch(ctx) + { + fz_warn(ctx, "skipping object (%d 0 R)", i); + continue; + } + + fz_sort_dict(obj); + + printf("%s:%d: ", filename, i); + fz_fprint_obj(stdout, obj, 1); + + fz_drop_obj(obj); + } + } + + printf("%s:trailer: ", filename); + fz_fprint_obj(stdout, doc->trailer, 1); +} + +#ifdef MUPDF_COMBINED_EXE +int pdfshow_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif +{ + char *password = NULL; /* don't throw errors if encrypted */ + char *filename; + int c; + + while ((c = fz_getopt(argc, argv, "p:be")) != -1) + { + switch (c) + { + case 'p': password = fz_optarg; break; + case 'b': showbinary = 1; break; + case 'e': showdecode = 0; break; + default: usage(); break; + } + } + + if (fz_optind == argc) + usage(); + + filename = argv[fz_optind++]; + + ctx = fz_new_context(NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + fz_var(doc); + fz_try(ctx) + { + doc = pdf_open_document(ctx, filename); + if (pdf_needs_password(doc)) + if (!pdf_authenticate_password(doc, password)) + fz_throw(ctx, "cannot authenticate password: %s", filename); + + if (fz_optind == argc) + showtrailer(); + + while (fz_optind < argc) + { + switch (argv[fz_optind][0]) + { + case 't': showtrailer(); break; + case 'x': showxref(); break; + case 'p': showpagetree(); break; + case 'g': showgrep(filename); break; + default: showobject(atoi(argv[fz_optind]), 0); break; + } + fz_optind++; + } + } + fz_catch(ctx) + { + } + + pdf_close_document(doc); + fz_free_context(ctx); + return 0; +} diff --git a/apps/muxpsdraw.c b/apps/muxpsdraw.c new file mode 100644 index 
00000000..8fec8efa --- /dev/null +++ b/apps/muxpsdraw.c @@ -0,0 +1,379 @@ +#include "fitz.h" +#include "muxps.h" + +#ifdef _MSC_VER +#include +#else +#include +#endif + +char *output = NULL; +float resolution = 72; + +int showxml = 0; +int showtext = 0; +int showtime = 0; +int showmd5 = 0; +int showoutline = 0; +int savealpha = 0; +int uselist = 1; + +fz_colorspace *colorspace; +char *filename; +fz_context *ctx; + +struct { + int count, total; + int min, max; + int minpage, maxpage; +} timing; + +static void usage(void) +{ + fprintf(stderr, + "usage: xpsdraw [options] input.xps [pages]\n" + "\t-o -\toutput filename (%%d for page number)\n" + "\t\tsupported formats: pgm, ppm, pam, png\n" + "\t-r -\tresolution in dpi (default: 72)\n" + "\t-a\tsave alpha channel (only pam and png)\n" + "\t-g\trender in grayscale\n" + "\t-m\tshow timing information\n" + "\t-t\tshow text (-tt for xml)\n" + "\t-x\tshow display list\n" + "\t-d\tdisable use of display list\n" + "\t-5\tshow md5 checksums\n" + "\t-l\tprint outline\n" + "\tpages\tcomma separated list of ranges\n"); + exit(1); +} + +static int gettime(void) +{ + static struct timeval first; + static int once = 1; + struct timeval now; + if (once) + { + gettimeofday(&first, NULL); + once = 0; + } + gettimeofday(&now, NULL); + return (now.tv_sec - first.tv_sec) * 1000 + (now.tv_usec - first.tv_usec) / 1000; +} + +static int isrange(char *s) +{ + while (*s) + { + if ((*s < '0' || *s > '9') && *s != '-' && *s != ',') + return 0; + s++; + } + return 1; +} + +static void drawpage(xps_document *doc, int pagenum) +{ + xps_page *page; + fz_display_list *list; + fz_device *dev; + int start; + + if (showtime) + { + start = gettime(); + } + + page = xps_load_page(doc, pagenum - 1); + + list = NULL; + + if (uselist) + { + list = fz_new_display_list(doc->ctx); + dev = fz_new_list_device(doc->ctx, list); + xps_run_page(doc, page, dev, fz_identity, NULL); + fz_free_device(dev); + } + + if (showxml) + { + dev = fz_new_trace_device(doc->ctx); + 
printf("\n", pagenum); + if (list) + fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL); + else + xps_run_page(doc, page, dev, fz_identity, NULL); + printf("\n"); + fz_free_device(dev); + } + + if (showtext) + { + fz_text_span *text = fz_new_text_span(doc->ctx); + dev = fz_new_text_device(doc->ctx, text); + if (list) + fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL); + else + xps_run_page(doc, page, dev, fz_identity, NULL); + fz_free_device(dev); + printf("[Page %d]\n", pagenum); + if (showtext > 1) + fz_debug_text_span_xml(text); + else + fz_debug_text_span(text); + printf("\n"); + fz_free_text_span(doc->ctx, text); + } + + if (showmd5 || showtime) + printf("page %s %d", filename, pagenum); + + if (output || showmd5 || showtime) + { + float zoom; + fz_matrix ctm; + fz_rect rect; + fz_bbox bbox; + fz_pixmap *pix; + + rect = xps_bound_page(doc, page); + zoom = resolution / 72; + ctm = fz_scale(zoom, zoom); + bbox = fz_round_rect(fz_transform_rect(ctm, rect)); + + /* TODO: banded rendering and multi-page ppm */ + + pix = fz_new_pixmap_with_rect(doc->ctx, colorspace, bbox); + + if (savealpha) + fz_clear_pixmap(pix); + else + fz_clear_pixmap_with_color(pix, 255); + + dev = fz_new_draw_device(doc->ctx, pix); + if (list) + fz_execute_display_list(list, dev, ctm, bbox, NULL); + else + xps_run_page(doc, page, dev, ctm, NULL); + fz_free_device(dev); + + if (output) + { + char buf[512]; + sprintf(buf, output, pagenum); + if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm")) + fz_write_pnm(doc->ctx, pix, buf); + else if (strstr(output, ".pam")) + fz_write_pam(doc->ctx, pix, buf, savealpha); + else if (strstr(output, ".png")) + fz_write_png(doc->ctx, pix, buf, savealpha); + } + + if (showmd5) + { + fz_md5 md5; + unsigned char digest[16]; + int i; + + fz_md5_init(&md5); + fz_md5_update(&md5, pix->samples, pix->w * pix->h * pix->n); + fz_md5_final(&md5, digest); + + printf(" "); + for (i = 0; i < 16; i++) + 
printf("%02x", digest[i]); + } + + fz_drop_pixmap(doc->ctx, pix); + } + + if (list) + fz_free_display_list(doc->ctx, list); + + if (showtime) + { + int end = gettime(); + int diff = end - start; + + if (diff < timing.min) + { + timing.min = diff; + timing.minpage = pagenum; + } + if (diff > timing.max) + { + timing.max = diff; + timing.maxpage = pagenum; + } + timing.total += diff; + timing.count ++; + + printf(" %dms", diff); + } + + if (showmd5 || showtime) + printf("\n"); +} + +static void drawrange(xps_document *doc, char *range) +{ + int page, spage, epage; + char *spec, *dash; + + spec = fz_strsep(&range, ","); + while (spec) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = xps_count_pages(doc); + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + epage = atoi(dash + 1); + else + epage = xps_count_pages(doc); + } + + spage = CLAMP(spage, 1, xps_count_pages(doc)); + epage = CLAMP(epage, 1, xps_count_pages(doc)); + + if (spage < epage) + for (page = spage; page <= epage; page++) + drawpage(doc, page); + else + for (page = spage; page >= epage; page--) + drawpage(doc, page); + + spec = fz_strsep(&range, ","); + } +} + +static void drawoutline(xps_document *doc) +{ + fz_outline *outline = xps_load_outline(doc); + if (showoutline > 1) + fz_debug_outline_xml(outline, 0); + else + fz_debug_outline(outline, 0); + fz_free_outline(outline); +} + +#ifdef MUPDF_COMBINED_EXE +int xpsdraw_main(int argc, char **argv) +#else +int main(int argc, char **argv) +#endif +{ + int grayscale = 0; + int accelerate = 1; + xps_document *doc = NULL; + int c; + + fz_var(doc); + + while ((c = fz_getopt(argc, argv, "o:p:r:Aadglmtx5")) != -1) + { + switch (c) + { + case 'o': output = fz_optarg; break; + case 'r': resolution = atof(fz_optarg); break; + case 'A': accelerate = 0; break; + case 'a': savealpha = 1; break; + case 'l': showoutline++; break; + case 'm': showtime++; break; + case 't': showtext++; break; + case 'x': showxml++; break; 
+ case '5': showmd5++; break; + case 'g': grayscale++; break; + case 'd': uselist = 0; break; + default: usage(); break; + } + } + + if (fz_optind == argc) + usage(); + + if (!showtext && !showxml && !showtime && !showmd5 && !showoutline && !output) + { + printf("nothing to do\n"); + exit(0); + } + + if (accelerate) + fz_accelerate(); + + ctx = fz_new_context(NULL, FZ_STORE_DEFAULT); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + colorspace = fz_device_rgb; + if (grayscale) + colorspace = fz_device_gray; + if (output && strstr(output, ".pgm")) + colorspace = fz_device_gray; + if (output && strstr(output, ".ppm")) + colorspace = fz_device_rgb; + + timing.count = 0; + timing.total = 0; + timing.min = 1 << 30; + timing.max = 0; + timing.minpage = 0; + timing.maxpage = 0; + + if (showxml) + printf("\n"); + + while (fz_optind < argc) + { + filename = argv[fz_optind++]; + + fz_try(ctx) + { + doc = xps_open_document(ctx, filename); + + if (showxml) + printf("\n", filename); + + if (showoutline) + drawoutline(doc); + + if (showtext || showxml || showtime || showmd5 || output) + { + if (fz_optind == argc || !isrange(argv[fz_optind])) + drawrange(doc, "1-"); + if (fz_optind < argc && isrange(argv[fz_optind])) + drawrange(doc, argv[fz_optind++]); + } + + if (showxml) + printf("\n"); + + xps_close_document(doc); + } + fz_catch(ctx) + { + xps_close_document(doc); + } + } + + if (showtime) + { + printf("total %dms / %d pages for an average of %dms\n", + timing.total, timing.count, timing.total / timing.count); + printf("fastest page %d: %dms\n", timing.minpage, timing.min); + printf("slowest page %d: %dms\n", timing.maxpage, timing.max); + } + + fz_free_context(ctx); + + return 0; +} diff --git a/apps/pdfclean.c b/apps/pdfclean.c deleted file mode 100644 index 8a163502..00000000 --- a/apps/pdfclean.c +++ /dev/null @@ -1,842 +0,0 @@ -/* - * PDF cleaning tool: general purpose pdf syntax washer. - * - * Rewrite PDF with pretty printed objects. 
- * Garbage collect unreachable objects. - * Inflate compressed streams. - * Create subset documents. - * - * TODO: linearize document for fast web view - */ - -#include "fitz.h" -#include "mupdf.h" - -static FILE *out = NULL; - -enum -{ - expand_images = 1, - expand_fonts = 2, - expand_all = -1 -}; - -static char *uselist = NULL; -static int *ofslist = NULL; -static int *genlist = NULL; -static int *renumbermap = NULL; - -static int dogarbage = 0; -static int doexpand = 0; -static int doascii = 0; - -static pdf_document *xref = NULL; -static fz_context *ctx = NULL; - -static void usage(void) -{ - fprintf(stderr, - "usage: pdfclean [options] input.pdf [output.pdf] [pages]\n" - "\t-p -\tpassword\n" - "\t-g\tgarbage collect unused objects\n" - "\t-gg\tin addition to -g compact xref table\n" - "\t-ggg\tin addition to -gg merge duplicate objects\n" - "\t-d\tdecompress all streams\n" - "\t-i\ttoggle decompression of image streams\n" - "\t-f\ttoggle decompression of font streams\n" - "\t-a\tascii hex encode binary streams\n" - "\tpages\tcomma separated list of ranges\n"); - exit(1); -} - -/* - * Garbage collect objects not reachable from the trailer. 
- */ - -static void sweepref(fz_obj *ref); - -static void sweepobj(fz_obj *obj) -{ - int i; - - if (fz_is_indirect(obj)) - sweepref(obj); - - else if (fz_is_dict(obj)) - { - int n = fz_dict_len(obj); - for (i = 0; i < n; i++) - sweepobj(fz_dict_get_val(obj, i)); - } - - else if (fz_is_array(obj)) - { - int n = fz_array_len(obj); - for (i = 0; i < n; i++) - sweepobj(fz_array_get(obj, i)); - } -} - -static void sweepref(fz_obj *obj) -{ - int num = fz_to_num(obj); - int gen = fz_to_gen(obj); - - if (num < 0 || num >= xref->len) - return; - if (uselist[num]) - return; - - uselist[num] = 1; - - /* Bake in /Length in stream objects */ - if (pdf_is_stream(xref, num, gen)) - { - fz_obj *len = fz_dict_gets(obj, "Length"); - if (fz_is_indirect(len)) - { - uselist[fz_to_num(len)] = 0; - len = fz_resolve_indirect(len); - fz_dict_puts(obj, "Length", len); - } - } - - sweepobj(fz_resolve_indirect(obj)); -} - -/* - * Scan for and remove duplicate objects (slow) - */ - -static void removeduplicateobjs(void) -{ - int num, other; - - for (num = 1; num < xref->len; num++) - { - /* Only compare an object to objects preceding it */ - for (other = 1; other < num; other++) - { - fz_obj *a, *b; - - if (num == other || !uselist[num] || !uselist[other]) - continue; - - /* - * Comparing stream objects data contents would take too long. - * - * pdf_is_stream calls pdf_cache_object and ensures - * that the xref table has the objects loaded. 
- */ - if (pdf_is_stream(xref, num, 0) || pdf_is_stream(xref, other, 0)) - continue; - - a = xref->table[num].obj; - b = xref->table[other].obj; - - a = fz_resolve_indirect(a); - b = fz_resolve_indirect(b); - - if (fz_objcmp(a, b)) - continue; - - /* Keep the lowest numbered object */ - renumbermap[num] = MIN(num, other); - renumbermap[other] = MIN(num, other); - uselist[MAX(num, other)] = 0; - - /* One duplicate was found, do not look for another */ - break; - } - } -} - -/* - * Renumber objects sequentially so the xref is more compact - */ - -static void compactxref(void) -{ - int num, newnum; - - /* - * Update renumbermap in-place, clustering all used - * objects together at low object ids. Objects that - * already should be renumbered will have their new - * object ids be updated to reflect the compaction. - */ - - newnum = 1; - for (num = 1; num < xref->len; num++) - { - if (uselist[num] && renumbermap[num] == num) - renumbermap[num] = newnum++; - else if (renumbermap[num] != num) - renumbermap[num] = renumbermap[renumbermap[num]]; - } -} - -/* - * Update indirect objects according to renumbering established when - * removing duplicate objects and compacting the xref. 
- */ - -static void renumberobj(fz_obj *obj) -{ - int i; - fz_context *ctx = xref->ctx; - - if (fz_is_dict(obj)) - { - int n = fz_dict_len(obj); - for (i = 0; i < n; i++) - { - fz_obj *key = fz_dict_get_key(obj, i); - fz_obj *val = fz_dict_get_val(obj, i); - if (fz_is_indirect(val)) - { - val = fz_new_indirect(ctx, renumbermap[fz_to_num(val)], 0, xref); - fz_dict_put(obj, key, val); - fz_drop_obj(val); - } - else - { - renumberobj(val); - } - } - } - - else if (fz_is_array(obj)) - { - int n = fz_array_len(obj); - for (i = 0; i < n; i++) - { - fz_obj *val = fz_array_get(obj, i); - if (fz_is_indirect(val)) - { - val = fz_new_indirect(ctx, renumbermap[fz_to_num(val)], 0, xref); - fz_array_put(obj, i, val); - fz_drop_obj(val); - } - else - { - renumberobj(val); - } - } - } -} - -static void renumberobjs(void) -{ - pdf_xref_entry *oldxref; - int newlen; - int num; - - /* Apply renumber map to indirect references in all objects in xref */ - renumberobj(xref->trailer); - for (num = 0; num < xref->len; num++) - { - fz_obj *obj = xref->table[num].obj; - - if (fz_is_indirect(obj)) - { - obj = fz_new_indirect(ctx, renumbermap[fz_to_num(obj)], 0, xref); - pdf_update_object(xref, num, 0, obj); - fz_drop_obj(obj); - } - else - { - renumberobj(obj); - } - } - - /* Create new table for the reordered, compacted xref */ - oldxref = xref->table; - xref->table = fz_malloc_array(xref->ctx, xref->len, sizeof(pdf_xref_entry)); - xref->table[0] = oldxref[0]; - - /* Move used objects into the new compacted xref */ - newlen = 0; - for (num = 1; num < xref->len; num++) - { - if (uselist[num]) - { - if (newlen < renumbermap[num]) - newlen = renumbermap[num]; - xref->table[renumbermap[num]] = oldxref[num]; - } - else - { - if (oldxref[num].obj) - fz_drop_obj(oldxref[num].obj); - } - } - - fz_free(xref->ctx, oldxref); - - /* Update the used objects count in compacted xref */ - xref->len = newlen + 1; - - /* Update list of used objects to fit with compacted xref */ - for (num = 1; num < 
xref->len; num++) - uselist[num] = 1; -} - -/* - * Recreate page tree to only retain specified pages. - */ - -static void retainpages(int argc, char **argv) -{ - fz_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; - - /* Keep only pages/type and (reduced) dest entries to avoid - * references to unretained pages */ - oldroot = fz_dict_gets(xref->trailer, "Root"); - pages = fz_dict_gets(oldroot, "Pages"); - olddests = pdf_load_name_tree(xref, "Dests"); - - root = fz_new_dict(ctx, 2); - fz_dict_puts(root, "Type", fz_dict_gets(oldroot, "Type")); - fz_dict_puts(root, "Pages", fz_dict_gets(oldroot, "Pages")); - - pdf_update_object(xref, fz_to_num(oldroot), fz_to_gen(oldroot), root); - - fz_drop_obj(root); - - /* Create a new kids array with only the pages we want to keep */ - parent = fz_new_indirect(ctx, fz_to_num(pages), fz_to_gen(pages), xref); - kids = fz_new_array(ctx, 1); - - /* Retain pages specified */ - while (argc - fz_optind) - { - int page, spage, epage; - char *spec, *dash; - char *pagelist = argv[fz_optind]; - - spec = fz_strsep(&pagelist, ","); - while (spec) - { - dash = strchr(spec, '-'); - - if (dash == spec) - spage = epage = pdf_count_pages(xref); - else - spage = epage = atoi(spec); - - if (dash) - { - if (strlen(dash) > 1) - epage = atoi(dash + 1); - else - epage = pdf_count_pages(xref); - } - - if (spage > epage) - page = spage, spage = epage, epage = page; - - if (spage < 1) - spage = 1; - if (epage > pdf_count_pages(xref)) - epage = pdf_count_pages(xref); - - for (page = spage; page <= epage; page++) - { - fz_obj *pageobj = xref->page_objs[page-1]; - fz_obj *pageref = xref->page_refs[page-1]; - - fz_dict_puts(pageobj, "Parent", parent); - - /* Store page object in new kids array */ - fz_array_push(kids, pageref); - } - - spec = fz_strsep(&pagelist, ","); - } - - fz_optind++; - } - - fz_drop_obj(parent); - - /* Update page count and kids array */ - countobj = fz_new_int(ctx, fz_array_len(kids)); - fz_dict_puts(pages, "Count", 
countobj); - fz_drop_obj(countobj); - fz_dict_puts(pages, "Kids", kids); - fz_drop_obj(kids); - - /* Also preserve the (partial) Dests name tree */ - if (olddests) - { - int i; - fz_obj *names = fz_new_dict(ctx, 1); - fz_obj *dests = fz_new_dict(ctx, 1); - fz_obj *names_list = fz_new_array(ctx, 32); - - for (i = 0; i < fz_dict_len(olddests); i++) - { - fz_obj *key = fz_dict_get_key(olddests, i); - fz_obj *val = fz_dict_get_val(olddests, i); - fz_obj *key_str = fz_new_string(ctx, fz_to_name(key), strlen(fz_to_name(key))); - fz_obj *dest = fz_dict_gets(val, "D"); - - dest = fz_array_get(dest ? dest : val, 0); - if (fz_array_contains(fz_dict_gets(pages, "Kids"), dest)) - { - fz_array_push(names_list, key_str); - fz_array_push(names_list, val); - } - fz_drop_obj(key_str); - } - - root = fz_dict_gets(xref->trailer, "Root"); - fz_dict_puts(dests, "Names", names_list); - fz_dict_puts(names, "Dests", dests); - fz_dict_puts(root, "Names", names); - - fz_drop_obj(names); - fz_drop_obj(dests); - fz_drop_obj(names_list); - fz_drop_obj(olddests); - } -} - -/* - * Make sure we have loaded objects from object streams. 
- */ - -static void preloadobjstms(void) -{ - fz_obj *obj; - int num; - - for (num = 0; num < xref->len; num++) - { - if (xref->table[num].type == 'o') - { - obj = pdf_load_object(xref, num, 0); - fz_drop_obj(obj); - } - } -} - -/* - * Save streams and objects to the output - */ - -static inline int isbinary(int c) -{ - if (c == '\n' || c == '\r' || c == '\t') - return 0; - return c < 32 || c > 127; -} - -static int isbinarystream(fz_buffer *buf) -{ - int i; - for (i = 0; i < buf->len; i++) - if (isbinary(buf->data[i])) - return 1; - return 0; -} - -static fz_buffer *hexbuf(unsigned char *p, int n) -{ - static const char hex[16] = "0123456789abcdef"; - fz_buffer *buf; - int x = 0; - - buf = fz_new_buffer(ctx, n * 2 + (n / 32) + 2); - - while (n--) - { - buf->data[buf->len++] = hex[*p >> 4]; - buf->data[buf->len++] = hex[*p & 15]; - if (++x == 32) - { - buf->data[buf->len++] = '\n'; - x = 0; - } - p++; - } - - buf->data[buf->len++] = '>'; - buf->data[buf->len++] = '\n'; - - return buf; -} - -static void addhexfilter(fz_obj *dict) -{ - fz_obj *f, *dp, *newf, *newdp; - fz_obj *ahx, *nullobj; - - ahx = fz_new_name(ctx, "ASCIIHexDecode"); - nullobj = fz_new_null(ctx); - newf = newdp = NULL; - - f = fz_dict_gets(dict, "Filter"); - dp = fz_dict_gets(dict, "DecodeParms"); - - if (fz_is_name(f)) - { - newf = fz_new_array(ctx, 2); - fz_array_push(newf, ahx); - fz_array_push(newf, f); - f = newf; - if (fz_is_dict(dp)) - { - newdp = fz_new_array(ctx, 2); - fz_array_push(newdp, nullobj); - fz_array_push(newdp, dp); - dp = newdp; - } - } - else if (fz_is_array(f)) - { - fz_array_insert(f, ahx); - if (fz_is_array(dp)) - fz_array_insert(dp, nullobj); - } - else - f = ahx; - - fz_dict_puts(dict, "Filter", f); - if (dp) - fz_dict_puts(dict, "DecodeParms", dp); - - fz_drop_obj(ahx); - fz_drop_obj(nullobj); - if (newf) - fz_drop_obj(newf); - if (newdp) - fz_drop_obj(newdp); -} - -static void copystream(fz_obj *obj, int num, int gen) -{ - fz_buffer *buf, *tmp; - fz_obj *newlen; - - buf 
= pdf_load_raw_stream(xref, num, gen); - - if (doascii && isbinarystream(buf)) - { - tmp = hexbuf(buf->data, buf->len); - fz_drop_buffer(ctx, buf); - buf = tmp; - - addhexfilter(obj); - - newlen = fz_new_int(ctx, buf->len); - fz_dict_puts(obj, "Length", newlen); - fz_drop_obj(newlen); - } - - fprintf(out, "%d %d obj\n", num, gen); - fz_fprint_obj(out, obj, doexpand == 0); - fprintf(out, "stream\n"); - fwrite(buf->data, 1, buf->len, out); - fprintf(out, "endstream\nendobj\n\n"); - - fz_drop_buffer(ctx, buf); -} - -static void expandstream(fz_obj *obj, int num, int gen) -{ - fz_buffer *buf, *tmp; - fz_obj *newlen; - - buf = pdf_load_stream(xref, num, gen); - - fz_dict_dels(obj, "Filter"); - fz_dict_dels(obj, "DecodeParms"); - - if (doascii && isbinarystream(buf)) - { - tmp = hexbuf(buf->data, buf->len); - fz_drop_buffer(ctx, buf); - buf = tmp; - - addhexfilter(obj); - } - - newlen = fz_new_int(ctx, buf->len); - fz_dict_puts(obj, "Length", newlen); - fz_drop_obj(newlen); - - fprintf(out, "%d %d obj\n", num, gen); - fz_fprint_obj(out, obj, doexpand == 0); - fprintf(out, "stream\n"); - fwrite(buf->data, 1, buf->len, out); - fprintf(out, "endstream\nendobj\n\n"); - - fz_drop_buffer(ctx, buf); -} - -static void writeobject(int num, int gen) -{ - fz_obj *obj; - fz_obj *type; - - obj = pdf_load_object(xref, num, gen); - - /* skip ObjStm and XRef objects */ - if (fz_is_dict(obj)) - { - type = fz_dict_gets(obj, "Type"); - if (fz_is_name(type) && !strcmp(fz_to_name(type), "ObjStm")) - { - uselist[num] = 0; - fz_drop_obj(obj); - return; - } - if (fz_is_name(type) && !strcmp(fz_to_name(type), "XRef")) - { - uselist[num] = 0; - fz_drop_obj(obj); - return; - } - } - - if (!pdf_is_stream(xref, num, gen)) - { - fprintf(out, "%d %d obj\n", num, gen); - fz_fprint_obj(out, obj, doexpand == 0); - fprintf(out, "endobj\n\n"); - } - else - { - int dontexpand = 0; - if (doexpand != 0 && doexpand != expand_all) - { - fz_obj *o; - - if ((o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), 
"XObject")) && - (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "Image"))) - dontexpand = !(doexpand & expand_images); - if (o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "Font")) - dontexpand = !(doexpand & expand_fonts); - if (o = fz_dict_gets(obj, "Type"), !strcmp(fz_to_name(o), "FontDescriptor")) - dontexpand = !(doexpand & expand_fonts); - if ((o = fz_dict_gets(obj, "Length1")) != NULL) - dontexpand = !(doexpand & expand_fonts); - if ((o = fz_dict_gets(obj, "Length2")) != NULL) - dontexpand = !(doexpand & expand_fonts); - if ((o = fz_dict_gets(obj, "Length3")) != NULL) - dontexpand = !(doexpand & expand_fonts); - if (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "Type1C")) - dontexpand = !(doexpand & expand_fonts); - if (o = fz_dict_gets(obj, "Subtype"), !strcmp(fz_to_name(o), "CIDFontType0C")) - dontexpand = !(doexpand & expand_fonts); - } - if (doexpand && !dontexpand && !pdf_is_jpx_image(ctx, obj)) - expandstream(obj, num, gen); - else - copystream(obj, num, gen); - } - - fz_drop_obj(obj); -} - -static void writexref(void) -{ - fz_obj *trailer; - fz_obj *obj; - int startxref; - int num; - - startxref = ftell(out); - - fprintf(out, "xref\n0 %d\n", xref->len); - for (num = 0; num < xref->len; num++) - { - if (uselist[num]) - fprintf(out, "%010d %05d n \n", ofslist[num], genlist[num]); - else - fprintf(out, "%010d %05d f \n", ofslist[num], genlist[num]); - } - fprintf(out, "\n"); - - trailer = fz_new_dict(ctx, 5); - - obj = fz_new_int(ctx, xref->len); - fz_dict_puts(trailer, "Size", obj); - fz_drop_obj(obj); - - obj = fz_dict_gets(xref->trailer, "Info"); - if (obj) - fz_dict_puts(trailer, "Info", obj); - - obj = fz_dict_gets(xref->trailer, "Root"); - if (obj) - fz_dict_puts(trailer, "Root", obj); - - obj = fz_dict_gets(xref->trailer, "ID"); - if (obj) - fz_dict_puts(trailer, "ID", obj); - - fprintf(out, "trailer\n"); - fz_fprint_obj(out, trailer, doexpand == 0); - fprintf(out, "\n"); - - fz_drop_obj(trailer); - - fprintf(out, 
"startxref\n%d\n%%%%EOF\n", startxref); -} - -static void writepdf(void) -{ - int lastfree; - int num; - - for (num = 0; num < xref->len; num++) - { - if (xref->table[num].type == 'f') - genlist[num] = xref->table[num].gen; - if (xref->table[num].type == 'n') - genlist[num] = xref->table[num].gen; - if (xref->table[num].type == 'o') - genlist[num] = 0; - - if (dogarbage && !uselist[num]) - continue; - - if (xref->table[num].type == 'n' || xref->table[num].type == 'o') - { - uselist[num] = 1; - ofslist[num] = ftell(out); - writeobject(num, genlist[num]); - } - } - - /* Construct linked list of free object slots */ - lastfree = 0; - for (num = 0; num < xref->len; num++) - { - if (!uselist[num]) - { - genlist[num]++; - ofslist[lastfree] = num; - lastfree = num; - } - } - - writexref(); -} - -#ifdef MUPDF_COMBINED_EXE -int pdfclean_main(int argc, char **argv) -#else -int main(int argc, char **argv) -#endif -{ - char *infile; - char *outfile = "out.pdf"; - char *password = ""; - int c, num; - int subset; - - while ((c = fz_getopt(argc, argv, "adfgip:")) != -1) - { - switch (c) - { - case 'p': password = fz_optarg; break; - case 'g': dogarbage ++; break; - case 'd': doexpand ^= expand_all; break; - case 'f': doexpand ^= expand_fonts; break; - case 'i': doexpand ^= expand_images; break; - case 'a': doascii ++; break; - default: usage(); break; - } - } - - if (argc - fz_optind < 1) - usage(); - - infile = argv[fz_optind++]; - - if (argc - fz_optind > 0 && - (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF"))) - { - outfile = argv[fz_optind++]; - } - - subset = 0; - if (argc - fz_optind > 0) - subset = 1; - - ctx = fz_new_context(NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - xref = pdf_open_document(ctx, infile); - if (pdf_needs_password(xref)) - if (!pdf_authenticate_password(xref, password)) - fz_throw(ctx, "cannot authenticate password: %s\n", infile); - - out = fopen(outfile, "wb"); - if 
(!out) - fz_throw(ctx, "cannot open output file '%s'", outfile); - - fprintf(out, "%%PDF-%d.%d\n", xref->version / 10, xref->version % 10); - fprintf(out, "%%\316\274\341\277\246\n\n"); - - uselist = fz_malloc_array(ctx, xref->len + 1, sizeof(char)); - ofslist = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); - genlist = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); - renumbermap = fz_malloc_array(ctx, xref->len + 1, sizeof(int)); - - for (num = 0; num < xref->len; num++) - { - uselist[num] = 0; - ofslist[num] = 0; - genlist[num] = 0; - renumbermap[num] = num; - } - - /* Make sure any objects hidden in compressed streams have been loaded */ - preloadobjstms(); - - /* Only retain the specified subset of the pages */ - if (subset) - retainpages(argc, argv); - - /* Sweep & mark objects from the trailer */ - if (dogarbage >= 1) - sweepobj(xref->trailer); - - /* Coalesce and renumber duplicate objects */ - if (dogarbage >= 3) - removeduplicateobjs(); - - /* Compact xref by renumbering and removing unused objects */ - if (dogarbage >= 2) - compactxref(); - - /* Make renumbering affect all indirect references and update xref */ - /* Do not renumber objects if encryption is in use, as the object - * numbers are baked into the streams/strings, and we can't currently - * cope with moving them. See bug 692627. 
*/ - if (dogarbage >= 2 && !xref->crypt) - renumberobjs(); - - writepdf(); - - if (fclose(out)) - fz_throw(ctx, "cannot close output file '%s'", outfile); - - fz_free(xref->ctx, uselist); - fz_free(xref->ctx, ofslist); - fz_free(xref->ctx, genlist); - fz_free(xref->ctx, renumbermap); - - pdf_close_document(xref); - fz_free_context(ctx); - return 0; -} diff --git a/apps/pdfdraw.c b/apps/pdfdraw.c deleted file mode 100644 index 552fe31a..00000000 --- a/apps/pdfdraw.c +++ /dev/null @@ -1,495 +0,0 @@ -/* - * pdfdraw -- command line tool for drawing pdf documents - */ - -#include "fitz.h" -#include "mupdf.h" - -#ifdef _MSC_VER -#include -#else -#include -#endif - -static char *output = NULL; -static float resolution = 72; -static float rotation = 0; - -static int showxml = 0; -static int showtext = 0; -static int showtime = 0; -static int showmd5 = 0; -static int showoutline = 0; -static int savealpha = 0; -static int uselist = 1; -static int alphabits = 8; -static float gamma_value = 1; -static int invert = 0; - -static fz_colorspace *colorspace; -static char *filename; - -static struct { - int count, total; - int min, max; - int minpage, maxpage; -} timing; - -static void usage(void) -{ - fprintf(stderr, - "usage: pdfdraw [options] input.pdf [pages]\n" - "\t-o -\toutput filename (%%d for page number)\n" - "\t\tsupported formats: pgm, ppm, pam, png, pbm\n" - "\t-p -\tpassword\n" - "\t-r -\tresolution in dpi (default: 72)\n" - "\t-A\tdisable accelerated functions\n" - "\t-a\tsave alpha channel (only pam and png)\n" - "\t-b -\tnumber of bits of antialiasing (0 to 8)\n" - "\t-g\trender in grayscale\n" - "\t-m\tshow timing information\n" - "\t-t\tshow text (-tt for xml)\n" - "\t-x\tshow display list\n" - "\t-d\tdisable use of display list\n" - "\t-5\tshow md5 checksums\n" - "\t-R -\trotate clockwise by given number of degrees\n" - "\t-G gamma\tgamma correct output\n" - "\t-I\tinvert output\n" - "\t-l\tprint outline\n" - "\tpages\tcomma separated list of ranges\n"); - 
exit(1); -} - -static int gettime(void) -{ - static struct timeval first; - static int once = 1; - struct timeval now; - if (once) - { - gettimeofday(&first, NULL); - once = 0; - } - gettimeofday(&now, NULL); - return (now.tv_sec - first.tv_sec) * 1000 + (now.tv_usec - first.tv_usec) / 1000; -} - -static int isrange(char *s) -{ - while (*s) - { - if ((*s < '0' || *s > '9') && *s != '-' && *s != ',') - return 0; - s++; - } - return 1; -} - -static void drawpage(pdf_document *doc, int pagenum) -{ - pdf_page *page; - fz_display_list *list = NULL; - fz_device *dev = NULL; - int start; - fz_context *ctx = doc->ctx; - - fz_var(list); - fz_var(dev); - - if (showtime) - { - start = gettime(); - } - - fz_try(ctx) - { - page = pdf_load_page(doc, pagenum - 1); - } - fz_catch(ctx) - { - fz_throw(ctx, "cannot load page %d in file '%s'", pagenum, filename); - } - - if (uselist) - { - fz_try(ctx) - { - list = fz_new_display_list(ctx); - dev = fz_new_list_device(ctx, list); - pdf_run_page(doc, page, dev, fz_identity, NULL); - } - fz_catch(ctx) - { - fz_free_device(dev); - fz_free_display_list(ctx, list); - pdf_free_page(ctx, page); - fz_throw(ctx, "cannot draw page %d in file '%s'", pagenum, filename); - } - fz_free_device(dev); - dev = NULL; - } - - if (showxml) - { - fz_try(ctx) - { - dev = fz_new_trace_device(ctx); - printf("\n", pagenum); - if (list) - fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL); - else - pdf_run_page(doc, page, dev, fz_identity, NULL); - printf("\n"); - } - fz_catch(ctx) - { - fz_free_device(dev); - fz_free_display_list(ctx, list); - pdf_free_page(ctx, page); - fz_rethrow(ctx); - } - fz_free_device(dev); - dev = NULL; - } - - if (showtext) - { - fz_text_span *text = NULL; - - fz_var(text); - - fz_try(ctx) - { - text = fz_new_text_span(ctx); - dev = fz_new_text_device(ctx, text); - if (list) - fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL); - else - pdf_run_page(doc, page, dev, fz_identity, NULL); - 
fz_free_device(dev); - dev = NULL; - printf("[Page %d]\n", pagenum); - if (showtext > 1) - fz_debug_text_span_xml(text); - else - fz_debug_text_span(text); - printf("\n"); - } - fz_catch(ctx) - { - fz_free_device(dev); - fz_free_text_span(ctx, text); - fz_free_display_list(ctx, list); - pdf_free_page(ctx, page); - fz_rethrow(ctx); - } - fz_free_text_span(ctx, text); - } - - if (showmd5 || showtime) - printf("page %s %d", filename, pagenum); - - if (output || showmd5 || showtime) - { - float zoom; - fz_matrix ctm; - fz_rect bounds; - fz_bbox bbox; - fz_pixmap *pix = NULL; - - fz_var(pix); - - bounds = pdf_bound_page(doc, page); - zoom = resolution / 72; - ctm = fz_scale(zoom, zoom); - ctm = fz_concat(ctm, fz_rotate(rotation)); - bbox = fz_round_rect(fz_transform_rect(ctm, bounds)); - - /* TODO: banded rendering and multi-page ppm */ - - fz_try(ctx) - { - pix = fz_new_pixmap_with_rect(ctx, colorspace, bbox); - - if (savealpha) - fz_clear_pixmap(pix); - else - fz_clear_pixmap_with_color(pix, 255); - - dev = fz_new_draw_device(ctx, pix); - if (list) - fz_execute_display_list(list, dev, ctm, bbox, NULL); - else - pdf_run_page(doc, page, dev, ctm, NULL); - fz_free_device(dev); - dev = NULL; - - if (invert) - fz_invert_pixmap(pix); - if (gamma_value != 1) - fz_gamma_pixmap(pix, gamma_value); - - if (savealpha) - fz_unmultiply_pixmap(pix); - - if (output) - { - char buf[512]; - sprintf(buf, output, pagenum); - if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm")) - fz_write_pnm(ctx, pix, buf); - else if (strstr(output, ".pam")) - fz_write_pam(ctx, pix, buf, savealpha); - else if (strstr(output, ".png")) - fz_write_png(ctx, pix, buf, savealpha); - else if (strstr(output, ".pbm")) { - fz_halftone *ht = fz_get_default_halftone(ctx, 1); - fz_bitmap *bit = fz_halftone_pixmap(ctx, pix, ht); - fz_write_pbm(ctx, bit, buf); - fz_drop_bitmap(ctx, bit); - fz_drop_halftone(ctx, ht); - } - } - - if (showmd5) - { - fz_md5 md5; - unsigned char digest[16]; - int 
i; - - fz_md5_init(&md5); - fz_md5_update(&md5, pix->samples, pix->w * pix->h * pix->n); - fz_md5_final(&md5, digest); - - printf(" "); - for (i = 0; i < 16; i++) - printf("%02x", digest[i]); - } - - fz_drop_pixmap(ctx, pix); - } - fz_catch(ctx) - { - fz_free_device(dev); - fz_drop_pixmap(ctx, pix); - fz_free_display_list(ctx, list); - pdf_free_page(ctx, page); - fz_rethrow(ctx); - } - } - - if (list) - fz_free_display_list(ctx, list); - - pdf_free_page(ctx, page); - - if (showtime) - { - int end = gettime(); - int diff = end - start; - - if (diff < timing.min) - { - timing.min = diff; - timing.minpage = pagenum; - } - if (diff > timing.max) - { - timing.max = diff; - timing.maxpage = pagenum; - } - timing.total += diff; - timing.count ++; - - printf(" %dms", diff); - } - - if (showmd5 || showtime) - printf("\n"); - - fz_flush_warnings(ctx); -} - -static void drawrange(pdf_document *doc, char *range) -{ - int page, spage, epage; - char *spec, *dash; - - spec = fz_strsep(&range, ","); - while (spec) - { - dash = strchr(spec, '-'); - - if (dash == spec) - spage = epage = pdf_count_pages(doc); - else - spage = epage = atoi(spec); - - if (dash) - { - if (strlen(dash) > 1) - epage = atoi(dash + 1); - else - epage = pdf_count_pages(doc); - } - - spage = CLAMP(spage, 1, pdf_count_pages(doc)); - epage = CLAMP(epage, 1, pdf_count_pages(doc)); - - if (spage < epage) - for (page = spage; page <= epage; page++) - drawpage(doc, page); - else - for (page = spage; page >= epage; page--) - drawpage(doc, page); - - spec = fz_strsep(&range, ","); - } -} - -static void drawoutline(pdf_document *doc) -{ - fz_outline *outline = pdf_load_outline(doc); - if (showoutline > 1) - fz_debug_outline_xml(outline, 0); - else - fz_debug_outline(outline, 0); - fz_free_outline(outline); -} - -#ifdef MUPDF_COMBINED_EXE -int pdfdraw_main(int argc, char **argv) -#else -int main(int argc, char **argv) -#endif -{ - char *password = ""; - int grayscale = 0; - int accelerate = 1; - pdf_document *doc = 
NULL; - int c; - fz_context *ctx; - - fz_var(doc); - - while ((c = fz_getopt(argc, argv, "lo:p:r:R:Aab:dgmtx5G:I")) != -1) - { - switch (c) - { - case 'o': output = fz_optarg; break; - case 'p': password = fz_optarg; break; - case 'r': resolution = atof(fz_optarg); break; - case 'R': rotation = atof(fz_optarg); break; - case 'A': accelerate = 0; break; - case 'a': savealpha = 1; break; - case 'b': alphabits = atoi(fz_optarg); break; - case 'l': showoutline++; break; - case 'm': showtime++; break; - case 't': showtext++; break; - case 'x': showxml++; break; - case '5': showmd5++; break; - case 'g': grayscale++; break; - case 'd': uselist = 0; break; - case 'G': gamma_value = atof(fz_optarg); break; - case 'I': invert++; break; - default: usage(); break; - } - } - - if (fz_optind == argc) - usage(); - - if (!showtext && !showxml && !showtime && !showmd5 && !showoutline && !output) - { - printf("nothing to do\n"); - exit(0); - } - - if (accelerate) - fz_accelerate(); - - ctx = fz_new_context(NULL, FZ_STORE_DEFAULT); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - fz_set_aa_level(ctx, alphabits); - - colorspace = fz_device_rgb; - if (grayscale) - colorspace = fz_device_gray; - if (output && strstr(output, ".pgm")) - colorspace = fz_device_gray; - if (output && strstr(output, ".ppm")) - colorspace = fz_device_rgb; - if (output && strstr(output, ".pbm")) - colorspace = fz_device_gray; - - timing.count = 0; - timing.total = 0; - timing.min = 1 << 30; - timing.max = 0; - timing.minpage = 0; - timing.maxpage = 0; - - if (showxml) - printf("\n"); - - fz_try(ctx) - { - while (fz_optind < argc) - { - filename = argv[fz_optind++]; - - fz_try(ctx) - { - doc = pdf_open_document(ctx, filename); - } - fz_catch(ctx) - { - fz_throw(ctx, "cannot open document: %s", filename); - } - - if (pdf_needs_password(doc)) - if (!pdf_authenticate_password(doc, password)) - fz_throw(ctx, "cannot authenticate password: %s", filename); - - if (showxml) - 
printf("\n", filename); - - if (showoutline) - drawoutline(doc); - - if (showtext || showxml || showtime || showmd5 || output) - { - if (fz_optind == argc || !isrange(argv[fz_optind])) - drawrange(doc, "1-"); - if (fz_optind < argc && isrange(argv[fz_optind])) - drawrange(doc, argv[fz_optind++]); - } - - if (showxml) - printf("\n"); - - pdf_close_document(doc); - doc = NULL; - } - } - fz_catch(ctx) - { - pdf_close_document(doc); - } - - if (showtime) - { - printf("total %dms / %d pages for an average of %dms\n", - timing.total, timing.count, timing.total / timing.count); - printf("fastest page %d: %dms\n", timing.minpage, timing.min); - printf("slowest page %d: %dms\n", timing.maxpage, timing.max); - } - - fz_free_context(ctx); - return 0; -} diff --git a/apps/pdfextract.c b/apps/pdfextract.c deleted file mode 100644 index 1407f7f3..00000000 --- a/apps/pdfextract.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * pdfextract -- the ultimate way to extract images and fonts from pdfs - */ - -#include "fitz.h" -#include "mupdf.h" - -static pdf_document *doc = NULL; -static fz_context *ctx = NULL; -static int dorgb = 0; - -static void usage(void) -{ - fprintf(stderr, "usage: pdfextract [options] file.pdf [object numbers]\n"); - fprintf(stderr, "\t-p\tpassword\n"); - fprintf(stderr, "\t-r\tconvert images to rgb\n"); - exit(1); -} - -static int isimage(fz_obj *obj) -{ - fz_obj *type = fz_dict_gets(obj, "Subtype"); - return fz_is_name(type) && !strcmp(fz_to_name(type), "Image"); -} - -static int isfontdesc(fz_obj *obj) -{ - fz_obj *type = fz_dict_gets(obj, "Type"); - return fz_is_name(type) && !strcmp(fz_to_name(type), "FontDescriptor"); -} - -static void saveimage(int num) -{ - fz_pixmap *img; - fz_obj *ref; - char name[1024]; - - ref = fz_new_indirect(ctx, num, 0, doc); - - /* TODO: detect DCTD and save as jpeg */ - - img = pdf_load_image(doc, ref); - - if (dorgb && img->colorspace && img->colorspace != fz_device_rgb) - { - fz_pixmap *temp; - temp = fz_new_pixmap_with_rect(ctx, 
fz_device_rgb, fz_bound_pixmap(img)); - fz_convert_pixmap(ctx, img, temp); - fz_drop_pixmap(ctx, img); - img = temp; - } - - if (img->n <= 4) - { - sprintf(name, "img-%04d.png", num); - printf("extracting image %s\n", name); - fz_write_png(ctx, img, name, 0); - } - else - { - sprintf(name, "img-%04d.pam", num); - printf("extracting image %s\n", name); - fz_write_pam(ctx, img, name, 0); - } - - fz_drop_pixmap(ctx, img); - fz_drop_obj(ref); -} - -static void savefont(fz_obj *dict, int num) -{ - char name[1024]; - char *subtype; - fz_buffer *buf; - fz_obj *stream = NULL; - fz_obj *obj; - char *ext = ""; - FILE *f; - char *fontname = "font"; - int n; - - obj = fz_dict_gets(dict, "FontName"); - if (obj) - fontname = fz_to_name(obj); - - obj = fz_dict_gets(dict, "FontFile"); - if (obj) - { - stream = obj; - ext = "pfa"; - } - - obj = fz_dict_gets(dict, "FontFile2"); - if (obj) - { - stream = obj; - ext = "ttf"; - } - - obj = fz_dict_gets(dict, "FontFile3"); - if (obj) - { - stream = obj; - - obj = fz_dict_gets(obj, "Subtype"); - if (obj && !fz_is_name(obj)) - fz_throw(ctx, "Invalid font descriptor subtype"); - - subtype = fz_to_name(obj); - if (!strcmp(subtype, "Type1C")) - ext = "cff"; - else if (!strcmp(subtype, "CIDFontType0C")) - ext = "cid"; - else - fz_throw(ctx, "Unhandled font type '%s'", subtype); - } - - if (!stream) - { - fz_warn(ctx, "Unhandled font type"); - return; - } - - buf = pdf_load_stream(doc, fz_to_num(stream), fz_to_gen(stream)); - - sprintf(name, "%s-%04d.%s", fontname, num, ext); - printf("extracting font %s\n", name); - - f = fopen(name, "wb"); - if (!f) - fz_throw(ctx, "Error creating font file"); - - n = fwrite(buf->data, 1, buf->len, f); - if (n < buf->len) - fz_throw(ctx, "Error writing font file"); - - if (fclose(f) < 0) - fz_throw(ctx, "Error closing font file"); - - fz_drop_buffer(ctx, buf); -} - -static void showobject(int num) -{ - fz_obj *obj; - - if (!doc) - fz_throw(ctx, "no file specified"); - - obj = pdf_load_object(doc, num, 0); - 
- if (isimage(obj)) - saveimage(num); - else if (isfontdesc(obj)) - savefont(obj, num); - - fz_drop_obj(obj); -} - -#ifdef MUPDF_COMBINED_EXE -int pdfextract_main(int argc, char **argv) -#else -int main(int argc, char **argv) -#endif -{ - char *infile; - char *password = ""; - int c, o; - - while ((c = fz_getopt(argc, argv, "p:r")) != -1) - { - switch (c) - { - case 'p': password = fz_optarg; break; - case 'r': dorgb++; break; - default: usage(); break; - } - } - - if (fz_optind == argc) - usage(); - - infile = argv[fz_optind++]; - - ctx = fz_new_context(NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - doc = pdf_open_document(ctx, infile); - if (pdf_needs_password(doc)) - if (!pdf_authenticate_password(doc, password)) - fz_throw(ctx, "cannot authenticate password: %s\n", infile); - - if (fz_optind == argc) - { - for (o = 0; o < doc->len; o++) - showobject(o); - } - else - { - while (fz_optind < argc) - { - showobject(atoi(argv[fz_optind])); - fz_optind++; - } - } - - pdf_close_document(doc); - fz_flush_warnings(ctx); - fz_free_context(ctx); - return 0; -} diff --git a/apps/pdfinfo.c b/apps/pdfinfo.c deleted file mode 100644 index c6c6b35c..00000000 --- a/apps/pdfinfo.c +++ /dev/null @@ -1,1020 +0,0 @@ -/* - * Information tool. - * Print information about the input pdf. 
- */ - -#include "fitz.h" -#include "mupdf.h" - -pdf_document *xref; -fz_context *ctx; -int pagecount; - -void closexref(void); - -void openxref(char *filename, char *password, int dieonbadpass, int loadpages); - -enum -{ - DIMENSIONS = 0x01, - FONTS = 0x02, - IMAGES = 0x04, - SHADINGS = 0x08, - PATTERNS = 0x10, - XOBJS = 0x20, - ALL = DIMENSIONS | FONTS | IMAGES | SHADINGS | PATTERNS | XOBJS -}; - -struct info -{ - int page; - fz_obj *pageref; - fz_obj *pageobj; - union { - struct { - fz_obj *obj; - } info; - struct { - fz_obj *obj; - } crypt; - struct { - fz_obj *obj; - fz_rect *bbox; - } dim; - struct { - fz_obj *obj; - fz_obj *subtype; - fz_obj *name; - } font; - struct { - fz_obj *obj; - fz_obj *width; - fz_obj *height; - fz_obj *bpc; - fz_obj *filter; - fz_obj *cs; - fz_obj *altcs; - } image; - struct { - fz_obj *obj; - fz_obj *type; - } shading; - struct { - fz_obj *obj; - fz_obj *type; - fz_obj *paint; - fz_obj *tiling; - fz_obj *shading; - } pattern; - struct { - fz_obj *obj; - fz_obj *groupsubtype; - fz_obj *reference; - } form; - } u; -}; - -static struct info *dim = NULL; -static int dims = 0; -static struct info *font = NULL; -static int fonts = 0; -static struct info *image = NULL; -static int images = 0; -static struct info *shading = NULL; -static int shadings = 0; -static struct info *pattern = NULL; -static int patterns = 0; -static struct info *form = NULL; -static int forms = 0; -static struct info *psobj = NULL; -static int psobjs = 0; - -void closexref(void) -{ - int i; - if (xref) - { - pdf_close_document(xref); - xref = NULL; - } - - if (dim) - { - for (i = 0; i < dims; i++) - fz_free(ctx, dim[i].u.dim.bbox); - fz_free(ctx, dim); - dim = NULL; - dims = 0; - } - - if (font) - { - fz_free(ctx, font); - font = NULL; - fonts = 0; - } - - if (image) - { - fz_free(ctx, image); - image = NULL; - images = 0; - } - - if (shading) - { - fz_free(ctx, shading); - shading = NULL; - shadings = 0; - } - - if (pattern) - { - fz_free(ctx, pattern); - pattern 
= NULL; - patterns = 0; - } - - if (form) - { - fz_free(ctx, form); - form = NULL; - forms = 0; - } - - if (psobj) - { - fz_free(ctx, psobj); - psobj = NULL; - psobjs = 0; - } -} - -static void -infousage(void) -{ - fprintf(stderr, - "usage: pdfinfo [options] [file.pdf ... ]\n" - "\t-d -\tpassword for decryption\n" - "\t-f\tlist fonts\n" - "\t-i\tlist images\n" - "\t-m\tlist dimensions\n" - "\t-p\tlist patterns\n" - "\t-s\tlist shadings\n" - "\t-x\tlist form and postscript xobjects\n"); - exit(1); -} - -static void -showglobalinfo(void) -{ - fz_obj *obj; - - printf("\nPDF-%d.%d\n", xref->version / 10, xref->version % 10); - - obj = fz_dict_gets(xref->trailer, "Info"); - if (obj) - { - printf("Info object (%d %d R):\n", fz_to_num(obj), fz_to_gen(obj)); - fz_debug_obj(fz_resolve_indirect(obj)); - } - - obj = fz_dict_gets(xref->trailer, "Encrypt"); - if (obj) - { - printf("\nEncryption object (%d %d R):\n", fz_to_num(obj), fz_to_gen(obj)); - fz_debug_obj(fz_resolve_indirect(obj)); - } - - printf("\nPages: %d\n\n", pagecount); -} - -static void -gatherdimensions(int page, fz_obj *pageref, fz_obj *pageobj) -{ - fz_rect bbox; - fz_obj *obj; - int j; - - obj = fz_dict_gets(pageobj, "MediaBox"); - if (!fz_is_array(obj)) - return; - - bbox = pdf_to_rect(ctx, obj); - - for (j = 0; j < dims; j++) - if (!memcmp(dim[j].u.dim.bbox, &bbox, sizeof (fz_rect))) - break; - - if (j < dims) - return; - - dims++; - - dim = fz_resize_array(ctx, dim, dims, sizeof(struct info)); - dim[dims - 1].page = page; - dim[dims - 1].pageref = pageref; - dim[dims - 1].pageobj = pageobj; - dim[dims - 1].u.dim.bbox = fz_malloc(ctx, sizeof(fz_rect)); - memcpy(dim[dims - 1].u.dim.bbox, &bbox, sizeof (fz_rect)); - - return; -} - -static void -gatherfonts(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) -{ - int i, n; - - n = fz_dict_len(dict); - for (i = 0; i < n; i++) - { - fz_obj *fontdict = NULL; - fz_obj *subtype = NULL; - fz_obj *basefont = NULL; - fz_obj *name = NULL; - int k; - - fontdict 
= fz_dict_get_val(dict, i); - if (!fz_is_dict(fontdict)) - { - fz_warn(ctx, "not a font dict (%d %d R)", fz_to_num(fontdict), fz_to_gen(fontdict)); - continue; - } - - subtype = fz_dict_gets(fontdict, "Subtype"); - basefont = fz_dict_gets(fontdict, "BaseFont"); - if (!basefont || fz_is_null(basefont)) - name = fz_dict_gets(fontdict, "Name"); - - for (k = 0; k < fonts; k++) - if (!fz_objcmp(font[k].u.font.obj, fontdict)) - break; - - if (k < fonts) - continue; - - fonts++; - - font = fz_resize_array(ctx, font, fonts, sizeof(struct info)); - font[fonts - 1].page = page; - font[fonts - 1].pageref = pageref; - font[fonts - 1].pageobj = pageobj; - font[fonts - 1].u.font.obj = fontdict; - font[fonts - 1].u.font.subtype = subtype; - font[fonts - 1].u.font.name = basefont ? basefont : name; - } -} - -static void -gatherimages(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) -{ - int i, n; - - n = fz_dict_len(dict); - for (i = 0; i < n; i++) - { - fz_obj *imagedict; - fz_obj *type; - fz_obj *width; - fz_obj *height; - fz_obj *bpc = NULL; - fz_obj *filter = NULL; - fz_obj *cs = NULL; - fz_obj *altcs; - int k; - - imagedict = fz_dict_get_val(dict, i); - if (!fz_is_dict(imagedict)) - { - fz_warn(ctx, "not an image dict (%d %d R)", fz_to_num(imagedict), fz_to_gen(imagedict)); - continue; - } - - type = fz_dict_gets(imagedict, "Subtype"); - if (strcmp(fz_to_name(type), "Image")) - continue; - - filter = fz_dict_gets(imagedict, "Filter"); - - altcs = NULL; - cs = fz_dict_gets(imagedict, "ColorSpace"); - if (fz_is_array(cs)) - { - fz_obj *cses = cs; - - cs = fz_array_get(cses, 0); - if (fz_is_name(cs) && (!strcmp(fz_to_name(cs), "DeviceN") || !strcmp(fz_to_name(cs), "Separation"))) - { - altcs = fz_array_get(cses, 2); - if (fz_is_array(altcs)) - altcs = fz_array_get(altcs, 0); - } - } - - width = fz_dict_gets(imagedict, "Width"); - height = fz_dict_gets(imagedict, "Height"); - bpc = fz_dict_gets(imagedict, "BitsPerComponent"); - - for (k = 0; k < images; k++) - if 
(!fz_objcmp(image[k].u.image.obj, imagedict)) - break; - - if (k < images) - continue; - - images++; - - image = fz_resize_array(ctx, image, images, sizeof(struct info)); - image[images - 1].page = page; - image[images - 1].pageref = pageref; - image[images - 1].pageobj = pageobj; - image[images - 1].u.image.obj = imagedict; - image[images - 1].u.image.width = width; - image[images - 1].u.image.height = height; - image[images - 1].u.image.bpc = bpc; - image[images - 1].u.image.filter = filter; - image[images - 1].u.image.cs = cs; - image[images - 1].u.image.altcs = altcs; - } -} - -static void -gatherforms(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) -{ - int i, n; - - n = fz_dict_len(dict); - for (i = 0; i < n; i++) - { - fz_obj *xobjdict; - fz_obj *type; - fz_obj *subtype; - fz_obj *group; - fz_obj *groupsubtype; - fz_obj *reference; - int k; - - xobjdict = fz_dict_get_val(dict, i); - if (!fz_is_dict(xobjdict)) - { - fz_warn(ctx, "not a xobject dict (%d %d R)", fz_to_num(xobjdict), fz_to_gen(xobjdict)); - continue; - } - - type = fz_dict_gets(xobjdict, "Subtype"); - if (strcmp(fz_to_name(type), "Form")) - continue; - - subtype = fz_dict_gets(xobjdict, "Subtype2"); - if (!strcmp(fz_to_name(subtype), "PS")) - continue; - - group = fz_dict_gets(xobjdict, "Group"); - groupsubtype = fz_dict_gets(group, "S"); - reference = fz_dict_gets(xobjdict, "Ref"); - - for (k = 0; k < forms; k++) - if (!fz_objcmp(form[k].u.form.obj, xobjdict)) - break; - - if (k < forms) - continue; - - forms++; - - form = fz_resize_array(ctx, form, forms, sizeof(struct info)); - form[forms - 1].page = page; - form[forms - 1].pageref = pageref; - form[forms - 1].pageobj = pageobj; - form[forms - 1].u.form.obj = xobjdict; - form[forms - 1].u.form.groupsubtype = groupsubtype; - form[forms - 1].u.form.reference = reference; - } -} - -static void -gatherpsobjs(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) -{ - int i, n; - - n = fz_dict_len(dict); - for (i = 0; i < n; i++) - { 
- fz_obj *xobjdict; - fz_obj *type; - fz_obj *subtype; - int k; - - xobjdict = fz_dict_get_val(dict, i); - if (!fz_is_dict(xobjdict)) - { - fz_warn(ctx, "not a xobject dict (%d %d R)", fz_to_num(xobjdict), fz_to_gen(xobjdict)); - continue; - } - - type = fz_dict_gets(xobjdict, "Subtype"); - subtype = fz_dict_gets(xobjdict, "Subtype2"); - if (strcmp(fz_to_name(type), "PS") && - (strcmp(fz_to_name(type), "Form") || strcmp(fz_to_name(subtype), "PS"))) - continue; - - for (k = 0; k < psobjs; k++) - if (!fz_objcmp(psobj[k].u.form.obj, xobjdict)) - break; - - if (k < psobjs) - continue; - - psobjs++; - - psobj = fz_resize_array(ctx, psobj, psobjs, sizeof(struct info)); - psobj[psobjs - 1].page = page; - psobj[psobjs - 1].pageref = pageref; - psobj[psobjs - 1].pageobj = pageobj; - psobj[psobjs - 1].u.form.obj = xobjdict; - } -} - -static void -gathershadings(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) -{ - int i, n; - - n = fz_dict_len(dict); - for (i = 0; i < n; i++) - { - fz_obj *shade; - fz_obj *type; - int k; - - shade = fz_dict_get_val(dict, i); - if (!fz_is_dict(shade)) - { - fz_warn(ctx, "not a shading dict (%d %d R)", fz_to_num(shade), fz_to_gen(shade)); - continue; - } - - type = fz_dict_gets(shade, "ShadingType"); - if (!fz_is_int(type) || fz_to_int(type) < 1 || fz_to_int(type) > 7) - { - fz_warn(ctx, "not a shading type (%d %d R)", fz_to_num(shade), fz_to_gen(shade)); - type = NULL; - } - - for (k = 0; k < shadings; k++) - if (!fz_objcmp(shading[k].u.shading.obj, shade)) - break; - - if (k < shadings) - continue; - - shadings++; - - shading = fz_resize_array(ctx, shading, shadings, sizeof(struct info)); - shading[shadings - 1].page = page; - shading[shadings - 1].pageref = pageref; - shading[shadings - 1].pageobj = pageobj; - shading[shadings - 1].u.shading.obj = shade; - shading[shadings - 1].u.shading.type = type; - } -} - -static void -gatherpatterns(int page, fz_obj *pageref, fz_obj *pageobj, fz_obj *dict) -{ - int i, n; - - n = 
fz_dict_len(dict); - for (i = 0; i < n; i++) - { - fz_obj *patterndict; - fz_obj *type; - fz_obj *paint = NULL; - fz_obj *tiling = NULL; - fz_obj *shading = NULL; - int k; - - patterndict = fz_dict_get_val(dict, i); - if (!fz_is_dict(patterndict)) - { - fz_warn(ctx, "not a pattern dict (%d %d R)", fz_to_num(patterndict), fz_to_gen(patterndict)); - continue; - } - - type = fz_dict_gets(patterndict, "PatternType"); - if (!fz_is_int(type) || fz_to_int(type) < 1 || fz_to_int(type) > 2) - { - fz_warn(ctx, "not a pattern type (%d %d R)", fz_to_num(patterndict), fz_to_gen(patterndict)); - type = NULL; - } - - if (fz_to_int(type) == 1) - { - paint = fz_dict_gets(patterndict, "PaintType"); - if (!fz_is_int(paint) || fz_to_int(paint) < 1 || fz_to_int(paint) > 2) - { - fz_warn(ctx, "not a pattern paint type (%d %d R)", fz_to_num(patterndict), fz_to_gen(patterndict)); - paint = NULL; - } - - tiling = fz_dict_gets(patterndict, "TilingType"); - if (!fz_is_int(tiling) || fz_to_int(tiling) < 1 || fz_to_int(tiling) > 3) - { - fz_warn(ctx, "not a pattern tiling type (%d %d R)", fz_to_num(patterndict), fz_to_gen(patterndict)); - tiling = NULL; - } - } - else - { - shading = fz_dict_gets(patterndict, "Shading"); - } - - for (k = 0; k < patterns; k++) - if (!fz_objcmp(pattern[k].u.pattern.obj, patterndict)) - break; - - if (k < patterns) - continue; - - patterns++; - - pattern = fz_resize_array(ctx, pattern, patterns, sizeof(struct info)); - pattern[patterns - 1].page = page; - pattern[patterns - 1].pageref = pageref; - pattern[patterns - 1].pageobj = pageobj; - pattern[patterns - 1].u.pattern.obj = patterndict; - pattern[patterns - 1].u.pattern.type = type; - pattern[patterns - 1].u.pattern.paint = paint; - pattern[patterns - 1].u.pattern.tiling = tiling; - pattern[patterns - 1].u.pattern.shading = shading; - } -} - -static void -gatherresourceinfo(int page, fz_obj *rsrc) -{ - fz_obj *pageobj; - fz_obj *pageref; - fz_obj *font; - fz_obj *xobj; - fz_obj *shade; - fz_obj *pattern; - 
fz_obj *subrsrc; - int i; - - pageobj = xref->page_objs[page-1]; - pageref = xref->page_refs[page-1]; - - if (!pageobj) - fz_throw(ctx, "cannot retrieve info from page %d", page); - - font = fz_dict_gets(rsrc, "Font"); - if (font) - { - int n; - - gatherfonts(page, pageref, pageobj, font); - n = fz_dict_len(font); - for (i = 0; i < n; i++) - { - fz_obj *obj = fz_dict_get_val(font, i); - - subrsrc = fz_dict_gets(obj, "Resources"); - if (subrsrc && fz_objcmp(rsrc, subrsrc)) - gatherresourceinfo(page, subrsrc); - } - } - - xobj = fz_dict_gets(rsrc, "XObject"); - if (xobj) - { - int n; - - gatherimages(page, pageref, pageobj, xobj); - gatherforms(page, pageref, pageobj, xobj); - gatherpsobjs(page, pageref, pageobj, xobj); - n = fz_dict_len(xobj); - for (i = 0; i < n; i++) - { - fz_obj *obj = fz_dict_get_val(xobj, i); - subrsrc = fz_dict_gets(obj, "Resources"); - if (subrsrc && fz_objcmp(rsrc, subrsrc)) - gatherresourceinfo(page, subrsrc); - } - } - - shade = fz_dict_gets(rsrc, "Shading"); - if (shade) - gathershadings(page, pageref, pageobj, shade); - - pattern = fz_dict_gets(rsrc, "Pattern"); - if (pattern) - { - int n; - gatherpatterns(page, pageref, pageobj, pattern); - n = fz_dict_len(pattern); - for (i = 0; i < n; i++) - { - fz_obj *obj = fz_dict_get_val(pattern, i); - subrsrc = fz_dict_gets(obj, "Resources"); - if (subrsrc && fz_objcmp(rsrc, subrsrc)) - gatherresourceinfo(page, subrsrc); - } - } -} - -static void -gatherpageinfo(int page) -{ - fz_obj *pageobj; - fz_obj *pageref; - fz_obj *rsrc; - - pageobj = xref->page_objs[page-1]; - pageref = xref->page_refs[page-1]; - - if (!pageobj) - fz_throw(ctx, "cannot retrieve info from page %d", page); - - gatherdimensions(page, pageref, pageobj); - - rsrc = fz_dict_gets(pageobj, "Resources"); - gatherresourceinfo(page, rsrc); -} - -static void -printinfo(char *filename, int show, int page) -{ - int i; - int j; - -#define PAGE_FMT "\t% 5d (% 7d %1d R): " - - if (show & DIMENSIONS && dims > 0) - { - printf("Mediaboxes 
(%d):\n", dims); - for (i = 0; i < dims; i++) - { - printf(PAGE_FMT "[ %g %g %g %g ]\n", - dim[i].page, - fz_to_num(dim[i].pageref), fz_to_gen(dim[i].pageref), - dim[i].u.dim.bbox->x0, - dim[i].u.dim.bbox->y0, - dim[i].u.dim.bbox->x1, - dim[i].u.dim.bbox->y1); - } - printf("\n"); - } - - if (show & FONTS && fonts > 0) - { - printf("Fonts (%d):\n", fonts); - for (i = 0; i < fonts; i++) - { - printf(PAGE_FMT "%s '%s' (%d %d R)\n", - font[i].page, - fz_to_num(font[i].pageref), fz_to_gen(font[i].pageref), - fz_to_name(font[i].u.font.subtype), - fz_to_name(font[i].u.font.name), - fz_to_num(font[i].u.font.obj), fz_to_gen(font[i].u.font.obj)); - } - printf("\n"); - } - - if (show & IMAGES && images > 0) - { - printf("Images (%d):\n", images); - for (i = 0; i < images; i++) - { - char *cs = NULL; - char *altcs = NULL; - - printf(PAGE_FMT "[ ", - image[i].page, - fz_to_num(image[i].pageref), fz_to_gen(image[i].pageref)); - - if (fz_is_array(image[i].u.image.filter)) - { - int n = fz_array_len(image[i].u.image.filter); - for (j = 0; j < n; j++) - { - fz_obj *obj = fz_array_get(image[i].u.image.filter, j); - char *filter = fz_strdup(ctx, fz_to_name(obj)); - - if (strstr(filter, "Decode")) - *(strstr(filter, "Decode")) = '\0'; - - printf("%s%s", - filter, - j == fz_array_len(image[i].u.image.filter) - 1 ? 
"" : " "); - fz_free(ctx, filter); - } - } - else if (image[i].u.image.filter) - { - fz_obj *obj = image[i].u.image.filter; - char *filter = fz_strdup(ctx, fz_to_name(obj)); - - if (strstr(filter, "Decode")) - *(strstr(filter, "Decode")) = '\0'; - - printf("%s", filter); - fz_free(ctx, filter); - } - else - printf("Raw"); - - if (image[i].u.image.cs) - { - cs = fz_strdup(ctx, fz_to_name(image[i].u.image.cs)); - - if (!strncmp(cs, "Device", 6)) - { - int len = strlen(cs + 6); - memmove(cs + 3, cs + 6, len + 1); - cs[3 + len + 1] = '\0'; - } - if (strstr(cs, "ICC")) - fz_strlcpy(cs, "ICC", 4); - if (strstr(cs, "Indexed")) - fz_strlcpy(cs, "Idx", 4); - if (strstr(cs, "Pattern")) - fz_strlcpy(cs, "Pat", 4); - if (strstr(cs, "Separation")) - fz_strlcpy(cs, "Sep", 4); - } - if (image[i].u.image.altcs) - { - altcs = fz_strdup(ctx, fz_to_name(image[i].u.image.altcs)); - - if (!strncmp(altcs, "Device", 6)) - { - int len = strlen(altcs + 6); - memmove(altcs + 3, altcs + 6, len + 1); - altcs[3 + len + 1] = '\0'; - } - if (strstr(altcs, "ICC")) - fz_strlcpy(altcs, "ICC", 4); - if (strstr(altcs, "Indexed")) - fz_strlcpy(altcs, "Idx", 4); - if (strstr(altcs, "Pattern")) - fz_strlcpy(altcs, "Pat", 4); - if (strstr(altcs, "Separation")) - fz_strlcpy(altcs, "Sep", 4); - } - - printf(" ] %dx%d %dbpc %s%s%s (%d %d R)\n", - fz_to_int(image[i].u.image.width), - fz_to_int(image[i].u.image.height), - image[i].u.image.bpc ? fz_to_int(image[i].u.image.bpc) : 1, - image[i].u.image.cs ? cs : "ImageMask", - image[i].u.image.altcs ? " " : "", - image[i].u.image.altcs ? 
altcs : "", - fz_to_num(image[i].u.image.obj), fz_to_gen(image[i].u.image.obj)); - - fz_free(ctx, cs); - fz_free(ctx, altcs); - } - printf("\n"); - } - - if (show & SHADINGS && shadings > 0) - { - printf("Shading patterns (%d):\n", shadings); - for (i = 0; i < shadings; i++) - { - char *shadingtype[] = - { - "", - "Function", - "Axial", - "Radial", - "Triangle mesh", - "Lattice", - "Coons patch", - "Tensor patch", - }; - - printf(PAGE_FMT "%s (%d %d R)\n", - shading[i].page, - fz_to_num(shading[i].pageref), fz_to_gen(shading[i].pageref), - shadingtype[fz_to_int(shading[i].u.shading.type)], - fz_to_num(shading[i].u.shading.obj), fz_to_gen(shading[i].u.shading.obj)); - } - printf("\n"); - } - - if (show & PATTERNS && patterns > 0) - { - printf("Patterns (%d):\n", patterns); - for (i = 0; i < patterns; i++) - { - if (fz_to_int(pattern[i].u.pattern.type) == 1) - { - char *painttype[] = - { - "", - "Colored", - "Uncolored", - }; - char *tilingtype[] = - { - "", - "Constant", - "No distortion", - "Constant/fast tiling", - }; - - printf(PAGE_FMT "Tiling %s %s (%d %d R)\n", - pattern[i].page, - fz_to_num(pattern[i].pageref), fz_to_gen(pattern[i].pageref), - painttype[fz_to_int(pattern[i].u.pattern.paint)], - tilingtype[fz_to_int(pattern[i].u.pattern.tiling)], - fz_to_num(pattern[i].u.pattern.obj), fz_to_gen(pattern[i].u.pattern.obj)); - } - else - { - printf(PAGE_FMT "Shading %d %d R (%d %d R)\n", - pattern[i].page, - fz_to_num(pattern[i].pageref), fz_to_gen(pattern[i].pageref), - fz_to_num(pattern[i].u.pattern.shading), fz_to_gen(pattern[i].u.pattern.shading), - fz_to_num(pattern[i].u.pattern.obj), fz_to_gen(pattern[i].u.pattern.obj)); - } - } - printf("\n"); - } - - if (show & XOBJS && forms > 0) - { - printf("Form xobjects (%d):\n", forms); - for (i = 0; i < forms; i++) - { - printf(PAGE_FMT "Form%s%s%s%s (%d %d R)\n", - form[i].page, - fz_to_num(form[i].pageref), fz_to_gen(form[i].pageref), - form[i].u.form.groupsubtype ? " " : "", - form[i].u.form.groupsubtype ? 
fz_to_name(form[i].u.form.groupsubtype) : "", - form[i].u.form.groupsubtype ? " Group" : "", - form[i].u.form.reference ? " Reference" : "", - fz_to_num(form[i].u.form.obj), fz_to_gen(form[i].u.form.obj)); - } - printf("\n"); - } - - if (show & XOBJS && psobjs > 0) - { - printf("Postscript xobjects (%d):\n", psobjs); - for (i = 0; i < psobjs; i++) - { - printf(PAGE_FMT "(%d %d R)\n", - psobj[i].page, - fz_to_num(psobj[i].pageref), fz_to_gen(psobj[i].pageref), - fz_to_num(psobj[i].u.form.obj), fz_to_gen(psobj[i].u.form.obj)); - } - printf("\n"); - } -} - -static void -showinfo(char *filename, int show, char *pagelist) -{ - int page, spage, epage; - char *spec, *dash; - int allpages; - - if (!xref) - infousage(); - - allpages = !strcmp(pagelist, "1-"); - - spec = fz_strsep(&pagelist, ","); - while (spec) - { - dash = strchr(spec, '-'); - - if (dash == spec) - spage = epage = pagecount; - else - spage = epage = atoi(spec); - - if (dash) - { - if (strlen(dash) > 1) - epage = atoi(dash + 1); - else - epage = pagecount; - } - - if (spage > epage) - page = spage, spage = epage, epage = page; - - if (spage < 1) - spage = 1; - if (epage > pagecount) - epage = pagecount; - if (spage > pagecount) - spage = pagecount; - - if (allpages) - printf("Retrieving info from pages %d-%d...\n", spage, epage); - if (spage >= 1) - { - for (page = spage; page <= epage; page++) - { - gatherpageinfo(page); - if (!allpages) - { - printf("Page %d:\n", page); - printinfo(filename, show, page); - printf("\n"); - } - } - } - - spec = fz_strsep(&pagelist, ","); - } - - if (allpages) - printinfo(filename, show, -1); -} - -#ifdef MUPDF_COMBINED_EXE -int pdfinfo_main(int argc, char **argv) -#else -int main(int argc, char **argv) -#endif -{ - enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state; - char *filename = ""; - char *password = ""; - int show = ALL; - int c; - - while ((c = fz_getopt(argc, argv, "mfispxd:")) != -1) - { - switch (c) - { - case 'm': if (show == ALL) show = DIMENSIONS; 
else show |= DIMENSIONS; break; - case 'f': if (show == ALL) show = FONTS; else show |= FONTS; break; - case 'i': if (show == ALL) show = IMAGES; else show |= IMAGES; break; - case 's': if (show == ALL) show = SHADINGS; else show |= SHADINGS; break; - case 'p': if (show == ALL) show = PATTERNS; else show |= PATTERNS; break; - case 'x': if (show == ALL) show = XOBJS; else show |= XOBJS; break; - case 'd': password = fz_optarg; break; - default: - infousage(); - break; - } - } - - if (fz_optind == argc) - infousage(); - - ctx = fz_new_context(NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - state = NO_FILE_OPENED; - while (fz_optind < argc) - { - if (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF")) - { - if (state == NO_INFO_GATHERED) - { - showinfo(filename, show, "1-"); - closexref(); - } - - closexref(); - - filename = argv[fz_optind]; - printf("%s:\n", filename); - xref = pdf_open_document(ctx, filename); - if (pdf_needs_password(xref)) - if (!pdf_authenticate_password(xref, password)) - fz_throw(ctx, "cannot authenticate password: %s\n", filename); - pagecount = pdf_count_pages(xref); - - showglobalinfo(); - state = NO_INFO_GATHERED; - } - else - { - showinfo(filename, show, argv[fz_optind]); - state = INFO_SHOWN; - } - - fz_optind++; - } - - if (state == NO_INFO_GATHERED) - showinfo(filename, show, "1-"); - - closexref(); - fz_free_context(ctx); - return 0; -} diff --git a/apps/pdfshow.c b/apps/pdfshow.c deleted file mode 100644 index 53578fd7..00000000 --- a/apps/pdfshow.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * pdfshow -- the ultimate pdf debugging tool - */ - -#include "fitz.h" -#include "mupdf.h" - -static pdf_document *doc = NULL; -static fz_context *ctx = NULL; -static int showbinary = 0; -static int showdecode = 1; -static int showcolumn; - -static void usage(void) -{ - fprintf(stderr, "usage: pdfshow [options] file.pdf [grepable] [xref] [trailer] [pagetree] [object 
numbers]\n"); - fprintf(stderr, "\t-b\tprint streams as binary data\n"); - fprintf(stderr, "\t-e\tprint encoded streams (don't decode)\n"); - fprintf(stderr, "\t-p\tpassword\n"); - exit(1); -} - -static void showtrailer(void) -{ - if (!doc) - fz_throw(ctx, "no file specified"); - printf("trailer\n"); - fz_debug_obj(doc->trailer); - printf("\n"); -} - -static void showxref(void) -{ - if (!doc) - fz_throw(ctx, "no file specified"); - pdf_debug_xref(doc); - printf("\n"); -} - -static void showpagetree(void) -{ - fz_obj *ref; - int count; - int i; - - if (!doc) - fz_throw(ctx, "no file specified"); - - count = pdf_count_pages(doc); - for (i = 0; i < count; i++) - { - ref = doc->page_refs[i]; - printf("page %d = %d %d R\n", i + 1, fz_to_num(ref), fz_to_gen(ref)); - } - printf("\n"); -} - -static void showsafe(unsigned char *buf, int n) -{ - int i; - for (i = 0; i < n; i++) { - if (buf[i] == '\r' || buf[i] == '\n') { - putchar('\n'); - showcolumn = 0; - } - else if (buf[i] < 32 || buf[i] > 126) { - putchar('.'); - showcolumn ++; - } - else { - putchar(buf[i]); - showcolumn ++; - } - if (showcolumn == 79) { - putchar('\n'); - showcolumn = 0; - } - } -} - -static void showstream(int num, int gen) -{ - fz_stream *stm; - unsigned char buf[2048]; - int n; - - showcolumn = 0; - - if (showdecode) - stm = pdf_open_stream(doc, num, gen); - else - stm = pdf_open_raw_stream(doc, num, gen); - - while (1) - { - n = fz_read(stm, buf, sizeof buf); - if (n == 0) - break; - if (showbinary) - fwrite(buf, 1, n, stdout); - else - showsafe(buf, n); - } - - fz_close(stm); -} - -static void showobject(int num, int gen) -{ - fz_obj *obj; - - if (!doc) - fz_throw(ctx, "no file specified"); - - obj = pdf_load_object(doc, num, gen); - - if (pdf_is_stream(doc, num, gen)) - { - if (showbinary) - { - showstream(num, gen); - } - else - { - printf("%d %d obj\n", num, gen); - fz_debug_obj(obj); - printf("stream\n"); - showstream(num, gen); - printf("endstream\n"); - printf("endobj\n\n"); - } - } - else 
- { - printf("%d %d obj\n", num, gen); - fz_debug_obj(obj); - printf("endobj\n\n"); - } - - fz_drop_obj(obj); -} - -static void showgrep(char *filename) -{ - fz_obj *obj; - int i; - - for (i = 0; i < doc->len; i++) - { - if (doc->table[i].type == 'n' || doc->table[i].type == 'o') - { - fz_try(ctx) - { - obj = pdf_load_object(doc, i, 0); - } - fz_catch(ctx) - { - fz_warn(ctx, "skipping object (%d 0 R)", i); - continue; - } - - fz_sort_dict(obj); - - printf("%s:%d: ", filename, i); - fz_fprint_obj(stdout, obj, 1); - - fz_drop_obj(obj); - } - } - - printf("%s:trailer: ", filename); - fz_fprint_obj(stdout, doc->trailer, 1); -} - -#ifdef MUPDF_COMBINED_EXE -int pdfshow_main(int argc, char **argv) -#else -int main(int argc, char **argv) -#endif -{ - char *password = NULL; /* don't throw errors if encrypted */ - char *filename; - int c; - - while ((c = fz_getopt(argc, argv, "p:be")) != -1) - { - switch (c) - { - case 'p': password = fz_optarg; break; - case 'b': showbinary = 1; break; - case 'e': showdecode = 0; break; - default: usage(); break; - } - } - - if (fz_optind == argc) - usage(); - - filename = argv[fz_optind++]; - - ctx = fz_new_context(NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - fz_var(doc); - fz_try(ctx) - { - doc = pdf_open_document(ctx, filename); - if (pdf_needs_password(doc)) - if (!pdf_authenticate_password(doc, password)) - fz_throw(ctx, "cannot authenticate password: %s", filename); - - if (fz_optind == argc) - showtrailer(); - - while (fz_optind < argc) - { - switch (argv[fz_optind][0]) - { - case 't': showtrailer(); break; - case 'x': showxref(); break; - case 'p': showpagetree(); break; - case 'g': showgrep(filename); break; - default: showobject(atoi(argv[fz_optind]), 0); break; - } - fz_optind++; - } - } - fz_catch(ctx) - { - } - - pdf_close_document(doc); - fz_free_context(ctx); - return 0; -} diff --git a/apps/xpsdraw.c b/apps/xpsdraw.c deleted file mode 100644 index 
8fec8efa..00000000 --- a/apps/xpsdraw.c +++ /dev/null @@ -1,379 +0,0 @@ -#include "fitz.h" -#include "muxps.h" - -#ifdef _MSC_VER -#include -#else -#include -#endif - -char *output = NULL; -float resolution = 72; - -int showxml = 0; -int showtext = 0; -int showtime = 0; -int showmd5 = 0; -int showoutline = 0; -int savealpha = 0; -int uselist = 1; - -fz_colorspace *colorspace; -char *filename; -fz_context *ctx; - -struct { - int count, total; - int min, max; - int minpage, maxpage; -} timing; - -static void usage(void) -{ - fprintf(stderr, - "usage: xpsdraw [options] input.xps [pages]\n" - "\t-o -\toutput filename (%%d for page number)\n" - "\t\tsupported formats: pgm, ppm, pam, png\n" - "\t-r -\tresolution in dpi (default: 72)\n" - "\t-a\tsave alpha channel (only pam and png)\n" - "\t-g\trender in grayscale\n" - "\t-m\tshow timing information\n" - "\t-t\tshow text (-tt for xml)\n" - "\t-x\tshow display list\n" - "\t-d\tdisable use of display list\n" - "\t-5\tshow md5 checksums\n" - "\t-l\tprint outline\n" - "\tpages\tcomma separated list of ranges\n"); - exit(1); -} - -static int gettime(void) -{ - static struct timeval first; - static int once = 1; - struct timeval now; - if (once) - { - gettimeofday(&first, NULL); - once = 0; - } - gettimeofday(&now, NULL); - return (now.tv_sec - first.tv_sec) * 1000 + (now.tv_usec - first.tv_usec) / 1000; -} - -static int isrange(char *s) -{ - while (*s) - { - if ((*s < '0' || *s > '9') && *s != '-' && *s != ',') - return 0; - s++; - } - return 1; -} - -static void drawpage(xps_document *doc, int pagenum) -{ - xps_page *page; - fz_display_list *list; - fz_device *dev; - int start; - - if (showtime) - { - start = gettime(); - } - - page = xps_load_page(doc, pagenum - 1); - - list = NULL; - - if (uselist) - { - list = fz_new_display_list(doc->ctx); - dev = fz_new_list_device(doc->ctx, list); - xps_run_page(doc, page, dev, fz_identity, NULL); - fz_free_device(dev); - } - - if (showxml) - { - dev = fz_new_trace_device(doc->ctx); - 
printf("\n", pagenum); - if (list) - fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL); - else - xps_run_page(doc, page, dev, fz_identity, NULL); - printf("\n"); - fz_free_device(dev); - } - - if (showtext) - { - fz_text_span *text = fz_new_text_span(doc->ctx); - dev = fz_new_text_device(doc->ctx, text); - if (list) - fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL); - else - xps_run_page(doc, page, dev, fz_identity, NULL); - fz_free_device(dev); - printf("[Page %d]\n", pagenum); - if (showtext > 1) - fz_debug_text_span_xml(text); - else - fz_debug_text_span(text); - printf("\n"); - fz_free_text_span(doc->ctx, text); - } - - if (showmd5 || showtime) - printf("page %s %d", filename, pagenum); - - if (output || showmd5 || showtime) - { - float zoom; - fz_matrix ctm; - fz_rect rect; - fz_bbox bbox; - fz_pixmap *pix; - - rect = xps_bound_page(doc, page); - zoom = resolution / 72; - ctm = fz_scale(zoom, zoom); - bbox = fz_round_rect(fz_transform_rect(ctm, rect)); - - /* TODO: banded rendering and multi-page ppm */ - - pix = fz_new_pixmap_with_rect(doc->ctx, colorspace, bbox); - - if (savealpha) - fz_clear_pixmap(pix); - else - fz_clear_pixmap_with_color(pix, 255); - - dev = fz_new_draw_device(doc->ctx, pix); - if (list) - fz_execute_display_list(list, dev, ctm, bbox, NULL); - else - xps_run_page(doc, page, dev, ctm, NULL); - fz_free_device(dev); - - if (output) - { - char buf[512]; - sprintf(buf, output, pagenum); - if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm")) - fz_write_pnm(doc->ctx, pix, buf); - else if (strstr(output, ".pam")) - fz_write_pam(doc->ctx, pix, buf, savealpha); - else if (strstr(output, ".png")) - fz_write_png(doc->ctx, pix, buf, savealpha); - } - - if (showmd5) - { - fz_md5 md5; - unsigned char digest[16]; - int i; - - fz_md5_init(&md5); - fz_md5_update(&md5, pix->samples, pix->w * pix->h * pix->n); - fz_md5_final(&md5, digest); - - printf(" "); - for (i = 0; i < 16; i++) - 
printf("%02x", digest[i]); - } - - fz_drop_pixmap(doc->ctx, pix); - } - - if (list) - fz_free_display_list(doc->ctx, list); - - if (showtime) - { - int end = gettime(); - int diff = end - start; - - if (diff < timing.min) - { - timing.min = diff; - timing.minpage = pagenum; - } - if (diff > timing.max) - { - timing.max = diff; - timing.maxpage = pagenum; - } - timing.total += diff; - timing.count ++; - - printf(" %dms", diff); - } - - if (showmd5 || showtime) - printf("\n"); -} - -static void drawrange(xps_document *doc, char *range) -{ - int page, spage, epage; - char *spec, *dash; - - spec = fz_strsep(&range, ","); - while (spec) - { - dash = strchr(spec, '-'); - - if (dash == spec) - spage = epage = xps_count_pages(doc); - else - spage = epage = atoi(spec); - - if (dash) - { - if (strlen(dash) > 1) - epage = atoi(dash + 1); - else - epage = xps_count_pages(doc); - } - - spage = CLAMP(spage, 1, xps_count_pages(doc)); - epage = CLAMP(epage, 1, xps_count_pages(doc)); - - if (spage < epage) - for (page = spage; page <= epage; page++) - drawpage(doc, page); - else - for (page = spage; page >= epage; page--) - drawpage(doc, page); - - spec = fz_strsep(&range, ","); - } -} - -static void drawoutline(xps_document *doc) -{ - fz_outline *outline = xps_load_outline(doc); - if (showoutline > 1) - fz_debug_outline_xml(outline, 0); - else - fz_debug_outline(outline, 0); - fz_free_outline(outline); -} - -#ifdef MUPDF_COMBINED_EXE -int xpsdraw_main(int argc, char **argv) -#else -int main(int argc, char **argv) -#endif -{ - int grayscale = 0; - int accelerate = 1; - xps_document *doc = NULL; - int c; - - fz_var(doc); - - while ((c = fz_getopt(argc, argv, "o:p:r:Aadglmtx5")) != -1) - { - switch (c) - { - case 'o': output = fz_optarg; break; - case 'r': resolution = atof(fz_optarg); break; - case 'A': accelerate = 0; break; - case 'a': savealpha = 1; break; - case 'l': showoutline++; break; - case 'm': showtime++; break; - case 't': showtext++; break; - case 'x': showxml++; break; 
- case '5': showmd5++; break; - case 'g': grayscale++; break; - case 'd': uselist = 0; break; - default: usage(); break; - } - } - - if (fz_optind == argc) - usage(); - - if (!showtext && !showxml && !showtime && !showmd5 && !showoutline && !output) - { - printf("nothing to do\n"); - exit(0); - } - - if (accelerate) - fz_accelerate(); - - ctx = fz_new_context(NULL, FZ_STORE_DEFAULT); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - colorspace = fz_device_rgb; - if (grayscale) - colorspace = fz_device_gray; - if (output && strstr(output, ".pgm")) - colorspace = fz_device_gray; - if (output && strstr(output, ".ppm")) - colorspace = fz_device_rgb; - - timing.count = 0; - timing.total = 0; - timing.min = 1 << 30; - timing.max = 0; - timing.minpage = 0; - timing.maxpage = 0; - - if (showxml) - printf("\n"); - - while (fz_optind < argc) - { - filename = argv[fz_optind++]; - - fz_try(ctx) - { - doc = xps_open_document(ctx, filename); - - if (showxml) - printf("\n", filename); - - if (showoutline) - drawoutline(doc); - - if (showtext || showxml || showtime || showmd5 || output) - { - if (fz_optind == argc || !isrange(argv[fz_optind])) - drawrange(doc, "1-"); - if (fz_optind < argc && isrange(argv[fz_optind])) - drawrange(doc, argv[fz_optind++]); - } - - if (showxml) - printf("\n"); - - xps_close_document(doc); - } - fz_catch(ctx) - { - xps_close_document(doc); - } - } - - if (showtime) - { - printf("total %dms / %d pages for an average of %dms\n", - timing.total, timing.count, timing.total / timing.count); - printf("fastest page %d: %dms\n", timing.minpage, timing.min); - printf("slowest page %d: %dms\n", timing.maxpage, timing.max); - } - - fz_free_context(ctx); - - return 0; -} -- cgit v1.2.3