From ae5749c4139982079bd35698a3c3c23e4ec9147e Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Tue, 2 Oct 2012 16:52:45 +0200 Subject: Rename mubusy to mutool. --- Makefile | 10 +- apps/man/mubusy.1 | 77 ---- apps/man/mutool.1 | 77 ++++ apps/mubusy.c | 83 ----- apps/mupdfclean.c | 233 ------------ apps/mupdfextract.c | 198 ---------- apps/mupdfinfo.c | 1033 --------------------------------------------------- apps/mupdfposter.c | 184 --------- apps/mupdfshow.c | 235 ------------ apps/mutool.c | 83 +++++ apps/pdfclean.c | 233 ++++++++++++ apps/pdfextract.c | 198 ++++++++++ apps/pdfinfo.c | 1033 +++++++++++++++++++++++++++++++++++++++++++++++++++ apps/pdfposter.c | 184 +++++++++ apps/pdfshow.c | 235 ++++++++++++ win32/mubusy.vcproj | 266 ------------- win32/mupdf.sln | 2 +- win32/mutool.vcproj | 266 +++++++++++++ 18 files changed, 2315 insertions(+), 2315 deletions(-) delete mode 100644 apps/man/mubusy.1 create mode 100644 apps/man/mutool.1 delete mode 100644 apps/mubusy.c delete mode 100644 apps/mupdfclean.c delete mode 100644 apps/mupdfextract.c delete mode 100644 apps/mupdfinfo.c delete mode 100644 apps/mupdfposter.c delete mode 100644 apps/mupdfshow.c create mode 100644 apps/mutool.c create mode 100644 apps/pdfclean.c create mode 100644 apps/pdfextract.c create mode 100644 apps/pdfinfo.c create mode 100644 apps/pdfposter.c create mode 100644 apps/pdfshow.c delete mode 100644 win32/mubusy.vcproj create mode 100644 win32/mutool.vcproj diff --git a/Makefile b/Makefile index 6fd0b23e..10b2e5f6 100644 --- a/Makefile +++ b/Makefile @@ -164,8 +164,8 @@ $(OUT)/cmapdump.o : pdf/pdf_cmap.c pdf/pdf_cmap_parse.c MUDRAW := $(addprefix $(OUT)/, mudraw) $(MUDRAW) : $(FITZ_LIB) $(THIRD_LIBS) -MUBUSY := $(addprefix $(OUT)/, mubusy) -$(MUBUSY) : $(addprefix $(OUT)/, mupdfclean.o mupdfextract.o mupdfinfo.o mupdfposter.o mupdfshow.o) $(FITZ_LIB) $(THIRD_LIBS) +MUTOOL := $(addprefix $(OUT)/, mutool) +$(MUTOOL) : $(addprefix $(OUT)/, pdfclean.o pdfextract.o pdfinfo.o pdfposter.o 
pdfshow.o) $(FITZ_LIB) $(THIRD_LIBS) ifeq "$(NOX11)" "" MUVIEW := $(OUT)/mupdf @@ -208,18 +208,18 @@ libdir ?= $(prefix)/lib incdir ?= $(prefix)/include mandir ?= $(prefix)/share/man -install: $(FITZ_LIB) $(MUVIEW) $(MUDRAW) $(MUBUSY) +install: $(FITZ_LIB) $(MUVIEW) $(MUDRAW) $(MUTOOL) install -d $(bindir) $(libdir) $(incdir) $(mandir)/man1 install $(FITZ_LIB) $(libdir) install fitz/memento.h fitz/fitz.h pdf/mupdf.h xps/muxps.h cbz/mucbz.h $(incdir) - install $(MUVIEW) $(MUDRAW) $(MUBUSY) $(bindir) + install $(MUVIEW) $(MUDRAW) $(MUTOOL) $(bindir) install $(wildcard apps/man/*.1) $(mandir)/man1 # --- Clean and Default --- all: all-nojs $(JSTARGETS) -all-nojs: $(THIRD_LIBS) $(FITZ_LIB) $(MUVIEW) $(MUDRAW) $(MUBUSY) +all-nojs: $(THIRD_LIBS) $(FITZ_LIB) $(MUVIEW) $(MUDRAW) $(MUTOOL) third: $(THIRD_LIBS) diff --git a/apps/man/mubusy.1 b/apps/man/mubusy.1 deleted file mode 100644 index 435dba4d..00000000 --- a/apps/man/mubusy.1 +++ /dev/null @@ -1,77 +0,0 @@ -.TH "MUBUSY" "1" "May 10, 2012" -.\" Please adjust this date whenever revising the manpage. -.\" no hyphenation -.nh -.\" adjust left -.ad l -.SH NAME -mubusy \- all purpose tool for dealing with PDF files -.SH SYNOPSIS -mubusy [options] -.SH DESCRIPTION -mubusy is a tool based on MuPDF for dealing with PDF files in various manners. -There are several sub commands available, as described below. -.SH CLEAN -mubusy clean [options] input.pdf [output.pdf] [pages] -.PP -The clean command pretty prints and rewrites the syntax of a PDF file. -It can be used to repair broken files, expand compressed streams, filter -out a range of pages, etc. -.PP -If no output file is specified, it will write the cleaned PDF to "out.pdf" -in the current directory. -.TP -.B \-p password -Use the specified password if the file is encrypted. -.TP -.B \-g -Garbage collect objects that have no references from other objects. -Give the option twice to renumber all objects and compact the cross reference table. 
-Give it three times to merge and reuse duplicate objects. -.TP -.B \-d -Decompress streams. This will make the output file larger, but provides -easy access for reading and editing the contents with a text editor. -.TP -.B pages -Comma separated list of page ranges to include. -.SH EXTRACT -TODO -.SH INFO -TODO -.SH POSTER -TODO -.SH SHOW -mubusy show [options] file.pdf [object numbers ...] -.PP -The show command will print the specified objects and streams to stdout. -Streams are decoded and non-printable characters are represented -with a period by default. -.TP -.B \-b -Print streams as binary data and omit the object header. -.TP -.B \-e -Print streams in their original encoded (or compressed) form. -.TP -.B \-p password -Use the specified password if the file is encrypted. -.PP -Specify objects by number, or use one of the following special names: -.TP -.B 'xref' or 'x' -Print the cross reference table. -.TP -.B 'trailer' or 't' -Print the trailer dictionary. -.TP -.B 'pages' or 'p' -List the object numbers for every page. -.TP -.B 'grep' or 'g' -Print all the objects in the file in a compact one-line format suitable for piping to grep. -.SH SEE ALSO -.BR mupdf (1), -.BR mudraw (1). -.SH AUTHOR -MuPDF is Copyright 2006-2012 Artifex Software, Inc. diff --git a/apps/man/mutool.1 b/apps/man/mutool.1 new file mode 100644 index 00000000..a2b7d42c --- /dev/null +++ b/apps/man/mutool.1 @@ -0,0 +1,77 @@ +.TH "MUTOOL" "1" "Oct 02, 2012" +.\" Please adjust this date whenever revising the manpage. +.\" no hyphenation +.nh +.\" adjust left +.ad l +.SH NAME +mutool \- all purpose tool for dealing with PDF files +.SH SYNOPSIS +mutool [options] +.SH DESCRIPTION +mutool is a tool based on MuPDF for dealing with PDF files in various manners. +There are several sub commands available, as described below. +.SH CLEAN +mutool clean [options] input.pdf [output.pdf] [pages] +.PP +The clean command pretty prints and rewrites the syntax of a PDF file. 
+It can be used to repair broken files, expand compressed streams, filter +out a range of pages, etc. +.PP +If no output file is specified, it will write the cleaned PDF to "out.pdf" +in the current directory. +.TP +.B \-p password +Use the specified password if the file is encrypted. +.TP +.B \-g +Garbage collect objects that have no references from other objects. +Give the option twice to renumber all objects and compact the cross reference table. +Give it three times to merge and reuse duplicate objects. +.TP +.B \-d +Decompress streams. This will make the output file larger, but provides +easy access for reading and editing the contents with a text editor. +.TP +.B pages +Comma separated list of page ranges to include. +.SH EXTRACT +TODO +.SH INFO +TODO +.SH POSTER +TODO +.SH SHOW +mutool show [options] file.pdf [object numbers ...] +.PP +The show command will print the specified objects and streams to stdout. +Streams are decoded and non-printable characters are represented +with a period by default. +.TP +.B \-b +Print streams as binary data and omit the object header. +.TP +.B \-e +Print streams in their original encoded (or compressed) form. +.TP +.B \-p password +Use the specified password if the file is encrypted. +.PP +Specify objects by number, or use one of the following special names: +.TP +.B 'xref' or 'x' +Print the cross reference table. +.TP +.B 'trailer' or 't' +Print the trailer dictionary. +.TP +.B 'pages' or 'p' +List the object numbers for every page. +.TP +.B 'grep' or 'g' +Print all the objects in the file in a compact one-line format suitable for piping to grep. +.SH SEE ALSO +.BR mupdf (1), +.BR mudraw (1). +.SH AUTHOR +MuPDF is Copyright 2006-2012 Artifex Software, Inc. 
diff --git a/apps/mubusy.c b/apps/mubusy.c deleted file mode 100644 index e11d488c..00000000 --- a/apps/mubusy.c +++ /dev/null @@ -1,83 +0,0 @@ -/* - * mubusy -- swiss army knife of pdf manipulation tools - */ - -#include -#include -#include - -#define nelem(x) (sizeof(x)/sizeof((x)[0])) - -int pdfclean_main(int argc, char *argv[]); -int pdfextract_main(int argc, char *argv[]); -int pdfinfo_main(int argc, char *argv[]); -int pdfposter_main(int argc, char *argv[]); -int pdfshow_main(int argc, char *argv[]); - -static struct { - int (*func)(int argc, char *argv[]); - char *name; - char *desc; -} tools[] = { - { pdfclean_main, "clean", "rewrite pdf file" }, - { pdfextract_main, "extract", "extract font and image resources" }, - { pdfinfo_main, "info", "show information about pdf resources" }, - { pdfposter_main, "poster", "split large page into many tiles" }, - { pdfshow_main, "show", "show internal pdf objects" }, -}; - -static int -namematch(const char *end, const char *start, const char *match) -{ - int len = strlen(match); - return ((end-len >= start) && (strncmp(end-len, match, len) == 0)); -} - -int main(int argc, char **argv) -{ - char *start, *end; - char buf[32]; - int i; - - if (argc == 0) - { - fprintf(stderr, "No command name found!\n"); - return 1; - } - - /* Check argv[0] */ - - if (argc > 0) - { - end = start = argv[0]; - while (*end) - end++; - if ((end-4 >= start) && (end[-4] == '.') && (end[-3] == 'e') && (end[-2] == 'x') && (end[-1] == 'e')) - end = end-4; - for (i = 0; i < nelem(tools); i++) - { - strcpy(buf, "mupdf"); - strcat(buf, tools[i].name); - if (namematch(end, start, buf) || namematch(end, start, buf+2)) - return tools[i].func(argc, argv); - } - } - - /* Check argv[1] */ - - if (argc > 1) - { - for (i = 0; i < nelem(tools); i++) - if (!strcmp(tools[i].name, argv[1])) - return tools[i].func(argc - 1, argv + 1); - } - - /* Print usage */ - - fprintf(stderr, "usage: mubusy [options]\n"); - - for (i = 0; i < nelem(tools); i++) - 
fprintf(stderr, "\t%s\t-- %s\n", tools[i].name, tools[i].desc); - - return 1; -} diff --git a/apps/mupdfclean.c b/apps/mupdfclean.c deleted file mode 100644 index fdf9b412..00000000 --- a/apps/mupdfclean.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * PDF cleaning tool: general purpose pdf syntax washer. - * - * Rewrite PDF with pretty printed objects. - * Garbage collect unreachable objects. - * Inflate compressed streams. - * Create subset documents. - * - * TODO: linearize document for fast web view - */ - -#include "fitz.h" -#include "mupdf-internal.h" - -static pdf_document *xref = NULL; -static fz_context *ctx = NULL; - -static void usage(void) -{ - fprintf(stderr, - "usage: mubusy clean [options] input.pdf [output.pdf] [pages]\n" - "\t-p -\tpassword\n" - "\t-g\tgarbage collect unused objects\n" - "\t-gg\tin addition to -g compact xref table\n" - "\t-ggg\tin addition to -gg merge duplicate objects\n" - "\t-d\tdecompress all streams\n" - "\t-l\tlinearize PDF\n" - "\t-i\ttoggle decompression of image streams\n" - "\t-f\ttoggle decompression of font streams\n" - "\t-a\tascii hex encode binary streams\n" - "\tpages\tcomma separated list of ranges\n"); - exit(1); -} - -/* - * Recreate page tree to only retain specified pages. 
- */ - -static void retainpages(int argc, char **argv) -{ - pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; - - /* Keep only pages/type and (reduced) dest entries to avoid - * references to unretained pages */ - oldroot = pdf_dict_gets(xref->trailer, "Root"); - pages = pdf_dict_gets(oldroot, "Pages"); - olddests = pdf_load_name_tree(xref, "Dests"); - - root = pdf_new_dict(ctx, 2); - pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type")); - pdf_dict_puts(root, "Pages", pdf_dict_gets(oldroot, "Pages")); - - pdf_update_object(xref, pdf_to_num(oldroot), root); - - pdf_drop_obj(root); - - /* Create a new kids array with only the pages we want to keep */ - parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref); - kids = pdf_new_array(ctx, 1); - - /* Retain pages specified */ - while (argc - fz_optind) - { - int page, spage, epage, pagecount; - char *spec, *dash; - char *pagelist = argv[fz_optind]; - - pagecount = pdf_count_pages(xref); - spec = fz_strsep(&pagelist, ","); - while (spec) - { - dash = strchr(spec, '-'); - - if (dash == spec) - spage = epage = pagecount; - else - spage = epage = atoi(spec); - - if (dash) - { - if (strlen(dash) > 1) - epage = atoi(dash + 1); - else - epage = pagecount; - } - - if (spage > epage) - page = spage, spage = epage, epage = page; - - spage = fz_clampi(spage, 1, pagecount); - epage = fz_clampi(epage, 1, pagecount); - - for (page = spage; page <= epage; page++) - { - pdf_obj *pageobj = xref->page_objs[page-1]; - pdf_obj *pageref = xref->page_refs[page-1]; - - pdf_dict_puts(pageobj, "Parent", parent); - - /* Store page object in new kids array */ - pdf_array_push(kids, pageref); - } - - spec = fz_strsep(&pagelist, ","); - } - - fz_optind++; - } - - pdf_drop_obj(parent); - - /* Update page count and kids array */ - countobj = pdf_new_int(ctx, pdf_array_len(kids)); - pdf_dict_puts(pages, "Count", countobj); - pdf_drop_obj(countobj); - pdf_dict_puts(pages, "Kids", kids); - 
pdf_drop_obj(kids); - - /* Also preserve the (partial) Dests name tree */ - if (olddests) - { - int i; - pdf_obj *names = pdf_new_dict(ctx, 1); - pdf_obj *dests = pdf_new_dict(ctx, 1); - pdf_obj *names_list = pdf_new_array(ctx, 32); - int len = pdf_dict_len(olddests); - - for (i = 0; i < len; i++) - { - pdf_obj *key = pdf_dict_get_key(olddests, i); - pdf_obj *val = pdf_dict_get_val(olddests, i); - pdf_obj *key_str = pdf_new_string(ctx, pdf_to_name(key), strlen(pdf_to_name(key))); - pdf_obj *dest = pdf_dict_gets(val, "D"); - - dest = pdf_array_get(dest ? dest : val, 0); - if (pdf_array_contains(pdf_dict_gets(pages, "Kids"), dest)) - { - pdf_array_push(names_list, key_str); - pdf_array_push(names_list, val); - } - pdf_drop_obj(key_str); - } - - root = pdf_dict_gets(xref->trailer, "Root"); - pdf_dict_puts(dests, "Names", names_list); - pdf_dict_puts(names, "Dests", dests); - pdf_dict_puts(root, "Names", names); - - pdf_drop_obj(names); - pdf_drop_obj(dests); - pdf_drop_obj(names_list); - pdf_drop_obj(olddests); - } -} - -int pdfclean_main(int argc, char **argv) -{ - char *infile; - char *outfile = "out.pdf"; - char *password = ""; - int c; - int subset; - fz_write_options opts; - int write_failed = 0; - - opts.do_garbage = 0; - opts.do_expand = 0; - opts.do_ascii = 0; - opts.do_linear = 0; - - while ((c = fz_getopt(argc, argv, "adfgilp:")) != -1) - { - switch (c) - { - case 'p': password = fz_optarg; break; - case 'g': opts.do_garbage ++; break; - case 'd': opts.do_expand ^= fz_expand_all; break; - case 'f': opts.do_expand ^= fz_expand_fonts; break; - case 'i': opts.do_expand ^= fz_expand_images; break; - case 'l': opts.do_linear ++; break; - case 'a': opts.do_ascii ++; break; - default: usage(); break; - } - } - - if (argc - fz_optind < 1) - usage(); - - infile = argv[fz_optind++]; - - if (argc - fz_optind > 0 && - (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF"))) - { - outfile = argv[fz_optind++]; - } - - subset = 0; - if (argc - fz_optind > 0) - 
subset = 1; - - ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - fz_try(ctx) - { - xref = pdf_open_document_no_run(ctx, infile); - if (pdf_needs_password(xref)) - if (!pdf_authenticate_password(xref, password)) - fz_throw(ctx, "cannot authenticate password: %s", infile); - - /* Only retain the specified subset of the pages */ - if (subset) - retainpages(argc, argv); - - pdf_write_document(xref, outfile, &opts); - } - fz_always(ctx) - { - pdf_close_document(xref); - } - fz_catch(ctx) - { - write_failed = 1; - } - - fz_free_context(ctx); - - return write_failed ? 1 : 0; -} diff --git a/apps/mupdfextract.c b/apps/mupdfextract.c deleted file mode 100644 index 95f27be9..00000000 --- a/apps/mupdfextract.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * pdfextract -- the ultimate way to extract images and fonts from pdfs - */ - -#include "mupdf.h" -#include "mupdf-internal.h" - -static pdf_document *doc = NULL; -static fz_context *ctx = NULL; -static int dorgb = 0; - -static void usage(void) -{ - fprintf(stderr, "usage: mubusy extract [options] file.pdf [object numbers]\n"); - fprintf(stderr, "\t-p\tpassword\n"); - fprintf(stderr, "\t-r\tconvert images to rgb\n"); - exit(1); -} - -static int isimage(pdf_obj *obj) -{ - pdf_obj *type = pdf_dict_gets(obj, "Subtype"); - return pdf_is_name(type) && !strcmp(pdf_to_name(type), "Image"); -} - -static int isfontdesc(pdf_obj *obj) -{ - pdf_obj *type = pdf_dict_gets(obj, "Type"); - return pdf_is_name(type) && !strcmp(pdf_to_name(type), "FontDescriptor"); -} - -static void saveimage(int num) -{ - fz_image *image; - fz_pixmap *img; - pdf_obj *ref; - char name[32]; - - ref = pdf_new_indirect(ctx, num, 0, doc); - - /* TODO: detect DCTD and save as jpeg */ - - image = pdf_load_image(doc, ref); - img = fz_image_to_pixmap(ctx, image, 0, 0); - fz_drop_image(ctx, image); - - sprintf(name, "img-%04d", num); - fz_write_pixmap(ctx, img, name, dorgb); - - 
fz_drop_pixmap(ctx, img); - pdf_drop_obj(ref); -} - -static void savefont(pdf_obj *dict, int num) -{ - char name[1024]; - char *subtype; - fz_buffer *buf; - pdf_obj *stream = NULL; - pdf_obj *obj; - char *ext = ""; - FILE *f; - char *fontname = "font"; - int n, len; - unsigned char *data; - - obj = pdf_dict_gets(dict, "FontName"); - if (obj) - fontname = pdf_to_name(obj); - - obj = pdf_dict_gets(dict, "FontFile"); - if (obj) - { - stream = obj; - ext = "pfa"; - } - - obj = pdf_dict_gets(dict, "FontFile2"); - if (obj) - { - stream = obj; - ext = "ttf"; - } - - obj = pdf_dict_gets(dict, "FontFile3"); - if (obj) - { - stream = obj; - - obj = pdf_dict_gets(obj, "Subtype"); - if (obj && !pdf_is_name(obj)) - fz_throw(ctx, "Invalid font descriptor subtype"); - - subtype = pdf_to_name(obj); - if (!strcmp(subtype, "Type1C")) - ext = "cff"; - else if (!strcmp(subtype, "CIDFontType0C")) - ext = "cid"; - else - fz_throw(ctx, "Unhandled font type '%s'", subtype); - } - - if (!stream) - { - fz_warn(ctx, "Unhandled font type"); - return; - } - - buf = pdf_load_stream(doc, pdf_to_num(stream), pdf_to_gen(stream)); - - sprintf(name, "%s-%04d.%s", fontname, num, ext); - printf("extracting font %s\n", name); - - f = fopen(name, "wb"); - if (!f) - fz_throw(ctx, "Error creating font file"); - - len = fz_buffer_storage(ctx, buf, &data); - n = fwrite(data, 1, len, f); - if (n < len) - fz_throw(ctx, "Error writing font file"); - - if (fclose(f) < 0) - fz_throw(ctx, "Error closing font file"); - - fz_drop_buffer(ctx, buf); -} - -static void showobject(int num) -{ - pdf_obj *obj; - - if (!doc) - fz_throw(ctx, "no file specified"); - - obj = pdf_load_object(doc, num, 0); - - if (isimage(obj)) - saveimage(num); - else if (isfontdesc(obj)) - savefont(obj, num); - - pdf_drop_obj(obj); -} - -int pdfextract_main(int argc, char **argv) -{ - char *infile; - char *password = ""; - int c, o; - - while ((c = fz_getopt(argc, argv, "p:r")) != -1) - { - switch (c) - { - case 'p': password = fz_optarg; 
break; - case 'r': dorgb++; break; - default: usage(); break; - } - } - - if (fz_optind == argc) - usage(); - - infile = argv[fz_optind++]; - - ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - doc = pdf_open_document_no_run(ctx, infile); - if (pdf_needs_password(doc)) - if (!pdf_authenticate_password(doc, password)) - fz_throw(ctx, "cannot authenticate password: %s", infile); - - if (fz_optind == argc) - { - int len = pdf_count_objects(doc); - for (o = 0; o < len; o++) - showobject(o); - } - else - { - while (fz_optind < argc) - { - showobject(atoi(argv[fz_optind])); - fz_optind++; - } - } - - pdf_close_document(doc); - fz_flush_warnings(ctx); - fz_free_context(ctx); - return 0; -} diff --git a/apps/mupdfinfo.c b/apps/mupdfinfo.c deleted file mode 100644 index 6c0a31d7..00000000 --- a/apps/mupdfinfo.c +++ /dev/null @@ -1,1033 +0,0 @@ -/* - * Information tool. - * Print information about the input pdf. 
- */ - -#include "fitz.h" -#include "mupdf-internal.h" - -pdf_document *xref; -fz_context *ctx; -int pagecount; - -void closexref(void); - -void openxref(char *filename, char *password, int dieonbadpass, int loadpages); - -enum -{ - DIMENSIONS = 0x01, - FONTS = 0x02, - IMAGES = 0x04, - SHADINGS = 0x08, - PATTERNS = 0x10, - XOBJS = 0x20, - ALL = DIMENSIONS | FONTS | IMAGES | SHADINGS | PATTERNS | XOBJS -}; - -struct info -{ - int page; - pdf_obj *pageref; - pdf_obj *pageobj; - union { - struct { - pdf_obj *obj; - } info; - struct { - pdf_obj *obj; - } crypt; - struct { - pdf_obj *obj; - fz_rect *bbox; - } dim; - struct { - pdf_obj *obj; - pdf_obj *subtype; - pdf_obj *name; - } font; - struct { - pdf_obj *obj; - pdf_obj *width; - pdf_obj *height; - pdf_obj *bpc; - pdf_obj *filter; - pdf_obj *cs; - pdf_obj *altcs; - } image; - struct { - pdf_obj *obj; - pdf_obj *type; - } shading; - struct { - pdf_obj *obj; - pdf_obj *type; - pdf_obj *paint; - pdf_obj *tiling; - pdf_obj *shading; - } pattern; - struct { - pdf_obj *obj; - pdf_obj *groupsubtype; - pdf_obj *reference; - } form; - } u; -}; - -static struct info *dim = NULL; -static int dims = 0; -static struct info *font = NULL; -static int fonts = 0; -static struct info *image = NULL; -static int images = 0; -static struct info *shading = NULL; -static int shadings = 0; -static struct info *pattern = NULL; -static int patterns = 0; -static struct info *form = NULL; -static int forms = 0; -static struct info *psobj = NULL; -static int psobjs = 0; - -void closexref(void) -{ - int i; - if (xref) - { - pdf_close_document(xref); - xref = NULL; - } - - if (dim) - { - for (i = 0; i < dims; i++) - fz_free(ctx, dim[i].u.dim.bbox); - fz_free(ctx, dim); - dim = NULL; - dims = 0; - } - - if (font) - { - fz_free(ctx, font); - font = NULL; - fonts = 0; - } - - if (image) - { - fz_free(ctx, image); - image = NULL; - images = 0; - } - - if (shading) - { - fz_free(ctx, shading); - shading = NULL; - shadings = 0; - } - - if (pattern) - { 
- fz_free(ctx, pattern); - pattern = NULL; - patterns = 0; - } - - if (form) - { - fz_free(ctx, form); - form = NULL; - forms = 0; - } - - if (psobj) - { - fz_free(ctx, psobj); - psobj = NULL; - psobjs = 0; - } -} - -static void -infousage(void) -{ - fprintf(stderr, - "usage: mubusy info [options] [file.pdf ... ]\n" - "\t-d -\tpassword for decryption\n" - "\t-f\tlist fonts\n" - "\t-i\tlist images\n" - "\t-m\tlist dimensions\n" - "\t-p\tlist patterns\n" - "\t-s\tlist shadings\n" - "\t-x\tlist form and postscript xobjects\n"); - exit(1); -} - -static void -showglobalinfo(void) -{ - pdf_obj *obj; - - printf("\nPDF-%d.%d\n", xref->version / 10, xref->version % 10); - - obj = pdf_dict_gets(xref->trailer, "Info"); - if (obj) - { - printf("Info object (%d %d R):\n", pdf_to_num(obj), pdf_to_gen(obj)); - pdf_fprint_obj(stdout, pdf_resolve_indirect(obj), 0); - } - - obj = pdf_dict_gets(xref->trailer, "Encrypt"); - if (obj) - { - printf("\nEncryption object (%d %d R):\n", pdf_to_num(obj), pdf_to_gen(obj)); - pdf_fprint_obj(stdout, pdf_resolve_indirect(obj), 0); - } - - printf("\nPages: %d\n\n", pagecount); -} - -static void -gatherdimensions(int page, pdf_obj *pageref, pdf_obj *pageobj) -{ - fz_rect bbox; - pdf_obj *obj; - int j; - - obj = pdf_dict_gets(pageobj, "MediaBox"); - if (!pdf_is_array(obj)) - return; - - bbox = pdf_to_rect(ctx, obj); - - obj = pdf_dict_gets(pageobj, "UserUnit"); - if (pdf_is_real(obj)) - { - float unit = pdf_to_real(obj); - bbox.x0 *= unit; - bbox.y0 *= unit; - bbox.x1 *= unit; - bbox.y1 *= unit; - } - - for (j = 0; j < dims; j++) - if (!memcmp(dim[j].u.dim.bbox, &bbox, sizeof (fz_rect))) - break; - - if (j < dims) - return; - - dim = fz_resize_array(ctx, dim, dims+1, sizeof(struct info)); - dims++; - - dim[dims - 1].page = page; - dim[dims - 1].pageref = pageref; - dim[dims - 1].pageobj = pageobj; - dim[dims - 1].u.dim.bbox = fz_malloc(ctx, sizeof(fz_rect)); - memcpy(dim[dims - 1].u.dim.bbox, &bbox, sizeof (fz_rect)); - - return; -} - -static void 
-gatherfonts(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) -{ - int i, n; - - n = pdf_dict_len(dict); - for (i = 0; i < n; i++) - { - pdf_obj *fontdict = NULL; - pdf_obj *subtype = NULL; - pdf_obj *basefont = NULL; - pdf_obj *name = NULL; - int k; - - fontdict = pdf_dict_get_val(dict, i); - if (!pdf_is_dict(fontdict)) - { - fz_warn(ctx, "not a font dict (%d %d R)", pdf_to_num(fontdict), pdf_to_gen(fontdict)); - continue; - } - - subtype = pdf_dict_gets(fontdict, "Subtype"); - basefont = pdf_dict_gets(fontdict, "BaseFont"); - if (!basefont || pdf_is_null(basefont)) - name = pdf_dict_gets(fontdict, "Name"); - - for (k = 0; k < fonts; k++) - if (!pdf_objcmp(font[k].u.font.obj, fontdict)) - break; - - if (k < fonts) - continue; - - font = fz_resize_array(ctx, font, fonts+1, sizeof(struct info)); - fonts++; - - font[fonts - 1].page = page; - font[fonts - 1].pageref = pageref; - font[fonts - 1].pageobj = pageobj; - font[fonts - 1].u.font.obj = fontdict; - font[fonts - 1].u.font.subtype = subtype; - font[fonts - 1].u.font.name = basefont ? 
basefont : name; - } -} - -static void -gatherimages(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) -{ - int i, n; - - n = pdf_dict_len(dict); - for (i = 0; i < n; i++) - { - pdf_obj *imagedict; - pdf_obj *type; - pdf_obj *width; - pdf_obj *height; - pdf_obj *bpc = NULL; - pdf_obj *filter = NULL; - pdf_obj *cs = NULL; - pdf_obj *altcs; - int k; - - imagedict = pdf_dict_get_val(dict, i); - if (!pdf_is_dict(imagedict)) - { - fz_warn(ctx, "not an image dict (%d %d R)", pdf_to_num(imagedict), pdf_to_gen(imagedict)); - continue; - } - - type = pdf_dict_gets(imagedict, "Subtype"); - if (strcmp(pdf_to_name(type), "Image")) - continue; - - filter = pdf_dict_gets(imagedict, "Filter"); - - altcs = NULL; - cs = pdf_dict_gets(imagedict, "ColorSpace"); - if (pdf_is_array(cs)) - { - pdf_obj *cses = cs; - - cs = pdf_array_get(cses, 0); - if (pdf_is_name(cs) && (!strcmp(pdf_to_name(cs), "DeviceN") || !strcmp(pdf_to_name(cs), "Separation"))) - { - altcs = pdf_array_get(cses, 2); - if (pdf_is_array(altcs)) - altcs = pdf_array_get(altcs, 0); - } - } - - width = pdf_dict_gets(imagedict, "Width"); - height = pdf_dict_gets(imagedict, "Height"); - bpc = pdf_dict_gets(imagedict, "BitsPerComponent"); - - for (k = 0; k < images; k++) - if (!pdf_objcmp(image[k].u.image.obj, imagedict)) - break; - - if (k < images) - continue; - - image = fz_resize_array(ctx, image, images+1, sizeof(struct info)); - images++; - - image[images - 1].page = page; - image[images - 1].pageref = pageref; - image[images - 1].pageobj = pageobj; - image[images - 1].u.image.obj = imagedict; - image[images - 1].u.image.width = width; - image[images - 1].u.image.height = height; - image[images - 1].u.image.bpc = bpc; - image[images - 1].u.image.filter = filter; - image[images - 1].u.image.cs = cs; - image[images - 1].u.image.altcs = altcs; - } -} - -static void -gatherforms(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) -{ - int i, n; - - n = pdf_dict_len(dict); - for (i = 0; i < n; i++) - { - 
pdf_obj *xobjdict; - pdf_obj *type; - pdf_obj *subtype; - pdf_obj *group; - pdf_obj *groupsubtype; - pdf_obj *reference; - int k; - - xobjdict = pdf_dict_get_val(dict, i); - if (!pdf_is_dict(xobjdict)) - { - fz_warn(ctx, "not a xobject dict (%d %d R)", pdf_to_num(xobjdict), pdf_to_gen(xobjdict)); - continue; - } - - type = pdf_dict_gets(xobjdict, "Subtype"); - if (strcmp(pdf_to_name(type), "Form")) - continue; - - subtype = pdf_dict_gets(xobjdict, "Subtype2"); - if (!strcmp(pdf_to_name(subtype), "PS")) - continue; - - group = pdf_dict_gets(xobjdict, "Group"); - groupsubtype = pdf_dict_gets(group, "S"); - reference = pdf_dict_gets(xobjdict, "Ref"); - - for (k = 0; k < forms; k++) - if (!pdf_objcmp(form[k].u.form.obj, xobjdict)) - break; - - if (k < forms) - continue; - - form = fz_resize_array(ctx, form, forms+1, sizeof(struct info)); - forms++; - - form[forms - 1].page = page; - form[forms - 1].pageref = pageref; - form[forms - 1].pageobj = pageobj; - form[forms - 1].u.form.obj = xobjdict; - form[forms - 1].u.form.groupsubtype = groupsubtype; - form[forms - 1].u.form.reference = reference; - } -} - -static void -gatherpsobjs(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) -{ - int i, n; - - n = pdf_dict_len(dict); - for (i = 0; i < n; i++) - { - pdf_obj *xobjdict; - pdf_obj *type; - pdf_obj *subtype; - int k; - - xobjdict = pdf_dict_get_val(dict, i); - if (!pdf_is_dict(xobjdict)) - { - fz_warn(ctx, "not a xobject dict (%d %d R)", pdf_to_num(xobjdict), pdf_to_gen(xobjdict)); - continue; - } - - type = pdf_dict_gets(xobjdict, "Subtype"); - subtype = pdf_dict_gets(xobjdict, "Subtype2"); - if (strcmp(pdf_to_name(type), "PS") && - (strcmp(pdf_to_name(type), "Form") || strcmp(pdf_to_name(subtype), "PS"))) - continue; - - for (k = 0; k < psobjs; k++) - if (!pdf_objcmp(psobj[k].u.form.obj, xobjdict)) - break; - - if (k < psobjs) - continue; - - psobj = fz_resize_array(ctx, psobj, psobjs+1, sizeof(struct info)); - psobjs++; - - psobj[psobjs - 1].page = page; - 
psobj[psobjs - 1].pageref = pageref; - psobj[psobjs - 1].pageobj = pageobj; - psobj[psobjs - 1].u.form.obj = xobjdict; - } -} - -static void -gathershadings(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) -{ - int i, n; - - n = pdf_dict_len(dict); - for (i = 0; i < n; i++) - { - pdf_obj *shade; - pdf_obj *type; - int k; - - shade = pdf_dict_get_val(dict, i); - if (!pdf_is_dict(shade)) - { - fz_warn(ctx, "not a shading dict (%d %d R)", pdf_to_num(shade), pdf_to_gen(shade)); - continue; - } - - type = pdf_dict_gets(shade, "ShadingType"); - if (!pdf_is_int(type) || pdf_to_int(type) < 1 || pdf_to_int(type) > 7) - { - fz_warn(ctx, "not a shading type (%d %d R)", pdf_to_num(shade), pdf_to_gen(shade)); - type = NULL; - } - - for (k = 0; k < shadings; k++) - if (!pdf_objcmp(shading[k].u.shading.obj, shade)) - break; - - if (k < shadings) - continue; - - shading = fz_resize_array(ctx, shading, shadings+1, sizeof(struct info)); - shadings++; - - shading[shadings - 1].page = page; - shading[shadings - 1].pageref = pageref; - shading[shadings - 1].pageobj = pageobj; - shading[shadings - 1].u.shading.obj = shade; - shading[shadings - 1].u.shading.type = type; - } -} - -static void -gatherpatterns(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) -{ - int i, n; - - n = pdf_dict_len(dict); - for (i = 0; i < n; i++) - { - pdf_obj *patterndict; - pdf_obj *type; - pdf_obj *paint = NULL; - pdf_obj *tiling = NULL; - pdf_obj *shading = NULL; - int k; - - patterndict = pdf_dict_get_val(dict, i); - if (!pdf_is_dict(patterndict)) - { - fz_warn(ctx, "not a pattern dict (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); - continue; - } - - type = pdf_dict_gets(patterndict, "PatternType"); - if (!pdf_is_int(type) || pdf_to_int(type) < 1 || pdf_to_int(type) > 2) - { - fz_warn(ctx, "not a pattern type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); - type = NULL; - } - - if (pdf_to_int(type) == 1) - { - paint = pdf_dict_gets(patterndict, 
"PaintType"); - if (!pdf_is_int(paint) || pdf_to_int(paint) < 1 || pdf_to_int(paint) > 2) - { - fz_warn(ctx, "not a pattern paint type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); - paint = NULL; - } - - tiling = pdf_dict_gets(patterndict, "TilingType"); - if (!pdf_is_int(tiling) || pdf_to_int(tiling) < 1 || pdf_to_int(tiling) > 3) - { - fz_warn(ctx, "not a pattern tiling type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); - tiling = NULL; - } - } - else - { - shading = pdf_dict_gets(patterndict, "Shading"); - } - - for (k = 0; k < patterns; k++) - if (!pdf_objcmp(pattern[k].u.pattern.obj, patterndict)) - break; - - if (k < patterns) - continue; - - pattern = fz_resize_array(ctx, pattern, patterns+1, sizeof(struct info)); - patterns++; - - pattern[patterns - 1].page = page; - pattern[patterns - 1].pageref = pageref; - pattern[patterns - 1].pageobj = pageobj; - pattern[patterns - 1].u.pattern.obj = patterndict; - pattern[patterns - 1].u.pattern.type = type; - pattern[patterns - 1].u.pattern.paint = paint; - pattern[patterns - 1].u.pattern.tiling = tiling; - pattern[patterns - 1].u.pattern.shading = shading; - } -} - -static void -gatherresourceinfo(int page, pdf_obj *rsrc, int show) -{ - pdf_obj *pageobj; - pdf_obj *pageref; - pdf_obj *font; - pdf_obj *xobj; - pdf_obj *shade; - pdf_obj *pattern; - pdf_obj *subrsrc; - int i; - - pageobj = xref->page_objs[page-1]; - pageref = xref->page_refs[page-1]; - - if (!pageobj) - fz_throw(ctx, "cannot retrieve info from page %d", page); - - font = pdf_dict_gets(rsrc, "Font"); - if (show & FONTS && font) - { - int n; - - gatherfonts(page, pageref, pageobj, font); - n = pdf_dict_len(font); - for (i = 0; i < n; i++) - { - pdf_obj *obj = pdf_dict_get_val(font, i); - - subrsrc = pdf_dict_gets(obj, "Resources"); - if (subrsrc && pdf_objcmp(rsrc, subrsrc)) - gatherresourceinfo(page, subrsrc, show); - } - } - - xobj = pdf_dict_gets(rsrc, "XObject"); - if (show & XOBJS && xobj) - { - int n; - - 
gatherimages(page, pageref, pageobj, xobj); - gatherforms(page, pageref, pageobj, xobj); - gatherpsobjs(page, pageref, pageobj, xobj); - n = pdf_dict_len(xobj); - for (i = 0; i < n; i++) - { - pdf_obj *obj = pdf_dict_get_val(xobj, i); - subrsrc = pdf_dict_gets(obj, "Resources"); - if (subrsrc && pdf_objcmp(rsrc, subrsrc)) - gatherresourceinfo(page, subrsrc, show); - } - } - - shade = pdf_dict_gets(rsrc, "Shading"); - if (show & SHADINGS && shade) - gathershadings(page, pageref, pageobj, shade); - - pattern = pdf_dict_gets(rsrc, "Pattern"); - if (show & PATTERNS && pattern) - { - int n; - gatherpatterns(page, pageref, pageobj, pattern); - n = pdf_dict_len(pattern); - for (i = 0; i < n; i++) - { - pdf_obj *obj = pdf_dict_get_val(pattern, i); - subrsrc = pdf_dict_gets(obj, "Resources"); - if (subrsrc && pdf_objcmp(rsrc, subrsrc)) - gatherresourceinfo(page, subrsrc, show); - } - } -} - -static void -gatherpageinfo(int page, int show) -{ - pdf_obj *pageobj; - pdf_obj *pageref; - pdf_obj *rsrc; - - pageobj = xref->page_objs[page-1]; - pageref = xref->page_refs[page-1]; - - if (!pageobj) - fz_throw(ctx, "cannot retrieve info from page %d", page); - - gatherdimensions(page, pageref, pageobj); - - rsrc = pdf_dict_gets(pageobj, "Resources"); - gatherresourceinfo(page, rsrc, show); -} - -static void -printinfo(char *filename, int show, int page) -{ - int i; - int j; - -#define PAGE_FMT "\t% 5d (% 7d %1d R): " - - if (show & DIMENSIONS && dims > 0) - { - printf("Mediaboxes (%d):\n", dims); - for (i = 0; i < dims; i++) - { - printf(PAGE_FMT "[ %g %g %g %g ]\n", - dim[i].page, - pdf_to_num(dim[i].pageref), pdf_to_gen(dim[i].pageref), - dim[i].u.dim.bbox->x0, - dim[i].u.dim.bbox->y0, - dim[i].u.dim.bbox->x1, - dim[i].u.dim.bbox->y1); - } - printf("\n"); - } - - if (show & FONTS && fonts > 0) - { - printf("Fonts (%d):\n", fonts); - for (i = 0; i < fonts; i++) - { - printf(PAGE_FMT "%s '%s' (%d %d R)\n", - font[i].page, - pdf_to_num(font[i].pageref), pdf_to_gen(font[i].pageref), - 
pdf_to_name(font[i].u.font.subtype), - pdf_to_name(font[i].u.font.name), - pdf_to_num(font[i].u.font.obj), pdf_to_gen(font[i].u.font.obj)); - } - printf("\n"); - } - - if (show & IMAGES && images > 0) - { - printf("Images (%d):\n", images); - for (i = 0; i < images; i++) - { - char *cs = NULL; - char *altcs = NULL; - - printf(PAGE_FMT "[ ", - image[i].page, - pdf_to_num(image[i].pageref), pdf_to_gen(image[i].pageref)); - - if (pdf_is_array(image[i].u.image.filter)) - { - int n = pdf_array_len(image[i].u.image.filter); - for (j = 0; j < n; j++) - { - pdf_obj *obj = pdf_array_get(image[i].u.image.filter, j); - char *filter = fz_strdup(ctx, pdf_to_name(obj)); - - if (strstr(filter, "Decode")) - *(strstr(filter, "Decode")) = '\0'; - - printf("%s%s", - filter, - j == pdf_array_len(image[i].u.image.filter) - 1 ? "" : " "); - fz_free(ctx, filter); - } - } - else if (image[i].u.image.filter) - { - pdf_obj *obj = image[i].u.image.filter; - char *filter = fz_strdup(ctx, pdf_to_name(obj)); - - if (strstr(filter, "Decode")) - *(strstr(filter, "Decode")) = '\0'; - - printf("%s", filter); - fz_free(ctx, filter); - } - else - printf("Raw"); - - if (image[i].u.image.cs) - { - cs = fz_strdup(ctx, pdf_to_name(image[i].u.image.cs)); - - if (!strncmp(cs, "Device", 6)) - { - int len = strlen(cs + 6); - memmove(cs + 3, cs + 6, len + 1); - cs[3 + len + 1] = '\0'; - } - if (strstr(cs, "ICC")) - fz_strlcpy(cs, "ICC", 4); - if (strstr(cs, "Indexed")) - fz_strlcpy(cs, "Idx", 4); - if (strstr(cs, "Pattern")) - fz_strlcpy(cs, "Pat", 4); - if (strstr(cs, "Separation")) - fz_strlcpy(cs, "Sep", 4); - } - if (image[i].u.image.altcs) - { - altcs = fz_strdup(ctx, pdf_to_name(image[i].u.image.altcs)); - - if (!strncmp(altcs, "Device", 6)) - { - int len = strlen(altcs + 6); - memmove(altcs + 3, altcs + 6, len + 1); - altcs[3 + len + 1] = '\0'; - } - if (strstr(altcs, "ICC")) - fz_strlcpy(altcs, "ICC", 4); - if (strstr(altcs, "Indexed")) - fz_strlcpy(altcs, "Idx", 4); - if (strstr(altcs, "Pattern")) - 
fz_strlcpy(altcs, "Pat", 4); - if (strstr(altcs, "Separation")) - fz_strlcpy(altcs, "Sep", 4); - } - - printf(" ] %dx%d %dbpc %s%s%s (%d %d R)\n", - pdf_to_int(image[i].u.image.width), - pdf_to_int(image[i].u.image.height), - image[i].u.image.bpc ? pdf_to_int(image[i].u.image.bpc) : 1, - image[i].u.image.cs ? cs : "ImageMask", - image[i].u.image.altcs ? " " : "", - image[i].u.image.altcs ? altcs : "", - pdf_to_num(image[i].u.image.obj), pdf_to_gen(image[i].u.image.obj)); - - fz_free(ctx, cs); - fz_free(ctx, altcs); - } - printf("\n"); - } - - if (show & SHADINGS && shadings > 0) - { - printf("Shading patterns (%d):\n", shadings); - for (i = 0; i < shadings; i++) - { - char *shadingtype[] = - { - "", - "Function", - "Axial", - "Radial", - "Triangle mesh", - "Lattice", - "Coons patch", - "Tensor patch", - }; - - printf(PAGE_FMT "%s (%d %d R)\n", - shading[i].page, - pdf_to_num(shading[i].pageref), pdf_to_gen(shading[i].pageref), - shadingtype[pdf_to_int(shading[i].u.shading.type)], - pdf_to_num(shading[i].u.shading.obj), pdf_to_gen(shading[i].u.shading.obj)); - } - printf("\n"); - } - - if (show & PATTERNS && patterns > 0) - { - printf("Patterns (%d):\n", patterns); - for (i = 0; i < patterns; i++) - { - if (pdf_to_int(pattern[i].u.pattern.type) == 1) - { - char *painttype[] = - { - "", - "Colored", - "Uncolored", - }; - char *tilingtype[] = - { - "", - "Constant", - "No distortion", - "Constant/fast tiling", - }; - - printf(PAGE_FMT "Tiling %s %s (%d %d R)\n", - pattern[i].page, - pdf_to_num(pattern[i].pageref), pdf_to_gen(pattern[i].pageref), - painttype[pdf_to_int(pattern[i].u.pattern.paint)], - tilingtype[pdf_to_int(pattern[i].u.pattern.tiling)], - pdf_to_num(pattern[i].u.pattern.obj), pdf_to_gen(pattern[i].u.pattern.obj)); - } - else - { - printf(PAGE_FMT "Shading %d %d R (%d %d R)\n", - pattern[i].page, - pdf_to_num(pattern[i].pageref), pdf_to_gen(pattern[i].pageref), - pdf_to_num(pattern[i].u.pattern.shading), pdf_to_gen(pattern[i].u.pattern.shading), - 
pdf_to_num(pattern[i].u.pattern.obj), pdf_to_gen(pattern[i].u.pattern.obj)); - } - } - printf("\n"); - } - - if (show & XOBJS && forms > 0) - { - printf("Form xobjects (%d):\n", forms); - for (i = 0; i < forms; i++) - { - printf(PAGE_FMT "Form%s%s%s%s (%d %d R)\n", - form[i].page, - pdf_to_num(form[i].pageref), pdf_to_gen(form[i].pageref), - form[i].u.form.groupsubtype ? " " : "", - form[i].u.form.groupsubtype ? pdf_to_name(form[i].u.form.groupsubtype) : "", - form[i].u.form.groupsubtype ? " Group" : "", - form[i].u.form.reference ? " Reference" : "", - pdf_to_num(form[i].u.form.obj), pdf_to_gen(form[i].u.form.obj)); - } - printf("\n"); - } - - if (show & XOBJS && psobjs > 0) - { - printf("Postscript xobjects (%d):\n", psobjs); - for (i = 0; i < psobjs; i++) - { - printf(PAGE_FMT "(%d %d R)\n", - psobj[i].page, - pdf_to_num(psobj[i].pageref), pdf_to_gen(psobj[i].pageref), - pdf_to_num(psobj[i].u.form.obj), pdf_to_gen(psobj[i].u.form.obj)); - } - printf("\n"); - } -} - -static void -showinfo(char *filename, int show, char *pagelist) -{ - int page, spage, epage; - char *spec, *dash; - int allpages; - int pagecount; - - if (!xref) - infousage(); - - allpages = !strcmp(pagelist, "1-"); - - pagecount = pdf_count_pages(xref); - spec = fz_strsep(&pagelist, ","); - while (spec && pagecount) - { - dash = strchr(spec, '-'); - - if (dash == spec) - spage = epage = pagecount; - else - spage = epage = atoi(spec); - - if (dash) - { - if (strlen(dash) > 1) - epage = atoi(dash + 1); - else - epage = pagecount; - } - - if (spage > epage) - page = spage, spage = epage, epage = page; - - spage = fz_clampi(spage, 1, pagecount); - epage = fz_clampi(epage, 1, pagecount); - - if (allpages) - printf("Retrieving info from pages %d-%d...\n", spage, epage); - for (page = spage; page <= epage; page++) - { - gatherpageinfo(page, show); - if (!allpages) - { - printf("Page %d:\n", page); - printinfo(filename, show, page); - printf("\n"); - } - } - - spec = fz_strsep(&pagelist, ","); - } - - if 
(allpages) - printinfo(filename, show, -1); -} - -static int arg_is_page_range(const char *arg) -{ - int c; - - while ((c = *arg++) != 0) - { - if ((c < '0' || c > '9') && (c != '-') && (c != ',')) - return 0; - } - return 1; -} - -int pdfinfo_main(int argc, char **argv) -{ - enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state; - char *filename = ""; - char *password = ""; - int show = ALL; - int c; - - while ((c = fz_getopt(argc, argv, "mfispxd:")) != -1) - { - switch (c) - { - case 'm': if (show == ALL) show = DIMENSIONS; else show |= DIMENSIONS; break; - case 'f': if (show == ALL) show = FONTS; else show |= FONTS; break; - case 'i': if (show == ALL) show = IMAGES; else show |= IMAGES; break; - case 's': if (show == ALL) show = SHADINGS; else show |= SHADINGS; break; - case 'p': if (show == ALL) show = PATTERNS; else show |= PATTERNS; break; - case 'x': if (show == ALL) show = XOBJS; else show |= XOBJS; break; - case 'd': password = fz_optarg; break; - default: - infousage(); - break; - } - } - - if (fz_optind == argc) - infousage(); - - ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - state = NO_FILE_OPENED; - while (fz_optind < argc) - { - if (state == NO_FILE_OPENED || !arg_is_page_range(argv[fz_optind])) - { - if (state == NO_INFO_GATHERED) - { - showinfo(filename, show, "1-"); - closexref(); - } - - closexref(); - - filename = argv[fz_optind]; - printf("%s:\n", filename); - xref = pdf_open_document_no_run(ctx, filename); - if (pdf_needs_password(xref)) - if (!pdf_authenticate_password(xref, password)) - fz_throw(ctx, "cannot authenticate password: %s", filename); - pagecount = pdf_count_pages(xref); - - showglobalinfo(); - state = NO_INFO_GATHERED; - } - else - { - showinfo(filename, show, argv[fz_optind]); - state = INFO_SHOWN; - } - - fz_optind++; - } - - if (state == NO_INFO_GATHERED) - showinfo(filename, show, "1-"); - - closexref(); - fz_free_context(ctx); 
- return 0; -} diff --git a/apps/mupdfposter.c b/apps/mupdfposter.c deleted file mode 100644 index f65495a8..00000000 --- a/apps/mupdfposter.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * PDF cleaning tool: general purpose pdf syntax washer. - * - * Rewrite PDF with pretty printed objects. - * Garbage collect unreachable objects. - * Inflate compressed streams. - * Create subset documents. - * - * TODO: linearize document for fast web view - */ - -#include "fitz.h" -#include "mupdf-internal.h" - -static int x_factor = 0; -static int y_factor = 0; - -static void usage(void) -{ - fprintf(stderr, - "usage: mubusy poster [options] input.pdf [output.pdf]\n" - "\t-p -\tpassword\n" - "\t-x\tx decimation factor\n" - "\t-y\ty decimation factor\n"); - exit(1); -} - -/* - * Recreate page tree to only retain specified pages. - */ - -static void decimatepages(pdf_document *xref) -{ - pdf_obj *oldroot, *root, *pages, *kids, *parent; - fz_context *ctx = xref->ctx; - int num_pages = pdf_count_pages(xref); - int page, kidcount; - - /* Keep only pages/type and (reduced) dest entries to avoid - * references to unretained pages */ - oldroot = pdf_dict_gets(xref->trailer, "Root"); - pages = pdf_dict_gets(oldroot, "Pages"); - - root = pdf_new_dict(ctx, 2); - pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type")); - pdf_dict_puts(root, "Pages", pdf_dict_gets(oldroot, "Pages")); - - pdf_update_object(xref, pdf_to_num(oldroot), root); - - pdf_drop_obj(root); - - /* Create a new kids array with only the pages we want to keep */ - parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref); - kids = pdf_new_array(ctx, 1); - - kidcount = 0; - for (page=0; page < num_pages; page++) - { - pdf_page *page_details = pdf_load_page(xref, page); - int xf = x_factor, yf = y_factor; - int x, y; - float w = page_details->mediabox.x1 - page_details->mediabox.x0; - float h = page_details->mediabox.y1 - page_details->mediabox.y0; - - if (xf == 0 && yf == 0) - { - /* Nothing specified, so 
split along the long edge */ - if (w > h) - xf = 2, yf = 1; - else - xf = 1, yf = 2; - } - else if (xf == 0) - xf = 1; - else if (yf == 0) - yf = 1; - - for (y = yf-1; y >= 0; y--) - { - for (x = 0; x < xf; x++) - { - pdf_obj *newpageobj, *newpageref, *newmediabox; - fz_rect mb; - int num; - - newpageobj = pdf_copy_dict(ctx, xref->page_objs[page]); - num = pdf_create_object(xref); - pdf_update_object(xref, num, newpageobj); - newpageref = pdf_new_indirect(ctx, num, 0, xref); - - newmediabox = pdf_new_array(ctx, 4); - - mb.x0 = page_details->mediabox.x0 + (w/xf)*x; - if (x == xf-1) - mb.x1 = page_details->mediabox.x1; - else - mb.x1 = page_details->mediabox.x0 + (w/xf)*(x+1); - mb.y0 = page_details->mediabox.y0 + (h/yf)*y; - if (y == yf-1) - mb.y1 = page_details->mediabox.y1; - else - mb.y1 = page_details->mediabox.y0 + (h/yf)*(y+1); - - pdf_array_push(newmediabox, pdf_new_real(ctx, mb.x0)); - pdf_array_push(newmediabox, pdf_new_real(ctx, mb.y0)); - pdf_array_push(newmediabox, pdf_new_real(ctx, mb.x1)); - pdf_array_push(newmediabox, pdf_new_real(ctx, mb.y1)); - - pdf_dict_puts(newpageobj, "Parent", parent); - pdf_dict_puts(newpageobj, "MediaBox", newmediabox); - - /* Store page object in new kids array */ - pdf_array_push(kids, newpageref); - - kidcount++; - } - } - } - - pdf_drop_obj(parent); - - /* Update page count and kids array */ - pdf_dict_puts(pages, "Count", pdf_new_int(ctx, kidcount)); - pdf_dict_puts(pages, "Kids", kids); - pdf_drop_obj(kids); -} - -int pdfposter_main(int argc, char **argv) -{ - char *infile; - char *outfile = "out.pdf"; - char *password = ""; - int c; - fz_write_options opts; - pdf_document *xref; - fz_context *ctx; - - opts.do_garbage = 0; - opts.do_expand = 0; - opts.do_ascii = 0; - - while ((c = fz_getopt(argc, argv, "x:y:")) != -1) - { - switch (c) - { - case 'p': password = fz_optarg; break; - case 'x': x_factor = atoi(fz_optarg); break; - case 'y': y_factor = atoi(fz_optarg); break; - default: usage(); break; - } - } - - if (argc - 
fz_optind < 1) - usage(); - - infile = argv[fz_optind++]; - - if (argc - fz_optind > 0 && - (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF"))) - { - outfile = argv[fz_optind++]; - } - - ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - xref = pdf_open_document_no_run(ctx, infile); - if (pdf_needs_password(xref)) - if (!pdf_authenticate_password(xref, password)) - fz_throw(ctx, "cannot authenticate password: %s", infile); - - /* Only retain the specified subset of the pages */ - decimatepages(xref); - - pdf_write_document(xref, outfile, &opts); - - pdf_close_document(xref); - fz_free_context(ctx); - return 0; -} diff --git a/apps/mupdfshow.c b/apps/mupdfshow.c deleted file mode 100644 index f534a5c7..00000000 --- a/apps/mupdfshow.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * pdfshow -- the ultimate pdf debugging tool - */ - -#include "mupdf-internal.h" - -static pdf_document *doc = NULL; -static fz_context *ctx = NULL; -static int showbinary = 0; -static int showdecode = 1; -static int showcolumn; - -static void usage(void) -{ - fprintf(stderr, "usage: mubusy show [options] file.pdf [grepable] [xref] [trailer] [pagetree] [object numbers]\n"); - fprintf(stderr, "\t-b\tprint streams as binary data\n"); - fprintf(stderr, "\t-e\tprint encoded streams (don't decode)\n"); - fprintf(stderr, "\t-p\tpassword\n"); - exit(1); -} - -static void showtrailer(void) -{ - if (!doc) - fz_throw(ctx, "no file specified"); - printf("trailer\n"); - pdf_fprint_obj(stdout, doc->trailer, 0); - printf("\n"); -} - -static void showxref(void) -{ - if (!doc) - fz_throw(ctx, "no file specified"); - pdf_print_xref(doc); - printf("\n"); -} - -static void showpagetree(void) -{ - pdf_obj *ref; - int count; - int i; - - if (!doc) - fz_throw(ctx, "no file specified"); - - count = pdf_count_pages(doc); - for (i = 0; i < count; i++) - { - ref = doc->page_refs[i]; - printf("page %d = %d %d R\n", i + 1, 
pdf_to_num(ref), pdf_to_gen(ref)); - } - printf("\n"); -} - -static void showsafe(unsigned char *buf, int n) -{ - int i; - for (i = 0; i < n; i++) { - if (buf[i] == '\r' || buf[i] == '\n') { - putchar('\n'); - showcolumn = 0; - } - else if (buf[i] < 32 || buf[i] > 126) { - putchar('.'); - showcolumn ++; - } - else { - putchar(buf[i]); - showcolumn ++; - } - if (showcolumn == 79) { - putchar('\n'); - showcolumn = 0; - } - } -} - -static void showstream(int num, int gen) -{ - fz_stream *stm; - unsigned char buf[2048]; - int n; - - showcolumn = 0; - - if (showdecode) - stm = pdf_open_stream(doc, num, gen); - else - stm = pdf_open_raw_stream(doc, num, gen); - - while (1) - { - n = fz_read(stm, buf, sizeof buf); - if (n == 0) - break; - if (showbinary) - fwrite(buf, 1, n, stdout); - else - showsafe(buf, n); - } - - fz_close(stm); -} - -static void showobject(int num, int gen) -{ - pdf_obj *obj; - - if (!doc) - fz_throw(ctx, "no file specified"); - - obj = pdf_load_object(doc, num, gen); - - if (pdf_is_stream(doc, num, gen)) - { - if (showbinary) - { - showstream(num, gen); - } - else - { - printf("%d %d obj\n", num, gen); - pdf_fprint_obj(stdout, obj, 0); - printf("stream\n"); - showstream(num, gen); - printf("endstream\n"); - printf("endobj\n\n"); - } - } - else - { - printf("%d %d obj\n", num, gen); - pdf_fprint_obj(stdout, obj, 0); - printf("endobj\n\n"); - } - - pdf_drop_obj(obj); -} - -static void showgrep(char *filename) -{ - pdf_obj *obj; - int i, len; - - len = pdf_count_objects(doc); - for (i = 0; i < len; i++) - { - if (doc->table[i].type == 'n' || doc->table[i].type == 'o') - { - fz_try(ctx) - { - obj = pdf_load_object(doc, i, 0); - } - fz_catch(ctx) - { - fz_warn(ctx, "skipping object (%d 0 R)", i); - continue; - } - - pdf_sort_dict(obj); - - printf("%s:%d: ", filename, i); - pdf_fprint_obj(stdout, obj, 1); - - pdf_drop_obj(obj); - } - } - - printf("%s:trailer: ", filename); - pdf_fprint_obj(stdout, doc->trailer, 1); -} - -int pdfshow_main(int argc, char 
**argv) -{ - char *password = NULL; /* don't throw errors if encrypted */ - char *filename; - int c; - - while ((c = fz_getopt(argc, argv, "p:be")) != -1) - { - switch (c) - { - case 'p': password = fz_optarg; break; - case 'b': showbinary = 1; break; - case 'e': showdecode = 0; break; - default: usage(); break; - } - } - - if (fz_optind == argc) - usage(); - - filename = argv[fz_optind++]; - - ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); - if (!ctx) - { - fprintf(stderr, "cannot initialise context\n"); - exit(1); - } - - fz_var(doc); - fz_try(ctx) - { - doc = pdf_open_document_no_run(ctx, filename); - if (pdf_needs_password(doc)) - if (!pdf_authenticate_password(doc, password)) - fz_warn(ctx, "cannot authenticate password: %s", filename); - - if (fz_optind == argc) - showtrailer(); - - while (fz_optind < argc) - { - switch (argv[fz_optind][0]) - { - case 't': showtrailer(); break; - case 'x': showxref(); break; - case 'p': showpagetree(); break; - case 'g': showgrep(filename); break; - default: showobject(atoi(argv[fz_optind]), 0); break; - } - fz_optind++; - } - } - fz_catch(ctx) - { - } - - pdf_close_document(doc); - fz_free_context(ctx); - return 0; -} diff --git a/apps/mutool.c b/apps/mutool.c new file mode 100644 index 00000000..2eb0d7c5 --- /dev/null +++ b/apps/mutool.c @@ -0,0 +1,83 @@ +/* + * mutool -- swiss army knife of pdf manipulation tools + */ + +#include +#include +#include + +#define nelem(x) (sizeof(x)/sizeof((x)[0])) + +int pdfclean_main(int argc, char *argv[]); +int pdfextract_main(int argc, char *argv[]); +int pdfinfo_main(int argc, char *argv[]); +int pdfposter_main(int argc, char *argv[]); +int pdfshow_main(int argc, char *argv[]); + +static struct { + int (*func)(int argc, char *argv[]); + char *name; + char *desc; +} tools[] = { + { pdfclean_main, "clean", "rewrite pdf file" }, + { pdfextract_main, "extract", "extract font and image resources" }, + { pdfinfo_main, "info", "show information about pdf resources" }, + { 
pdfposter_main, "poster", "split large page into many tiles" }, + { pdfshow_main, "show", "show internal pdf objects" }, +}; + +static int +namematch(const char *end, const char *start, const char *match) +{ + int len = strlen(match); + return ((end-len >= start) && (strncmp(end-len, match, len) == 0)); +} + +int main(int argc, char **argv) +{ + char *start, *end; + char buf[32]; + int i; + + if (argc == 0) + { + fprintf(stderr, "No command name found!\n"); + return 1; + } + + /* Check argv[0] */ + + if (argc > 0) + { + end = start = argv[0]; + while (*end) + end++; + if ((end-4 >= start) && (end[-4] == '.') && (end[-3] == 'e') && (end[-2] == 'x') && (end[-1] == 'e')) + end = end-4; + for (i = 0; i < nelem(tools); i++) + { + strcpy(buf, "mupdf"); + strcat(buf, tools[i].name); + if (namematch(end, start, buf) || namematch(end, start, buf+2)) + return tools[i].func(argc, argv); + } + } + + /* Check argv[1] */ + + if (argc > 1) + { + for (i = 0; i < nelem(tools); i++) + if (!strcmp(tools[i].name, argv[1])) + return tools[i].func(argc - 1, argv + 1); + } + + /* Print usage */ + + fprintf(stderr, "usage: mutool [options]\n"); + + for (i = 0; i < nelem(tools); i++) + fprintf(stderr, "\t%s\t-- %s\n", tools[i].name, tools[i].desc); + + return 1; +} diff --git a/apps/pdfclean.c b/apps/pdfclean.c new file mode 100644 index 00000000..409d4fbb --- /dev/null +++ b/apps/pdfclean.c @@ -0,0 +1,233 @@ +/* + * PDF cleaning tool: general purpose pdf syntax washer. + * + * Rewrite PDF with pretty printed objects. + * Garbage collect unreachable objects. + * Inflate compressed streams. + * Create subset documents. 
+ * + * TODO: linearize document for fast web view + */ + +#include "fitz.h" +#include "mupdf-internal.h" + +static pdf_document *xref = NULL; +static fz_context *ctx = NULL; + +static void usage(void) +{ + fprintf(stderr, + "usage: mutool clean [options] input.pdf [output.pdf] [pages]\n" + "\t-p -\tpassword\n" + "\t-g\tgarbage collect unused objects\n" + "\t-gg\tin addition to -g compact xref table\n" + "\t-ggg\tin addition to -gg merge duplicate objects\n" + "\t-d\tdecompress all streams\n" + "\t-l\tlinearize PDF\n" + "\t-i\ttoggle decompression of image streams\n" + "\t-f\ttoggle decompression of font streams\n" + "\t-a\tascii hex encode binary streams\n" + "\tpages\tcomma separated list of ranges\n"); + exit(1); +} + +/* + * Recreate page tree to only retain specified pages. + */ + +static void retainpages(int argc, char **argv) +{ + pdf_obj *oldroot, *root, *pages, *kids, *countobj, *parent, *olddests; + + /* Keep only pages/type and (reduced) dest entries to avoid + * references to unretained pages */ + oldroot = pdf_dict_gets(xref->trailer, "Root"); + pages = pdf_dict_gets(oldroot, "Pages"); + olddests = pdf_load_name_tree(xref, "Dests"); + + root = pdf_new_dict(ctx, 2); + pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type")); + pdf_dict_puts(root, "Pages", pdf_dict_gets(oldroot, "Pages")); + + pdf_update_object(xref, pdf_to_num(oldroot), root); + + pdf_drop_obj(root); + + /* Create a new kids array with only the pages we want to keep */ + parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref); + kids = pdf_new_array(ctx, 1); + + /* Retain pages specified */ + while (argc - fz_optind) + { + int page, spage, epage, pagecount; + char *spec, *dash; + char *pagelist = argv[fz_optind]; + + pagecount = pdf_count_pages(xref); + spec = fz_strsep(&pagelist, ","); + while (spec) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = pagecount; + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + 
epage = atoi(dash + 1); + else + epage = pagecount; + } + + if (spage > epage) + page = spage, spage = epage, epage = page; + + spage = fz_clampi(spage, 1, pagecount); + epage = fz_clampi(epage, 1, pagecount); + + for (page = spage; page <= epage; page++) + { + pdf_obj *pageobj = xref->page_objs[page-1]; + pdf_obj *pageref = xref->page_refs[page-1]; + + pdf_dict_puts(pageobj, "Parent", parent); + + /* Store page object in new kids array */ + pdf_array_push(kids, pageref); + } + + spec = fz_strsep(&pagelist, ","); + } + + fz_optind++; + } + + pdf_drop_obj(parent); + + /* Update page count and kids array */ + countobj = pdf_new_int(ctx, pdf_array_len(kids)); + pdf_dict_puts(pages, "Count", countobj); + pdf_drop_obj(countobj); + pdf_dict_puts(pages, "Kids", kids); + pdf_drop_obj(kids); + + /* Also preserve the (partial) Dests name tree */ + if (olddests) + { + int i; + pdf_obj *names = pdf_new_dict(ctx, 1); + pdf_obj *dests = pdf_new_dict(ctx, 1); + pdf_obj *names_list = pdf_new_array(ctx, 32); + int len = pdf_dict_len(olddests); + + for (i = 0; i < len; i++) + { + pdf_obj *key = pdf_dict_get_key(olddests, i); + pdf_obj *val = pdf_dict_get_val(olddests, i); + pdf_obj *key_str = pdf_new_string(ctx, pdf_to_name(key), strlen(pdf_to_name(key))); + pdf_obj *dest = pdf_dict_gets(val, "D"); + + dest = pdf_array_get(dest ? 
dest : val, 0); + if (pdf_array_contains(pdf_dict_gets(pages, "Kids"), dest)) + { + pdf_array_push(names_list, key_str); + pdf_array_push(names_list, val); + } + pdf_drop_obj(key_str); + } + + root = pdf_dict_gets(xref->trailer, "Root"); + pdf_dict_puts(dests, "Names", names_list); + pdf_dict_puts(names, "Dests", dests); + pdf_dict_puts(root, "Names", names); + + pdf_drop_obj(names); + pdf_drop_obj(dests); + pdf_drop_obj(names_list); + pdf_drop_obj(olddests); + } +} + +int pdfclean_main(int argc, char **argv) +{ + char *infile; + char *outfile = "out.pdf"; + char *password = ""; + int c; + int subset; + fz_write_options opts; + int write_failed = 0; + + opts.do_garbage = 0; + opts.do_expand = 0; + opts.do_ascii = 0; + opts.do_linear = 0; + + while ((c = fz_getopt(argc, argv, "adfgilp:")) != -1) + { + switch (c) + { + case 'p': password = fz_optarg; break; + case 'g': opts.do_garbage ++; break; + case 'd': opts.do_expand ^= fz_expand_all; break; + case 'f': opts.do_expand ^= fz_expand_fonts; break; + case 'i': opts.do_expand ^= fz_expand_images; break; + case 'l': opts.do_linear ++; break; + case 'a': opts.do_ascii ++; break; + default: usage(); break; + } + } + + if (argc - fz_optind < 1) + usage(); + + infile = argv[fz_optind++]; + + if (argc - fz_optind > 0 && + (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF"))) + { + outfile = argv[fz_optind++]; + } + + subset = 0; + if (argc - fz_optind > 0) + subset = 1; + + ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + fz_try(ctx) + { + xref = pdf_open_document_no_run(ctx, infile); + if (pdf_needs_password(xref)) + if (!pdf_authenticate_password(xref, password)) + fz_throw(ctx, "cannot authenticate password: %s", infile); + + /* Only retain the specified subset of the pages */ + if (subset) + retainpages(argc, argv); + + pdf_write_document(xref, outfile, &opts); + } + fz_always(ctx) + { + pdf_close_document(xref); + 
} + fz_catch(ctx) + { + write_failed = 1; + } + + fz_free_context(ctx); + + return write_failed ? 1 : 0; +} diff --git a/apps/pdfextract.c b/apps/pdfextract.c new file mode 100644 index 00000000..7d49f997 --- /dev/null +++ b/apps/pdfextract.c @@ -0,0 +1,198 @@ +/* + * pdfextract -- the ultimate way to extract images and fonts from pdfs + */ + +#include "mupdf.h" +#include "mupdf-internal.h" + +static pdf_document *doc = NULL; +static fz_context *ctx = NULL; +static int dorgb = 0; + +static void usage(void) +{ + fprintf(stderr, "usage: mutool extract [options] file.pdf [object numbers]\n"); + fprintf(stderr, "\t-p\tpassword\n"); + fprintf(stderr, "\t-r\tconvert images to rgb\n"); + exit(1); +} + +static int isimage(pdf_obj *obj) +{ + pdf_obj *type = pdf_dict_gets(obj, "Subtype"); + return pdf_is_name(type) && !strcmp(pdf_to_name(type), "Image"); +} + +static int isfontdesc(pdf_obj *obj) +{ + pdf_obj *type = pdf_dict_gets(obj, "Type"); + return pdf_is_name(type) && !strcmp(pdf_to_name(type), "FontDescriptor"); +} + +static void saveimage(int num) +{ + fz_image *image; + fz_pixmap *img; + pdf_obj *ref; + char name[32]; + + ref = pdf_new_indirect(ctx, num, 0, doc); + + /* TODO: detect DCTD and save as jpeg */ + + image = pdf_load_image(doc, ref); + img = fz_image_to_pixmap(ctx, image, 0, 0); + fz_drop_image(ctx, image); + + sprintf(name, "img-%04d", num); + fz_write_pixmap(ctx, img, name, dorgb); + + fz_drop_pixmap(ctx, img); + pdf_drop_obj(ref); +} + +static void savefont(pdf_obj *dict, int num) +{ + char name[1024]; + char *subtype; + fz_buffer *buf; + pdf_obj *stream = NULL; + pdf_obj *obj; + char *ext = ""; + FILE *f; + char *fontname = "font"; + int n, len; + unsigned char *data; + + obj = pdf_dict_gets(dict, "FontName"); + if (obj) + fontname = pdf_to_name(obj); + + obj = pdf_dict_gets(dict, "FontFile"); + if (obj) + { + stream = obj; + ext = "pfa"; + } + + obj = pdf_dict_gets(dict, "FontFile2"); + if (obj) + { + stream = obj; + ext = "ttf"; + } + + obj = 
pdf_dict_gets(dict, "FontFile3"); + if (obj) + { + stream = obj; + + obj = pdf_dict_gets(obj, "Subtype"); + if (obj && !pdf_is_name(obj)) + fz_throw(ctx, "Invalid font descriptor subtype"); + + subtype = pdf_to_name(obj); + if (!strcmp(subtype, "Type1C")) + ext = "cff"; + else if (!strcmp(subtype, "CIDFontType0C")) + ext = "cid"; + else + fz_throw(ctx, "Unhandled font type '%s'", subtype); + } + + if (!stream) + { + fz_warn(ctx, "Unhandled font type"); + return; + } + + buf = pdf_load_stream(doc, pdf_to_num(stream), pdf_to_gen(stream)); + + sprintf(name, "%s-%04d.%s", fontname, num, ext); + printf("extracting font %s\n", name); + + f = fopen(name, "wb"); + if (!f) + fz_throw(ctx, "Error creating font file"); + + len = fz_buffer_storage(ctx, buf, &data); + n = fwrite(data, 1, len, f); + if (n < len) + fz_throw(ctx, "Error writing font file"); + + if (fclose(f) < 0) + fz_throw(ctx, "Error closing font file"); + + fz_drop_buffer(ctx, buf); +} + +static void showobject(int num) +{ + pdf_obj *obj; + + if (!doc) + fz_throw(ctx, "no file specified"); + + obj = pdf_load_object(doc, num, 0); + + if (isimage(obj)) + saveimage(num); + else if (isfontdesc(obj)) + savefont(obj, num); + + pdf_drop_obj(obj); +} + +int pdfextract_main(int argc, char **argv) +{ + char *infile; + char *password = ""; + int c, o; + + while ((c = fz_getopt(argc, argv, "p:r")) != -1) + { + switch (c) + { + case 'p': password = fz_optarg; break; + case 'r': dorgb++; break; + default: usage(); break; + } + } + + if (fz_optind == argc) + usage(); + + infile = argv[fz_optind++]; + + ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + doc = pdf_open_document_no_run(ctx, infile); + if (pdf_needs_password(doc)) + if (!pdf_authenticate_password(doc, password)) + fz_throw(ctx, "cannot authenticate password: %s", infile); + + if (fz_optind == argc) + { + int len = pdf_count_objects(doc); + for (o = 0; o < len; o++) + 
showobject(o); + } + else + { + while (fz_optind < argc) + { + showobject(atoi(argv[fz_optind])); + fz_optind++; + } + } + + pdf_close_document(doc); + fz_flush_warnings(ctx); + fz_free_context(ctx); + return 0; +} diff --git a/apps/pdfinfo.c b/apps/pdfinfo.c new file mode 100644 index 00000000..3fab99c7 --- /dev/null +++ b/apps/pdfinfo.c @@ -0,0 +1,1033 @@ +/* + * Information tool. + * Print information about the input pdf. + */ + +#include "fitz.h" +#include "mupdf-internal.h" + +pdf_document *xref; +fz_context *ctx; +int pagecount; + +void closexref(void); + +void openxref(char *filename, char *password, int dieonbadpass, int loadpages); + +enum +{ + DIMENSIONS = 0x01, + FONTS = 0x02, + IMAGES = 0x04, + SHADINGS = 0x08, + PATTERNS = 0x10, + XOBJS = 0x20, + ALL = DIMENSIONS | FONTS | IMAGES | SHADINGS | PATTERNS | XOBJS +}; + +struct info +{ + int page; + pdf_obj *pageref; + pdf_obj *pageobj; + union { + struct { + pdf_obj *obj; + } info; + struct { + pdf_obj *obj; + } crypt; + struct { + pdf_obj *obj; + fz_rect *bbox; + } dim; + struct { + pdf_obj *obj; + pdf_obj *subtype; + pdf_obj *name; + } font; + struct { + pdf_obj *obj; + pdf_obj *width; + pdf_obj *height; + pdf_obj *bpc; + pdf_obj *filter; + pdf_obj *cs; + pdf_obj *altcs; + } image; + struct { + pdf_obj *obj; + pdf_obj *type; + } shading; + struct { + pdf_obj *obj; + pdf_obj *type; + pdf_obj *paint; + pdf_obj *tiling; + pdf_obj *shading; + } pattern; + struct { + pdf_obj *obj; + pdf_obj *groupsubtype; + pdf_obj *reference; + } form; + } u; +}; + +static struct info *dim = NULL; +static int dims = 0; +static struct info *font = NULL; +static int fonts = 0; +static struct info *image = NULL; +static int images = 0; +static struct info *shading = NULL; +static int shadings = 0; +static struct info *pattern = NULL; +static int patterns = 0; +static struct info *form = NULL; +static int forms = 0; +static struct info *psobj = NULL; +static int psobjs = 0; + +void closexref(void) +{ + int i; + if (xref) + { + 
pdf_close_document(xref); + xref = NULL; + } + + if (dim) + { + for (i = 0; i < dims; i++) + fz_free(ctx, dim[i].u.dim.bbox); + fz_free(ctx, dim); + dim = NULL; + dims = 0; + } + + if (font) + { + fz_free(ctx, font); + font = NULL; + fonts = 0; + } + + if (image) + { + fz_free(ctx, image); + image = NULL; + images = 0; + } + + if (shading) + { + fz_free(ctx, shading); + shading = NULL; + shadings = 0; + } + + if (pattern) + { + fz_free(ctx, pattern); + pattern = NULL; + patterns = 0; + } + + if (form) + { + fz_free(ctx, form); + form = NULL; + forms = 0; + } + + if (psobj) + { + fz_free(ctx, psobj); + psobj = NULL; + psobjs = 0; + } +} + +static void +infousage(void) +{ + fprintf(stderr, + "usage: mutool info [options] [file.pdf ... ]\n" + "\t-d -\tpassword for decryption\n" + "\t-f\tlist fonts\n" + "\t-i\tlist images\n" + "\t-m\tlist dimensions\n" + "\t-p\tlist patterns\n" + "\t-s\tlist shadings\n" + "\t-x\tlist form and postscript xobjects\n"); + exit(1); +} + +static void +showglobalinfo(void) +{ + pdf_obj *obj; + + printf("\nPDF-%d.%d\n", xref->version / 10, xref->version % 10); + + obj = pdf_dict_gets(xref->trailer, "Info"); + if (obj) + { + printf("Info object (%d %d R):\n", pdf_to_num(obj), pdf_to_gen(obj)); + pdf_fprint_obj(stdout, pdf_resolve_indirect(obj), 0); + } + + obj = pdf_dict_gets(xref->trailer, "Encrypt"); + if (obj) + { + printf("\nEncryption object (%d %d R):\n", pdf_to_num(obj), pdf_to_gen(obj)); + pdf_fprint_obj(stdout, pdf_resolve_indirect(obj), 0); + } + + printf("\nPages: %d\n\n", pagecount); +} + +static void +gatherdimensions(int page, pdf_obj *pageref, pdf_obj *pageobj) +{ + fz_rect bbox; + pdf_obj *obj; + int j; + + obj = pdf_dict_gets(pageobj, "MediaBox"); + if (!pdf_is_array(obj)) + return; + + bbox = pdf_to_rect(ctx, obj); + + obj = pdf_dict_gets(pageobj, "UserUnit"); + if (pdf_is_real(obj)) + { + float unit = pdf_to_real(obj); + bbox.x0 *= unit; + bbox.y0 *= unit; + bbox.x1 *= unit; + bbox.y1 *= unit; + } + + for (j = 0; j < dims; 
j++) + if (!memcmp(dim[j].u.dim.bbox, &bbox, sizeof (fz_rect))) + break; + + if (j < dims) + return; + + dim = fz_resize_array(ctx, dim, dims+1, sizeof(struct info)); + dims++; + + dim[dims - 1].page = page; + dim[dims - 1].pageref = pageref; + dim[dims - 1].pageobj = pageobj; + dim[dims - 1].u.dim.bbox = fz_malloc(ctx, sizeof(fz_rect)); + memcpy(dim[dims - 1].u.dim.bbox, &bbox, sizeof (fz_rect)); + + return; +} + +static void +gatherfonts(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) +{ + int i, n; + + n = pdf_dict_len(dict); + for (i = 0; i < n; i++) + { + pdf_obj *fontdict = NULL; + pdf_obj *subtype = NULL; + pdf_obj *basefont = NULL; + pdf_obj *name = NULL; + int k; + + fontdict = pdf_dict_get_val(dict, i); + if (!pdf_is_dict(fontdict)) + { + fz_warn(ctx, "not a font dict (%d %d R)", pdf_to_num(fontdict), pdf_to_gen(fontdict)); + continue; + } + + subtype = pdf_dict_gets(fontdict, "Subtype"); + basefont = pdf_dict_gets(fontdict, "BaseFont"); + if (!basefont || pdf_is_null(basefont)) + name = pdf_dict_gets(fontdict, "Name"); + + for (k = 0; k < fonts; k++) + if (!pdf_objcmp(font[k].u.font.obj, fontdict)) + break; + + if (k < fonts) + continue; + + font = fz_resize_array(ctx, font, fonts+1, sizeof(struct info)); + fonts++; + + font[fonts - 1].page = page; + font[fonts - 1].pageref = pageref; + font[fonts - 1].pageobj = pageobj; + font[fonts - 1].u.font.obj = fontdict; + font[fonts - 1].u.font.subtype = subtype; + font[fonts - 1].u.font.name = basefont ? 
basefont : name; + } +} + +static void +gatherimages(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) +{ + int i, n; + + n = pdf_dict_len(dict); + for (i = 0; i < n; i++) + { + pdf_obj *imagedict; + pdf_obj *type; + pdf_obj *width; + pdf_obj *height; + pdf_obj *bpc = NULL; + pdf_obj *filter = NULL; + pdf_obj *cs = NULL; + pdf_obj *altcs; + int k; + + imagedict = pdf_dict_get_val(dict, i); + if (!pdf_is_dict(imagedict)) + { + fz_warn(ctx, "not an image dict (%d %d R)", pdf_to_num(imagedict), pdf_to_gen(imagedict)); + continue; + } + + type = pdf_dict_gets(imagedict, "Subtype"); + if (strcmp(pdf_to_name(type), "Image")) + continue; + + filter = pdf_dict_gets(imagedict, "Filter"); + + altcs = NULL; + cs = pdf_dict_gets(imagedict, "ColorSpace"); + if (pdf_is_array(cs)) + { + pdf_obj *cses = cs; + + cs = pdf_array_get(cses, 0); + if (pdf_is_name(cs) && (!strcmp(pdf_to_name(cs), "DeviceN") || !strcmp(pdf_to_name(cs), "Separation"))) + { + altcs = pdf_array_get(cses, 2); + if (pdf_is_array(altcs)) + altcs = pdf_array_get(altcs, 0); + } + } + + width = pdf_dict_gets(imagedict, "Width"); + height = pdf_dict_gets(imagedict, "Height"); + bpc = pdf_dict_gets(imagedict, "BitsPerComponent"); + + for (k = 0; k < images; k++) + if (!pdf_objcmp(image[k].u.image.obj, imagedict)) + break; + + if (k < images) + continue; + + image = fz_resize_array(ctx, image, images+1, sizeof(struct info)); + images++; + + image[images - 1].page = page; + image[images - 1].pageref = pageref; + image[images - 1].pageobj = pageobj; + image[images - 1].u.image.obj = imagedict; + image[images - 1].u.image.width = width; + image[images - 1].u.image.height = height; + image[images - 1].u.image.bpc = bpc; + image[images - 1].u.image.filter = filter; + image[images - 1].u.image.cs = cs; + image[images - 1].u.image.altcs = altcs; + } +} + +static void +gatherforms(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) +{ + int i, n; + + n = pdf_dict_len(dict); + for (i = 0; i < n; i++) + { + 
pdf_obj *xobjdict; + pdf_obj *type; + pdf_obj *subtype; + pdf_obj *group; + pdf_obj *groupsubtype; + pdf_obj *reference; + int k; + + xobjdict = pdf_dict_get_val(dict, i); + if (!pdf_is_dict(xobjdict)) + { + fz_warn(ctx, "not a xobject dict (%d %d R)", pdf_to_num(xobjdict), pdf_to_gen(xobjdict)); + continue; + } + + type = pdf_dict_gets(xobjdict, "Subtype"); + if (strcmp(pdf_to_name(type), "Form")) + continue; + + subtype = pdf_dict_gets(xobjdict, "Subtype2"); + if (!strcmp(pdf_to_name(subtype), "PS")) + continue; + + group = pdf_dict_gets(xobjdict, "Group"); + groupsubtype = pdf_dict_gets(group, "S"); + reference = pdf_dict_gets(xobjdict, "Ref"); + + for (k = 0; k < forms; k++) + if (!pdf_objcmp(form[k].u.form.obj, xobjdict)) + break; + + if (k < forms) + continue; + + form = fz_resize_array(ctx, form, forms+1, sizeof(struct info)); + forms++; + + form[forms - 1].page = page; + form[forms - 1].pageref = pageref; + form[forms - 1].pageobj = pageobj; + form[forms - 1].u.form.obj = xobjdict; + form[forms - 1].u.form.groupsubtype = groupsubtype; + form[forms - 1].u.form.reference = reference; + } +} + +static void +gatherpsobjs(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) +{ + int i, n; + + n = pdf_dict_len(dict); + for (i = 0; i < n; i++) + { + pdf_obj *xobjdict; + pdf_obj *type; + pdf_obj *subtype; + int k; + + xobjdict = pdf_dict_get_val(dict, i); + if (!pdf_is_dict(xobjdict)) + { + fz_warn(ctx, "not a xobject dict (%d %d R)", pdf_to_num(xobjdict), pdf_to_gen(xobjdict)); + continue; + } + + type = pdf_dict_gets(xobjdict, "Subtype"); + subtype = pdf_dict_gets(xobjdict, "Subtype2"); + if (strcmp(pdf_to_name(type), "PS") && + (strcmp(pdf_to_name(type), "Form") || strcmp(pdf_to_name(subtype), "PS"))) + continue; + + for (k = 0; k < psobjs; k++) + if (!pdf_objcmp(psobj[k].u.form.obj, xobjdict)) + break; + + if (k < psobjs) + continue; + + psobj = fz_resize_array(ctx, psobj, psobjs+1, sizeof(struct info)); + psobjs++; + + psobj[psobjs - 1].page = page; + 
psobj[psobjs - 1].pageref = pageref; + psobj[psobjs - 1].pageobj = pageobj; + psobj[psobjs - 1].u.form.obj = xobjdict; + } +} + +static void +gathershadings(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) +{ + int i, n; + + n = pdf_dict_len(dict); + for (i = 0; i < n; i++) + { + pdf_obj *shade; + pdf_obj *type; + int k; + + shade = pdf_dict_get_val(dict, i); + if (!pdf_is_dict(shade)) + { + fz_warn(ctx, "not a shading dict (%d %d R)", pdf_to_num(shade), pdf_to_gen(shade)); + continue; + } + + type = pdf_dict_gets(shade, "ShadingType"); + if (!pdf_is_int(type) || pdf_to_int(type) < 1 || pdf_to_int(type) > 7) + { + fz_warn(ctx, "not a shading type (%d %d R)", pdf_to_num(shade), pdf_to_gen(shade)); + type = NULL; + } + + for (k = 0; k < shadings; k++) + if (!pdf_objcmp(shading[k].u.shading.obj, shade)) + break; + + if (k < shadings) + continue; + + shading = fz_resize_array(ctx, shading, shadings+1, sizeof(struct info)); + shadings++; + + shading[shadings - 1].page = page; + shading[shadings - 1].pageref = pageref; + shading[shadings - 1].pageobj = pageobj; + shading[shadings - 1].u.shading.obj = shade; + shading[shadings - 1].u.shading.type = type; + } +} + +static void +gatherpatterns(int page, pdf_obj *pageref, pdf_obj *pageobj, pdf_obj *dict) +{ + int i, n; + + n = pdf_dict_len(dict); + for (i = 0; i < n; i++) + { + pdf_obj *patterndict; + pdf_obj *type; + pdf_obj *paint = NULL; + pdf_obj *tiling = NULL; + pdf_obj *shading = NULL; + int k; + + patterndict = pdf_dict_get_val(dict, i); + if (!pdf_is_dict(patterndict)) + { + fz_warn(ctx, "not a pattern dict (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); + continue; + } + + type = pdf_dict_gets(patterndict, "PatternType"); + if (!pdf_is_int(type) || pdf_to_int(type) < 1 || pdf_to_int(type) > 2) + { + fz_warn(ctx, "not a pattern type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); + type = NULL; + } + + if (pdf_to_int(type) == 1) + { + paint = pdf_dict_gets(patterndict, 
"PaintType"); + if (!pdf_is_int(paint) || pdf_to_int(paint) < 1 || pdf_to_int(paint) > 2) + { + fz_warn(ctx, "not a pattern paint type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); + paint = NULL; + } + + tiling = pdf_dict_gets(patterndict, "TilingType"); + if (!pdf_is_int(tiling) || pdf_to_int(tiling) < 1 || pdf_to_int(tiling) > 3) + { + fz_warn(ctx, "not a pattern tiling type (%d %d R)", pdf_to_num(patterndict), pdf_to_gen(patterndict)); + tiling = NULL; + } + } + else + { + shading = pdf_dict_gets(patterndict, "Shading"); + } + + for (k = 0; k < patterns; k++) + if (!pdf_objcmp(pattern[k].u.pattern.obj, patterndict)) + break; + + if (k < patterns) + continue; + + pattern = fz_resize_array(ctx, pattern, patterns+1, sizeof(struct info)); + patterns++; + + pattern[patterns - 1].page = page; + pattern[patterns - 1].pageref = pageref; + pattern[patterns - 1].pageobj = pageobj; + pattern[patterns - 1].u.pattern.obj = patterndict; + pattern[patterns - 1].u.pattern.type = type; + pattern[patterns - 1].u.pattern.paint = paint; + pattern[patterns - 1].u.pattern.tiling = tiling; + pattern[patterns - 1].u.pattern.shading = shading; + } +} + +static void +gatherresourceinfo(int page, pdf_obj *rsrc, int show) +{ + pdf_obj *pageobj; + pdf_obj *pageref; + pdf_obj *font; + pdf_obj *xobj; + pdf_obj *shade; + pdf_obj *pattern; + pdf_obj *subrsrc; + int i; + + pageobj = xref->page_objs[page-1]; + pageref = xref->page_refs[page-1]; + + if (!pageobj) + fz_throw(ctx, "cannot retrieve info from page %d", page); + + font = pdf_dict_gets(rsrc, "Font"); + if (show & FONTS && font) + { + int n; + + gatherfonts(page, pageref, pageobj, font); + n = pdf_dict_len(font); + for (i = 0; i < n; i++) + { + pdf_obj *obj = pdf_dict_get_val(font, i); + + subrsrc = pdf_dict_gets(obj, "Resources"); + if (subrsrc && pdf_objcmp(rsrc, subrsrc)) + gatherresourceinfo(page, subrsrc, show); + } + } + + xobj = pdf_dict_gets(rsrc, "XObject"); + if (show & XOBJS && xobj) + { + int n; + + 
gatherimages(page, pageref, pageobj, xobj); + gatherforms(page, pageref, pageobj, xobj); + gatherpsobjs(page, pageref, pageobj, xobj); + n = pdf_dict_len(xobj); + for (i = 0; i < n; i++) + { + pdf_obj *obj = pdf_dict_get_val(xobj, i); + subrsrc = pdf_dict_gets(obj, "Resources"); + if (subrsrc && pdf_objcmp(rsrc, subrsrc)) + gatherresourceinfo(page, subrsrc, show); + } + } + + shade = pdf_dict_gets(rsrc, "Shading"); + if (show & SHADINGS && shade) + gathershadings(page, pageref, pageobj, shade); + + pattern = pdf_dict_gets(rsrc, "Pattern"); + if (show & PATTERNS && pattern) + { + int n; + gatherpatterns(page, pageref, pageobj, pattern); + n = pdf_dict_len(pattern); + for (i = 0; i < n; i++) + { + pdf_obj *obj = pdf_dict_get_val(pattern, i); + subrsrc = pdf_dict_gets(obj, "Resources"); + if (subrsrc && pdf_objcmp(rsrc, subrsrc)) + gatherresourceinfo(page, subrsrc, show); + } + } +} + +static void +gatherpageinfo(int page, int show) +{ + pdf_obj *pageobj; + pdf_obj *pageref; + pdf_obj *rsrc; + + pageobj = xref->page_objs[page-1]; + pageref = xref->page_refs[page-1]; + + if (!pageobj) + fz_throw(ctx, "cannot retrieve info from page %d", page); + + gatherdimensions(page, pageref, pageobj); + + rsrc = pdf_dict_gets(pageobj, "Resources"); + gatherresourceinfo(page, rsrc, show); +} + +static void +printinfo(char *filename, int show, int page) +{ + int i; + int j; + +#define PAGE_FMT "\t% 5d (% 7d %1d R): " + + if (show & DIMENSIONS && dims > 0) + { + printf("Mediaboxes (%d):\n", dims); + for (i = 0; i < dims; i++) + { + printf(PAGE_FMT "[ %g %g %g %g ]\n", + dim[i].page, + pdf_to_num(dim[i].pageref), pdf_to_gen(dim[i].pageref), + dim[i].u.dim.bbox->x0, + dim[i].u.dim.bbox->y0, + dim[i].u.dim.bbox->x1, + dim[i].u.dim.bbox->y1); + } + printf("\n"); + } + + if (show & FONTS && fonts > 0) + { + printf("Fonts (%d):\n", fonts); + for (i = 0; i < fonts; i++) + { + printf(PAGE_FMT "%s '%s' (%d %d R)\n", + font[i].page, + pdf_to_num(font[i].pageref), pdf_to_gen(font[i].pageref), + 
pdf_to_name(font[i].u.font.subtype), + pdf_to_name(font[i].u.font.name), + pdf_to_num(font[i].u.font.obj), pdf_to_gen(font[i].u.font.obj)); + } + printf("\n"); + } + + if (show & IMAGES && images > 0) + { + printf("Images (%d):\n", images); + for (i = 0; i < images; i++) + { + char *cs = NULL; + char *altcs = NULL; + + printf(PAGE_FMT "[ ", + image[i].page, + pdf_to_num(image[i].pageref), pdf_to_gen(image[i].pageref)); + + if (pdf_is_array(image[i].u.image.filter)) + { + int n = pdf_array_len(image[i].u.image.filter); + for (j = 0; j < n; j++) + { + pdf_obj *obj = pdf_array_get(image[i].u.image.filter, j); + char *filter = fz_strdup(ctx, pdf_to_name(obj)); + + if (strstr(filter, "Decode")) + *(strstr(filter, "Decode")) = '\0'; + + printf("%s%s", + filter, + j == pdf_array_len(image[i].u.image.filter) - 1 ? "" : " "); + fz_free(ctx, filter); + } + } + else if (image[i].u.image.filter) + { + pdf_obj *obj = image[i].u.image.filter; + char *filter = fz_strdup(ctx, pdf_to_name(obj)); + + if (strstr(filter, "Decode")) + *(strstr(filter, "Decode")) = '\0'; + + printf("%s", filter); + fz_free(ctx, filter); + } + else + printf("Raw"); + + if (image[i].u.image.cs) + { + cs = fz_strdup(ctx, pdf_to_name(image[i].u.image.cs)); + + if (!strncmp(cs, "Device", 6)) + { + int len = strlen(cs + 6); + memmove(cs + 3, cs + 6, len + 1); + cs[3 + len + 1] = '\0'; + } + if (strstr(cs, "ICC")) + fz_strlcpy(cs, "ICC", 4); + if (strstr(cs, "Indexed")) + fz_strlcpy(cs, "Idx", 4); + if (strstr(cs, "Pattern")) + fz_strlcpy(cs, "Pat", 4); + if (strstr(cs, "Separation")) + fz_strlcpy(cs, "Sep", 4); + } + if (image[i].u.image.altcs) + { + altcs = fz_strdup(ctx, pdf_to_name(image[i].u.image.altcs)); + + if (!strncmp(altcs, "Device", 6)) + { + int len = strlen(altcs + 6); + memmove(altcs + 3, altcs + 6, len + 1); + altcs[3 + len + 1] = '\0'; + } + if (strstr(altcs, "ICC")) + fz_strlcpy(altcs, "ICC", 4); + if (strstr(altcs, "Indexed")) + fz_strlcpy(altcs, "Idx", 4); + if (strstr(altcs, "Pattern")) + 
fz_strlcpy(altcs, "Pat", 4); + if (strstr(altcs, "Separation")) + fz_strlcpy(altcs, "Sep", 4); + } + + printf(" ] %dx%d %dbpc %s%s%s (%d %d R)\n", + pdf_to_int(image[i].u.image.width), + pdf_to_int(image[i].u.image.height), + image[i].u.image.bpc ? pdf_to_int(image[i].u.image.bpc) : 1, + image[i].u.image.cs ? cs : "ImageMask", + image[i].u.image.altcs ? " " : "", + image[i].u.image.altcs ? altcs : "", + pdf_to_num(image[i].u.image.obj), pdf_to_gen(image[i].u.image.obj)); + + fz_free(ctx, cs); + fz_free(ctx, altcs); + } + printf("\n"); + } + + if (show & SHADINGS && shadings > 0) + { + printf("Shading patterns (%d):\n", shadings); + for (i = 0; i < shadings; i++) + { + char *shadingtype[] = + { + "", + "Function", + "Axial", + "Radial", + "Triangle mesh", + "Lattice", + "Coons patch", + "Tensor patch", + }; + + printf(PAGE_FMT "%s (%d %d R)\n", + shading[i].page, + pdf_to_num(shading[i].pageref), pdf_to_gen(shading[i].pageref), + shadingtype[pdf_to_int(shading[i].u.shading.type)], + pdf_to_num(shading[i].u.shading.obj), pdf_to_gen(shading[i].u.shading.obj)); + } + printf("\n"); + } + + if (show & PATTERNS && patterns > 0) + { + printf("Patterns (%d):\n", patterns); + for (i = 0; i < patterns; i++) + { + if (pdf_to_int(pattern[i].u.pattern.type) == 1) + { + char *painttype[] = + { + "", + "Colored", + "Uncolored", + }; + char *tilingtype[] = + { + "", + "Constant", + "No distortion", + "Constant/fast tiling", + }; + + printf(PAGE_FMT "Tiling %s %s (%d %d R)\n", + pattern[i].page, + pdf_to_num(pattern[i].pageref), pdf_to_gen(pattern[i].pageref), + painttype[pdf_to_int(pattern[i].u.pattern.paint)], + tilingtype[pdf_to_int(pattern[i].u.pattern.tiling)], + pdf_to_num(pattern[i].u.pattern.obj), pdf_to_gen(pattern[i].u.pattern.obj)); + } + else + { + printf(PAGE_FMT "Shading %d %d R (%d %d R)\n", + pattern[i].page, + pdf_to_num(pattern[i].pageref), pdf_to_gen(pattern[i].pageref), + pdf_to_num(pattern[i].u.pattern.shading), pdf_to_gen(pattern[i].u.pattern.shading), + 
pdf_to_num(pattern[i].u.pattern.obj), pdf_to_gen(pattern[i].u.pattern.obj)); + } + } + printf("\n"); + } + + if (show & XOBJS && forms > 0) + { + printf("Form xobjects (%d):\n", forms); + for (i = 0; i < forms; i++) + { + printf(PAGE_FMT "Form%s%s%s%s (%d %d R)\n", + form[i].page, + pdf_to_num(form[i].pageref), pdf_to_gen(form[i].pageref), + form[i].u.form.groupsubtype ? " " : "", + form[i].u.form.groupsubtype ? pdf_to_name(form[i].u.form.groupsubtype) : "", + form[i].u.form.groupsubtype ? " Group" : "", + form[i].u.form.reference ? " Reference" : "", + pdf_to_num(form[i].u.form.obj), pdf_to_gen(form[i].u.form.obj)); + } + printf("\n"); + } + + if (show & XOBJS && psobjs > 0) + { + printf("Postscript xobjects (%d):\n", psobjs); + for (i = 0; i < psobjs; i++) + { + printf(PAGE_FMT "(%d %d R)\n", + psobj[i].page, + pdf_to_num(psobj[i].pageref), pdf_to_gen(psobj[i].pageref), + pdf_to_num(psobj[i].u.form.obj), pdf_to_gen(psobj[i].u.form.obj)); + } + printf("\n"); + } +} + +static void +showinfo(char *filename, int show, char *pagelist) +{ + int page, spage, epage; + char *spec, *dash; + int allpages; + int pagecount; + + if (!xref) + infousage(); + + allpages = !strcmp(pagelist, "1-"); + + pagecount = pdf_count_pages(xref); + spec = fz_strsep(&pagelist, ","); + while (spec && pagecount) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = pagecount; + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + epage = atoi(dash + 1); + else + epage = pagecount; + } + + if (spage > epage) + page = spage, spage = epage, epage = page; + + spage = fz_clampi(spage, 1, pagecount); + epage = fz_clampi(epage, 1, pagecount); + + if (allpages) + printf("Retrieving info from pages %d-%d...\n", spage, epage); + for (page = spage; page <= epage; page++) + { + gatherpageinfo(page, show); + if (!allpages) + { + printf("Page %d:\n", page); + printinfo(filename, show, page); + printf("\n"); + } + } + + spec = fz_strsep(&pagelist, ","); + } + + if 
(allpages) + printinfo(filename, show, -1); +} + +static int arg_is_page_range(const char *arg) +{ + int c; + + while ((c = *arg++) != 0) + { + if ((c < '0' || c > '9') && (c != '-') && (c != ',')) + return 0; + } + return 1; +} + +int pdfinfo_main(int argc, char **argv) +{ + enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state; + char *filename = ""; + char *password = ""; + int show = ALL; + int c; + + while ((c = fz_getopt(argc, argv, "mfispxd:")) != -1) + { + switch (c) + { + case 'm': if (show == ALL) show = DIMENSIONS; else show |= DIMENSIONS; break; + case 'f': if (show == ALL) show = FONTS; else show |= FONTS; break; + case 'i': if (show == ALL) show = IMAGES; else show |= IMAGES; break; + case 's': if (show == ALL) show = SHADINGS; else show |= SHADINGS; break; + case 'p': if (show == ALL) show = PATTERNS; else show |= PATTERNS; break; + case 'x': if (show == ALL) show = XOBJS; else show |= XOBJS; break; + case 'd': password = fz_optarg; break; + default: + infousage(); + break; + } + } + + if (fz_optind == argc) + infousage(); + + ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + state = NO_FILE_OPENED; + while (fz_optind < argc) + { + if (state == NO_FILE_OPENED || !arg_is_page_range(argv[fz_optind])) + { + if (state == NO_INFO_GATHERED) + { + showinfo(filename, show, "1-"); + closexref(); + } + + closexref(); + + filename = argv[fz_optind]; + printf("%s:\n", filename); + xref = pdf_open_document_no_run(ctx, filename); + if (pdf_needs_password(xref)) + if (!pdf_authenticate_password(xref, password)) + fz_throw(ctx, "cannot authenticate password: %s", filename); + pagecount = pdf_count_pages(xref); + + showglobalinfo(); + state = NO_INFO_GATHERED; + } + else + { + showinfo(filename, show, argv[fz_optind]); + state = INFO_SHOWN; + } + + fz_optind++; + } + + if (state == NO_INFO_GATHERED) + showinfo(filename, show, "1-"); + + closexref(); + fz_free_context(ctx); 
+ return 0; +} diff --git a/apps/pdfposter.c b/apps/pdfposter.c new file mode 100644 index 00000000..2ef44dbb --- /dev/null +++ b/apps/pdfposter.c @@ -0,0 +1,184 @@ +/* + * PDF poster tool: split every page into a grid of smaller pages. + * + * Each page is copied once per tile, with the MediaBox cut down to + * that tile. The grid is -x columns by -y rows; when only one factor + * is given the other is 1, and when neither is given each page is + * halved along its long edge. + * + * TODO: linearize document for fast web view + */ + +#include "fitz.h" +#include "mupdf-internal.h" + +static int x_factor = 0; +static int y_factor = 0; + +static void usage(void) +{ + fprintf(stderr, + "usage: mutool poster [options] input.pdf [output.pdf]\n" + "\t-p -\tpassword\n" + "\t-x\tx decimation factor\n" + "\t-y\ty decimation factor\n"); + exit(1); +} + +/* + * Recreate the page tree so that it holds the tiles of every page. + */ + +static void decimatepages(pdf_document *xref) +{ + pdf_obj *oldroot, *root, *pages, *kids, *parent; + fz_context *ctx = xref->ctx; + int num_pages = pdf_count_pages(xref); + int page, kidcount; + + /* Replace the catalog by one with only Type and Pages entries, to avoid + * references to pages that are no longer in the page tree */ + oldroot = pdf_dict_gets(xref->trailer, "Root"); + pages = pdf_dict_gets(oldroot, "Pages"); + + root = pdf_new_dict(ctx, 2); + pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type")); + pdf_dict_puts(root, "Pages", pdf_dict_gets(oldroot, "Pages")); + + pdf_update_object(xref, pdf_to_num(oldroot), root); + + pdf_drop_obj(root); + + /* Create a new kids array that will hold the tile pages */ + parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref); + kids = pdf_new_array(ctx, 1); + + kidcount = 0; + for (page=0; page < num_pages; page++) + { + pdf_page *page_details = pdf_load_page(xref, page); + int xf = x_factor, yf = y_factor; + int x, y; + float w = page_details->mediabox.x1 - page_details->mediabox.x0; + float h = page_details->mediabox.y1 - page_details->mediabox.y0; + + if (xf == 0 && yf == 0) + { + /* Nothing specified, so split along 
the long edge */ + if (w > h) + xf = 2, yf = 1; + else + xf = 1, yf = 2; + } + else if (xf == 0) + xf = 1; + else if (yf == 0) + yf = 1; + + for (y = yf-1; y >= 0; y--) + { + for (x = 0; x < xf; x++) + { + pdf_obj *newpageobj, *newpageref, *newmediabox; + fz_rect mb; + int num; + + newpageobj = pdf_copy_dict(ctx, xref->page_objs[page]); + num = pdf_create_object(xref); + pdf_update_object(xref, num, newpageobj); + newpageref = pdf_new_indirect(ctx, num, 0, xref); + + newmediabox = pdf_new_array(ctx, 4); + + mb.x0 = page_details->mediabox.x0 + (w/xf)*x; + if (x == xf-1) + mb.x1 = page_details->mediabox.x1; + else + mb.x1 = page_details->mediabox.x0 + (w/xf)*(x+1); + mb.y0 = page_details->mediabox.y0 + (h/yf)*y; + if (y == yf-1) + mb.y1 = page_details->mediabox.y1; + else + mb.y1 = page_details->mediabox.y0 + (h/yf)*(y+1); + + pdf_array_push(newmediabox, pdf_new_real(ctx, mb.x0)); + pdf_array_push(newmediabox, pdf_new_real(ctx, mb.y0)); + pdf_array_push(newmediabox, pdf_new_real(ctx, mb.x1)); + pdf_array_push(newmediabox, pdf_new_real(ctx, mb.y1)); + + pdf_dict_puts(newpageobj, "Parent", parent); + pdf_dict_puts(newpageobj, "MediaBox", newmediabox); + + /* Store page object in new kids array */ + pdf_array_push(kids, newpageref); + + kidcount++; + } + } + } + + pdf_drop_obj(parent); + + /* Update page count and kids array */ + pdf_dict_puts(pages, "Count", pdf_new_int(ctx, kidcount)); + pdf_dict_puts(pages, "Kids", kids); + pdf_drop_obj(kids); +} + +int pdfposter_main(int argc, char **argv) +{ + char *infile; + char *outfile = "out.pdf"; + char *password = ""; + int c; + fz_write_options opts; + pdf_document *xref; + fz_context *ctx; + + opts.do_garbage = 0; + opts.do_expand = 0; + opts.do_ascii = 0; + + while ((c = fz_getopt(argc, argv, "p:x:y:")) != -1) + { + switch (c) + { + case 'p': password = fz_optarg; break; + case 'x': x_factor = atoi(fz_optarg); break; + case 'y': y_factor = atoi(fz_optarg); break; + default: usage(); break; + } + } + + if (argc - fz_optind < 
1) + usage(); + + infile = argv[fz_optind++]; + + if (argc - fz_optind > 0 && + (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF"))) + { + outfile = argv[fz_optind++]; + } + + ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + xref = pdf_open_document_no_run(ctx, infile); + if (pdf_needs_password(xref)) + if (!pdf_authenticate_password(xref, password)) + fz_throw(ctx, "cannot authenticate password: %s", infile); + + /* Split every page into its grid of tile pages */ + decimatepages(xref); + + pdf_write_document(xref, outfile, &opts); + + pdf_close_document(xref); + fz_free_context(ctx); + return 0; +} diff --git a/apps/pdfshow.c b/apps/pdfshow.c new file mode 100644 index 00000000..655493f9 --- /dev/null +++ b/apps/pdfshow.c @@ -0,0 +1,235 @@ +/* + * pdfshow -- the ultimate pdf debugging tool + */ + +#include "mupdf-internal.h" + +static pdf_document *doc = NULL; +static fz_context *ctx = NULL; +static int showbinary = 0; +static int showdecode = 1; +static int showcolumn; + +static void usage(void) +{ + fprintf(stderr, "usage: mutool show [options] file.pdf [grepable] [xref] [trailer] [pagetree] [object numbers]\n"); + fprintf(stderr, "\t-b\tprint streams as binary data\n"); + fprintf(stderr, "\t-e\tprint encoded streams (don't decode)\n"); + fprintf(stderr, "\t-p\tpassword\n"); + exit(1); +} + +static void showtrailer(void) +{ + if (!doc) + fz_throw(ctx, "no file specified"); + printf("trailer\n"); + pdf_fprint_obj(stdout, doc->trailer, 0); + printf("\n"); +} + +static void showxref(void) +{ + if (!doc) + fz_throw(ctx, "no file specified"); + pdf_print_xref(doc); + printf("\n"); +} + +static void showpagetree(void) +{ + pdf_obj *ref; + int count; + int i; + + if (!doc) + fz_throw(ctx, "no file specified"); + + count = pdf_count_pages(doc); + for (i = 0; i < count; i++) + { + ref = doc->page_refs[i]; + printf("page %d = %d %d R\n", i + 1, pdf_to_num(ref), 
pdf_to_gen(ref)); + } + printf("\n"); +} + +static void showsafe(unsigned char *buf, int n) +{ + int i; + for (i = 0; i < n; i++) { + if (buf[i] == '\r' || buf[i] == '\n') { + putchar('\n'); + showcolumn = 0; + } + else if (buf[i] < 32 || buf[i] > 126) { + putchar('.'); + showcolumn ++; + } + else { + putchar(buf[i]); + showcolumn ++; + } + if (showcolumn == 79) { + putchar('\n'); + showcolumn = 0; + } + } +} + +static void showstream(int num, int gen) +{ + fz_stream *stm; + unsigned char buf[2048]; + int n; + + showcolumn = 0; + + if (showdecode) + stm = pdf_open_stream(doc, num, gen); + else + stm = pdf_open_raw_stream(doc, num, gen); + + while (1) + { + n = fz_read(stm, buf, sizeof buf); + if (n == 0) + break; + if (showbinary) + fwrite(buf, 1, n, stdout); + else + showsafe(buf, n); + } + + fz_close(stm); +} + +static void showobject(int num, int gen) +{ + pdf_obj *obj; + + if (!doc) + fz_throw(ctx, "no file specified"); + + obj = pdf_load_object(doc, num, gen); + + if (pdf_is_stream(doc, num, gen)) + { + if (showbinary) + { + showstream(num, gen); + } + else + { + printf("%d %d obj\n", num, gen); + pdf_fprint_obj(stdout, obj, 0); + printf("stream\n"); + showstream(num, gen); + printf("endstream\n"); + printf("endobj\n\n"); + } + } + else + { + printf("%d %d obj\n", num, gen); + pdf_fprint_obj(stdout, obj, 0); + printf("endobj\n\n"); + } + + pdf_drop_obj(obj); +} + +static void showgrep(char *filename) +{ + pdf_obj *obj; + int i, len; + + len = pdf_count_objects(doc); + for (i = 0; i < len; i++) + { + if (doc->table[i].type == 'n' || doc->table[i].type == 'o') + { + fz_try(ctx) + { + obj = pdf_load_object(doc, i, 0); + } + fz_catch(ctx) + { + fz_warn(ctx, "skipping object (%d 0 R)", i); + continue; + } + + pdf_sort_dict(obj); + + printf("%s:%d: ", filename, i); + pdf_fprint_obj(stdout, obj, 1); + + pdf_drop_obj(obj); + } + } + + printf("%s:trailer: ", filename); + pdf_fprint_obj(stdout, doc->trailer, 1); +} + +int pdfshow_main(int argc, char **argv) +{ + char 
*password = NULL; /* don't throw errors if encrypted */ + char *filename; + int c; + + while ((c = fz_getopt(argc, argv, "p:be")) != -1) + { + switch (c) + { + case 'p': password = fz_optarg; break; + case 'b': showbinary = 1; break; + case 'e': showdecode = 0; break; + default: usage(); break; + } + } + + if (fz_optind == argc) + usage(); + + filename = argv[fz_optind++]; + + ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "cannot initialise context\n"); + exit(1); + } + + fz_var(doc); + fz_try(ctx) + { + doc = pdf_open_document_no_run(ctx, filename); + if (pdf_needs_password(doc)) + if (!pdf_authenticate_password(doc, password)) + fz_warn(ctx, "cannot authenticate password: %s", filename); + + if (fz_optind == argc) + showtrailer(); + + while (fz_optind < argc) + { + switch (argv[fz_optind][0]) + { + case 't': showtrailer(); break; + case 'x': showxref(); break; + case 'p': showpagetree(); break; + case 'g': showgrep(filename); break; + default: showobject(atoi(argv[fz_optind]), 0); break; + } + fz_optind++; + } + } + fz_catch(ctx) + { + } + + pdf_close_document(doc); + fz_free_context(ctx); + return 0; +} diff --git a/win32/mubusy.vcproj b/win32/mubusy.vcproj deleted file mode 100644 index c905681e..00000000 --- a/win32/mubusy.vcproj +++ /dev/null @@ -1,266 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/win32/mupdf.sln b/win32/mupdf.sln index 6807711f..bd145d31 100644 --- a/win32/mupdf.sln +++ b/win32/mupdf.sln @@ -21,7 +21,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mudraw", "mudraw.vcproj", " {5EDCF4FD-0291-4FB9-8D96-D58957CA5E3C} = {5EDCF4FD-0291-4FB9-8D96-D58957CA5E3C} EndProjectSection EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mubusy", "mubusy.vcproj", "{00811970-815B-4F64-BC9D-219078B1F3AA}" 
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mutool", "mutool.vcproj", "{00811970-815B-4F64-BC9D-219078B1F3AA}" ProjectSection(ProjectDependencies) = postProject {5F615F91-DFF8-4F05-BF48-6222B7D86519} = {5F615F91-DFF8-4F05-BF48-6222B7D86519} {5EDCF4FD-0291-4FB9-8D96-D58957CA5E3C} = {5EDCF4FD-0291-4FB9-8D96-D58957CA5E3C} diff --git a/win32/mutool.vcproj b/win32/mutool.vcproj new file mode 100644 index 00000000..bc478b8e --- /dev/null +++ b/win32/mutool.vcproj @@ -0,0 +1,266 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- cgit v1.2.3