diff options
Diffstat (limited to 'apps/pdftool.c')
-rw-r--r-- | apps/pdftool.c | 874 |
1 files changed, 874 insertions, 0 deletions
diff --git a/apps/pdftool.c b/apps/pdftool.c new file mode 100644 index 00000000..ba721e96 --- /dev/null +++ b/apps/pdftool.c @@ -0,0 +1,874 @@ +/* + * Swiss army knife for manipulating and debugging PDFs. + * + * There are a few major modes of operation: + * + * show -- pretty-print objects and streams + * draw -- render pages to bitmap + * clean -- simple rewrite of pdf file + * edit -- edit pages (impose and copy operations) + */ + +#include "fitz.h" +#include "mupdf.h" + +/* + * Common operations. + * Parse page selectors. + * Load and decrypt a PDF file. + * Select pages. + */ + +pdf_xref *src = nil; +pdf_pagetree *srcpages = nil; + +void die(fz_error *eo) +{ + fflush(stdout); + fprintf(stderr, "%s:%d: %s(): %s\n", eo->file, eo->line, eo->func, eo->msg); + fflush(stderr); + abort(); +} + +void closesrc(void) +{ + if (srcpages) + { + pdf_droppagetree(srcpages); + srcpages = nil; + } + + if (src) + { + if (src->store) + { + pdf_dropstore(src->store); + src->store = nil; + } + pdf_closexref(src); + src = nil; + } +} + +void opensrc(char *filename, char *password, int loadpages) +{ + fz_error *error; + + closesrc(); + + error = pdf_newxref(&src); + if (error) + die(error); + + error = pdf_loadxref(src, filename); + if (error) + { + fz_warn("trying to repair"); + error = pdf_repairxref(src, filename); + if (error) + die(error); + } + + error = pdf_decryptxref(src); + if (error) + die(error); + + if (src->crypt) + { + error = pdf_setpassword(src->crypt, password); + if (error) + die(error); + } + + if (loadpages) + { + error = pdf_loadpagetree(&srcpages, src); + if (error) + die(error); + } +} + +void preloadobjstms(void) +{ + fz_error *error; + fz_obj *obj; + int i; + + for (i = 0; i < src->len; i++) + { + if (src->table[i].type == 'o') + { + error = pdf_loadobject(&obj, src, i, 0); + if (error) die(error); + fz_dropobj(obj); + } + } +} + +/* --------------------------------------------------------------------- */ + +/* + * Debug print parts of the PDF. + */ + +int showbinary = 0; +int showdecode = 0; +int showcolumn; + +void showusage(void) +{ + fprintf(stderr, "usage: pdftool show [-bd] <file> [xref] [trailer] [object numbers]\n"); + fprintf(stderr, " -b \tprint streams as raw binary data\n"); + fprintf(stderr, " -d \tdecode streams\n"); + exit(1); +} + +void showtrailer(void) +{ + if (!src) + die(fz_throw("no file specified")); + printf("trailer\n"); + fz_debugobj(src->trailer); + printf("\n\n"); +} + +void showxref(void) +{ + if (!src) + die(fz_throw("no file specified")); + pdf_debugxref(src); + printf("\n"); +} + +void showsafe(unsigned char *buf, int n) +{ + int i; + for (i = 0; i < n; i++) { + if (buf[i] == '\r' || buf[i] == '\n') { + putchar('\n'); + showcolumn = 0; + } + else if (buf[i] < 32 || buf[i] > 126) { + putchar('.'); + showcolumn ++; + } + else { + putchar(buf[i]); + showcolumn ++; + } + if (showcolumn == 79) { + putchar('\n'); + showcolumn = 0; + } + } +} + +void showstream(int num, int gen) +{ + fz_error *error; + fz_stream *stm; + unsigned char buf[2048]; + int n; + + showcolumn = 0; + + if (showdecode) + error = pdf_openstream(&stm, src, num, gen); + else + error = pdf_openrawstream(&stm, src, num, gen); + if (error) + die(error); + + while (1) + { + n = fz_read(stm, buf, sizeof buf); + if (n == 0) + break; + if (n < 0) + die(fz_ioerror(stm)); + + if (showbinary) + fwrite(buf, 1, n, stdout); + else + showsafe(buf, n); + } + + fz_dropstream(stm); +} + +void showobject(int num, int gen) +{ + fz_error *error; + fz_obj *obj; + + if (!src) + die(fz_throw("no file specified")); + + error = pdf_loadobject(&obj, src, num, gen); + if (error) + die(error); + + printf("%d %d obj\n", num, gen); + fz_debugobj(obj); + printf("\n"); + + if (pdf_isstream(src, num, gen)) + { + printf("stream\n"); + showstream(num, gen); + printf("endstream\n"); + } + + printf("endobj\n\n"); + + fz_dropobj(obj); +} + +void +showmain(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "bd")) != -1) + { + switch (c) + { + case 'b': showbinary ++; break; + case 'd': showdecode ++; break; + default: + showusage(); + break; + } + } + + if (optind == argc) + showusage(); + + opensrc(argv[optind++], "", 0); + + if (optind == argc) + showtrailer(); + + while (optind < argc) + { + if (!strcmp(argv[optind], "trailer")) + showtrailer(); + else if (!strcmp(argv[optind], "xref")) + showxref(); + else + showobject(atoi(argv[optind]), 0); + optind++; + } +} + +/* --------------------------------------------------------------------- */ + +/* + * Clean tool. + * Rewrite PDF. + * Garbage collect. + * Decompress streams. + * Encrypt or decrypt. + */ + +void +cleanusage(void) +{ + fprintf(stderr, + "usage: pdftool clean [options] input.pdf [outfile.pdf]\n" + " -d -\tpassword for decryption\n" + " -g \tgarbage collect unused objects\n" + " -x \texpand compressed streams\n" + " -e \tencrypt output\n" + " -u -\tset user password for encryption\n" + " -o -\tset owner password\n" + " -p -\tset permissions (combine letters 'pmca')\n" + " -n -\tkey length in bits: 40 <= n <= 128\n"); + exit(1); +} + +void +cleanexpand(void) +{ + fz_error *error; + fz_obj *stmobj; + fz_buffer *buf; + fz_obj *stmlen; + int i, gen; + + for (i = 0; i < src->len; i++) + { + if (src->table[i].type == 'n') + { + gen = src->table[i].gen; + + if (pdf_isstream(src, i, gen)) + { + error = pdf_loadobject(&stmobj, src, i, gen); + if (error) die(error); + + error = pdf_loadstream(&buf, src, i, gen); + if (error) die(error); + + fz_dictdels(stmobj, "Filter"); + fz_dictdels(stmobj, "DecodeParms"); + + error = fz_newint(&stmlen, buf->wp - buf->rp); + if (error) die(error); + error = fz_dictputs(stmobj, "Length", stmlen); + if (error) die(error); + fz_dropobj(stmlen); + + pdf_updateobject(src, i, gen, stmobj); + pdf_updatestream(src, i, gen, buf); + + fz_dropobj(stmobj); + } + } + } +} + +void +cleanmain(int argc, char **argv) +{ + int doencrypt = 0; + int dogarbage = 0; + int doexpand = 0; + pdf_crypt *encrypt = nil; + char *infile; + char *outfile = "out.pdf"; + char *userpw = ""; + char *ownerpw = ""; + unsigned perms = 0xfffff0c0; /* nothing allowed */ + int keylen = 40; + char *password = ""; + fz_error *error; + int c; + + while ((c = getopt(argc, argv, "d:egn:o:p:u:x")) != -1) + { + switch (c) + { + case 'p': + /* see TABLE 3.15 User access permissions */ + perms = 0xfffff0c0; + if (strchr(optarg, 'p')) /* print */ + perms |= (1 << 2) | (1 << 11); + if (strchr(optarg, 'm')) /* modify */ + perms |= (1 << 3) | (1 << 10); + if (strchr(optarg, 'c')) /* copy */ + perms |= (1 << 4) | (1 << 9); + if (strchr(optarg, 'a')) /* annotate / forms */ + perms |= (1 << 5) | (1 << 8); + break; + case 'd': password = optarg; break; + case 'e': doencrypt ++; break; + case 'g': dogarbage ++; break; + case 'n': keylen = atoi(optarg); break; + case 'o': ownerpw = optarg; break; + case 'u': userpw = optarg; break; + case 'x': doexpand ++; break; + default: cleanusage(); break; + } + } + + if (argc - optind < 1) + cleanusage(); + + infile = argv[optind++]; + if (argc - optind > 0) + outfile = argv[optind++]; + + opensrc(infile, password, 0); + + if (doencrypt) + { + fz_obj *id = fz_dictgets(src->trailer, "ID"); + if (!id) + { + error = fz_packobj(&id, "[(ABCDEFGHIJKLMNOP)(ABCDEFGHIJKLMNOP)]"); + if (error) + die(error); + } + else + fz_keepobj(id); + + error = pdf_newencrypt(&encrypt, userpw, ownerpw, perms, keylen, id); + if (error) + die(error); + + fz_dropobj(id); + } + + if (doexpand) + cleanexpand(); + + if (dogarbage) + { + preloadobjstms(); + pdf_garbagecollect(src); + } + + error = pdf_savexref(src, outfile, encrypt); + if (error) + die(error); + + if (encrypt) + pdf_dropcrypt(encrypt); + + pdf_closexref(src); +} + + +/* --------------------------------------------------------------------- */ + +/* + * Draw pages to PPM bitmaps. + */ + +enum { DRAWPNM, DRAWTXT, DRAWXML }; + +fz_renderer *drawgc = nil; +int drawmode = DRAWPNM; +char *drawpattern = "out%0.3d.pnm"; +pdf_page *drawpage = nil; +float drawzoom = 1.0; +int drawrotate = 0; +int drawbands = 1; +int drawcount = 0; + +void +drawusage(void) +{ + fprintf(stderr, + "usage: pdftool draw [options] [file.pdf pages ... ]\n" + " -b -\tdraw page in N bands\n" + " -d -\tpassword for decryption\n" + " -o -\tpattern (%%d for page number) for output file\n" + " -r -\tresolution in dpi\n" + " -t \tutf-8 text output instead of graphics\n" + " -x \txml dump of display tree\n" + " example:\n" + " pdftool draw -o out%%0.3d.pnm a.pdf 1-3,5,9-\n"); + exit(1); +} + +void +drawloadpage(int pagenum) +{ + fz_error *error; + fz_obj *pageobj; + + pageobj = pdf_getpageobject(srcpages, pagenum - 1); + error = pdf_loadpage(&drawpage, src, pageobj); + if (error) + die(error); + + fprintf(stderr, "page %d mediabox [ %g %g %g %g ] rotate %d\n", + pagenum, + drawpage->mediabox.x0, drawpage->mediabox.y0, + drawpage->mediabox.x1, drawpage->mediabox.y1, + drawpage->rotate); +} + +void +drawfreepage(void) +{ + pdf_droppage(drawpage); + drawpage = nil; +} + +void +drawpnm(int pagenum) +{ + fz_error *error; + fz_matrix ctm; + fz_irect bbox; + fz_pixmap *pix; + char namebuf[256]; + char buf[256]; + int x, y, w, h, b, bh; + int fd; + + drawloadpage(pagenum); + + ctm = fz_identity(); + ctm = fz_concat(ctm, fz_translate(0, -drawpage->mediabox.y1)); + ctm = fz_concat(ctm, fz_scale(drawzoom, -drawzoom)); + ctm = fz_concat(ctm, fz_rotate(drawrotate + drawpage->rotate)); + + bbox = fz_roundrect(fz_transformaabb(ctm, drawpage->mediabox)); + w = bbox.x1 - bbox.x0; + h = bbox.y1 - bbox.y0; + bh = h / drawbands; + + if (drawpattern) + { + sprintf(namebuf, drawpattern, drawcount++); + fd = open(namebuf, O_BINARY|O_WRONLY|O_CREAT|O_TRUNC, 0666); + if (fd < 0) + die(fz_throw("ioerror: could not open file '%s'", namebuf)); + } + else + fd = 1; + + sprintf(buf, "P6\n%d %d\n255\n", w, h); + write(fd, buf, strlen(buf)); + + error = fz_newpixmap(&pix, bbox.x0, bbox.y0, w, bh, 4); + if (error) + die(error); + + memset(pix->samples, 0xff, pix->h * pix->w * pix->n); + + for (b = 0; b < drawbands; b++) + { + if (drawbands > 1) + fprintf(stderr, "drawing band %d / %d\n", b + 1, drawbands); + + error = fz_rendertreeover(drawgc, pix, drawpage->tree, ctm); + if (error) + die(error); + + for (y = 0; y < pix->h; y++) + { + unsigned char *src = pix->samples + y * pix->w * 4; + unsigned char *dst = src; + + for (x = 0; x < pix->w; x++) + { + dst[x * 3 + 0] = src[x * 4 + 1]; + dst[x * 3 + 1] = src[x * 4 + 2]; + dst[x * 3 + 2] = src[x * 4 + 3]; + } + + write(fd, dst, pix->w * 3); + + memset(src, 0xff, pix->w * 4); + } + + pix->y += bh; + if (pix->y + pix->h > bbox.y1) + pix->h = bbox.y1 - pix->y; + } + + fz_droppixmap(pix); + + if (drawpattern) + close(fd); + + drawfreepage(); +} + +void +drawtxt(int pagenum) +{ + fz_error *error; + pdf_textline *line; + fz_matrix ctm; + + drawloadpage(pagenum); + + ctm = fz_concat( + fz_translate(0, -drawpage->mediabox.y1), + fz_scale(drawzoom, -drawzoom)); + + error = pdf_loadtextfromtree(&line, drawpage->tree, ctm); + if (error) + die(error); + + pdf_debugtextline(line); + pdf_droptextline(line); + + drawfreepage(); +} + +void +drawxml(int pagenum) +{ + drawloadpage(pagenum); + fz_debugtree(drawpage->tree); + drawfreepage(); +} + +void +drawpages(char *pagelist) +{ + int page, spage, epage; + char *spec, *dash; + + if (!src) + drawusage(); + + spec = strsep(&pagelist, ","); + while (spec) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = 1; + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + epage = atoi(dash + 1); + else + epage = pdf_getpagecount(srcpages); + } + + if (spage > epage) + page = spage, spage = epage, epage = page; + + for (page = spage; page <= epage; page++) + { + if (page < 1 || page > pdf_getpagecount(srcpages)) + continue; + switch (drawmode) + { + case DRAWPNM: drawpnm(page); break; + case DRAWTXT: drawtxt(page); break; + case DRAWXML: drawxml(page); break; + } + } + + spec = strsep(&pagelist, ","); + } +} + +void +drawmain(int argc, char **argv) +{ + fz_error *error; + char *password = ""; + int c; + + while ((c = getopt(argc, argv, "b:d:o:r:tx")) != -1) + { + switch (c) + { + case 'b': drawbands = atoi(optarg); break; + case 'd': password = optarg; break; + case 'o': drawpattern = optarg; break; + case 'r': drawzoom = atof(optarg) / 72.0; break; + case 't': drawmode = DRAWTXT; break; + case 'x': drawmode = DRAWXML; break; + default: + drawusage(); + break; + } + } + + if (optind == argc) + drawusage(); + + error = fz_newrenderer(&drawgc, pdf_devicergb, 0, 1024 * 512); + if (error) + die(error); + + while (optind < argc) + { + if (strstr(argv[optind], ".pdf")) + opensrc(argv[optind], password, 1); + else + drawpages(argv[optind]); + optind++; + } + + closesrc(); + + fz_droprenderer(drawgc); +} + +/* --------------------------------------------------------------------- */ + +/* + * Edit tool. + * Copy or impose pages from other pdf files into output pdf. + */ + +/* for each source pdf, build a list of objects to transplant. + * for each source pdf, do the transplants at the end of object collecting. + * build a new page tree structure for output. + * change page nodes into xobjects for over and n-up modes. + * create new page nodes. + * create new page tree. + */ + +enum { COPY, OVER, NUP2, NUP4, NUP8 }; + +pdf_xref *editxref = nil; +fz_obj *editkids = nil; +int editmode = COPY; + +void +editusage(void) +{ + fprintf(stderr, "usage: pdftool edit [-o file.pdf] [mode file.pdf pages ... ]\n"); + fprintf(stderr, " mode is one of: copy over 2up 4up 8up\n"); + fprintf(stderr, " pages is a comma separated list of ranges\n"); + fprintf(stderr, " example:\n"); + fprintf(stderr, " pdftool edit -o output.pdf copy one.pdf 1-3,5,9 two.pdf 1-\n"); + exit(1); +} + +void +editcopy(int pagenum) +{ + printf("copy page %d\n", pagenum); +} + +void editover(int pagenum) { } +void edit2up(int pagenum) { } +void edit4up(int pagenum) { } +void edit8up(int pagenum) { } + +void +editpages(char *pagelist) +{ + int page, spage, epage; + char *spec, *dash; + + if (!src) + editusage(); + + spec = strsep(&pagelist, ","); + while (spec) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = 1; + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + epage = atoi(dash + 1); + else + epage = pdf_getpagecount(srcpages); + } + + if (spage > epage) + page = spage, spage = epage, epage = page; + + for (page = spage; page <= epage; page++) + { + if (page < 1 || page > pdf_getpagecount(srcpages)) + continue; + switch (editmode) + { + case COPY: editcopy(page); break; + case OVER: editover(page); break; + case NUP2: edit2up(page); break; + case NUP4: edit4up(page); break; + case NUP8: edit8up(page); break; + } + } + + spec = strsep(&pagelist, ","); + } +} + +void +editmain(int argc, char **argv) +{ + char *outfile = "out.pdf"; + fz_error *error; + int c; + + while ((c = getopt(argc, argv, "o:")) != -1) + { + switch (c) + { + case 'o': + outfile = optarg; + break; + default: + editusage(); + break; + } + } + + if (optind == argc) + editusage(); + + fprintf(stderr, "edit tool is not implemented yet\n"); + exit(1); + + error = pdf_newxref(&editxref); + if (error) + die(error); + + error = pdf_initxref(editxref); + if (error) + die(error); + + while (optind < argc) + { + if (strstr(argv[optind], ".pdf")) + { + opensrc(argv[optind], "", 1); + } + else if (!strcmp(argv[optind], "copy")) + editmode = COPY; + else if (!strcmp(argv[optind], "over")) + editmode = OVER; + else if (!strcmp(argv[optind], "2up")) + editmode = NUP2; + else if (!strcmp(argv[optind], "4up")) + editmode = NUP4; + else if (!strcmp(argv[optind], "8up")) + editmode = NUP8; + else + editpages(argv[optind]); + optind++; + } + + closesrc(); + + error = pdf_savexref(editxref, outfile, nil); + if (error) + die(error); + + pdf_closexref(editxref); +} + +/* --------------------------------------------------------------------- */ + +/* + * Main! + */ + +void +mainusage(void) +{ + fprintf(stderr, "usage: pdftool <command> [options...]\n"); + fprintf(stderr, " command is one of: show, draw, clean, edit\n"); + exit(1); +} + +int +main(int argc, char **argv) +{ + if (argc >= 2) + { + optind = 2; + if (!strcmp(argv[1], "show")) + showmain(argc, argv); + else if (!strcmp(argv[1], "draw")) + drawmain(argc, argv); + else if (!strcmp(argv[1], "clean")) + cleanmain(argc, argv); + else if (!strcmp(argv[1], "edit")) + editmain(argc, argv); + else + mainusage(); + } + else + mainusage(); + return 0; +} + |