diff options
Diffstat (limited to 'test')
-rw-r--r-- | test/mupdf.c | 337 | ||||
-rw-r--r-- | test/pdfclean.c | 501 | ||||
-rw-r--r-- | test/showcmap.c | 22 |
3 files changed, 860 insertions, 0 deletions
diff --git a/test/mupdf.c b/test/mupdf.c new file mode 100644 index 00000000..b51eb43d --- /dev/null +++ b/test/mupdf.c @@ -0,0 +1,337 @@ +#include <fitz.h> +#include <mupdf.h> + +static char *password = ""; +static int dodecode = 0; +static int dorepair = 0; +static int doprintxref = 0; +static int doprintpages = 0; + +void usage() +{ + fprintf(stderr, "usage: mupdf [-drxp] [-u password] file.pdf\n"); + exit(1); +} + +/* + * Debug-print stream contents + */ + +static int safecol = 0; + +void printsafe(unsigned char *buf, int n) +{ + int i; + for (i = 0; i < n; i++) { + if (buf[i] == '\r' || buf[i] == '\n') { + printf("\n"); + safecol = 0; + } + else if (buf[i] < 32 || buf[i] > 126) { + printf("."); + safecol ++; + } + else { + printf("%c", buf[i]); + safecol ++; + } + if (safecol == 79) { + printf("\n"); + safecol = 0; + } + } +} + +void decodestream(pdf_xref *xref, fz_obj *stream, int oid, int gid, int ofs) +{ + fz_error *error; + unsigned char buf[512]; + + safecol = 0; + + error = pdf_openstream0(xref, stream, oid, gid, ofs); + if (error) fz_abort(error); + + while (1) + { + int n = fz_read(xref->file, buf, sizeof buf); + if (n == 0) + break; + if (n < 0) + fz_abort(fz_ferror(xref->file)); + printsafe(buf, n); + } + + pdf_closestream(xref); +} + +void copystream(pdf_xref *xref, fz_obj *stream, int ofs) +{ + fz_error *error; + unsigned char buf[512]; + fz_filter *filter; + fz_obj *obj; + int len; + + safecol = 0; + + obj = fz_dictgets(stream, "Length"); + error = pdf_resolve(&obj, xref); + if (error) fz_abort(error); + len = fz_toint(obj); + fz_dropobj(obj); + + error = fz_newnullfilter(&filter, len); + if (error) fz_abort(error); + + fz_seek(xref->file, ofs); + + error = fz_pushfilter(xref->file, filter); + if (error) fz_abort(error); + + while (1) + { + int n = fz_read(xref->file, buf, sizeof buf); + if (n == 0) + break; + if (n < 0) + fz_abort(fz_ferror(xref->file)); + printsafe(buf, n); + } + + fz_popfilter(xref->file); +} + +void printobject(pdf_xref *xref, int oid, int gid) +{ + fz_error *error; + int stmofs; + fz_obj *obj; + + error = pdf_loadobject0(&obj, xref, oid, gid, &stmofs); + if (error) fz_abort(error); + + printf("%d %d obj\n", oid, gid); + fz_fprintobj(stdout, obj); + printf("\n"); + if (stmofs != -1) { + printf("stream\n"); + if (dodecode) + decodestream(xref, obj, oid, gid, stmofs); + else + copystream(xref, obj, stmofs); + printf("endstream\n"); + } + printf("endobj\n"); + + fz_dropobj(obj); +} + +/* + * Draw page + */ + +void runcsi(pdf_xref *xref, pdf_csi *csi, pdf_resources *rdb, fz_obj *stmref) +{ + fz_error *error; + + error = pdf_openstream(xref, stmref); + if (error) fz_abort(error); + + error = pdf_runcsi(csi, rdb, xref->file); + if (error) fz_abort(error); + + pdf_closestream(xref); +} + +void showpage(pdf_xref *xref, fz_obj *page) +{ + fz_error *error; + pdf_csi *csi; + pdf_resources *rdb = nil; + fz_obj *resources; + fz_obj *contents; + int i; + + fz_fprintobj(stdout, page); + printf("\n"); + + resources = fz_dictgets(page, "Resources"); + if (resources) + { + error = pdf_resolve(&resources, xref); + if (error) fz_abort(error); + + error = pdf_loadresources(&rdb, xref, resources); + if (error) fz_abort(error); + + // parse resources into native res dict + fz_dropobj(resources); + } + else + fz_abort(fz_throw("syntaxerror: missing resource dictionary")); + +printf("resources:\n"); +printf(" font:\n"); +fz_fprintobj(stdout, rdb->font); +printf("\n extgstate:\n"); +fz_fprintobj(stdout, rdb->extgstate); +printf("\nfitz tree:\n"); + + error = pdf_newcsi(&csi); + if (error) fz_abort(error); + + contents = fz_dictgets(page, "Contents"); + if (contents) + { + if (fz_isarray(contents)) + { + for (i = 0; i < fz_arraylen(contents); i++) + { + runcsi(xref, csi, rdb, fz_arrayget(contents, i)); + } + } + else + { + // XXX resolve and check if it is an array + runcsi(xref, csi, rdb, contents); + } + } + + fz_debugtree(csi->tree); + + { + fz_pixmap *pix; + fz_renderer *gc; + fz_matrix ctm; + +#define W 612 +#define H 792 + +#define xW 1106 +#define xH 1548 + + fz_newrenderer(&gc); + fz_newpixmap(&pix, 0, 0, W, H, 1, 0); + ctm = fz_concat(fz_translate(0, -H), fz_scale(1,-1)); + + memset(pix->samples, 0x00, pix->stride * pix->h * 2); + +printf("rendering!\n"); + fz_rendernode(gc, csi->tree->root, ctm, pix); +printf("done!\n"); + fz_debugpixmap(pix); + + fz_freepixmap(pix); + fz_freerenderer(gc); + } + + pdf_freecsi(csi); +} + +int main(int argc, char **argv) +{ + fz_error *error; + char *filename; + pdf_xref *xref; + pdf_pagetree *pages; + int c; + + while ((c = getopt(argc, argv, "drxopu:")) != -1) + { + switch (c) + { + case 'd': + dodecode ++; + break; + case 'r': + dorepair ++; + break; + case 'x': + doprintxref ++; + break; + case 'p': + doprintpages ++; + break; + case 'u': + password = optarg; + break; + default: + usage(); + } + } + + if (argc - optind == 0) + usage(); + + filename = argv[optind++]; + + error = pdf_newxref(&xref); + if (error) + fz_abort(error); + + if (dorepair) + error = pdf_repairxref(xref, filename); + else + error = pdf_openxref(xref, filename); + if (error) + fz_abort(error); + + error = pdf_decryptxref(xref); + if (error) + fz_abort(error); + + if (xref->crypt) + { + error = pdf_setpassword(xref->crypt, password); + if (error) fz_abort(error); + } + + if (doprintxref) + pdf_debugxref(xref); + + if (doprintpages) + { + error = pdf_loadpagetree(&pages, xref); + if (error) fz_abort(error); + + if (optind == argc) + { + printf("pagetree\n"); + pdf_debugpagetree(pages); + printf("\n"); + } + else + { + for ( ; optind < argc; optind++) + { + int page = atoi(argv[optind]); + if (page < 1 || page > pages->count) + fprintf(stderr, "page out of bounds: %d\n", page); + printf("page %d\n", page); + showpage(xref, pages->pobj[page - 1]); + } + } + } + + else + { + if (optind == argc) + { + printf("trailer\n"); + fz_fprintobj(stdout, xref->trailer); + printf("\n"); + } + + for ( ; optind < argc; optind++) + { + printobject(xref, atoi(argv[optind]), 0); + printf("\n"); + } + } + + pdf_closexref(xref); + + return 0; +} + diff --git a/test/pdfclean.c b/test/pdfclean.c new file mode 100644 index 00000000..00721842 --- /dev/null +++ b/test/pdfclean.c @@ -0,0 +1,501 @@ +#include <fitz.h> +#include <mupdf.h> + +#define encrypt encrypt3am + +int (*printobj)(FILE*,fz_obj*) = fz_fprintobj; + +/* + * Rewrite PDF with cleaned up syntax, and consolidate the xref table. + * Remove encryption while we're at it :) + */ + +static FILE *out; + +static pdf_xref *xt; + +static int *ofslist; +static int *genlist; +static int *uselist; + +static int dorebuild = 0; +static int doexpand = 0; +static pdf_crypt *encrypt = nil; +static fz_obj *encryptdict = nil; +static fz_obj *id = nil; +static int dogc = 0; + +void usage() +{ + fprintf(stderr, + "usage: pdfclean [options] infile.pdf outfile.pdf\n" + " -r\treconstruct broken xref table\n" + " -g\tgarbage collect unused objects\n" + " -x\texpand compressed streams\n" + " -c\twrite compact objects\n" + " -d -\tset user password for decryption\n" + " -e\tencrypt outfile\n" + " -u -\tset user password for encryption\n" + " -o -\tset owner password\n" + " -p -\tset permissions\n" + " -n -\tkey length in bits: 40 <= n <= 128\n" + ); + exit(1); +} + +void garbagecollect(fz_obj *ref); + +void gc0(fz_obj *obj) +{ + int i; + + if (fz_isdict(obj)) + for (i = 0; i < fz_dictlen(obj); i++) + gc0(fz_dictgetval(obj, i)); + + if (fz_isarray(obj)) + for (i = 0; i < fz_arraylen(obj); i++) + gc0(fz_arrayget(obj, i)); + + if (fz_isindirect(obj)) + garbagecollect(obj); +} + +void garbagecollect(fz_obj *ref) +{ + fz_obj *obj; + int stmofs; + + if (uselist[fz_toobjid(ref)]) + return; + + uselist[fz_toobjid(ref)] = 1; + + pdf_loadindirect(&obj, xt, ref, &stmofs); + + if (stmofs != -1) { + fz_obj *len = fz_dictgets(obj, "Length"); + if (fz_isindirect(len)) { + pdf_loadindirect(&len, xt, len, nil); + fz_dictputs(obj, "Length", len); + } + } + + gc0(obj); +} + +void decodestream(fz_obj *obj, int ofs, int oid, int gid) +{ + fz_error *error; + unsigned char buf[512]; + fz_filter *filter; + fz_file *sf; + int n; + int len; + fz_obj *lenobj; + fz_obj *newdict; + + /* count length of decoded data */ + len = 0; + + error = pdf_buildstream(&filter, xt, obj, oid, gid); + if (error) fz_abort(error); + + n = fz_seek(xt->file, ofs); + + fz_chainfile(&sf, xt->file, filter); + + while (1) { + n = fz_read(sf, buf, sizeof buf); + if (n < 0) + fz_abort(fz_ferror(sf)); + if (n == 0) + break; + len += n; + } + + fz_unchainfile(sf); + + /* change dictionary of object... */ + fz_copydict(&newdict, obj); + + fz_newint(&lenobj, len); + fz_dictputs(newdict, "Length", lenobj); + fz_dropobj(lenobj); + + fz_dictdels(newdict, "Filter"); + fz_dictdels(newdict, "DecodeParms"); + + /* save object */ + fprintf(out, "%d %d obj\n", oid, gid); + printobj(out, newdict); + fprintf(out, "\n"); + fprintf(out, "stream\n"); + + fz_dropobj(newdict); + + /* now decode stream for real */ + error = pdf_buildstream(&filter, xt, obj, oid, gid); + if (error) fz_abort(error); + + fz_seek(xt->file, ofs); + + if (encrypt) { + fz_filter *cf; + pdf_cryptstm(&cf, encrypt, oid, gid); + fz_newpipeline(&filter, filter, cf); + } + + fz_chainfile(&sf, xt->file, filter); + while (1) { + n = fz_read(sf, buf, sizeof buf); + if (n < 0) + fz_abort(fz_ferror(sf)); + if (n == 0) + break; + fwrite(buf, 1, n, out); + } + fz_unchainfile(sf); + + /* the end */ + fprintf(out, "endstream\nendobj\n\n"); + + return; +} + +void savestream(fz_obj *obj, int ofs, int oid, int gid) +{ + unsigned char buf[512]; + fz_filter *filter; + fz_file *sf; + int len; + int n; + + /* save object */ + fprintf(out, "%d %d obj\n", oid, gid); + printobj(out, obj); + fprintf(out, "\n"); + fprintf(out, "stream\n"); + + /* copy stream */ + obj = fz_dictgets(obj, "Length"); + if (fz_isindirect(obj)) { + pdf_loadindirect(&obj, xt, obj, nil); + len = fz_toint(obj); + fz_dropobj(obj); + } + else { + len = fz_toint(obj); + } + + fz_newnullfilter(&filter, len); + + if (xt->crypt) { + fz_filter *cf; + pdf_cryptstm(&cf, xt->crypt, oid, gid); + fz_newpipeline(&filter, cf, filter); + } + + if (encrypt) { + fz_filter *cf; + pdf_cryptstm(&cf, encrypt, oid, gid); + fz_newpipeline(&filter, filter, cf); + } + + fz_seek(xt->file, ofs); + fz_chainfile(&sf, xt->file, filter); + while (1) + { + n = fz_read(sf, buf, sizeof buf); + if (n == 0) + break; + if (n < 0) + fz_abort(fz_ferror(sf)); + fwrite(buf, 1, n, out); + } + fz_unchainfile(sf); + + /* the end */ + fprintf(out, "endstream\nendobj\n\n"); +} + +void deleteobject(int oid, int gid) +{ + uselist[oid] = 0; +} + +void saveobject(int oid, int gid) +{ + fz_error *error; + fz_obj *obj; + fz_obj *t; + int stmofs; + + error = pdf_loadobj(&obj, xt, oid, gid, &stmofs); + if (error) fz_abort(error); + + /* trash ObjStm and XrefStm objects */ + if (fz_isdict(obj)) { + t = fz_dictgets(obj, "Type"); + if (fz_isname(t) && strcmp(fz_toname(t), "ObjStm") == 0) { + deleteobject(oid, gid); + fz_dropobj(obj); + return; + } + if (fz_isname(t) && strcmp(fz_toname(t), "XRef") == 0) { + deleteobject(oid, gid); + fz_dropobj(obj); + return; + } + } + + if (encrypt) + pdf_cryptobj(encrypt, obj, oid, gid); + + if (stmofs == -1) { + fprintf(out, "%d %d obj\n", oid, gid); + printobj(out, obj); + fprintf(out, "\nendobj\n\n"); + } + else if (doexpand) { + decodestream(obj, stmofs, oid, gid); + } + else { + savestream(obj, stmofs, oid, gid); + } + + fz_dropobj(obj); +} + +void savexref(void) +{ + fz_obj *newtrailer; + fz_obj *obj; + int startxref; + int i; + + startxref = ftell(out); + + fprintf(out, "xref\n0 %d\n", xt->size); + for (i = 0; i < xt->size; i++) { + if (uselist[i]) + fprintf(out, "%010d %05d n \n", ofslist[i], genlist[i]); + else + fprintf(out, "%010d %05d f \n", ofslist[i], genlist[i]); + } + fprintf(out, "\n"); + + fz_newdict(&newtrailer, 5); + + fz_newint(&obj, xt->size); + fz_dictputs(newtrailer, "Size", obj); + fz_dropobj(obj); + + obj = fz_dictgets(xt->trailer, "Info"); + if (obj) fz_dictputs(newtrailer, "Info", obj); + + obj = fz_dictgets(xt->trailer, "Root"); + if (obj) fz_dictputs(newtrailer, "Root", obj); + + fz_dictputs(newtrailer, "ID", id); + + if (encryptdict) { + fz_newindirect(&obj, xt->size - 1, 0); + fz_dictputs(newtrailer, "Encrypt", obj); + fz_dropobj(obj); + } + + fprintf(out, "trailer\n"); + printobj(out, newtrailer); + fprintf(out, "\n\n"); + + fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref); +} + +int main(int argc, char **argv) +{ + fz_error *error; + fz_obj *obj; + int lastfree; + char *filename; + int i; + int c; + + int doencrypt = 0; + char *password = ""; + char *userpw = ""; + char *ownerpw = ""; + int perms = -4; /* 0xfffffffc */ + int keylen = 40; + + while (1) + { + c = getopt(argc, argv, "rcxgeu:o:p:n:d:"); + + if (c == -1) + break; + + switch (c) + { + case 'r': + dorebuild ++; + break; + case 'x': + doexpand ++; + break; + case 'g': + dogc ++; + break; + case 'c': + printobj = fz_fprintcobj; + break; + case 'd': + password = optarg; + break; + case 'e': + doencrypt ++; + break; + case 'u': + userpw = optarg; + break; + case 'o': + ownerpw = optarg; + break; + case 'p': + perms = atoi(optarg); + break; + case 'n': + keylen = atoi(optarg); + break; + default: + usage(); + } + } + + if (argc - optind != 2) + usage(); + + filename = argv[optind]; + + if (dorebuild) + error = pdf_rebuildxref(&xt, filename); + else + error = pdf_openxref(&xt, filename); + if (error) fz_abort(error); + + if (doencrypt && keylen > 40 && xt->version < 1.4) + xt->version = 1.4; + + id = fz_dictgets(xt->trailer, "ID"); + obj = fz_dictgets(xt->trailer, "Encrypt"); + if (fz_isindirect(obj)) { + pdf_loadindirect(&obj, xt, obj, nil); + } + if (obj && id) { + pdf_newdecrypt(&xt->crypt, obj, id); + error = pdf_setpassword(xt->crypt, password); + if (error) fz_abort(error); + } + + id = fz_dictgets(xt->trailer, "ID"); + if (!id) + fz_parseobj(&id, "[ (foobar) (foobar) ]"); + + if (doencrypt) + pdf_newencrypt(&encrypt, &encryptdict, userpw, ownerpw, perms, keylen, id); + + out = fopen(argv[optind + 1], "w"); + if (!out) { + fz_abort(fz_throw("open(%s): %s", argv[optind + 1], strerror(errno))); + } + + fprintf(out, "%%PDF-%.1f\n\n", xt->version); + + ofslist = fz_malloc(sizeof(int) * (xt->size + 1)); + genlist = fz_malloc(sizeof(int) * (xt->size + 1)); + uselist = fz_malloc(sizeof(int) * (xt->size + 1)); + + lastfree = 0; + + ofslist[0] = 0; + genlist[0] = 65535; + uselist[0] = 0; + + for (i = 1; i < xt->size; i++) { + ofslist[i] = 0; + genlist[i] = 0; + uselist[i] = 1; + } + + /* garbage collect from roots in trailer */ + if (dogc) + { + for (i = 1; i < xt->size; i++) + uselist[i] = 0; + + obj = fz_dictgets(xt->trailer, "Info"); + if (fz_isindirect(obj)) + garbagecollect(obj); + + obj = fz_dictgets(xt->trailer, "Root"); + if (fz_isindirect(obj)) + garbagecollect(obj); + + obj = fz_dictgets(xt->trailer, "ID"); + if (fz_isindirect(obj)) + garbagecollect(obj); + } + + /* pretty print objects */ + for (i = 0; i < xt->size; i++) + { + if (xt->table[i].type == 0) + uselist[i] = 0; + + if (xt->table[i].type == 0) + genlist[i] = xt->table[i].gen; + if (xt->table[i].type == 1) + genlist[i] = xt->table[i].gen; + if (xt->table[i].type == 2) + genlist[i] = 0; + + if (dogc && !uselist[i]) + continue; + + if (xt->table[i].type == 1 || xt->table[i].type == 2) + { + ofslist[i] = ftell(out); + saveobject(i, genlist[i]); + } + } + + /* add encryption dictionary if we crypted */ + if (encryptdict) { + xt->size ++; + ofslist[xt->size - 1] = ftell(out); + genlist[xt->size - 1] = 0; + uselist[xt->size - 1] = 1; + fprintf(out, "%d %d obj\n", xt->size - 1, 0); + printobj(out, encryptdict); + fprintf(out, "\nendobj\n\n"); + } + + /* construct linked list of free object slots */ + lastfree = 0; + for (i = 1; i < xt->size; i++) { + if (!uselist[i]) { + genlist[i] ++; + ofslist[lastfree] = i; + lastfree = i; + } + } + + savexref(); + + pdf_closexref(xt); + + fclose(out); + + return 0; +} + diff --git a/test/showcmap.c b/test/showcmap.c new file mode 100644 index 00000000..80575a4c --- /dev/null +++ b/test/showcmap.c @@ -0,0 +1,22 @@ +#include <fitz.h> +#include <mupdf.h> + +int main(int argc, char **argv) +{ + fz_error *err; + fz_cmap *cmap; + fz_file *file; + + err = fz_openfile(&file, argv[1], O_RDONLY); + if (err) + fz_abort(err); + + err = pdf_parsecmap(&cmap, file); + if (err) + fz_abort(err); + + fz_debugcmap(cmap); + + return 0; +} + |