summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r-- test/mupdf.c 337
-rw-r--r-- test/pdfclean.c 501
-rw-r--r-- test/showcmap.c 22
3 files changed, 860 insertions, 0 deletions
diff --git a/test/mupdf.c b/test/mupdf.c
new file mode 100644
index 00000000..b51eb43d
--- /dev/null
+++ b/test/mupdf.c
@@ -0,0 +1,337 @@
+#include <fitz.h>
+#include <mupdf.h>
+
+static char *password = "";	/* set by -u; passed to pdf_setpassword() when the file is encrypted */
+static int dodecode = 0;	/* set by -d; printobject() uses decodestream() instead of copystream() */
+static int dorepair = 0;	/* set by -r; main() calls pdf_repairxref() instead of pdf_openxref() */
+static int doprintxref = 0;	/* set by -x; main() dumps the xref with pdf_debugxref() */
+static int doprintpages = 0;	/* set by -p; trailing arguments are treated as page numbers */
+
+/*
+ * Print the command-line synopsis on stderr and exit with status 1.
+ */
+void usage()
+{
+	static const char synopsis[] =
+		"usage: mupdf [-drxp] [-u password] file.pdf\n";
+
+	fputs(synopsis, stderr);
+	exit(1);
+}
+
+/*
+ * Debug-print stream contents
+ */
+
+static int safecol = 0;	/* characters printed on the current output line; printsafe() wraps at 79 */
+
+/*
+ * Write n bytes of buf to stdout in a terminal-safe form:
+ *  - CR and LF each produce a newline and reset the column counter,
+ *  - bytes below 32 or above 126 are printed as '.',
+ *  - everything else is printed as is.
+ * A newline is forced whenever the column counter (safecol) reaches 79.
+ */
+void printsafe(unsigned char *buf, int n)
+{
+	int k;
+
+	for (k = 0; k < n; k++)
+	{
+		int c = buf[k];
+
+		if (c == '\r' || c == '\n')
+		{
+			putchar('\n');
+			safecol = 0;
+		}
+		else
+		{
+			putchar((c >= 32 && c <= 126) ? c : '.');
+			safecol++;
+		}
+
+		if (safecol == 79)
+		{
+			putchar('\n');
+			safecol = 0;
+		}
+	}
+}
+
+/*
+ * Open the stream of object (oid, gid) at offset ofs with pdf_openstream0()
+ * and print everything that can be read from xref->file through printsafe().
+ * The stream is closed again afterwards. Any error is fatal (fz_abort).
+ */
+void decodestream(pdf_xref *xref, fz_obj *stream, int oid, int gid, int ofs)
+{
+	unsigned char chunk[512];
+	fz_error *error;
+	int got;
+
+	safecol = 0;
+
+	error = pdf_openstream0(xref, stream, oid, gid, ofs);
+	if (error)
+		fz_abort(error);
+
+	/* fz_read: 0 means end of data, a negative value means error */
+	while ((got = fz_read(xref->file, chunk, sizeof chunk)) != 0)
+	{
+		if (got < 0)
+			fz_abort(fz_ferror(xref->file));
+		printsafe(chunk, got);
+	}
+
+	pdf_closestream(xref);
+}
+
+/*
+ * Print the raw bytes of a stream through printsafe().
+ * The byte count comes from the (resolved) "Length" entry of the stream
+ * dictionary; a null filter created with that length is pushed onto
+ * xref->file after seeking to ofs, and popped again when reading is done.
+ * Any error is fatal (fz_abort).
+ */
+void copystream(pdf_xref *xref, fz_obj *stream, int ofs)
+{
+	unsigned char chunk[512];
+	fz_filter *limit;
+	fz_error *error;
+	fz_obj *lenobj;
+	int length;
+	int got;
+
+	safecol = 0;
+
+	/* look up the stream length, following an indirect reference if needed */
+	lenobj = fz_dictgets(stream, "Length");
+	error = pdf_resolve(&lenobj, xref);
+	if (error)
+		fz_abort(error);
+	length = fz_toint(lenobj);
+	fz_dropobj(lenobj);
+
+	error = fz_newnullfilter(&limit, length);
+	if (error)
+		fz_abort(error);
+
+	fz_seek(xref->file, ofs);
+
+	error = fz_pushfilter(xref->file, limit);
+	if (error)
+		fz_abort(error);
+
+	/* fz_read: 0 means end of data, a negative value means error */
+	while ((got = fz_read(xref->file, chunk, sizeof chunk)) != 0)
+	{
+		if (got < 0)
+			fz_abort(fz_ferror(xref->file));
+		printsafe(chunk, got);
+	}
+
+	fz_popfilter(xref->file);
+}
+
+/*
+ * Load object (oid, gid) and print it to stdout in PDF object syntax.
+ * When the object has a stream (stmofs != -1) the stream data is printed
+ * between "stream" and "endstream": through decodestream() if -d was given,
+ * otherwise through copystream().
+ */
+void printobject(pdf_xref *xref, int oid, int gid)
+{
+	fz_obj *obj;
+	fz_error *error;
+	int stmofs;
+
+	error = pdf_loadobject0(&obj, xref, oid, gid, &stmofs);
+	if (error)
+		fz_abort(error);
+
+	printf("%d %d obj\n", oid, gid);
+	fz_fprintobj(stdout, obj);
+	printf("\n");
+
+	if (stmofs != -1)
+	{
+		printf("stream\n");
+		if (!dodecode)
+			copystream(xref, obj, stmofs);
+		else
+			decodestream(xref, obj, oid, gid, stmofs);
+		printf("endstream\n");
+	}
+
+	printf("endobj\n");
+
+	fz_dropobj(obj);
+}
+
+/*
+ * Draw page
+ */
+
+/*
+ * Open the stream referenced by stmref, run it through pdf_runcsi() with
+ * the given interpreter state and resources, then close the stream.
+ * Any error is fatal (fz_abort).
+ */
+void runcsi(pdf_xref *xref, pdf_csi *csi, pdf_resources *rdb, fz_obj *stmref)
+{
+	fz_error *error = pdf_openstream(xref, stmref);
+
+	if (error)
+		fz_abort(error);
+
+	error = pdf_runcsi(csi, rdb, xref->file);
+	if (error)
+		fz_abort(error);
+
+	pdf_closestream(xref);
+}
+
+/*
+ * Debug-dump one page object:
+ *  1. print the page dictionary,
+ *  2. load its "Resources" and print the font and extgstate entries,
+ *  3. run the "Contents" stream(s) through runcsi() and print the resulting tree,
+ *  4. render the tree into a W x H pixmap and print the pixmap.
+ * A page without "Resources" is reported as a syntax error.
+ */
+void showpage(pdf_xref *xref, fz_obj *page)
+{
+	fz_error *error;
+	pdf_resources *rdb = nil;
+	pdf_csi *csi;
+	fz_obj *resources;
+	fz_obj *contents;
+	fz_renderer *gc;
+	fz_pixmap *pix;
+	fz_matrix ctm;
+	int k;
+
+	fz_fprintobj(stdout, page);
+	printf("\n");
+
+	resources = fz_dictgets(page, "Resources");
+	if (!resources)
+	{
+		fz_abort(fz_throw("syntaxerror: missing resource dictionary"));
+	}
+	else
+	{
+		error = pdf_resolve(&resources, xref);
+		if (error)
+			fz_abort(error);
+
+		/* build the pdf_resources used by the interpreter */
+		error = pdf_loadresources(&rdb, xref, resources);
+		if (error)
+			fz_abort(error);
+
+		fz_dropobj(resources);
+	}
+
+	printf("resources:\n"
+		" font:\n");
+	fz_fprintobj(stdout, rdb->font);
+	printf("\n extgstate:\n");
+	fz_fprintobj(stdout, rdb->extgstate);
+	printf("\nfitz tree:\n");
+
+	error = pdf_newcsi(&csi);
+	if (error)
+		fz_abort(error);
+
+	contents = fz_dictgets(page, "Contents");
+	if (contents && fz_isarray(contents))
+	{
+		/* several content streams: interpret them in array order */
+		for (k = 0; k < fz_arraylen(contents); k++)
+			runcsi(xref, csi, rdb, fz_arrayget(contents, k));
+	}
+	else if (contents)
+	{
+		/* XXX an indirect reference to an array is not handled here */
+		runcsi(xref, csi, rdb, contents);
+	}
+
+	fz_debugtree(csi->tree);
+
+#define W 612
+#define H 792
+
+#define xW 1106
+#define xH 1548
+
+	fz_newrenderer(&gc);
+	fz_newpixmap(&pix, 0, 0, W, H, 1, 0);
+
+	/* flip the y axis so the page origin ends up at the top of the pixmap */
+	ctm = fz_concat(fz_translate(0, -H), fz_scale(1,-1));
+
+	/* NOTE(review): the factor 2 is presumably the size of one sample in bytes -- confirm against fz_pixmap */
+	memset(pix->samples, 0x00, pix->stride * pix->h * 2);
+
+	printf("rendering!\n");
+	fz_rendernode(gc, csi->tree->root, ctm, pix);
+	printf("done!\n");
+	fz_debugpixmap(pix);
+
+	fz_freepixmap(pix);
+	fz_freerenderer(gc);
+
+	pdf_freecsi(csi);
+}
+
+/*
+ * mupdf test tool.
+ *
+ * Opens (or, with -r, repairs) the xref of the given PDF, sets the password
+ * if the file is encrypted, and then:
+ *  - with -x, dumps the xref table;
+ *  - with -p, treats the remaining arguments as 1-based page numbers and
+ *    shows each page, or prints the page tree when no numbers are given;
+ *  - otherwise treats the remaining arguments as object numbers
+ *    (generation 0) and prints each object, or prints the trailer when no
+ *    numbers are given.
+ *
+ * Returns 0 on success; errors are fatal through fz_abort()/usage().
+ */
+int main(int argc, char **argv)
+{
+	fz_error *error;
+	char *filename;
+	pdf_xref *xref;
+	pdf_pagetree *pages;
+	int c;
+
+	/* option letters match the usage() text */
+	while ((c = getopt(argc, argv, "drxpu:")) != -1)
+	{
+		switch (c)
+		{
+		case 'd':
+			dodecode ++;
+			break;
+		case 'r':
+			dorepair ++;
+			break;
+		case 'x':
+			doprintxref ++;
+			break;
+		case 'p':
+			doprintpages ++;
+			break;
+		case 'u':
+			password = optarg;
+			break;
+		default:
+			usage();
+		}
+	}
+
+	if (argc - optind == 0)
+		usage();
+
+	filename = argv[optind++];
+
+	error = pdf_newxref(&xref);
+	if (error)
+		fz_abort(error);
+
+	if (dorepair)
+		error = pdf_repairxref(xref, filename);
+	else
+		error = pdf_openxref(xref, filename);
+	if (error)
+		fz_abort(error);
+
+	error = pdf_decryptxref(xref);
+	if (error)
+		fz_abort(error);
+
+	if (xref->crypt)
+	{
+		error = pdf_setpassword(xref->crypt, password);
+		if (error) fz_abort(error);
+	}
+
+	if (doprintxref)
+		pdf_debugxref(xref);
+
+	if (doprintpages)
+	{
+		error = pdf_loadpagetree(&pages, xref);
+		if (error) fz_abort(error);
+
+		if (optind == argc)
+		{
+			printf("pagetree\n");
+			pdf_debugpagetree(pages);
+			printf("\n");
+		}
+		else
+		{
+			for ( ; optind < argc; optind++)
+			{
+				int page = atoi(argv[optind]);
+				if (page < 1 || page > pages->count)
+				{
+					/* skip it: pobj[page - 1] would be outside the page array */
+					fprintf(stderr, "page out of bounds: %d\n", page);
+					continue;
+				}
+				printf("page %d\n", page);
+				showpage(xref, pages->pobj[page - 1]);
+			}
+		}
+	}
+
+	else
+	{
+		if (optind == argc)
+		{
+			printf("trailer\n");
+			fz_fprintobj(stdout, xref->trailer);
+			printf("\n");
+		}
+
+		for ( ; optind < argc; optind++)
+		{
+			printobject(xref, atoi(argv[optind]), 0);
+			printf("\n");
+		}
+	}
+
+	pdf_closexref(xref);
+
+	return 0;
+}
+
diff --git a/test/pdfclean.c b/test/pdfclean.c
new file mode 100644
index 00000000..00721842
--- /dev/null
+++ b/test/pdfclean.c
@@ -0,0 +1,501 @@
+#include <fitz.h>
+#include <mupdf.h>
+
+#define encrypt encrypt3am	/* NOTE(review): presumably avoids a clash with the libc encrypt() declaration -- confirm */
+
+int (*printobj)(FILE*,fz_obj*) = fz_fprintobj;	/* object printer; -c switches it to fz_fprintcobj */
+
+/*
+ * Rewrite PDF with cleaned up syntax, and consolidate the xref table.
+ * Remove encryption while we're at it :)
+ */
+
+static FILE *out;	/* output file, opened in main() */
+
+static pdf_xref *xt;	/* xref of the input file */
+
+static int *ofslist;	/* per object: offset written to the new xref (next free object for free slots) */
+static int *genlist;	/* per object: generation number written to the new xref */
+static int *uselist;	/* per object: non-zero when the object is written as in use ('n') */
+
+static int dorebuild = 0;	/* set by -r; open the input with pdf_rebuildxref() */
+static int doexpand = 0;	/* set by -x; saveobject() writes streams through decodestream() */
+static pdf_crypt *encrypt = nil;	/* output encryption state from pdf_newencrypt(); nil when not encrypting */
+static fz_obj *encryptdict = nil;	/* encryption dictionary from pdf_newencrypt(); written as the last object */
+static fz_obj *id = nil;	/* trailer "ID" of the input, or a placeholder parsed in main() */
+static int dogc = 0;	/* set by -g; only objects reachable from the trailer are written */
+
+/*
+ * Print the option summary on stderr and exit with status 1.
+ */
+void usage()
+{
+	static const char help[] =
+		"usage: pdfclean [options] infile.pdf outfile.pdf\n"
+		" -r\treconstruct broken xref table\n"
+		" -g\tgarbage collect unused objects\n"
+		" -x\texpand compressed streams\n"
+		" -c\twrite compact objects\n"
+		" -d -\tset user password for decryption\n"
+		" -e\tencrypt outfile\n"
+		" -u -\tset user password for encryption\n"
+		" -o -\tset owner password\n"
+		" -p -\tset permissions\n"
+		" -n -\tkey length in bits: 40 <= n <= 128\n";
+
+	fputs(help, stderr);
+	exit(1);
+}
+
+void garbagecollect(fz_obj *ref);
+
+/*
+ * Recursive half of the garbage collector: walk obj and call
+ * garbagecollect() for every indirect reference found in it.
+ * Dictionary values and array elements are visited recursively.
+ */
+void gc0(fz_obj *obj)
+{
+	int k;
+
+	if (fz_isdict(obj))
+	{
+		k = 0;
+		while (k < fz_dictlen(obj))
+		{
+			gc0(fz_dictgetval(obj, k));
+			k++;
+		}
+	}
+
+	if (fz_isarray(obj))
+	{
+		k = 0;
+		while (k < fz_arraylen(obj))
+		{
+			gc0(fz_arrayget(obj, k));
+			k++;
+		}
+	}
+
+	if (fz_isindirect(obj))
+	{
+		garbagecollect(obj);
+	}
+}
+
+/*
+ * Mark the object referenced by ref as used and recursively mark everything
+ * it references (through gc0()).
+ *
+ * For stream objects an indirect "Length" is replaced in the loaded
+ * dictionary by the loaded length object before the walk.
+ *
+ * References whose object number lies outside the xref table, and objects
+ * that cannot be loaded, are ignored instead of being indexed/dereferenced.
+ */
+void garbagecollect(fz_obj *ref)
+{
+	fz_obj *obj = nil;
+	int stmofs = -1;
+	int oid = fz_toobjid(ref);
+
+	/* the object number comes from the file; never index uselist with it unchecked */
+	if (oid < 0 || oid >= xt->size)
+		return;
+
+	/* already visited: this also terminates reference cycles */
+	if (uselist[oid])
+		return;
+
+	uselist[oid] = 1;
+
+	pdf_loadindirect(&obj, xt, ref, &stmofs);
+	if (!obj)
+		return;
+
+	if (stmofs != -1) {
+		fz_obj *len = fz_dictgets(obj, "Length");
+		if (fz_isindirect(len)) {
+			fz_obj *direct = nil;
+			pdf_loadindirect(&direct, xt, len, nil);
+			if (direct) {
+				fz_dictputs(obj, "Length", direct);
+				/* the dictionary keeps its own reference */
+				fz_dropobj(direct);
+			}
+		}
+	}
+
+	gc0(obj);
+
+	/* release the reference obtained from pdf_loadindirect() */
+	fz_dropobj(obj);
+}
+
+/*
+ * Write stream object (oid, gid) to the output file with its data run
+ * through the filter built by pdf_buildstream().
+ *
+ * The stream is read twice from offset ofs of the input file:
+ *  pass 1 only counts the filtered bytes so that "Length" can be written;
+ *  pass 2 copies the filtered bytes to the output, additionally chained
+ *  through the output crypt filter when encryption is enabled.
+ *
+ * The dictionary written is a copy of obj with "Length" replaced and
+ * "Filter"/"DecodeParms" removed. Read and write failures are fatal.
+ */
+void decodestream(fz_obj *obj, int ofs, int oid, int gid)
+{
+	fz_error *error;
+	unsigned char buf[512];
+	fz_filter *filter;
+	fz_file *sf;
+	int n;
+	int len;
+	fz_obj *lenobj;
+	fz_obj *newdict;
+
+	/* count length of decoded data */
+	len = 0;
+
+	error = pdf_buildstream(&filter, xt, obj, oid, gid);
+	if (error) fz_abort(error);
+
+	fz_seek(xt->file, ofs);
+
+	fz_chainfile(&sf, xt->file, filter);
+
+	while (1) {
+		n = fz_read(sf, buf, sizeof buf);
+		if (n < 0)
+			fz_abort(fz_ferror(sf));
+		if (n == 0)
+			break;
+		len += n;
+	}
+
+	fz_unchainfile(sf);
+
+	/* change dictionary of object... */
+	fz_copydict(&newdict, obj);
+
+	fz_newint(&lenobj, len);
+	fz_dictputs(newdict, "Length", lenobj);
+	fz_dropobj(lenobj);
+
+	fz_dictdels(newdict, "Filter");
+	fz_dictdels(newdict, "DecodeParms");
+
+	/* save object */
+	fprintf(out, "%d %d obj\n", oid, gid);
+	printobj(out, newdict);
+	fprintf(out, "\n");
+	fprintf(out, "stream\n");
+
+	fz_dropobj(newdict);
+
+	/* now decode stream for real */
+	error = pdf_buildstream(&filter, xt, obj, oid, gid);
+	if (error) fz_abort(error);
+
+	fz_seek(xt->file, ofs);
+
+	if (encrypt) {
+		/* append the output crypt filter after the decode filter */
+		fz_filter *cf;
+		pdf_cryptstm(&cf, encrypt, oid, gid);
+		fz_newpipeline(&filter, filter, cf);
+	}
+
+	fz_chainfile(&sf, xt->file, filter);
+	while (1) {
+		n = fz_read(sf, buf, sizeof buf);
+		if (n < 0)
+			fz_abort(fz_ferror(sf));
+		if (n == 0)
+			break;
+		/* a short write would leave a stream shorter than its Length */
+		if (fwrite(buf, 1, n, out) != (size_t)n)
+			fz_abort(fz_throw("ioerror: write failed: %s", strerror(errno)));
+	}
+	fz_unchainfile(sf);
+
+	/* the end */
+	fprintf(out, "endstream\nendobj\n\n");
+}
+
+/*
+ * Write stream object (oid, gid) to the output file, copying the stream
+ * data from offset ofs of the input file.
+ *
+ * The number of bytes copied is the (possibly indirect) "Length" of obj,
+ * enforced by a null filter. When the input is encrypted the input crypt
+ * filter is chained in, and when output encryption is enabled the output
+ * crypt filter is chained in as well.
+ *
+ * Filter creation, read and write failures are fatal.
+ */
+void savestream(fz_obj *obj, int ofs, int oid, int gid)
+{
+	fz_error *error;
+	unsigned char buf[512];
+	fz_filter *filter;
+	fz_file *sf;
+	fz_obj *lenobj;
+	int len;
+	int n;
+
+	/* save object */
+	fprintf(out, "%d %d obj\n", oid, gid);
+	printobj(out, obj);
+	fprintf(out, "\n");
+	fprintf(out, "stream\n");
+
+	/* find the stream length; a loaded indirect length must be dropped again */
+	lenobj = fz_dictgets(obj, "Length");
+	if (fz_isindirect(lenobj)) {
+		pdf_loadindirect(&lenobj, xt, lenobj, nil);
+		len = fz_toint(lenobj);
+		fz_dropobj(lenobj);
+	}
+	else {
+		len = fz_toint(lenobj);
+	}
+
+	error = fz_newnullfilter(&filter, len);
+	if (error) fz_abort(error);
+
+	if (xt->crypt) {
+		fz_filter *cf;
+		pdf_cryptstm(&cf, xt->crypt, oid, gid);
+		fz_newpipeline(&filter, cf, filter);
+	}
+
+	if (encrypt) {
+		fz_filter *cf;
+		pdf_cryptstm(&cf, encrypt, oid, gid);
+		fz_newpipeline(&filter, filter, cf);
+	}
+
+	/* copy stream */
+	fz_seek(xt->file, ofs);
+	fz_chainfile(&sf, xt->file, filter);
+	while (1)
+	{
+		n = fz_read(sf, buf, sizeof buf);
+		if (n == 0)
+			break;
+		if (n < 0)
+			fz_abort(fz_ferror(sf));
+		/* a short write would leave a stream shorter than its Length */
+		if (fwrite(buf, 1, n, out) != (size_t)n)
+			fz_abort(fz_throw("ioerror: write failed: %s", strerror(errno)));
+	}
+	fz_unchainfile(sf);
+
+	/* the end */
+	fprintf(out, "endstream\nendobj\n\n");
+}
+
+/*
+ * Clear the in-use flag of object number oid, so that savexref() writes
+ * its xref entry as free. gid is not used.
+ */
+void deleteobject(int oid, int gid)
+{
+	int *flag = &uselist[oid];
+
+	*flag = 0;
+}
+
+/*
+ * Load object (oid, gid) from the input and write it to the output.
+ *
+ * Dictionaries whose "Type" is the name ObjStm or XRef are not written;
+ * they are marked unused instead. Everything else is passed through
+ * pdf_cryptobj() when output encryption is enabled and then written as a
+ * plain object, an expanded stream (-x) or a copied stream.
+ */
+void saveobject(int oid, int gid)
+{
+	fz_error *error;
+	fz_obj *obj;
+	fz_obj *type;
+	int stmofs;
+	int trash = 0;
+
+	error = pdf_loadobj(&obj, xt, oid, gid, &stmofs);
+	if (error)
+		fz_abort(error);
+
+	/* trash ObjStm and XrefStm objects */
+	if (fz_isdict(obj))
+	{
+		type = fz_dictgets(obj, "Type");
+		trash = fz_isname(type) &&
+			(strcmp(fz_toname(type), "ObjStm") == 0 ||
+			 strcmp(fz_toname(type), "XRef") == 0);
+	}
+
+	if (trash)
+	{
+		deleteobject(oid, gid);
+		fz_dropobj(obj);
+		return;
+	}
+
+	if (encrypt)
+		pdf_cryptobj(encrypt, obj, oid, gid);
+
+	if (stmofs != -1)
+	{
+		if (doexpand)
+			decodestream(obj, stmofs, oid, gid);
+		else
+			savestream(obj, stmofs, oid, gid);
+	}
+	else
+	{
+		fprintf(out, "%d %d obj\n", oid, gid);
+		printobj(out, obj);
+		fprintf(out, "\nendobj\n\n");
+	}
+
+	fz_dropobj(obj);
+}
+
+/*
+ * Write the cross-reference table, the trailer dictionary and the
+ * startxref/EOF footer to the output file.
+ *
+ * One xref entry is written per object 0 .. xt->size-1 from ofslist and
+ * genlist, marked 'n' when uselist[i] is set and 'f' otherwise.
+ * The new trailer carries Size, the input's Info and Root (when present),
+ * the ID, and an Encrypt reference to the last object when an encryption
+ * dictionary was written.
+ */
+void savexref(void)
+{
+	fz_obj *newtrailer;
+	fz_obj *obj;
+	int startxref;
+	int i;
+
+	/* the xref section starts at the current end of the output */
+	startxref = ftell(out);
+
+	fprintf(out, "xref\n0 %d\n", xt->size);
+	for (i = 0; i < xt->size; i++) {
+		if (uselist[i])
+			fprintf(out, "%010d %05d n \n", ofslist[i], genlist[i]);
+		else
+			fprintf(out, "%010d %05d f \n", ofslist[i], genlist[i]);
+	}
+	fprintf(out, "\n");
+
+	fz_newdict(&newtrailer, 5);
+
+	fz_newint(&obj, xt->size);
+	fz_dictputs(newtrailer, "Size", obj);
+	fz_dropobj(obj);
+
+	obj = fz_dictgets(xt->trailer, "Info");
+	if (obj) fz_dictputs(newtrailer, "Info", obj);
+
+	obj = fz_dictgets(xt->trailer, "Root");
+	if (obj) fz_dictputs(newtrailer, "Root", obj);
+
+	fz_dictputs(newtrailer, "ID", id);
+
+	if (encryptdict) {
+		/* main() appended the encryption dictionary as the last object */
+		fz_newindirect(&obj, xt->size - 1, 0);
+		fz_dictputs(newtrailer, "Encrypt", obj);
+		fz_dropobj(obj);
+	}
+
+	fprintf(out, "trailer\n");
+	printobj(out, newtrailer);
+	fprintf(out, "\n\n");
+
+	fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref);
+
+	/* the trailer has been printed; release it */
+	fz_dropobj(newtrailer);
+}
+
+/*
+ * pdfclean: rewrite infile.pdf as outfile.pdf.
+ *
+ * Steps:
+ *  1. parse options (see usage()),
+ *  2. open or rebuild the input xref and set up decryption if the trailer
+ *     has both Encrypt and ID,
+ *  3. optionally set up output encryption,
+ *  4. optionally mark the objects reachable from the trailer (-g),
+ *  5. write every in-use object, then the encryption dictionary if any,
+ *  6. link the unused slots into the xref free list and write the xref.
+ *
+ * Returns 0 on success; errors are fatal through fz_abort()/usage().
+ */
+int main(int argc, char **argv)
+{
+	fz_error *error;
+	fz_obj *obj;
+	int lastfree;
+	char *filename;
+	int i;
+	int c;
+
+	int doencrypt = 0;
+	char *password = "";
+	char *userpw = "";
+	char *ownerpw = "";
+	int perms = -4; /* 0xfffffffc */
+	int keylen = 40;
+
+	while ((c = getopt(argc, argv, "rcxgeu:o:p:n:d:")) != -1)
+	{
+		switch (c)
+		{
+		case 'r':
+			dorebuild ++;
+			break;
+		case 'x':
+			doexpand ++;
+			break;
+		case 'g':
+			dogc ++;
+			break;
+		case 'c':
+			printobj = fz_fprintcobj;
+			break;
+		case 'd':
+			password = optarg;
+			break;
+		case 'e':
+			doencrypt ++;
+			break;
+		case 'u':
+			userpw = optarg;
+			break;
+		case 'o':
+			ownerpw = optarg;
+			break;
+		case 'p':
+			perms = atoi(optarg);
+			break;
+		case 'n':
+			keylen = atoi(optarg);
+			/* enforce the range documented by usage() */
+			if (keylen < 40 || keylen > 128)
+				usage();
+			break;
+		default:
+			usage();
+		}
+	}
+
+	if (argc - optind != 2)
+		usage();
+
+	filename = argv[optind];
+
+	if (dorebuild)
+		error = pdf_rebuildxref(&xt, filename);
+	else
+		error = pdf_openxref(&xt, filename);
+	if (error) fz_abort(error);
+
+	/* key lengths above 40 bits are written with at least version 1.4 */
+	if (doencrypt && keylen > 40 && xt->version < 1.4)
+		xt->version = 1.4;
+
+	id = fz_dictgets(xt->trailer, "ID");
+	obj = fz_dictgets(xt->trailer, "Encrypt");
+	if (fz_isindirect(obj)) {
+		pdf_loadindirect(&obj, xt, obj, nil);
+	}
+	if (obj && id) {
+		pdf_newdecrypt(&xt->crypt, obj, id);
+		error = pdf_setpassword(xt->crypt, password);
+		if (error) fz_abort(error);
+	}
+
+	/* the output always gets an ID; make one up if the input has none */
+	if (!id)
+		fz_parseobj(&id, "[ (foobar) (foobar) ]");
+
+	if (doencrypt)
+		pdf_newencrypt(&encrypt, &encryptdict, userpw, ownerpw, perms, keylen, id);
+
+	/* binary mode: stream data and the ftell() offsets must not be translated */
+	out = fopen(argv[optind + 1], "wb");
+	if (!out) {
+		fz_abort(fz_throw("open(%s): %s", argv[optind + 1], strerror(errno)));
+	}
+
+	fprintf(out, "%%PDF-%.1f\n\n", xt->version);
+
+	/* one extra slot for the encryption dictionary appended below */
+	ofslist = fz_malloc(sizeof(int) * (xt->size + 1));
+	genlist = fz_malloc(sizeof(int) * (xt->size + 1));
+	uselist = fz_malloc(sizeof(int) * (xt->size + 1));
+
+	ofslist[0] = 0;
+	genlist[0] = 65535;
+	uselist[0] = 0;
+
+	for (i = 1; i < xt->size; i++) {
+		ofslist[i] = 0;
+		genlist[i] = 0;
+		uselist[i] = 1;
+	}
+
+	/* garbage collect from roots in trailer */
+	if (dogc)
+	{
+		for (i = 1; i < xt->size; i++)
+			uselist[i] = 0;
+
+		obj = fz_dictgets(xt->trailer, "Info");
+		if (fz_isindirect(obj))
+			garbagecollect(obj);
+
+		obj = fz_dictgets(xt->trailer, "Root");
+		if (fz_isindirect(obj))
+			garbagecollect(obj);
+
+		obj = fz_dictgets(xt->trailer, "ID");
+		if (fz_isindirect(obj))
+			garbagecollect(obj);
+	}
+
+	/* pretty print objects */
+	for (i = 0; i < xt->size; i++)
+	{
+		if (xt->table[i].type == 0)
+			uselist[i] = 0;
+
+		if (xt->table[i].type == 0)
+			genlist[i] = xt->table[i].gen;
+		if (xt->table[i].type == 1)
+			genlist[i] = xt->table[i].gen;
+		if (xt->table[i].type == 2)
+			genlist[i] = 0;
+
+		if (dogc && !uselist[i])
+			continue;
+
+		if (xt->table[i].type == 1 || xt->table[i].type == 2)
+		{
+			ofslist[i] = ftell(out);
+			saveobject(i, genlist[i]);
+		}
+	}
+
+	/* add encryption dictionary if we crypted */
+	if (encryptdict) {
+		xt->size ++;
+		ofslist[xt->size - 1] = ftell(out);
+		genlist[xt->size - 1] = 0;
+		uselist[xt->size - 1] = 1;
+		fprintf(out, "%d %d obj\n", xt->size - 1, 0);
+		printobj(out, encryptdict);
+		fprintf(out, "\nendobj\n\n");
+	}
+
+	/* construct linked list of free object slots */
+	lastfree = 0;
+	for (i = 1; i < xt->size; i++) {
+		if (!uselist[i]) {
+			genlist[i] ++;
+			ofslist[lastfree] = i;
+			lastfree = i;
+		}
+	}
+	/*
+	 * Terminate the list: the last free slot points back to object 0.
+	 * Without this, a slot freed by saveobject() (ObjStm/XRef) would
+	 * keep the file offset recorded before it was trashed.
+	 */
+	ofslist[lastfree] = 0;
+
+	savexref();
+
+	pdf_closexref(xt);
+
+	/* buffered data is written here; a failure means a truncated file */
+	if (fclose(out) != 0)
+		fz_abort(fz_throw("close(%s): %s", argv[optind + 1], strerror(errno)));
+
+	return 0;
+}
+
diff --git a/test/showcmap.c b/test/showcmap.c
new file mode 100644
index 00000000..80575a4c
--- /dev/null
+++ b/test/showcmap.c
@@ -0,0 +1,22 @@
+#include <fitz.h>
+#include <mupdf.h>
+
+/*
+ * showcmap: parse the CMap file named by the first argument and print it
+ * with fz_debugcmap().
+ *
+ * Returns 0 on success and 1 when no file name was given; open and parse
+ * errors are fatal through fz_abort().
+ */
+int main(int argc, char **argv)
+{
+	fz_error *err;
+	fz_cmap *cmap;
+	fz_file *file;
+
+	/* argv[1] is NULL when the program is run without arguments */
+	if (argc < 2)
+	{
+		fprintf(stderr, "usage: showcmap file\n");
+		return 1;
+	}
+
+	err = fz_openfile(&file, argv[1], O_RDONLY);
+	if (err)
+		fz_abort(err);
+
+	err = pdf_parsecmap(&cmap, file);
+	if (err)
+		fz_abort(err);
+
+	fz_debugcmap(cmap);
+
+	return 0;
+}
+