summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r-- test/pdfclean.c 526
-rw-r--r-- test/pdfdebug.c 152
2 files changed, 89 insertions, 589 deletions
diff --git a/test/pdfclean.c b/test/pdfclean.c
index 00721842..7eb72cda 100644
--- a/test/pdfclean.c
+++ b/test/pdfclean.c
@@ -1,30 +1,6 @@
#include <fitz.h>
#include <mupdf.h>
-#define encrypt encrypt3am
-
-int (*printobj)(FILE*,fz_obj*) = fz_fprintobj;
-
-/*
- * Rewrite PDF with cleaned up syntax, and consolidate the xref table.
- * Remove encryption while we're at it :)
- */
-
-static FILE *out;
-
-static pdf_xref *xt;
-
-static int *ofslist;
-static int *genlist;
-static int *uselist;
-
-static int dorebuild = 0;
-static int doexpand = 0;
-static pdf_crypt *encrypt = nil;
-static fz_obj *encryptdict = nil;
-static fz_obj *id = nil;
-static int dogc = 0;
-
void usage()
{
fprintf(stderr,
@@ -32,469 +8,129 @@ void usage()
" -r\treconstruct broken xref table\n"
" -g\tgarbage collect unused objects\n"
" -x\texpand compressed streams\n"
- " -c\twrite compact objects\n"
" -d -\tset user password for decryption\n"
" -e\tencrypt outfile\n"
- " -u -\tset user password for encryption\n"
- " -o -\tset owner password\n"
- " -p -\tset permissions\n"
- " -n -\tkey length in bits: 40 <= n <= 128\n"
+ " -u -\tset user password for encryption\n"
+ " -o -\tset owner password\n"
+ " -p -\tset permissions\n"
+ " -n -\tkey length in bits: 40 <= n <= 128\n"
);
exit(1);
}
-void garbagecollect(fz_obj *ref);
-
-void gc0(fz_obj *obj)
-{
- int i;
-
- if (fz_isdict(obj))
- for (i = 0; i < fz_dictlen(obj); i++)
- gc0(fz_dictgetval(obj, i));
-
- if (fz_isarray(obj))
- for (i = 0; i < fz_arraylen(obj); i++)
- gc0(fz_arrayget(obj, i));
-
- if (fz_isindirect(obj))
- garbagecollect(obj);
-}
-
-void garbagecollect(fz_obj *ref)
-{
- fz_obj *obj;
- int stmofs;
-
- if (uselist[fz_toobjid(ref)])
- return;
-
- uselist[fz_toobjid(ref)] = 1;
-
- pdf_loadindirect(&obj, xt, ref, &stmofs);
-
- if (stmofs != -1) {
- fz_obj *len = fz_dictgets(obj, "Length");
- if (fz_isindirect(len)) {
- pdf_loadindirect(&len, xt, len, nil);
- fz_dictputs(obj, "Length", len);
- }
- }
-
- gc0(obj);
-}
-
-void decodestream(fz_obj *obj, int ofs, int oid, int gid)
-{
- fz_error *error;
- unsigned char buf[512];
- fz_filter *filter;
- fz_file *sf;
- int n;
- int len;
- fz_obj *lenobj;
- fz_obj *newdict;
-
- /* count length of decoded data */
- len = 0;
-
- error = pdf_buildstream(&filter, xt, obj, oid, gid);
- if (error) fz_abort(error);
-
- n = fz_seek(xt->file, ofs);
-
- fz_chainfile(&sf, xt->file, filter);
-
- while (1) {
- n = fz_read(sf, buf, sizeof buf);
- if (n < 0)
- fz_abort(fz_ferror(sf));
- if (n == 0)
- break;
- len += n;
- }
-
- fz_unchainfile(sf);
-
- /* change dictionary of object... */
- fz_copydict(&newdict, obj);
-
- fz_newint(&lenobj, len);
- fz_dictputs(newdict, "Length", lenobj);
- fz_dropobj(lenobj);
-
- fz_dictdels(newdict, "Filter");
- fz_dictdels(newdict, "DecodeParms");
-
- /* save object */
- fprintf(out, "%d %d obj\n", oid, gid);
- printobj(out, newdict);
- fprintf(out, "\n");
- fprintf(out, "stream\n");
-
- fz_dropobj(newdict);
-
- /* now decode stream for real */
- error = pdf_buildstream(&filter, xt, obj, oid, gid);
- if (error) fz_abort(error);
-
- fz_seek(xt->file, ofs);
-
- if (encrypt) {
- fz_filter *cf;
- pdf_cryptstm(&cf, encrypt, oid, gid);
- fz_newpipeline(&filter, filter, cf);
- }
-
- fz_chainfile(&sf, xt->file, filter);
- while (1) {
- n = fz_read(sf, buf, sizeof buf);
- if (n < 0)
- fz_abort(fz_ferror(sf));
- if (n == 0)
- break;
- fwrite(buf, 1, n, out);
- }
- fz_unchainfile(sf);
-
- /* the end */
- fprintf(out, "endstream\nendobj\n\n");
-
- return;
-}
-
-void savestream(fz_obj *obj, int ofs, int oid, int gid)
-{
- unsigned char buf[512];
- fz_filter *filter;
- fz_file *sf;
- int len;
- int n;
-
- /* save object */
- fprintf(out, "%d %d obj\n", oid, gid);
- printobj(out, obj);
- fprintf(out, "\n");
- fprintf(out, "stream\n");
-
- /* copy stream */
- obj = fz_dictgets(obj, "Length");
- if (fz_isindirect(obj)) {
- pdf_loadindirect(&obj, xt, obj, nil);
- len = fz_toint(obj);
- fz_dropobj(obj);
- }
- else {
- len = fz_toint(obj);
- }
-
- fz_newnullfilter(&filter, len);
-
- if (xt->crypt) {
- fz_filter *cf;
- pdf_cryptstm(&cf, xt->crypt, oid, gid);
- fz_newpipeline(&filter, cf, filter);
- }
-
- if (encrypt) {
- fz_filter *cf;
- pdf_cryptstm(&cf, encrypt, oid, gid);
- fz_newpipeline(&filter, filter, cf);
- }
-
- fz_seek(xt->file, ofs);
- fz_chainfile(&sf, xt->file, filter);
- while (1)
- {
- n = fz_read(sf, buf, sizeof buf);
- if (n == 0)
- break;
- if (n < 0)
- fz_abort(fz_ferror(sf));
- fwrite(buf, 1, n, out);
- }
- fz_unchainfile(sf);
-
- /* the end */
- fprintf(out, "endstream\nendobj\n\n");
-}
-
-void deleteobject(int oid, int gid)
-{
- uselist[oid] = 0;
-}
-
-void saveobject(int oid, int gid)
+void expandstreams(pdf_xref *xref)
{
fz_error *error;
- fz_obj *obj;
- fz_obj *t;
+ fz_obj *stmobj;
int stmofs;
+ fz_buffer *buf;
+ fz_obj *stmlen;
+ int i, gen;
- error = pdf_loadobj(&obj, xt, oid, gid, &stmofs);
- if (error) fz_abort(error);
-
- /* trash ObjStm and XrefStm objects */
- if (fz_isdict(obj)) {
- t = fz_dictgets(obj, "Type");
- if (fz_isname(t) && strcmp(fz_toname(t), "ObjStm") == 0) {
- deleteobject(oid, gid);
- fz_dropobj(obj);
- return;
- }
- if (fz_isname(t) && strcmp(fz_toname(t), "XRef") == 0) {
- deleteobject(oid, gid);
- fz_dropobj(obj);
- return;
+ for (i = 0; i < xref->size; i++)
+ {
+ if (xref->table[i].type == 'n')
+ {
+ gen = xref->table[i].gen;
+
+ error = pdf_loadobject0(&stmobj, xref, i, gen, &stmofs);
+ if (error) fz_abort(error);
+
+ if (stmofs != -1)
+ {
+ error = pdf_readstream0(&buf, xref, stmobj, i, gen, stmofs);
+ if (error) fz_abort(error);
+
+ fz_dictdels(stmobj, "Filter");
+ fz_dictdels(stmobj, "DecodeParms");
+
+ error = fz_newint(&stmlen, buf->wp - buf->rp);
+ if (error) fz_abort(error);
+ error = fz_dictputs(stmobj, "Length", stmlen);
+ if (error) fz_abort(error);
+ fz_dropobj(stmlen);
+
+ error = pdf_saveobject(xref, i, gen, stmobj);
+ if (error) fz_abort(error);
+ error = pdf_savestream(xref, i, gen, buf);
+ if (error) fz_abort(error);
+ }
}
}
-
- if (encrypt)
- pdf_cryptobj(encrypt, obj, oid, gid);
-
- if (stmofs == -1) {
- fprintf(out, "%d %d obj\n", oid, gid);
- printobj(out, obj);
- fprintf(out, "\nendobj\n\n");
- }
- else if (doexpand) {
- decodestream(obj, stmofs, oid, gid);
- }
- else {
- savestream(obj, stmofs, oid, gid);
- }
-
- fz_dropobj(obj);
-}
-
-void savexref(void)
-{
- fz_obj *newtrailer;
- fz_obj *obj;
- int startxref;
- int i;
-
- startxref = ftell(out);
-
- fprintf(out, "xref\n0 %d\n", xt->size);
- for (i = 0; i < xt->size; i++) {
- if (uselist[i])
- fprintf(out, "%010d %05d n \n", ofslist[i], genlist[i]);
- else
- fprintf(out, "%010d %05d f \n", ofslist[i], genlist[i]);
- }
- fprintf(out, "\n");
-
- fz_newdict(&newtrailer, 5);
-
- fz_newint(&obj, xt->size);
- fz_dictputs(newtrailer, "Size", obj);
- fz_dropobj(obj);
-
- obj = fz_dictgets(xt->trailer, "Info");
- if (obj) fz_dictputs(newtrailer, "Info", obj);
-
- obj = fz_dictgets(xt->trailer, "Root");
- if (obj) fz_dictputs(newtrailer, "Root", obj);
-
- fz_dictputs(newtrailer, "ID", id);
-
- if (encryptdict) {
- fz_newindirect(&obj, xt->size - 1, 0);
- fz_dictputs(newtrailer, "Encrypt", obj);
- fz_dropobj(obj);
- }
-
- fprintf(out, "trailer\n");
- printobj(out, newtrailer);
- fprintf(out, "\n\n");
-
- fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref);
}
int main(int argc, char **argv)
{
fz_error *error;
- fz_obj *obj;
- int lastfree;
- char *filename;
- int i;
+ char *infile;
+ char *outfile;
+ pdf_xref *xref;
int c;
int doencrypt = 0;
- char *password = "";
+ int dorepair = 0;
+ int doexpand = 0;
+ int dogc = 0;
+
char *userpw = "";
char *ownerpw = "";
int perms = -4; /* 0xfffffffc */
int keylen = 40;
+ char *password = "";
- while (1)
+ while ((c = getopt(argc, argv, "rgxd:eu:o:p:n:")) != -1)
{
- c = getopt(argc, argv, "rcxgeu:o:p:n:d:");
-
- if (c == -1)
- break;
-
switch (c)
{
- case 'r':
- dorebuild ++;
- break;
- case 'x':
- doexpand ++;
- break;
- case 'g':
- dogc ++;
- break;
- case 'c':
- printobj = fz_fprintcobj;
- break;
- case 'd':
- password = optarg;
- break;
- case 'e':
- doencrypt ++;
- break;
- case 'u':
- userpw = optarg;
- break;
- case 'o':
- ownerpw = optarg;
- break;
- case 'p':
- perms = atoi(optarg);
- break;
- case 'n':
- keylen = atoi(optarg);
- break;
- default:
- usage();
+ case 'r': ++ dorepair; break;
+ case 'x': ++ doexpand; break;
+ case 'g': ++ dogc; break;
+ case 'e': ++ doencrypt; break;
+ case 'u': userpw = optarg; break;
+ case 'o': ownerpw = optarg; break;
+ case 'p': perms = atoi(optarg); break;
+ case 'n': keylen = atoi(optarg); break;
+ case 'd': password = optarg; break;
+ default: usage();
}
}
- if (argc - optind != 2)
+ if (argc - optind < 2)
usage();
- filename = argv[optind];
+ infile = argv[optind++];
+ outfile = argv[optind++];
- if (dorebuild)
- error = pdf_rebuildxref(&xt, filename);
- else
- error = pdf_openxref(&xt, filename);
- if (error) fz_abort(error);
-
- if (doencrypt && keylen > 40 && xt->version < 1.4)
- xt->version = 1.4;
-
- id = fz_dictgets(xt->trailer, "ID");
- obj = fz_dictgets(xt->trailer, "Encrypt");
- if (fz_isindirect(obj)) {
- pdf_loadindirect(&obj, xt, obj, nil);
- }
- if (obj && id) {
- pdf_newdecrypt(&xt->crypt, obj, id);
- error = pdf_setpassword(xt->crypt, password);
- if (error) fz_abort(error);
- }
-
- id = fz_dictgets(xt->trailer, "ID");
- if (!id)
- fz_parseobj(&id, "[ (foobar) (foobar) ]");
-
- if (doencrypt)
- pdf_newencrypt(&encrypt, &encryptdict, userpw, ownerpw, perms, keylen, id);
-
- out = fopen(argv[optind + 1], "w");
- if (!out) {
- fz_abort(fz_throw("open(%s): %s", argv[optind + 1], strerror(errno)));
- }
-
- fprintf(out, "%%PDF-%.1f\n\n", xt->version);
-
- ofslist = fz_malloc(sizeof(int) * (xt->size + 1));
- genlist = fz_malloc(sizeof(int) * (xt->size + 1));
- uselist = fz_malloc(sizeof(int) * (xt->size + 1));
-
- lastfree = 0;
-
- ofslist[0] = 0;
- genlist[0] = 65535;
- uselist[0] = 0;
-
- for (i = 1; i < xt->size; i++) {
- ofslist[i] = 0;
- genlist[i] = 0;
- uselist[i] = 1;
- }
+ error = pdf_newxref(&xref);
+ if (error)
+ fz_abort(error);
- /* garbage collect from roots in trailer */
- if (dogc)
- {
- for (i = 1; i < xt->size; i++)
- uselist[i] = 0;
-
- obj = fz_dictgets(xt->trailer, "Info");
- if (fz_isindirect(obj))
- garbagecollect(obj);
-
- obj = fz_dictgets(xt->trailer, "Root");
- if (fz_isindirect(obj))
- garbagecollect(obj);
+ if (dorepair)
+ error = pdf_repairxref(xref, infile);
+ else
+ error = pdf_openxref(xref, infile);
+ if (error)
+ fz_abort(error);
- obj = fz_dictgets(xt->trailer, "ID");
- if (fz_isindirect(obj))
- garbagecollect(obj);
- }
+ error = pdf_decryptxref(xref);
+ if (error)
+ fz_abort(error);
- /* pretty print objects */
- for (i = 0; i < xt->size; i++)
+ if (xref->crypt)
{
- if (xt->table[i].type == 0)
- uselist[i] = 0;
-
- if (xt->table[i].type == 0)
- genlist[i] = xt->table[i].gen;
- if (xt->table[i].type == 1)
- genlist[i] = xt->table[i].gen;
- if (xt->table[i].type == 2)
- genlist[i] = 0;
-
- if (dogc && !uselist[i])
- continue;
-
- if (xt->table[i].type == 1 || xt->table[i].type == 2)
- {
- ofslist[i] = ftell(out);
- saveobject(i, genlist[i]);
- }
- }
-
- /* add encryption dictionary if we crypted */
- if (encryptdict) {
- xt->size ++;
- ofslist[xt->size - 1] = ftell(out);
- genlist[xt->size - 1] = 0;
- uselist[xt->size - 1] = 1;
- fprintf(out, "%d %d obj\n", xt->size - 1, 0);
- printobj(out, encryptdict);
- fprintf(out, "\nendobj\n\n");
- }
-
- /* construct linked list of free object slots */
- lastfree = 0;
- for (i = 1; i < xt->size; i++) {
- if (!uselist[i]) {
- genlist[i] ++;
- ofslist[lastfree] = i;
- lastfree = i;
- }
+ error = pdf_setpassword(xref->crypt, password);
+ if (error) fz_abort(error);
}
- savexref();
+ if (doexpand)
+ expandstreams(xref);
- pdf_closexref(xt);
+printf("saving %s...\n", outfile);
+ error = pdf_savepdf(xref, outfile);
+ if (error)
+ fz_abort(error);
- fclose(out);
+ pdf_closexref(xref);
return 0;
}
diff --git a/test/pdfdebug.c b/test/pdfdebug.c
index d63a720c..ddc10dff 100644
--- a/test/pdfdebug.c
+++ b/test/pdfdebug.c
@@ -5,11 +5,10 @@ static char *password = "";
static int dodecode = 0;
static int dorepair = 0;
static int doprintxref = 0;
-static int doprintpages = 0;
void usage()
{
- fprintf(stderr, "usage: mupdf [-drxp] [-u password] file.pdf\n");
+ fprintf(stderr, "usage: pdfdebug [-drx] [-u password] file.pdf [oid ...]\n");
exit(1);
}
@@ -127,114 +126,11 @@ void printobject(pdf_xref *xref, int oid, int gid)
fz_dropobj(obj);
}
-/*
- * Draw page
- */
-
-void runcsi(pdf_xref *xref, pdf_csi *csi, pdf_resources *rdb, fz_obj *stmref)
-{
- fz_error *error;
-
- error = pdf_openstream(xref, stmref);
- if (error) fz_abort(error);
-
- error = pdf_runcsi(csi, rdb, xref->file);
- if (error) fz_abort(error);
-
- pdf_closestream(xref);
-}
-
-void showpage(pdf_xref *xref, fz_obj *page)
-{
- fz_error *error;
- pdf_csi *csi;
- pdf_resources *rdb = nil;
- fz_obj *resources;
- fz_obj *contents;
- int i;
-
- fz_debugobj(page);
- printf("\n");
-
- resources = fz_dictgets(page, "Resources");
- if (resources)
- {
- error = pdf_resolve(&resources, xref);
- if (error) fz_abort(error);
-
- error = pdf_loadresources(&rdb, xref, resources);
- if (error) fz_abort(error);
-
- // parse resources into native res dict
- fz_dropobj(resources);
- }
- else
- fz_abort(fz_throw("syntaxerror: missing resource dictionary"));
-
-printf("resources:\n");
-printf(" font:\n");
-fz_debugobj(rdb->font);
-printf("\n extgstate:\n");
-fz_debugobj(rdb->extgstate);
-printf("\nfitz tree:\n");
-
- error = pdf_newcsi(&csi);
- if (error) fz_abort(error);
-
- contents = fz_dictgets(page, "Contents");
- if (contents)
- {
- if (fz_isarray(contents))
- {
- for (i = 0; i < fz_arraylen(contents); i++)
- {
- runcsi(xref, csi, rdb, fz_arrayget(contents, i));
- }
- }
- else
- {
- // XXX resolve and check if it is an array
- runcsi(xref, csi, rdb, contents);
- }
- }
-
- fz_debugtree(csi->tree);
-
- {
- fz_pixmap *pix;
- fz_renderer *gc;
- fz_matrix ctm;
-
-#define W 612
-#define H 792
-
-#define xW 1106
-#define xH 1548
-
- fz_newrenderer(&gc);
- fz_newpixmap(&pix, 0, 0, W, H, 1, 0);
- ctm = fz_concat(fz_translate(0, -H), fz_scale(1,-1));
-
- memset(pix->samples, 0x00, pix->stride * pix->h * 2);
-
-printf("rendering!\n");
- fz_rendernode(gc, csi->tree->root, ctm, pix);
-printf("done!\n");
- fz_debugpixmap(pix);
-
- fz_freepixmap(pix);
- fz_freerenderer(gc);
- }
-
- pdf_freecsi(csi);
-}
-
int main(int argc, char **argv)
{
fz_error *error;
char *filename;
pdf_xref *xref;
- pdf_pagetree *pages;
int c;
while ((c = getopt(argc, argv, "drxopu:")) != -1)
@@ -250,9 +146,6 @@ int main(int argc, char **argv)
case 'x':
doprintxref ++;
break;
- case 'p':
- doprintpages ++;
- break;
case 'u':
password = optarg;
break;
@@ -290,48 +183,19 @@ int main(int argc, char **argv)
if (doprintxref)
pdf_debugxref(xref);
- if (doprintpages)
+ if (optind == argc)
{
- error = pdf_loadpagetree(&pages, xref);
- if (error) fz_abort(error);
-
- if (optind == argc)
- {
- printf("pagetree\n");
- pdf_debugpagetree(pages);
- printf("\n");
- }
- else
- {
- for ( ; optind < argc; optind++)
- {
- int page = atoi(argv[optind]);
- if (page < 1 || page > pages->count)
- fprintf(stderr, "page out of bounds: %d\n", page);
- printf("page %d\n", page);
- showpage(xref, pages->pobj[page - 1]);
- }
- }
+ printf("trailer\n");
+ fz_debugobj(xref->trailer);
+ printf("\n");
}
- else
+ for ( ; optind < argc; optind++)
{
- if (optind == argc)
- {
- printf("trailer\n");
- fz_debugobj(xref->trailer);
- printf("\n");
- }
-
- for ( ; optind < argc; optind++)
- {
- printobject(xref, atoi(argv[optind]), 0);
- printf("\n");
- }
+ printobject(xref, atoi(argv[optind]), 0);
+ printf("\n");
}
-printf("done.\n");
-
pdf_closexref(xref);
return 0;