summary | refs | log | tree | commit | diff
path: root/test/pdfclean.c
diff options
context:
space:
mode:
Diffstat (limited to 'test/pdfclean.c')
-rw-r--r-- test/pdfclean.c 526
1 files changed, 81 insertions, 445 deletions
diff --git a/test/pdfclean.c b/test/pdfclean.c
index 00721842..7eb72cda 100644
--- a/test/pdfclean.c
+++ b/test/pdfclean.c
@@ -1,30 +1,6 @@
#include <fitz.h>
#include <mupdf.h>
-#define encrypt encrypt3am
-
-int (*printobj)(FILE*,fz_obj*) = fz_fprintobj;
-
-/*
- * Rewrite PDF with cleaned up syntax, and consolidate the xref table.
- * Remove encryption while we're at it :)
- */
-
-static FILE *out;
-
-static pdf_xref *xt;
-
-static int *ofslist;
-static int *genlist;
-static int *uselist;
-
-static int dorebuild = 0;
-static int doexpand = 0;
-static pdf_crypt *encrypt = nil;
-static fz_obj *encryptdict = nil;
-static fz_obj *id = nil;
-static int dogc = 0;
-
void usage()
{
fprintf(stderr,
@@ -32,469 +8,129 @@ void usage()
" -r\treconstruct broken xref table\n"
" -g\tgarbage collect unused objects\n"
" -x\texpand compressed streams\n"
- " -c\twrite compact objects\n"
" -d -\tset user password for decryption\n"
" -e\tencrypt outfile\n"
- " -u -\tset user password for encryption\n"
- " -o -\tset owner password\n"
- " -p -\tset permissions\n"
- " -n -\tkey length in bits: 40 <= n <= 128\n"
+ " -u -\tset user password for encryption\n"
+ " -o -\tset owner password\n"
+ " -p -\tset permissions\n"
+ " -n -\tkey length in bits: 40 <= n <= 128\n"
);
exit(1);
}
-void garbagecollect(fz_obj *ref);
-
-void gc0(fz_obj *obj)
-{
- int i;
-
- if (fz_isdict(obj))
- for (i = 0; i < fz_dictlen(obj); i++)
- gc0(fz_dictgetval(obj, i));
-
- if (fz_isarray(obj))
- for (i = 0; i < fz_arraylen(obj); i++)
- gc0(fz_arrayget(obj, i));
-
- if (fz_isindirect(obj))
- garbagecollect(obj);
-}
-
-void garbagecollect(fz_obj *ref)
-{
- fz_obj *obj;
- int stmofs;
-
- if (uselist[fz_toobjid(ref)])
- return;
-
- uselist[fz_toobjid(ref)] = 1;
-
- pdf_loadindirect(&obj, xt, ref, &stmofs);
-
- if (stmofs != -1) {
- fz_obj *len = fz_dictgets(obj, "Length");
- if (fz_isindirect(len)) {
- pdf_loadindirect(&len, xt, len, nil);
- fz_dictputs(obj, "Length", len);
- }
- }
-
- gc0(obj);
-}
-
-void decodestream(fz_obj *obj, int ofs, int oid, int gid)
-{
- fz_error *error;
- unsigned char buf[512];
- fz_filter *filter;
- fz_file *sf;
- int n;
- int len;
- fz_obj *lenobj;
- fz_obj *newdict;
-
- /* count length of decoded data */
- len = 0;
-
- error = pdf_buildstream(&filter, xt, obj, oid, gid);
- if (error) fz_abort(error);
-
- n = fz_seek(xt->file, ofs);
-
- fz_chainfile(&sf, xt->file, filter);
-
- while (1) {
- n = fz_read(sf, buf, sizeof buf);
- if (n < 0)
- fz_abort(fz_ferror(sf));
- if (n == 0)
- break;
- len += n;
- }
-
- fz_unchainfile(sf);
-
- /* change dictionary of object... */
- fz_copydict(&newdict, obj);
-
- fz_newint(&lenobj, len);
- fz_dictputs(newdict, "Length", lenobj);
- fz_dropobj(lenobj);
-
- fz_dictdels(newdict, "Filter");
- fz_dictdels(newdict, "DecodeParms");
-
- /* save object */
- fprintf(out, "%d %d obj\n", oid, gid);
- printobj(out, newdict);
- fprintf(out, "\n");
- fprintf(out, "stream\n");
-
- fz_dropobj(newdict);
-
- /* now decode stream for real */
- error = pdf_buildstream(&filter, xt, obj, oid, gid);
- if (error) fz_abort(error);
-
- fz_seek(xt->file, ofs);
-
- if (encrypt) {
- fz_filter *cf;
- pdf_cryptstm(&cf, encrypt, oid, gid);
- fz_newpipeline(&filter, filter, cf);
- }
-
- fz_chainfile(&sf, xt->file, filter);
- while (1) {
- n = fz_read(sf, buf, sizeof buf);
- if (n < 0)
- fz_abort(fz_ferror(sf));
- if (n == 0)
- break;
- fwrite(buf, 1, n, out);
- }
- fz_unchainfile(sf);
-
- /* the end */
- fprintf(out, "endstream\nendobj\n\n");
-
- return;
-}
-
-void savestream(fz_obj *obj, int ofs, int oid, int gid)
-{
- unsigned char buf[512];
- fz_filter *filter;
- fz_file *sf;
- int len;
- int n;
-
- /* save object */
- fprintf(out, "%d %d obj\n", oid, gid);
- printobj(out, obj);
- fprintf(out, "\n");
- fprintf(out, "stream\n");
-
- /* copy stream */
- obj = fz_dictgets(obj, "Length");
- if (fz_isindirect(obj)) {
- pdf_loadindirect(&obj, xt, obj, nil);
- len = fz_toint(obj);
- fz_dropobj(obj);
- }
- else {
- len = fz_toint(obj);
- }
-
- fz_newnullfilter(&filter, len);
-
- if (xt->crypt) {
- fz_filter *cf;
- pdf_cryptstm(&cf, xt->crypt, oid, gid);
- fz_newpipeline(&filter, cf, filter);
- }
-
- if (encrypt) {
- fz_filter *cf;
- pdf_cryptstm(&cf, encrypt, oid, gid);
- fz_newpipeline(&filter, filter, cf);
- }
-
- fz_seek(xt->file, ofs);
- fz_chainfile(&sf, xt->file, filter);
- while (1)
- {
- n = fz_read(sf, buf, sizeof buf);
- if (n == 0)
- break;
- if (n < 0)
- fz_abort(fz_ferror(sf));
- fwrite(buf, 1, n, out);
- }
- fz_unchainfile(sf);
-
- /* the end */
- fprintf(out, "endstream\nendobj\n\n");
-}
-
-void deleteobject(int oid, int gid)
-{
- uselist[oid] = 0;
-}
-
-void saveobject(int oid, int gid)
+void expandstreams(pdf_xref *xref)
{
fz_error *error;
- fz_obj *obj;
- fz_obj *t;
+ fz_obj *stmobj;
int stmofs;
+ fz_buffer *buf;
+ fz_obj *stmlen;
+ int i, gen;
- error = pdf_loadobj(&obj, xt, oid, gid, &stmofs);
- if (error) fz_abort(error);
-
- /* trash ObjStm and XrefStm objects */
- if (fz_isdict(obj)) {
- t = fz_dictgets(obj, "Type");
- if (fz_isname(t) && strcmp(fz_toname(t), "ObjStm") == 0) {
- deleteobject(oid, gid);
- fz_dropobj(obj);
- return;
- }
- if (fz_isname(t) && strcmp(fz_toname(t), "XRef") == 0) {
- deleteobject(oid, gid);
- fz_dropobj(obj);
- return;
+ for (i = 0; i < xref->size; i++)
+ {
+ if (xref->table[i].type == 'n')
+ {
+ gen = xref->table[i].gen;
+
+ error = pdf_loadobject0(&stmobj, xref, i, gen, &stmofs);
+ if (error) fz_abort(error);
+
+ if (stmofs != -1)
+ {
+ error = pdf_readstream0(&buf, xref, stmobj, i, gen, stmofs);
+ if (error) fz_abort(error);
+
+ fz_dictdels(stmobj, "Filter");
+ fz_dictdels(stmobj, "DecodeParms");
+
+ error = fz_newint(&stmlen, buf->wp - buf->rp);
+ if (error) fz_abort(error);
+ error = fz_dictputs(stmobj, "Length", stmlen);
+ if (error) fz_abort(error);
+ fz_dropobj(stmlen);
+
+ error = pdf_saveobject(xref, i, gen, stmobj);
+ if (error) fz_abort(error);
+ error = pdf_savestream(xref, i, gen, buf);
+ if (error) fz_abort(error);
+ }
}
}
-
- if (encrypt)
- pdf_cryptobj(encrypt, obj, oid, gid);
-
- if (stmofs == -1) {
- fprintf(out, "%d %d obj\n", oid, gid);
- printobj(out, obj);
- fprintf(out, "\nendobj\n\n");
- }
- else if (doexpand) {
- decodestream(obj, stmofs, oid, gid);
- }
- else {
- savestream(obj, stmofs, oid, gid);
- }
-
- fz_dropobj(obj);
-}
-
-void savexref(void)
-{
- fz_obj *newtrailer;
- fz_obj *obj;
- int startxref;
- int i;
-
- startxref = ftell(out);
-
- fprintf(out, "xref\n0 %d\n", xt->size);
- for (i = 0; i < xt->size; i++) {
- if (uselist[i])
- fprintf(out, "%010d %05d n \n", ofslist[i], genlist[i]);
- else
- fprintf(out, "%010d %05d f \n", ofslist[i], genlist[i]);
- }
- fprintf(out, "\n");
-
- fz_newdict(&newtrailer, 5);
-
- fz_newint(&obj, xt->size);
- fz_dictputs(newtrailer, "Size", obj);
- fz_dropobj(obj);
-
- obj = fz_dictgets(xt->trailer, "Info");
- if (obj) fz_dictputs(newtrailer, "Info", obj);
-
- obj = fz_dictgets(xt->trailer, "Root");
- if (obj) fz_dictputs(newtrailer, "Root", obj);
-
- fz_dictputs(newtrailer, "ID", id);
-
- if (encryptdict) {
- fz_newindirect(&obj, xt->size - 1, 0);
- fz_dictputs(newtrailer, "Encrypt", obj);
- fz_dropobj(obj);
- }
-
- fprintf(out, "trailer\n");
- printobj(out, newtrailer);
- fprintf(out, "\n\n");
-
- fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref);
}
int main(int argc, char **argv)
{
fz_error *error;
- fz_obj *obj;
- int lastfree;
- char *filename;
- int i;
+ char *infile;
+ char *outfile;
+ pdf_xref *xref;
int c;
int doencrypt = 0;
- char *password = "";
+ int dorepair = 0;
+ int doexpand = 0;
+ int dogc = 0;
+
char *userpw = "";
char *ownerpw = "";
int perms = -4; /* 0xfffffffc */
int keylen = 40;
+ char *password = "";
- while (1)
+ while ((c = getopt(argc, argv, "rgxd:eu:o:p:n:")) != -1)
{
- c = getopt(argc, argv, "rcxgeu:o:p:n:d:");
-
- if (c == -1)
- break;
-
switch (c)
{
- case 'r':
- dorebuild ++;
- break;
- case 'x':
- doexpand ++;
- break;
- case 'g':
- dogc ++;
- break;
- case 'c':
- printobj = fz_fprintcobj;
- break;
- case 'd':
- password = optarg;
- break;
- case 'e':
- doencrypt ++;
- break;
- case 'u':
- userpw = optarg;
- break;
- case 'o':
- ownerpw = optarg;
- break;
- case 'p':
- perms = atoi(optarg);
- break;
- case 'n':
- keylen = atoi(optarg);
- break;
- default:
- usage();
+ case 'r': ++ dorepair; break;
+ case 'x': ++ doexpand; break;
+ case 'g': ++ dogc; break;
+ case 'e': ++ doencrypt; break;
+ case 'u': userpw = optarg; break;
+ case 'o': ownerpw = optarg; break;
+ case 'p': perms = atoi(optarg); break;
+ case 'n': keylen = atoi(optarg); break;
+ case 'd': password = optarg; break;
+ default: usage();
}
}
- if (argc - optind != 2)
+ if (argc - optind < 2)
usage();
- filename = argv[optind];
+ infile = argv[optind++];
+ outfile = argv[optind++];
- if (dorebuild)
- error = pdf_rebuildxref(&xt, filename);
- else
- error = pdf_openxref(&xt, filename);
- if (error) fz_abort(error);
-
- if (doencrypt && keylen > 40 && xt->version < 1.4)
- xt->version = 1.4;
-
- id = fz_dictgets(xt->trailer, "ID");
- obj = fz_dictgets(xt->trailer, "Encrypt");
- if (fz_isindirect(obj)) {
- pdf_loadindirect(&obj, xt, obj, nil);
- }
- if (obj && id) {
- pdf_newdecrypt(&xt->crypt, obj, id);
- error = pdf_setpassword(xt->crypt, password);
- if (error) fz_abort(error);
- }
-
- id = fz_dictgets(xt->trailer, "ID");
- if (!id)
- fz_parseobj(&id, "[ (foobar) (foobar) ]");
-
- if (doencrypt)
- pdf_newencrypt(&encrypt, &encryptdict, userpw, ownerpw, perms, keylen, id);
-
- out = fopen(argv[optind + 1], "w");
- if (!out) {
- fz_abort(fz_throw("open(%s): %s", argv[optind + 1], strerror(errno)));
- }
-
- fprintf(out, "%%PDF-%.1f\n\n", xt->version);
-
- ofslist = fz_malloc(sizeof(int) * (xt->size + 1));
- genlist = fz_malloc(sizeof(int) * (xt->size + 1));
- uselist = fz_malloc(sizeof(int) * (xt->size + 1));
-
- lastfree = 0;
-
- ofslist[0] = 0;
- genlist[0] = 65535;
- uselist[0] = 0;
-
- for (i = 1; i < xt->size; i++) {
- ofslist[i] = 0;
- genlist[i] = 0;
- uselist[i] = 1;
- }
+ error = pdf_newxref(&xref);
+ if (error)
+ fz_abort(error);
- /* garbage collect from roots in trailer */
- if (dogc)
- {
- for (i = 1; i < xt->size; i++)
- uselist[i] = 0;
-
- obj = fz_dictgets(xt->trailer, "Info");
- if (fz_isindirect(obj))
- garbagecollect(obj);
-
- obj = fz_dictgets(xt->trailer, "Root");
- if (fz_isindirect(obj))
- garbagecollect(obj);
+ if (dorepair)
+ error = pdf_repairxref(xref, infile);
+ else
+ error = pdf_openxref(xref, infile);
+ if (error)
+ fz_abort(error);
- obj = fz_dictgets(xt->trailer, "ID");
- if (fz_isindirect(obj))
- garbagecollect(obj);
- }
+ error = pdf_decryptxref(xref);
+ if (error)
+ fz_abort(error);
- /* pretty print objects */
- for (i = 0; i < xt->size; i++)
+ if (xref->crypt)
{
- if (xt->table[i].type == 0)
- uselist[i] = 0;
-
- if (xt->table[i].type == 0)
- genlist[i] = xt->table[i].gen;
- if (xt->table[i].type == 1)
- genlist[i] = xt->table[i].gen;
- if (xt->table[i].type == 2)
- genlist[i] = 0;
-
- if (dogc && !uselist[i])
- continue;
-
- if (xt->table[i].type == 1 || xt->table[i].type == 2)
- {
- ofslist[i] = ftell(out);
- saveobject(i, genlist[i]);
- }
- }
-
- /* add encryption dictionary if we crypted */
- if (encryptdict) {
- xt->size ++;
- ofslist[xt->size - 1] = ftell(out);
- genlist[xt->size - 1] = 0;
- uselist[xt->size - 1] = 1;
- fprintf(out, "%d %d obj\n", xt->size - 1, 0);
- printobj(out, encryptdict);
- fprintf(out, "\nendobj\n\n");
- }
-
- /* construct linked list of free object slots */
- lastfree = 0;
- for (i = 1; i < xt->size; i++) {
- if (!uselist[i]) {
- genlist[i] ++;
- ofslist[lastfree] = i;
- lastfree = i;
- }
+ error = pdf_setpassword(xref->crypt, password);
+ if (error) fz_abort(error);
}
- savexref();
+ if (doexpand)
+ expandstreams(xref);
- pdf_closexref(xt);
+printf("saving %s...\n", outfile);
+ error = pdf_savepdf(xref, outfile);
+ if (error)
+ fz_abort(error);
- fclose(out);
+ pdf_closexref(xref);
return 0;
}