diff options
-rw-r--r-- | Jamfile | 3 | ||||
-rw-r--r-- | include/mupdf.h | 6 | ||||
-rw-r--r-- | mupdf/doctor.c | 166 | ||||
-rw-r--r-- | mupdf/save.c | 31 | ||||
-rw-r--r-- | mupdf/stream.c | 74 | ||||
-rw-r--r-- | mupdf/xref.c | 42 | ||||
-rw-r--r-- | test/pdfclean.c | 1 | ||||
-rw-r--r-- | test/pdfdebug.c | 16 | ||||
-rw-r--r-- | test/pdfmerge.c | 237 |
9 files changed, 520 insertions, 56 deletions
@@ -116,7 +116,8 @@ LINKLIBS = Main pdfclean : test/pdfclean.c ; Main pdfdebug : test/pdfdebug.c ; +Main pdfmerge : test/pdfmerge.c ; Main pdfrip : test/pdfrip.c ; -LinkLibraries pdfclean pdfdebug pdfrip : libmupdf libfitz ; +LinkLibraries pdfclean pdfdebug pdfmerge pdfrip : libmupdf libfitz ; diff --git a/include/mupdf.h b/include/mupdf.h index 69dc248f..cbacdbd3 100644 --- a/include/mupdf.h +++ b/include/mupdf.h @@ -79,11 +79,13 @@ struct pdf_crypt_s /* stream.c */ fz_error *pdf_buildfilter(fz_filter**, pdf_xref*, fz_obj *stm, int oid, int gid); +fz_error *pdf_openrawstream0(pdf_xref*, fz_obj *stmobj, int oid, int gid, int ofs); +fz_error *pdf_readrawstream0(fz_buffer **bufp, pdf_xref*, fz_obj *stmobj, int oid, int gid, int ofs); fz_error *pdf_openstream0(pdf_xref*, fz_obj *stmobj, int oid, int gid, int ofs); fz_error *pdf_openstream(pdf_xref*, fz_obj *stmref); -void pdf_closestream(pdf_xref*); fz_error *pdf_readstream0(fz_buffer **bufp, pdf_xref*, fz_obj *stmobj, int oid, int gid, int ofs); fz_error *pdf_readstream(fz_buffer **bufp, pdf_xref*, fz_obj *stmref); +void pdf_closestream(pdf_xref*); /* crypt.c */ fz_error *pdf_newdecrypt(pdf_crypt **cp, fz_obj *enc, fz_obj *id); @@ -102,6 +104,7 @@ fz_error *pdf_readobjstm(pdf_xref *xref, int oid, int gid, unsigned char *buf, i /* xref.c */ fz_error *pdf_newxref(pdf_xref **xrefp); +fz_error *pdf_emptyxref(pdf_xref *xref, float version); fz_error *pdf_decryptxref(pdf_xref *xref); void pdf_closexref(pdf_xref*); void pdf_debugxref(pdf_xref*); @@ -125,6 +128,7 @@ fz_error *pdf_deletestream(pdf_xref *xref, int oid, int gid); /* doctor.c */ fz_error *pdf_garbagecollect(pdf_xref *xref); +fz_error *pdf_transplant(pdf_xref *dst, pdf_xref *src, fz_obj **newp, fz_obj *old); /* save.c */ fz_error *pdf_saveincrementalpdf(pdf_xref *xref, char *path); diff --git a/mupdf/doctor.c b/mupdf/doctor.c index fcc9032e..c951276b 100644 --- a/mupdf/doctor.c +++ b/mupdf/doctor.c @@ -1,10 +1,6 @@ #include <fitz.h> #include <mupdf.h> -/* - * 
Garbage collection - */ - static fz_error *sweepref(pdf_xref *xref, fz_obj *ref); static fz_error * @@ -71,6 +67,10 @@ sweepref(pdf_xref *xref, fz_obj *ref) return nil; } +/* + * Garbage collection + */ + fz_error * pdf_garbagecollect(pdf_xref *xref) { @@ -102,3 +102,161 @@ pdf_garbagecollect(pdf_xref *xref) return nil; } +/* + * Transplant (copy) objects and streams from one file to another + */ + +struct pair +{ + int soid, sgen; + int doid, dgen; +}; + +static fz_error * +remaprefs(fz_obj **newp, fz_obj *old, struct pair *map, int n) +{ + fz_error *error; + int i, o, g; + fz_obj *tmp, *key; + + if (fz_isindirect(old)) + { + o = fz_toobjid(old); + g = fz_togenid(old); + for (i = 0; i < n; i++) + if (map[i].soid == o && map[i].sgen == g) + return fz_newindirect(newp, map[i].doid, map[i].dgen); + } + + else if (fz_isarray(old)) + { + error = fz_newarray(newp, fz_arraylen(old)); + if (error) + return error; + for (i = 0; i < fz_arraylen(old); i++) + { + tmp = fz_arrayget(old, i); + error = remaprefs(&tmp, tmp, map, n); + if (error) + goto cleanup; + error = fz_arraypush(*newp, tmp); + fz_dropobj(tmp); + if (error) + goto cleanup; + } + } + + else if (fz_isdict(old)) + { + error = fz_newdict(newp, fz_dictlen(old)); + if (error) + return error; + for (i = 0; i < fz_dictlen(old); i++) + { + key = fz_dictgetkey(old, i); + tmp = fz_dictgetval(old, i); + error = remaprefs(&tmp, tmp, map, n); + if (error) + goto cleanup; + error = fz_dictput(*newp, key, tmp); + fz_dropobj(tmp); + if (error) + goto cleanup; + } + } + + else + { + *newp = fz_keepobj(old); + } + + return nil; + +cleanup: + fz_dropobj(*newp); + return error; +} + +fz_error * +pdf_transplant(pdf_xref *dst, pdf_xref *src, fz_obj **newp, fz_obj *root) +{ + fz_error *error; + struct pair *map; + fz_obj *old, *new; + fz_buffer *stm; + int stmofs; + int i, n; + + for (i = 0; i < src->size; i++) + src->table[i].mark = 0; + + error = sweepobj(src, root); + if (error) + return error; + + for (n = 0, i = 0; i < 
src->size; i++) + if (src->table[i].mark) + n++; + + map = fz_malloc(sizeof(struct pair) * n); + if (!map) + return fz_outofmem; + + for (n = 0, i = 0; i < src->size; i++) + { + if (src->table[i].mark) + { + map[n].soid = i; + map[n].sgen = src->table[i].gen; + if (src->table[i].type == 'o') + map[n].sgen = 0; + error = pdf_createobject(dst, &map[n].doid, &map[n].dgen); + if (error) + goto cleanup; + n++; + } + } + + error = remaprefs(newp, root, map, n); + if (error) + goto cleanup; + + for (i = 0; i < n; i++) + { + error = pdf_loadobject0(&old, src, map[i].soid, map[i].sgen, &stmofs); + if (error) + goto cleanup; + + if (stmofs != -1) + { + error = pdf_readrawstream0(&stm, src, old, + map[i].soid, map[i].sgen, stmofs); + if (error) + goto cleanup; + + error = pdf_savestream(dst, map[i].doid, map[i].dgen, stm); + if (error) { + fz_dropobj(old); + goto cleanup; + } + } + + error = remaprefs(&new, old, map, n); + fz_dropobj(old); + if (error) + goto cleanup; + + error = pdf_saveobject(dst, map[i].doid, map[i].dgen, new); + fz_dropobj(new); + if (error) + goto cleanup; + } + + fz_free(map); + return nil; + +cleanup: + fz_free(map); + return error; +} + diff --git a/mupdf/save.c b/mupdf/save.c index 4850f886..d0e44c9b 100644 --- a/mupdf/save.c +++ b/mupdf/save.c @@ -68,12 +68,8 @@ writecopy(fz_file *out, pdf_xref *xref, pdf_crypt *encrypt, int oid) { pdf_xrefentry *x = xref->table + oid; fz_error *error; - fz_obj *length; fz_obj *obj; int stmofs; - fz_filter *cf; - fz_filter *nf; - fz_filter *pipe; fz_filter *ef; int gid; int n; @@ -98,33 +94,10 @@ writecopy(fz_file *out, pdf_xref *xref, pdf_crypt *encrypt, int oid) { fz_print(out, "stream\n"); - length = fz_dictgets(obj, "Length"); - error = pdf_resolve(&length, xref); + error = pdf_openrawstream0(xref, obj, oid, gid, stmofs); if (error) goto cleanup; - if (xref->crypt) - { - error = fz_newnullfilter(&nf, fz_toint(length)); - if (error) - goto cleanup; - error = pdf_cryptstm(&cf, xref->crypt, oid, gid); - if (error) 
- goto cleanup; - error = fz_newpipeline(&pipe, nf, cf); - if (error) - goto cleanup; - } - else - { - error = fz_newnullfilter(&pipe, fz_toint(length)); - if (error) - goto cleanup; - } - - fz_seek(xref->file, stmofs, 0); - fz_pushfilter(xref->file, pipe); - if (encrypt) { error = pdf_cryptstm(&ef, encrypt, oid, gid); @@ -156,7 +129,7 @@ writecopy(fz_file *out, pdf_xref *xref, pdf_crypt *encrypt, int oid) if (encrypt) fz_popfilter(out); - fz_popfilter(xref->file); + pdf_closestream(xref); fz_print(out, "endstream\n"); } diff --git a/mupdf/stream.c b/mupdf/stream.c index 379a80aa..0359c461 100644 --- a/mupdf/stream.c +++ b/mupdf/stream.c @@ -189,6 +189,64 @@ pdf_buildfilter(fz_filter **fp, pdf_xref *xref, fz_obj *stmobj, int oid, int gid } fz_error * +pdf_openrawstream0(pdf_xref *xref, fz_obj *stmobj, int oid, int gid, int ofs) +{ + fz_error *error; + fz_filter *nf = nil; + fz_filter *cf = nil; + fz_filter *pipe = nil; + fz_obj *obj; + int length; + int n; + + obj = fz_dictgets(stmobj, "Length"); + error = pdf_resolve(&obj, xref); + if (error) + return error; + length = fz_toint(obj); + fz_dropobj(obj); + + n = fz_seek(xref->file, ofs, 0); + if (n < 0) + return fz_ferror(xref->file); + + if (xref->crypt) + { + error = fz_newnullfilter(&nf, length); + if (error) + return error; + + error = pdf_cryptstm(&cf, xref->crypt, oid, gid); + if (error) { + fz_freefilter(nf); + return error; + } + + error = fz_newpipeline(&pipe, nf, cf); + if (error) { + fz_freefilter(nf); + fz_freefilter(cf); + return error; + } + } + + else + { + error = fz_newnullfilter(&pipe, length); + if (error) + return error; + } + + error = fz_pushfilter(xref->file, pipe); + if (error) { + fz_freefilter(pipe); + return error; + } + + return nil; +} + +fz_error * pdf_openstream0(pdf_xref *xref, fz_obj *stmobj, int oid, int gid, int ofs) { fz_error *error; @@ -245,6 +303,22 @@ pdf_closestream(pdf_xref *xref) } fz_error * +pdf_readrawstream0(fz_buffer **bufp, pdf_xref *xref, fz_obj *stmobj, int oid, int 
gid, int ofs) +{ + fz_error *error; + + error = pdf_openrawstream0(xref, stmobj, oid, gid, ofs); + if (error) + return error; + + error = fz_readfile(bufp, xref->file); + + pdf_closestream(xref); + + return error; +} + +fz_error * pdf_readstream0(fz_buffer **bufp, pdf_xref *xref, fz_obj *stmobj, int oid, int gid, int ofs) { fz_error *error; diff --git a/mupdf/xref.c b/mupdf/xref.c index e1977ad6..4089c852 100644 --- a/mupdf/xref.c +++ b/mupdf/xref.c @@ -35,6 +35,26 @@ pdf_newxref(pdf_xref **xrefp) } fz_error * +pdf_emptyxref(pdf_xref *xref, float version) +{ + assert(xref->table == nil); + + xref->version = version; + xref->capacity = 256; + xref->size = 1; + xref->table = fz_malloc(xref->capacity * sizeof(pdf_xrefentry)); + if (!xref->table) + return fz_outofmem; + + xref->table[0].type = 'f'; + xref->table[0].mark = 0; + xref->table[0].ofs = 0; + xref->table[0].gen = 65535; + + return nil; +} + +fz_error * pdf_decryptxref(pdf_xref *xref) { fz_error *error; @@ -255,7 +275,7 @@ fz_error * pdf_createobject(pdf_xref *xref, int *oidp, int *gidp) { pdf_xrefentry *x; - int prev; + int prev, next; int oid = 0; while (1) @@ -268,6 +288,15 @@ pdf_createobject(pdf_xref *xref, int *oidp, int *gidp) { *oidp = oid; *gidp = x->gen; + + x->type = 'a'; + x->ofs = 0; + + prev = findprev(xref, oid); + next = findnext(xref, oid); + xref->table[prev].type = 'd'; + xref->table[prev].ofs = next; + return nil; } } @@ -293,18 +322,19 @@ pdf_createobject(pdf_xref *xref, int *oidp, int *gidp) oid = xref->size ++; - xref->table[oid].type = 'd'; + xref->table[oid].type = 'a'; xref->table[oid].mark = 0; xref->table[oid].ofs = 0; xref->table[oid].gen = 0; - prev = findprev(xref, oid); - xref->table[prev].type = 'd'; - xref->table[prev].ofs = oid; - *oidp = oid; *gidp = 0; + prev = findprev(xref, oid); + next = findnext(xref, oid); + xref->table[prev].type = 'd'; + xref->table[prev].ofs = next; + return nil; } diff --git a/test/pdfclean.c b/test/pdfclean.c index 69c51590..58e8b828 100644 --- 
a/test/pdfclean.c +++ b/test/pdfclean.c @@ -163,6 +163,7 @@ int main(int argc, char **argv) } error = pdf_savepdf(xref, outfile, encrypt); +// error = pdf_saveincrementalpdf(xref, infile); if (error) fz_abort(error); diff --git a/test/pdfdebug.c b/test/pdfdebug.c index bf3de918..7536e7e0 100644 --- a/test/pdfdebug.c +++ b/test/pdfdebug.c @@ -5,7 +5,6 @@ static char *password = ""; static int dodecode = 0; static int dorepair = 0; static int doprintxref = 0; -static int dosave = 0; void usage() { @@ -44,13 +43,9 @@ void printsafe(unsigned char *buf, int n) void decodestream(pdf_xref *xref, fz_obj *stream, int oid, int gid, int ofs) { - FILE *copy; fz_error *error; unsigned char buf[512]; - if (dosave) - copy = fopen("/tmp/dump.stm", "wb"); - safecol = 0; error = pdf_openstream0(xref, stream, oid, gid, ofs); @@ -64,14 +59,8 @@ void decodestream(pdf_xref *xref, fz_obj *stream, int oid, int gid, int ofs) if (n < 0) fz_abort(fz_ferror(xref->file)); printsafe(buf, n); - - if (dosave) - fwrite(buf, 1, n, copy); } - if (dosave) - fclose(copy); - pdf_closestream(xref); } @@ -144,13 +133,10 @@ int main(int argc, char **argv) pdf_xref *xref; int c; - while ((c = getopt(argc, argv, "drxsopu:")) != -1) + while ((c = getopt(argc, argv, "drxopu:")) != -1) { switch (c) { - case 's': - dodecode ++; - dosave ++; case 'd': dodecode ++; break; diff --git a/test/pdfmerge.c b/test/pdfmerge.c new file mode 100644 index 00000000..a3041188 --- /dev/null +++ b/test/pdfmerge.c @@ -0,0 +1,237 @@ +#include <fitz.h> +#include <mupdf.h> + +void usage() +{ + fprintf(stderr, + "usage: pdfmerge [options] file.pdf pages ...\n" + " -o -\toutput file name (default out.pdf)\n" + " -d -\tset user password for decryption\n" + " -e\tencrypt outfile\n" + " -U -\tset user password for encryption\n" + " -O -\tset owner password\n" + " -P -\tset permissions\n" + " -N -\tkey length in bits: 40 <= n <= 128\n" + ); + exit(1); +} + +int main(int argc, char **argv) +{ + fz_error *error; + char *savename = 
"out.pdf"; + pdf_pagetree *srcpages; + fz_obj *srcrefs; + fz_obj *dstrefs; + pdf_xref *dst; + pdf_xref *src; + int rootoid; + int rootgid; + int pagesoid; + int pagesgid; + fz_obj *pagesref; + fz_obj *obj; + int i, k; + int c; + + pdf_crypt *encrypt = 0; + int doencrypt = 0; + + char *userpw = ""; + char *ownerpw = ""; + int perms = -4; /* 0xfffffffc */ + int keylen = 40; + char *password = ""; + + while ((c = getopt(argc, argv, "reo:d:U:O:P:N:")) != -1) + { + switch (c) + { + case 'e': ++ doencrypt; break; + case 'o': savename = optarg; break; + case 'U': userpw = optarg; break; + case 'O': ownerpw = optarg; break; + case 'P': perms = atoi(optarg); break; + case 'N': keylen = atoi(optarg); break; + case 'd': password = optarg; break; + default: usage(); + } + } + + if (argc - optind < 1) + usage(); + + /* + * Create new blank xref table + */ + + error = pdf_newxref(&dst); + if (error) + fz_abort(error); + + error = pdf_emptyxref(dst, 1.3); + if (error) + fz_abort(error); + + error = fz_newarray(&dstrefs, 100); + if (error) + fz_abort(error); + + /* + * Copy pages saving refs in dstrefs + */ + + for (i = optind; i < argc; i++) + { + error = pdf_newxref(&src); + if (error) + fz_abort(error); + + error = pdf_openxref(src, argv[i]); + if (error) + fz_abort(error); + + error = pdf_decryptxref(src); + if (error) + fz_abort(error); + + if (src->crypt) + { + error = pdf_setpassword(src->crypt, password); + if (error) + fz_abort(error); + } + + error = pdf_loadpagetree(&srcpages, src); + if (error) + fz_abort(error); + + error = fz_newarray(&srcrefs, 100); + if (error) + fz_abort(error); + + for (k = 0; k < srcpages->count; k++) + { + fz_dictdels(srcpages->pobj[k], "Parent"); + fz_dictdels(srcpages->pobj[k], "B"); + fz_dictdels(srcpages->pobj[k], "PieceInfo"); + fz_dictdels(srcpages->pobj[k], "Metadata"); + fz_dictdels(srcpages->pobj[k], "Annots"); + fz_dictdels(srcpages->pobj[k], "Tabs"); + + pdf_saveobject(src, + fz_toobjid(srcpages->pref[k]), + 
fz_togenid(srcpages->pref[k]), + srcpages->pobj[k]); + error = fz_arraypush(srcrefs, srcpages->pref[k]); + if (error) + fz_abort(error); + } + + error = pdf_transplant(dst, src, &srcrefs, srcrefs); + if (error) + fz_abort(error); + + for (k = 0; k < fz_arraylen(srcrefs); k++) + { + error = fz_arraypush(dstrefs, fz_arrayget(srcrefs, k)); + if (error) + fz_abort(error); + } + + pdf_freepagetree(srcpages); + + pdf_closexref(src); + } + + /* + * Create and relink Pages object + */ + + error = pdf_createobject(dst, &pagesoid, &pagesgid); + if (error) + fz_abort(error); + + error = fz_packobj(&obj, + "<</Type/Pages/Count %i/Kids %o>>", + fz_arraylen(dstrefs), + dstrefs); + if (error) + fz_abort(error); + + error = pdf_saveobject(dst, pagesoid, pagesgid, obj); + if (error) + fz_abort(error); + + fz_dropobj(obj); + + error = fz_newindirect(&pagesref, pagesoid, pagesgid); + if (error) + fz_abort(error); + + for (i = 0; i < fz_arraylen(dstrefs); i++) + { + int oid = fz_toobjid(fz_arrayget(dstrefs, i)); + int gid = fz_togenid(fz_arrayget(dstrefs, i)); + error = pdf_loadobject0(&obj, dst, oid, gid, nil); + if (error) + fz_abort(error); + error = fz_dictputs(obj, "Parent", pagesref); + if (error) + fz_abort(error); + error = pdf_saveobject(dst, oid, gid, obj); + if (error) + fz_abort(error); + fz_dropobj(obj); + } + + fz_dropobj(pagesref); + + /* + * Create Catalog and trailer + */ + + error = pdf_createobject(dst, &rootoid, &rootgid); + if (error) + fz_abort(error); + + error = fz_packobj(&obj, + "<</Type/Catalog/Pages %r>>", + pagesoid, pagesgid); + if (error) + fz_abort(error); + + error = pdf_saveobject(dst, rootoid, rootgid, obj); + if (error) + fz_abort(error); + + fz_dropobj(obj); + + error = fz_packobj(&dst->trailer, "<</Root %r>>", rootoid, rootgid); + if (error) + fz_abort(error); + + /* + * Write out the new PDF + */ + + if (doencrypt) + { + fz_obj *id = fz_dictgets(dst->trailer, "ID"); + if (!id) + fz_packobj(&id, "[(ABCDEFGHIJKLMNOP)(ABCDEFGHIJKLMNOP)]"); + else + 
fz_keepobj(id); + error = pdf_newencrypt(&encrypt, userpw, ownerpw, perms, keylen, id); + if (error) + fz_abort(error); + fz_dropobj(id); + } + + error = pdf_savepdf(dst, savename, encrypt); + if (error) + fz_abort(error); + + return 0; +} + |