diff options
author | Sebastian Rasmussen <sebras@hotmail.com> | 2009-05-23 02:16:50 +0200 |
---|---|---|
committer | Sebastian Rasmussen <sebras@hotmail.com> | 2009-05-23 02:16:50 +0200 |
commit | da6f61c67c5c591f805a6a933ff7ce03c85563e7 (patch) | |
tree | 9786c3f40dbf571f93650676b5f489bf0e9902ed | |
parent | ac244e57a9c64451b75447620509b038e239e0e8 (diff) | |
download | mupdf-da6f61c67c5c591f805a6a933ff7ce03c85563e7.tar.xz |
Added separate image/font extraction tool.
-rw-r--r-- | apps/Jamfile | 3 | ||||
-rw-r--r-- | apps/pdfextract.c | 332 |
2 files changed, 335 insertions, 0 deletions
diff --git a/apps/Jamfile b/apps/Jamfile index 1c70f554..f2f36e5d 100644 --- a/apps/Jamfile +++ b/apps/Jamfile @@ -16,6 +16,9 @@ LinkLibraries pdfdraw : $(FITZLIBS) ; Main pdfinfo : pdfinfo.c ; LinkLibraries pdfinfo : $(FITZLIBS) ; +Main pdfextract : pdfextract.c ; +LinkLibraries pdfextract : $(FITZLIBS) ; + SubDir TOP apps common ; Library libpdfapp : pdfapp.c ; diff --git a/apps/pdfextract.c b/apps/pdfextract.c new file mode 100644 index 00000000..0e3d71f4 --- /dev/null +++ b/apps/pdfextract.c @@ -0,0 +1,332 @@ +/* + * pdfextract -- the ultimate way to extract images and fonts from pdfs + */ + +#include "fitz.h" +#include "mupdf.h" + +pdf_xref *xref = NULL; + +void die(fz_error eo) +{ + fz_catch(eo, "aborting"); + exit(1); +} + +void openxref(char *filename, char *password) +{ + fz_error error; + fz_obj *obj; + + error = pdf_newxref(&xref); + if (error) + die(error); + + error = pdf_loadxref(xref, filename); + if (error) + { + fz_catch(error, "trying to repair"); + error = pdf_repairxref(xref, filename); + if (error) + die(error); + } + + error = pdf_decryptxref(xref); + if (error) + die(error); + + if (xref->crypt) + { + int okay = pdf_setpassword(xref->crypt, password); + if (!okay) + die(fz_throw("invalid password")); + } + + /* TODO: move into mupdf lib, see pdfapp_open in pdfapp.c */ + obj = fz_dictgets(xref->trailer, "Root"); + if (!obj) + die(error); + + error = pdf_loadindirect(&xref->root, xref, obj); + if (error) + die(error); + + obj = fz_dictgets(xref->trailer, "Info"); + if (obj) + { + error = pdf_loadindirect(&xref->info, xref, obj); + if (error) + die(error); + } +} + +void closexref() +{ + pdf_closexref(xref); + xref = nil; +} + +int showcolumn; + +void showusage(void) +{ + fprintf(stderr, "usage: pdfextract [-d password] <file> [object numbers]\n"); + fprintf(stderr, " -d \tdecrypt password\n"); + exit(1); +} + +int isimage(fz_obj *obj) +{ + fz_obj *type = fz_dictgets(obj, "Subtype"); + return fz_isname(type) && !strcmp(fz_toname(type), "Image"); +} + +int isfontdesc(fz_obj *obj) +{ + fz_obj *type = fz_dictgets(obj, "Type"); + return fz_isname(type) && !strcmp(fz_toname(type), "FontDescriptor"); +} + +void saveimage(fz_obj *obj, int num, int gen) +{ + pdf_image *img = nil; + fz_obj *ref; + fz_error error; + fz_pixmap *pix; + char name[1024]; + FILE *f; + int bpc; + int w; + int h; + int n; + int x; + int y; + + error = fz_newindirect(&ref, num, gen); + if (error) + die(error); + + error = pdf_newstore(&xref->store); + if (error) + die(error); + + error = pdf_loadimage(&img, xref, obj, ref); + if (error) + die(error); + + n = img->super.n; + w = img->super.w; + h = img->super.h; + bpc = img->bpc; + + error = fz_newpixmap(&pix, 0, 0, w, h, n + 1); + if (error) + die(error); + + error = img->super.loadtile(&img->super, pix); + if (error) + die(error); + + if (bpc == 1 && n == 0) + { + fz_pixmap *temp; + + error = fz_newpixmap(&temp, pix->x, pix->y, pix->w, pix->h, pdf_devicergb->n + 1); + if (error) + die(error); + + for (y = 0; y < pix->h; y++) + for (x = 0; x < pix->w; x++) + { + int pixel = y * pix->w + x; + temp->samples[pixel * temp->n + 0] = 255; + temp->samples[pixel * temp->n + 1] = pix->samples[pixel]; + temp->samples[pixel * temp->n + 2] = pix->samples[pixel]; + temp->samples[pixel * temp->n + 3] = pix->samples[pixel]; + } + + fz_droppixmap(pix); + pix = temp; + } + + if (img->super.cs && strcmp(img->super.cs->name, "DeviceRGB")) + { + fz_pixmap *temp; + + error = fz_newpixmap(&temp, pix->x, pix->y, pix->w, pix->h, pdf_devicergb->n + 1); + if (error) + die(error); + + fz_convertpixmap(img->super.cs, pix, pdf_devicergb, temp); + fz_droppixmap(pix); + pix = temp; + } + + sprintf(name, "img-%04d.pnm", num); + + f = fopen(name, "wb"); + if (f == NULL) + die(fz_throw("Error creating image file")); + + fprintf(f, "P6\n%d %d\n%d\n", w, h, 255); + + for (y = 0; y < pix->h; y++) + for (x = 0; x < pix->w; x++) + { + fz_sample *sample = &pix->samples[(y * pix->w + x) * (pdf_devicergb->n + 1)]; + unsigned char r = sample[1]; + unsigned char g = sample[2]; + unsigned char b = sample[3]; + fprintf(f, "%c%c%c", r, g, b); + } + + if (fclose(f) < 0) + die(fz_throw("Error closing image file")); + + fz_droppixmap(pix); + + pdf_dropstore(xref->store); + xref->store = nil; + + fz_dropimage(&img->super); + + fz_dropobj(ref); +} + +void savefont(fz_obj *dict, int num, int gen) +{ + fz_error error; + char name[1024]; + char *subtype; + fz_buffer *buf; + fz_obj *stream; + fz_obj *obj; + char *ext; + FILE *f; + unsigned char *p; + char *fontname; + + obj = fz_dictgets(dict, "FontName"); + if (obj) + fontname = fz_toname(obj); + + obj = fz_dictgets(dict, "FontFile"); + if (obj) + { + stream = obj; + ext = "pfa"; + } + + obj = fz_dictgets(dict, "FontFile2"); + if (obj) + { + stream = obj; + ext = "ttf"; + } + + obj = fz_dictgets(dict, "FontFile3"); + if (obj) + { + + stream = obj; + + error = pdf_resolve(&obj, xref); + if (error) + die(error); + + obj = fz_dictgets(obj, "Subtype"); + if (obj && !fz_isname(obj)) + die(fz_throw("Invalid font descriptor subtype")); + + subtype = fz_toname(obj); + if (!strcmp(subtype, "Type1C")) + ext = "cff"; + else if (!strcmp(subtype, "CIDFontType0C")) + ext = "cid"; + else + die(fz_throw("Unhandled font type '%s'", subtype)); + } + + if (!stream) + { + fz_warn("Unhandled font type"); + return; + } + + error = fz_newbuffer(&buf, 0); + if (error) + die(error); + + error = pdf_loadstream(&buf, xref, fz_tonum(stream), fz_togen(stream)); + if (error) + die(error); + + sprintf(name, "%s-%04d.%s", fontname, num, ext); + + f = fopen(name, "wb"); + if (f == NULL) + die(fz_throw("Error creating image file")); + + for (p = buf->rp; p < buf->wp; p ++) + fprintf(f, "%c", *p); + + if (fclose(f) < 0) + die(fz_throw("Error closing image file")); + + fz_dropbuffer(buf); +} + +void showobject(int num, int gen) +{ + fz_error error; + fz_obj *obj; + + if (!xref) + die(fz_throw("no file specified")); + + error = pdf_loadobject(&obj, xref, num, gen); + if (error) + die(error); + + if (isimage(obj)) + saveimage(obj, num, gen); + else if (isfontdesc(obj)) + savefont(obj, num, gen); + + fz_dropobj(obj); +} + +int main(int argc, char **argv) +{ + char *password = ""; + int c, o; + + while ((c = getopt(argc, argv, "d:")) != -1) + { + switch (c) + { + case 'd': password = optarg; break; + default: + showusage(); + break; + } + } + + if (optind == argc) + showusage(); + + openxref(argv[optind++], password); + + if (optind == argc) + for (o = 0; o < xref->len; o++) + showobject(o, 0); + else + while (optind < argc) + { + showobject(atoi(argv[optind]), 0); + optind++; + } + + closexref(); +} + |