summary | refs | log | tree | commit | diff
path: root/apps
diff options
context:
space:
mode:
author Tor Andersson <tor@ghostscript.com> 2010-07-15 23:25:00 +0000
committer Tor Andersson <tor@ghostscript.com> 2010-07-15 23:25:00 +0000
commit 22c78bd39b2d8316af4c7d20d7f322fa7d534102 (patch)
tree 5e957a0c183049970f4eda05dc6d8f4a1692e835 /apps
parent 3effc3ad56c9e90f39638a6edd14a9aa07d9dabe (diff)
download mupdf-22c78bd39b2d8316af4c7d20d7f322fa7d534102.tar.xz
Refactored xref opening functions, command line tools, and rewrote pdfdraw to be more illustrative.
Diffstat (limited to 'apps')
-rw-r--r-- apps/pdfapp.c 6
-rw-r--r-- apps/pdfclean.c 342
-rw-r--r-- apps/pdfdraw.c 560
-rw-r--r-- apps/pdfextract.c 131
-rw-r--r-- apps/pdfinfo.c 32
-rw-r--r-- apps/pdfshow.c 65
-rw-r--r-- apps/pdftool.c 86
-rw-r--r-- apps/pdftool.h 14
8 files changed, 465 insertions, 771 deletions
diff --git a/apps/pdfapp.c b/apps/pdfapp.c
index c5930129..95885e2a 100644
--- a/apps/pdfapp.c
+++ b/apps/pdfapp.c
@@ -103,7 +103,7 @@ void pdfapp_open(pdfapp_t *app, char *filename, int fd)
*/
file = fz_openfile(fd);
- error = pdf_openxref(&app->xref, file);
+ error = pdf_newxref(&app->xref, file, NULL);
if (error)
pdfapp_error(app, fz_rethrow(error, "cannot open document '%s'", filename));
fz_dropstream(file);
@@ -195,7 +195,7 @@ void pdfapp_close(pdfapp_t *app)
pdf_freestore(app->xref->store);
app->xref->store = nil;
- pdf_closexref(app->xref);
+ pdf_freexref(app->xref);
app->xref = nil;
}
}
@@ -255,8 +255,6 @@ static void pdfapp_showpage(pdfapp_t *app, int loadpage, int drawpage)
pdf_freepage(app->page);
app->page = nil;
- pdf_flushxref(app->xref, 0);
-
obj = pdf_getpageobject(app->xref, app->pageno);
error = pdf_loadpage(&app->page, app->xref, obj);
if (error)
diff --git a/apps/pdfclean.c b/apps/pdfclean.c
index ee6ca722..0ecbb98e 100644
--- a/apps/pdfclean.c
+++ b/apps/pdfclean.c
@@ -4,10 +4,13 @@
* Rewrite PDF with pretty printed objects.
* Garbage collect unreachable objects.
* Inflate compressed streams.
- * Encrypt output.
+ * Create subset documents.
+ *
+ * TODO: linearize document for fast web view
*/
-#include "pdftool.h"
+#include "fitz.h"
+#include "mupdf.h"
static FILE *out = NULL;
@@ -20,6 +23,29 @@ static pdf_xrefentry *oldxreflist = NULL;
static int dogarbage = 0;
static int doexpand = 0;
+static pdf_xref *xref = NULL;
+
+void die(fz_error error)
+{
+ fz_catch(error, "aborting");
+ if (xref)
+ pdf_freexref(xref);
+ exit(1);
+}
+
+static void usage(void)
+{
+ fprintf(stderr,
+ "usage: pdfclean [options] input.pdf [output.pdf] [pages]\n"
+ "\t-p -\tpassword\n"
+ "\t-g\tgarbage collect unused objects\n"
+ "\t-gg\tin addition to -g compact xref table\n"
+ "\t-ggg\tin addition to -gg merge duplicate objects\n"
+ "\t-x\texpand compressed streams\n"
+ "\tpages\tcomma separated list of ranges\n");
+ exit(1);
+}
+
/*
* Garbage collect objects not reachable from the trailer.
*/
@@ -67,6 +93,10 @@ static void sweepref(fz_obj *obj)
sweepobj(fz_resolveindirect(obj));
}
+/*
+ * Renumber objects to compact the xref table
+ */
+
static void renumberobj(fz_obj *obj)
{
int i;
@@ -109,6 +139,53 @@ static void renumberobj(fz_obj *obj)
}
}
+static void renumberxref(void)
+{
+ int num, newnum;
+
+ newnumlist = fz_malloc(xref->len * sizeof(int));
+ oldxreflist = fz_malloc(xref->len * sizeof(pdf_xrefentry));
+ for (num = 0; num < xref->len; num++)
+ {
+ newnumlist[num] = -1;
+ oldxreflist[num] = xref->table[num];
+ }
+
+ newnum = 1;
+ for (num = 0; num < xref->len; num++)
+ {
+ if (xref->table[num].type == 'f')
+ uselist[num] = 0;
+ if (uselist[num])
+ newnumlist[num] = newnum++;
+ }
+
+ renumberobj(xref->trailer);
+ for (num = 0; num < xref->len; num++)
+ renumberobj(xref->table[num].obj);
+
+ for (num = 0; num < xref->len; num++)
+ uselist[num] = 0;
+
+ for (num = 0; num < xref->len; num++)
+ {
+ if (newnumlist[num] >= 0)
+ {
+ xref->table[newnumlist[num]] = oldxreflist[num];
+ uselist[newnumlist[num]] = 1;
+ }
+ }
+
+ fz_free(oldxreflist);
+ fz_free(newnumlist);
+
+ xref->len = newnum;
+}
+
+/*
+ * Scan and remove duplicate objects (slow)
+ */
+
static void removeduplicateobjs(void)
{
int num, other;
@@ -142,6 +219,99 @@ static void removeduplicateobjs(void)
fz_free(newnumlist);
}
+/*
+ * Recreate page tree to only retain specified pages.
+ */
+
+static void retainpages(int argc, char **argv)
+{
+ fz_obj *root, *pages, *kids;
+ int count;
+
+ /* Snatch pages entry from root dict */
+ root = fz_dictgets(xref->trailer, "Root");
+ pages = fz_keepobj(fz_dictgets(root, "Pages"));
+
+ /* Then empty the root dict */
+ while (fz_dictlen(root) > 0)
+ {
+ fz_obj *key = fz_dictgetkey(root, 0);
+ fz_dictdel(root, key);
+ }
+
+ /* And only retain pages and type entries */
+ fz_dictputs(root, "Pages", pages);
+ fz_dictputs(root, "Type", fz_newname("Catalog"));
+ fz_dropobj(pages);
+
+ /* Create a new kids array too add into pages dict
+ * since each element must be replaced to point to
+ * a retained page */
+ kids = fz_newarray(1);
+ count = 0;
+
+ /* Retain pages specified */
+ while (argc - fz_optind)
+ {
+ int page, spage, epage;
+ char *spec, *dash;
+ char *pagelist = argv[fz_optind];
+
+ spec = fz_strsep(&pagelist, ",");
+ while (spec)
+ {
+ dash = strchr(spec, '-');
+
+ if (dash == spec)
+ spage = epage = 1;
+ else
+ spage = epage = atoi(spec);
+
+ if (dash)
+ {
+ if (strlen(dash) > 1)
+ epage = atoi(dash + 1);
+ else
+ epage = pdf_getpagecount(xref);
+ }
+
+ if (spage > epage)
+ page = spage, spage = epage, epage = page;
+
+ if (spage < 1)
+ spage = 1;
+ if (epage > pdf_getpagecount(xref))
+ epage = pdf_getpagecount(xref);
+
+ for (page = spage; page <= epage; page++)
+ {
+ fz_obj *pageobj = pdf_getpageobject(xref, page);
+ fz_obj *pageref = pdf_getpageref(xref, page);
+
+ /* Update parent reference */
+ fz_dictputs(pageobj, "Parent", pages);
+
+ /* Store page object in new kids array */
+ fz_arraypush(kids, pageref);
+ count++;
+
+ fz_dropobj(pageref);
+ }
+
+ spec = fz_strsep(&pagelist, ",");
+ }
+
+ fz_optind++;
+ }
+
+ /* Update page count and kids array */
+ fz_dictputs(pages, "Count", fz_newint(count));
+ fz_dictputs(pages, "Kids", kids);
+}
+
+/*
+ * Make sure we have loaded objects from object streams.
+ */
static void preloadobjstms(void)
{
@@ -161,6 +331,10 @@ static void preloadobjstms(void)
}
}
+/*
+ * Save streams and objects to the output
+ */
+
static void copystream(fz_obj *obj, int num, int gen)
{
fz_error error;
@@ -208,7 +382,7 @@ static void expandstream(fz_obj *obj, int num, int gen)
fz_dropbuffer(buf);
}
-static void saveobject(int num, int gen)
+static void writeobject(int num, int gen)
{
fz_error error;
fz_obj *obj;
@@ -253,7 +427,7 @@ static void saveobject(int num, int gen)
fz_dropobj(obj);
}
-static void savexref(void)
+static void writexref(void)
{
fz_obj *trailer;
fz_obj *obj;
@@ -297,148 +471,7 @@ static void savexref(void)
fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref);
}
-static void cleanusage(void)
-{
- fprintf(stderr,
- "usage: pdfclean [options] input.pdf [outfile.pdf] [pages]\n"
- "\t-p -\tpassword for decryption\n"
- "\t-g\tgarbage collect unused objects\n"
- "\t-gg\tin addition to -g xref is compacted\n"
- "\t-ggg\tin addition to -gg identical objects are garbage collected\n"
- "\t-x\texpand compressed streams\n");
- exit(1);
-}
-
-static void retainpages(int argc, char **argv)
-{
- fz_obj *root, *pages, *kids;
- int count;
-
- /* Snatch pages entry from root dict */
- root = fz_dictgets(xref->trailer, "Root");
- pages = fz_keepobj(fz_dictgets(root, "Pages"));
-
- /* Then empty the root dict */
- while (fz_dictlen(root) > 0)
- {
- fz_obj *key = fz_dictgetkey(root, 0);
- fz_dictdel(root, key);
- }
-
- /* And only retain pages and type entries */
- fz_dictputs(root, "Pages", pages);
- fz_dictputs(root, "Type", fz_newname("Catalog"));
- fz_dropobj(pages);
-
- /* Create a new kids array too add into pages dict
- * since each element must be replaced to point to
- * a retained page */
- kids = fz_newarray(1);
- count = 0;
-
- /* Retain pages specified */
- while (argc - fz_optind)
- {
- int page, spage, epage;
- char *spec, *dash;
- char *pagelist = argv[fz_optind];
-
- spec = fz_strsep(&pagelist, ",");
- while (spec)
- {
- dash = strchr(spec, '-');
-
- if (dash == spec)
- spage = epage = 1;
- else
- spage = epage = atoi(spec);
-
- if (dash)
- {
- if (strlen(dash) > 1)
- epage = atoi(dash + 1);
- else
- epage = pagecount;
- }
-
- if (spage > epage)
- page = spage, spage = epage, epage = page;
-
- if (spage < 1)
- spage = 1;
- if (epage > pagecount)
- epage = pagecount;
-
- for (page = spage; page <= epage; page++)
- {
- fz_obj *pageobj = pdf_getpageobject(xref, page);
- fz_obj *pageref = pdf_getpageref(xref, page);
-
- /* Update parent reference */
- fz_dictputs(pageobj, "Parent", pages);
-
- /* Store page object in new kids array */
- fz_arraypush(kids, pageref);
- count++;
-
- fz_dropobj(pageref);
- }
-
- spec = fz_strsep(&pagelist, ",");
- }
-
- fz_optind++;
- }
-
- /* Update page count and kids array */
- fz_dictputs(pages, "Count", fz_newint(count));
- fz_dictputs(pages, "Kids", kids);
-}
-
-static void renumberxref(void)
-{
- int num, newnum;
-
- newnumlist = fz_malloc(xref->len * sizeof(int));
- oldxreflist = fz_malloc(xref->len * sizeof(pdf_xrefentry));
- for (num = 0; num < xref->len; num++)
- {
- newnumlist[num] = -1;
- oldxreflist[num] = xref->table[num];
- }
-
- newnum = 1;
- for (num = 0; num < xref->len; num++)
- {
- if (xref->table[num].type == 'f')
- uselist[num] = 0;
- if (uselist[num])
- newnumlist[num] = newnum++;
- }
-
- renumberobj(xref->trailer);
- for (num = 0; num < xref->len; num++)
- renumberobj(xref->table[num].obj);
-
- for (num = 0; num < xref->len; num++)
- uselist[num] = 0;
-
- for (num = 0; num < xref->len; num++)
- {
- if (newnumlist[num] >= 0)
- {
- xref->table[newnumlist[num]] = oldxreflist[num];
- uselist[newnumlist[num]] = 1;
- }
- }
-
- fz_free(oldxreflist);
- fz_free(newnumlist);
-
- xref->len = newnum;
-}
-
-static void outputpdf(void)
+static void writepdf(void)
{
int lastfree;
int num;
@@ -461,7 +494,7 @@ static void outputpdf(void)
if (xref->table[num].type == 'n' || xref->table[num].type == 'o')
{
ofslist[num] = ftell(out);
- saveobject(num, genlist[num]);
+ writeobject(num, genlist[num]);
}
}
@@ -477,11 +510,12 @@ static void outputpdf(void)
}
}
- savexref();
+ writexref();
}
int main(int argc, char **argv)
{
+ fz_error error;
char *infile;
char *outfile = "out.pdf";
char *password = "";
@@ -495,12 +529,12 @@ int main(int argc, char **argv)
case 'p': password = fz_optarg; break;
case 'g': dogarbage ++; break;
case 'x': doexpand ++; break;
- default: cleanusage(); break;
+ default: usage(); break;
}
}
if (argc - fz_optind < 1)
- cleanusage();
+ usage();
infile = argv[fz_optind++];
@@ -514,7 +548,9 @@ int main(int argc, char **argv)
if (argc - fz_optind > 0)
subset = 1;
- openxref(infile, password, 0, subset);
+ error = pdf_openxref(&xref, infile, password);
+ if (error)
+ die(fz_rethrow(error, "cannot open input file '%s'", infile));
out = fopen(outfile, "wb");
if (!out)
@@ -552,9 +588,9 @@ int main(int argc, char **argv)
if (dogarbage >= 2)
renumberxref();
- outputpdf();
+ writepdf();
- closexref();
+ pdf_freexref(xref);
return 0;
}
diff --git a/apps/pdfdraw.c b/apps/pdfdraw.c
index 3ec51631..3c645d86 100644
--- a/apps/pdfdraw.c
+++ b/apps/pdfdraw.c
@@ -1,407 +1,156 @@
/*
- * pdfdraw:
- * Draw pages to PPM bitmaps.
- * Dump parsed display list as XML.
- * Dump text content as UTF-8.
- * Benchmark rendering speed.
+ * pdfdraw -- command line tool for drawing pdf documents
*/
-#include "pdftool.h"
+#include "fitz.h"
+#include "mupdf.h"
#define MAXBANDSIZE (3 * 1024 * 1024)
-#ifdef _MSC_VER
-#include <winsock2.h>
-#else
-#include <sys/time.h>
-#endif
+char *output = NULL;
+float resolution = 72;
-enum { DRAWPNM, DRAWPGM, DRAWTXT, DRAWXML };
+int showxml = 0;
+int showtext = 0;
+int showtime = 0;
+int savealpha = 0;
-struct benchmark
-{
- int pages;
- long min;
- int minpage;
- long avg;
- long max;
- int maxpage;
-};
-
-static fz_glyphcache *drawcache = nil;
-static int drawmode = DRAWPNM;
-static char *drawpattern = nil;
-static pdf_page *drawpage = nil;
-static float drawzoom = 1;
-static int drawrotate = 0;
-static int drawbands = 1;
-static int drawcount = 0;
-static int benchmark = 0;
-static int checksum = 0;
-
-static void local_cleanup(void)
-{
- if (xref && xref->store)
- {
- pdf_freestore(xref->store);
- xref->store = nil;
- }
- if (drawcache)
- {
- fz_freeglyphcache(drawcache);
- drawcache = nil;
- }
-}
+fz_glyphcache *glyphcache;
+char *filename;
-static void drawusage(void)
+static void die(fz_error error)
{
- fprintf(stderr,
- "usage: pdfdraw [options] [file.pdf pages ... ]\n"
- "\t-p -\tpassword for decryption\n"
- "\t-o -\tpattern (%%d for page number) for output file\n"
- "\t-r -\tresolution in dpi\n"
- "\t-m\tprint benchmark results\n"
- "\t-g\trender grayscale\n"
- "\t-s\tprint MD5 checksum of page pixel data\n"
- "\t-t\ttext extraction mode\n"
- "\t-x\txml trace mode\n"
- "example:\n"
- "\tpdfdraw -o output%%03d.pnm input.pdf 1-3,5,9-\n");
+ fz_catch(error, "aborting");
exit(1);
}
-static void gettime(long *time_)
-{
- struct timeval tv;
-
- if (gettimeofday(&tv, NULL) < 0)
- abort();
-
- *time_ = tv.tv_sec * 1000000 + tv.tv_usec;
-}
-
-static void drawloadpage(int pagenum, struct benchmark *loadtimes)
+static void usage(void)
{
- fz_error error;
- fz_obj *pageobj;
- long start;
- long end;
- long elapsed;
-
- fprintf(stdout, "draw %s:%03d ", basename, pagenum);
- if (benchmark && loadtimes)
- {
- fflush(stdout);
- gettime(&start);
- }
-
- pageobj = pdf_getpageobject(xref, pagenum);
- error = pdf_loadpage(&drawpage, xref, pageobj);
- if (error)
- die(fz_rethrow(error, "cannot load page %d (%d %d R) in PDF file '%s'", pagenum, fz_tonum(pageobj), fz_togen(pageobj), basename));
-
- if (benchmark && loadtimes)
- {
- gettime(&end);
- elapsed = end - start;
-
- if (elapsed < loadtimes->min)
- {
- loadtimes->min = elapsed;
- loadtimes->minpage = pagenum;
- }
- if (elapsed > loadtimes->max)
- {
- loadtimes->max = elapsed;
- loadtimes->maxpage = pagenum;
- }
- loadtimes->avg += elapsed;
- loadtimes->pages++;
- }
-
- if (benchmark)
- fflush(stdout);
+ fprintf(stderr,
+ "usage: pdfdraw [options] input.pdf [pages]\n"
+ "\t-o -\toutput filename (%%d for page number)\n"
+ "\t\tsupported formats: pgm, ppm, pam, png\n"
+ "\t-p -\tpassword\n"
+ "\t-r -\tresolution in dpi (default: 72)\n"
+ "\t-x\tshow display list as xml\n"
+ "\t-t\textract text (-tt for xml)\n"
+ "\t-a\tsave alpha channel (only pam and png)\n"
+ "\tpages\tcomma separated list of ranges\n");
+ exit(1);
}
-static void drawfreepage(void)
+static int isrange(char *s)
{
- pdf_freepage(drawpage);
- drawpage = nil;
-
- flushxref();
-
- /* Flush resources between pages.
- * TODO: should check memory usage before deciding to do this.
- */
- if (xref && xref->store)
+ while (*s)
{
- /* pdf_debugstore(xref->store); */
- pdf_agestoreditems(xref->store);
- pdf_evictageditems(xref->store);
- fflush(stdout);
+ if ((*s < '0' || *s > '9') && *s != '-' && *s != ',')
+ return 0;
+ s++;
}
+ return 1;
}
-static void drawpnm(int pagenum, struct benchmark *loadtimes, struct benchmark *drawtimes, int greyscale)
+static void drawpage(pdf_xref *xref, int pagenum)
{
- static int fd = -1;
fz_error error;
+ fz_obj *pageobj;
+ pdf_page *page;
+ fz_displaylist *list;
+ fz_device *dev;
fz_matrix ctm;
fz_bbox bbox;
+ fz_colorspace *colorspace;
fz_pixmap *pix;
- char name[256];
- char pnmhdr[256];
- int i, x, y, w, h, b, bh;
- long start;
- long end;
- long elapsed;
- fz_md5 digest;
- int numbands = drawbands;
- fz_displaylist *list = nil;
- fz_device *dev;
-
- if (checksum)
- fz_md5init(&digest);
-
- drawloadpage(pagenum, loadtimes);
-
- if (benchmark)
- gettime(&start);
-
- ctm = fz_identity;
- ctm = fz_concat(ctm, fz_translate(0, -drawpage->mediabox.y1));
- ctm = fz_concat(ctm, fz_scale(drawzoom, -drawzoom));
- ctm = fz_concat(ctm, fz_rotate(drawrotate + drawpage->rotate));
-
- bbox = fz_roundrect(fz_transformrect(ctm, drawpage->mediabox));
- w = bbox.x1 - bbox.x0;
- h = bbox.y1 - bbox.y0;
-
- if (w * h > MAXBANDSIZE)
- numbands = (w * h) / MAXBANDSIZE;
- if (numbands < 1)
- numbands = 1;
-
- bh = h / numbands;
+ char buf[512];
+ float zoom;
- if (drawpattern)
- {
- if (strchr(drawpattern, '%') || fd < 0)
- {
- sprintf(name, drawpattern, drawcount++);
- fd = open(name, O_BINARY|O_WRONLY|O_CREAT|O_TRUNC, 0666);
- if (fd < 0)
- die(fz_throw("ioerror: could not create raster file '%s'", name));
- }
+ pageobj = pdf_getpageobject(xref, pagenum);
+ error = pdf_loadpage(&page, xref, pageobj);
+ if (error)
+ die(fz_rethrow(error, "cannot load page %d in file '%s'", pagenum, filename));
- if (greyscale)
- {
- sprintf(pnmhdr, "P5\n%d %d\n255\n", w, h);
- }
- else
- {
- sprintf(pnmhdr, "P6\n%d %d\n255\n", w, h);
- }
- write(fd, pnmhdr, strlen(pnmhdr));
- }
+ list = fz_newdisplaylist();
- pix = fz_newpixmap((greyscale ? pdf_devicegray : pdf_devicergb), bbox.x0, bbox.y0, w, bh);
+ dev = fz_newlistdevice(list);
+ error = pdf_runpage(xref, page, dev, fz_identity);
+ if (error)
+ die(fz_rethrow(error, "cannot draw page %d in file '%s'", pagenum, filename));
+ fz_freedevice(dev);
- if (numbands > 1)
+ if (showxml)
{
- fprintf(stdout, "creating display list for banded rendering\n");
- list = fz_newdisplaylist();
- dev = fz_newlistdevice(list);
- error = pdf_runpage(xref, drawpage, dev, fz_identity);
- if (error)
- die(fz_rethrow(error, "cannot draw page %d in PDF file '%s'", pagenum, basename));
+ dev = fz_newtracedevice();
+ printf("<page number=\"%d\">\n", pagenum);
+ fz_executedisplaylist(list, dev, fz_identity);
+ printf("</page>\n");
fz_freedevice(dev);
}
- for (b = 0; b < numbands; b++)
+ if (showtext)
{
- fz_clearpixmap(pix, 0xFF);
-
- dev = fz_newdrawdevice(drawcache, pix);
-
- if (numbands > 1)
- {
- fprintf(stdout, "drawing band %d / %d\n", b + 1, numbands);
- fz_executedisplaylist(list, dev, ctm);
- }
- else
- {
- error = pdf_runpage(xref, drawpage, dev, ctm);
- if (error)
- die(fz_rethrow(error, "cannot draw page %d in PDF file '%s'", pagenum, basename));
- }
-
+ fz_textspan *text = fz_newtextspan();
+ dev = fz_newtextdevice(text);
+ fz_executedisplaylist(list, dev, fz_identity);
fz_freedevice(dev);
-
- if (checksum)
- fz_md5update(&digest, pix->samples, pix->h * pix->w * pix->n);
- if (drawpattern)
- {
- unsigned char *src = pix->samples;
- if (greyscale)
- {
- for (y = pix->h; y > 0; y--)
- {
- unsigned char *dst = src;
-
- for (x = pix->w; x > 0; x--)
- {
- *dst++ = *src++;
- src++;
- }
- dst -= pix->w;
-
- write(fd, dst, pix->w);
- }
- }
- else
- {
- for (y = pix->h; y > 0; y--)
- {
- unsigned char *dst = src;
-
- for (x = pix->w; x > 0; x--)
- {
- *dst++ = *src++;
- *dst++ = *src++;
- *dst++ = *src++;
- src++;
- }
- dst -= pix->w * 3;
-
- write(fd, dst, pix->w * 3);
- }
- }
- }
-
- pix->y += bh;
- if (pix->y + pix->h > bbox.y1)
- pix->h = bbox.y1 - pix->y;
+ printf("[Page %d]\n", pagenum);
+ if (showtext > 1)
+ fz_debugtextspanxml(text);
+ else
+ fz_debugtextspan(text);
+ printf("\n");
+ fz_freetextspan(text);
}
- if (list)
- fz_freedisplaylist(list);
-
- fz_droppixmap(pix);
-
- if (checksum)
+ if (output || showtime)
{
- unsigned char buf[16];
- fz_md5final(&digest, buf);
- for (i = 0; i < 16; i++)
- fprintf(stdout, "%02x", buf[i]);
- fprintf(stdout, " ");
- }
-
- if (drawpattern && strchr(drawpattern, '%'))
- close(fd);
-
- drawfreepage();
-
- if (benchmark)
- {
- gettime(&end);
- elapsed = end - start;
-
- if (elapsed < drawtimes->min)
- {
- drawtimes->min = elapsed;
- drawtimes->minpage = pagenum;
- }
- if (elapsed > drawtimes->max)
- {
- drawtimes->max = elapsed;
- drawtimes->maxpage = pagenum;
- }
- drawtimes->avg += elapsed;
- drawtimes->pages++;
-
- fprintf(stdout, "time %.3fs",
- elapsed / 1000000.0);
- }
+ sprintf(buf, output, pagenum);
- fprintf(stdout, "\n");
-}
+ zoom = resolution / 72;
+ ctm = fz_translate(0, -page->mediabox.y1);
+ ctm = fz_concat(ctm, fz_scale(zoom, -zoom));
+ ctm = fz_concat(ctm, fz_rotate(page->rotate));
+ bbox = fz_roundrect(fz_transformrect(ctm, page->mediabox));
-static void drawtxt(int pagenum, struct benchmark *loadtimes)
-{
- fz_error error;
- fz_matrix ctm;
- fz_textspan *text;
- fz_device *dev;
+ colorspace = pdf_devicergb;
+ if (strstr(output, ".pgm"))
+ colorspace = pdf_devicegray;
- drawloadpage(pagenum, loadtimes);
-
- ctm = fz_identity;
-
- text = fz_newtextspan();
- dev = fz_newtextdevice(text);
-
- error = pdf_runpage(xref, drawpage, dev, ctm);
- if (error)
- die(fz_rethrow(error, "cannot extract text from page %d in PDF file '%s'", pagenum, basename));
+ /* TODO: banded rendering and multi-page ppm */
- fz_freedevice(dev);
-
- printf("[Page %d]\n", pagenum);
- fz_debugtextspan(text);
- printf("\n");
-
- fz_freetextspan(text);
-
- drawfreepage();
-}
-
-static void drawxml(int pagenum)
-{
- fz_error error;
- fz_obj *pageobj;
- fz_matrix ctm;
- fz_device *dev;
+ pix = fz_newpixmap(colorspace, bbox.x0, bbox.y0, bbox.x1, bbox.y1);
- pageobj = pdf_getpageobject(xref, pagenum);
- error = pdf_loadpage(&drawpage, xref, pageobj);
- if (error)
- die(fz_rethrow(error, "cannot load page %d (%d %d R) from PDF file '%s'", pagenum, fz_tonum(pageobj), fz_togen(pageobj), basename));
+ if (savealpha)
+ fz_clearpixmap(pix, 0x00);
+ else
+ fz_clearpixmap(pix, 0xff);
- ctm = fz_identity;
+ dev = fz_newdrawdevice(glyphcache, pix);
+ fz_executedisplaylist(list, dev, ctm);
+ fz_freedevice(dev);
- dev = fz_newtracedevice();
- printf("<?xml version=\"1.0\"?>\n");
- printf("<page number=\"%d\">\n", pagenum);
+ if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm"))
+ fz_writepnm(pix, buf);
+ else if (strstr(output, ".pam"))
+ fz_writepam(pix, buf, savealpha);
+ else if (strstr(output, ".png"))
+ fz_writepng(pix, buf, savealpha);
- error = pdf_runpage(xref, drawpage, dev, ctm);
- if (error)
- die(fz_rethrow(error, "cannot display page %d in PDF file '%s' as XML", pagenum, basename));
+ fz_droppixmap(pix);
+ }
- fz_freedevice(dev);
+ fz_freedisplaylist(list);
+ pdf_freepage(page);
- printf("</page>\n");
+ pdf_agestoreditems(xref->store);
+ pdf_evictageditems(xref->store);
}
-static void drawpages(char *pagelist)
+static void drawrange(pdf_xref *xref, char *range)
{
int page, spage, epage;
char *spec, *dash;
- struct benchmark loadtimes, drawtimes;
-
- if (!xref)
- drawusage();
-
- if (benchmark)
- {
- memset(&loadtimes, 0x00, sizeof (loadtimes));
- loadtimes.min = 1<<31;
- memset(&drawtimes, 0x00, sizeof (drawtimes));
- drawtimes.min = 1<<31;
- }
- spec = fz_strsep(&pagelist, ",");
+ spec = fz_strsep(&range, ",");
while (spec)
{
dash = strchr(spec, '-');
@@ -416,109 +165,78 @@ static void drawpages(char *pagelist)
if (strlen(dash) > 1)
epage = atoi(dash + 1);
else
- epage = pagecount;
+ epage = pdf_getpagecount(xref);
}
- if (spage > epage)
- page = spage, spage = epage, epage = page;
-
- if (spage < 1)
- spage = 1;
- if (epage > pagecount)
- epage = pagecount;
-
- for (page = spage; page <= epage; page++)
- {
- switch (drawmode)
- {
- case DRAWPNM: drawpnm(page, &loadtimes, &drawtimes, 0); break;
- case DRAWPGM: drawpnm(page, &loadtimes, &drawtimes, 1); break;
- case DRAWTXT: drawtxt(page, &loadtimes); break;
- case DRAWXML: drawxml(page); break;
- }
- }
+ spage = CLAMP(spage, 1, pdf_getpagecount(xref));
+ epage = CLAMP(epage, 1, pdf_getpagecount(xref));
- spec = fz_strsep(&pagelist, ",");
- }
+ if (spage < epage)
+ for (page = spage; page <= epage; page++)
+ drawpage(xref, page);
+ else
+ for (page = spage; page >= epage; page--)
+ drawpage(xref, page);
- if (benchmark)
- {
- if (loadtimes.pages > 0)
- {
- loadtimes.avg /= loadtimes.pages;
- drawtimes.avg /= drawtimes.pages;
-
- printf("benchmark-load: min: %6.3fs (page % 4d), avg: %6.3fs, max: %6.3fs (page % 4d)\n",
- loadtimes.min / 1000000.0, loadtimes.minpage,
- loadtimes.avg / 1000000.0,
- loadtimes.max / 1000000.0, loadtimes.maxpage);
- printf("benchmark-draw: min: %6.3fs (page % 4d), avg: %6.3fs, max: %6.3fs (page % 4d)\n",
- drawtimes.min / 1000000.0, drawtimes.minpage,
- drawtimes.avg / 1000000.0,
- drawtimes.max / 1000000.0, drawtimes.maxpage);
- }
+ spec = fz_strsep(&range, ",");
}
}
int main(int argc, char **argv)
{
char *password = "";
+ pdf_xref *xref;
+ fz_error error;
int c;
- enum { NO_FILE_OPENED, NO_PAGES_DRAWN, DREW_PAGES } state;
fz_accelerate();
- while ((c = fz_getopt(argc, argv, "b:p:o:r:gtxms")) != -1)
+ while ((c = fz_getopt(argc, argv, "o:p:r:amtx")) != -1)
{
switch (c)
{
- case 'b': drawbands = atoi(fz_optarg); break;
+ case 'o': output = fz_optarg; break;
case 'p': password = fz_optarg; break;
- case 'o': drawpattern = fz_optarg; break;
- case 'r': drawzoom = atof(fz_optarg) / 72; break;
- case 'g': drawmode = DRAWPGM; break;
- case 't': drawmode = DRAWTXT; break;
- case 'x': drawmode = DRAWXML; break;
- case 'm': benchmark = 1; break;
- case 's': checksum = 1; break;
- default:
- drawusage();
- break;
+ case 'r': resolution = atof(fz_optarg) / 72; break;
+ case 'a': savealpha = 1; break;
+ case 'm': showtime++; break;
+ case 't': showtext++; break;
+ case 'x': showxml++; break;
+ default: usage(); break;
}
}
if (fz_optind == argc)
- drawusage();
+ usage();
- setcleanup(local_cleanup);
+ if (showxml)
+ printf("<?xml version=\"1.0\"?>\n");
+
+ glyphcache = fz_newglyphcache();
- state = NO_FILE_OPENED;
while (fz_optind < argc)
{
- if (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF"))
- {
- if (state == NO_PAGES_DRAWN)
- drawpages("1-");
+ filename = argv[fz_optind++];
- closexref();
+ error = pdf_openxref(&xref, filename, password);
+ if (error)
+ die(fz_rethrow(error, "cannot open document: %s", filename));
- drawcache = fz_newglyphcache();
+ if (showxml)
+ printf("<document name=\"%s\">\n", filename);
- openxref(argv[fz_optind], password, 0, 1);
- state = NO_PAGES_DRAWN;
- }
- else
- {
- drawpages(argv[fz_optind]);
- state = DREW_PAGES;
- }
- fz_optind++;
- }
+ if (fz_optind == argc || !isrange(argv[fz_optind]))
+ drawrange(xref, "1-");
+ if (fz_optind < argc && isrange(argv[fz_optind]))
+ drawrange(xref, argv[fz_optind++]);
- if (state == NO_PAGES_DRAWN)
- drawpages("1-");
+ if (showxml)
+ printf("</document>\n");
+
+ pdf_freexref(xref);
+ }
- closexref();
+ fz_freeglyphcache(glyphcache);
return 0;
}
diff --git a/apps/pdfextract.c b/apps/pdfextract.c
index 915bb026..cb0eabd2 100644
--- a/apps/pdfextract.c
+++ b/apps/pdfextract.c
@@ -2,12 +2,25 @@
* pdfextract -- the ultimate way to extract images and fonts from pdfs
*/
-#include "pdftool.h"
+#include "fitz.h"
+#include "mupdf.h"
-static void showusage(void)
+static pdf_xref *xref = NULL;
+static int dorgb = 0;
+
+void die(fz_error error)
+{
+ fz_catch(error, "aborting");
+ if (xref)
+ pdf_freexref(xref);
+ exit(1);
+}
+
+static void usage(void)
{
- fprintf(stderr, "usage: pdfextract [-p password] <file> [object numbers]\n");
+ fprintf(stderr, "usage: pdfextract [options] file.pdf [object numbers]\n");
fprintf(stderr, "\t-p\tpassword\n");
+ fprintf(stderr, "\t-r\tconvert images to rgb\n");
exit(1);
}
@@ -23,89 +36,56 @@ static int isfontdesc(fz_obj *obj)
return fz_isname(type) && !strcmp(fz_toname(type), "FontDescriptor");
}
-static void saveimage(int num, int gen)
+static void saveimage(int num)
{
- pdf_image *img = nil;
- fz_obj *ref;
fz_error error;
+ pdf_image *img;
fz_pixmap *pix;
+ fz_obj *ref;
char name[1024];
- FILE *f;
- int x, y;
- unsigned char *samples;
- ref = fz_newindirect(num, gen, xref);
+ ref = fz_newindirect(num, 0, xref);
+ /* Hack! ...normally installed by pdf_page... */
xref->store = pdf_newstore();
+ /* TODO: detect DCTD and save as jpeg */
+
error = pdf_loadimage(&img, xref, nil, ref);
if (error)
die(error);
pix = pdf_loadtile(img);
- if (img->bpc == 1 && img->n == 0)
+ if (dorgb && img->colorspace && img->colorspace != pdf_devicergb)
{
fz_pixmap *temp;
-
temp = fz_newpixmap(pdf_devicergb, pix->x, pix->y, pix->w, pix->h);
-
- for (y = 0; y < pix->h; y++)
- for (x = 0; x < pix->w; x++)
- {
- int pixel = y * pix->w + x;
- temp->samples[pixel * temp->n + 0] = pix->samples[pixel];
- temp->samples[pixel * temp->n + 1] = pix->samples[pixel];
- temp->samples[pixel * temp->n + 2] = pix->samples[pixel];
- temp->samples[pixel * temp->n + 3] = 255;
- }
-
+ fz_convertpixmap(pix, temp);
fz_droppixmap(pix);
pix = temp;
}
- if (img->colorspace && strcmp(img->colorspace->name, "DeviceRGB"))
+ if (pix->n <= 4)
{
- fz_pixmap *temp;
- temp = fz_newpixmap(pdf_devicergb, pix->x, pix->y, pix->w, pix->h);
- fz_convertpixmap(pix, temp);
- fz_droppixmap(pix);
- pix = temp;
+ sprintf(name, "img-%04d.png", num);
+ printf("extracting image %s\n", name);
+ fz_writepng(pix, name, 0);
+ }
+ else
+ {
+ sprintf(name, "img-%04d.pam", num);
+ printf("extracting image %s\n", name);
+ fz_writepam(pix, name, 0);
}
-
- sprintf(name, "img-%04d.pnm", num);
-
- f = fopen(name, "wb");
- if (f == NULL)
- die(fz_throw("Error creating image file"));
-
- fprintf(f, "P6\n%d %d\n%d\n", img->w, img->h, 255);
-
- samples = pix->samples;
-
- for (y = 0; y < pix->h; y++)
- for (x = 0; x < pix->w; x++)
- {
- unsigned char r, g, b;
-
- r = *(samples++);
- g = *(samples++);
- b = *(samples++);
- samples++;
-
- fprintf(f, "%c%c%c", r, g, b);
- }
-
- if (fclose(f) < 0)
- die(fz_throw("Error closing image file"));
fz_droppixmap(pix);
+ pdf_dropimage(img);
+ /* We never want to cache resources... */
pdf_freestore(xref->store);
xref->store = nil;
- pdf_dropimage(img);
-
fz_dropobj(ref);
}
@@ -171,6 +151,7 @@ static void savefont(fz_obj *dict, int num)
die(error);
sprintf(name, "%s-%04d.%s", fontname, num, ext);
+ printf("extracting font %s\n", name);
f = fopen(name, "wb");
if (f == NULL)
@@ -185,7 +166,7 @@ static void savefont(fz_obj *dict, int num)
fz_dropbuffer(buf);
}
-static void showobject(int num, int gen)
+static void showobject(int num)
{
fz_error error;
fz_obj *obj;
@@ -193,12 +174,12 @@ static void showobject(int num, int gen)
if (!xref)
die(fz_throw("no file specified"));
- error = pdf_loadobject(&obj, xref, num, gen);
+ error = pdf_loadobject(&obj, xref, num, 0);
if (error)
die(error);
if (isimage(obj))
- saveimage(num, gen);
+ saveimage(num);
else if (isfontdesc(obj))
savefont(obj, num);
@@ -207,36 +188,44 @@ static void showobject(int num, int gen)
int main(int argc, char **argv)
{
+ fz_error error;
+ char *infile;
char *password = "";
int c, o;
- while ((c = fz_getopt(argc, argv, "p:")) != -1)
+ while ((c = fz_getopt(argc, argv, "p:r")) != -1)
{
switch (c)
{
case 'p': password = fz_optarg; break;
- default:
- showusage();
- break;
+ case 'r': dorgb++; break;
+ default: usage(); break;
}
}
if (fz_optind == argc)
- showusage();
+ usage();
- openxref(argv[fz_optind++], password, 0, 0);
+ infile = argv[fz_optind++];
+ error = pdf_openxref(&xref, infile, password);
+ if (error)
+ die(fz_rethrow(error, "cannot open input file '%s'", infile));
if (fz_optind == argc)
+ {
for (o = 0; o < xref->len; o++)
- showobject(o, 0);
+ showobject(o);
+ }
else
- while (fz_optind < argc)
{
- showobject(atoi(argv[fz_optind]), 0);
- fz_optind++;
+ while (fz_optind < argc)
+ {
+ showobject(atoi(argv[fz_optind]));
+ fz_optind++;
+ }
}
- closexref();
+ pdf_freexref(xref);
return 0;
}
diff --git a/apps/pdfinfo.c b/apps/pdfinfo.c
index 26a7556f..af29c0d2 100644
--- a/apps/pdfinfo.c
+++ b/apps/pdfinfo.c
@@ -3,7 +3,22 @@
* Print information about the input pdf.
*/
-#include "pdftool.h"
+#include "fitz.h"
+#include "mupdf.h"
+
+pdf_xref *xref;
+int pagecount;
+
+void closexref(void);
+
+void die(fz_error error)
+{
+ fz_catch(error, "aborting");
+ closexref();
+ exit(1);
+}
+
+void openxref(char *filename, char *password, int dieonbadpass, int loadpages);
enum
{
@@ -80,9 +95,14 @@ static int forms = 0;
static struct info *psobj = nil;
static int psobjs = 0;
-static void local_cleanup(void)
+void closexref(void)
{
int i;
+ if (xref)
+ {
+ pdf_freexref(xref);
+ xref = nil;
+ }
if (dim)
{
@@ -922,6 +942,7 @@ showinfo(char *filename, int show, char *pagelist)
int main(int argc, char **argv)
{
enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state;
+ fz_error error;
char *filename = "";
char *password = "";
int show = ALL;
@@ -947,8 +968,6 @@ int main(int argc, char **argv)
if (fz_optind == argc)
infousage();
- setcleanup(local_cleanup);
-
state = NO_FILE_OPENED;
while (fz_optind < argc)
{
@@ -963,7 +982,10 @@ int main(int argc, char **argv)
closexref();
filename = argv[fz_optind];
printf("%s:\n", filename);
- openxref(filename, password, 0, 1);
+ error = pdf_openxref(&xref, filename, password);
+ if (error)
+ die(fz_rethrow(error, "cannot open input file '%s'", filename));
+ pagecount = pdf_getpagecount(xref);
showglobalinfo();
state = NO_INFO_GATHERED;
}
diff --git a/apps/pdfshow.c b/apps/pdfshow.c
index 0e91a0af..b15fae1b 100644
--- a/apps/pdfshow.c
+++ b/apps/pdfshow.c
@@ -2,18 +2,28 @@
* pdfshow -- the ultimate pdf debugging tool
*/
-#include "pdftool.h"
+#include "fitz.h"
+#include "mupdf.h"
+static pdf_xref *xref = NULL;
static int showbinary = 0;
static int showraw = 0;
static int showcolumn;
-static void showusage(void)
+void die(fz_error error)
+{
+ fz_catch(error, "aborting");
+ if (xref)
+ pdf_freexref(xref);
+ exit(1);
+}
+
+static void usage(void)
{
fprintf(stderr, "usage: pdfshow [-bc] [-p password] <file> [xref] [trailer] [object numbers]\n");
- fprintf(stderr, "\t-b\tprint streams as binary data (don't pretty-print)\n");
+ fprintf(stderr, "\t-b\tprint streams as binary data\n");
fprintf(stderr, "\t-c\tprint compressed streams (don't decompress)\n");
- fprintf(stderr, "\t-p\tpassword for encrypted files\n");
+ fprintf(stderr, "\t-p\tpassword\n");
exit(1);
}
@@ -103,12 +113,19 @@ static void showobject(int num, int gen)
if (pdf_isstream(xref, num, gen))
{
- printf("%d %d obj\n", num, gen);
- fz_debugobj(obj);
- printf("stream\n");
- showstream(num, gen);
- printf("endstream\n");
- printf("endobj\n\n");
+ if (showraw)
+ {
+ showstream(num, gen);
+ }
+ else
+ {
+ printf("%d %d obj\n", num, gen);
+ fz_debugobj(obj);
+ printf("stream\n");
+ showstream(num, gen);
+ printf("endstream\n");
+ printf("endobj\n\n");
+ }
}
else
{
@@ -122,7 +139,11 @@ static void showobject(int num, int gen)
int main(int argc, char **argv)
{
- char *password = "";
+ char *password = NULL; /* don't throw errors if encrypted */
+ fz_error error;
+ fz_stream *file;
+ char *filename;
+ int fd;
int c;
while ((c = fz_getopt(argc, argv, "p:bc")) != -1)
@@ -132,16 +153,26 @@ int main(int argc, char **argv)
case 'p': password = fz_optarg; break;
case 'b': showbinary ++; break;
case 'c': showraw ++; break;
- default:
- showusage();
- break;
+ default: usage(); break;
}
}
if (fz_optind == argc)
- showusage();
+ usage();
- openxref(argv[fz_optind++], password, 0, 0);
+ /* Use newxref directly because we don't care about the page tree */
+ {
+ filename = argv[fz_optind++];
+ fd = open(filename, O_BINARY | O_RDONLY);
+ if (fd < 0)
+ return fz_throw("cannot open file '%s': %s", filename, strerror(errno));
+
+ file = fz_openfile(fd);
+ error = pdf_newxref(&xref, file, password);
+ if (error)
+ die(fz_rethrow(error, "cannot load document '%s'", filename));
+ fz_dropstream(file);
+ }
if (fz_optind == argc)
showtrailer();
@@ -157,7 +188,7 @@ int main(int argc, char **argv)
fz_optind++;
}
- closexref();
+ pdf_freexref(xref);
return 0;
}
diff --git a/apps/pdftool.c b/apps/pdftool.c
deleted file mode 100644
index 094eeaa5..00000000
--- a/apps/pdftool.c
+++ /dev/null
@@ -1,86 +0,0 @@
-#include "pdftool.h"
-
-char *basename = nil;
-pdf_xref *xref = nil;
-int pagecount = 0;
-static void (*cleanup)(void) = nil;
-
-void closexref(void);
-
-void die(fz_error error)
-{
- fz_catch(error, "aborting");
- if (cleanup)
- cleanup();
- closexref();
- exit(1);
-}
-
-void setcleanup(void (*func)(void))
-{
- cleanup = func;
-}
-
-void openxref(char *filename, char *password, int dieonbadpass, int loadpages)
-{
- fz_stream *file;
- fz_error error;
- int okay;
- int fd;
-
- basename = strrchr(filename, '/');
- if (!basename)
- basename = filename;
- else
- basename++;
-
- fd = open(filename, O_BINARY | O_RDONLY, 0666);
- if (fd < 0)
- die(fz_throw("cannot open file '%s': %s", filename, strerror(errno)));
-
- file = fz_openfile(fd);
- error = pdf_openxref(&xref, file);
- if (error)
- die(fz_rethrow(error, "cannot open document '%s'", basename));
- fz_dropstream(file);
-
- if (pdf_needspassword(xref))
- {
- okay = pdf_authenticatepassword(xref, password);
- if (!okay && !dieonbadpass)
- fz_warn("invalid password, attempting to continue.");
- else if (!okay && dieonbadpass)
- die(fz_throw("invalid password"));
- }
-
- if (loadpages)
- {
- error = pdf_loadpagetree(xref);
- if (error)
- die(fz_rethrow(error, "cannot load page tree"));
- pagecount = pdf_getpagecount(xref);
- }
-}
-
-void flushxref(void)
-{
- if (xref)
- {
- pdf_flushxref(xref, 0);
- }
-}
-
-void closexref(void)
-{
- if (cleanup)
- cleanup();
-
- if (xref)
- {
- pdf_closexref(xref);
- xref = nil;
- }
-
- basename = nil;
-}
-
diff --git a/apps/pdftool.h b/apps/pdftool.h
deleted file mode 100644
index 1560c225..00000000
--- a/apps/pdftool.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#include "fitz.h"
-#include "mupdf.h"
-
-extern char *basename;
-extern pdf_xref *xref;
-extern int pagecount;
-
-void die(fz_error error);
-void setcleanup(void (*cleanup)(void));
-
-void openxref(char *filename, char *password, int dieonbadpass, int loadpages);
-void flushxref(void);
-void closexref(void);
-