diff options
-rw-r--r-- | Makefile | 31 | ||||
-rw-r--r-- | apps/pdfapp.c | 6 | ||||
-rw-r--r-- | apps/pdfclean.c | 342 | ||||
-rw-r--r-- | apps/pdfdraw.c | 560 | ||||
-rw-r--r-- | apps/pdfextract.c | 131 | ||||
-rw-r--r-- | apps/pdfinfo.c | 32 | ||||
-rw-r--r-- | apps/pdfshow.c | 65 | ||||
-rw-r--r-- | apps/pdftool.c | 86 | ||||
-rw-r--r-- | apps/pdftool.h | 14 | ||||
-rw-r--r-- | fitz/fitz.h | 4 | ||||
-rw-r--r-- | fitz/res_pixmap.c | 39 | ||||
-rw-r--r-- | mupdf/mupdf.h | 14 | ||||
-rw-r--r-- | mupdf/pdf_open.c | 668 | ||||
-rw-r--r-- | mupdf/pdf_page.c | 3 | ||||
-rw-r--r-- | mupdf/pdf_xref.c | 759 | ||||
-rw-r--r-- | win32/mupdf/mupdf.vcproj | 4 |
16 files changed, 1244 insertions, 1514 deletions
@@ -154,7 +154,6 @@ MUPDF_SRC := \ mupdf/pdf_interpret.c \ mupdf/pdf_lex.c \ mupdf/pdf_nametree.c \ - mupdf/pdf_open.c \ mupdf/pdf_outline.c \ mupdf/pdf_page.c \ mupdf/pdf_pagetree.c \ @@ -305,52 +304,46 @@ $(MUPDF_LIB): $(FITZ_OBJ) $(DRAW_OBJ) $(MUPDF_OBJ) $(CMAP_OBJ) $(FONT_OBJ) APPS = $(PDFSHOW_EXE) $(PDFCLEAN_EXE) $(PDFDRAW_EXE) $(PDFEXTRACT_EXE) $(PDFINFO_EXE) $(PDFVIEW_EXE) -PDFAPP_HDR = apps/pdfapp.h -PDFTOOL_HDR = apps/pdftool.h - $(OBJDIR)/%.o: apps/%.c $(CC_CMD) -PDFSHOW_SRC=apps/pdfshow.c apps/pdftool.c +PDFSHOW_SRC=apps/pdfshow.c PDFSHOW_OBJ=$(PDFSHOW_SRC:apps/%.c=$(OBJDIR)/%.o) PDFSHOW_EXE=$(OBJDIR)/pdfshow - -$(PDFSHOW_OBJ): $(MUPDF_HDR) $(PDFTOOL_HDR) +$(PDFSHOW_OBJ): $(MUPDF_HDR) $(PDFSHOW_EXE): $(PDFSHOW_OBJ) $(MUPDF_LIB) $(THIRD_LIBS) $(LD_CMD) -PDFCLEAN_SRC=apps/pdfclean.c apps/pdftool.c +PDFCLEAN_SRC=apps/pdfclean.c PDFCLEAN_OBJ=$(PDFCLEAN_SRC:apps/%.c=$(OBJDIR)/%.o) PDFCLEAN_EXE=$(OBJDIR)/pdfclean - -$(PDFCLEAN_OBJ): $(MUPDF_HDR) $(PDFTOOL_HDR) +$(PDFCLEAN_OBJ): $(MUPDF_HDR) $(PDFCLEAN_EXE): $(PDFCLEAN_OBJ) $(MUPDF_LIB) $(THIRD_LIBS) $(LD_CMD) -PDFDRAW_SRC=apps/pdfdraw.c apps/pdftool.c +PDFDRAW_SRC=apps/pdfdraw.c PDFDRAW_OBJ=$(PDFDRAW_SRC:apps/%.c=$(OBJDIR)/%.o) PDFDRAW_EXE=$(OBJDIR)/pdfdraw - -$(PDFDRAW_OBJ): $(MUPDF_HDR) $(PDFTOOL_HDR) +$(PDFDRAW_OBJ): $(MUPDF_HDR) $(PDFDRAW_EXE): $(PDFDRAW_OBJ) $(MUPDF_LIB) $(THIRD_LIBS) $(LD_CMD) -PDFEXTRACT_SRC=apps/pdfextract.c apps/pdftool.c +PDFEXTRACT_SRC=apps/pdfextract.c PDFEXTRACT_OBJ=$(PDFEXTRACT_SRC:apps/%.c=$(OBJDIR)/%.o) PDFEXTRACT_EXE=$(OBJDIR)/pdfextract - -$(PDFEXTRACT_OBJ): $(MUPDF_HDR) $(PDFTOOL_HDR) +$(PDFEXTRACT_OBJ): $(MUPDF_HDR) $(PDFEXTRACT_EXE): $(PDFEXTRACT_OBJ) $(MUPDF_LIB) $(THIRD_LIBS) $(LD_CMD) -PDFINFO_SRC=apps/pdfinfo.c apps/pdftool.c +PDFINFO_SRC=apps/pdfinfo.c PDFINFO_OBJ=$(PDFINFO_SRC:apps/%.c=$(OBJDIR)/%.o) PDFINFO_EXE=$(OBJDIR)/pdfinfo - -$(PDFINFO_OBJ): $(MUPDF_HDR) $(PDFTOOL_HDR) +$(PDFINFO_OBJ): $(MUPDF_HDR) $(PDFINFO_EXE): $(PDFINFO_OBJ) $(MUPDF_LIB) $(THIRD_LIBS) $(LD_CMD) +PDFAPP_HDR = apps/pdfapp.h + X11VIEW_SRC=apps/x11_main.c apps/x11_image.c apps/pdfapp.c X11VIEW_OBJ=$(X11VIEW_SRC:apps/%.c=$(OBJDIR)/%.o) X11VIEW_EXE=$(OBJDIR)/mupdf diff --git a/apps/pdfapp.c b/apps/pdfapp.c index c5930129..95885e2a 100644 --- a/apps/pdfapp.c +++ b/apps/pdfapp.c @@ -103,7 +103,7 @@ void pdfapp_open(pdfapp_t *app, char *filename, int fd) */ file = fz_openfile(fd); - error = pdf_openxref(&app->xref, file); + error = pdf_newxref(&app->xref, file, NULL); if (error) pdfapp_error(app, fz_rethrow(error, "cannot open document '%s'", filename)); fz_dropstream(file); @@ -195,7 +195,7 @@ void pdfapp_close(pdfapp_t *app) pdf_freestore(app->xref->store); app->xref->store = nil; - pdf_closexref(app->xref); + pdf_freexref(app->xref); app->xref = nil; } } @@ -255,8 +255,6 @@ static void pdfapp_showpage(pdfapp_t *app, int loadpage, int drawpage) pdf_freepage(app->page); app->page = nil; - pdf_flushxref(app->xref, 0); - obj = pdf_getpageobject(app->xref, app->pageno); error = pdf_loadpage(&app->page, app->xref, obj); if (error) diff --git a/apps/pdfclean.c b/apps/pdfclean.c index ee6ca722..0ecbb98e 100644 --- a/apps/pdfclean.c +++ b/apps/pdfclean.c @@ -4,10 +4,13 @@ * Rewrite PDF with pretty printed objects. * Garbage collect unreachable objects. * Inflate compressed streams. - * Encrypt output. + * Create subset documents. + * + * TODO: linearize document for fast web view */ -#include "pdftool.h" +#include "fitz.h" +#include "mupdf.h" static FILE *out = NULL; @@ -20,6 +23,29 @@ static pdf_xrefentry *oldxreflist = NULL; static int dogarbage = 0; static int doexpand = 0; +static pdf_xref *xref = NULL; + +void die(fz_error error) +{ + fz_catch(error, "aborting"); + if (xref) + pdf_freexref(xref); + exit(1); +} + +static void usage(void) +{ + fprintf(stderr, + "usage: pdfclean [options] input.pdf [output.pdf] [pages]\n" + "\t-p -\tpassword\n" + "\t-g\tgarbage collect unused objects\n" + "\t-gg\tin addition to -g compact xref table\n" + "\t-ggg\tin addition to -gg merge duplicate objects\n" + "\t-x\texpand compressed streams\n" + "\tpages\tcomma separated list of ranges\n"); + exit(1); +} + /* * Garbage collect objects not reachable from the trailer. */ @@ -67,6 +93,10 @@ static void sweepref(fz_obj *obj) sweepobj(fz_resolveindirect(obj)); } +/* + * Renumber objects to compact the xref table + */ + static void renumberobj(fz_obj *obj) { int i; @@ -109,6 +139,53 @@ static void renumberobj(fz_obj *obj) } } +static void renumberxref(void) +{ + int num, newnum; + + newnumlist = fz_malloc(xref->len * sizeof(int)); + oldxreflist = fz_malloc(xref->len * sizeof(pdf_xrefentry)); + for (num = 0; num < xref->len; num++) + { + newnumlist[num] = -1; + oldxreflist[num] = xref->table[num]; + } + + newnum = 1; + for (num = 0; num < xref->len; num++) + { + if (xref->table[num].type == 'f') + uselist[num] = 0; + if (uselist[num]) + newnumlist[num] = newnum++; + } + + renumberobj(xref->trailer); + for (num = 0; num < xref->len; num++) + renumberobj(xref->table[num].obj); + + for (num = 0; num < xref->len; num++) + uselist[num] = 0; + + for (num = 0; num < xref->len; num++) + { + if (newnumlist[num] >= 0) + { + xref->table[newnumlist[num]] = oldxreflist[num]; + uselist[newnumlist[num]] = 1; + } + } + + fz_free(oldxreflist); + fz_free(newnumlist); + + xref->len = newnum; +} + +/* + * Scan and remove duplicate objects (slow) + */ + static void removeduplicateobjs(void) { int num, other; @@ -142,6 +219,99 @@ static void removeduplicateobjs(void) fz_free(newnumlist); } +/* + * Recreate page tree to only retain specified pages. + */ + +static void retainpages(int argc, char **argv) +{ + fz_obj *root, *pages, *kids; + int count; + + /* Snatch pages entry from root dict */ + root = fz_dictgets(xref->trailer, "Root"); + pages = fz_keepobj(fz_dictgets(root, "Pages")); + + /* Then empty the root dict */ + while (fz_dictlen(root) > 0) + { + fz_obj *key = fz_dictgetkey(root, 0); + fz_dictdel(root, key); + } + + /* And only retain pages and type entries */ + fz_dictputs(root, "Pages", pages); + fz_dictputs(root, "Type", fz_newname("Catalog")); + fz_dropobj(pages); + + /* Create a new kids array too add into pages dict + * since each element must be replaced to point to + * a retained page */ + kids = fz_newarray(1); + count = 0; + + /* Retain pages specified */ + while (argc - fz_optind) + { + int page, spage, epage; + char *spec, *dash; + char *pagelist = argv[fz_optind]; + + spec = fz_strsep(&pagelist, ","); + while (spec) + { + dash = strchr(spec, '-'); + + if (dash == spec) + spage = epage = 1; + else + spage = epage = atoi(spec); + + if (dash) + { + if (strlen(dash) > 1) + epage = atoi(dash + 1); + else + epage = pdf_getpagecount(xref); + } + + if (spage > epage) + page = spage, spage = epage, epage = page; + + if (spage < 1) + spage = 1; + if (epage > pdf_getpagecount(xref)) + epage = pdf_getpagecount(xref); + + for (page = spage; page <= epage; page++) + { + fz_obj *pageobj = pdf_getpageobject(xref, page); + fz_obj *pageref = pdf_getpageref(xref, page); + + /* Update parent reference */ + fz_dictputs(pageobj, "Parent", pages); + + /* Store page object in new kids array */ + fz_arraypush(kids, pageref); + count++; + + fz_dropobj(pageref); + } + + spec = fz_strsep(&pagelist, ","); + } + + fz_optind++; + } + + /* Update page count and kids array */ + fz_dictputs(pages, "Count", fz_newint(count)); + fz_dictputs(pages, "Kids", kids); +} + +/* + * Make sure we have loaded objects from object streams. + */ static void preloadobjstms(void) { @@ -161,6 +331,10 @@ static void preloadobjstms(void) } } +/* + * Save streams and objects to the output + */ + static void copystream(fz_obj *obj, int num, int gen) { fz_error error; @@ -208,7 +382,7 @@ static void expandstream(fz_obj *obj, int num, int gen) fz_dropbuffer(buf); } -static void saveobject(int num, int gen) +static void writeobject(int num, int gen) { fz_error error; fz_obj *obj; @@ -253,7 +427,7 @@ static void saveobject(int num, int gen) fz_dropobj(obj); } -static void savexref(void) +static void writexref(void) { fz_obj *trailer; fz_obj *obj; @@ -297,148 +471,7 @@ static void savexref(void) fprintf(out, "startxref\n%d\n%%%%EOF\n", startxref); } -static void cleanusage(void) -{ - fprintf(stderr, - "usage: pdfclean [options] input.pdf [outfile.pdf] [pages]\n" - "\t-p -\tpassword for decryption\n" - "\t-g\tgarbage collect unused objects\n" - "\t-gg\tin addition to -g xref is compacted\n" - "\t-ggg\tin addition to -gg identical objects are garbage collected\n" - "\t-x\texpand compressed streams\n"); - exit(1); -} - -static void retainpages(int argc, char **argv) -{ - fz_obj *root, *pages, *kids; - int count; - - /* Snatch pages entry from root dict */ - root = fz_dictgets(xref->trailer, "Root"); - pages = fz_keepobj(fz_dictgets(root, "Pages")); - - /* Then empty the root dict */ - while (fz_dictlen(root) > 0) - { - fz_obj *key = fz_dictgetkey(root, 0); - fz_dictdel(root, key); - } - - /* And only retain pages and type entries */ - fz_dictputs(root, "Pages", pages); - fz_dictputs(root, "Type", fz_newname("Catalog")); - fz_dropobj(pages); - - /* Create a new kids array too add into pages dict - * since each element must be replaced to point to - * a retained page */ - kids = fz_newarray(1); - count = 0; - - /* Retain pages specified */ - while (argc - fz_optind) - { - int page, spage, epage; - char *spec, *dash; - char *pagelist = argv[fz_optind]; - - spec = fz_strsep(&pagelist, ","); - while (spec) - { - dash = strchr(spec, '-'); - - if (dash == spec) - spage = epage = 1; - else - spage = epage = atoi(spec); - - if (dash) - { - if (strlen(dash) > 1) - epage = atoi(dash + 1); - else - epage = pagecount; - } - - if (spage > epage) - page = spage, spage = epage, epage = page; - - if (spage < 1) - spage = 1; - if (epage > pagecount) - epage = pagecount; - - for (page = spage; page <= epage; page++) - { - fz_obj *pageobj = pdf_getpageobject(xref, page); - fz_obj *pageref = pdf_getpageref(xref, page); - - /* Update parent reference */ - fz_dictputs(pageobj, "Parent", pages); - - /* Store page object in new kids array */ - fz_arraypush(kids, pageref); - count++; - - fz_dropobj(pageref); - } - - spec = fz_strsep(&pagelist, ","); - } - - fz_optind++; - } - - /* Update page count and kids array */ - fz_dictputs(pages, "Count", fz_newint(count)); - fz_dictputs(pages, "Kids", kids); -} - -static void renumberxref(void) -{ - int num, newnum; - - newnumlist = fz_malloc(xref->len * sizeof(int)); - oldxreflist = fz_malloc(xref->len * sizeof(pdf_xrefentry)); - for (num = 0; num < xref->len; num++) - { - newnumlist[num] = -1; - oldxreflist[num] = xref->table[num]; - } - - newnum = 1; - for (num = 0; num < xref->len; num++) - { - if (xref->table[num].type == 'f') - uselist[num] = 0; - if (uselist[num]) - newnumlist[num] = newnum++; - } - - renumberobj(xref->trailer); - for (num = 0; num < xref->len; num++) - renumberobj(xref->table[num].obj); - - for (num = 0; num < xref->len; num++) - uselist[num] = 0; - - for (num = 0; num < xref->len; num++) - { - if (newnumlist[num] >= 0) - { - xref->table[newnumlist[num]] = oldxreflist[num]; - uselist[newnumlist[num]] = 1; - } - } - - fz_free(oldxreflist); - fz_free(newnumlist); - - xref->len = newnum; -} - -static void outputpdf(void) +static void writepdf(void) { int lastfree; int num; @@ -461,7 +494,7 @@ static void outputpdf(void) if (xref->table[num].type == 'n' || xref->table[num].type == 'o') { ofslist[num] = ftell(out); - saveobject(num, genlist[num]); + writeobject(num, genlist[num]); } } @@ -477,11 +510,12 @@ static void outputpdf(void) } } - savexref(); + writexref(); } int main(int argc, char **argv) { + fz_error error; char *infile; char *outfile = "out.pdf"; char *password = ""; @@ -495,12 +529,12 @@ int main(int argc, char **argv) case 'p': password = fz_optarg; break; case 'g': dogarbage ++; break; case 'x': doexpand ++; break; - default: cleanusage(); break; + default: usage(); break; } } if (argc - fz_optind < 1) - cleanusage(); + usage(); infile = argv[fz_optind++]; @@ -514,7 +548,9 @@ int main(int argc, char **argv) if (argc - fz_optind > 0) subset = 1; - openxref(infile, password, 0, subset); + error = pdf_openxref(&xref, infile, password); + if (error) + die(fz_rethrow(error, "cannot open input file '%s'", infile)); out = fopen(outfile, "wb"); if (!out) @@ -552,9 +588,9 @@ int main(int argc, char **argv) if (dogarbage >= 2) renumberxref(); - outputpdf(); + writepdf(); - closexref(); + pdf_freexref(xref); return 0; } diff --git a/apps/pdfdraw.c b/apps/pdfdraw.c index 3ec51631..3c645d86 100644 --- a/apps/pdfdraw.c +++ b/apps/pdfdraw.c @@ -1,407 +1,156 @@ /* - * pdfdraw: - * Draw pages to PPM bitmaps. - * Dump parsed display list as XML. - * Dump text content as UTF-8. - * Benchmark rendering speed. + * pdfdraw -- command line tool for drawing pdf documents */ -#include "pdftool.h" +#include "fitz.h" +#include "mupdf.h" #define MAXBANDSIZE (3 * 1024 * 1024) -#ifdef _MSC_VER -#include <winsock2.h> -#else -#include <sys/time.h> -#endif +char *output = NULL; +float resolution = 72; -enum { DRAWPNM, DRAWPGM, DRAWTXT, DRAWXML }; +int showxml = 0; +int showtext = 0; +int showtime = 0; +int savealpha = 0; -struct benchmark -{ - int pages; - long min; - int minpage; - long avg; - long max; - int maxpage; -}; - -static fz_glyphcache *drawcache = nil; -static int drawmode = DRAWPNM; -static char *drawpattern = nil; -static pdf_page *drawpage = nil; -static float drawzoom = 1; -static int drawrotate = 0; -static int drawbands = 1; -static int drawcount = 0; -static int benchmark = 0; -static int checksum = 0; - -static void local_cleanup(void) -{ - if (xref && xref->store) - { - pdf_freestore(xref->store); - xref->store = nil; - } - if (drawcache) - { - fz_freeglyphcache(drawcache); - drawcache = nil; - } -} +fz_glyphcache *glyphcache; +char *filename; -static void drawusage(void) +static void die(fz_error error) { - fprintf(stderr, - "usage: pdfdraw [options] [file.pdf pages ... ]\n" - "\t-p -\tpassword for decryption\n" - "\t-o -\tpattern (%%d for page number) for output file\n" - "\t-r -\tresolution in dpi\n" - "\t-m\tprint benchmark results\n" - "\t-g\trender grayscale\n" - "\t-s\tprint MD5 checksum of page pixel data\n" - "\t-t\ttext extraction mode\n" - "\t-x\txml trace mode\n" - "example:\n" - "\tpdfdraw -o output%%03d.pnm input.pdf 1-3,5,9-\n"); + fz_catch(error, "aborting"); exit(1); } -static void gettime(long *time_) -{ - struct timeval tv; - - if (gettimeofday(&tv, NULL) < 0) - abort(); - - *time_ = tv.tv_sec * 1000000 + tv.tv_usec; -} - -static void drawloadpage(int pagenum, struct benchmark *loadtimes) +static void usage(void) { - fz_error error; - fz_obj *pageobj; - long start; - long end; - long elapsed; - - fprintf(stdout, "draw %s:%03d ", basename, pagenum); - if (benchmark && loadtimes) - { - fflush(stdout); - gettime(&start); - } - - pageobj = pdf_getpageobject(xref, pagenum); - error = pdf_loadpage(&drawpage, xref, pageobj); - if (error) - die(fz_rethrow(error, "cannot load page %d (%d %d R) in PDF file '%s'", pagenum, fz_tonum(pageobj), fz_togen(pageobj), basename)); - - if (benchmark && loadtimes) - { - gettime(&end); - elapsed = end - start; - - if (elapsed < loadtimes->min) - { - loadtimes->min = elapsed; - loadtimes->minpage = pagenum; - } - if (elapsed > loadtimes->max) - { - loadtimes->max = elapsed; - loadtimes->maxpage = pagenum; - } - loadtimes->avg += elapsed; - loadtimes->pages++; - } - - if (benchmark) - fflush(stdout); + fprintf(stderr, + "usage: pdfdraw [options] input.pdf [pages]\n" + "\t-o -\toutput filename (%%d for page number)\n" + "\t\tsupported formats: pgm, ppm, pam, png\n" + "\t-p -\tpassword\n" + "\t-r -\tresolution in dpi (default: 72)\n" + "\t-x\tshow display list as xml\n" + "\t-t\textract text (-tt for xml)\n" + "\t-a\tsave alpha channel (only pam and png)\n" + "\tpages\tcomma separated list of ranges\n"); + exit(1); } -static void drawfreepage(void) +static int isrange(char *s) { - pdf_freepage(drawpage); - drawpage = nil; - - flushxref(); - - /* Flush resources between pages. - * TODO: should check memory usage before deciding to do this. - */ - if (xref && xref->store) + while (*s) { - /* pdf_debugstore(xref->store); */ - pdf_agestoreditems(xref->store); - pdf_evictageditems(xref->store); - fflush(stdout); + if ((*s < '0' || *s > '9') && *s != '-' && *s != ',') + return 0; + s++; } + return 1; } -static void drawpnm(int pagenum, struct benchmark *loadtimes, struct benchmark *drawtimes, int greyscale) +static void drawpage(pdf_xref *xref, int pagenum) { - static int fd = -1; fz_error error; + fz_obj *pageobj; + pdf_page *page; + fz_displaylist *list; + fz_device *dev; fz_matrix ctm; fz_bbox bbox; + fz_colorspace *colorspace; fz_pixmap *pix; - char name[256]; - char pnmhdr[256]; - int i, x, y, w, h, b, bh; - long start; - long end; - long elapsed; - fz_md5 digest; - int numbands = drawbands; - fz_displaylist *list = nil; - fz_device *dev; - - if (checksum) - fz_md5init(&digest); - - drawloadpage(pagenum, loadtimes); - - if (benchmark) - gettime(&start); - - ctm = fz_identity; - ctm = fz_concat(ctm, fz_translate(0, -drawpage->mediabox.y1)); - ctm = fz_concat(ctm, fz_scale(drawzoom, -drawzoom)); - ctm = fz_concat(ctm, fz_rotate(drawrotate + drawpage->rotate)); - - bbox = fz_roundrect(fz_transformrect(ctm, drawpage->mediabox)); - w = bbox.x1 - bbox.x0; - h = bbox.y1 - bbox.y0; - - if (w * h > MAXBANDSIZE) - numbands = (w * h) / MAXBANDSIZE; - if (numbands < 1) - numbands = 1; - - bh = h / numbands; + char buf[512]; + float zoom; - if (drawpattern) - { - if (strchr(drawpattern, '%') || fd < 0) - { - sprintf(name, drawpattern, drawcount++); - fd = open(name, O_BINARY|O_WRONLY|O_CREAT|O_TRUNC, 0666); - if (fd < 0) - die(fz_throw("ioerror: could not create raster file '%s'", name)); - } + pageobj = pdf_getpageobject(xref, pagenum); + error = pdf_loadpage(&page, xref, pageobj); + if (error) + die(fz_rethrow(error, "cannot load page %d in file '%s'", pagenum, filename)); - if (greyscale) - { - sprintf(pnmhdr, "P5\n%d %d\n255\n", w, h); - } - else - { - sprintf(pnmhdr, "P6\n%d %d\n255\n", w, h); - } - write(fd, pnmhdr, strlen(pnmhdr)); - } + list = fz_newdisplaylist(); - pix = fz_newpixmap((greyscale ? pdf_devicegray : pdf_devicergb), bbox.x0, bbox.y0, w, bh); + dev = fz_newlistdevice(list); + error = pdf_runpage(xref, page, dev, fz_identity); + if (error) + die(fz_rethrow(error, "cannot draw page %d in file '%s'", pagenum, filename)); + fz_freedevice(dev); - if (numbands > 1) + if (showxml) { - fprintf(stdout, "creating display list for banded rendering\n"); - list = fz_newdisplaylist(); - dev = fz_newlistdevice(list); - error = pdf_runpage(xref, drawpage, dev, fz_identity); - if (error) - die(fz_rethrow(error, "cannot draw page %d in PDF file '%s'", pagenum, basename)); + dev = fz_newtracedevice(); + printf("<page number=\"%d\">\n", pagenum); + fz_executedisplaylist(list, dev, fz_identity); + printf("</page>\n"); fz_freedevice(dev); } - for (b = 0; b < numbands; b++) + if (showtext) { - fz_clearpixmap(pix, 0xFF); - - dev = fz_newdrawdevice(drawcache, pix); - - if (numbands > 1) - { - fprintf(stdout, "drawing band %d / %d\n", b + 1, numbands); - fz_executedisplaylist(list, dev, ctm); - } - else - { - error = pdf_runpage(xref, drawpage, dev, ctm); - if (error) - die(fz_rethrow(error, "cannot draw page %d in PDF file '%s'", pagenum, basename)); - } - + fz_textspan *text = fz_newtextspan(); + dev = fz_newtextdevice(text); + fz_executedisplaylist(list, dev, fz_identity); fz_freedevice(dev); - - if (checksum) - fz_md5update(&digest, pix->samples, pix->h * pix->w * pix->n); - if (drawpattern) - { - unsigned char *src = pix->samples; - if (greyscale) - { - for (y = pix->h; y > 0; y--) - { - unsigned char *dst = src; - - for (x = pix->w; x > 0; x--) - { - *dst++ = *src++; - src++; - } - dst -= pix->w; - - write(fd, dst, pix->w); - } - } - else - { - for (y = pix->h; y > 0; y--) - { - unsigned char *dst = src; - - for (x = pix->w; x > 0; x--) - { - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - src++; - } - dst -= pix->w * 3; - - write(fd, dst, pix->w * 3); - } - } - } - - pix->y += bh; - if (pix->y + pix->h > bbox.y1) - pix->h = bbox.y1 - pix->y; + printf("[Page %d]\n", pagenum); + if (showtext > 1) + fz_debugtextspanxml(text); + else + fz_debugtextspan(text); + printf("\n"); + fz_freetextspan(text); } - if (list) - fz_freedisplaylist(list); - - fz_droppixmap(pix); - - if (checksum) + if (output || showtime) { - unsigned char buf[16]; - fz_md5final(&digest, buf); - for (i = 0; i < 16; i++) - fprintf(stdout, "%02x", buf[i]); - fprintf(stdout, " "); - } - - if (drawpattern && strchr(drawpattern, '%')) - close(fd); - - drawfreepage(); - - if (benchmark) - { - gettime(&end); - elapsed = end - start; - - if (elapsed < drawtimes->min) - { - drawtimes->min = elapsed; - drawtimes->minpage = pagenum; - } - if (elapsed > drawtimes->max) - { - drawtimes->max = elapsed; - drawtimes->maxpage = pagenum; - } - drawtimes->avg += elapsed; - drawtimes->pages++; - - fprintf(stdout, "time %.3fs", - elapsed / 1000000.0); - } + sprintf(buf, output, pagenum); - fprintf(stdout, "\n"); -} + zoom = resolution / 72; + ctm = fz_translate(0, -page->mediabox.y1); + ctm = fz_concat(ctm, fz_scale(zoom, -zoom)); + ctm = fz_concat(ctm, fz_rotate(page->rotate)); + bbox = fz_roundrect(fz_transformrect(ctm, page->mediabox)); -static void drawtxt(int pagenum, struct benchmark *loadtimes) -{ - fz_error error; - fz_matrix ctm; - fz_textspan *text; - fz_device *dev; + colorspace = pdf_devicergb; + if (strstr(output, ".pgm")) + colorspace = pdf_devicegray; - drawloadpage(pagenum, loadtimes); - - ctm = fz_identity; - - text = fz_newtextspan(); - dev = fz_newtextdevice(text); - - error = pdf_runpage(xref, drawpage, dev, ctm); - if (error) - die(fz_rethrow(error, "cannot extract text from page %d in PDF file '%s'", pagenum, basename)); + /* TODO: banded rendering and multi-page ppm */ - fz_freedevice(dev); - - printf("[Page %d]\n", pagenum); - fz_debugtextspan(text); - printf("\n"); - - fz_freetextspan(text); - - drawfreepage(); -} - -static void drawxml(int pagenum) -{ - fz_error error; - fz_obj *pageobj; - fz_matrix ctm; - fz_device *dev; + pix = fz_newpixmap(colorspace, bbox.x0, bbox.y0, bbox.x1, bbox.y1); - pageobj = pdf_getpageobject(xref, pagenum); - error = pdf_loadpage(&drawpage, xref, pageobj); - if (error) - die(fz_rethrow(error, "cannot load page %d (%d %d R) from PDF file '%s'", pagenum, fz_tonum(pageobj), fz_togen(pageobj), basename)); + if (savealpha) + fz_clearpixmap(pix, 0x00); + else + fz_clearpixmap(pix, 0xff); - ctm = fz_identity; + dev = fz_newdrawdevice(glyphcache, pix); + fz_executedisplaylist(list, dev, ctm); + fz_freedevice(dev); - dev = fz_newtracedevice(); - printf("<?xml version=\"1.0\"?>\n"); - printf("<page number=\"%d\">\n", pagenum); + if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm")) + fz_writepnm(pix, buf); + else if (strstr(output, ".pam")) + fz_writepam(pix, buf, savealpha); + else if (strstr(output, ".png")) + fz_writepng(pix, buf, savealpha); - error = pdf_runpage(xref, drawpage, dev, ctm); - if (error) - die(fz_rethrow(error, "cannot display page %d in PDF file '%s' as XML", pagenum, basename)); + fz_droppixmap(pix); + } - fz_freedevice(dev); + fz_freedisplaylist(list); + pdf_freepage(page); - printf("</page>\n"); + pdf_agestoreditems(xref->store); + pdf_evictageditems(xref->store); } -static void drawpages(char *pagelist) +static void drawrange(pdf_xref *xref, char *range) { int page, spage, epage; char *spec, *dash; - struct benchmark loadtimes, drawtimes; - - if (!xref) - drawusage(); - - if (benchmark) - { - memset(&loadtimes, 0x00, sizeof (loadtimes)); - loadtimes.min = 1<<31; - memset(&drawtimes, 0x00, sizeof (drawtimes)); - drawtimes.min = 1<<31; - } - spec = fz_strsep(&pagelist, ","); + spec = fz_strsep(&range, ","); while (spec) { dash = strchr(spec, '-'); @@ -416,109 +165,78 @@ static void drawpages(char *pagelist) if (strlen(dash) > 1) epage = atoi(dash + 1); else - epage = pagecount; + epage = pdf_getpagecount(xref); } - if (spage > epage) - page = spage, spage = epage, epage = page; - - if (spage < 1) - spage = 1; - if (epage > pagecount) - epage = pagecount; - - for (page = spage; page <= epage; page++) - { - switch (drawmode) - { - case DRAWPNM: drawpnm(page, &loadtimes, &drawtimes, 0); break; - case DRAWPGM: drawpnm(page, &loadtimes, &drawtimes, 1); break; - case DRAWTXT: drawtxt(page, &loadtimes); break; - case DRAWXML: drawxml(page); break; - } - } + spage = CLAMP(spage, 1, pdf_getpagecount(xref)); + epage = CLAMP(epage, 1, pdf_getpagecount(xref)); - spec = fz_strsep(&pagelist, ","); - } + if (spage < epage) + for (page = spage; page <= epage; page++) + drawpage(xref, page); + else + for (page = spage; page >= epage; page--) + drawpage(xref, page); - if (benchmark) - { - if (loadtimes.pages > 0) - { - loadtimes.avg /= loadtimes.pages; - drawtimes.avg /= drawtimes.pages; - - printf("benchmark-load: min: %6.3fs (page % 4d), avg: %6.3fs, max: %6.3fs (page % 4d)\n", - loadtimes.min / 1000000.0, loadtimes.minpage, - loadtimes.avg / 1000000.0, - loadtimes.max / 1000000.0, loadtimes.maxpage); - printf("benchmark-draw: min: %6.3fs (page % 4d), avg: %6.3fs, max: %6.3fs (page % 4d)\n", - drawtimes.min / 1000000.0, drawtimes.minpage, - drawtimes.avg / 1000000.0, - drawtimes.max / 1000000.0, drawtimes.maxpage); - } + spec = fz_strsep(&range, ","); } } int main(int argc, char **argv) { char *password = ""; + pdf_xref *xref; + fz_error error; int c; - enum { NO_FILE_OPENED, NO_PAGES_DRAWN, DREW_PAGES } state; fz_accelerate(); - while ((c = fz_getopt(argc, argv, "b:p:o:r:gtxms")) != -1) + while ((c = fz_getopt(argc, argv, "o:p:r:amtx")) != -1) { switch (c) { - case 'b': drawbands = atoi(fz_optarg); break; + case 'o': output = fz_optarg; break; case 'p': password = fz_optarg; break; - case 'o': drawpattern = fz_optarg; break; - case 'r': drawzoom = atof(fz_optarg) / 72; break; - case 'g': drawmode = DRAWPGM; break; - case 't': drawmode = DRAWTXT; break; - case 'x': drawmode = DRAWXML; break; - case 'm': benchmark = 1; break; - case 's': checksum = 1; break; - default: - drawusage(); - break; + case 'r': resolution = atof(fz_optarg) / 72; break; + case 'a': savealpha = 1; break; + case 'm': showtime++; break; + case 't': showtext++; break; + case 'x': showxml++; break; + default: usage(); break; } } if (fz_optind == argc) - drawusage(); + usage(); - setcleanup(local_cleanup); + if (showxml) + printf("<?xml version=\"1.0\"?>\n"); + + glyphcache = fz_newglyphcache(); - state = NO_FILE_OPENED; while (fz_optind < argc) { - if (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF")) - { - if (state == NO_PAGES_DRAWN) - drawpages("1-"); + filename = argv[fz_optind++]; - closexref(); + error = pdf_openxref(&xref, filename, password); + if (error) + die(fz_rethrow(error, "cannot open document: %s", filename)); - drawcache = fz_newglyphcache(); + if (showxml) + printf("<document name=\"%s\">\n", filename); - openxref(argv[fz_optind], password, 0, 1); - state = NO_PAGES_DRAWN; - } - else - { - drawpages(argv[fz_optind]); - state = DREW_PAGES; - } - fz_optind++; - } + if (fz_optind == argc || !isrange(argv[fz_optind])) + drawrange(xref, "1-"); + if (fz_optind < argc && isrange(argv[fz_optind])) + drawrange(xref, argv[fz_optind++]); - if (state == NO_PAGES_DRAWN) - drawpages("1-"); + if (showxml) + printf("</document>\n"); + + pdf_freexref(xref); + } - closexref(); + fz_freeglyphcache(glyphcache); return 0; } diff --git a/apps/pdfextract.c b/apps/pdfextract.c index 915bb026..cb0eabd2 100644 --- a/apps/pdfextract.c +++ b/apps/pdfextract.c @@ -2,12 +2,25 @@ * pdfextract -- the ultimate way to extract images and fonts from pdfs */ -#include "pdftool.h" +#include "fitz.h" +#include "mupdf.h" -static void showusage(void) +static pdf_xref *xref = NULL; +static int dorgb = 0; + +void die(fz_error error) +{ + fz_catch(error, "aborting"); + if (xref) + pdf_freexref(xref); + exit(1); +} + +static void usage(void) { - fprintf(stderr, "usage: pdfextract [-p password] <file> [object numbers]\n"); + fprintf(stderr, "usage: pdfextract [options] file.pdf [object numbers]\n"); fprintf(stderr, "\t-p\tpassword\n"); + fprintf(stderr, "\t-r\tconvert images to rgb\n"); exit(1); } @@ -23,89 +36,56 @@ static int isfontdesc(fz_obj *obj) return fz_isname(type) && !strcmp(fz_toname(type), "FontDescriptor"); } -static void saveimage(int num, int gen) +static void saveimage(int num) { - pdf_image *img = nil; - fz_obj *ref; fz_error error; + pdf_image *img; fz_pixmap *pix; + fz_obj *ref; char name[1024]; - FILE *f; - int x, y; - unsigned char *samples; - ref = fz_newindirect(num, gen, xref); + ref = fz_newindirect(num, 0, xref); + /* Hack! ...normally installed by pdf_page... */ xref->store = pdf_newstore(); + /* TODO: detect DCTD and save as jpeg */ + error = pdf_loadimage(&img, xref, nil, ref); if (error) die(error); pix = pdf_loadtile(img); - if (img->bpc == 1 && img->n == 0) + if (dorgb && img->colorspace && img->colorspace != pdf_devicergb) { fz_pixmap *temp; - temp = fz_newpixmap(pdf_devicergb, pix->x, pix->y, pix->w, pix->h); - - for (y = 0; y < pix->h; y++) - for (x = 0; x < pix->w; x++) - { - int pixel = y * pix->w + x; - temp->samples[pixel * temp->n + 0] = pix->samples[pixel]; - temp->samples[pixel * temp->n + 1] = pix->samples[pixel]; - temp->samples[pixel * temp->n + 2] = pix->samples[pixel]; - temp->samples[pixel * temp->n + 3] = 255; - } - + fz_convertpixmap(pix, temp); fz_droppixmap(pix); pix = temp; } - if (img->colorspace && strcmp(img->colorspace->name, "DeviceRGB")) + if (pix->n <= 4) { - fz_pixmap *temp; - temp = fz_newpixmap(pdf_devicergb, pix->x, pix->y, pix->w, pix->h); - fz_convertpixmap(pix, temp); - fz_droppixmap(pix); - pix = temp; + sprintf(name, "img-%04d.png", num); + printf("extracting image %s\n", name); + fz_writepng(pix, name, 0); + } + else + { + sprintf(name, "img-%04d.pam", num); + printf("extracting image %s\n", name); + fz_writepam(pix, name, 0); } - - sprintf(name, "img-%04d.pnm", num); - - f = fopen(name, "wb"); - if (f == NULL) - die(fz_throw("Error creating image file")); - - fprintf(f, "P6\n%d %d\n%d\n", img->w, img->h, 255); - - samples = pix->samples; - - for (y = 0; y < pix->h; y++) - for (x = 0; x < pix->w; x++) - { - unsigned char r, g, b; - - r = *(samples++); - g = *(samples++); - b = *(samples++); - samples++; - - fprintf(f, "%c%c%c", r, g, b); - } - - if (fclose(f) < 0) - die(fz_throw("Error closing image file")); fz_droppixmap(pix); + pdf_dropimage(img); + /* We never want to cache resources... */ pdf_freestore(xref->store); xref->store = nil; - pdf_dropimage(img); - fz_dropobj(ref); } @@ -171,6 +151,7 @@ static void savefont(fz_obj *dict, int num) die(error); sprintf(name, "%s-%04d.%s", fontname, num, ext); + printf("extracting font %s\n", name); f = fopen(name, "wb"); if (f == NULL) @@ -185,7 +166,7 @@ static void savefont(fz_obj *dict, int num) fz_dropbuffer(buf); } -static void showobject(int num, int gen) +static void showobject(int num) { fz_error error; fz_obj *obj; @@ -193,12 +174,12 @@ static void showobject(int num, int gen) if (!xref) die(fz_throw("no file specified")); - error = pdf_loadobject(&obj, xref, num, gen); + error = pdf_loadobject(&obj, xref, num, 0); if (error) die(error); if (isimage(obj)) - saveimage(num, gen); + saveimage(num); else if (isfontdesc(obj)) savefont(obj, num); @@ -207,36 +188,44 @@ static void showobject(int num, int gen) int main(int argc, char **argv) { + fz_error error; + char *infile; char *password = ""; int c, o; - while ((c = fz_getopt(argc, argv, "p:")) != -1) + while ((c = fz_getopt(argc, argv, "p:r")) != -1) { switch (c) { case 'p': password = fz_optarg; break; - default: - showusage(); - break; + case 'r': dorgb++; break; + default: usage(); break; } } if (fz_optind == argc) - showusage(); + usage(); - openxref(argv[fz_optind++], password, 0, 0); + infile = argv[fz_optind++]; + error = pdf_openxref(&xref, infile, password); + if (error) + die(fz_rethrow(error, "cannot open input file '%s'", infile)); if (fz_optind == argc) + { for (o = 0; o < xref->len; o++) - showobject(o, 0); + showobject(o); + } else - while (fz_optind < argc) { - showobject(atoi(argv[fz_optind]), 0); - fz_optind++; + while (fz_optind < argc) + { + showobject(atoi(argv[fz_optind])); + fz_optind++; + } } - closexref(); + pdf_freexref(xref); return 0; } diff --git a/apps/pdfinfo.c b/apps/pdfinfo.c index 26a7556f..af29c0d2 100644 --- a/apps/pdfinfo.c +++ b/apps/pdfinfo.c @@ -3,7 +3,22 @@ * Print information about the input pdf. */ -#include "pdftool.h" +#include "fitz.h" +#include "mupdf.h" + +pdf_xref *xref; +int pagecount; + +void closexref(void); + +void die(fz_error error) +{ + fz_catch(error, "aborting"); + closexref(); + exit(1); +} + +void openxref(char *filename, char *password, int dieonbadpass, int loadpages); enum { @@ -80,9 +95,14 @@ static int forms = 0; static struct info *psobj = nil; static int psobjs = 0; -static void local_cleanup(void) +void closexref(void) { int i; + if (xref) + { + pdf_freexref(xref); + xref = nil; + } if (dim) { @@ -922,6 +942,7 @@ showinfo(char *filename, int show, char *pagelist) int main(int argc, char **argv) { enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state; + fz_error error; char *filename = ""; char *password = ""; int show = ALL; @@ -947,8 +968,6 @@ int main(int argc, char **argv) if (fz_optind == argc) infousage(); - setcleanup(local_cleanup); - state = NO_FILE_OPENED; while (fz_optind < argc) { @@ -963,7 +982,10 @@ int main(int argc, char **argv) closexref(); filename = argv[fz_optind]; printf("%s:\n", filename); - openxref(filename, password, 0, 1); + error = pdf_openxref(&xref, filename, password); + if (error) + die(fz_rethrow(error, "cannot open input file '%s'", filename)); + pagecount = pdf_getpagecount(xref); showglobalinfo(); state = NO_INFO_GATHERED; } diff --git a/apps/pdfshow.c b/apps/pdfshow.c index 0e91a0af..b15fae1b 100644 --- a/apps/pdfshow.c +++ b/apps/pdfshow.c @@ -2,18 +2,28 @@ * pdfshow -- the ultimate pdf debugging tool */ -#include "pdftool.h" +#include "fitz.h" +#include "mupdf.h" +static pdf_xref *xref = NULL; static int showbinary = 0; static int showraw = 0; static int showcolumn; -static void showusage(void) +void die(fz_error error) +{ + fz_catch(error, "aborting"); + if (xref) + pdf_freexref(xref); + exit(1); +} + +static void usage(void) { fprintf(stderr, "usage: pdfshow [-bc] [-p password] <file> [xref] [trailer] [object numbers]\n"); - fprintf(stderr, "\t-b\tprint streams as binary data (don't pretty-print)\n"); + fprintf(stderr, "\t-b\tprint streams as binary data\n"); fprintf(stderr, "\t-c\tprint compressed streams (don't decompress)\n"); - fprintf(stderr, "\t-p\tpassword for encrypted files\n"); + fprintf(stderr, "\t-p\tpassword\n"); exit(1); } @@ -103,12 +113,19 @@ static void showobject(int num, int gen) if (pdf_isstream(xref, num, gen)) { - printf("%d %d obj\n", num, gen); - fz_debugobj(obj); - printf("stream\n"); - showstream(num, gen); - printf("endstream\n"); - printf("endobj\n\n"); + if (showraw) + { + showstream(num, gen); + } + else + { + printf("%d %d obj\n", num, gen); + fz_debugobj(obj); + printf("stream\n"); + showstream(num, gen); + printf("endstream\n"); + printf("endobj\n\n"); + } } else { @@ -122,7 +139,11 @@ static void showobject(int num, int gen) int main(int argc, char **argv) { - char *password = ""; + char *password = NULL; /* don't throw errors if encrypted */ + fz_error error; + fz_stream *file; + char *filename; + int fd; int c; while ((c = fz_getopt(argc, argv, "p:bc")) != -1) @@ -132,16 +153,26 @@ int main(int argc, char **argv) case 'p': password = fz_optarg; break; case 'b': showbinary ++; break; case 'c': showraw ++; break; - default: - showusage(); - break; + default: usage(); break; } } if (fz_optind == argc) - showusage(); + usage(); - openxref(argv[fz_optind++], password, 0, 0); + /* Use newxref directly because we don't care about the page tree */ + { + filename = argv[fz_optind++]; + fd = open(filename, O_BINARY | O_RDONLY); + if (fd < 0) + return fz_throw("cannot open file '%s': %s", filename, strerror(errno)); + + file = fz_openfile(fd); + error = pdf_newxref(&xref, file, password); + if (error) + die(fz_rethrow(error, "cannot load document '%s'", filename)); + fz_dropstream(file); + } if (fz_optind == argc) showtrailer(); @@ -157,7 +188,7 @@ int main(int argc, char **argv) fz_optind++; } - closexref(); + pdf_freexref(xref); return 0; } diff --git a/apps/pdftool.c b/apps/pdftool.c deleted file mode 100644 index 094eeaa5..00000000 --- a/apps/pdftool.c +++ /dev/null @@ -1,86 +0,0 @@ -#include "pdftool.h" - -char *basename = nil; -pdf_xref *xref = nil; -int pagecount = 0; -static void (*cleanup)(void) = nil; - -void closexref(void); - -void die(fz_error error) -{ - fz_catch(error, "aborting"); - if (cleanup) - cleanup(); - closexref(); - exit(1); -} - -void setcleanup(void (*func)(void)) -{ - cleanup = func; -} - -void openxref(char *filename, char *password, int dieonbadpass, int loadpages) -{ - fz_stream *file; - fz_error error; - int okay; - int fd; - - basename = strrchr(filename, '/'); - if (!basename) - basename = filename; - else - basename++; - - fd = open(filename, O_BINARY | O_RDONLY, 0666); - if (fd < 0) - die(fz_throw("cannot open file '%s': %s", filename, strerror(errno))); - - file = fz_openfile(fd); - error = pdf_openxref(&xref, file); - if (error) - die(fz_rethrow(error, "cannot open document '%s'", basename)); - fz_dropstream(file); - - if (pdf_needspassword(xref)) - { - okay = pdf_authenticatepassword(xref, password); - if (!okay && !dieonbadpass) - fz_warn("invalid password, attempting to continue."); - else if (!okay && dieonbadpass) - die(fz_throw("invalid password")); - } - - if (loadpages) - { - error = pdf_loadpagetree(xref); - if (error) - die(fz_rethrow(error, "cannot load page tree")); - pagecount = pdf_getpagecount(xref); - } -} - -void flushxref(void) -{ - if (xref) - { - pdf_flushxref(xref, 0); - } -} - -void closexref(void) -{ - if (cleanup) - cleanup(); - - if (xref) - { - pdf_closexref(xref); - xref = nil; - } - - basename = nil; -} - diff --git a/apps/pdftool.h b/apps/pdftool.h deleted file mode 100644 index 1560c225..00000000 --- a/apps/pdftool.h +++ /dev/null @@ -1,14 +0,0 @@ -#include "fitz.h" -#include "mupdf.h" - -extern char *basename; -extern pdf_xref *xref; -extern int pagecount; - -void die(fz_error error); -void setcleanup(void (*cleanup)(void)); - -void openxref(char *filename, char *password, int dieonbadpass, int loadpages); -void flushxref(void); -void closexref(void); - diff --git a/fitz/fitz.h b/fitz/fitz.h index 38b26619..3d7c5cb8 100644 --- a/fitz/fitz.h +++ b/fitz/fitz.h @@ -703,8 +703,8 @@ void fz_gammapixmap(fz_pixmap *pix, float gamma); fz_pixmap * fz_scalepixmap(fz_pixmap *src, int xdenom, int ydenom); fz_error fz_writepnm(fz_pixmap *pixmap, char *filename); -fz_error fz_writepam(fz_pixmap *pixmap, char *filename); -fz_error fz_writepng(fz_pixmap *pixmap, char *filename, int alpha); +fz_error fz_writepam(fz_pixmap *pixmap, char *filename, int savealpha); +fz_error fz_writepng(fz_pixmap *pixmap, char *filename, int savealpha); /* * Colorspace resources. diff --git a/fitz/res_pixmap.c b/fitz/res_pixmap.c index f089c11c..80ace5fa 100644 --- a/fitz/res_pixmap.c +++ b/fitz/res_pixmap.c @@ -131,29 +131,48 @@ fz_writepnm(fz_pixmap *pixmap, char *filename) */ fz_error -fz_writepam(fz_pixmap *pixmap, char *filename) +fz_writepam(fz_pixmap *pixmap, char *filename, int savealpha) { + unsigned char *sp; + int y, w, k; FILE *fp; + int sn = pixmap->n; + int dn = pixmap->n; + if (!savealpha && dn > 1) + dn--; + fp = fopen(filename, "wb"); if (!fp) return fz_throw("cannot open file '%s': %s", filename, strerror(errno)); fprintf(fp, "P7\n"); fprintf(fp, "WIDTH %d\n", pixmap->w); - fprintf(fp, "HEGIHT %d\n", pixmap->h); - fprintf(fp, "DEPTH %d\n", pixmap->n); + fprintf(fp, "HEIGHT %d\n", pixmap->h); + fprintf(fp, "DEPTH %d\n", dn); fprintf(fp, "MAXVAL 255\n"); - switch (pixmap->n) + if (pixmap->colorspace) + fprintf(fp, "# COLORSPACE %s\n", pixmap->colorspace->name); + switch (dn) { case 1: fprintf(fp, "TUPLTYPE GRAYSCALE\n"); break; - case 2: fprintf(fp, "TUPLTYPE GRAYSCALE_ALPHA\n"); break; - case 4: fprintf(fp, "TUPLTYPE RGB_ALPHA\n"); break; - case 5: fprintf(fp, "TUPLTYPE CMYK_ALPHA\n"); break; + case 2: if (sn == 2) fprintf(fp, "TUPLTYPE GRAYSCALE_ALPHA\n"); break; + case 3: if (sn == 4) fprintf(fp, "TUPLTYPE RGB\n"); break; + case 4: if (sn == 4) fprintf(fp, "TUPLTYPE RGB_ALPHA\n"); break; } fprintf(fp, "ENDHDR\n"); - fwrite(pixmap->samples, pixmap->w * pixmap->n, pixmap->h, fp); + sp = pixmap->samples; + for (y = 0; y < pixmap->h; y++) + { + w = pixmap->w; + while (w--) + { + for (k = 0; k < dn; k++) + putc(sp[k], fp); + sp += sn; + } + } fclose(fp); @@ -195,7 +214,7 @@ static void putchunk(char *tag, unsigned char *data, int size, FILE *fp) } fz_error -fz_writepng(fz_pixmap *pixmap, char *filename, int alpha) +fz_writepng(fz_pixmap *pixmap, char *filename, int savealpha) { static const unsigned char pngsig[8] = { 137, 80, 78, 71, 13, 10, 26, 10 }; FILE *fp; @@ -211,7 +230,7 @@ fz_writepng(fz_pixmap *pixmap, char *filename, int alpha) sn = pixmap->n; dn = pixmap->n; - if (!alpha && dn > 1) + if (!savealpha && dn > 1) dn--; switch (dn) diff --git a/mupdf/mupdf.h b/mupdf/mupdf.h index 24cf2f3d..6babf05c 100644 --- a/mupdf/mupdf.h +++ b/mupdf/mupdf.h @@ -134,7 +134,7 @@ struct pdf_xref_s fz_obj **pagerefs; struct pdf_store_s *store; - struct pdf_outline_s *outlines; + void (*freestore)(struct pdf_store_s *); char scratch[65536]; }; @@ -148,11 +148,6 @@ struct pdf_xrefentry_s int type; /* 0=unset (f)ree i(n)use (o)bjstm */ }; -fz_error pdf_openxref(pdf_xref **xrefp, fz_stream *file); -void pdf_closexref(pdf_xref *); -void pdf_debugxref(pdf_xref *); -void pdf_flushxref(pdf_xref *, int force); - fz_error pdf_cacheobject(pdf_xref *, int num, int gen); fz_error pdf_loadobject(fz_obj **objp, pdf_xref *, int num, int gen); @@ -164,8 +159,13 @@ fz_error pdf_openrawstream(fz_stream **stmp, pdf_xref *, int num, int gen); fz_error pdf_openstream(fz_stream **stmp, pdf_xref *, int num, int gen); fz_error pdf_openstreamat(fz_stream **stmp, pdf_xref *xref, int num, int gen, fz_obj *dict, int stmofs); +fz_error pdf_openxref(pdf_xref **xrefp, char *filename, char *password); +fz_error pdf_newxref(pdf_xref **xrefp, fz_stream *file, char *password); +void pdf_freexref(pdf_xref *); + /* private */ -extern fz_error pdf_repairxref(pdf_xref *xref, char *buf, int bufsize); +fz_error pdf_repairxref(pdf_xref *xref, char *buf, int bufsize); +void pdf_debugxref(pdf_xref *); /* * Resource store diff --git a/mupdf/pdf_open.c b/mupdf/pdf_open.c deleted file mode 100644 index 04d3fb6a..00000000 --- a/mupdf/pdf_open.c +++ /dev/null @@ -1,668 +0,0 @@ -#include "fitz.h" -#include "mupdf.h" - -static inline int iswhite(int ch) -{ - return - ch == '\000' || ch == '\011' || ch == '\012' || - ch == '\014' || ch == '\015' || ch == '\040'; -} - -/* - * magic version tag and startxref - */ - -static fz_error -pdf_loadversion(pdf_xref *xref) -{ - fz_error error; - char buf[20]; - - error = fz_seek(xref->file, 0, 0); - if (error) - return fz_rethrow(error, "cannot seek to beginning of file"); - - error = fz_readline(xref->file, buf, sizeof buf); - if (error) - return fz_rethrow(error, "cannot read version marker"); - if (memcmp(buf, "%PDF-", 5) != 0) - return fz_throw("cannot recognize version marker"); - - xref->version = atof(buf + 5) * 10; - - pdf_logxref("version %d.%d\n", xref->version / 10, xref->version % 10); - - return fz_okay; -} - -static fz_error -pdf_readstartxref(pdf_xref *xref) -{ - fz_error error; - unsigned char buf[1024]; - int t, n; - int i; - - error = fz_seek(xref->file, 0, 2); - if (error) - return fz_rethrow(error, "cannot seek to end of file"); - - t = MAX(0, fz_tell(xref->file) - ((int)sizeof buf)); - error = fz_seek(xref->file, t, 0); - if (error) - return fz_rethrow(error, "cannot seek to offset %d", t); - - error = fz_read(&n, xref->file, buf, sizeof buf); - if (error) - return fz_rethrow(error, "cannot read from file"); - - for (i = n - 9; i >= 0; i--) - { - if (memcmp(buf + i, "startxref", 9) == 0) - { - i += 9; - while (iswhite(buf[i]) && i < n) - i ++; - xref->startxref = atoi((char*)(buf + i)); - pdf_logxref("startxref %d\n", xref->startxref); - return fz_okay; - } - } - - return fz_throw("cannot find startxref"); -} - -/* - * trailer dictionary - */ - -static fz_error -pdf_readoldtrailer(pdf_xref *xref, char *buf, int cap) -{ - fz_error error; - int len; - char *s; - int n; - int t; - pdf_token_e tok; - int c; - - pdf_logxref("load old xref format trailer\n"); - - error = fz_readline(xref->file, buf, cap); - if (error) - return fz_rethrow(error, "cannot read xref marker"); - if (strncmp(buf, "xref", 4) != 0) - return fz_throw("cannot find xref marker"); - - while (1) - { - c = fz_peekbyte(xref->file); - if (!(c >= '0' && c <= '9')) - break; - - error = fz_readline(xref->file, buf, cap); - if (error) - return fz_rethrow(error, "cannot read xref count"); - - s = buf; - fz_strsep(&s, " "); /* ignore ofs */ - if (!s) - return fz_throw("invalid range marker in xref"); - len = atoi(fz_strsep(&s, " ")); - - /* broken pdfs where the section is not on a separate line */ - if (s && *s != '\0') - { - error = fz_seek(xref->file, -(2 + (int)strlen(s)), 1); - if (error) - return fz_rethrow(error, "cannot seek in file"); - } - - t = fz_tell(xref->file); - if (t < 0) - return fz_throw("cannot tell in file"); - - error = fz_seek(xref->file, t + 20 * len, 0); - if (error) - return fz_rethrow(error, "cannot seek in file"); - } - - error = fz_readerror(xref->file); - if (error) - return fz_rethrow(error, "cannot read from file"); - - error = pdf_lex(&tok, xref->file, buf, cap, &n); - if (error) - return fz_rethrow(error, "cannot parse trailer"); - if (tok != PDF_TTRAILER) - return fz_throw("expected trailer marker"); - - error = pdf_lex(&tok, xref->file, buf, cap, &n); - if (error) - return fz_rethrow(error, "cannot parse trailer"); - if (tok != PDF_TODICT) - return fz_throw("expected trailer dictionary"); - - error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap); - if (error) - return fz_rethrow(error, "cannot parse trailer"); - return fz_okay; -} - -static fz_error -pdf_readnewtrailer(pdf_xref *xref, char *buf, int cap) -{ - fz_error error; - - pdf_logxref("load new xref format trailer\n"); - - error = pdf_parseindobj(&xref->trailer, xref, xref->file, buf, cap, nil, nil, nil); - if (error) - return fz_rethrow(error, "cannot parse trailer (compressed)"); - return fz_okay; -} - -static fz_error -pdf_readtrailer(pdf_xref *xref, char *buf, int cap) -{ - fz_error error; - int c; - - error = fz_seek(xref->file, xref->startxref, 0); - if (error) - return fz_rethrow(error, "cannot seek to startxref"); - - while (iswhite(fz_peekbyte(xref->file))) - fz_readbyte(xref->file); - - c = fz_peekbyte(xref->file); - error = fz_readerror(xref->file); - if (error) - return fz_rethrow(error, "cannot read trailer"); - - if (c == 'x') - { - error = pdf_readoldtrailer(xref, buf, cap); - if (error) - return fz_rethrow(error, "cannot read trailer"); - } - else if (c >= '0' && c <= '9') - { - error = pdf_readnewtrailer(xref, buf, cap); - if (error) - return fz_rethrow(error, "cannot read trailer"); - } - else - { - return fz_throw("cannot recognize xref format: '%c'", c); - } - - return fz_okay; -} - -/* - * xref tables - */ - -static fz_error -pdf_readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) -{ - fz_error error; - int ofs, len; - char *s; - int n; - pdf_token_e tok; - int i; - int c; - - pdf_logxref("load old xref format\n"); - - error = fz_readline(xref->file, buf, cap); - if (error) - return fz_rethrow(error, "cannot read xref marker"); - if (strncmp(buf, "xref", 4) != 0) - return fz_throw("cannot find xref marker"); - - while (1) - { - c = fz_peekbyte(xref->file); - if (!(c >= '0' && c <= '9')) - break; - - error = fz_readline(xref->file, buf, cap); - if (error) - return fz_rethrow(error, "cannot read xref count"); - - s = buf; - ofs = atoi(fz_strsep(&s, " ")); - len = atoi(fz_strsep(&s, " ")); - - /* broken pdfs where the section is not on a separate line */ - if (s && *s != '\0') - { - fz_warn("broken xref section. proceeding anyway."); - error = fz_seek(xref->file, -(2 + (int)strlen(s)), 1); - if (error) - return fz_rethrow(error, "cannot seek to xref"); - } - - /* broken pdfs where size in trailer undershoots - entries in xref sections */ - if ((ofs + len) > xref->cap) - { - fz_warn("broken xref section, proceeding anyway."); - xref->cap = ofs + len; - xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry)); - } - - if ((ofs + len) > xref->len) - { - for (i = xref->len; i < (ofs + len); i++) - { - xref->table[i].ofs = 0; - xref->table[i].gen = 0; - xref->table[i].stmofs = 0; - xref->table[i].obj = nil; - xref->table[i].type = 0; - } - xref->len = ofs + len; - } - - for (i = 0; i < len; i++) - { - error = fz_read(&n, xref->file, (unsigned char *) buf, 20); - if (error) - return fz_rethrow(error, "cannot read xref table"); - if (!xref->table[ofs + i].type) - { - s = buf; - - /* broken pdfs where line start with white space */ - while (*s != '\0' && iswhite(*s)) - s++; - - xref->table[ofs + i].ofs = atoi(s); - xref->table[ofs + i].gen = atoi(s + 11); - xref->table[ofs + i].type = s[17]; - } - } - } - - error = pdf_lex(&tok, xref->file, buf, cap, &n); - if (error) - return fz_rethrow(error, "cannot parse trailer"); - if (tok != PDF_TTRAILER) - return fz_throw("expected trailer marker"); - - error = pdf_lex(&tok, xref->file, buf, cap, &n); - if (error) - return fz_rethrow(error, "cannot parse trailer"); - if (tok != PDF_TODICT) - return fz_throw("expected trailer dictionary"); - - error = pdf_parsedict(trailerp, xref, xref->file, buf, cap); - if (error) - return fz_rethrow(error, "cannot parse trailer"); - return fz_okay; -} - -static fz_error -pdf_readnewxrefsection(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2) -{ - fz_error error; - int i, n; - - if (i0 < 0 || i0 + i1 > xref->len) - return fz_throw("xref stream has too many entries"); - - for (i = i0; i < i0 + i1; i++) - { - int a = 0; - int b = 0; - int c = 0; - - if (fz_peekbyte(stm) == EOF) - { - error = fz_readerror(stm); - if (error) - return fz_rethrow(error, "truncated xref stream"); - return fz_throw("truncated xref stream"); - } - - for (n = 0; n < w0; n++) - a = (a << 8) + fz_readbyte(stm); - for (n = 0; n < w1; n++) - b = (b << 8) + fz_readbyte(stm); - for (n = 0; n < w2; n++) - c = (c << 8) + fz_readbyte(stm); - - error = fz_readerror(stm); - if (error) - return fz_rethrow(error, "truncated xref stream"); - - if (!xref->table[i].type) - { - int t = w0 ? a : 1; - xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; - xref->table[i].ofs = w1 ? b : 0; - xref->table[i].gen = w2 ? c : 0; - } - } - - return fz_okay; -} - -static fz_error -pdf_readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) -{ - fz_error error; - fz_stream *stm; - fz_obj *trailer; - fz_obj *index; - fz_obj *obj; - int num, gen, stmofs; - int size, w0, w1, w2; - int t; - int i; - - pdf_logxref("load new xref format\n"); - - error = pdf_parseindobj(&trailer, xref, xref->file, buf, cap, &num, &gen, &stmofs); - if (error) - return fz_rethrow(error, "cannot parse compressed xref stream object"); - - obj = fz_dictgets(trailer, "Size"); - if (!obj) - { - fz_dropobj(trailer); - return fz_throw("xref stream missing Size entry (%d %d R)", num, gen); - } - size = fz_toint(obj); - - if (size >= xref->cap) - { - xref->cap = size + 1; /* for hack to allow broken pdf generators with off-by-one errors */ - xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry)); - } - - if (size > xref->len) - { - for (i = xref->len; i < xref->cap; i++) - { - xref->table[i].ofs = 0; - xref->table[i].gen = 0; - xref->table[i].stmofs = 0; - xref->table[i].obj = nil; - xref->table[i].type = 0; - } - xref->len = size; - } - - if (num < 0 || num >= xref->len) - { - if (num == xref->len && num < xref->cap) - { - /* allow broken pdf files that have off-by-one errors in the xref */ - fz_warn("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1); - xref->len ++; - } - else - { - fz_dropobj(trailer); - return fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1); - } - } - - pdf_logxref("\tnum=%d gen=%d size=%d\n", num, gen, size); - - obj = fz_dictgets(trailer, "W"); - if (!obj) { - fz_dropobj(trailer); - return fz_throw("xref stream missing W entry (%d %d R)", num, gen); - } - w0 = fz_toint(fz_arrayget(obj, 0)); - w1 = fz_toint(fz_arrayget(obj, 1)); - w2 = fz_toint(fz_arrayget(obj, 2)); - - index = fz_dictgets(trailer, "Index"); - - error = pdf_openstreamat(&stm, xref, num, gen, trailer, stmofs); - if (error) - { - fz_dropobj(trailer); - return fz_rethrow(error, "cannot open compressed xref stream (%d %d R)", num, gen); - } - - if (!index) - { - error = pdf_readnewxrefsection(xref, stm, 0, size, w0, w1, w2); - if (error) - { - fz_dropstream(stm); - fz_dropobj(trailer); - return fz_rethrow(error, "cannot read xref stream (%d %d R)", num, gen); - } - } - else - { - for (t = 0; t < fz_arraylen(index); t += 2) - { - int i0 = fz_toint(fz_arrayget(index, t + 0)); - int i1 = fz_toint(fz_arrayget(index, t + 1)); - error = pdf_readnewxrefsection(xref, stm, i0, i1, w0, w1, w2); - if (error) - { - fz_dropstream(stm); - fz_dropobj(trailer); - return fz_rethrow(error, "cannot read xref stream section (%d %d R)", num, gen); - } - } - } - - fz_dropstream(stm); - - *trailerp = trailer; - - return fz_okay; -} - -static fz_error -pdf_readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap) -{ - fz_error error; - int c; - - error = fz_seek(xref->file, ofs, 0); - if (error) - return fz_rethrow(error, "cannot seek to xref"); - - while (iswhite(fz_peekbyte(xref->file))) - fz_readbyte(xref->file); - - c = fz_peekbyte(xref->file); - error = fz_readerror(xref->file); - if (error) - return fz_rethrow(error, "cannot read trailer"); - - if (c == 'x') - { - error = pdf_readoldxref(trailerp, xref, buf, cap); - if (error) - return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs); - } - else if (c >= '0' && c <= '9') - { - error = pdf_readnewxref(trailerp, xref, buf, cap); - if (error) - return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs); - } - else - { - return fz_throw("cannot recognize xref format"); - } - - return fz_okay; -} - -static fz_error -pdf_readxrefsections(pdf_xref *xref, int ofs, char *buf, int cap) -{ - fz_error error; - fz_obj *trailer; - fz_obj *prev; - fz_obj *xrefstm; - - error = pdf_readxref(&trailer, xref, ofs, buf, cap); - if (error) - return fz_rethrow(error, "cannot read xref section"); - - /* FIXME: do we overwrite free entries properly? */ - xrefstm = fz_dictgets(trailer, "XRefStm"); - if (xrefstm) - { - pdf_logxref("load xrefstm\n"); - error = pdf_readxrefsections(xref, fz_toint(xrefstm), buf, cap); - if (error) - { - fz_dropobj(trailer); - return fz_rethrow(error, "cannot read /XRefStm xref section"); - } - } - - prev = fz_dictgets(trailer, "Prev"); - if (prev) - { - pdf_logxref("load prev at 0x%x\n", fz_toint(prev)); - error = pdf_readxrefsections(xref, fz_toint(prev), buf, cap); - if (error) - { - fz_dropobj(trailer); - return fz_rethrow(error, "cannot read /Prev xref section"); - } - } - - fz_dropobj(trailer); - return fz_okay; -} - -/* - * load xref tables from pdf - */ - -static fz_error -pdf_loadxref(pdf_xref *xref, char *buf, int bufsize) -{ - fz_error error; - fz_obj *size; - int i; - - error = pdf_loadversion(xref); - if (error) - return fz_rethrow(error, "cannot read version marker"); - - error = pdf_readstartxref(xref); - if (error) - return fz_rethrow(error, "cannot read startxref"); - - error = pdf_readtrailer(xref, buf, bufsize); - if (error) - return fz_rethrow(error, "cannot read trailer"); - - size = fz_dictgets(xref->trailer, "Size"); - if (!size) - return fz_throw("trailer missing Size entry"); - - pdf_logxref("\tsize %d at 0x%x\n", fz_toint(size), xref->startxref); - - xref->len = fz_toint(size); - xref->cap = xref->len + 1; /* for hack to allow broken pdf generators with off-by-one errors */ - xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry)); - for (i = 0; i < xref->cap; i++) - { - xref->table[i].ofs = 0; - xref->table[i].gen = 0; - xref->table[i].stmofs = 0; - xref->table[i].obj = nil; - xref->table[i].type = 0; - } - - error = pdf_readxrefsections(xref, xref->startxref, buf, bufsize); - if (error) - return fz_rethrow(error, "cannot read xref"); - - /* broken pdfs where first object is not free */ - if (xref->table[0].type != 'f') - { - fz_warn("first object in xref is not free"); - xref->table[0].type = 'f'; - } - - /* broken pdfs where freed objects have offset and gen set to 0 but still exist */ - for (i = 0; i < xref->len; i++) - { - if (xref->table[i].type == 'n' && xref->table[i].ofs == 0 && - xref->table[i].gen == 0 && xref->table[i].obj == nil) - { - fz_warn("object (%d %d R) has invalid offset, assumed missing", i, xref->table[i].gen); - xref->table[i].type = 'f'; - } - } - - return fz_okay; -} - -/* - * Open PDF file and load or reconstruct xref table. - */ - -fz_error -pdf_openxref(pdf_xref **xrefp, fz_stream *file) -{ - pdf_xref *xref; - fz_error error; - fz_obj *encrypt; - fz_obj *id; - - xref = fz_malloc(sizeof(pdf_xref)); - - memset(xref, 0, sizeof(pdf_xref)); - - pdf_logxref("openxref %p\n", xref); - - xref->file = fz_keepstream(file); - - error = pdf_loadxref(xref, xref->scratch, sizeof xref->scratch); - if (error) - { - fz_catch(error, "trying to repair"); - if (xref->table) - { - fz_free(xref->table); - xref->table = NULL; - xref->len = 0; - xref->cap = 0; - } - error = pdf_repairxref(xref, xref->scratch, sizeof xref->scratch); - if (error) - { - pdf_closexref(xref); - return fz_rethrow(error, "cannot repair document"); - } - } - - encrypt = fz_dictgets(xref->trailer, "Encrypt"); - id = fz_dictgets(xref->trailer, "ID"); - if (fz_isdict(encrypt)) - { - error = pdf_newcrypt(&xref->crypt, encrypt, id); - if (error) - { - pdf_closexref(xref); - return fz_rethrow(error, "cannot decrypt document"); - } - } - - *xrefp = xref; - return fz_okay; -} diff --git a/mupdf/pdf_page.c b/mupdf/pdf_page.c index 71087336..dfd2f080 100644 --- a/mupdf/pdf_page.c +++ b/mupdf/pdf_page.c @@ -152,7 +152,10 @@ pdf_loadpage(pdf_page **pagep, pdf_xref *xref, fz_obj *dict) // TODO: move this to a more appropriate place /* Ensure that we have a store for resource objects */ if (!xref->store) + { xref->store = pdf_newstore(); + xref->freestore = pdf_freestore; + } page = fz_malloc(sizeof(pdf_page)); page->resources = nil; diff --git a/mupdf/pdf_xref.c b/mupdf/pdf_xref.c index 161bf6e8..a20dc8d5 100644 --- a/mupdf/pdf_xref.c +++ b/mupdf/pdf_xref.c @@ -1,72 +1,732 @@ #include "fitz.h" #include "mupdf.h" -void -pdf_closexref(pdf_xref *xref) +static inline int iswhite(int ch) { + return + ch == '\000' || ch == '\011' || ch == '\012' || + ch == '\014' || ch == '\015' || ch == '\040'; +} + +/* + * magic version tag and startxref + */ + +static fz_error +pdf_loadversion(pdf_xref *xref) +{ + fz_error error; + char buf[20]; + + error = fz_seek(xref->file, 0, 0); + if (error) + return fz_rethrow(error, "cannot seek to beginning of file"); + + error = fz_readline(xref->file, buf, sizeof buf); + if (error) + return fz_rethrow(error, "cannot read version marker"); + if (memcmp(buf, "%PDF-", 5) != 0) + return fz_throw("cannot recognize version marker"); + + xref->version = atof(buf + 5) * 10; + + pdf_logxref("version %d.%d\n", xref->version / 10, xref->version % 10); + + return fz_okay; +} + +static fz_error +pdf_readstartxref(pdf_xref *xref) +{ + fz_error error; + unsigned char buf[1024]; + int t, n; int i; - pdf_logxref("closexref %p\n", xref); + error = fz_seek(xref->file, 0, 2); + if (error) + return fz_rethrow(error, "cannot seek to end of file"); - /* don't touch the pdf_store module ... we don't want that dependency here */ - if (xref->store) - fz_warn("someone forgot to empty the store before freeing xref!"); + t = MAX(0, fz_tell(xref->file) - ((int)sizeof buf)); + error = fz_seek(xref->file, t, 0); + if (error) + return fz_rethrow(error, "cannot seek to offset %d", t); - if (xref->table) + error = fz_read(&n, xref->file, buf, sizeof buf); + if (error) + return fz_rethrow(error, "cannot read from file"); + + for (i = n - 9; i >= 0; i--) { - pdf_flushxref(xref, 1); - fz_free(xref->table); + if (memcmp(buf + i, "startxref", 9) == 0) + { + i += 9; + while (iswhite(buf[i]) && i < n) + i ++; + xref->startxref = atoi((char*)(buf + i)); + pdf_logxref("startxref %d\n", xref->startxref); + return fz_okay; + } } - if (xref->pageobjs) + return fz_throw("cannot find startxref"); +} + +/* + * trailer dictionary + */ + +static fz_error +pdf_readoldtrailer(pdf_xref *xref, char *buf, int cap) +{ + fz_error error; + int len; + char *s; + int n; + int t; + pdf_token_e tok; + int c; + + pdf_logxref("load old xref format trailer\n"); + + error = fz_readline(xref->file, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref marker"); + if (strncmp(buf, "xref", 4) != 0) + return fz_throw("cannot find xref marker"); + + while (1) { - for (i = 0; i < xref->pagelen; i++) - fz_dropobj(xref->pageobjs[i]); - fz_free(xref->pageobjs); + c = fz_peekbyte(xref->file); + if (!(c >= '0' && c <= '9')) + break; + + error = fz_readline(xref->file, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref count"); + + s = buf; + fz_strsep(&s, " "); /* ignore ofs */ + if (!s) + return fz_throw("invalid range marker in xref"); + len = atoi(fz_strsep(&s, " ")); + + /* broken pdfs where the section is not on a separate line */ + if (s && *s != '\0') + { + error = fz_seek(xref->file, -(2 + (int)strlen(s)), 1); + if (error) + return fz_rethrow(error, "cannot seek in file"); + } + + t = fz_tell(xref->file); + if (t < 0) + return fz_throw("cannot tell in file"); + + error = fz_seek(xref->file, t + 20 * len, 0); + if (error) + return fz_rethrow(error, "cannot seek in file"); } - if (xref->pagerefs) + + error = fz_readerror(xref->file); + if (error) + return fz_rethrow(error, "cannot read from file"); + + error = pdf_lex(&tok, xref->file, buf, cap, &n); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + if (tok != PDF_TTRAILER) + return fz_throw("expected trailer marker"); + + error = pdf_lex(&tok, xref->file, buf, cap, &n); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + if (tok != PDF_TODICT) + return fz_throw("expected trailer dictionary"); + + error = pdf_parsedict(&xref->trailer, xref, xref->file, buf, cap); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + return fz_okay; +} + +static fz_error +pdf_readnewtrailer(pdf_xref *xref, char *buf, int cap) +{ + fz_error error; + + pdf_logxref("load new xref format trailer\n"); + + error = pdf_parseindobj(&xref->trailer, xref, xref->file, buf, cap, nil, nil, nil); + if (error) + return fz_rethrow(error, "cannot parse trailer (compressed)"); + return fz_okay; +} + +static fz_error +pdf_readtrailer(pdf_xref *xref, char *buf, int cap) +{ + fz_error error; + int c; + + error = fz_seek(xref->file, xref->startxref, 0); + if (error) + return fz_rethrow(error, "cannot seek to startxref"); + + while (iswhite(fz_peekbyte(xref->file))) + fz_readbyte(xref->file); + + c = fz_peekbyte(xref->file); + error = fz_readerror(xref->file); + if (error) + return fz_rethrow(error, "cannot read trailer"); + + if (c == 'x') { - for (i = 0; i < xref->pagelen; i++) - fz_dropobj(xref->pagerefs[i]); - fz_free(xref->pagerefs); + error = pdf_readoldtrailer(xref, buf, cap); + if (error) + return fz_rethrow(error, "cannot read trailer"); + } + else if (c >= '0' && c <= '9') + { + error = pdf_readnewtrailer(xref, buf, cap); + if (error) + return fz_rethrow(error, "cannot read trailer"); + } + else + { + return fz_throw("cannot recognize xref format: '%c'", c); } - if (xref->file) - fz_dropstream(xref->file); - if (xref->trailer) - fz_dropobj(xref->trailer); - if (xref->crypt) - pdf_freecrypt(xref->crypt); - - fz_free(xref); + return fz_okay; } -void -pdf_flushxref(pdf_xref *xref, int force) +/* + * xref tables + */ + +static fz_error +pdf_readoldxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) { + fz_error error; + int ofs, len; + char *s; + int n; + pdf_token_e tok; int i; + int c; - pdf_logxref("flushxref %p (%d)\n", xref, force); + pdf_logxref("load old xref format\n"); - for (i = 0; i < xref->len; i++) + error = fz_readline(xref->file, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref marker"); + if (strncmp(buf, "xref", 4) != 0) + return fz_throw("cannot find xref marker"); + + while (1) { - if (force) + c = fz_peekbyte(xref->file); + if (!(c >= '0' && c <= '9')) + break; + + error = fz_readline(xref->file, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref count"); + + s = buf; + ofs = atoi(fz_strsep(&s, " ")); + len = atoi(fz_strsep(&s, " ")); + + /* broken pdfs where the section is not on a separate line */ + if (s && *s != '\0') { - if (xref->table[i].obj) + fz_warn("broken xref section. proceeding anyway."); + error = fz_seek(xref->file, -(2 + (int)strlen(s)), 1); + if (error) + return fz_rethrow(error, "cannot seek to xref"); + } + + /* broken pdfs where size in trailer undershoots + entries in xref sections */ + if ((ofs + len) > xref->cap) + { + fz_warn("broken xref section, proceeding anyway."); + xref->cap = ofs + len; + xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry)); + } + + if ((ofs + len) > xref->len) + { + for (i = xref->len; i < (ofs + len); i++) { - fz_dropobj(xref->table[i].obj); + xref->table[i].ofs = 0; + xref->table[i].gen = 0; + xref->table[i].stmofs = 0; xref->table[i].obj = nil; + xref->table[i].type = 0; + } + xref->len = ofs + len; + } + + for (i = 0; i < len; i++) + { + error = fz_read(&n, xref->file, (unsigned char *) buf, 20); + if (error) + return fz_rethrow(error, "cannot read xref table"); + if (!xref->table[ofs + i].type) + { + s = buf; + + /* broken pdfs where line start with white space */ + while (*s != '\0' && iswhite(*s)) + s++; + + xref->table[ofs + i].ofs = atoi(s); + xref->table[ofs + i].gen = atoi(s + 11); + xref->table[ofs + i].type = s[17]; } } + } + + error = pdf_lex(&tok, xref->file, buf, cap, &n); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + if (tok != PDF_TTRAILER) + return fz_throw("expected trailer marker"); + + error = pdf_lex(&tok, xref->file, buf, cap, &n); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + if (tok != PDF_TODICT) + return fz_throw("expected trailer dictionary"); + + error = pdf_parsedict(trailerp, xref, xref->file, buf, cap); + if (error) + return fz_rethrow(error, "cannot parse trailer"); + return fz_okay; +} + +static fz_error +pdf_readnewxrefsection(pdf_xref *xref, fz_stream *stm, int i0, int i1, int w0, int w1, int w2) +{ + fz_error error; + int i, n; + + if (i0 < 0 || i0 + i1 > xref->len) + return fz_throw("xref stream has too many entries"); + + for (i = i0; i < i0 + i1; i++) + { + int a = 0; + int b = 0; + int c = 0; + + if (fz_peekbyte(stm) == EOF) + { + error = fz_readerror(stm); + if (error) + return fz_rethrow(error, "truncated xref stream"); + return fz_throw("truncated xref stream"); + } + + for (n = 0; n < w0; n++) + a = (a << 8) + fz_readbyte(stm); + for (n = 0; n < w1; n++) + b = (b << 8) + fz_readbyte(stm); + for (n = 0; n < w2; n++) + c = (c << 8) + fz_readbyte(stm); + + error = fz_readerror(stm); + if (error) + return fz_rethrow(error, "truncated xref stream"); + + if (!xref->table[i].type) + { + int t = w0 ? a : 1; + xref->table[i].type = t == 0 ? 'f' : t == 1 ? 'n' : t == 2 ? 'o' : 0; + xref->table[i].ofs = w1 ? b : 0; + xref->table[i].gen = w2 ? c : 0; + } + } + + return fz_okay; +} + +static fz_error +pdf_readnewxref(fz_obj **trailerp, pdf_xref *xref, char *buf, int cap) +{ + fz_error error; + fz_stream *stm; + fz_obj *trailer; + fz_obj *index; + fz_obj *obj; + int num, gen, stmofs; + int size, w0, w1, w2; + int t; + int i; + + pdf_logxref("load new xref format\n"); + + error = pdf_parseindobj(&trailer, xref, xref->file, buf, cap, &num, &gen, &stmofs); + if (error) + return fz_rethrow(error, "cannot parse compressed xref stream object"); + + obj = fz_dictgets(trailer, "Size"); + if (!obj) + { + fz_dropobj(trailer); + return fz_throw("xref stream missing Size entry (%d %d R)", num, gen); + } + size = fz_toint(obj); + + if (size >= xref->cap) + { + xref->cap = size + 1; /* for hack to allow broken pdf generators with off-by-one errors */ + xref->table = fz_realloc(xref->table, xref->cap * sizeof(pdf_xrefentry)); + } + + if (size > xref->len) + { + for (i = xref->len; i < xref->cap; i++) + { + xref->table[i].ofs = 0; + xref->table[i].gen = 0; + xref->table[i].stmofs = 0; + xref->table[i].obj = nil; + xref->table[i].type = 0; + } + xref->len = size; + } + + if (num < 0 || num >= xref->len) + { + if (num == xref->len && num < xref->cap) + { + /* allow broken pdf files that have off-by-one errors in the xref */ + fz_warn("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1); + xref->len ++; + } else { - if (xref->table[i].obj && xref->table[i].obj->refs == 1) + fz_dropobj(trailer); + return fz_throw("object id (%d %d R) out of range (0..%d)", num, gen, xref->len - 1); + } + } + + pdf_logxref("\tnum=%d gen=%d size=%d\n", num, gen, size); + + obj = fz_dictgets(trailer, "W"); + if (!obj) { + fz_dropobj(trailer); + return fz_throw("xref stream missing W entry (%d %d R)", num, gen); + } + w0 = fz_toint(fz_arrayget(obj, 0)); + w1 = fz_toint(fz_arrayget(obj, 1)); + w2 = fz_toint(fz_arrayget(obj, 2)); + + index = fz_dictgets(trailer, "Index"); + + error = pdf_openstreamat(&stm, xref, num, gen, trailer, stmofs); + if (error) + { + fz_dropobj(trailer); + return fz_rethrow(error, "cannot open compressed xref stream (%d %d R)", num, gen); + } + + if (!index) + { + error = pdf_readnewxrefsection(xref, stm, 0, size, w0, w1, w2); + if (error) + { + fz_dropstream(stm); + fz_dropobj(trailer); + return fz_rethrow(error, "cannot read xref stream (%d %d R)", num, gen); + } + } + else + { + for (t = 0; t < fz_arraylen(index); t += 2) + { + int i0 = fz_toint(fz_arrayget(index, t + 0)); + int i1 = fz_toint(fz_arrayget(index, t + 1)); + error = pdf_readnewxrefsection(xref, stm, i0, i1, w0, w1, w2); + if (error) + { + fz_dropstream(stm); + fz_dropobj(trailer); + return fz_rethrow(error, "cannot read xref stream section (%d %d R)", num, gen); + } + } + } + + fz_dropstream(stm); + + *trailerp = trailer; + + return fz_okay; +} + +static fz_error +pdf_readxref(fz_obj **trailerp, pdf_xref *xref, int ofs, char *buf, int cap) +{ + fz_error error; + int c; + + error = fz_seek(xref->file, ofs, 0); + if (error) + return fz_rethrow(error, "cannot seek to xref"); + + while (iswhite(fz_peekbyte(xref->file))) + fz_readbyte(xref->file); + + c = fz_peekbyte(xref->file); + error = fz_readerror(xref->file); + if (error) + return fz_rethrow(error, "cannot read trailer"); + + if (c == 'x') + { + error = pdf_readoldxref(trailerp, xref, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs); + } + else if (c >= '0' && c <= '9') + { + error = pdf_readnewxref(trailerp, xref, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref (ofs=%d)", ofs); + } + else + { + return fz_throw("cannot recognize xref format"); + } + + return fz_okay; +} + +static fz_error +pdf_readxrefsections(pdf_xref *xref, int ofs, char *buf, int cap) +{ + fz_error error; + fz_obj *trailer; + fz_obj *prev; + fz_obj *xrefstm; + + error = pdf_readxref(&trailer, xref, ofs, buf, cap); + if (error) + return fz_rethrow(error, "cannot read xref section"); + + /* FIXME: do we overwrite free entries properly? */ + xrefstm = fz_dictgets(trailer, "XRefStm"); + if (xrefstm) + { + pdf_logxref("load xrefstm\n"); + error = pdf_readxrefsections(xref, fz_toint(xrefstm), buf, cap); + if (error) + { + fz_dropobj(trailer); + return fz_rethrow(error, "cannot read /XRefStm xref section"); + } + } + + prev = fz_dictgets(trailer, "Prev"); + if (prev) + { + pdf_logxref("load prev at 0x%x\n", fz_toint(prev)); + error = pdf_readxrefsections(xref, fz_toint(prev), buf, cap); + if (error) + { + fz_dropobj(trailer); + return fz_rethrow(error, "cannot read /Prev xref section"); + } + } + + fz_dropobj(trailer); + return fz_okay; +} + +/* + * load xref tables from pdf + */ + +static fz_error +pdf_loadxref(pdf_xref *xref, char *buf, int bufsize) +{ + fz_error error; + fz_obj *size; + int i; + + error = pdf_loadversion(xref); + if (error) + return fz_rethrow(error, "cannot read version marker"); + + error = pdf_readstartxref(xref); + if (error) + return fz_rethrow(error, "cannot read startxref"); + + error = pdf_readtrailer(xref, buf, bufsize); + if (error) + return fz_rethrow(error, "cannot read trailer"); + + size = fz_dictgets(xref->trailer, "Size"); + if (!size) + return fz_throw("trailer missing Size entry"); + + pdf_logxref("\tsize %d at 0x%x\n", fz_toint(size), xref->startxref); + + xref->len = fz_toint(size); + xref->cap = xref->len + 1; /* for hack to allow broken pdf generators with off-by-one errors */ + xref->table = fz_malloc(xref->cap * sizeof(pdf_xrefentry)); + for (i = 0; i < xref->cap; i++) + { + xref->table[i].ofs = 0; + xref->table[i].gen = 0; + xref->table[i].stmofs = 0; + xref->table[i].obj = nil; + xref->table[i].type = 0; + } + + error = pdf_readxrefsections(xref, xref->startxref, buf, bufsize); + if (error) + return fz_rethrow(error, "cannot read xref"); + + /* broken pdfs where first object is not free */ + if (xref->table[0].type != 'f') + { + fz_warn("first object in xref is not free"); + xref->table[0].type = 'f'; + } + + /* broken pdfs where freed objects have offset and gen set to 0 but still exist */ + for (i = 0; i < xref->len; i++) + { + if (xref->table[i].type == 'n' && xref->table[i].ofs == 0 && + xref->table[i].gen == 0 && xref->table[i].obj == nil) + { + fz_warn("object (%d %d R) has invalid offset, assumed missing", i, xref->table[i].gen); + xref->table[i].type = 'f'; + } + } + + return fz_okay; +} + +/* + * Initialize and load xref tables. + * If password is not null, try to decrypt. + */ + +fz_error +pdf_newxref(pdf_xref **xrefp, fz_stream *file, char *password) +{ + pdf_xref *xref; + fz_error error; + fz_obj *encrypt; + fz_obj *id; + + xref = fz_malloc(sizeof(pdf_xref)); + + memset(xref, 0, sizeof(pdf_xref)); + + pdf_logxref("openxref %p\n", xref); + + xref->file = fz_keepstream(file); + + error = pdf_loadxref(xref, xref->scratch, sizeof xref->scratch); + if (error) + { + fz_catch(error, "trying to repair"); + if (xref->table) + { + fz_free(xref->table); + xref->table = NULL; + xref->len = 0; + xref->cap = 0; + } + error = pdf_repairxref(xref, xref->scratch, sizeof xref->scratch); + if (error) + { + pdf_freexref(xref); + return fz_rethrow(error, "cannot repair document"); + } + } + + encrypt = fz_dictgets(xref->trailer, "Encrypt"); + id = fz_dictgets(xref->trailer, "ID"); + if (fz_isdict(encrypt)) + { + error = pdf_newcrypt(&xref->crypt, encrypt, id); + if (error) + { + pdf_freexref(xref); + return fz_rethrow(error, "cannot decrypt document"); + } + } + + if (pdf_needspassword(xref)) + { + /* Only care if we have a password */ + if (password) + { + int okay = pdf_authenticatepassword(xref, password); + if (!okay) + { + pdf_freexref(xref); + return fz_throw("invalid password"); + } + } + } + + *xrefp = xref; + return fz_okay; +} + +void +pdf_freexref(pdf_xref *xref) +{ + int i; + + pdf_logxref("freexref %p\n", xref); + + if (xref->store) + xref->freestore(xref->store); + + if (xref->table) + { + for (i = 0; i < xref->len; i++) + { + if (xref->table[i].obj) { fz_dropobj(xref->table[i].obj); xref->table[i].obj = nil; } } + fz_free(xref->table); + } + + if (xref->pageobjs) + { + for (i = 0; i < xref->pagelen; i++) + fz_dropobj(xref->pageobjs[i]); + fz_free(xref->pageobjs); + } + + if (xref->pagerefs) + { + for (i = 0; i < xref->pagelen; i++) + fz_dropobj(xref->pagerefs[i]); + fz_free(xref->pagerefs); } + + if (xref->file) + fz_dropstream(xref->file); + if (xref->trailer) + fz_dropobj(xref->trailer); + if (xref->crypt) + pdf_freecrypt(xref->crypt); + + fz_free(xref); } void @@ -278,3 +938,36 @@ pdf_loadobject(fz_obj **objp, pdf_xref *xref, int num, int gen) return fz_okay; } +/* + * Convenience function to open a file, create the xref and load the page tree. + * If password is not null, try to decrypt. + */ + +fz_error +pdf_openxref(pdf_xref **xrefp, char *filename, char *password) +{ + fz_error error; + pdf_xref *xref; + fz_stream *file; + int fd; + + fd = open(filename, O_BINARY | O_RDONLY); + if (fd < 0) + return fz_throw("cannot open file '%s': %s", filename, strerror(errno)); + + file = fz_openfile(fd); + error = pdf_newxref(&xref, file, password); + if (error) + return fz_rethrow(error, "cannot load document '%s'", filename); + fz_dropstream(file); + + error = pdf_loadpagetree(xref); + if (error) + { + pdf_freexref(xref); + return fz_rethrow(error, "cannot load page tree"); + } + + *xrefp = xref; + return fz_okay; +} diff --git a/win32/mupdf/mupdf.vcproj b/win32/mupdf/mupdf.vcproj index 385fa63f..4bc82896 100644 --- a/win32/mupdf/mupdf.vcproj +++ b/win32/mupdf/mupdf.vcproj @@ -229,10 +229,6 @@ >
</File>
<File
- RelativePath="..\..\mupdf\pdf_open.c"
- >
- </File>
- <File
RelativePath="..\..\mupdf\pdf_outline.c"
>
</File>
|