diff options
author | Tor Andersson <tor@ghostscript.com> | 2004-11-17 08:41:37 +0100 |
---|---|---|
committer | Tor Andersson <tor@ghostscript.com> | 2004-11-17 08:41:37 +0100 |
commit | fc5d174cd475c8f2236ea644d63e6b309e41b928 (patch) | |
tree | 72dec2aa55a40b448547a2db77055bb02a172bdd /test | |
parent | ed0f143ca6508817001319c6668602a07fe421dd (diff) | |
download | mupdf-fc5d174cd475c8f2236ea644d63e6b309e41b928.tar.xz |
text clip mode. split text content streams. text extraction api.
Diffstat (limited to 'test')
-rw-r--r-- | test/pdfrip.c | 113 | ||||
-rw-r--r-- | test/x11pdf.c | 35 |
2 files changed, 33 insertions, 115 deletions
diff --git a/test/pdfrip.c b/test/pdfrip.c index 0837e969..5d4aba39 100644 --- a/test/pdfrip.c +++ b/test/pdfrip.c @@ -2,110 +2,14 @@ #include <mupdf.h> int showtree = 0; -int showtext = 0; float zoom = 1.0; void usage() { - fprintf(stderr, "usage: pdfrip [-dt] [-p password] [-z zoom] file.pdf [pages...]\n"); + fprintf(stderr, "usage: pdfrip [-d] [-p password] [-z zoom] file.pdf [pages...]\n"); exit(1); } -enum -{ - Bit1 = 7, - Bitx = 6, - Bit2 = 5, - Bit3 = 4, - Bit4 = 3, - - T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ - Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ - T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ - T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ - T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ - - Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */ - Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */ - Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */ - - Maskx = (1<<Bitx)-1, /* 0011 1111 */ - Testx = Maskx ^ 0xFF, /* 1100 0000 */ -}; - -void putrune(int c) -{ - if (c <= Rune1) - { - putchar(c); - return; - } - - if (c <= Rune2) - { - putchar(T2 | (c >> 1*Bitx)); - putchar(Tx | (c & Maskx)); - return; - } - - putchar(T3 | (c >> 2*Bitx)); - putchar(Tx | ((c >> 1*Bitx) & Maskx)); - putchar(Tx | (c & Maskx)); -} - -/* - * Dump text nodes as unicode - */ -void dumptext(fz_node *node) -{ - int i, cid, ucs; - static fz_point old = { 0, 0 }; - fz_point p; - float dx, dy; - fz_vmtx v; - fz_hmtx h; - - if (fz_istextnode(node)) - { - fz_textnode *text = (fz_textnode*)node; - pdf_font *font = (pdf_font*)text->font; - fz_matrix invtrm = fz_invertmatrix(text->trm); - - for (i = 0; i < text->len; i++) - { - cid = text->els[i].cid; - p.x = text->els[i].x; - p.y = text->els[i].y; - p = fz_transformpoint(invtrm, p); - dx = old.x - p.x; - dy = old.y - p.y; - old = p; - - if (fabs(dy) > 1.6) - puts("\n"); - else if (fabs(dy) > 0.2) - putchar('\n'); - else if (fabs(dx) > 0.2) - putchar(' '); - - h = fz_gethmtx(text->font, cid); - old.x += h.w / 1000.0; - - if (font->tounicode) - ucs = fz_lookupcid(font->tounicode, cid); - else if (font->ncidtoucs) - ucs = font->cidtoucs[cid]; - else - ucs = cid; - - putrune(ucs); - } - } - - for (node = node->first; node; node = node->next) - dumptext(node); -} - /* * Draw page */ @@ -139,14 +43,6 @@ void showpage(pdf_xref *xref, fz_obj *pageobj) printf("endtree\n"); } - if (showtext) - { - printf("---begin text dump---\n"); - dumptext(page->tree->root); - printf("\n---end text dump---\n"); - } - - else { fz_pixmap *pix; fz_renderer *gc; @@ -191,14 +87,13 @@ int main(int argc, char **argv) char *password = ""; - while ((c = getopt(argc, argv, "dtz:p:")) != -1) + while ((c = getopt(argc, argv, "dz:p:")) != -1) { switch (c) { case 'p': password = optarg; break; case 'z': zoom = atof(optarg); break; case 'd': ++showtree; break; - case 't': ++showtext; break; default: usage(); } } @@ -226,8 +121,8 @@ int main(int argc, char **argv) if (error) fz_abort(error); outlines = nil; - error = pdf_loadoutlinetree(&outlines, xref); - if (error) { fz_warn(error->msg); fz_droperror(error); } +// error = pdf_loadoutlinetree(&outlines, xref); +// if (error) { fz_warn(error->msg); fz_droperror(error); } if (optind == argc) { diff --git a/test/x11pdf.c b/test/x11pdf.c index 58655f8a..3d424b1c 100644 --- a/test/x11pdf.c +++ b/test/x11pdf.c @@ -29,6 +29,9 @@ static int rotate = 0; static int pageno = 1; static int count = 0; +static pdf_page *page = nil; +static fz_obj *pageobj = nil; + static int hist[256]; static int histlen = 0; @@ -126,16 +129,13 @@ static void xtitle(char *s) static void showpage(void) { fz_error *error; - pdf_page *page; fz_matrix ctm; fz_rect bbox; + fz_obj *obj; char s[256]; - fz_obj *pageobj; assert(pageno > 0 && pageno <= pdf_getpagecount(pages)); - pageobj = pdf_getpageobject(pages, pageno - 1); - XDefineCursor(xdpy, xwin, xcwait); XFlush(xdpy); @@ -143,6 +143,14 @@ static void showpage(void) fz_droppixmap(image); image = nil; + obj = pdf_getpageobject(pages, pageno - 1); + if (obj == pageobj) + goto Lskipload; + pageobj = obj; + + if (page) + pdf_droppage(page); + sprintf(s, "Loading page %d", pageno); XSetForeground(xdpy, xgc, BlackPixel(xdpy, xscr)); XDrawString(xdpy, xwin, xgc, 10, 20, s, strlen(s)); @@ -152,6 +160,8 @@ static void showpage(void) if (error) fz_abort(error); +Lskipload: + sprintf(s, "Rendering..."); XSetForeground(xdpy, xgc, BlackPixel(xdpy, xscr)); XDrawString(xdpy, xwin, xgc, 10, 30, s, strlen(s)); @@ -168,8 +178,6 @@ static void showpage(void) if (error) fz_abort(error); - pdf_droppage(page); - XDefineCursor(xdpy, xwin, xcarrow); XFlush(xdpy); @@ -206,6 +214,20 @@ static void pdfopen(char *filename, char *password) image = nil; } +static void dumptext() +{ + fz_error *error; + pdf_textline *line; + + error = pdf_loadtextfromtree(&line, page->tree); + if (error) + fz_abort(error); + + pdf_debugtextline(line); + + pdf_droptextline(line); +} + static void handlekey(int c) { int oldpage = pageno; @@ -223,6 +245,7 @@ static void handlekey(int c) case 'd': fz_debugglyphcache(rast->cache); break; case 'a': rotate -= 5; break; case 's': rotate += 5; break; + case 'x': dumptext(); break; case 'b': pageno--; |