summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author Tor Andersson <tor@ghostscript.com> 2004-11-17 08:41:37 +0100
committer Tor Andersson <tor@ghostscript.com> 2004-11-17 08:41:37 +0100
commit fc5d174cd475c8f2236ea644d63e6b309e41b928 (patch)
tree 72dec2aa55a40b448547a2db77055bb02a172bdd /test
parent ed0f143ca6508817001319c6668602a07fe421dd (diff)
download mupdf-fc5d174cd475c8f2236ea644d63e6b309e41b928.tar.xz
text clip mode. split text content streams. text extraction api.
Diffstat (limited to 'test')
-rw-r--r-- test/pdfrip.c 113
-rw-r--r-- test/x11pdf.c 35
2 files changed, 33 insertions, 115 deletions
diff --git a/test/pdfrip.c b/test/pdfrip.c
index 0837e969..5d4aba39 100644
--- a/test/pdfrip.c
+++ b/test/pdfrip.c
@@ -2,110 +2,14 @@
#include <mupdf.h>
int showtree = 0;
-int showtext = 0;
float zoom = 1.0;
void usage()
{
- fprintf(stderr, "usage: pdfrip [-dt] [-p password] [-z zoom] file.pdf [pages...]\n");
+ fprintf(stderr, "usage: pdfrip [-d] [-p password] [-z zoom] file.pdf [pages...]\n");
exit(1);
}
-enum
-{
- Bit1 = 7,
- Bitx = 6,
- Bit2 = 5,
- Bit3 = 4,
- Bit4 = 3,
-
- T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
- Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
- T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
- T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
- T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
-
- Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
- Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
- Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
-
- Maskx = (1<<Bitx)-1, /* 0011 1111 */
- Testx = Maskx ^ 0xFF, /* 1100 0000 */
-};
-
-void putrune(int c)
-{
- if (c <= Rune1)
- {
- putchar(c);
- return;
- }
-
- if (c <= Rune2)
- {
- putchar(T2 | (c >> 1*Bitx));
- putchar(Tx | (c & Maskx));
- return;
- }
-
- putchar(T3 | (c >> 2*Bitx));
- putchar(Tx | ((c >> 1*Bitx) & Maskx));
- putchar(Tx | (c & Maskx));
-}
-
-/*
- * Dump text nodes as unicode
- */
-void dumptext(fz_node *node)
-{
- int i, cid, ucs;
- static fz_point old = { 0, 0 };
- fz_point p;
- float dx, dy;
- fz_vmtx v;
- fz_hmtx h;
-
- if (fz_istextnode(node))
- {
- fz_textnode *text = (fz_textnode*)node;
- pdf_font *font = (pdf_font*)text->font;
- fz_matrix invtrm = fz_invertmatrix(text->trm);
-
- for (i = 0; i < text->len; i++)
- {
- cid = text->els[i].cid;
- p.x = text->els[i].x;
- p.y = text->els[i].y;
- p = fz_transformpoint(invtrm, p);
- dx = old.x - p.x;
- dy = old.y - p.y;
- old = p;
-
- if (fabs(dy) > 1.6)
- puts("\n");
- else if (fabs(dy) > 0.2)
- putchar('\n');
- else if (fabs(dx) > 0.2)
- putchar(' ');
-
- h = fz_gethmtx(text->font, cid);
- old.x += h.w / 1000.0;
-
- if (font->tounicode)
- ucs = fz_lookupcid(font->tounicode, cid);
- else if (font->ncidtoucs)
- ucs = font->cidtoucs[cid];
- else
- ucs = cid;
-
- putrune(ucs);
- }
- }
-
- for (node = node->first; node; node = node->next)
- dumptext(node);
-}
-
/*
* Draw page
*/
@@ -139,14 +43,6 @@ void showpage(pdf_xref *xref, fz_obj *pageobj)
printf("endtree\n");
}
- if (showtext)
- {
- printf("---begin text dump---\n");
- dumptext(page->tree->root);
- printf("\n---end text dump---\n");
- }
-
- else
{
fz_pixmap *pix;
fz_renderer *gc;
@@ -191,14 +87,13 @@ int main(int argc, char **argv)
char *password = "";
- while ((c = getopt(argc, argv, "dtz:p:")) != -1)
+ while ((c = getopt(argc, argv, "dz:p:")) != -1)
{
switch (c)
{
case 'p': password = optarg; break;
case 'z': zoom = atof(optarg); break;
case 'd': ++showtree; break;
- case 't': ++showtext; break;
default: usage();
}
}
@@ -226,8 +121,8 @@ int main(int argc, char **argv)
if (error) fz_abort(error);
outlines = nil;
- error = pdf_loadoutlinetree(&outlines, xref);
- if (error) { fz_warn(error->msg); fz_droperror(error); }
+// error = pdf_loadoutlinetree(&outlines, xref);
+// if (error) { fz_warn(error->msg); fz_droperror(error); }
if (optind == argc)
{
diff --git a/test/x11pdf.c b/test/x11pdf.c
index 58655f8a..3d424b1c 100644
--- a/test/x11pdf.c
+++ b/test/x11pdf.c
@@ -29,6 +29,9 @@ static int rotate = 0;
static int pageno = 1;
static int count = 0;
+static pdf_page *page = nil;
+static fz_obj *pageobj = nil;
+
static int hist[256];
static int histlen = 0;
@@ -126,16 +129,13 @@ static void xtitle(char *s)
static void showpage(void)
{
fz_error *error;
- pdf_page *page;
fz_matrix ctm;
fz_rect bbox;
+ fz_obj *obj;
char s[256];
- fz_obj *pageobj;
assert(pageno > 0 && pageno <= pdf_getpagecount(pages));
- pageobj = pdf_getpageobject(pages, pageno - 1);
-
XDefineCursor(xdpy, xwin, xcwait);
XFlush(xdpy);
@@ -143,6 +143,14 @@ static void showpage(void)
fz_droppixmap(image);
image = nil;
+ obj = pdf_getpageobject(pages, pageno - 1);
+ if (obj == pageobj)
+ goto Lskipload;
+ pageobj = obj;
+
+ if (page)
+ pdf_droppage(page);
+
sprintf(s, "Loading page %d", pageno);
XSetForeground(xdpy, xgc, BlackPixel(xdpy, xscr));
XDrawString(xdpy, xwin, xgc, 10, 20, s, strlen(s));
@@ -152,6 +160,8 @@ static void showpage(void)
if (error)
fz_abort(error);
+Lskipload:
+
sprintf(s, "Rendering...");
XSetForeground(xdpy, xgc, BlackPixel(xdpy, xscr));
XDrawString(xdpy, xwin, xgc, 10, 30, s, strlen(s));
@@ -168,8 +178,6 @@ static void showpage(void)
if (error)
fz_abort(error);
- pdf_droppage(page);
-
XDefineCursor(xdpy, xwin, xcarrow);
XFlush(xdpy);
@@ -206,6 +214,20 @@ static void pdfopen(char *filename, char *password)
image = nil;
}
+static void dumptext()
+{
+ fz_error *error;
+ pdf_textline *line;
+
+ error = pdf_loadtextfromtree(&line, page->tree);
+ if (error)
+ fz_abort(error);
+
+ pdf_debugtextline(line);
+
+ pdf_droptextline(line);
+}
+
static void handlekey(int c)
{
int oldpage = pageno;
@@ -223,6 +245,7 @@ static void handlekey(int c)
case 'd': fz_debugglyphcache(rast->cache); break;
case 'a': rotate -= 5; break;
case 's': rotate += 5; break;
+ case 'x': dumptext(); break;
case 'b':
pageno--;