summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Jamfile2
-rw-r--r--TODO20
-rw-r--r--base/rune.c168
-rw-r--r--filter/faxd.c4
-rw-r--r--include/fitz/base.h6
-rw-r--r--include/mupdf/content.h3
-rw-r--r--include/mupdf/page.h21
-rw-r--r--include/mupdf/rsrc.h3
-rw-r--r--mupdf/build.c126
-rw-r--r--mupdf/font.c65
-rw-r--r--mupdf/fontfile.c1
-rw-r--r--mupdf/interpret.c59
-rw-r--r--mupdf/parse.c3
-rw-r--r--mupdf/type3.c53
-rw-r--r--mupdf/unicode.c278
-rw-r--r--render/renderimage.c46
-rw-r--r--test/pdfrip.c113
-rw-r--r--test/x11pdf.c35
18 files changed, 705 insertions, 301 deletions
diff --git a/Jamfile b/Jamfile
index d3240e76..039dcbc0 100644
--- a/Jamfile
+++ b/Jamfile
@@ -18,6 +18,7 @@ Library libfitz :
#util/strlcat.c
# base runtime
+ base/rune.c
base/error.c
base/memory.c
base/md5.c
@@ -113,6 +114,7 @@ Library libmupdf :
mupdf/fontagl.c
mupdf/fontenc.c
mupdf/fontfile.c
+ mupdf/unicode.c
mupdf/font.c
mupdf/type3.c
mupdf/colorspace.c
diff --git a/TODO b/TODO
index 5be19c71..bfeaee87 100644
--- a/TODO
+++ b/TODO
@@ -1,37 +1,36 @@
-colorspace conversions
+colorspace conversions (v2)
- cal*
- iccbased
- how to normalize Lab components to 0..1
- fast color cubes
- how to cache colorspace cubes (what key?)
-image rendering
+image rendering (v2)
- tiles
- dct case
- better filter than box
+ - lazy decoding
shadings
- ... jeong ...
rendering
- - save non-transformed bbox in nodes
- - explicit mask field in fz_renderer .. general cleanup
+ - bbox culling (cache bbox in over node?)
+ - image mask + color case
- merge gka optims
- - optimize! optimize! optimize! (special case 1 and 4 channel cases)
+ - optimize inner rendering loops
+ - optimize image load/decode/scale
+ - special-case optims (1, 2 and 4)
+ - cpu-specific optims
parser
- - text clip mode
- - split content streams (TJ objects)
- resource dict generate fake ids
- try to clean up colorspace/material handling in interpreter
- - tounicode
clean up
- make source ansi c89 / pedantic
- reference count everything
- standard cleanup mechanism
- - naming conventions (fz_new/renew)
-
- design by contract
- split into private and public
- comments and documentation
@@ -39,5 +38,4 @@ clean up
cache
global cache for cmaps and fontfiles (emb+sys)
render cache (link-nodes and scaled images)
- profile font cache (esp with t3 fonts)
diff --git a/base/rune.c b/base/rune.c
new file mode 100644
index 00000000..8b886637
--- /dev/null
+++ b/base/rune.c
@@ -0,0 +1,168 @@
+enum
+{
+ UTFmax = 3, /* maximum bytes per rune */
+ Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
+ Runeself = 0x80, /* rune and UTF sequences are the same (<) */
+ Runeerror = 0x80 /* decoding error in UTF */
+};
+
+enum
+{
+ Bit1 = 7,
+ Bitx = 6,
+ Bit2 = 5,
+ Bit3 = 4,
+ Bit4 = 3,
+
+ T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
+ Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
+ T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
+ T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
+ T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
+
+ Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
+ Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
+ Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
+
+ Maskx = (1<<Bitx)-1, /* 0011 1111 */
+ Testx = Maskx ^ 0xFF, /* 1100 0000 */
+
+ Bad = Runeerror,
+};
+
+int
+chartorune(int *rune, char *str)
+{
+ int c, c1, c2;
+ int l;
+
+ /*
+ * one character sequence
+ * 00000-0007F => T1
+ */
+ c = *(unsigned char*)str;
+ if(c < Tx) {
+ *rune = c;
+ return 1;
+ }
+
+ /*
+ * two character sequence
+ * 0080-07FF => T2 Tx
+ */
+ c1 = *(unsigned char*)(str+1) ^ Tx;
+ if(c1 & Testx)
+ goto bad;
+ if(c < T3) {
+ if(c < T2)
+ goto bad;
+ l = ((c << Bitx) | c1) & Rune2;
+ if(l <= Rune1)
+ goto bad;
+ *rune = l;
+ return 2;
+ }
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ c2 = *(unsigned char*)(str+2) ^ Tx;
+ if(c2 & Testx)
+ goto bad;
+ if(c < T4) {
+ l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+ if(l <= Rune2)
+ goto bad;
+ *rune = l;
+ return 3;
+ }
+
+ /*
+ * bad decoding
+ */
+bad:
+ *rune = Bad;
+ return 1;
+}
+
+int
+runetochar(char *str, int *rune)
+{
+ int c;
+
+ /*
+ * one character sequence
+ * 00000-0007F => 00-7F
+ */
+ c = *rune;
+ if(c <= Rune1) {
+ str[0] = c;
+ return 1;
+ }
+
+ /*
+ * two character sequence
+ * 0080-07FF => T2 Tx
+ */
+ if(c <= Rune2) {
+ str[0] = T2 | (c >> 1*Bitx);
+ str[1] = Tx | (c & Maskx);
+ return 2;
+ }
+
+ /*
+ * three character sequence
+ * 0800-FFFF => T3 Tx Tx
+ */
+ str[0] = T3 | (c >> 2*Bitx);
+ str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+ str[2] = Tx | (c & Maskx);
+ return 3;
+}
+
+int
+runelen(int c)
+{
+ int rune;
+ char str[10];
+
+ rune = c;
+ return runetochar(str, &rune);
+}
+
+int
+runenlen(int *r, int nrune)
+{
+ int nb, c;
+
+ nb = 0;
+ while(nrune--) {
+ c = *r++;
+ if(c <= Rune1)
+ nb++;
+ else
+ if(c <= Rune2)
+ nb += 2;
+ else
+ nb += 3;
+ }
+ return nb;
+}
+
+int
+fullrune(char *str, int n)
+{
+ int c;
+
+ if(n > 0) {
+ c = *(unsigned char*)str;
+ if(c < Tx)
+ return 1;
+ if(n > 1)
+ if(c < T3 || n > 2)
+ return 1;
+ }
+ return 0;
+}
+
diff --git a/filter/faxd.c b/filter/faxd.c
index 62793dab..d8686a08 100644
--- a/filter/faxd.c
+++ b/filter/faxd.c
@@ -431,6 +431,10 @@ eol:
goto loop;
rtc:
+ i = (32 - fax->bidx) / 8;
+ while (i-- && in->rp > in->bp)
+ in->rp --;
+
out->eof = 1;
return fz_iodone;
}
diff --git a/include/fitz/base.h b/include/fitz/base.h
index da0132d4..562bb1df 100644
--- a/include/fitz/base.h
+++ b/include/fitz/base.h
@@ -21,6 +21,12 @@
#define STRIDE(n, bcp) (((bpc) * (n) + 7) / 8)
+int chartorune(int *rune, char *str);
+int runetochar(char *str, int *rune);
+int runelen(long c);
+int runenlen(int *r, int nrune);
+int fullrune(char *str, int n);
+
typedef struct fz_error_s fz_error;
struct fz_error_s
diff --git a/include/mupdf/content.h b/include/mupdf/content.h
index 6dae61da..f891c3c7 100644
--- a/include/mupdf/content.h
+++ b/include/mupdf/content.h
@@ -66,15 +66,18 @@ struct pdf_csi_s
fz_obj *stack[32];
int top;
int xbalance;
+ fz_obj *array;
/* path object state */
fz_pathnode *path;
fz_pathnode *clip;
/* text object state */
+ fz_node *textclip;
fz_textnode *text;
fz_matrix tlm;
fz_matrix tm;
+ int textmode;
fz_tree *tree;
};
diff --git a/include/mupdf/page.h b/include/mupdf/page.h
index 0dd1001b..3bb27db6 100644
--- a/include/mupdf/page.h
+++ b/include/mupdf/page.h
@@ -7,6 +7,8 @@ typedef struct pdf_outline_s pdf_outline;
typedef struct pdf_nametree_s pdf_nametree;
typedef struct pdf_pagetree_s pdf_pagetree;
typedef struct pdf_page_s pdf_page;
+typedef struct pdf_textline_s pdf_textline;
+typedef struct pdf_textchar_s pdf_textchar;
struct pdf_outlinetree_s
{
@@ -48,6 +50,19 @@ struct pdf_page_s
int rotate;
fz_obj *resources;
fz_tree *tree;
+ pdf_textline *text;
+};
+
+struct pdf_textchar_s
+{
+ int x, y, c;
+};
+
+struct pdf_textline_s
+{
+ int len, cap;
+ pdf_textchar *text;
+ pdf_textline *next;
};
/* outline.c */
@@ -73,3 +88,9 @@ void pdf_droppagetree(pdf_pagetree *pages);
fz_error *pdf_loadpage(pdf_page **pagep, pdf_xref *xref, fz_obj *ref);
void pdf_droppage(pdf_page *page);
+/* unicode.c */
+fz_error *pdf_loadtextfromtree(pdf_textline **linep, fz_tree *tree);
+void pdf_debugtextline(pdf_textline *line);
+fz_error *pdf_newtextline(pdf_textline **linep);
+void pdf_droptextline(pdf_textline *line);
+
diff --git a/include/mupdf/rsrc.h b/include/mupdf/rsrc.h
index bb29dc8a..eaf20fb2 100644
--- a/include/mupdf/rsrc.h
+++ b/include/mupdf/rsrc.h
@@ -161,6 +161,9 @@ fz_error *pdf_loadembeddedcmap(fz_cmap **cmapp, pdf_xref *xref, fz_obj *stmref);
fz_error *pdf_loadsystemcmap(fz_cmap **cmapp, char *name);
fz_error *pdf_makeidentitycmap(fz_cmap **cmapp, int wmode, int bytes);
+/* unicode.c */
+fz_error *pdf_loadtounicode(pdf_font *font, pdf_xref *xref, char **strings, char *collection, fz_obj *cmapstm);
+
/* fontfile.c */
fz_error *pdf_loadbuiltinfont(pdf_font *font, char *basefont);
fz_error *pdf_loadembeddedfont(pdf_font *font, pdf_xref *xref, fz_obj *stmref);
diff --git a/mupdf/build.c b/mupdf/build.c
index 3b602e89..3b65ff6a 100644
--- a/mupdf/build.c
+++ b/mupdf/build.c
@@ -195,6 +195,28 @@ addcolorshape(pdf_gstate *gs, fz_node *shape, fz_colorspace *cs, float *v)
return nil;
}
+static fz_error *
+addinvisibleshape(pdf_gstate *gs, fz_node *shape)
+{
+ fz_error *error;
+ fz_node *mask;
+ fz_pathnode *path;
+
+ error = fz_newmasknode(&mask);
+ if (error) return error;
+
+ error = fz_newpathnode(&path);
+ if (error) return error;
+ error = fz_endpath(path, FZ_FILL, nil, nil);
+ if (error) return error;
+
+ fz_insertnode(mask, (fz_node*)path);
+ fz_insertnode(mask, shape);
+ fz_insertnode(gs->head, mask);
+
+ return nil;
+}
+
static fz_matrix getmatrix(fz_node *node)
{
if (node->parent)
@@ -401,62 +423,6 @@ pdf_showimage(pdf_csi *csi, pdf_image *img)
return nil;
}
-#if 0
-
-BMC ... EMC object nesting can be completely fucked up
-and out of sync with graphics object nesting.
-
-fz_error *
-pdf_beginmarkedcontent(pdf_gstate *gs, fz_node *meta)
-{
- fz_error *error;
- fz_node *over;
-
- error = fz_newovernode(&over);
- if (error) return error;
-
- fz_insertnode(gs->head, meta);
- fz_insertnode(meta, over);
- gs->head = over;
-
-printf("begin mc meta=%p over=%p\n", meta, over);
-{
-fz_node *node = gs->head;
- while (node)
- {
-printf(" node=%p ismeta=%d\n", node, fz_ismetanode(node));
- node = node->parent;
- }
-printf("okay.\n");
-}
-
- return nil;
-}
-
-fz_error *
-pdf_endmarkedcontent(pdf_gstate *gs)
-{
- fz_node *node = gs->head;
-
-printf("end mc\n");
-printf(" node=%p ismeta=%d\n", node, fz_ismetanode(node));
-
- while (node && !fz_ismetanode(node))
- {
-printf(" node=%p ismeta=%d\n", node, fz_ismetanode(node));
- node = node->parent;
- }
-
- if (node == nil)
- return fz_throw("syntaxerror: unbalanced marked content");
-
- gs->head = node->parent;
-
- return nil;
-}
-
-#endif
-
fz_error *
pdf_showpath(pdf_csi *csi,
int doclose, int dofill, int dostroke, int evenodd)
@@ -525,18 +491,40 @@ pdf_flushtext(pdf_csi *csi)
pdf_gstate *gstate = csi->gstate + csi->gtop;
fz_error *error;
- /* invisible */
- if (gstate->render == 3)
- return nil;
-
- else if (gstate->render != 0)
- fz_warn("unimplemented text render mode: %d", gstate->render);
-
if (csi->text)
{
- error = pdf_addfillshape(gstate, (fz_node*)csi->text);
- if (error)
- return error;
+
+ /* invisible */
+ switch (csi->textmode)
+ {
+ case 0: /* fill */
+ case 1: /* stroke */
+ case 2: /* stroke + fill */
+ error = pdf_addfillshape(gstate, (fz_node*)csi->text);
+ if (error)
+ return error;
+ break;
+
+ case 3: /* invisible */
+ error = addinvisibleshape(gstate, (fz_node*)csi->text);
+ if (error)
+ return error;
+ break;
+
+ case 4: /* fill + clip */
+ case 5: /* stroke + clip */
+ case 6: /* stroke + fill + clip */
+ case 7: /* invisible clip */
+ if (!csi->textclip)
+ {
+ error = fz_newovernode(&csi->textclip);
+ if (error)
+ return error;
+ }
+ fz_insertnode(csi->textclip, (fz_node*)csi->text);
+ break;
+ }
+
csi->text = nil;
}
@@ -570,13 +558,14 @@ showglyph(pdf_csi *csi, int cid)
trm = fz_concat(tsm, csi->tm);
- /* flush buffered text if face or matrix has changed */
+ /* flush buffered text if face or matrix or rendermode has changed */
if (!csi->text ||
((fz_font*)font) != csi->text->font ||
fabs(trm.a - csi->text->trm.a) > FLT_EPSILON ||
fabs(trm.b - csi->text->trm.b) > FLT_EPSILON ||
fabs(trm.c - csi->text->trm.c) > FLT_EPSILON ||
- fabs(trm.d - csi->text->trm.d) > FLT_EPSILON)
+ fabs(trm.d - csi->text->trm.d) > FLT_EPSILON ||
+ gstate->render != csi->textmode)
{
error = pdf_flushtext(csi);
if (error) return error;
@@ -587,6 +576,7 @@ showglyph(pdf_csi *csi, int cid)
csi->text->trm = trm;
csi->text->trm.e = 0;
csi->text->trm.f = 0;
+ csi->textmode = gstate->render;
}
/* add glyph to textobject */
diff --git a/mupdf/font.c b/mupdf/font.c
index d9683596..5f4f5046 100644
--- a/mupdf/font.c
+++ b/mupdf/font.c
@@ -1,11 +1,6 @@
#include <fitz.h>
#include <mupdf.h>
-/*
- * TODO: substitution fonts when no exact match is found.
- * base on a) cid system info and b) fontdescriptor flags
- */
-
#include <ft2build.h>
#include FT_FREETYPE_H
#include <freetype/internal/ftobjs.h>
@@ -241,9 +236,7 @@ loadsimplefont(pdf_font **fontp, pdf_xref *xref, fz_obj *dict)
fz_obj *descriptor = nil;
fz_obj *encoding = nil;
fz_obj *widths = nil;
- fz_obj *tounicode = nil;
unsigned short *etable = nil;
- unsigned short *utable = nil;
pdf_font *font;
FT_Face face;
FT_CharMap cmap;
@@ -441,29 +434,11 @@ printf(" builtin encoding\n");
font->ncidtogid = 256;
font->cidtogid = etable;
- /*
- * ToUnicode
- */
-
- utable = fz_malloc(sizeof(unsigned short) * 256);
- if (!utable)
+ error = pdf_loadtounicode(font, xref,
+ estrings, nil, fz_dictgets(dict, "ToUnicode"));
+ if (error)
goto cleanup;
- for (i = 0; i < 256; i++)
- if (estrings[i])
- utable[i] = pdf_lookupagl(estrings[i]);
- else
- utable[i] = i;
-
- tounicode = fz_dictgets(dict, "ToUnicode");
- if (fz_isindirect(tounicode))
- {
-printf(" load tounicode cmap for simple font\n");
- }
-
- font->ncidtoucs = 256;
- font->cidtoucs = utable;
-
/*
* Widths
*/
@@ -520,7 +495,6 @@ printf("\n");
return nil;
cleanup:
- fz_free(utable);
fz_free(etable);
if (widths)
fz_dropobj(widths);
@@ -534,7 +508,7 @@ cleanup:
*/
static fz_error *
-loadcidfont(pdf_font **fontp, pdf_xref *xref, fz_obj *dict, fz_obj *encoding)
+loadcidfont(pdf_font **fontp, pdf_xref *xref, fz_obj *dict, fz_obj *encoding, fz_obj *tounicode)
{
fz_error *error;
fz_obj *widths = nil;
@@ -678,28 +652,7 @@ printf(" cidtogidmap %d\n", len / 2);
/* win: 3,4 3,6 3,3 3,2 3,1 3,5 */
}
- /*
- * ToUnicode
- */
-
- if (fz_dictgets(dict, "ToUnicode"))
- printf(" load tounicode for cid-font");
-
- if (!strcmp(collection, "Adobe-CNS1"))
- error = pdf_loadsystemcmap(&font->tounicode, "Adobe-CNS1-UCS2");
- else if (!strcmp(collection, "Adobe-GB1"))
- error = pdf_loadsystemcmap(&font->tounicode, "Adobe-GB1-UCS2");
- else if (!strcmp(collection, "Adobe-Japan1"))
- error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan1-UCS2");
- else if (!strcmp(collection, "Adobe-Japan2"))
- error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan2-UCS2");
- else if (!strcmp(collection, "Adobe-Korea1"))
- error = pdf_loadsystemcmap(&font->tounicode, "Adobe-Korea1-UCS2");
- else
- {
- printf(" unknown character collection\n");
- error = nil;
- }
+ error = pdf_loadtounicode(font, xref, nil, collection, tounicode);
if (error)
goto cleanup;
@@ -840,6 +793,7 @@ loadtype0(pdf_font **fontp, pdf_xref *xref, fz_obj *dict)
fz_obj *dfont;
fz_obj *subtype;
fz_obj *encoding;
+ fz_obj *tounicode;
dfonts = fz_dictgets(dict, "DescendantFonts");
error = pdf_resolve(&dfonts, xref);
@@ -851,13 +805,14 @@ loadtype0(pdf_font **fontp, pdf_xref *xref, fz_obj *dict)
if (error)
return fz_dropobj(dfonts), error;
- encoding = fz_dictgets(dict, "Encoding");
subtype = fz_dictgets(dfont, "Subtype");
+ encoding = fz_dictgets(dict, "Encoding");
+ tounicode = fz_dictgets(dict, "ToUnicode");
if (!strcmp(fz_toname(subtype), "CIDFontType0"))
- error = loadcidfont(fontp, xref, dfont, encoding);
+ error = loadcidfont(fontp, xref, dfont, encoding, tounicode);
else if (!strcmp(fz_toname(subtype), "CIDFontType2"))
- error = loadcidfont(fontp, xref, dfont, encoding);
+ error = loadcidfont(fontp, xref, dfont, encoding, tounicode);
else
error = fz_throw("syntaxerror: unknown cid font type");
diff --git a/mupdf/fontfile.c b/mupdf/fontfile.c
index a17bbfdf..6c525b74 100644
--- a/mupdf/fontfile.c
+++ b/mupdf/fontfile.c
@@ -5,7 +5,6 @@
#include <ft2build.h>
#include FT_FREETYPE_H
-#include <fontconfig/fontconfig.h>
static FT_Library ftlib = nil;
diff --git a/mupdf/interpret.c b/mupdf/interpret.c
index e377c5b6..5e0e12a9 100644
--- a/mupdf/interpret.c
+++ b/mupdf/interpret.c
@@ -16,7 +16,7 @@ pdf_newcsi(pdf_csi **csip, int maskonly)
csi->gtop = 0;
csi->top = 0;
-
+ csi->array = nil;
csi->xbalance = 0;
error = fz_newpathnode(&csi->path);
@@ -44,6 +44,8 @@ pdf_newcsi(pdf_csi **csip, int maskonly)
csi->clip = nil;
+ csi->textclip = nil;
+ csi->textmode = 0;
csi->text = nil;
csi->tm = fz_identity();
csi->tlm = fz_identity();
@@ -65,7 +67,9 @@ pdf_dropcsi(pdf_csi *csi)
{
if (csi->path) fz_dropnode((fz_node*)csi->path);
if (csi->clip) fz_dropnode((fz_node*)csi->clip);
+ if (csi->textclip) fz_dropnode((fz_node*)csi->textclip);
if (csi->text) fz_dropnode((fz_node*)csi->text);
+ if (csi->array) fz_dropobj(csi->array);
clearstack(csi);
fz_free(csi);
}
@@ -145,7 +149,7 @@ runinlineimage(pdf_csi *csi, pdf_xref *xref, fz_file *file, fz_obj *dict)
token = pdf_lex(file, buf, sizeof buf, &len);
if (token != PDF_TKEYWORD || strcmp("EI", buf))
- return fz_throw("syntaxerror: corrupt inline image");
+ fz_warn("syntaxerror: corrupt inline image");
error = pdf_showimage(csi, img);
if (error)
@@ -550,9 +554,17 @@ Lsetcolor:
{
if (csi->top != 0)
goto syntaxerror;
+
error = pdf_flushtext(csi);
if (error)
return error;
+
+ if (csi->textclip)
+ {
+ error = pdf_addclipmask(gstate, csi->textclip);
+ if (error) return error;
+ csi->textclip = nil;
+ }
}
else if (!strcmp(buf, "Tc"))
@@ -1003,6 +1015,7 @@ pdf_runcsi(pdf_csi *csi, pdf_xref *xref, fz_obj *rdb, fz_file *file)
fz_error *error;
char buf[65536];
int token, len;
+ fz_obj *obj;
while (1)
{
@@ -1011,19 +1024,53 @@ pdf_runcsi(pdf_csi *csi, pdf_xref *xref, fz_obj *rdb, fz_file *file)
token = pdf_lex(file, buf, sizeof buf, &len);
- switch (token)
+ if (csi->array)
+ {
+ if (token == PDF_TCARRAY)
+ {
+ csi->stack[csi->top] = csi->array;
+ csi->array = nil;
+ csi->top ++;
+ }
+ else if (token == PDF_TINT || token == PDF_TREAL)
+ {
+ error = fz_newreal(&obj, atof(buf));
+ if (error) return error;
+ error = fz_arraypush(csi->array, obj);
+ fz_dropobj(obj);
+ if (error) return error;
+ }
+ else if (token == PDF_TSTRING)
+ {
+ error = fz_newstring(&obj, buf, len);
+ if (error) return error;
+ error = fz_arraypush(csi->array, obj);
+ fz_dropobj(obj);
+ if (error) return error;
+ }
+ else if (token == PDF_TEOF)
+ {
+ return nil;
+ }
+ else
+ {
+ clearstack(csi);
+ return fz_throw("syntaxerror in content stream");
+ }
+ }
+
+ else switch (token)
{
case PDF_TEOF:
return nil;
- /* FIXME: need to make array parsing be able to span files for
+ /* we need to make array parsing be able to span files for
those stupid pdf files that split TJ arrays across content
streams...
*/
case PDF_TOARRAY:
- error = pdf_parsearray(&csi->stack[csi->top], file, buf, sizeof buf);
+ error = fz_newarray(&csi->array, 8);
if (error) return error;
- csi->top ++;
break;
/* drop down to normal pdf object parsing for dictionaries,
diff --git a/mupdf/parse.c b/mupdf/parse.c
index 2eb3d234..06ae3954 100644
--- a/mupdf/parse.c
+++ b/mupdf/parse.c
@@ -147,7 +147,8 @@ skip:
case PDF_TINT:
a = atoi(buf);
tok = pdf_lex(file, buf, cap, &len);
- if (tok == PDF_TCDICT || tok == PDF_TNAME)
+ if (tok == PDF_TCDICT || tok == PDF_TNAME ||
+ (tok == PDF_TKEYWORD && !strcmp(buf, "ID")))
{
error = fz_newint(&val, a);
if (error) goto cleanup;
diff --git a/mupdf/type3.c b/mupdf/type3.c
index 60d7c38c..81f22086 100644
--- a/mupdf/type3.c
+++ b/mupdf/type3.c
@@ -103,7 +103,6 @@ pdf_loadtype3font(pdf_font **fontp, pdf_xref *xref, fz_obj *dict)
{
fz_error *error;
char buf[256];
- unsigned short *utable;
char *estrings[256];
pdf_font *font;
fz_obj *encoding;
@@ -202,27 +201,10 @@ printf(" matrix [%g %g %g %g %g %g]\n",
if (error)
goto cleanup;
- /*
- * ToUnicode
- */
-
- utable = fz_malloc(sizeof(unsigned short) * 256);
- if (!utable)
- goto cleanup;
-
- for (i = 0; i < 256; i++)
- if (estrings[i])
- utable[i] = pdf_lookupagl(estrings[i]);
- else
- utable[i] = i;
-
- if (fz_dictgets(dict, "ToUnicode"))
- {
-printf(" load tounicode cmap for type3 font\n");
- }
-
- font->ncidtoucs = 256;
- font->cidtoucs = utable;
+ error = pdf_loadtounicode(font, xref,
+ estrings, nil, fz_dictgets(dict, "ToUnicode"));
+ if (error)
+ goto cleanup;
/*
* Widths
@@ -264,22 +246,22 @@ printf(" load tounicode cmap for type3 font\n");
* Resources
*/
- obj = fz_dictgets(dict, "Resources");
- if (!obj) {
- error = fz_throw("syntaxerror: Type3 font missing Resources");
- goto cleanup;
- }
+ resources = nil;
- error = pdf_resolve(&obj, xref);
- if (error)
- goto cleanup;
+ obj = fz_dictgets(dict, "Resources");
+ if (obj)
+ {
+ error = pdf_resolve(&obj, xref);
+ if (error)
+ goto cleanup;
- error = pdf_loadresources(&resources, xref, obj);
+ error = pdf_loadresources(&resources, xref, obj);
- fz_dropobj(obj);
+ fz_dropobj(obj);
- if (error)
- goto cleanup;
+ if (error)
+ goto cleanup;
+ }
/*
* CharProcs
@@ -311,7 +293,8 @@ printf(" load tounicode cmap for type3 font\n");
}
fz_dropobj(charprocs);
- fz_dropobj(resources);
+ if (resources)
+ fz_dropobj(resources);
*fontp = font;
return nil;
diff --git a/mupdf/unicode.c b/mupdf/unicode.c
new file mode 100644
index 00000000..42fba1c0
--- /dev/null
+++ b/mupdf/unicode.c
@@ -0,0 +1,278 @@
+#include <fitz.h>
+#include <mupdf.h>
+
+/*
+ * ToUnicode map for fonts
+ */
+
+fz_error *
+pdf_loadtounicode(pdf_font *font, pdf_xref *xref,
+ char **strings, char *collection, fz_obj *cmapstm)
+{
+ fz_error *error;
+ fz_cmap *cmap;
+ int cid;
+ int ucs;
+ int i;
+
+ if (fz_isindirect(cmapstm))
+ {
+ error = pdf_loadembeddedcmap(&cmap, xref, cmapstm);
+ if (error)
+ return error;
+
+ error = fz_newcmap(&font->tounicode);
+ if (error)
+ goto cleanup;
+
+ for (i = 0; i < (strings ? 256 : 65536); i++)
+ {
+ cid = fz_lookupcid(font->encoding, i);
+ if (cid > 0)
+ {
+ ucs = fz_lookupcid(cmap, i);
+ error = fz_addcidrange(font->tounicode, cid, cid, ucs);
+ if (error)
+ goto cleanup;
+ }
+ }
+
+ error = fz_endcidrange(font->tounicode);
+ if (error)
+ goto cleanup;
+
+ cleanup:
+ fz_dropcmap(cmap);
+ return error;
+ }
+
+ if (collection)
+ {
+ if (!strcmp(collection, "Adobe-CNS1"))
+ return pdf_loadsystemcmap(&font->tounicode, "Adobe-CNS1-UCS2");
+ else if (!strcmp(collection, "Adobe-GB1"))
+ return pdf_loadsystemcmap(&font->tounicode, "Adobe-GB1-UCS2");
+ else if (!strcmp(collection, "Adobe-Japan1"))
+ return pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan1-UCS2");
+ else if (!strcmp(collection, "Adobe-Japan2"))
+ return pdf_loadsystemcmap(&font->tounicode, "Adobe-Japan2-UCS2");
+ else if (!strcmp(collection, "Adobe-Korea1"))
+ return pdf_loadsystemcmap(&font->tounicode, "Adobe-Korea1-UCS2");
+ }
+
+ if (strings)
+ {
+ font->ncidtoucs = 256;
+ font->cidtoucs = fz_malloc(256 * sizeof(unsigned short));
+ if (!font->cidtoucs)
+ return fz_outofmem;
+
+ for (i = 0; i < 256; i++)
+ {
+ if (strings[i])
+ font->cidtoucs[i] = pdf_lookupagl(strings[i]);
+ else
+ font->cidtoucs[i] = 0;
+ }
+
+ return nil;
+ }
+
+ fz_warn("font: cannot create unicode conversion\n", collection);
+ return nil;
+}
+
+/*
+ * Extract lines of text from display tree
+ */
+
+fz_error *
+pdf_newtextline(pdf_textline **linep)
+{
+ pdf_textline *line;
+ line = *linep = fz_malloc(sizeof(pdf_textline));
+ if (!line)
+ return fz_outofmem;
+ line->len = 0;
+ line->cap = 0;
+ line->text = nil;
+ line->next = nil;
+ return nil;
+}
+
+void
+pdf_droptextline(pdf_textline *line)
+{
+ if (line->next)
+ pdf_droptextline(line->next);
+ fz_free(line->text);
+ fz_free(line);
+}
+
+static fz_error *
+addtextchar(pdf_textline *line, int x, int y, int c)
+{
+ pdf_textchar *newtext;
+ int newcap;
+
+ if (line->len + 1 >= line->cap)
+ {
+ newcap = line->cap ? line->cap * 2 : 80;
+ newtext = fz_realloc(line->text, sizeof(pdf_textchar) * newcap);
+ if (!newtext)
+ return fz_outofmem;
+ line->cap = newcap;
+ line->text = newtext;
+ }
+
+ line->text[line->len].x = x;
+ line->text[line->len].y = y;
+ line->text[line->len].c = c;
+ line->len ++;
+
+ return nil;
+}
+
+/* XXX global! not reentrant! */
+static fz_point oldpt = { 0, 0 };
+
+static fz_error *
+findtext(pdf_textline **line, fz_node *node, fz_matrix ctm)
+{
+ fz_error *error;
+
+ if (fz_istextnode(node))
+ {
+ fz_textnode *text = (fz_textnode*)node;
+ pdf_font *font = (pdf_font*)text->font;
+ fz_matrix inv = fz_invertmatrix(text->trm);
+ fz_matrix trm = fz_concat(text->trm, ctm);
+ float dx, dy, t;
+ fz_point p;
+ fz_vmtx v;
+ fz_hmtx h;
+ int i, g, x, y;
+ int c;
+
+ for (i = 0; i < text->len; i++)
+ {
+ g = text->els[i].cid;
+
+ p.x = text->els[i].x;
+ p.y = text->els[i].y;
+ p = fz_transformpoint(trm, p);
+ x = p.x;
+ y = p.y;
+
+ p.x = text->els[i].x;
+ p.y = text->els[i].y;
+ p = fz_transformpoint(inv, p);
+ dx = oldpt.x - p.x;
+ dy = oldpt.y - p.y;
+ oldpt = p;
+
+ if (text->font->wmode == 0)
+ {
+ h = fz_gethmtx(text->font, g);
+ oldpt.x += h.w * 0.001;
+ }
+ else
+ {
+ v = fz_getvmtx(text->font, g);
+ oldpt.y += v.w;
+ t = dy; dy = dx; dx = t;
+ }
+
+ if (fabs(dy) > 0.2)
+ {
+ pdf_textline *newline;
+ error = pdf_newtextline(&newline);
+ if (error)
+ return error;
+ (*line)->next = newline;
+ *line = newline;
+ }
+ else if (fabs(dx) > 0.2)
+ {
+ error = addtextchar(*line, x, y, ' ');
+ if (error)
+ return error;
+ }
+
+ if (font->tounicode)
+ c = fz_lookupcid(font->tounicode, g);
+ else if (g < font->ncidtoucs)
+ c = font->cidtoucs[g];
+ else
+ c = g;
+
+ error = addtextchar(*line, x, y, c);
+ if (error)
+ return error;
+ }
+ }
+
+ if (fz_istransformnode(node))
+ ctm = fz_concat(((fz_transformnode*)node)->m, ctm);
+
+ for (node = node->first; node; node = node->next)
+ {
+ error = findtext(line, node, ctm);
+ if (error)
+ return error;
+ }
+
+ return nil;
+}
+
+fz_error *
+pdf_loadtextfromtree(pdf_textline **outp, fz_tree *tree)
+{
+ pdf_textline *root;
+ pdf_textline *line;
+ fz_error *error;
+
+ oldpt.x = -1;
+ oldpt.y = -1;
+
+ error = pdf_newtextline(&root);
+ if (error)
+ return error;
+
+ line = root;
+
+ error = findtext(&line, tree->root, fz_identity());
+ if (error)
+ {
+ pdf_droptextline(root);
+ return error;
+ }
+
+ *outp = root;
+ return nil;
+}
+
+void
+pdf_debugtextline(pdf_textline *line)
+{
+ char buf[10];
+ int c, n, k, i;
+
+ for (i = 0; i < line->len; i++)
+ {
+ c = line->text[i].c;
+ if (c < 128)
+ putchar(c);
+ else
+ {
+ n = runetochar(buf, &c);
+ for (k = 0; k < n; k++)
+ putchar(buf[k]);
+ }
+ }
+ putchar('\n');
+
+ if (line->next)
+ pdf_debugtextline(line->next);
+}
+
diff --git a/render/renderimage.c b/render/renderimage.c
index a94aa66c..733261aa 100644
--- a/render/renderimage.c
+++ b/render/renderimage.c
@@ -1,9 +1,5 @@
#include <fitz.h>
-#define GAMMA 1.8
-
-void fz_gammapixmap(fz_pixmap *pix, float gamma);
-
#define LERP(a,b,t) (a + (((b - a) * t) >> 16))
static inline int getcomp(fz_pixmap *pix, int u, int v, int k)
@@ -100,6 +96,30 @@ overscanrgb(fz_matrix *invmat, fz_pixmap *dst, fz_pixmap *src, int y, int x0, in
}
}
+static inline void
+overscanmask(fz_matrix *invmat, fz_pixmap *dst, fz_pixmap *src, int y, int x0, int x1)
+{
+ int x;
+
+ int u = (invmat->a * (x0+0.5) + invmat->c * (y+0.5) + invmat->e) * 65536;
+ int v = (invmat->b * (x0+0.5) + invmat->d * (y+0.5) + invmat->f) * 65536;
+ int du = invmat->a * 65536;
+ int dv = invmat->b * 65536;
+
+ u -= 0.5 * 65536;
+ v -= 0.5 * 65536;
+
+ for (x = x0; x <= x1; x++)
+ {
+ int sa = sampleimage(src, u, v, 0);
+ int da = dst->samples[ (y-dst->y) * dst->w + x-dst->x ];
+ da = sa + fz_mul255(da, 255 - sa);
+ dst->samples[ (y-dst->y) * dst->w + x-dst->x ] = da;
+ u += du;
+ v += dv;
+ }
+}
+
static fz_error *
drawtile(fz_renderer *gc, fz_pixmap *out, fz_pixmap *tile, fz_matrix ctm, int over)
{
@@ -135,6 +155,8 @@ drawtile(fz_renderer *gc, fz_pixmap *out, fz_pixmap *tile, fz_matrix ctm, int ov
{
if (over && tile->n == 4)
overscanrgb(&invmat, out, tile, y, x0, x1);
+ else if (over && tile->n == 1)
+ overscanmask(&invmat, out, tile, y, x0, x1);
else
drawscan(&invmat, out, tile, y, x0, x1);
}
@@ -180,9 +202,7 @@ printf(" load tile %d x %d\n", w, h);
if (dx != 1 || dy != 1)
{
printf(" scale tile 1/%d x 1/%d\n", dx, dy);
-/* fz_gammapixmap(tile1, 1.0 / GAMMA); */
error = fz_scalepixmap(&tile2, tile1, dx, dy);
-/* fz_gammapixmap(tile2, GAMMA); */
fz_droppixmap(tile1);
}
else
@@ -198,10 +218,18 @@ printf(" scale tile 1/%d x 1/%d\n", dx, dy);
/* render image mask */
if (n == 0 && a == 1)
{
+ if (gc->acc && !gc->model)
+ {
+printf(" draw image mask over\n");
+ error = drawtile(gc, gc->acc, tile2, ctm, 1);
+ }
+ else
+ {
printf(" draw image mask\n");
- error = fz_newpixmap(&gc->tmp, r.min.x, r.min.y, r.max.x - r.min.x, r.max.y - r.min.y, 1);
- fz_clearpixmap(gc->tmp);
- error = drawtile(gc, gc->tmp, tile2, ctm, 0);
+ error = fz_newpixmap(&gc->tmp, r.min.x, r.min.y, r.max.x - r.min.x, r.max.y - r.min.y, 1);
+ fz_clearpixmap(gc->tmp);
+ error = drawtile(gc, gc->tmp, tile2, ctm, 0);
+ }
}
/* render rgb over */
diff --git a/test/pdfrip.c b/test/pdfrip.c
index 0837e969..5d4aba39 100644
--- a/test/pdfrip.c
+++ b/test/pdfrip.c
@@ -2,110 +2,14 @@
#include <mupdf.h>
int showtree = 0;
-int showtext = 0;
float zoom = 1.0;
void usage()
{
- fprintf(stderr, "usage: pdfrip [-dt] [-p password] [-z zoom] file.pdf [pages...]\n");
+ fprintf(stderr, "usage: pdfrip [-d] [-p password] [-z zoom] file.pdf [pages...]\n");
exit(1);
}
-enum
-{
- Bit1 = 7,
- Bitx = 6,
- Bit2 = 5,
- Bit3 = 4,
- Bit4 = 3,
-
- T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
- Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
- T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
- T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
- T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
-
- Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
- Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
- Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
-
- Maskx = (1<<Bitx)-1, /* 0011 1111 */
- Testx = Maskx ^ 0xFF, /* 1100 0000 */
-};
-
-void putrune(int c)
-{
- if (c <= Rune1)
- {
- putchar(c);
- return;
- }
-
- if (c <= Rune2)
- {
- putchar(T2 | (c >> 1*Bitx));
- putchar(Tx | (c & Maskx));
- return;
- }
-
- putchar(T3 | (c >> 2*Bitx));
- putchar(Tx | ((c >> 1*Bitx) & Maskx));
- putchar(Tx | (c & Maskx));
-}
-
-/*
- * Dump text nodes as unicode
- */
-void dumptext(fz_node *node)
-{
- int i, cid, ucs;
- static fz_point old = { 0, 0 };
- fz_point p;
- float dx, dy;
- fz_vmtx v;
- fz_hmtx h;
-
- if (fz_istextnode(node))
- {
- fz_textnode *text = (fz_textnode*)node;
- pdf_font *font = (pdf_font*)text->font;
- fz_matrix invtrm = fz_invertmatrix(text->trm);
-
- for (i = 0; i < text->len; i++)
- {
- cid = text->els[i].cid;
- p.x = text->els[i].x;
- p.y = text->els[i].y;
- p = fz_transformpoint(invtrm, p);
- dx = old.x - p.x;
- dy = old.y - p.y;
- old = p;
-
- if (fabs(dy) > 1.6)
- puts("\n");
- else if (fabs(dy) > 0.2)
- putchar('\n');
- else if (fabs(dx) > 0.2)
- putchar(' ');
-
- h = fz_gethmtx(text->font, cid);
- old.x += h.w / 1000.0;
-
- if (font->tounicode)
- ucs = fz_lookupcid(font->tounicode, cid);
- else if (font->ncidtoucs)
- ucs = font->cidtoucs[cid];
- else
- ucs = cid;
-
- putrune(ucs);
- }
- }
-
- for (node = node->first; node; node = node->next)
- dumptext(node);
-}
-
/*
* Draw page
*/
@@ -139,14 +43,6 @@ void showpage(pdf_xref *xref, fz_obj *pageobj)
printf("endtree\n");
}
- if (showtext)
- {
- printf("---begin text dump---\n");
- dumptext(page->tree->root);
- printf("\n---end text dump---\n");
- }
-
- else
{
fz_pixmap *pix;
fz_renderer *gc;
@@ -191,14 +87,13 @@ int main(int argc, char **argv)
char *password = "";
- while ((c = getopt(argc, argv, "dtz:p:")) != -1)
+ while ((c = getopt(argc, argv, "dz:p:")) != -1)
{
switch (c)
{
case 'p': password = optarg; break;
case 'z': zoom = atof(optarg); break;
case 'd': ++showtree; break;
- case 't': ++showtext; break;
default: usage();
}
}
@@ -226,8 +121,8 @@ int main(int argc, char **argv)
if (error) fz_abort(error);
outlines = nil;
- error = pdf_loadoutlinetree(&outlines, xref);
- if (error) { fz_warn(error->msg); fz_droperror(error); }
+// error = pdf_loadoutlinetree(&outlines, xref);
+// if (error) { fz_warn(error->msg); fz_droperror(error); }
if (optind == argc)
{
diff --git a/test/x11pdf.c b/test/x11pdf.c
index 58655f8a..3d424b1c 100644
--- a/test/x11pdf.c
+++ b/test/x11pdf.c
@@ -29,6 +29,9 @@ static int rotate = 0;
static int pageno = 1;
static int count = 0;
+static pdf_page *page = nil;
+static fz_obj *pageobj = nil;
+
static int hist[256];
static int histlen = 0;
@@ -126,16 +129,13 @@ static void xtitle(char *s)
static void showpage(void)
{
fz_error *error;
- pdf_page *page;
fz_matrix ctm;
fz_rect bbox;
+ fz_obj *obj;
char s[256];
- fz_obj *pageobj;
assert(pageno > 0 && pageno <= pdf_getpagecount(pages));
- pageobj = pdf_getpageobject(pages, pageno - 1);
-
XDefineCursor(xdpy, xwin, xcwait);
XFlush(xdpy);
@@ -143,6 +143,14 @@ static void showpage(void)
fz_droppixmap(image);
image = nil;
+ obj = pdf_getpageobject(pages, pageno - 1);
+ if (obj == pageobj)
+ goto Lskipload;
+ pageobj = obj;
+
+ if (page)
+ pdf_droppage(page);
+
sprintf(s, "Loading page %d", pageno);
XSetForeground(xdpy, xgc, BlackPixel(xdpy, xscr));
XDrawString(xdpy, xwin, xgc, 10, 20, s, strlen(s));
@@ -152,6 +160,8 @@ static void showpage(void)
if (error)
fz_abort(error);
+Lskipload:
+
sprintf(s, "Rendering...");
XSetForeground(xdpy, xgc, BlackPixel(xdpy, xscr));
XDrawString(xdpy, xwin, xgc, 10, 30, s, strlen(s));
@@ -168,8 +178,6 @@ static void showpage(void)
if (error)
fz_abort(error);
- pdf_droppage(page);
-
XDefineCursor(xdpy, xwin, xcarrow);
XFlush(xdpy);
@@ -206,6 +214,20 @@ static void pdfopen(char *filename, char *password)
image = nil;
}
+static void dumptext()
+{
+ fz_error *error;
+ pdf_textline *line;
+
+ error = pdf_loadtextfromtree(&line, page->tree);
+ if (error)
+ fz_abort(error);
+
+ pdf_debugtextline(line);
+
+ pdf_droptextline(line);
+}
+
static void handlekey(int c)
{
int oldpage = pageno;
@@ -223,6 +245,7 @@ static void handlekey(int c)
case 'd': fz_debugglyphcache(rast->cache); break;
case 'a': rotate -= 5; break;
case 's': rotate += 5; break;
+ case 'x': dumptext(); break;
case 'b':
pageno--;