diff options
-rw-r--r-- | mupdf/mupdf.h | 8 | ||||
-rw-r--r-- | mupdf/pdf_build.c | 55 | ||||
-rw-r--r-- | mupdf/pdf_interpret.c | 84 |
3 files changed, 86 insertions, 61 deletions
diff --git a/mupdf/mupdf.h b/mupdf/mupdf.h index c5fa9ca9..da04d32e 100644 --- a/mupdf/mupdf.h +++ b/mupdf/mupdf.h @@ -599,13 +599,17 @@ struct pdf_csi_s fz_device *dev; pdf_xref *xref; - fz_obj *obj, *array; + fz_obj *obj; char name[64]; + unsigned char string[256]; + int stringlen; float stack[32]; int istack[32]; int top; int xbalance; + int intext; + int inarray; /* path object state */ fz_path *path; @@ -632,6 +636,8 @@ void pdf_setcolor(pdf_csi *csi, int what, float *v); void pdf_setpattern(pdf_csi *csi, int what, pdf_pattern *pat, float *v); void pdf_setshade(pdf_csi *csi, int what, fz_shade *shade); void pdf_showpath(pdf_csi*, int close, int fill, int stroke, int evenodd); +void pdf_showspace(pdf_csi *csi, float tadj); +void pdf_showstring(pdf_csi *csi, unsigned char *buf, int len); void pdf_showtext(pdf_csi*, fz_obj *text); void pdf_flushtext(pdf_csi*); void pdf_showimage(pdf_csi*, fz_pixmap *image); diff --git a/mupdf/pdf_build.c b/mupdf/pdf_build.c index 739ec976..3c9f21d1 100644 --- a/mupdf/pdf_build.c +++ b/mupdf/pdf_build.c @@ -629,11 +629,18 @@ pdf_showglyph(pdf_csi *csi, int cid) } } -static void +void pdf_showspace(pdf_csi *csi, float tadj) { pdf_gstate *gstate = csi->gstate + csi->gtop; pdf_fontdesc *fontdesc = gstate->font; + + if (!fontdesc) + { + fz_warn("cannot draw text since font and size not set"); + return; + } + if (fontdesc->wmode == 0) csi->tm = fz_concat(fz_translate(tadj * gstate->scale, 0), csi->tm); else @@ -641,13 +648,11 @@ pdf_showspace(pdf_csi *csi, float tadj) } void -pdf_showtext(pdf_csi *csi, fz_obj *text) +pdf_showstring(pdf_csi *csi, unsigned char *buf, int len) { pdf_gstate *gstate = csi->gstate + csi->gtop; pdf_fontdesc *fontdesc = gstate->font; - unsigned char *buf; - unsigned char *end; - int i, len; + unsigned char *end = buf + len; int cpt, cid; if (!fontdesc) @@ -656,34 +661,38 @@ pdf_showtext(pdf_csi *csi, fz_obj *text) return; } + while (buf < end) + { + buf = pdf_decodecmap(fontdesc->encoding, buf, &cpt); + cid = pdf_lookupcmap(fontdesc->encoding, cpt); + if (cid >= 0) + pdf_showglyph(csi, cid); + else + fz_warn("cannot encode character with code point %#x", cpt); + if (cpt == 32) + pdf_showspace(csi, gstate->wordspace); + } +} + +void +pdf_showtext(pdf_csi *csi, fz_obj *text) +{ + pdf_gstate *gstate = csi->gstate + csi->gtop; + int i; + if (fz_isarray(text)) { for (i = 0; i < fz_arraylen(text); i++) { fz_obj *item = fz_arrayget(text, i); if (fz_isstring(item)) - pdf_showtext(csi, item); + pdf_showstring(csi, (unsigned char *)fz_tostrbuf(item), fz_tostrlen(item)); else pdf_showspace(csi, - fz_toreal(item) * gstate->size * 0.001f); } } - - if (fz_isstring(text)) + else if (fz_isstring(text)) { - buf = (unsigned char *)fz_tostrbuf(text); - len = fz_tostrlen(text); - end = buf + len; - - while (buf < end) - { - buf = pdf_decodecmap(fontdesc->encoding, buf, &cpt); - cid = pdf_lookupcmap(fontdesc->encoding, cpt); - if (cid >= 0) - pdf_showglyph(csi, cid); - else - fz_warn("cannot encode character with code point %#x", cpt); - if (cpt == 32) - pdf_showspace(csi, gstate->wordspace); - } + pdf_showstring(csi, (unsigned char *)fz_tostrbuf(text), fz_tostrlen(text)); } } diff --git a/mupdf/pdf_interpret.c b/mupdf/pdf_interpret.c index 6c7e0307..55d58e79 100644 --- a/mupdf/pdf_interpret.c +++ b/mupdf/pdf_interpret.c @@ -12,12 +12,14 @@ pdf_newcsi(pdf_xref *xref, fz_device *dev, fz_matrix ctm) csi->top = 0; csi->obj = nil; - csi->array = nil; csi->name[0] = 0; + csi->stringlen = 0; memset(csi->stack, 0, sizeof csi->stack); memset(csi->istack, 0, sizeof csi->istack); csi->xbalance = 0; + csi->intext = 0; + csi->inarray = 0; csi->path = fz_newpath(); csi->clip = 0; @@ -46,6 +48,7 @@ pdf_clearstack(pdf_csi *csi) csi->obj = nil; csi->name[0] = 0; + csi->stringlen = 0; for (i = 0; i < csi->top; i++) { @@ -150,7 +153,6 @@ pdf_freecsi(pdf_csi *csi) if (csi->path) fz_freepath(csi->path); if (csi->text) fz_freetext(csi->text); - if (csi->array) fz_dropobj(csi->array); pdf_clearstack(csi); @@ -458,6 +460,7 @@ static void pdf_run_BMC(pdf_csi *csi) static void pdf_run_BT(pdf_csi *csi) { + csi->intext = 1; csi->tm = fz_identity; csi->tlm = fz_identity; } @@ -598,6 +601,7 @@ static void pdf_run_ET(pdf_csi *csi) { pdf_flushtext(csi); csi->accumulate = 1; + csi->intext = 0; } static void pdf_run_EX(pdf_csi *csi) @@ -863,12 +867,18 @@ static void pdf_run_Tstar(pdf_csi *csi) static void pdf_run_Tj(pdf_csi *csi) { - pdf_showtext(csi, csi->obj); + if (csi->stringlen) + pdf_showstring(csi, csi->string, csi->stringlen); + else + pdf_showtext(csi, csi->obj); } static void pdf_run_TJ(pdf_csi *csi) { - pdf_showtext(csi, csi->obj); + if (csi->stringlen) + pdf_showstring(csi, csi->string, csi->stringlen); + else + pdf_showtext(csi, csi->obj); } static void pdf_run_W(pdf_csi *csi) @@ -1134,7 +1144,10 @@ static void pdf_run_squote(pdf_csi *csi) csi->tlm = fz_concat(m, csi->tlm); csi->tm = csi->tlm; - pdf_showtext(csi, csi->obj); + if (csi->stringlen) + pdf_showstring(csi, csi->string, csi->stringlen); + else + pdf_showtext(csi, csi->obj); } static void pdf_run_dquote(pdf_csi *csi) @@ -1149,7 +1162,10 @@ static void pdf_run_dquote(pdf_csi *csi) csi->tlm = fz_concat(m, csi->tlm); csi->tm = csi->tlm; - pdf_showtext(csi, csi->obj); + if (csi->stringlen) + pdf_showstring(csi, csi->string, csi->stringlen); + else + pdf_showtext(csi, csi->obj); } static fz_error @@ -1762,7 +1778,6 @@ pdf_runcsifile(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen fz_error error; int tok; int len; - fz_obj *obj; pdf_clearstack(csi); @@ -1775,25 +1790,21 @@ pdf_runcsifile(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen if (error) return fz_rethrow(error, "lexical error in content stream"); - if (csi->array) + if (csi->inarray) { if (tok == PDF_TCARRAY) { - csi->obj = csi->array; - csi->array = nil; + csi->inarray = 0; csi->top ++; } else if (tok == PDF_TINT || tok == PDF_TREAL) { - obj = fz_newreal(atof(buf)); - fz_arraypush(csi->array, obj); - fz_dropobj(obj); + pdf_gstate *gstate = csi->gstate + csi->gtop; + pdf_showspace(csi, -atof(buf) * gstate->size * 0.001f); } else if (tok == PDF_TSTRING) { - obj = fz_newstring(buf, len); - fz_arraypush(csi->array, obj); - fz_dropobj(obj); + pdf_showstring(csi, (unsigned char *)buf, len); } else if (tok == PDF_TEOF) { @@ -1812,9 +1823,18 @@ pdf_runcsifile(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen case PDF_TEOF: return fz_okay; - /* TODO: optimize text-object array parsing */ case PDF_TOARRAY: - csi->array = fz_newarray(8); + if (!csi->intext) + { + error = pdf_parsearray(&csi->obj, csi->xref, file, buf, buflen); + if (error) + return fz_rethrow(error, "cannot parse array"); + csi->top ++; + } + else + { + csi->inarray = 1; + } break; case PDF_TODICT: @@ -1842,25 +1862,15 @@ pdf_runcsifile(pdf_csi *csi, fz_obj *rdb, fz_stream *file, char *buf, int buflen break; case PDF_TSTRING: - csi->obj = fz_newstring(buf, len); - csi->top ++; - break; - - case PDF_TTRUE: - csi->istack[csi->top] = 1; - csi->stack[csi->top] = 1; - csi->top ++; - break; - - case PDF_TFALSE: - csi->istack[csi->top] = 0; - csi->stack[csi->top] = 0; - csi->top ++; - break; - - case PDF_TNULL: - csi->istack[csi->top] = 0; - csi->stack[csi->top] = 0; + if (len <= sizeof(csi->string)) + { + memcpy(csi->string, buf, len); + csi->stringlen = len; + } + else + { + csi->obj = fz_newstring(buf, len); + } csi->top ++; break; |