diff options
author | Robin Watts <robin.watts@artifex.com> | 2012-01-31 17:58:34 +0000 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2012-01-31 18:01:37 +0000 |
commit | 3e65ed4eb3a8aa8cd6375a7029ac3b61a35a1157 (patch) | |
tree | 414965143f8ec0ca505e046847e2ea647e3e6149 /pdf | |
parent | 0da4f606b85007f6c1bdfc9255ccef8307dabb11 (diff) | |
download | mupdf-3e65ed4eb3a8aa8cd6375a7029ac3b61a35a1157.tar.xz |
Fix bug 692824: incorrect application of word space.
Word space should only be applied when the character code is 32 and
it was read from a single-byte codespace range. Ghostscript gets
this wrong too.
Diffstat (limited to 'pdf')
-rw-r--r-- | pdf/mupdf.h | 2 | ||||
-rw-r--r-- | pdf/pdf_cmap.c | 4 | ||||
-rw-r--r-- | pdf/pdf_interpret.c | 5 |
3 files changed, 7 insertions, 4 deletions
diff --git a/pdf/mupdf.h b/pdf/mupdf.h index f0951976..5bbce7f3 100644 --- a/pdf/mupdf.h +++ b/pdf/mupdf.h @@ -281,7 +281,7 @@ void pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap); int pdf_lookup_cmap(pdf_cmap *cmap, int cpt); int pdf_lookup_cmap_full(pdf_cmap *cmap, int cpt, int *out); -unsigned char *pdf_decode_cmap(pdf_cmap *cmap, unsigned char *s, int *cpt); +unsigned char *pdf_decode_cmap(pdf_cmap *cmap, unsigned char *s, int *cpt, int *w); pdf_cmap *pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes); pdf_cmap *pdf_parse_cmap(fz_stream *file); diff --git a/pdf/pdf_cmap.c b/pdf/pdf_cmap.c index 4430979c..587b82e2 100644 --- a/pdf/pdf_cmap.c +++ b/pdf/pdf_cmap.c @@ -483,11 +483,12 @@ pdf_lookup_cmap_full(pdf_cmap *cmap, int cpt, int *out) * multi-byte encoded string. */ unsigned char * -pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, int *cpt) +pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, int *cpt, int *w) { int k, n, c; c = 0; + *w = 0; for (n = 0; n < 4; n++) { c = (c << 8) | buf[n]; @@ -498,6 +499,7 @@ pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, int *cpt) if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high) { *cpt = c; + *w = n + 1; return buf + n + 1; } } diff --git a/pdf/pdf_interpret.c b/pdf/pdf_interpret.c index 3b774d71..63f208a1 100644 --- a/pdf/pdf_interpret.c +++ b/pdf/pdf_interpret.c @@ -816,13 +816,14 @@ pdf_show_string(pdf_csi *csi, unsigned char *buf, int len) while (buf < end) { - buf = pdf_decode_cmap(fontdesc->encoding, buf, &cpt); + int w; + buf = pdf_decode_cmap(fontdesc->encoding, buf, &cpt, &w); cid = pdf_lookup_cmap(fontdesc->encoding, cpt); if (cid >= 0) pdf_show_char(csi, cid); else fz_warn(ctx, "cannot encode character with code point %#x", cpt); - if (cpt == 32) + if (cpt == 32 && w == 1) pdf_show_space(csi, gstate->word_space); } } |