From 9f510df7318ff4fc12ed4d35bd0454d5a8ddbd88 Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Tue, 13 May 2014 14:41:48 +0200 Subject: Use code space ranges when remapping ToUnicode CMaps. --- source/pdf/pdf-cmap-load.c | 5 +++-- source/pdf/pdf-unicode.c | 33 ++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 17 deletions(-) (limited to 'source') diff --git a/source/pdf/pdf-cmap-load.c b/source/pdf/pdf-cmap-load.c index d3f17c35..ec27eddd 100644 --- a/source/pdf/pdf-cmap-load.c +++ b/source/pdf/pdf-cmap-load.c @@ -105,9 +105,10 @@ pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes) pdf_cmap *cmap = pdf_new_cmap(ctx); fz_try(ctx) { + unsigned int high = (1 << (bytes * 8)) - 1; sprintf(cmap->cmap_name, "Identity-%c", wmode ? 'V' : 'H'); - pdf_add_codespace(ctx, cmap, 0x0000, 0xffff, bytes); - pdf_map_range_to_range(ctx, cmap, 0x0000, 0xffff, 0); + pdf_add_codespace(ctx, cmap, 0, high, bytes); + pdf_map_range_to_range(ctx, cmap, 0, high, 0); pdf_sort_cmap(ctx, cmap); pdf_set_cmap_wmode(ctx, cmap, wmode); } diff --git a/source/pdf/pdf-unicode.c b/source/pdf/pdf-unicode.c index a6891655..3a7fe1c4 100644 --- a/source/pdf/pdf-unicode.c +++ b/source/pdf/pdf-unicode.c @@ -6,36 +6,39 @@ void pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font, char **strings, char *collection, pdf_obj *cmapstm) { - pdf_cmap *cmap; unsigned int cpt; - int cid; + int gid; int ucsbuf[8]; int ucslen; + int i; fz_context *ctx = doc->ctx; if (pdf_is_stream(doc, pdf_to_num(cmapstm), pdf_to_gen(cmapstm))) { - cmap = pdf_load_embedded_cmap(doc, cmapstm); + pdf_cmap *gid_from_cpt = font->encoding; + pdf_cmap *ucs_from_cpt = pdf_load_embedded_cmap(doc, cmapstm); font->to_unicode = pdf_new_cmap(ctx); - /* TODO: use codespace ranges */ - for (cpt = 0; cpt < (strings ? 256 : 65536); cpt++) + for (i = 0; i < gid_from_cpt->codespace_len; ++i) { - cid = pdf_lookup_cmap(font->encoding, cpt); - if (cid >= 0) + for (cpt = gid_from_cpt->codespace[i].low; cpt <= gid_from_cpt->codespace[i].high; ++cpt) { - ucslen = pdf_lookup_cmap_full(cmap, cpt, ucsbuf); - if (ucslen == 1) - pdf_map_range_to_range(ctx, font->to_unicode, cid, cid, ucsbuf[0]); - if (ucslen > 1) - pdf_map_one_to_many(ctx, font->to_unicode, cid, ucsbuf, ucslen); + gid = pdf_lookup_cmap(gid_from_cpt, cpt); + if (gid >= 0) + { + ucslen = pdf_lookup_cmap_full(ucs_from_cpt, cpt, ucsbuf); + if (ucslen == 1) + pdf_map_range_to_range(ctx, font->to_unicode, gid, gid, ucsbuf[0]); + if (ucslen > 1) + pdf_map_one_to_many(ctx, font->to_unicode, gid, ucsbuf, ucslen); + } } } pdf_sort_cmap(ctx, font->to_unicode); - pdf_drop_cmap(ctx, cmap); + pdf_drop_cmap(ctx, ucs_from_cpt); font->size += pdf_cmap_size(ctx, font->to_unicode); } @@ -58,8 +61,8 @@ pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font, /* TODO one-to-many mappings */ font->cid_to_ucs_len = 256; - font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof(unsigned short)); - font->size += 256 * sizeof(unsigned short); + font->cid_to_ucs = fz_malloc_array(ctx, 256, sizeof *font->cid_to_ucs); + font->size += 256 * sizeof *font->cid_to_ucs; for (cpt = 0; cpt < 256; cpt++) { -- cgit v1.2.3