summary | refs | log | tree | commit | diff
path: root/source/pdf
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com>, 2014-09-30 13:25:00 +0200
committer: Tor Andersson <tor.andersson@artifex.com>, 2014-09-30 13:25:12 +0200
commit: 632c9b6ad00a2109c513f3da10d1d56a97617df9 (patch)
tree: 2825ca46dcca4296f34c0bf46573743200699a77 /source/pdf
parent: 227c805ad8da410f99fbb57e91ae63ae14eba4cb (diff)
download: mupdf-632c9b6ad00a2109c513f3da10d1d56a97617df9.tar.xz
Fix 695501: Intelligent cmap remapping.
Use the actual ranges from the cpt-to-gid cmap to optimize the remapping of ToUnicode cmaps from cpt-to-unicode into gid-to-unicode format.
Diffstat (limited to 'source/pdf')
-rw-r--r-- source/pdf/pdf-unicode.c 96
1 files changed, 41 insertions, 55 deletions
diff --git a/source/pdf/pdf-unicode.c b/source/pdf/pdf-unicode.c
index ffe5f6b7..6b893051 100644
--- a/source/pdf/pdf-unicode.c
+++ b/source/pdf/pdf-unicode.c
@@ -2,83 +2,69 @@
/* Load or synthesize ToUnicode map for fonts */
-static void find_min_max_cpt(pdf_cmap *cmap, unsigned int *minp, unsigned int *maxp)
+static void
+pdf_remap_cmap_range(fz_context *ctx, pdf_cmap *ucs_from_gid,
+ unsigned int cpt, unsigned int gid, unsigned int n, pdf_cmap *ucs_from_cpt)
{
- unsigned int min = UINT_MAX;
- unsigned int max = 0;
- int i;
+ unsigned int k;
+ int ucsbuf[8];
+ int ucslen;
- for (i = 0; i < cmap->rlen; ++i)
+ for (k = 0; k <= n; ++k)
{
- if (cmap->ranges[i].low < min)
- min = cmap->ranges[i].low;
- if (cmap->ranges[i].high > max)
- max = cmap->ranges[i].high;
+ ucslen = pdf_lookup_cmap_full(ucs_from_cpt, cpt + k, ucsbuf);
+ if (ucslen == 1)
+ pdf_map_range_to_range(ctx, ucs_from_gid, gid + k, gid + k, ucsbuf[0]);
+ else if (ucslen > 1)
+ pdf_map_one_to_many(ctx, ucs_from_gid, gid + k, ucsbuf, ucslen);
}
+}
+
+static pdf_cmap *
+pdf_remap_cmap(fz_context *ctx, pdf_cmap *gid_from_cpt, pdf_cmap *ucs_from_cpt)
+{
+ pdf_cmap *ucs_from_gid;
+ unsigned int i, a, b, x;
- for (i = 0; i < cmap->xlen; ++i)
+ ucs_from_gid = pdf_new_cmap(ctx);
+
+ if (gid_from_cpt->usecmap)
+ ucs_from_gid->usecmap = pdf_remap_cmap(ctx, gid_from_cpt->usecmap, ucs_from_cpt);
+
+ for (i = 0; i < gid_from_cpt->rlen; ++i)
{
- if (cmap->xranges[i].low < min)
- min = cmap->xranges[i].low;
- if (cmap->xranges[i].high > max)
- max = cmap->xranges[i].high;
+ a = gid_from_cpt->ranges[i].low;
+ b = gid_from_cpt->ranges[i].high;
+ x = gid_from_cpt->ranges[i].out;
+ pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
}
- for (i = 0; i < cmap->mlen; ++i)
+ for (i = 0; i < gid_from_cpt->xlen; ++i)
{
- if (cmap->mranges[i].low < min)
- min = cmap->mranges[i].low;
- if (cmap->mranges[i].low > max)
- max = cmap->mranges[i].low;
+ a = gid_from_cpt->xranges[i].low;
+ b = gid_from_cpt->xranges[i].high;
+ x = gid_from_cpt->xranges[i].out;
+ pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
}
- *minp = min;
- *maxp = max;
+ /* Font encoding CMaps don't have one-to-many mappings, so we can ignore the mranges. */
+
+ pdf_sort_cmap(ctx, ucs_from_gid);
+
+ return ucs_from_gid;
}
void
pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font,
char **strings, char *collection, pdf_obj *cmapstm)
{
- unsigned int cpt, min, max;
- int gid;
- int ucsbuf[8];
- int ucslen;
- int i;
fz_context *ctx = doc->ctx;
+ unsigned int cpt;
if (pdf_is_stream(doc, pdf_to_num(cmapstm), pdf_to_gen(cmapstm)))
{
- pdf_cmap *gid_from_cpt = font->encoding;
pdf_cmap *ucs_from_cpt = pdf_load_embedded_cmap(doc, cmapstm);
-
- font->to_unicode = pdf_new_cmap(ctx);
-
- /* in case the code space range is much larger than the actual number of characters */
- find_min_max_cpt(gid_from_cpt, &min, &max);
-
- for (i = 0; i < gid_from_cpt->codespace_len; ++i)
- {
- unsigned int l = gid_from_cpt->codespace[i].low;
- unsigned int h = gid_from_cpt->codespace[i].high;
- l = l < min ? min : l > max ? max : l;
- h = h < min ? min : h > max ? max : h;
- for (cpt = l; cpt <= h; ++cpt)
- {
- gid = pdf_lookup_cmap(gid_from_cpt, cpt);
- if (gid >= 0)
- {
- ucslen = pdf_lookup_cmap_full(ucs_from_cpt, cpt, ucsbuf);
- if (ucslen == 1)
- pdf_map_range_to_range(ctx, font->to_unicode, gid, gid, ucsbuf[0]);
- if (ucslen > 1)
- pdf_map_one_to_many(ctx, font->to_unicode, gid, ucsbuf, ucslen);
- }
- }
- }
-
- pdf_sort_cmap(ctx, font->to_unicode);
-
+ font->to_unicode = pdf_remap_cmap(ctx, font->encoding, ucs_from_cpt);
pdf_drop_cmap(ctx, ucs_from_cpt);
font->size += pdf_cmap_size(ctx, font->to_unicode);
}