diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2011-04-12 14:38:39 +0200 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2011-04-12 14:38:39 +0200 |
commit | a19a016b48213caedbf49278f26c9055cdbfc2fc (patch) | |
tree | 3c221f2033dd325f3f0bd36cd73dc0ed0fc37595 | |
parent | c6934bd8de9d593ec51f60b7c0db9f8a2125352e (diff) | |
download | mupdf-a19a016b48213caedbf49278f26c9055cdbfc2fc.tar.xz |
Fix serious bug in cmap compacting -- we forgot to set the new length.
This bug fix shaves another 650K off the compiled-in cmaps!
Also fix the detection for when the cmap table is full, and ignore
surrogate pair mappings (since we can't do anything useful with
them at the moment).
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | pdf/pdf_cmap.c | 49 | ||||
-rw-r--r-- | pdf/pdf_unicode.c | 4 |
3 files changed, 35 insertions, 19 deletions
@@ -108,6 +108,7 @@ $(FONT_HDR) : $(FONTDUMP) | $(GEN) $(OUT)/pdf_cmap_table.o : $(CMAP_HDR) $(OUT)/pdf_fontfile.o : $(FONT_HDR) +$(OUT)/cmapdump.o : pdf/pdf_cmap.c pdf/pdf_cmap_parse.c # --- Tools and Apps --- diff --git a/pdf/pdf_cmap.c b/pdf/pdf_cmap.c index cf50c3c3..2715b5dd 100644 --- a/pdf/pdf_cmap.c +++ b/pdf/pdf_cmap.c @@ -181,7 +181,7 @@ pdf_add_codespace(pdf_cmap *cmap, int low, int high, int n) static void add_table(pdf_cmap *cmap, int value) { - if (cmap->tlen == USHRT_MAX || cmap->rlen == USHRT_MAX) + if (cmap->tlen == USHRT_MAX) { fz_warn("cmap table is full; ignoring additional entries"); return; @@ -200,11 +200,6 @@ add_table(pdf_cmap *cmap, int value) static void add_range(pdf_cmap *cmap, int low, int high, int flag, int offset) { - if (cmap->tlen == USHRT_MAX || cmap->rlen == USHRT_MAX) - { - fz_warn("cmap table is full; ignoring additional entries"); - return; - } /* If the range is too large to be represented, split it */ if (high - low > 0x3fff) { @@ -233,9 +228,14 @@ pdf_map_range_to_table(pdf_cmap *cmap, int low, int *table, int len) int i; int high = low + len; int offset = cmap->tlen; - for (i = 0; i < len; i++) - add_table(cmap, table[i]); - add_range(cmap, low, high, PDF_CMAP_TABLE, offset); + if (cmap->tlen + len >= USHRT_MAX) + fz_warn("cannot map range to table; table is full"); + else + { + for (i = 0; i < len; i++) + add_table(cmap, table[i]); + add_range(cmap, low, high, PDF_CMAP_TABLE, offset); + } } /* @@ -267,11 +267,24 @@ pdf_map_one_to_many(pdf_cmap *cmap, int low, int *values, int len) len = 8; } - offset = cmap->tlen; - add_table(cmap, len); - for (i = 0; i < len; i++) - add_table(cmap, values[i]); - add_range(cmap, low, low, PDF_CMAP_MULTI, offset); + if (len == 2 && + values[0] >= 0xD800 && values[0] <= 0xDBFF && + values[1] >= 0xDC00 && values[1] <= 0xDFFF) + { + fz_warn("ignoring surrogate pair mapping in cmap"); + return; + } + + if (cmap->tlen + len + 1 >= USHRT_MAX) + fz_warn("cannot map one to many; table is full"); 
+ else + { + offset = cmap->tlen; + add_table(cmap, len); + for (i = 0; i < len; i++) + add_table(cmap, values[i]); + add_range(cmap, low, low, PDF_CMAP_MULTI, offset); + } } /* @@ -296,7 +309,7 @@ pdf_sort_cmap(pdf_cmap *cmap) qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange); - if (cmap->rlen == USHRT_MAX || cmap->tlen == USHRT_MAX) + if (cmap->tlen == USHRT_MAX) { fz_warn("cmap table is full; will not combine ranges"); return; @@ -382,6 +395,10 @@ pdf_sort_cmap(pdf_cmap *cmap) b ++; } + + cmap->rlen = a - cmap->ranges; + + fz_flush_warnings(); } /* @@ -407,7 +424,7 @@ pdf_lookup_cmap(pdf_cmap *cmap, int cpt) if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_TABLE) return cmap->table[i]; if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_MULTI) - return cmap->table[cmap->ranges[m].offset + 1]; /* first char */ + return -1; /* should use lookup_cmap_full */ return i; } } diff --git a/pdf/pdf_unicode.c b/pdf/pdf_unicode.c index 54cf9911..2b28b608 100644 --- a/pdf/pdf_unicode.c +++ b/pdf/pdf_unicode.c @@ -50,13 +50,11 @@ pdf_load_to_unicode(pdf_font_desc *font, pdf_xref *xref, error = pdf_load_system_cmap(&font->to_unicode, "Adobe-GB1-UCS2"); else if (!strcmp(collection, "Adobe-Japan1")) error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Japan1-UCS2"); - else if (!strcmp(collection, "Adobe-Japan2")) - error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Japan2-UCS2"); /* where's this? */ else if (!strcmp(collection, "Adobe-Korea1")) error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Korea1-UCS2"); if (error) - return fz_rethrow(error, "cannot load to_unicode system cmap %s-UCS2", collection); + return fz_rethrow(error, "cannot load ToUnicode system cmap %s-UCS2", collection); } if (strings) |