summaryrefslogtreecommitdiff
path: root/source/pdf
diff options
context:
space:
mode:
authorTor Andersson <tor.andersson@artifex.com>2014-09-22 17:26:31 +0200
committerTor Andersson <tor.andersson@artifex.com>2014-09-23 16:34:09 +0200
commit905bed5522268d16b9dc147d923c60db012c5a68 (patch)
treecae0b44cdd7bd66d11658bb77f9b2766f7d847b1 /source/pdf
parent72290018051ba5b66c77989862847a14493fd18a (diff)
downloadmupdf-905bed5522268d16b9dc147d923c60db012c5a68.tar.xz
Fix 695501: check upper and lower bounds of CMap when computing ToUnicode.
When inverting the CMap to create a ToUnicode, first check the actual range of input characters rather than relying only on the codespace range list.
Diffstat (limited to 'source/pdf')
-rw-r--r--source/pdf/pdf-unicode.c45
1 files changed, 43 insertions, 2 deletions
diff --git a/source/pdf/pdf-unicode.c b/source/pdf/pdf-unicode.c
index 3a7fe1c4..ffe5f6b7 100644
--- a/source/pdf/pdf-unicode.c
+++ b/source/pdf/pdf-unicode.c
@@ -2,11 +2,45 @@
/* Load or synthesize ToUnicode map for fonts */
+static void find_min_max_cpt(pdf_cmap *cmap, unsigned int *minp, unsigned int *maxp) /* Report via *minp / *maxp the smallest and largest input code found across cmap's ranges, xranges and mranges tables. */
+{
+ unsigned int min = UINT_MAX; /* starting value: lowered by any entry whose low bound is smaller */
+ unsigned int max = 0; /* starting value: raised by any entry whose upper bound is larger */
+ int i;
+
+ for (i = 0; i < cmap->rlen; ++i) /* pass 1: widen [min, max] over the rlen entries of ranges[] */
+ {
+ if (cmap->ranges[i].low < min)
+ min = cmap->ranges[i].low;
+ if (cmap->ranges[i].high > max)
+ max = cmap->ranges[i].high;
+ }
+
+ for (i = 0; i < cmap->xlen; ++i) /* pass 2: same low/high widening over the xlen entries of xranges[] */
+ {
+ if (cmap->xranges[i].low < min)
+ min = cmap->xranges[i].low;
+ if (cmap->xranges[i].high > max)
+ max = cmap->xranges[i].high;
+ }
+
+ for (i = 0; i < cmap->mlen; ++i) /* pass 3: mranges[] entries; only the low field is consulted, for both bounds */
+ {
+ if (cmap->mranges[i].low < min)
+ min = cmap->mranges[i].low;
+ if (cmap->mranges[i].low > max) /* NOTE(review): low is used for the upper bound too; presumably each mranges entry maps a single code, confirm against the mranges element definition */
+ max = cmap->mranges[i].low;
+ }
+
+ *minp = min; /* NOTE(review): if all three tables are empty this is UINT_MAX while *maxp is 0, i.e. min > max; callers need to handle that */
+ *maxp = max;
+}
+
void
pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font,
char **strings, char *collection, pdf_obj *cmapstm)
{
- unsigned int cpt;
+ unsigned int cpt, min, max;
int gid;
int ucsbuf[8];
int ucslen;
@@ -20,9 +54,16 @@ pdf_load_to_unicode(pdf_document *doc, pdf_font_desc *font,
font->to_unicode = pdf_new_cmap(ctx);
+ /* in case the code space range is much larger than the actual number of characters */
+ find_min_max_cpt(gid_from_cpt, &min, &max);
+
for (i = 0; i < gid_from_cpt->codespace_len; ++i)
{
- for (cpt = gid_from_cpt->codespace[i].low; cpt <= gid_from_cpt->codespace[i].high; ++cpt)
+ unsigned int l = gid_from_cpt->codespace[i].low;
+ unsigned int h = gid_from_cpt->codespace[i].high;
+ l = l < min ? min : l > max ? max : l;
+ h = h < min ? min : h > max ? max : h;
+ for (cpt = l; cpt <= h; ++cpt)
{
gid = pdf_lookup_cmap(gid_from_cpt, cpt);
if (gid >= 0)