summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2011-04-12 14:38:39 +0200
committer: Tor Andersson <tor.andersson@artifex.com> 2011-04-12 14:38:39 +0200
commit: a19a016b48213caedbf49278f26c9055cdbfc2fc (patch)
tree: 3c221f2033dd325f3f0bd36cd73dc0ed0fc37595
parent: c6934bd8de9d593ec51f60b7c0db9f8a2125352e (diff)
download: mupdf-a19a016b48213caedbf49278f26c9055cdbfc2fc.tar.xz
Fix serious bug in cmap compacting -- we forgot to set the new length.
This bug fix shaves another 650K off the compiled-in cmaps! Also fix the detection for when the cmap table is full, and ignore surrogate pair mappings (since we can't do anything useful with them at the moment).
-rw-r--r-- Makefile 1
-rw-r--r-- pdf/pdf_cmap.c 49
-rw-r--r-- pdf/pdf_unicode.c 4
3 files changed, 35 insertions, 19 deletions
diff --git a/Makefile b/Makefile
index a58a35e9..bea67d58 100644
--- a/Makefile
+++ b/Makefile
@@ -108,6 +108,7 @@ $(FONT_HDR) : $(FONTDUMP) | $(GEN)
$(OUT)/pdf_cmap_table.o : $(CMAP_HDR)
$(OUT)/pdf_fontfile.o : $(FONT_HDR)
+$(OUT)/cmapdump.o : pdf/pdf_cmap.c pdf/pdf_cmap_parse.c
# --- Tools and Apps ---
diff --git a/pdf/pdf_cmap.c b/pdf/pdf_cmap.c
index cf50c3c3..2715b5dd 100644
--- a/pdf/pdf_cmap.c
+++ b/pdf/pdf_cmap.c
@@ -181,7 +181,7 @@ pdf_add_codespace(pdf_cmap *cmap, int low, int high, int n)
static void
add_table(pdf_cmap *cmap, int value)
{
- if (cmap->tlen == USHRT_MAX || cmap->rlen == USHRT_MAX)
+ if (cmap->tlen == USHRT_MAX)
{
fz_warn("cmap table is full; ignoring additional entries");
return;
@@ -200,11 +200,6 @@ add_table(pdf_cmap *cmap, int value)
static void
add_range(pdf_cmap *cmap, int low, int high, int flag, int offset)
{
- if (cmap->tlen == USHRT_MAX || cmap->rlen == USHRT_MAX)
- {
- fz_warn("cmap table is full; ignoring additional entries");
- return;
- }
/* If the range is too large to be represented, split it */
if (high - low > 0x3fff)
{
@@ -233,9 +228,14 @@ pdf_map_range_to_table(pdf_cmap *cmap, int low, int *table, int len)
int i;
int high = low + len;
int offset = cmap->tlen;
- for (i = 0; i < len; i++)
- add_table(cmap, table[i]);
- add_range(cmap, low, high, PDF_CMAP_TABLE, offset);
+ if (cmap->tlen + len >= USHRT_MAX)
+ fz_warn("cannot map range to table; table is full");
+ else
+ {
+ for (i = 0; i < len; i++)
+ add_table(cmap, table[i]);
+ add_range(cmap, low, high, PDF_CMAP_TABLE, offset);
+ }
}
/*
@@ -267,11 +267,24 @@ pdf_map_one_to_many(pdf_cmap *cmap, int low, int *values, int len)
len = 8;
}
- offset = cmap->tlen;
- add_table(cmap, len);
- for (i = 0; i < len; i++)
- add_table(cmap, values[i]);
- add_range(cmap, low, low, PDF_CMAP_MULTI, offset);
+ if (len == 2 &&
+ values[0] >= 0xD800 && values[0] <= 0xDBFF &&
+ values[1] >= 0xDC00 && values[1] <= 0xDFFF)
+ {
+ fz_warn("ignoring surrogate pair mapping in cmap");
+ return;
+ }
+
+ if (cmap->tlen + len + 1 >= USHRT_MAX)
+ fz_warn("cannot map one to many; table is full");
+ else
+ {
+ offset = cmap->tlen;
+ add_table(cmap, len);
+ for (i = 0; i < len; i++)
+ add_table(cmap, values[i]);
+ add_range(cmap, low, low, PDF_CMAP_MULTI, offset);
+ }
}
/*
@@ -296,7 +309,7 @@ pdf_sort_cmap(pdf_cmap *cmap)
qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange);
- if (cmap->rlen == USHRT_MAX || cmap->tlen == USHRT_MAX)
+ if (cmap->tlen == USHRT_MAX)
{
fz_warn("cmap table is full; will not combine ranges");
return;
@@ -382,6 +395,10 @@ pdf_sort_cmap(pdf_cmap *cmap)
b ++;
}
+
+ cmap->rlen = a - cmap->ranges;
+
+ fz_flush_warnings();
}
/*
@@ -407,7 +424,7 @@ pdf_lookup_cmap(pdf_cmap *cmap, int cpt)
if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_TABLE)
return cmap->table[i];
if (pdf_range_flags(&cmap->ranges[m]) == PDF_CMAP_MULTI)
- return cmap->table[cmap->ranges[m].offset + 1]; /* first char */
+ return -1; /* should use lookup_cmap_full */
return i;
}
}
diff --git a/pdf/pdf_unicode.c b/pdf/pdf_unicode.c
index 54cf9911..2b28b608 100644
--- a/pdf/pdf_unicode.c
+++ b/pdf/pdf_unicode.c
@@ -50,13 +50,11 @@ pdf_load_to_unicode(pdf_font_desc *font, pdf_xref *xref,
error = pdf_load_system_cmap(&font->to_unicode, "Adobe-GB1-UCS2");
else if (!strcmp(collection, "Adobe-Japan1"))
error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Japan1-UCS2");
- else if (!strcmp(collection, "Adobe-Japan2"))
- error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Japan2-UCS2"); /* where's this? */
else if (!strcmp(collection, "Adobe-Korea1"))
error = pdf_load_system_cmap(&font->to_unicode, "Adobe-Korea1-UCS2");
if (error)
- return fz_rethrow(error, "cannot load to_unicode system cmap %s-UCS2", collection);
+ return fz_rethrow(error, "cannot load ToUnicode system cmap %s-UCS2", collection);
}
if (strings)