summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2017-04-13 19:13:59 +0100
committerRobin Watts <robin.watts@artifex.com>2017-04-18 16:45:49 +0100
commit670fe1b085fde8355b6d1a70b143eab60e34b1b6 (patch)
treeeca6b0c5e2561a2da269fb9abd1c8d7a9bdeffcb
parenta742ac09981f7c681f63f88738e7eded95412599 (diff)
downloadmupdf-670fe1b085fde8355b6d1a70b143eab60e34b1b6.tar.xz
Use splay trees for loading/merging cmaps.
This allows for overlaps, merges adjacent (mergeable) ranges and gets us properly searchable results. This causes 1 diff in the test suites (Bug694353.pdf), which is due to the fallback font not having a hypen present at UCS 0x2010.
-rw-r--r--include/mupdf/pdf/cmap.h13
-rw-r--r--scripts/cmapdump.c23
-rw-r--r--source/pdf/pdf-cmap.c665
3 files changed, 604 insertions, 97 deletions
diff --git a/include/mupdf/pdf/cmap.h b/include/mupdf/pdf/cmap.h
index 9edeb9d8..d98cd0ab 100644
--- a/include/mupdf/pdf/cmap.h
+++ b/include/mupdf/pdf/cmap.h
@@ -24,9 +24,11 @@ struct pdf_xrange_s
struct pdf_mrange_s
{
- unsigned int low, len, out[PDF_MRANGE_CAP];
+ unsigned int low, out;
};
+typedef struct cmap_splay_s cmap_splay;
+
struct pdf_cmap_s
{
fz_storable storable;
@@ -53,6 +55,13 @@ struct pdf_cmap_s
int mlen, mcap;
pdf_mrange *mranges;
+
+ int dlen, dcap;
+ int *dict;
+
+ int tlen, tcap, ttop;
+ cmap_splay *tree;
+
};
pdf_cmap *pdf_new_cmap(fz_context *ctx);
@@ -80,6 +89,4 @@ pdf_cmap *pdf_load_system_cmap(fz_context *ctx, const char *name);
pdf_cmap *pdf_load_builtin_cmap(fz_context *ctx, const char *name);
pdf_cmap *pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *ref);
-void pdf_print_cmap(fz_context *ctx, fz_output *out, pdf_cmap *cmap);
-
#endif
diff --git a/scripts/cmapdump.c b/scripts/cmapdump.c
index eb84cb4c..e8ed5742 100644
--- a/scripts/cmapdump.c
+++ b/scripts/cmapdump.c
@@ -51,7 +51,7 @@ main(int argc, char **argv)
FILE *fo;
char name[256];
char *realname;
- int i, k, m;
+ int i, k;
fz_context *ctx;
if (argc < 3)
@@ -141,10 +141,19 @@ main(int argc, char **argv)
fprintf(fo, "static const pdf_mrange cmap_%s_mranges[] = {", name);
for (k = 0; k < cmap->mlen; k++)
{
- fprintf(fo, "\n{%uu,%uu,{", cmap->mranges[k].low, cmap->mranges[k].len);
- for (m = 0; m < PDF_MRANGE_CAP; ++m)
- fprintf(fo, "%uu,", cmap->mranges[k].out[m]);
- fprintf(fo, "}},");
+ fprintf(fo, "\n{%uu,%uu},", cmap->mranges[k].low, cmap->mranges[k].out);
+ }
+ fprintf(fo, "\n};\n\n");
+ }
+
+ if (cmap->dlen > 0)
+ {
+ fprintf(fo, "static const int cmap_%s_dict[] = {", name);
+ for (k = 0; k < cmap->dlen; k++)
+ {
+ if (k % 4 == 0)
+ fprintf(fo, "\n");
+ fprintf(fo, "%uu,", cmap->dict[k]);
}
fprintf(fo, "\n};\n\n");
}
@@ -177,6 +186,10 @@ main(int argc, char **argv)
fprintf(fo, "\t%u, %u, (pdf_mrange*) cmap_%s_mranges,\n", cmap->mlen, cmap->mlen, name);
else
fprintf(fo, "\t0, 0, NULL,\n");
+ if (cmap->dict)
+ fprintf(fo, "\t%u, %u, (int*) cmap_%s_dict,\n", cmap->dlen, cmap->dlen, name);
+ else
+ fprintf(fo, "\t0, 0, NULL,\n");
fprintf(fo, "};\n");
diff --git a/source/pdf/pdf-cmap.c b/source/pdf/pdf-cmap.c
index 01826234..10e36ba2 100644
--- a/source/pdf/pdf-cmap.c
+++ b/source/pdf/pdf-cmap.c
@@ -1,5 +1,8 @@
#include "mupdf/pdf.h"
+#undef CHECK_SPLAY
+#undef DUMP_SPLAY
+
/*
* Allocate, destroy and simple parameters.
*/
@@ -85,44 +88,525 @@ pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned in
cmap->codespace_len ++;
}
+
+struct cmap_splay_s {
+ unsigned int low;
+ unsigned int high;
+ unsigned int out;
+ unsigned int left;
+ unsigned int right;
+ unsigned int parent : 31;
+ unsigned int many : 1;
+};
+
+#define EMPTY ((unsigned int)0x40000000)
+
+/*
+ The splaying steps used:
+
+ Case 1: | z x
+ | y D => A y
+ | x C B z
+ | A B C D
+
+ Case 2: | z x
+ | y D => y z
+ | A x A B C D
+ | B C
+
+ Case 3: | y x
+ | x C => A y
+ | A B B C
+*/
+
+static void
+move_to_root(cmap_splay *tree, unsigned int x)
+{
+ if (x == EMPTY)
+ return;
+ do
+ {
+ unsigned int z, zp;
+ unsigned int y = tree[x].parent;
+ if (y == EMPTY)
+ break;
+ z = tree[y].parent;
+ if (z == EMPTY)
+ {
+ /* Case 3 */
+ tree[x].parent = EMPTY;
+ tree[y].parent = x;
+ if (tree[y].left == x)
+ {
+ /* Case 3 */
+ tree[y].left = tree[x].right;
+ if (tree[y].left != EMPTY)
+ tree[tree[y].left].parent = y;
+ tree[x].right = y;
+ }
+ else
+ {
+ /* Case 3 - reflected */
+ assert(tree[y].right == x);
+ tree[y].right = tree[x].left;
+ if (tree[y].right != EMPTY)
+ tree[tree[y].right].parent = y;
+ tree[x].left = y;
+ }
+ break;
+ }
+
+ zp = tree[z].parent;
+ tree[x].parent = zp;
+ if (zp != EMPTY) {
+ if (tree[zp].left == z)
+ tree[zp].left = x;
+ else
+ {
+ assert(tree[zp].right == z);
+ tree[zp].right = x;
+ }
+ }
+ tree[y].parent = x;
+ if (tree[y].left == x)
+ {
+ tree[y].left = tree[x].right;
+ if (tree[y].left != EMPTY)
+ tree[tree[y].left].parent = y;
+ tree[x].right = y;
+ if (tree[z].left == y)
+ {
+ /* Case 1 */
+ tree[z].parent = y;
+ tree[z].left = tree[y].right;
+ if (tree[z].left != EMPTY)
+ tree[tree[z].left].parent = z;
+ tree[y].right = z;
+ }
+ else
+ {
+ /* Case 2 - reflected */
+ assert(tree[z].right == y);
+ tree[z].parent = x;
+ tree[z].right = tree[x].left;
+ if (tree[z].right != EMPTY)
+ tree[tree[z].right].parent = z;
+ tree[x].left = z;
+ }
+ }
+ else
+ {
+ assert(tree[y].right == x);
+ tree[y].right = tree[x].left;
+ if (tree[y].right != EMPTY)
+ tree[tree[y].right].parent = y;
+ tree[x].left = y;
+ if (tree[z].left == y)
+ {
+ /* Case 2 */
+ tree[z].parent = x;
+ tree[z].left = tree[x].right;
+ if (tree[z].left != EMPTY)
+ tree[tree[z].left].parent = z;
+ tree[x].right = z;
+ }
+ else
+ {
+ /* Case 1 - reflected */
+ assert(tree[z].right == y);
+ tree[z].parent = y;
+ tree[z].right = tree[y].left;
+ if (tree[z].right != EMPTY)
+ tree[tree[z].right].parent = z;
+ tree[y].left = z;
+ }
+ }
+ } while (1);
+}
+
+static unsigned int delete_node(pdf_cmap *cmap, unsigned int current)
+{
+ cmap_splay *tree = cmap->tree;
+ unsigned int parent;
+ unsigned int replacement;
+
+ assert(current != EMPTY);
+
+ parent = tree[current].parent;
+ if (tree[current].right == EMPTY)
+ {
+ if (parent == EMPTY)
+ {
+ replacement = cmap->ttop = tree[current].left;
+ }
+ else if (tree[parent].left == current)
+ {
+ replacement = tree[parent].left = tree[current].left;
+ }
+ else
+ {
+ assert(tree[parent].right == current);
+ replacement = tree[parent].right = tree[current].left;
+ }
+ if (replacement != EMPTY)
+ tree[replacement].parent = parent;
+ else
+ replacement = parent;
+ }
+ else if (tree[current].left == EMPTY)
+ {
+ if (parent == EMPTY)
+ {
+ replacement = cmap->ttop = tree[current].right;
+ }
+ else if (tree[parent].left == current)
+ {
+ replacement = tree[parent].left = tree[current].right;
+ }
+ else
+ {
+ assert(tree[parent].right == current);
+ replacement = tree[parent].right = tree[current].right;
+ }
+ if (replacement != EMPTY)
+ tree[replacement].parent = parent;
+ else
+ replacement = parent;
+ }
+ else
+ {
+ /* Hard case, find the in-order predecessor of current */
+ int amputee = current;
+ replacement = tree[current].left;
+ while (tree[replacement].right != EMPTY) {
+ amputee = replacement;
+ replacement = tree[replacement].right;
+ }
+ /* Remove replacement from the tree */
+ if (amputee == current)
+ {
+ tree[amputee].left = tree[replacement].left;
+ if (tree[amputee].left != EMPTY)
+ tree[tree[amputee].left].parent = amputee;
+ }
+ else
+ {
+ tree[amputee].right = tree[replacement].left;
+ if (tree[amputee].right != EMPTY)
+ tree[tree[amputee].right].parent = amputee;
+ }
+ /* Insert replacement in place of current */
+ tree[replacement].parent = parent;
+ if (parent == EMPTY)
+ {
+ tree[replacement].parent = EMPTY;
+ cmap->ttop = replacement;
+ }
+ else if (tree[parent].left == current)
+ tree[parent].left = replacement;
+ else
+ {
+ assert(tree[parent].right == current);
+ tree[parent].right = replacement;
+ }
+ tree[replacement].left = tree[current].left;
+ if (tree[replacement].left != EMPTY)
+ tree[tree[replacement].left].parent = replacement;
+ tree[replacement].right = tree[current].right;
+ if (tree[replacement].right != EMPTY)
+ tree[tree[replacement].right].parent = replacement;
+ }
+
+ /* current is now unlinked. We need to remove it from our array. */
+ cmap->tlen--;
+ if (current != cmap->tlen)
+ {
+ if (replacement == cmap->tlen)
+ replacement = current;
+ tree[current] = tree[cmap->tlen];
+ parent = tree[current].parent;
+ if (parent == EMPTY)
+ cmap->ttop = current;
+ else if (tree[parent].left == cmap->tlen)
+ tree[parent].left = current;
+ else
+ {
+ assert(tree[parent].right == cmap->tlen);
+ tree[parent].right = current;
+ }
+ if (tree[current].left != EMPTY)
+ {
+ assert(tree[tree[current].left].parent == cmap->tlen);
+ tree[tree[current].left].parent = current;
+ }
+ if (tree[current].right != EMPTY)
+ {
+ assert(tree[tree[current].right].parent == cmap->tlen);
+ tree[tree[current].right].parent = current;
+ }
+ }
+
+ /* Return the node that we should continue searching from */
+ return replacement;
+}
+
+#ifdef DUMP_SPLAY
+static void
+dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre)
+{
+ int i;
+
+ if (node == EMPTY)
+ return;
+
+ for (i = 0; i < depth; i++)
+ fprintf(stderr, " ");
+ fprintf(stderr, "%s%d:", pre, node);
+ if (tree[node].parent == EMPTY)
+ fprintf(stderr, "^EMPTY");
+ else
+ fprintf(stderr, "^%d", tree[node].parent);
+ if (tree[node].left == EMPTY)
+ fprintf(stderr, "<EMPTY");
+ else
+ fprintf(stderr, "<%d", tree[node].left);
+ if (tree[node].right == EMPTY)
+ fprintf(stderr, ">EMPTY");
+ else
+ fprintf(stderr, ">%d", tree[node].right);
+ fprintf(stderr, "(%x,%x,%x,%d)\n", tree[node].low, tree[node].high, tree[node].out, tree[node].many);
+ assert(tree[node].parent == EMPTY || depth);
+ assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node);
+ assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node);
+ dump_splay(tree, tree[node].left, depth+1, "L");
+ dump_splay(tree, tree[node].right, depth+1, "R");
+}
+#endif
+
+enum
+{
+ TOP = 0,
+ LEFT = 1,
+ RIGHT = 2
+};
+
+static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg)
+{
+ int from = TOP;
+
+ while (node != EMPTY)
+ {
+ switch (from)
+ {
+ case TOP:
+ if (tree[node].left != EMPTY)
+ {
+ node = tree[node].left;
+ from = TOP;
+ break;
+ }
+ /* fallthrough */
+ case LEFT:
+ fn(&tree[node], arg);
+ if (tree[node].right != EMPTY)
+ {
+ node = tree[node].right;
+ from = TOP;
+ break;
+ }
+ /* fallthrough */
+ case RIGHT:
+ {
+ unsigned int parent = tree[node].parent;
+ if (parent == EMPTY)
+ return;
+ if (tree[parent].left == node)
+ from = LEFT;
+ else
+ {
+ assert(tree[parent].right == node);
+ from = RIGHT;
+ }
+ node = parent;
+ }
+ }
+ }
+}
+
+#ifdef CHECK_SPLAY
+static void
+do_check(cmap_splay *node, void *arg)
+{
+ cmap_splay *tree = arg;
+ unsigned int num = node - tree;
+ assert(node->left == EMPTY || tree[node->left].parent == num);
+ assert(node->right == EMPTY || tree[node->right].parent == num);
+}
+
+static void
+check_splay(cmap_splay *tree, unsigned int node, int depth)
+{
+ if (node == EMPTY)
+ return;
+ assert(tree[node].parent == EMPTY);
+ walk_splay(tree, node, do_check, tree);
+}
+#endif
+
/*
* Add a range.
*/
static void
-add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out)
+add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many)
{
+ int current;
+ cmap_splay *tree;
+
if (low > high)
{
fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name);
return;
}
- if (low <= 0xFFFF && high <= 0xFFFF && out <= 0xFFFF)
+ tree = cmap->tree;
+
+ if (cmap->tlen)
{
- if (cmap->rlen + 1 > cmap->rcap)
+ unsigned int move = cmap->ttop;
+ unsigned int gt = EMPTY;
+ unsigned int lt = EMPTY;
+ if (check_for_overlap)
{
- int new_cap = cmap->rcap ? cmap->rcap * 2 : 256;
- cmap->ranges = fz_resize_array(ctx, cmap->ranges, new_cap, sizeof *cmap->ranges);
- cmap->rcap = new_cap;
+ /* Check for collision with the current node */
+ do
+ {
+ current = move;
+ /* Cases we might meet:
+ * tree[i]: <----->
+ * case 0: <->
+ * case 1: <------->
+ * case 2: <------------->
+ * case 3: <->
+ * case 4: <------->
+ * case 5: <->
+ */
+ if (low <= tree[current].low && tree[current].low <= high)
+ {
+ /* case 1, reduces to case 0 */
+ /* or case 2, deleting the node */
+ tree[current].out += high + 1 - tree[current].low;
+ tree[current].low = high + 1;
+ if (tree[current].low > tree[current].high)
+ {
+ move = delete_node(cmap, current);
+ current = EMPTY;
+ continue;
+ }
+ }
+ else if (low <= tree[current].high && tree[current].high <= high)
+ {
+ /* case 4, reduces to case 5 */
+ tree[current].high = low - 1;
+ assert(tree[current].low <= tree[current].high);
+ }
+ else if (tree[current].low < low && high < tree[current].high)
+ {
+ /* case 3, reduces to case 5 */
+ int new_high = tree[current].high;
+ tree[current].high = low-1;
+ add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, many);
+ }
+ /* Now look for where to move to next (left for case 0, right for case 5) */
+ if (tree[current].low > high) {
+ move = tree[current].left;
+ gt = current;
+ }
+ else
+ {
+ move = tree[current].right;
+ lt = current;
+ }
+ }
+ while (move != EMPTY);
+ }
+ else
+ {
+ do
+ {
+ current = move;
+ if (tree[current].low > high)
+ {
+ move = tree[current].left;
+ gt = current;
+ }
+ else
+ {
+ move = tree[current].right;
+ lt = current;
+ }
+ } while (move != EMPTY);
+ }
+ /* current is now the node to which we would be adding the new node */
+ /* lt is the last node we traversed which is lt the new node. */
+ /* gt is the last node we traversed which is gt the new node. */
+
+ if (!many)
+ {
+ /* Check for the 'merge' cases. */
+ if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low)
+ {
+ tree[lt].high = high;
+ if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
+ {
+ tree[lt].high = tree[gt].high;
+ delete_node(cmap, gt);
+ }
+ goto exit;
+ }
+ if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
+ {
+ tree[gt].low = low;
+ tree[gt].out = out;
+ goto exit;
+ }
}
- cmap->ranges[cmap->rlen].low = low;
- cmap->ranges[cmap->rlen].high = high;
- cmap->ranges[cmap->rlen].out = out;
- cmap->rlen++;
}
else
+ current = EMPTY;
+
+ if (cmap->tlen == cmap->tcap)
{
- if (cmap->xlen + 1 > cmap->xcap)
- {
- int new_cap = cmap->xcap ? cmap->xcap * 2 : 256;
- cmap->xranges = fz_resize_array(ctx, cmap->xranges, new_cap, sizeof *cmap->xranges);
- cmap->xcap = new_cap;
- }
- cmap->xranges[cmap->xlen].low = low;
- cmap->xranges[cmap->xlen].high = high;
- cmap->xranges[cmap->xlen].out = out;
- cmap->xlen++;
+ int new_cap = cmap->tcap ? cmap->tcap * 2 : 256;
+ tree = cmap->tree = fz_resize_array(ctx, cmap->tree, new_cap, sizeof *cmap->tree);
+ cmap->tcap = new_cap;
+ }
+ tree[cmap->tlen].low = low;
+ tree[cmap->tlen].high = high;
+ tree[cmap->tlen].out = out;
+ tree[cmap->tlen].parent = current;
+ tree[cmap->tlen].left = EMPTY;
+ tree[cmap->tlen].right = EMPTY;
+ tree[cmap->tlen].many = many;
+ cmap->tlen++;
+ if (current == EMPTY)
+ cmap->ttop = 0;
+ else if (tree[current].low > high)
+ tree[current].left = cmap->tlen-1;
+ else
+ {
+ assert(tree[current].high < low);
+ tree[current].right = cmap->tlen-1;
}
+ move_to_root(tree, cmap->tlen-1);
+ cmap->ttop = cmap->tlen-1;
+exit:
+ {}
+#ifdef CHECK_SPLAY
+ check_splay(cmap->tree, cmap->ttop, 0);
+#endif
+#ifdef DUMP_SPLAY
+ dump_splay(cmap->tree, cmap->ttop, 0, "");
+#endif
}
/*
@@ -131,20 +615,20 @@ add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high,
static void
add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len)
{
- int i;
- if (cmap->mlen + 1 > cmap->mcap)
+ int out_pos;
+
+ if (cmap->dlen + len + 1 > cmap->dcap)
{
- int new_cap = cmap->mcap ? cmap->mcap * 2 : 256;
- cmap->mranges = fz_resize_array(ctx, cmap->mranges, new_cap, sizeof *cmap->mranges);
- cmap->mcap = new_cap;
+ int new_cap = cmap->dcap ? cmap->dcap * 2 : 256;
+ cmap->dict = fz_resize_array(ctx, cmap->dict, new_cap, sizeof *cmap->dict);
+ cmap->dcap = new_cap;
}
- cmap->mranges[cmap->mlen].low = low;
- cmap->mranges[cmap->mlen].len = len;
- for (i = 0; i < len; ++i)
- cmap->mranges[cmap->mlen].out[i] = out[i];
- for (; i < PDF_MRANGE_CAP; ++i)
- cmap->mranges[cmap->mlen].out[i] = 0;
- cmap->mlen++;
+ out_pos = cmap->dlen;
+ cmap->dict[out_pos] = len;
+ memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len);
+ cmap->dlen += len + 1;
+
+ add_range(ctx, cmap, low, low, out_pos, 1, 1);
}
/*
@@ -153,7 +637,7 @@ add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len)
void
pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out)
{
- add_range(ctx, cmap, low, high, out);
+ add_range(ctx, cmap, low, high, out, 1, 0);
}
/*
@@ -164,7 +648,7 @@ pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *valu
{
if (len == 1)
{
- add_range(ctx, cmap, low, low, values[0]);
+ add_range(ctx, cmap, low, low, values[0], 1, 0);
return;
}
@@ -175,7 +659,7 @@ pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *valu
values[1] >= 0xDC00 && values[1] <= 0xDFFF)
{
int rune = ((values[0] - 0xD800) << 10) + (values[1] - 0xDC00) + 0x10000;
- add_range(ctx, cmap, low, low, rune);
+ add_range(ctx, cmap, low, low, rune, 1, 0);
return;
}
@@ -188,70 +672,71 @@ pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *valu
add_mrange(ctx, cmap, low, values, len);
}
-/*
- * Sort the input ranges.
- * Merge contiguous ranges.
- */
-
-static int cmprange(const void *va, const void *vb)
+static void
+count_node_types(cmap_splay *node, void *arg)
{
- unsigned int a = ((const pdf_range*)va)->low;
- unsigned int b = ((const pdf_range*)vb)->low;
- return a < b ? -1 : a > b ? 1 : 0;
-}
+ int *counts = (int *)arg;
-static int cmpxrange(const void *va, const void *vb)
-{
- unsigned int a = ((const pdf_xrange*)va)->low;
- unsigned int b = ((const pdf_xrange*)vb)->low;
- return a < b ? -1 : a > b ? 1 : 0;
+ if (node->many)
+ counts[2]++;
+ else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
+ counts[0]++;
+ else
+ counts[1]++;
}
-static int cmpmrange(const void *va, const void *vb)
+static void
+copy_node_types(cmap_splay *node, void *arg)
{
- unsigned int a = ((const pdf_mrange*)va)->low;
- unsigned int b = ((const pdf_mrange*)vb)->low;
- return a < b ? -1 : a > b ? 1 : 0;
+ pdf_cmap *cmap = (pdf_cmap *)arg;
+
+ if (node->many)
+ {
+ assert(node->low == node->high);
+ cmap->mranges[cmap->mlen].low = node->low;
+ cmap->mranges[cmap->mlen].out = node->out;
+ cmap->mlen++;
+ }
+ else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
+ {
+ cmap->ranges[cmap->rlen].low = node->low;
+ cmap->ranges[cmap->rlen].high = node->high;
+ cmap->ranges[cmap->rlen].out = node->out;
+ cmap->rlen++;
+ }
+ else
+ {
+ cmap->xranges[cmap->xlen].low = node->low;
+ cmap->xranges[cmap->xlen].high = node->high;
+ cmap->xranges[cmap->xlen].out = node->out;
+ cmap->xlen++;
+ }
}
void
pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap)
{
- pdf_range *a, *b;
- pdf_xrange *x, *y;
+ int counts[3];
- if (cmap->rlen)
- {
- qsort(cmap->ranges, cmap->rlen, sizeof *cmap->ranges, cmprange);
- a = cmap->ranges;
- for (b = a + 1; b < cmap->ranges + cmap->rlen; ++b)
- {
- if (b->low == a->high + 1 && b->out == a->out + (a->high - a->low) + 1)
- a->high = b->high;
- else
- *(++a) = *b;
- }
- cmap->rlen = a - cmap->ranges + 1;
- }
+ if (cmap->tree == NULL)
+ return;
- if (cmap->xlen)
- {
- qsort(cmap->xranges, cmap->xlen, sizeof *cmap->xranges, cmpxrange);
- x = cmap->xranges;
- for (y = x + 1; y < cmap->xranges + cmap->xlen; ++y)
- {
- if (y->low == x->high + 1 && y->out == x->out + (x->high - x->low) + 1)
- x->high = y->high;
- else
- *(++x) = *y;
- }
- cmap->xlen = x - cmap->xranges + 1;
- }
+ counts[0] = 0;
+ counts[1] = 0;
+ counts[2] = 0;
+ walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts);
- if (cmap->mlen)
- {
- qsort(cmap->mranges, cmap->mlen, sizeof *cmap->mranges, cmpmrange);
- }
+ cmap->ranges = fz_malloc_array(ctx, counts[0], sizeof(*cmap->ranges));
+ cmap->rcap = counts[0];
+ cmap->xranges = fz_malloc_array(ctx, counts[1], sizeof(*cmap->xranges));
+ cmap->xcap = counts[1];
+ cmap->mranges = fz_malloc_array(ctx, counts[2], sizeof(*cmap->mranges));
+ cmap->mcap = counts[2];
+
+ walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap);
+
+ fz_free(ctx, cmap->tree);
+ cmap->tree = NULL;
}
/*
@@ -348,9 +833,11 @@ pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out)
l = m + 1;
else
{
- for (i = 0; i < mranges[m].len; ++i)
- out[i] = mranges[m].out[i];
- return mranges[m].len;
+ int *ptr = &cmap->dict[cmap->mranges[m].out];
+ unsigned int len = (unsigned int)*ptr++;
+ for (i = 0; i < len; ++i)
+ out[i] = *ptr++;
+ return len;
}
}