summaryrefslogtreecommitdiff
path: root/pdf/pdf_cmap.c
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2013-06-19 15:29:44 +0200
committer Tor Andersson <tor.andersson@artifex.com> 2013-06-20 16:45:35 +0200
commit 0a927854a10e1e6b9770a81e2e1d9f3093631757 (patch)
tree 3d65d820d9fdba2d0d394d99c36290c851b78ca0 /pdf/pdf_cmap.c
parent 1ae8f19179c5f0f8c6352b3c7855465325d5449a (diff)
download mupdf-0a927854a10e1e6b9770a81e2e1d9f3093631757.tar.xz
Rearrange source files.
Diffstat (limited to 'pdf/pdf_cmap.c')
-rw-r--r-- pdf/pdf_cmap.c 518
1 files changed, 0 insertions, 518 deletions
diff --git a/pdf/pdf_cmap.c b/pdf/pdf_cmap.c
deleted file mode 100644
index c006c6bb..00000000
--- a/pdf/pdf_cmap.c
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- * The CMap data structure here is constructed on the fly by
- * adding simple range-to-range mappings. Then the data structure
- * is optimized to contain both range-to-range and range-to-table
- * lookups.
- *
- * Any one-to-many mappings are inserted as one-to-table
- * lookups in the beginning, and are not affected by the optimization
- * stage.
- *
- * There is a special function to add a 256-length range-to-table mapping.
- * The ranges do not have to be added in order.
- *
- * This code can be a lot simpler if we don't care about wasting memory,
- * or can trust the parser to give us optimal mappings.
- */
-
-#include "mupdf/pdf.h"
-
-/*
- * Macros for accessing the combined extent_flags field of a range.
- * The low two bits hold the range flags; the bits above them hold the
- * extent (high - low). Every macro argument is parenthesized so that a
- * compound expression (for example a conditional) expands as intended.
- */
-#define pdf_range_high(r) ((r)->low + ((r)->extent_flags >> 2))
-#define pdf_range_flags(r) ((r)->extent_flags & 3)
-#define pdf_range_set_high(r, h) \
-	((r)->extent_flags = (((r)->extent_flags & 3) | (((h) - (r)->low) << 2)))
-#define pdf_range_set_flags(r, f) \
-	((r)->extent_flags = (((r)->extent_flags & ~3) | (f)))
-
-/*
- * Allocate, destroy and simple parameters.
- */
-
-/*
- * Release everything owned by a cmap that is handed in through its
- * fz_storable base: the reference on the usecmap (when one is set), the
- * range array, the lookup table, and finally the cmap structure itself.
- */
-void
-pdf_free_cmap_imp(fz_context *ctx, fz_storable *cmap_)
-{
-	pdf_cmap *self = (pdf_cmap *)cmap_;
-	pdf_cmap *parent = self->usecmap;
-
-	if (parent != NULL)
-		pdf_drop_cmap(ctx, parent);
-
-	fz_free(ctx, self->ranges);
-	fz_free(ctx, self->table);
-	fz_free(ctx, self);
-}
-
-/*
- * Allocate a new cmap with a reference count of one, empty names, no
- * usecmap, horizontal writing mode (0), no codespace ranges, and empty
- * range and table arrays.
- */
-pdf_cmap *
-pdf_new_cmap(fz_context *ctx)
-{
-	pdf_cmap *fresh = fz_malloc_struct(ctx, pdf_cmap);
-
-	FZ_INIT_STORABLE(fresh, 1, pdf_free_cmap_imp);
-
-	/* Both names start out as empty strings. */
-	fresh->cmap_name[0] = '\0';
-	fresh->usecmap_name[0] = '\0';
-
-	fresh->usecmap = NULL;
-	fresh->wmode = 0;
-	fresh->codespace_len = 0;
-
-	/* Range array: nothing allocated yet. */
-	fresh->ranges = NULL;
-	fresh->rlen = 0;
-	fresh->rcap = 0;
-
-	/* Lookup table: nothing allocated yet. */
-	fresh->table = NULL;
-	fresh->tlen = 0;
-	fresh->tcap = 0;
-
-	return fresh;
-}
-
-/*
- * Take a reference on a cmap through its storable base and return the
- * cmap. Could be a macro for speed.
- */
-pdf_cmap *
-pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap)
-{
-	void *kept = fz_keep_storable(ctx, &cmap->storable);
-
-	return (pdf_cmap *)kept;
-}
-
-/*
- * Release a reference on a cmap through its storable base.
- * Could be a macro for speed.
- */
-void
-pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap)
-{
-	fz_storable *base = &cmap->storable;
-
-	fz_drop_storable(ctx, base);
-}
-
-/*
- * Make 'usecmap' the fallback cmap of 'cmap'.
- *
- * A reference is taken on 'usecmap' and the reference on any previously
- * set usecmap is released. If 'cmap' has no codespace ranges of its own,
- * it inherits a copy of the ones from 'usecmap'.
- */
-void
-pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap)
-{
-	pdf_cmap *previous = cmap->usecmap;
-	int i;
-
-	/*
-	 * Keep the new reference before dropping the old one: if the caller
-	 * passes the cmap that is already installed, dropping first could
-	 * release the last reference and leave us keeping a freed object.
-	 */
-	cmap->usecmap = pdf_keep_cmap(ctx, usecmap);
-	if (previous)
-		pdf_drop_cmap(ctx, previous);
-
-	if (cmap->codespace_len == 0)
-	{
-		cmap->codespace_len = usecmap->codespace_len;
-		for (i = 0; i < usecmap->codespace_len; i++)
-			cmap->codespace[i] = usecmap->codespace[i];
-	}
-}
-
-/* Return the writing mode stored in the cmap. */
-int
-pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap)
-{
-	int mode = cmap->wmode;
-
-	return mode;
-}
-
-/* Store 'wmode' as the writing mode of the cmap. */
-void
-pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode)
-{
-	pdf_cmap *target = cmap;
-
-	target->wmode = wmode;
-}
-
-#ifndef NDEBUG
-/*
- * Debug helper (compiled out when NDEBUG is defined): print the cmap's
- * name, usecmap, writing mode, codespace ranges and every mapping range
- * to stdout. Table ranges list their table entries, one-to-many ranges
- * list their values in hex, and other ranges print their offset.
- */
-void
-pdf_print_cmap(fz_context *ctx, pdf_cmap *cmap)
-{
-	int idx, j;
-
-	printf("cmap $%p /%s {\n", (void *) cmap, cmap->cmap_name);
-
-	if (cmap->usecmap_name[0])
-		printf("\tusecmap /%s\n", cmap->usecmap_name);
-	if (cmap->usecmap)
-		printf("\tusecmap $%p\n", (void *) cmap->usecmap);
-
-	printf("\twmode %d\n", cmap->wmode);
-
-	printf("\tcodespaces {\n");
-	for (idx = 0; idx < cmap->codespace_len; idx++)
-		printf("\t\t<%x> <%x>\n", cmap->codespace[idx].low, cmap->codespace[idx].high);
-	printf("\t}\n");
-
-	printf("\tranges (%d,%d) {\n", cmap->rlen, cmap->tlen);
-	for (idx = 0; idx < cmap->rlen; idx++)
-	{
-		pdf_range *range = &cmap->ranges[idx];
-		int kind = pdf_range_flags(range);
-		int top = pdf_range_high(range);
-
-		printf("\t\t<%04x> <%04x> ", range->low, top);
-
-		if (kind == PDF_CMAP_TABLE)
-		{
-			/* One table entry per code in the inclusive range. */
-			int span = top - range->low + 1;
-
-			printf("[ ");
-			for (j = 0; j < span; j++)
-				printf("%d ", cmap->table[range->offset + j]);
-			printf("]\n");
-			continue;
-		}
-
-		if (kind == PDF_CMAP_MULTI)
-		{
-			/* The first table slot holds the count, the values follow. */
-			int count = cmap->table[range->offset];
-
-			printf("< ");
-			for (j = 0; j < count; j++)
-				printf("%04x ", cmap->table[range->offset + 1 + j]);
-			printf(">\n");
-			continue;
-		}
-
-		printf("%d\n", range->offset);
-	}
-	printf("\t}\n}\n");
-}
-#endif
-
-/*
- * Add a codespacerange section.
- * These ranges are used by pdf_decode_cmap to decode
- * multi-byte encoded strings.
- *
- * 'n' is the number of bytes in a code of this range; 'low' and 'high'
- * are the inclusive bounds of the range. If the fixed-size codespace
- * array is already full, a warning is issued and the range is ignored.
- */
-void
-pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, int low, int high, int n)
-{
-	/*
-	 * Reject only when no free slot is left. The previous "+ 1 ==" test
-	 * turned away the final slot of the array and would not have caught
-	 * a count that had already passed the limit.
-	 */
-	if (cmap->codespace_len >= (int)nelem(cmap->codespace))
-	{
-		fz_warn(ctx, "assert: too many code space ranges");
-		return;
-	}
-
-	cmap->codespace[cmap->codespace_len].n = n;
-	cmap->codespace[cmap->codespace_len].low = low;
-	cmap->codespace[cmap->codespace_len].high = high;
-	cmap->codespace_len ++;
-}
-
-/*
- * Append one integer to the cmap's lookup table.
- *
- * Once the table already holds USHRT_MAX + 1 entries, a warning is
- * issued and the value is dropped. The backing array starts at 256
- * entries and grows by half each time it fills up.
- */
-static void
-add_table(fz_context *ctx, pdf_cmap *cmap, int value)
-{
-	if (cmap->tlen > USHRT_MAX)
-	{
-		fz_warn(ctx, "cmap table is full; ignoring additional entries");
-		return;
-	}
-
-	if (cmap->tlen >= cmap->tcap)
-	{
-		int grown = 256;
-
-		if (cmap->tcap > 1)
-			grown = (cmap->tcap * 3) / 2;
-		cmap->table = fz_resize_array(ctx, cmap->table, grown, sizeof(unsigned short));
-		cmap->tcap = grown;
-	}
-
-	cmap->table[cmap->tlen] = value;
-	cmap->tlen++;
-}
-
-/*
- * Add a range.
- *
- * Appends the inclusive range [low, high] with the given flag and offset
- * to the cmap's range array, growing the array when needed. Ranges whose
- * limits fall outside 0..65535, or whose low exceeds high, are rejected
- * with a warning. A range spanning more codes than the extent field is
- * allowed to hold is split into consecutive, non-overlapping pieces.
- */
-static void
-add_range(fz_context *ctx, pdf_cmap *cmap, int low, int high, int flag, int offset)
-{
-	/* Largest (high - low) that this code stores in extent_flags. */
-	enum { max_extent = 0x3fff };
-
-	/* Sanity check ranges */
-	if (low < 0 || low > 65535 || high < 0 || high > 65535 || low > high)
-	{
-		fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name);
-		return;
-	}
-	/* If the range is too large to be represented, split it */
-	if (high - low > max_extent)
-	{
-		/*
-		 * The first piece covers low .. low+max_extent inclusive, so the
-		 * remainder starts one code (and one offset) after that. Starting
-		 * it at low+max_extent would map that code twice.
-		 */
-		add_range(ctx, cmap, low, low + max_extent, flag, offset);
-		add_range(ctx, cmap, low + max_extent + 1, high, flag, offset + max_extent + 1);
-		return;
-	}
-	if (cmap->rlen + 1 > cmap->rcap)
-	{
-		int new_cap = cmap->rcap > 1 ? (cmap->rcap * 3) / 2 : 256;
-		cmap->ranges = fz_resize_array(ctx, cmap->ranges, new_cap, sizeof(pdf_range));
-		cmap->rcap = new_cap;
-	}
-	cmap->ranges[cmap->rlen].low = low;
-	pdf_range_set_high(&cmap->ranges[cmap->rlen], high);
-	pdf_range_set_flags(&cmap->ranges[cmap->rlen], flag);
-	cmap->ranges[cmap->rlen].offset = offset;
-	cmap->rlen ++;
-}
-
-/*
- * Add a range-to-table mapping.
- *
- * Maps the 'len' consecutive codes starting at 'low' to table[0] ..
- * table[len-1]. The values are appended to the cmap's lookup table and a
- * table range covering [low, low + len - 1] is added. Nothing is added
- * when 'len' is not positive. If the lookup table cannot take 'len' more
- * entries, a warning is issued and nothing is added.
- */
-void
-pdf_map_range_to_table(fz_context *ctx, pdf_cmap *cmap, int low, int *table, int len)
-{
-	int i;
-	int high;
-	int offset = cmap->tlen;
-
-	/* An empty table maps nothing. */
-	if (len <= 0)
-		return;
-
-	/*
-	 * Ranges are inclusive, so 'len' codes end at low + len - 1. Using
-	 * low + len would make lookups of the last code read one slot past
-	 * the values appended below.
-	 */
-	high = low + len - 1;
-
-	if (cmap->tlen + len >= USHRT_MAX + 1)
-		fz_warn(ctx, "cannot map range to table; table is full");
-	else
-	{
-		for (i = 0; i < len; i++)
-			add_table(ctx, cmap, table[i]);
-		add_range(ctx, cmap, low, high, PDF_CMAP_TABLE, offset);
-	}
-}
-
-/*
- * Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25).
- * A range containing a single code is stored as a single mapping.
- */
-void
-pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, int low, int high, int offset)
-{
-	int kind;
-
-	if (high == low)
-		kind = PDF_CMAP_SINGLE;
-	else
-		kind = PDF_CMAP_RANGE;
-
-	add_range(ctx, cmap, low, high, kind, offset);
-}
-
-/*
- * Add a single one-to-many mapping.
- *
- * A mapping to exactly one value is stored as an ordinary single
- * mapping. Mappings to more than eight values are truncated to eight
- * with a warning. A two-value mapping that forms a UTF-16 surrogate pair
- * is ignored with a warning. Otherwise the count followed by the values
- * is appended to the lookup table and a one-to-many range pointing at
- * the count is added, unless the table has no room left.
- */
-void
-pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, int low, int *values, int len)
-{
-	int first_slot, idx;
-	int is_surrogate_pair;
-
-	if (len == 1)
-	{
-		add_range(ctx, cmap, low, low, PDF_CMAP_SINGLE, values[0]);
-		return;
-	}
-
-	if (len > 8)
-	{
-		fz_warn(ctx, "one to many mapping is too large (%d); truncating", len);
-		len = 8;
-	}
-
-	is_surrogate_pair = len == 2 &&
-		values[0] >= 0xD800 && values[0] <= 0xDBFF &&
-		values[1] >= 0xDC00 && values[1] <= 0xDFFF;
-	if (is_surrogate_pair)
-	{
-		fz_warn(ctx, "ignoring surrogate pair mapping in cmap %s", cmap->cmap_name);
-		return;
-	}
-
-	/* The count slot plus 'len' values must still fit in the table. */
-	if (cmap->tlen + len >= USHRT_MAX)
-	{
-		fz_warn(ctx, "cannot map one to many; table is full");
-		return;
-	}
-
-	first_slot = cmap->tlen;
-	add_table(ctx, cmap, len);
-	for (idx = 0; idx < len; idx++)
-		add_table(ctx, cmap, values[idx]);
-	add_range(ctx, cmap, low, low, PDF_CMAP_MULTI, first_slot);
-}
-
-/*
- * Sort the input ranges.
- * Merge contiguous input ranges to range-to-range if the output is contiguous.
- * Merge contiguous input ranges to range-to-table if the output is random.
- */
-
-/* qsort comparator: order ranges by ascending 'low'. */
-static int cmprange(const void *va, const void *vb)
-{
-	const pdf_range *lhs = va;
-	const pdf_range *rhs = vb;
-
-	return lhs->low - rhs->low;
-}
-
-/*
- * Sort the cmap's ranges by their low code and then merge neighbouring
- * ranges in place.
- *
- * Does nothing when the cmap has no ranges. If the lookup table already
- * holds USHRT_MAX + 1 entries, the ranges are left sorted but are not
- * combined. Otherwise a single pass compacts the array: 'a' is the last
- * range written to the output and 'b' is the input range being examined.
- * Each 'b' is either folded into 'a' or copied to the slot after 'a'.
- * On return rlen is the number of ranges left after merging.
- *
- * In the case comments, S = single, R = range, L = table (lookup) and
- * X = any kind.
- */
-void
-pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap)
-{
- pdf_range *a; /* last written range on output */
- pdf_range *b; /* current range examined on input */
-
- if (cmap->rlen == 0)
- return;
-
- qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange);
-
- /* Merging may need to append to the table, so give up if it is full. */
- if (cmap->tlen >= USHRT_MAX + 1)
- {
- fz_warn(ctx, "cmap table is full; will not combine ranges");
- return;
- }
-
- /* The first range is always kept; scanning starts at the second. */
- a = cmap->ranges;
- b = cmap->ranges + 1;
-
- while (b < cmap->ranges + cmap->rlen)
- {
- /* ignore one-to-many mappings */
- if (pdf_range_flags(b) == PDF_CMAP_MULTI)
- {
- *(++a) = *b;
- }
-
- /* input contiguous */
- else if (pdf_range_high(a) + 1 == b->low)
- {
- /* output contiguous */
- /* NOTE(review): when 'a' is a table range its offset is a table index, */
- /* not a mapped value, so this comparison mixes the two - confirm intent. */
- if (pdf_range_high(a) - a->low + a->offset + 1 == b->offset)
- {
- /* SR -> R and SS -> R and RR -> R and RS -> R */
- /* Only merged while the combined extent stays within 0x3fff. */
- if ((pdf_range_flags(a) == PDF_CMAP_SINGLE || pdf_range_flags(a) == PDF_CMAP_RANGE) && (pdf_range_high(b) - a->low <= 0x3fff))
- {
- pdf_range_set_flags(a, PDF_CMAP_RANGE);
- pdf_range_set_high(a, pdf_range_high(b));
- }
-
- /* LS -> L */
- /* NOTE(review): the value is appended at the end of the table; this */
- /* presumably relies on a's entries being the last ones there - confirm. */
- else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_SINGLE && (pdf_range_high(b) - a->low <= 0x3fff))
- {
- pdf_range_set_high(a, pdf_range_high(b));
- add_table(ctx, cmap, b->offset);
- }
-
- /* LR -> LR */
- else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_RANGE)
- {
- *(++a) = *b;
- }
-
- /* XX -> XX */
- else
- {
- *(++a) = *b;
- }
- }
-
- /* output separated */
- else
- {
- /* SS -> L */
- /* Both values are appended to the table and 'a' is pointed at the */
- /* first of them. NOTE(review): add_table drops values silently once */
- /* the table is full, in which case tlen - 2 would not be that slot. */
- if (pdf_range_flags(a) == PDF_CMAP_SINGLE && pdf_range_flags(b) == PDF_CMAP_SINGLE)
- {
- pdf_range_set_flags(a, PDF_CMAP_TABLE);
- pdf_range_set_high(a, pdf_range_high(b));
- add_table(ctx, cmap, a->offset);
- add_table(ctx, cmap, b->offset);
- a->offset = cmap->tlen - 2;
- }
-
- /* LS -> L */
- /* NOTE(review): same end-of-table assumption as the LS case above. */
- else if (pdf_range_flags(a) == PDF_CMAP_TABLE && pdf_range_flags(b) == PDF_CMAP_SINGLE && (pdf_range_high(b) - a->low <= 0x3fff))
- {
- pdf_range_set_high(a, pdf_range_high(b));
- add_table(ctx, cmap, b->offset);
- }
-
- /* XX -> XX */
- else
- {
- *(++a) = *b;
- }
- }
- }
-
- /* input separated: XX -> XX */
- else
- {
- *(++a) = *b;
- }
-
- b ++;
- }
-
- /* 'a' points at the last range kept, so the new count is its index + 1. */
- cmap->rlen = a - cmap->ranges + 1;
-}
-
-/*
- * Lookup the mapping of a codepoint.
- *
- * Binary-searches the ranges of the cmap for one containing 'cpt'. A hit
- * in a table range returns the table entry, a hit in a one-to-many range
- * returns -1 (pdf_lookup_cmap_full must be used for those), and any
- * other hit returns the offset-adjusted code. On a miss the search moves
- * on to the usecmap, if there is one; -1 is returned when no cmap in the
- * chain has a range for the code.
- */
-int
-pdf_lookup_cmap(pdf_cmap *cmap, int cpt)
-{
-	do
-	{
-		int lo = 0;
-		int hi = cmap->rlen - 1;
-
-		while (lo <= hi)
-		{
-			int mid = (lo + hi) >> 1;
-			pdf_range *range = &cmap->ranges[mid];
-
-			if (cpt < range->low)
-				hi = mid - 1;
-			else if (cpt > pdf_range_high(range))
-				lo = mid + 1;
-			else
-			{
-				int mapped = cpt - range->low + range->offset;
-				int kind = pdf_range_flags(range);
-
-				if (kind == PDF_CMAP_TABLE)
-					return cmap->table[mapped];
-				if (kind == PDF_CMAP_MULTI)
-					return -1; /* should use lookup_cmap_full */
-				return mapped;
-			}
-		}
-
-		/* Not found here: fall back to the usecmap, if any. */
-		cmap = cmap->usecmap;
-	}
-	while (cmap);
-
-	return -1;
-}
-
-/*
- * Lookup the mapping of a codepoint, including one-to-many mappings.
- *
- * The mapped values are written to 'out' and their count is returned:
- * one for table, range and single mappings, or the stored count for a
- * one-to-many mapping. On a miss the search moves on to the usecmap, if
- * there is one; 0 is returned when no cmap in the chain has a range for
- * the code.
- */
-int
-pdf_lookup_cmap_full(pdf_cmap *cmap, int cpt, int *out)
-{
-	do
-	{
-		int lo = 0;
-		int hi = cmap->rlen - 1;
-
-		while (lo <= hi)
-		{
-			int mid = (lo + hi) >> 1;
-			pdf_range *range = &cmap->ranges[mid];
-
-			if (cpt < range->low)
-				hi = mid - 1;
-			else if (cpt > pdf_range_high(range))
-				lo = mid + 1;
-			else
-			{
-				int mapped = cpt - range->low + range->offset;
-				int kind = pdf_range_flags(range);
-
-				if (kind == PDF_CMAP_MULTI)
-				{
-					/* The table slot at 'base' holds the count; values follow. */
-					int base = range->offset;
-					int count = cmap->table[base];
-					int idx;
-
-					for (idx = 0; idx < count; idx++)
-						out[idx] = cmap->table[base + idx + 1];
-					return count;
-				}
-
-				out[0] = (kind == PDF_CMAP_TABLE) ? cmap->table[mapped] : mapped;
-				return 1;
-			}
-		}
-
-		/* Not found here: fall back to the usecmap, if any. */
-		cmap = cmap->usecmap;
-	}
-	while (cmap);
-
-	return 0;
-}
-
-/*
- * Use the codespace ranges to extract a codepoint from a
- * multi-byte encoded string.
- *
- * Bytes from 'buf' are accumulated one at a time, up to four. After each
- * byte the code so far is checked against every codespace range declared
- * for that byte count; on the first match the code is stored in '*cpt'
- * and the number of bytes consumed is returned. If nothing matches,
- * '*cpt' is set to 0 and 1 is returned.
- */
-int
-pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, int *cpt)
-{
-	int code = 0;
-	int nbytes, idx;
-
-	for (nbytes = 1; nbytes <= 4; nbytes++)
-	{
-		code = (code << 8) | buf[nbytes - 1];
-
-		for (idx = 0; idx < cmap->codespace_len; idx++)
-		{
-			if (cmap->codespace[idx].n != nbytes)
-				continue;
-			if (code < cmap->codespace[idx].low || code > cmap->codespace[idx].high)
-				continue;
-
-			*cpt = code;
-			return nbytes;
-		}
-	}
-
-	*cpt = 0;
-	return 1;
-}