summaryrefslogtreecommitdiff
path: root/mupdf/pdf_cmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mupdf/pdf_cmap.c')
-rw-r--r--mupdf/pdf_cmap.c1294
1 files changed, 283 insertions, 1011 deletions
diff --git a/mupdf/pdf_cmap.c b/mupdf/pdf_cmap.c
index 6728c333..47402f2e 100644
--- a/mupdf/pdf_cmap.c
+++ b/mupdf/pdf_cmap.c
@@ -18,49 +18,6 @@
#include "fitz.h"
#include "mupdf.h"
-typedef struct pdf_range_s pdf_range;
-
-enum { MAXCODESPACE = 10 };
-enum { SINGLE, RANGE, TABLE, MULTI };
-
-struct pdf_range_s
-{
- int low;
- int high;
- int flag; /* what kind of lookup is this */
- int offset; /* either range-delta or table-index */
-};
-
-static int
-cmprange(const void *va, const void *vb)
-{
- return ((const pdf_range*)va)->low - ((const pdf_range*)vb)->low;
-}
-
-struct pdf_cmap_s
-{
- int refs;
- char cmapname[32];
-
- char usecmapname[32];
- pdf_cmap *usecmap;
-
- int wmode;
-
- int ncspace;
- struct {
- int n;
- unsigned lo;
- unsigned hi;
- } cspace[MAXCODESPACE];
-
- int rlen, rcap;
- pdf_range *ranges;
-
- int tlen, tcap;
- int *table;
-};
-
/*
* Allocate, destroy and simple parameters.
*/
@@ -68,133 +25,137 @@ struct pdf_cmap_s
fz_error *
pdf_newcmap(pdf_cmap **cmapp)
{
- pdf_cmap *cmap;
+ pdf_cmap *cmap;
- cmap = *cmapp = fz_malloc(sizeof(pdf_cmap));
- if (!cmap)
- return fz_throw("outofmem: cmap struct");
+ cmap = *cmapp = fz_malloc(sizeof(pdf_cmap));
+ if (!cmap)
+ return fz_throw("outofmem: cmap struct");
- cmap->refs = 1;
- strcpy(cmap->cmapname, "");
+ cmap->refs = 1;
+ strcpy(cmap->cmapname, "");
- strcpy(cmap->usecmapname, "");
- cmap->usecmap = nil;
+ strcpy(cmap->usecmapname, "");
+ cmap->usecmap = nil;
- cmap->wmode = 0;
+ cmap->wmode = 0;
- cmap->ncspace = 0;
+ cmap->ncspace = 0;
- cmap->rlen = 0;
- cmap->rcap = 0;
- cmap->ranges = nil;
+ cmap->rlen = 0;
+ cmap->rcap = 0;
+ cmap->ranges = nil;
- cmap->tlen = 0;
- cmap->tcap = 0;
- cmap->table = nil;
+ cmap->tlen = 0;
+ cmap->tcap = 0;
+ cmap->table = nil;
- return fz_okay;
+ return fz_okay;
}
pdf_cmap *
pdf_keepcmap(pdf_cmap *cmap)
{
+ if (cmap->refs >= 0)
cmap->refs ++;
- return cmap;
+ return cmap;
}
void
pdf_dropcmap(pdf_cmap *cmap)
{
+ if (cmap->refs >= 0)
+ {
if (--cmap->refs == 0)
{
- if (cmap->usecmap)
- pdf_dropcmap(cmap->usecmap);
- fz_free(cmap->ranges);
- fz_free(cmap->table);
- fz_free(cmap);
+ if (cmap->usecmap)
+ pdf_dropcmap(cmap->usecmap);
+ fz_free(cmap->ranges);
+ fz_free(cmap->table);
+ fz_free(cmap);
}
+ }
}
pdf_cmap *
pdf_getusecmap(pdf_cmap *cmap)
{
- return cmap->usecmap;
+ return cmap->usecmap;
}
void
pdf_setusecmap(pdf_cmap *cmap, pdf_cmap *usecmap)
{
- int i;
+ int i;
- if (cmap->usecmap)
- pdf_dropcmap(cmap->usecmap);
- cmap->usecmap = pdf_keepcmap(usecmap);
+ if (cmap->usecmap)
+ pdf_dropcmap(cmap->usecmap);
+ cmap->usecmap = pdf_keepcmap(usecmap);
- if (cmap->ncspace == 0)
- {
- cmap->ncspace = usecmap->ncspace;
- for (i = 0; i < usecmap->ncspace; i++)
- cmap->cspace[i] = usecmap->cspace[i];
- }
+ if (cmap->ncspace == 0)
+ {
+ cmap->ncspace = usecmap->ncspace;
+ for (i = 0; i < usecmap->ncspace; i++)
+ cmap->cspace[i] = usecmap->cspace[i];
+ }
}
int
pdf_getwmode(pdf_cmap *cmap)
{
- return cmap->wmode;
+ return cmap->wmode;
}
void
pdf_setwmode(pdf_cmap *cmap, int wmode)
{
- cmap->wmode = wmode;
+ cmap->wmode = wmode;
}
void
pdf_debugcmap(pdf_cmap *cmap)
{
- int i, k, n;
+ int i, k, n;
- printf("cmap $%p /%s {\n", (void *) cmap, cmap->cmapname);
+ printf("cmap $%p /%s {\n", (void *) cmap, cmap->cmapname);
- if (cmap->usecmapname[0])
- printf(" usecmap /%s\n", cmap->usecmapname);
- if (cmap->usecmap)
- printf(" usecmap $%p\n", (void *) cmap->usecmap);
+ if (cmap->usecmapname[0])
+ printf(" usecmap /%s\n", cmap->usecmapname);
+ if (cmap->usecmap)
+ printf(" usecmap $%p\n", (void *) cmap->usecmap);
- printf(" wmode %d\n", cmap->wmode);
+ printf(" wmode %d\n", cmap->wmode);
- printf(" codespaces {\n");
- for (i = 0; i < cmap->ncspace; i++)
- {
- printf(" <%x> <%x>\n", cmap->cspace[i].lo, cmap->cspace[i].hi);
- }
- printf(" }\n");
+ printf(" codespaces {\n");
+ for (i = 0; i < cmap->ncspace; i++)
+ {
+ printf(" <%x> <%x>\n", cmap->cspace[i].lo, cmap->cspace[i].hi);
+ }
+ printf(" }\n");
- printf(" ranges (%d,%d) {\n", cmap->rlen, cmap->tlen);
- for (i = 0; i < cmap->rlen; i++)
+ printf(" ranges (%d,%d) {\n", cmap->rlen, cmap->tlen);
+ for (i = 0; i < cmap->rlen; i++)
+ {
+ pdf_range *r = &cmap->ranges[i];
+ printf(" <%04x> <%04x> ", r->low, r->high);
+ if (r->flag == PDF_CMAP_TABLE)
{
- pdf_range *r = &cmap->ranges[i];
- printf(" <%04x> <%04x> ", r->low, r->high);
- if (r->flag == TABLE)
- {
- printf("[ ");
- for (k = 0; k < r->high - r->low + 1; k++)
- printf("%d ", cmap->table[r->offset + k]);
- printf("]\n");
- }
- else if (r->flag == MULTI)
- {
- printf("< ");
- n = cmap->table[r->offset];
- for (k = 0; k < n; k++)
- printf("%04x ", cmap->table[r->offset + 1 + k]);
- printf(">\n");
- }
- else
- printf("%d\n", r->offset);
+ printf("[ ");
+ for (k = 0; k < r->high - r->low + 1; k++)
+ printf("%d ", cmap->table[r->offset + k]);
+ printf("]\n");
}
- printf(" }\n}\n");
+ else if (r->flag == PDF_CMAP_MULTI)
+ {
+ printf("< ");
+ n = cmap->table[r->offset];
+ for (k = 0; k < n; k++)
+ printf("%04x ", cmap->table[r->offset + 1 + k]);
+ printf(">\n");
+ }
+ else
+ printf("%d\n", r->offset);
+ }
+ printf(" }\n}\n");
}
/*
@@ -205,16 +166,16 @@ pdf_debugcmap(pdf_cmap *cmap)
fz_error *
pdf_addcodespace(pdf_cmap *cmap, unsigned lo, unsigned hi, int n)
{
- if (cmap->ncspace + 1 == MAXCODESPACE)
- return fz_throw("assert: too many code space ranges");
+ if (cmap->ncspace + 1 == nelem(cmap->cspace))
+ return fz_throw("assert: too many code space ranges");
- cmap->cspace[cmap->ncspace].n = n;
- cmap->cspace[cmap->ncspace].lo = lo;
- cmap->cspace[cmap->ncspace].hi = hi;
+ cmap->cspace[cmap->ncspace].n = n;
+ cmap->cspace[cmap->ncspace].lo = lo;
+ cmap->cspace[cmap->ncspace].hi = hi;
- cmap->ncspace ++;
+ cmap->ncspace ++;
- return fz_okay;
+ return fz_okay;
}
/*
@@ -223,19 +184,19 @@ pdf_addcodespace(pdf_cmap *cmap, unsigned lo, unsigned hi, int n)
static fz_error *
addtable(pdf_cmap *cmap, int value)
{
- if (cmap->tlen + 1 > cmap->tcap)
- {
- int newcap = cmap->tcap == 0 ? 256 : cmap->tcap * 2;
- int *newtable = fz_realloc(cmap->table, newcap * sizeof(int));
- if (!newtable)
- return fz_throw("outofmem: cmap table");
- cmap->tcap = newcap;
- cmap->table = newtable;
- }
+ if (cmap->tlen + 1 > cmap->tcap)
+ {
+ int newcap = cmap->tcap == 0 ? 256 : cmap->tcap * 2;
+ int *newtable = fz_realloc(cmap->table, newcap * sizeof(int));
+ if (!newtable)
+ return fz_throw("outofmem: cmap table");
+ cmap->tcap = newcap;
+ cmap->table = newtable;
+ }
- cmap->table[cmap->tlen++] = value;
+ cmap->table[cmap->tlen++] = value;
- return fz_okay;
+ return fz_okay;
}
/*
@@ -244,24 +205,24 @@ addtable(pdf_cmap *cmap, int value)
static fz_error *
addrange(pdf_cmap *cmap, int low, int high, int flag, int offset)
{
- if (cmap->rlen + 1 > cmap->rcap)
- {
- pdf_range *newranges;
- int newcap = cmap->rcap == 0 ? 256 : cmap->rcap * 2;
- newranges = fz_realloc(cmap->ranges, newcap * sizeof(pdf_range));
- if (!newranges)
- return fz_throw("outofmem: cmap ranges");
- cmap->rcap = newcap;
- cmap->ranges = newranges;
- }
+ if (cmap->rlen + 1 > cmap->rcap)
+ {
+ pdf_range *newranges;
+ int newcap = cmap->rcap == 0 ? 256 : cmap->rcap * 2;
+ newranges = fz_realloc(cmap->ranges, newcap * sizeof(pdf_range));
+ if (!newranges)
+ return fz_throw("outofmem: cmap ranges");
+ cmap->rcap = newcap;
+ cmap->ranges = newranges;
+ }
- cmap->ranges[cmap->rlen].low = low;
- cmap->ranges[cmap->rlen].high = high;
- cmap->ranges[cmap->rlen].flag = flag;
- cmap->ranges[cmap->rlen].offset = offset;
- cmap->rlen ++;
+ cmap->ranges[cmap->rlen].low = low;
+ cmap->ranges[cmap->rlen].high = high;
+ cmap->ranges[cmap->rlen].flag = flag;
+ cmap->ranges[cmap->rlen].offset = offset;
+ cmap->rlen ++;
- return fz_okay;
+ return fz_okay;
}
/*
@@ -270,26 +231,26 @@ addrange(pdf_cmap *cmap, int low, int high, int flag, int offset)
fz_error *
pdf_maprangetotable(pdf_cmap *cmap, int low, int *table, int len)
{
- fz_error *error;
- int offset;
- int high;
- int i;
-
- high = low + len;
- offset = cmap->tlen;
+ fz_error *error;
+ int offset;
+ int high;
+ int i;
- for (i = 0; i < len; i++)
- {
- error = addtable(cmap, table[i]);
- if (error)
- return fz_rethrow(error, "cannot add range-to-table index");
- }
+ high = low + len;
+ offset = cmap->tlen;
- error = addrange(cmap, low, high, TABLE, offset);
+ for (i = 0; i < len; i++)
+ {
+ error = addtable(cmap, table[i]);
if (error)
- return fz_rethrow(error, "cannot add range-to-table range");
+ return fz_rethrow(error, "cannot add range-to-table index");
+ }
- return fz_okay;
+ error = addrange(cmap, low, high, PDF_CMAP_TABLE, offset);
+ if (error)
+ return fz_rethrow(error, "cannot add range-to-table range");
+
+ return fz_okay;
}
/*
@@ -298,11 +259,11 @@ pdf_maprangetotable(pdf_cmap *cmap, int low, int *table, int len)
fz_error *
pdf_maprangetorange(pdf_cmap *cmap, int low, int high, int offset)
{
- fz_error *error;
- error = addrange(cmap, low, high, high - low == 0 ? SINGLE : RANGE, offset);
- if (error)
- return fz_rethrow(error, "cannot add range-to-range mapping");
- return fz_okay;
+ fz_error *error;
+ error = addrange(cmap, low, high, high - low == 0 ? PDF_CMAP_SINGLE : PDF_CMAP_RANGE, offset);
+ if (error)
+ return fz_rethrow(error, "cannot add range-to-range mapping");
+ return fz_okay;
}
/*
@@ -311,36 +272,36 @@ pdf_maprangetorange(pdf_cmap *cmap, int low, int high, int offset)
fz_error *
pdf_maponetomany(pdf_cmap *cmap, int low, int *values, int len)
{
- fz_error *error;
- int offset;
- int i;
-
- if (len == 1)
- {
- error = addrange(cmap, low, low, SINGLE, values[0]);
- if (error)
- return fz_rethrow(error, "cannot add one-to-one mapping");
- return fz_okay;
- }
-
- offset = cmap->tlen;
+ fz_error *error;
+ int offset;
+ int i;
- error = addtable(cmap, len);
+ if (len == 1)
+ {
+ error = addrange(cmap, low, low, PDF_CMAP_SINGLE, values[0]);
if (error)
- return fz_rethrow(error, "cannot add one-to-many table length");
+ return fz_rethrow(error, "cannot add one-to-one mapping");
+ return fz_okay;
+ }
- for (i = 0; i < len; i++)
- {
- error = addtable(cmap, values[i]);
- if (error)
- return fz_rethrow(error, "cannot add one-to-many table index");
- }
+ offset = cmap->tlen;
+
+ error = addtable(cmap, len);
+ if (error)
+ return fz_rethrow(error, "cannot add one-to-many table length");
- error = addrange(cmap, low, low, MULTI, offset);
+ for (i = 0; i < len; i++)
+ {
+ error = addtable(cmap, values[i]);
if (error)
- return fz_rethrow(error, "cannot add one-to-many mapping");
+ return fz_rethrow(error, "cannot add one-to-many table index");
+ }
- return fz_okay;
+ error = addrange(cmap, low, low, PDF_CMAP_MULTI, offset);
+ if (error)
+ return fz_rethrow(error, "cannot add one-to-many mapping");
+
+ return fz_okay;
}
/*
@@ -348,130 +309,137 @@ pdf_maponetomany(pdf_cmap *cmap, int low, int *values, int len)
* Merge contiguous input ranges to range-to-range if the output is contiguous.
* Merge contiguous input ranges to range-to-table if the output is random.
*/
+
+static int cmprange(const void *va, const void *vb)
+{
+ return ((const pdf_range*)va)->low - ((const pdf_range*)vb)->low;
+}
+
fz_error *
pdf_sortcmap(pdf_cmap *cmap)
{
- fz_error *error;
- pdf_range *newranges;
- int *newtable;
- pdf_range *a; /* last written range on output */
- pdf_range *b; /* current range examined on input */
+ fz_error *error;
+ pdf_range *newranges;
+ int *newtable;
+ pdf_range *a; /* last written range on output */
+ pdf_range *b; /* current range examined on input */
+
+ if (cmap->rlen == 0)
+ return fz_okay;
- if (cmap->rlen == 0)
- return fz_okay;
+ qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange);
- qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange);
+ a = cmap->ranges;
+ b = cmap->ranges + 1;
- a = cmap->ranges;
- b = cmap->ranges + 1;
+ while (b < cmap->ranges + cmap->rlen)
+ {
+ /* ignore one-to-many mappings */
+ if (b->flag == PDF_CMAP_MULTI)
+ {
+ *(++a) = *b;
+ }
- while (b < cmap->ranges + cmap->rlen)
+ /* input contiguous */
+ else if (a->high + 1 == b->low)
{
- /* ignore one-to-many mappings */
- if (b->flag == MULTI)
+ /* output contiguous */
+ if (a->high - a->low + a->offset + 1 == b->offset)
+ {
+ /* SR -> R and SS -> R and RR -> R and RS -> R */
+ if (a->flag == PDF_CMAP_SINGLE || a->flag == PDF_CMAP_RANGE)
+ {
+ a->flag = PDF_CMAP_RANGE;
+ a->high = b->high;
+ }
+
+ /* LS -> L */
+ else if (a->flag == PDF_CMAP_TABLE && b->flag == PDF_CMAP_SINGLE)
{
- *(++a) = *b;
+ a->high = b->high;
+ error = addtable(cmap, b->offset);
+ if (error)
+ return fz_rethrow(error, "cannot convert LS -> L");
}
- /* input contiguous */
- else if (a->high + 1 == b->low)
+ /* LR -> LR */
+ else if (a->flag == PDF_CMAP_TABLE && b->flag == PDF_CMAP_RANGE)
{
- /* output contiguous */
- if (a->high - a->low + a->offset + 1 == b->offset)
- {
- /* SR -> R and SS -> R and RR -> R and RS -> R */
- if (a->flag == SINGLE || a->flag == RANGE)
- {
- a->flag = RANGE;
- a->high = b->high;
- }
-
- /* LS -> L */
- else if (a->flag == TABLE && b->flag == SINGLE)
- {
- a->high = b->high;
- error = addtable(cmap, b->offset);
- if (error)
- return fz_rethrow(error, "cannot convert LS -> L");
- }
-
- /* LR -> LR */
- else if (a->flag == TABLE && b->flag == RANGE)
- {
- *(++a) = *b;
- }
-
- /* XX -> XX */
- else
- {
- *(++a) = *b;
- }
- }
-
- /* output separated */
- else
- {
- /* SS -> L */
- if (a->flag == SINGLE && b->flag == SINGLE)
- {
- a->flag = TABLE;
- a->high = b->high;
-
- error = addtable(cmap, a->offset);
- if (error)
- return fz_rethrow(error, "cannot convert SS -> L");
-
- error = addtable(cmap, b->offset);
- if (error)
- return fz_rethrow(error, "cannot convert SS -> L");
-
- a->offset = cmap->tlen - 2;
- }
-
- /* LS -> L */
- else if (a->flag == TABLE && b->flag == SINGLE)
- {
- a->high = b->high;
- error = addtable(cmap, b->offset);
- if (error)
- return fz_rethrow(error, "cannot convert LS -> L");
- }
-
- /* XX -> XX */
- else
- {
- *(++a) = *b;
- }
- }
+ *(++a) = *b;
}
- /* input separated: XX -> XX */
+ /* XX -> XX */
else
{
- *(++a) = *b;
+ *(++a) = *b;
}
+ }
- b ++;
- }
+ /* output separated */
+ else
+ {
+ /* SS -> L */
+ if (a->flag == PDF_CMAP_SINGLE && b->flag == PDF_CMAP_SINGLE)
+ {
+ a->flag = PDF_CMAP_TABLE;
+ a->high = b->high;
- cmap->rlen = a - cmap->ranges + 1;
+ error = addtable(cmap, a->offset);
+ if (error)
+ return fz_rethrow(error, "cannot convert SS -> L");
- newranges = fz_realloc(cmap->ranges, cmap->rlen * sizeof(pdf_range));
- if (!newranges)
- return fz_throw("outofmem: cmap ranges");
- cmap->rcap = cmap->rlen;
- cmap->ranges = newranges;
+ error = addtable(cmap, b->offset);
+ if (error)
+ return fz_rethrow(error, "cannot convert SS -> L");
- if (cmap->tlen)
+ a->offset = cmap->tlen - 2;
+ }
+
+ /* LS -> L */
+ else if (a->flag == PDF_CMAP_TABLE && b->flag == PDF_CMAP_SINGLE)
+ {
+ a->high = b->high;
+ error = addtable(cmap, b->offset);
+ if (error)
+ return fz_rethrow(error, "cannot convert LS -> L");
+ }
+
+ /* XX -> XX */
+ else
+ {
+ *(++a) = *b;
+ }
+ }
+ }
+
+ /* input separated: XX -> XX */
+ else
{
- newtable = fz_realloc(cmap->table, cmap->tlen * sizeof(int));
- if (!newtable)
- return fz_throw("outofmem: cmap table");
- cmap->tcap = cmap->tlen;
- cmap->table = newtable;
+ *(++a) = *b;
}
- return fz_okay;
+ b ++;
+ }
+
+ cmap->rlen = a - cmap->ranges + 1;
+
+ newranges = fz_realloc(cmap->ranges, cmap->rlen * sizeof(pdf_range));
+ if (!newranges)
+ return fz_throw("outofmem: cmap ranges");
+ cmap->rcap = cmap->rlen;
+ if (cmap->refs >= 0)
+ cmap->ranges = newranges;
+
+ if (cmap->tlen)
+ {
+ newtable = fz_realloc(cmap->table, cmap->tlen * sizeof(int));
+ if (!newtable)
+ return fz_throw("outofmem: cmap table");
+ cmap->tcap = cmap->tlen;
+ cmap->table = newtable;
+ }
+
+ return fz_okay;
}
/*
@@ -480,32 +448,32 @@ pdf_sortcmap(pdf_cmap *cmap)
int
pdf_lookupcmap(pdf_cmap *cmap, int cpt)
{
- int l = 0;
- int r = cmap->rlen - 1;
- int m;
+ int l = 0;
+ int r = cmap->rlen - 1;
+ int m;
- while (l <= r)
+ while (l <= r)
+ {
+ m = (l + r) >> 1;
+ if (cpt < cmap->ranges[m].low)
+ r = m - 1;
+ else if (cpt > cmap->ranges[m].high)
+ l = m + 1;
+ else
{
- m = (l + r) >> 1;
- if (cpt < cmap->ranges[m].low)
- r = m - 1;
- else if (cpt > cmap->ranges[m].high)
- l = m + 1;
- else
- {
- int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset;
- if (cmap->ranges[m].flag == TABLE)
- return cmap->table[i];
- if (cmap->ranges[m].flag == MULTI)
- return -1;
- return i;
- }
+ int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset;
+ if (cmap->ranges[m].flag == PDF_CMAP_TABLE)
+ return cmap->table[i];
+ if (cmap->ranges[m].flag == PDF_CMAP_MULTI)
+ return -1;
+ return i;
}
+ }
- if (cmap->usecmap)
- return pdf_lookupcmap(cmap->usecmap, cpt);
+ if (cmap->usecmap)
+ return pdf_lookupcmap(cmap->usecmap, cpt);
- return -1;
+ return -1;
}
/*
@@ -515,7 +483,7 @@ pdf_lookupcmap(pdf_cmap *cmap, int cpt)
unsigned char *
pdf_decodecmap(pdf_cmap *cmap, unsigned char *buf, int *cpt)
{
- int i, k, n, c;
+ int k, n, c;
c = 0;
for (n = 0; n < 4; n++)
@@ -538,699 +506,3 @@ pdf_decodecmap(pdf_cmap *cmap, unsigned char *buf, int *cpt)
return buf + 1;
}
-/*
- * CMap parser
- */
-
-enum
-{
- TUSECMAP = PDF_NTOKENS,
- TBEGINCODESPACERANGE,
- TENDCODESPACERANGE,
- TBEGINBFCHAR,
- TENDBFCHAR,
- TBEGINBFRANGE,
- TENDBFRANGE,
- TBEGINCIDCHAR,
- TENDCIDCHAR,
- TBEGINCIDRANGE,
- TENDCIDRANGE
-};
-
-static pdf_token_e tokenfromkeyword(char *key)
-{
- if (!strcmp(key, "usecmap")) return TUSECMAP;
- if (!strcmp(key, "begincodespacerange")) return TBEGINCODESPACERANGE;
- if (!strcmp(key, "endcodespacerange")) return TENDCODESPACERANGE;
- if (!strcmp(key, "beginbfchar")) return TBEGINBFCHAR;
- if (!strcmp(key, "endbfchar")) return TENDBFCHAR;
- if (!strcmp(key, "beginbfrange")) return TBEGINBFRANGE;
- if (!strcmp(key, "endbfrange")) return TENDBFRANGE;
- if (!strcmp(key, "begincidchar")) return TBEGINCIDCHAR;
- if (!strcmp(key, "endcidchar")) return TENDCIDCHAR;
- if (!strcmp(key, "begincidrange")) return TBEGINCIDRANGE;
- if (!strcmp(key, "endcidrange")) return TENDCIDRANGE;
- return PDF_TKEYWORD;
-}
-
-static int codefromstring(char *buf, int len)
-{
- int a = 0;
- while (len--)
- a = (a << 8) | *(unsigned char *)buf++;
- return a;
-}
-
-static fz_error *lexcmap(pdf_token_e *tok, fz_stream *file, char *buf, int n, int *sl)
-{
- fz_error *error;
-
- error = pdf_lex(tok, file, buf, n, sl);
- if (error)
- return fz_rethrow(error, "cannot parse cmap token");
-
- if (*tok == PDF_TKEYWORD)
- *tok = tokenfromkeyword(buf);
-
- return fz_okay;
-}
-
-static fz_error *parsecmapname(pdf_cmap *cmap, fz_stream *file)
-{
- fz_error *error;
- char buf[256];
- pdf_token_e tok;
- int len;
-
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
-
- if (tok == PDF_TNAME)
- {
- strlcpy(cmap->cmapname, buf, sizeof(cmap->cmapname));
- return fz_okay;
- }
-
- return fz_throw("expected name");
-}
-
-static fz_error *parsewmode(pdf_cmap *cmap, fz_stream *file)
-{
- fz_error *error;
- char buf[256];
- pdf_token_e tok;
- int len;
-
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
-
- if (tok == PDF_TINT)
- {
- pdf_setwmode(cmap, atoi(buf));
- return fz_okay;
- }
-
- return fz_throw("expected integer");
-}
-
-static fz_error *parsecodespacerange(pdf_cmap *cmap, fz_stream *file)
-{
- fz_error *error;
- char buf[256];
- pdf_token_e tok;
- int len;
- int lo, hi;
-
- while (1)
- {
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
-
- if (tok == TENDCODESPACERANGE)
- return fz_okay;
-
- else if (tok == PDF_TSTRING)
- {
- lo = codefromstring(buf, len);
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
- if (tok == PDF_TSTRING)
- {
- hi = codefromstring(buf, len);
- error = pdf_addcodespace(cmap, lo, hi, len);
- if (error)
- return fz_rethrow(error, "cannot add code space");
- }
- else break;
- }
-
- else break;
- }
-
- return fz_throw("expected string or endcodespacerange");
-}
-
-static fz_error *parsecidrange(pdf_cmap *cmap, fz_stream *file)
-{
- fz_error *error;
- char buf[256];
- pdf_token_e tok;
- int len;
- int lo, hi, dst;
-
- while (1)
- {
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
-
- if (tok == TENDCIDRANGE)
- return fz_okay;
-
- else if (tok != PDF_TSTRING)
- return fz_throw("expected string or endcidrange");
-
- lo = codefromstring(buf, len);
-
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
- if (tok != PDF_TSTRING)
- return fz_throw("expected string");
-
- hi = codefromstring(buf, len);
-
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
- if (tok != PDF_TINT)
- return fz_throw("expected integer");
-
- dst = atoi(buf);
-
- error = pdf_maprangetorange(cmap, lo, hi, dst);
- if (error)
- return fz_rethrow(error, "cannot map cidrange");
- }
-}
-
-static fz_error *parsecidchar(pdf_cmap *cmap, fz_stream *file)
-{
- fz_error *error;
- char buf[256];
- pdf_token_e tok;
- int len;
- int src, dst;
-
- while (1)
- {
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
-
- if (tok == TENDCIDCHAR)
- return fz_okay;
-
- else if (tok != PDF_TSTRING)
- return fz_throw("expected string or endcidchar");
-
- src = codefromstring(buf, len);
-
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
- if (tok != PDF_TINT)
- return fz_throw("expected integer");
-
- dst = atoi(buf);
-
- error = pdf_maprangetorange(cmap, src, src, dst);
- if (error)
- return fz_rethrow(error, "cannot map cidchar");
- }
-}
-
-static fz_error *parsebfrangearray(pdf_cmap *cmap, fz_stream *file, int lo, int hi)
-{
- fz_error *error;
- char buf[256];
- pdf_token_e tok;
- int len;
- int dst[256];
- int i;
-
- while (1)
- {
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
-
- if (tok == PDF_TCARRAY)
- return fz_okay;
-
- /* Note: does not handle [ /Name /Name ... ] */
- else if (tok != PDF_TSTRING)
- return fz_throw("expected string or ]");
-
- if (len / 2)
- {
- for (i = 0; i < len / 2; i++)
- dst[i] = codefromstring(buf + i * 2, 2);
-
- error = pdf_maponetomany(cmap, lo, dst, len / 2);
- if (error)
- return fz_rethrow(error, "cannot map bfrange array");
- }
-
- lo ++;
- }
-}
-
-static fz_error *parsebfrange(pdf_cmap *cmap, fz_stream *file)
-{
- fz_error *error;
- char buf[256];
- pdf_token_e tok;
- int len;
- int lo, hi, dst;
-
- while (1)
- {
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
-
- if (tok == TENDBFRANGE)
- return fz_okay;
-
- else if (tok != PDF_TSTRING)
- return fz_throw("expected string or endbfrange");
-
- lo = codefromstring(buf, len);
-
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
- if (tok != PDF_TSTRING)
- return fz_throw("expected string");
-
- hi = codefromstring(buf, len);
-
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
-
- if (tok == PDF_TSTRING)
- {
- if (len == 2)
- {
- dst = codefromstring(buf, len);
- error = pdf_maprangetorange(cmap, lo, hi, dst);
- if (error)
- return fz_rethrow(error, "cannot map bfrange");
- }
- else
- {
- int dststr[256];
- int i;
-
- if (len / 2)
- {
- for (i = 0; i < len / 2; i++)
- dststr[i] = codefromstring(buf + i * 2, 2);
-
- while (lo <= hi)
- {
- dststr[i-1] ++;
- error = pdf_maponetomany(cmap, lo, dststr, i);
- if (error)
- return fz_rethrow(error, "cannot map bfrange");
- lo ++;
- }
- }
- }
- }
-
- else if (tok == PDF_TOARRAY)
- {
- error = parsebfrangearray(cmap, file, lo, hi);
- if (error)
- return fz_rethrow(error, "cannot map bfrange");
- }
-
- else
- {
- return fz_throw("expected string or array or endbfrange");
- }
- }
-}
-
-static fz_error *parsebfchar(pdf_cmap *cmap, fz_stream *file)
-{
- fz_error *error;
- char buf[256];
- pdf_token_e tok;
- int len;
- int dst[256];
- int src;
- int i;
-
- while (1)
- {
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
-
- if (tok == TENDBFCHAR)
- return fz_okay;
-
- else if (tok != PDF_TSTRING)
- return fz_throw("expected string or endbfchar");
-
- src = codefromstring(buf, len);
-
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- return fz_rethrow(error, "syntaxerror in cmap");
- /* Note: does not handle /dstName */
- if (tok != PDF_TSTRING)
- return fz_throw("expected string");
-
- if (len / 2)
- {
- for (i = 0; i < len / 2; i++)
- dst[i] = codefromstring(buf + i * 2, 2);
-
- error = pdf_maponetomany(cmap, src, dst, i);
- if (error)
- return fz_rethrow(error, "cannot map bfchar");
- }
- }
-}
-
-fz_error *
-pdf_parsecmap(pdf_cmap **cmapp, fz_stream *file)
-{
- fz_error *error;
- pdf_cmap *cmap;
- char key[64];
- char buf[256];
- pdf_token_e tok;
- int len;
-
- error = pdf_newcmap(&cmap);
- if (error)
- return fz_rethrow(error, "cannot create cmap");
-
- strcpy(key, ".notdef");
-
- while (1)
- {
- error = lexcmap(&tok, file, buf, sizeof buf, &len);
- if (error)
- {
- error = fz_rethrow(error, "syntaxerror in cmap");
- goto cleanup;
- }
-
- if (tok == PDF_TEOF)
- break;
-
- else if (tok == PDF_TNAME)
- {
- if (!strcmp(buf, "CMapName"))
- {
- error = parsecmapname(cmap, file);
- if (error)
- {
- error = fz_rethrow(error, "syntaxerror in cmap after /CMapName");
- goto cleanup;
- }
- }
- else if (!strcmp(buf, "WMode"))
- {
- error = parsewmode(cmap, file);
- if (error)
- {
- error = fz_rethrow(error, "syntaxerror in cmap after /WMode");
- goto cleanup;
- }
- }
- else
- strlcpy(key, buf, sizeof key);
- }
-
- else if (tok == TUSECMAP)
- {
- strlcpy(cmap->usecmapname, key, sizeof(cmap->usecmapname));
- }
-
- else if (tok == TBEGINCODESPACERANGE)
- {
- error = parsecodespacerange(cmap, file);
- if (error)
- {
- error = fz_rethrow(error, "syntaxerror in cmap codespacerange");
- goto cleanup;
- }
- }
-
- else if (tok == TBEGINBFCHAR)
- {
- error = parsebfchar(cmap, file);
- if (error)
- {
- error = fz_rethrow(error, "syntaxerror in cmap bfchar");
- goto cleanup;
- }
- }
-
- else if (tok == TBEGINCIDCHAR)
- {
- error = parsecidchar(cmap, file);
- if (error)
- {
- error = fz_rethrow(error, "syntaxerror in cmap cidchar");
- goto cleanup;
- }
- }
-
- else if (tok == TBEGINBFRANGE)
- {
- error = parsebfrange(cmap, file);
- if (error)
- {
- error = fz_rethrow(error, "syntaxerror in cmap bfrange");
- goto cleanup;
- }
- }
-
- else if (tok == TBEGINCIDRANGE)
- {
- error = parsecidrange(cmap, file);
- if (error)
- {
- error = fz_rethrow(error, "syntaxerror in cmap cidrange");
- goto cleanup;
- }
- }
-
- /* ignore everything else */
- }
-
- error = pdf_sortcmap(cmap);
- if (error)
- {
- error = fz_rethrow(error, "cannot sort cmap");
- goto cleanup;
- }
-
- *cmapp = cmap;
- return fz_okay;
-
-cleanup:
- pdf_dropcmap(cmap);
- return error; /* already rethrown */
-}
-
-/*
- * Load CMap stream in PDF file
- */
-fz_error *
-pdf_loadembeddedcmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *stmref)
-{
- fz_error *error = fz_okay;
- fz_obj *stmobj = stmref;
- fz_stream *file;
- pdf_cmap *cmap = nil;
- pdf_cmap *usecmap;
- fz_obj *wmode;
- fz_obj *obj;
-
- if ((*cmapp = pdf_finditem(xref->store, PDF_KCMAP, stmref)))
- {
- pdf_keepcmap(*cmapp);
- return fz_okay;
- }
-
- pdf_logfont("load embedded cmap (%d %d R) {\n", fz_tonum(stmref), fz_togen(stmref));
-
- error = pdf_resolve(&stmobj, xref);
- if (error)
- return fz_rethrow(error, "cannot resolve cmap object");
-
- error = pdf_openstream(&file, xref, fz_tonum(stmref), fz_togen(stmref));
- if (error)
- {
- error = fz_rethrow(error, "cannot open cmap stream");
- goto cleanup;
- }
-
- error = pdf_parsecmap(&cmap, file);
- if (error)
- {
- error = fz_rethrow(error, "cannot parse cmap stream");
- goto cleanup;
- }
-
- fz_dropstream(file);
-
- wmode = fz_dictgets(stmobj, "WMode");
- if (fz_isint(wmode))
- {
- pdf_logfont("wmode %d\n", wmode);
- pdf_setwmode(cmap, fz_toint(wmode));
- }
-
- obj = fz_dictgets(stmobj, "UseCMap");
- if (fz_isname(obj))
- {
- pdf_logfont("usecmap /%s\n", fz_toname(obj));
- error = pdf_loadsystemcmap(&usecmap, fz_toname(obj));
- if (error)
- {
- error = fz_rethrow(error, "cannot load system usecmap '%s'", fz_toname(obj));
- goto cleanup;
- }
- pdf_setusecmap(cmap, usecmap);
- pdf_dropcmap(usecmap);
- }
- else if (fz_isindirect(obj))
- {
- pdf_logfont("usecmap (%d %d R)\n", fz_tonum(obj), fz_togen(obj));
- error = pdf_loadembeddedcmap(&usecmap, xref, obj);
- if (error)
- {
- error = fz_rethrow(error, "cannot load embedded usecmap");
- goto cleanup;
- }
- pdf_setusecmap(cmap, usecmap);
- pdf_dropcmap(usecmap);
- }
-
- pdf_logfont("}\n");
-
- error = pdf_storeitem(xref->store, PDF_KCMAP, stmref, cmap);
- if (error)
- {
- error = fz_rethrow(error, "cannot store cmap resource");
- goto cleanup;
- }
-
- fz_dropobj(stmobj);
-
- *cmapp = cmap;
- return fz_okay;
-
-cleanup:
- if (cmap)
- pdf_dropcmap(cmap);
- fz_dropobj(stmobj);
- return error; /* already rethrown */
-}
-
-/*
- * Load predefined CMap from system. We keep these files
- * embedded in the executable, and parse them when needed,
- * and cache the parsed copy for later reuse.
- */
-fz_error *
-pdf_loadsystemcmap(pdf_cmap **cmapp, char *cmapname)
-{
- fz_error *error;
- fz_stream *file;
- pdf_cmap *usecmap;
- pdf_cmap *cmap;
- int i;
-
- for (i = 0; pdf_cmaptable[i].name; i++)
- if (!strcmp(cmapname, pdf_cmaptable[i].name))
- goto found;
-
- return fz_throw("no builtin cmap file: %s", cmapname);
-
-found:
- if (pdf_cmaptable[i].cmap)
- {
- pdf_logfont("reuse system cmap %s\n", cmapname);
- pdf_keepcmap(pdf_cmaptable[i].cmap);
- *cmapp = pdf_cmaptable[i].cmap;
- return fz_okay;
- }
-
- pdf_logfont("load system cmap %s\n", cmapname);
-
- error = fz_openrmemory(&file, pdf_cmaptable[i].buf, *pdf_cmaptable[i].len);
- if (error)
- return fz_rethrow(error, "cannot create stream to read cmap from memory");
-
- error = pdf_parsecmap(&pdf_cmaptable[i].cmap, file);
- if (error)
- {
- fz_dropstream(file);
- return fz_rethrow(error, "cannot parse system cmap '%s'", cmapname);
- }
-
- fz_dropstream(file);
-
- /* Keep an extra reference, since we have it cached */
- pdf_keepcmap(pdf_cmaptable[i].cmap);
-
- if (pdf_cmaptable[i].cmap->usecmapname[0])
- {
- pdf_logfont("usecmap %s\n", pdf_cmaptable[i].cmap->usecmapname);
- error = pdf_loadsystemcmap(&usecmap, pdf_cmaptable[i].cmap->usecmapname);
- if (error)
- return fz_rethrow(error, "cannot load usecmap '%s'", pdf_cmaptable[i].cmap->usecmapname);
- pdf_setusecmap(pdf_cmaptable[i].cmap, usecmap);
- pdf_dropcmap(usecmap);
- }
-
- pdf_keepcmap(pdf_cmaptable[i].cmap);
- *cmapp = pdf_cmaptable[i].cmap;
- return fz_okay;
-}
-
-/*
- * Create an Identity-* CMap (for both 1 and 2-byte encodings)
- */
-fz_error *
-pdf_newidentitycmap(pdf_cmap **cmapp, int wmode, int bytes)
-{
- fz_error *error;
- pdf_cmap *cmap;
-
- error = pdf_newcmap(&cmap);
- if (error)
- return fz_rethrow(error, "cannot create cmap");
-
- sprintf(cmap->cmapname, "Identity-%c", wmode ? 'V' : 'H');
-
- error = pdf_addcodespace(cmap, 0x0000, 0xffff, bytes);
- if (error) {
- pdf_dropcmap(cmap);
- return fz_rethrow(error, "cannot add code space");
- }
-
- error = pdf_maprangetorange(cmap, 0x0000, 0xffff, 0);
- if (error) {
- pdf_dropcmap(cmap);
- return fz_rethrow(error, "cannot map <0000> to <ffff>");
- }
-
- error = pdf_sortcmap(cmap);
- if (error) {
- pdf_dropcmap(cmap);
- return fz_rethrow(error, "cannot sort cmap");
- }
-
- pdf_setwmode(cmap, wmode);
-
- *cmapp = cmap;
- return fz_okay;
-}
-