diff options
-rw-r--r-- | include/fitz-world.h | 1 | ||||
-rw-r--r-- | include/fitz/cmap.h | 26 | ||||
-rw-r--r-- | include/fitz/tree.h | 4 | ||||
-rw-r--r-- | include/mupdf/rsrc.h | 45 | ||||
-rw-r--r-- | mupdf/pdf_build.c | 10 | ||||
-rw-r--r-- | mupdf/pdf_cmap.c | 739 | ||||
-rw-r--r-- | mupdf/pdf_font.c | 20 | ||||
-rw-r--r-- | mupdf/pdf_interpret.c | 18 | ||||
-rw-r--r-- | mupdf/pdf_store.c | 4 | ||||
-rw-r--r-- | mupdf/pdf_type3.c | 4 | ||||
-rw-r--r-- | mupdf/pdf_unicode.c | 25 | ||||
-rw-r--r-- | world/Jamfile | 1 | ||||
-rw-r--r-- | world/node_misc2.c | 15 | ||||
-rw-r--r-- | world/node_toxml.c | 2 | ||||
-rw-r--r-- | world/res_cmap.c | 467 |
15 files changed, 755 insertions, 626 deletions
diff --git a/include/fitz-world.h b/include/fitz-world.h index 86c9e5f1..0f1855f5 100644 --- a/include/fitz-world.h +++ b/include/fitz-world.h @@ -11,7 +11,6 @@ #error "fitz-base.h must be included before fitz-world.h" #endif -#include "fitz/cmap.h" #include "fitz/font.h" #include "fitz/pixmap.h" #include "fitz/colorspace.h" diff --git a/include/fitz/cmap.h b/include/fitz/cmap.h deleted file mode 100644 index c5a4ad6f..00000000 --- a/include/fitz/cmap.h +++ /dev/null @@ -1,26 +0,0 @@ -typedef struct fz_cmap_s fz_cmap; - -fz_error *fz_newcmap(fz_cmap **cmapp); -void fz_debugcmap(fz_cmap *cmap); -fz_cmap *fz_keepcmap(fz_cmap *cmap); -void fz_dropcmap(fz_cmap *cmap); - -char *fz_getcmapname(fz_cmap *cmap); -void fz_setcmapname(fz_cmap *cmap, char *name); -char *fz_getusecmapname(fz_cmap *cmap); -void fz_setusecmapname(fz_cmap *cmap, char *usecmap); -void fz_setusecmap(fz_cmap *cmap, fz_cmap *usecmap); -fz_cmap *fz_getusecmap(fz_cmap *cmap); -void fz_setwmode(fz_cmap *cmap, int wmode); -int fz_getwmode(fz_cmap *cmap); - -fz_error *fz_addcodespacerange(fz_cmap *cmap, unsigned lo, unsigned hi, int n); - -fz_error *fz_setcidlookup(fz_cmap *cmap, int map[256]); - -fz_error *fz_addcidrange(fz_cmap *cmap, int srclo, int srchi, int dstlo); -fz_error *fz_endcidrange(fz_cmap *cmap); - -int fz_lookupcid(fz_cmap *cmap, int cpt); -unsigned char *fz_decodecpt(fz_cmap *cmap, unsigned char *s, int *cpt); - diff --git a/include/fitz/tree.h b/include/fitz/tree.h index 22665736..69143b1f 100644 --- a/include/fitz/tree.h +++ b/include/fitz/tree.h @@ -134,7 +134,7 @@ struct fz_linknode_s struct fz_metanode_s { fz_node super; - void *name; + char *name; void *dict; }; @@ -156,7 +156,7 @@ fz_rect fz_boundnode(fz_node *node, fz_matrix ctm); void fz_dropnode(fz_node *node); /* branch nodes */ -fz_error *fz_newmetanode(fz_node **nodep, void *name, void *dict); +fz_error *fz_newmetanode(fz_node **nodep, char *name, void *dict); fz_error *fz_newovernode(fz_node **nodep); fz_error *fz_newmasknode(fz_node **nodep); fz_error *fz_newblendnode(fz_node **nodep, fz_colorspace *cs, fz_blendkind b, int k, int i); diff --git a/include/mupdf/rsrc.h b/include/mupdf/rsrc.h index 71475452..9d1e4595 100644 --- a/include/mupdf/rsrc.h +++ b/include/mupdf/rsrc.h @@ -140,7 +140,38 @@ fz_error *pdf_loadimage(pdf_image **imgp, pdf_xref *xref, fz_obj *obj, fz_obj *r fz_error *pdf_loadtile(fz_image *image, fz_pixmap *tile); /* - * CMap and Font + * CMap + */ + +typedef struct pdf_cmap_s pdf_cmap; + +fz_error *pdf_newcmap(pdf_cmap **cmapp); +pdf_cmap *pdf_keepcmap(pdf_cmap *cmap); +void pdf_dropcmap(pdf_cmap *cmap); + +void pdf_debugcmap(pdf_cmap *cmap); +int pdf_getwmode(pdf_cmap *cmap); +pdf_cmap *fz_getusecmap(pdf_cmap *cmap); +void fz_setwmode(pdf_cmap *cmap, int wmode); +void fz_setusecmap(pdf_cmap *cmap, pdf_cmap *usecmap); + +fz_error *pdf_addcodespace(pdf_cmap *cmap, unsigned lo, unsigned hi, int n); + +fz_error *pdf_maprangetotable(pdf_cmap *cmap, int low, int *map, int len); +fz_error *pdf_maprangetorange(pdf_cmap *cmap, int srclo, int srchi, int dstlo); +fz_error *pdf_maponetomany(pdf_cmap *cmap, int one, int *many, int len); +fz_error *pdf_sortcmap(pdf_cmap *cmap); + +int pdf_lookupcmap(pdf_cmap *cmap, int cpt); +unsigned char *pdf_decodecmap(pdf_cmap *cmap, unsigned char *s, int *cpt); + +fz_error *pdf_parsecmap(pdf_cmap **cmapp, fz_stream *file); +fz_error *pdf_loadembeddedcmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *ref); +fz_error *pdf_loadsystemcmap(pdf_cmap **cmapp, char *name); +fz_error *pdf_newidentitycmap(pdf_cmap **cmapp, int wmode, int bytes); + +/* + * Font */ void pdf_loadencoding(char **estrings, char *encoding); @@ -171,13 +202,13 @@ struct pdf_font_s float missingwidth; /* Encoding (CMap) */ - fz_cmap *encoding; - fz_cmap *tottfcmap; + pdf_cmap *encoding; + pdf_cmap *tottfcmap; int ncidtogid; unsigned short *cidtogid; /* ToUnicode */ - fz_cmap *tounicode; + pdf_cmap *tounicode; int ncidtoucs; unsigned short *cidtoucs; @@ -192,12 +223,6 @@ struct pdf_font_s fz_tree *charprocs[256]; }; -/* cmap.c */ -fz_error *pdf_parsecmap(fz_cmap **cmapp, fz_stream *file); -fz_error *pdf_loadembeddedcmap(fz_cmap **cmapp, pdf_xref *xref, fz_obj *ref); -fz_error *pdf_loadsystemcmap(fz_cmap **cmapp, char *name); -fz_error *pdf_makeidentitycmap(fz_cmap **cmapp, int wmode, int bytes); - /* unicode.c */ fz_error *pdf_loadtounicode(pdf_font *font, pdf_xref *xref, char **strings, char *collection, fz_obj *cmapstm); diff --git a/mupdf/pdf_build.c b/mupdf/pdf_build.c index 736b2de1..40f71d0f 100644 --- a/mupdf/pdf_build.c +++ b/mupdf/pdf_build.c @@ -281,7 +281,6 @@ addpatternshape(pdf_gstate *gs, fz_node *shape, fz_matrix inv; fz_matrix ptm; fz_rect bbox; - fz_obj *name; fz_obj *dict; int x, y, x0, y0, x1, y1; @@ -296,9 +295,6 @@ addpatternshape(pdf_gstate *gs, fz_node *shape, error = fz_newtransformnode(&xform, ptm); if (error) return error; - error = fz_newname(&name, "Pattern"); - if (error) return error; - error = fz_packobj(&dict, "<< /Tree %p /XStep %f /YStep %f " " /Matrix[%f %f %f %f %f %f] >>", pat->tree, pat->xstep, pat->ystep, @@ -307,7 +303,7 @@ addpatternshape(pdf_gstate *gs, fz_node *shape, pat->matrix.e, pat->matrix.f); if (error) return error; - error = fz_newmetanode(&meta, name, dict); + error = fz_newmetanode(&meta, "Pattern", dict); if (error) return error; error = fz_newovernode(&over); @@ -776,8 +772,8 @@ pdf_showtext(pdf_csi *csi, fz_obj *text) while (buf < end) { - buf = fz_decodecpt(font->encoding, buf, &cpt); - cid = fz_lookupcid(font->encoding, cpt); + buf = pdf_decodecmap(font->encoding, buf, &cpt); + cid = pdf_lookupcmap(font->encoding, cpt); if (cid == -1) cid = 0; diff --git a/mupdf/pdf_cmap.c b/mupdf/pdf_cmap.c index 45a0eed7..4729322f 100644 --- a/mupdf/pdf_cmap.c +++ b/mupdf/pdf_cmap.c @@ -1,5 +1,545 @@ -#include <fitz.h> -#include <mupdf.h> +/* + * The CMap data structure here is constructed on the fly by + * adding simple range-to-range mappings. Then the data structure + * is optimized to contain both range-to-range and range-to-table + * lookups. + * + * Any one-to-many mappings are inserted as one-to-table + * lookups in the beginning, and are not affected by the optimization + * stage. + * + * There is a special function to add a 256-length range-to-table mapping. + * The ranges do not have to be added in order. + * + * This code can be a lot simpler if we don't care about wasting memory, + * or can trust the parser to give us optimal mappings. + */ + +#include "fitz.h" +#include "mupdf.h" + +typedef struct pdf_range_s pdf_range; + +enum { MAXCODESPACE = 10 }; +enum { SINGLE, RANGE, TABLE, MULTI }; + +struct pdf_range_s +{ + int low; + int high; + int flag; /* what kind of lookup is this */ + int offset; /* either range-delta or table-index */ +}; + +static int +cmprange(const void *va, const void *vb) +{ + return ((const pdf_range*)va)->low - ((const pdf_range*)vb)->low; +} + +struct pdf_cmap_s +{ + int refs; + char cmapname[32]; + + char usecmapname[32]; + pdf_cmap *usecmap; + + int wmode; + + int ncspace; + struct { + int n; + unsigned char lo[4]; + unsigned char hi[4]; + } cspace[MAXCODESPACE]; + + int rlen, rcap; + pdf_range *ranges; + + int tlen, tcap; + int *table; +}; + +/* + * Allocate, destroy and simple parameters. + */ + +fz_error * +pdf_newcmap(pdf_cmap **cmapp) +{ + pdf_cmap *cmap; + + cmap = *cmapp = fz_malloc(sizeof(pdf_cmap)); + if (!cmap) + return fz_outofmem; + + cmap->refs = 1; + strcpy(cmap->cmapname, ""); + + strcpy(cmap->usecmapname, ""); + cmap->usecmap = nil; + + cmap->wmode = 0; + + cmap->ncspace = 0; + + cmap->rlen = 0; + cmap->rcap = 0; + cmap->ranges = nil; + + cmap->tlen = 0; + cmap->tcap = 0; + cmap->table = nil; + + return nil; +} + +pdf_cmap * +pdf_keepcmap(pdf_cmap *cmap) +{ + cmap->refs ++; + return cmap; +} + +void +pdf_dropcmap(pdf_cmap *cmap) +{ + if (--cmap->refs == 0) + { + if (cmap->usecmap) + pdf_dropcmap(cmap->usecmap); + fz_free(cmap->ranges); + fz_free(cmap->table); + fz_free(cmap); + } +} + +pdf_cmap * +pdf_getusecmap(pdf_cmap *cmap) +{ + return cmap->usecmap; +} + +void +pdf_setusecmap(pdf_cmap *cmap, pdf_cmap *usecmap) +{ + int i; + + if (cmap->usecmap) + pdf_dropcmap(cmap->usecmap); + cmap->usecmap = pdf_keepcmap(usecmap); + + if (cmap->ncspace == 0) + { + cmap->ncspace = usecmap->ncspace; + for (i = 0; i < usecmap->ncspace; i++) + cmap->cspace[i] = usecmap->cspace[i]; + } +} + +int +pdf_getwmode(pdf_cmap *cmap) +{ + return cmap->wmode; +} + +void +pdf_setwmode(pdf_cmap *cmap, int wmode) +{ + cmap->wmode = wmode; +} + +void +pdf_debugcmap(pdf_cmap *cmap) +{ + int i, k, n; + + printf("cmap $%p /%s {\n", cmap, cmap->cmapname); + + if (cmap->usecmapname[0]) + printf(" usecmap /%s\n", cmap->usecmapname); + if (cmap->usecmap) + printf(" usecmap $%p\n", cmap->usecmap); + + printf(" wmode %d\n", cmap->wmode); + + printf(" codespaces {\n"); + for (i = 0; i < cmap->ncspace; i++) + { + printf(" <"); + for (k = 0; k < cmap->cspace[i].n; k++) + printf("%02x", cmap->cspace[i].lo[k]); + printf("> <"); + for (k = 0; k < cmap->cspace[i].n; k++) + printf("%02x", cmap->cspace[i].hi[k]); + printf(">\n"); + } + printf(" }\n"); + + printf(" ranges (%d,%d) {\n", cmap->rlen, cmap->tlen); + for (i = 0; i < cmap->rlen; i++) + { + pdf_range *r = &cmap->ranges[i]; + printf(" <%04x> <%04x> ", r->low, r->high); + if (r->flag == TABLE) + { + printf("[ "); + for (k = 0; k < r->high - r->low + 1; k++) + printf("%d ", cmap->table[r->offset + k]); + printf("]\n"); + } + else if (r->flag == MULTI) + { + printf("< "); + n = cmap->table[r->offset]; + for (k = 0; k < n; k++) + printf("%04x ", cmap->table[r->offset + 1 + k]); + printf(">\n"); + } + else + printf("%d\n", r->offset); + } + printf(" }\n}\n"); +} + +/* + * Add a codespacerange section. + * These ranges are used by pdf_decodecmap to decode + * multi-byte encoded strings. + */ +fz_error * +pdf_addcodespace(pdf_cmap *cmap, unsigned lo, unsigned hi, int n) +{ + int i; + + if (cmap->ncspace + 1 == MAXCODESPACE) + return fz_throw("rangelimit: too many code space ranges"); + + cmap->cspace[cmap->ncspace].n = n; + + for (i = 0; i < n; i++) + { + int o = (n - i - 1) * 8; + cmap->cspace[cmap->ncspace].lo[i] = (lo >> o) & 0xFF; + cmap->cspace[cmap->ncspace].hi[i] = (hi >> o) & 0xFF; + } + + cmap->ncspace ++; + + return nil; +} + +/* + * Add an integer to the table. + */ +static fz_error * +addtable(pdf_cmap *cmap, int value) +{ + if (cmap->tlen + 1 > cmap->tcap) + { + int newcap = cmap->tcap == 0 ? 256 : cmap->tcap * 2; + int *newtable = fz_realloc(cmap->table, newcap * sizeof(int)); + if (!newtable) + return fz_outofmem; + cmap->tcap = newcap; + cmap->table = newtable; + } + + cmap->table[cmap->tlen++] = value; + + return nil; +} + +/* + * Add a range. + */ +static fz_error * +addrange(pdf_cmap *cmap, int low, int high, int flag, int offset) +{ + if (cmap->rlen + 1 > cmap->rcap) + { + pdf_range *newranges; + int newcap = cmap->rcap == 0 ? 256 : cmap->rcap * 2; + newranges = fz_realloc(cmap->ranges, newcap * sizeof(pdf_range)); + if (!newranges) + return fz_outofmem; + cmap->rcap = newcap; + cmap->ranges = newranges; + } + + cmap->ranges[cmap->rlen].low = low; + cmap->ranges[cmap->rlen].high = high; + cmap->ranges[cmap->rlen].flag = flag; + cmap->ranges[cmap->rlen].offset = offset; + cmap->rlen ++; + + return nil; +} + +/* + * Add a range-to-table mapping. + */ +fz_error * +pdf_maprangetotable(pdf_cmap *cmap, int low, int *table, int len) +{ + fz_error *error; + int offset; + int high; + int i; + + high = low + len; + offset = cmap->tlen; + + for (i = 0; i < len; i++) + { + error = addtable(cmap, table[i]); + if (error) + return error; + } + + return addrange(cmap, low, high, TABLE, offset); +} + +/* + * Add a range of contiguous one-to-one mappings (ie 1..5 maps to 21..25) + */ +fz_error * +pdf_maprangetorange(pdf_cmap *cmap, int low, int high, int offset) +{ + return addrange(cmap, low, high, high - low == 0 ? SINGLE : RANGE, offset); +} + +/* + * Add a single one-to-many mapping. + */ +fz_error * +pdf_maponetomany(pdf_cmap *cmap, int low, int *values, int len) +{ + fz_error *error; + int offset; + int i; + + if (len == 1) + return addrange(cmap, low, low, SINGLE, values[0]); + + offset = cmap->tlen; + + error = addtable(cmap, len); + if (error) + return error; + + for (i = 0; i < len; i++) + { + addtable(cmap, values[i]); + if (error) + return error; + } + + return addrange(cmap, low, low, MULTI, offset); +} + +/* + * Sort the input ranges. + * Merge contiguous input ranges to range-to-range if the output is contiguos. + * Merge contiguous input ranges to range-to-table if the output is random. + */ +fz_error * +pdf_sortcmap(pdf_cmap *cmap) +{ + fz_error *error; + pdf_range *newranges; + int *newtable; + pdf_range *a; /* last written range on output */ + pdf_range *b; /* current range examined on input */ + + qsort(cmap->ranges, cmap->rlen, sizeof(pdf_range), cmprange); + + a = cmap->ranges; + b = cmap->ranges + 1; + + while (b < cmap->ranges + cmap->rlen) + { + /* ignore one-to-many mappings */ + if (b->flag == MULTI) + { + *(++a) = *b; + } + + /* input contiguous */ + else if (a->high + 1 == b->low) + { + /* output contiguous */ + if (a->high - a->low + a->offset + 1 == b->offset) + { + /* SR -> R and SS -> R and RR -> R and RS -> R */ + if (a->flag == SINGLE || a->flag == RANGE) + { + a->flag = RANGE; + a->high = b->high; + } + + /* LS -> L */ + else if (a->flag == TABLE && b->flag == SINGLE) + { + a->high = b->high; + error = addtable(cmap, b->offset); + if (error) + return error; + } + + /* LR -> LR */ + else if (a->flag == TABLE && b->flag == RANGE) + { + *(++a) = *b; + } + + /* XX -> XX */ + else + { + *(++a) = *b; + } + } + + /* output separated */ + else + { + /* SS -> L */ + if (a->flag == SINGLE && b->flag == SINGLE) + { + a->flag = TABLE; + a->high = b->high; + + error = addtable(cmap, a->offset); + if (error) + return error; + + error = addtable(cmap, b->offset); + if (error) + return error; + + a->offset = cmap->tlen - 2; + } + + /* LS -> L */ + else if (a->flag == TABLE && b->flag == SINGLE) + { + a->high = b->high; + error = addtable(cmap, b->offset); + if (error) + return error; + } + + /* XX -> XX */ + else + { + *(++a) = *b; + } + } + } + + /* input separated: XX -> XX */ + else + { + *(++a) = *b; + } + + b ++; + } + + cmap->rlen = a - cmap->ranges + 1; + + assert(cmap->rlen > 0); + + newranges = fz_realloc(cmap->ranges, cmap->rlen * sizeof(pdf_range)); + if (!newranges) + return fz_outofmem; + cmap->rcap = cmap->rlen; + cmap->ranges = newranges; + + if (cmap->tlen) + { + newtable = fz_realloc(cmap->table, cmap->tlen * sizeof(int)); + if (!newtable) + return fz_outofmem; + cmap->tcap = cmap->tlen; + cmap->table = newtable; + } + + return nil; +} + +/* + * Lookup the mapping of a codepoint. + */ +int +pdf_lookupcmap(pdf_cmap *cmap, int cpt) +{ + int l = 0; + int r = cmap->rlen - 1; + int m; + + while (l <= r) + { + m = (l + r) >> 1; + if (cpt < cmap->ranges[m].low) + r = m - 1; + else if (cpt > cmap->ranges[m].high) + l = m + 1; + else + { + int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset; + if (cmap->ranges[m].flag == TABLE) + return cmap->table[i]; + if (cmap->ranges[m].flag == MULTI) + return -1; + return i; + } + } + + if (cmap->usecmap) + return pdf_lookupcmap(cmap->usecmap, cpt); + + return -1; +} + +/* + * Use the codespace ranges to extract a codepoint from a + * multi-byte encoded string. + */ +unsigned char * +pdf_decodecmap(pdf_cmap *cmap, unsigned char *buf, int *cpt) +{ + int i, k; + + for (k = 0; k < cmap->ncspace; k++) + { + unsigned char *lo = cmap->cspace[k].lo; + unsigned char *hi = cmap->cspace[k].hi; + int n = cmap->cspace[k].n; + int c = 0; + + for (i = 0; i < n; i++) + { + if (lo[i] <= buf[i] && buf[i] <= hi[i]) + c = (c << 8) | buf[i]; + else + break; + } + + if (i == n) { + *cpt = c; + return buf + n; + } + } + + *cpt = 0; + return buf + 1; +} + +/* + * CMap parser + */ enum { @@ -48,7 +588,7 @@ static int mylex(fz_stream *file, char *buf, int n, int *sl) return token; } -static fz_error *parsecmapname(fz_cmap *cmap, fz_stream *file) +static fz_error *parsecmapname(pdf_cmap *cmap, fz_stream *file) { char buf[256]; int token; @@ -56,14 +596,14 @@ static fz_error *parsecmapname(fz_cmap *cmap, fz_stream *file) token = mylex(file, buf, sizeof buf, &len); if (token == PDF_TNAME) { - fz_setcmapname(cmap, buf); + strlcpy(cmap->cmapname, buf, sizeof(cmap->cmapname)); return nil; } return fz_throw("syntaxerror in CMap after /CMapName"); } -static fz_error *parsewmode(fz_cmap *cmap, fz_stream *file) +static fz_error *parsewmode(pdf_cmap *cmap, fz_stream *file) { char buf[256]; int token; @@ -71,14 +611,14 @@ static fz_error *parsewmode(fz_cmap *cmap, fz_stream *file) token = mylex(file, buf, sizeof buf, &len); if (token == PDF_TINT) { - fz_setwmode(cmap, atoi(buf)); + pdf_setwmode(cmap, atoi(buf)); return nil; } return fz_throw("syntaxerror in CMap after /WMode"); } -static fz_error *parsecodespacerange(fz_cmap *cmap, fz_stream *file) +static fz_error *parsecodespacerange(pdf_cmap *cmap, fz_stream *file) { char buf[256]; int token; @@ -100,7 +640,7 @@ static fz_error *parsecodespacerange(fz_cmap *cmap, fz_stream *file) if (token == PDF_TSTRING) { hi = codefromstring(buf, len); - error = fz_addcodespacerange(cmap, lo, hi, len); + error = pdf_addcodespace(cmap, lo, hi, len); if (error) return error; } @@ -113,7 +653,7 @@ static fz_error *parsecodespacerange(fz_cmap *cmap, fz_stream *file) return fz_throw("syntaxerror in CMap codespacerange section"); } -static fz_error *parsecidrange(fz_cmap *cmap, fz_stream *file) +static fz_error *parsecidrange(pdf_cmap *cmap, fz_stream *file) { char buf[256]; int token; @@ -145,7 +685,7 @@ static fz_error *parsecidrange(fz_cmap *cmap, fz_stream *file) dst = atoi(buf); - error = fz_addcidrange(cmap, lo, hi, dst); + error = pdf_maprangetorange(cmap, lo, hi, dst); if (error) return error; } @@ -154,7 +694,7 @@ cleanup: return fz_throw("syntaxerror in CMap cidrange section"); } -static fz_error *parsecidchar(fz_cmap *cmap, fz_stream *file) +static fz_error *parsecidchar(pdf_cmap *cmap, fz_stream *file) { char buf[256]; int token; @@ -180,7 +720,7 @@ static fz_error *parsecidchar(fz_cmap *cmap, fz_stream *file) dst = atoi(buf); - error = fz_addcidrange(cmap, src, src, dst); + error = pdf_maprangetorange(cmap, src, src, dst); if (error) return error; } @@ -189,7 +729,41 @@ cleanup: return fz_throw("syntaxerror in CMap cidchar section"); } -static fz_error *parsebfrange(fz_cmap *cmap, fz_stream *file) +static fz_error *parsebfrangearray(pdf_cmap *cmap, fz_stream *file, int lo, int hi) +{ + char buf[256]; + int token; + int len; + fz_error *error; + int dst[256]; + int i; + + while (1) + { + token = mylex(file, buf, sizeof buf, &len); + /* Note: does not handle [ /Name /Name ... ] */ + + if (token == PDF_TCARRAY) + return nil; + + else if (token != PDF_TSTRING) + return fz_throw("syntaxerror in CMap bfrange array section"); + + if (len / 2) + { + for (i = 0; i < len / 2; i++) + dst[i] = codefromstring(buf + i * 2, 2); + + error = pdf_maponetomany(cmap, lo, dst, len / 2); + if (error) + return error; + } + + lo ++; + } +} + +static fz_error *parsebfrange(pdf_cmap *cmap, fz_stream *file) { char buf[256]; int token; @@ -216,28 +790,64 @@ static fz_error *parsebfrange(fz_cmap *cmap, fz_stream *file) hi = codefromstring(buf, len); token = mylex(file, buf, sizeof buf, &len); - /* Note: does not handle [ /Name /Name /Name ... ] */ - if (token != PDF_TSTRING) - goto cleanup; - dst = codefromstring(buf, len); + if (token == PDF_TSTRING) + { + if (len == 2) + { + dst = codefromstring(buf, len); + error = pdf_maprangetorange(cmap, lo, hi, dst); + if (error) + return error; + } + else + { + int dststr[256]; + int i; + + if (len / 2) + { + for (i = 0; i < len / 2; i++) + dststr[i] = codefromstring(buf + i * 2, 2); + + while (lo <= hi) + { + dststr[i-1] ++; + error = pdf_maponetomany(cmap, lo, dststr, i); + if (error) + return error; + lo ++; + } + } + } + } - error = fz_addcidrange(cmap, lo, hi, dst); - if (error) - return error; + else if (token == PDF_TOARRAY) + { + error = parsebfrangearray(cmap, file, lo, hi); + if (error) + return error; + } + + else + { + goto cleanup; + } } cleanup: return fz_throw("syntaxerror in CMap bfrange section"); } -static fz_error *parsebfchar(fz_cmap *cmap, fz_stream *file) +static fz_error *parsebfchar(pdf_cmap *cmap, fz_stream *file) { char buf[256]; int token; int len; fz_error *error; - int src, dst; + int dst[256]; + int src; + int i; while (1) { @@ -256,11 +866,15 @@ static fz_error *parsebfchar(fz_cmap *cmap, fz_stream *file) if (token != PDF_TSTRING) goto cleanup; - dst = codefromstring(buf, len); + if (len / 2) + { + for (i = 0; i < len / 2; i++) + dst[i] = codefromstring(buf + i * 2, 2); - error = fz_addcidrange(cmap, src, src, dst); - if (error) - return error; + error = pdf_maponetomany(cmap, src, dst, i); + if (error) + return error; + } } cleanup: @@ -268,16 +882,16 @@ cleanup: } fz_error * -pdf_parsecmap(fz_cmap **cmapp, fz_stream *file) +pdf_parsecmap(pdf_cmap **cmapp, fz_stream *file) { fz_error *error; - fz_cmap *cmap; + pdf_cmap *cmap; char key[64]; char buf[256]; int token; int len; - error = fz_newcmap(&cmap); + error = pdf_newcmap(&cmap); if (error) return error; @@ -316,7 +930,7 @@ pdf_parsecmap(fz_cmap **cmapp, fz_stream *file) else if (token == TUSECMAP) { - fz_setusecmapname(cmap, key); + strlcpy(cmap->usecmapname, key, sizeof(cmap->usecmapname)); } else if (token == TBEGINCODESPACERANGE) @@ -357,7 +971,7 @@ pdf_parsecmap(fz_cmap **cmapp, fz_stream *file) /* ignore everything else */ } - error = fz_endcidrange(cmap); + error = pdf_sortcmap(cmap); if (error) goto cleanup; @@ -365,7 +979,7 @@ pdf_parsecmap(fz_cmap **cmapp, fz_stream *file) return nil; cleanup: - fz_dropcmap(cmap); + pdf_dropcmap(cmap); return error; } @@ -373,19 +987,19 @@ cleanup: * Load CMap stream in PDF file */ fz_error * -pdf_loadembeddedcmap(fz_cmap **cmapp, pdf_xref *xref, fz_obj *stmref) +pdf_loadembeddedcmap(pdf_cmap **cmapp, pdf_xref *xref, fz_obj *stmref) { fz_obj *stmobj = stmref; fz_error *error = nil; fz_stream *file; - fz_cmap *cmap = nil; - fz_cmap *usecmap; + pdf_cmap *cmap = nil; + pdf_cmap *usecmap; fz_obj *wmode; fz_obj *obj; if ((*cmapp = pdf_finditem(xref->store, PDF_KCMAP, stmref))) { - fz_keepcmap(*cmapp); + pdf_keepcmap(*cmapp); return nil; } @@ -409,7 +1023,7 @@ pdf_loadembeddedcmap(fz_cmap **cmapp, pdf_xref *xref, fz_obj *stmref) if (fz_isint(wmode)) { pdf_logfont("wmode %d\n", wmode); - fz_setwmode(cmap, fz_toint(wmode)); + pdf_setwmode(cmap, fz_toint(wmode)); } obj = fz_dictgets(stmobj, "UseCMap"); @@ -419,8 +1033,8 @@ pdf_loadembeddedcmap(fz_cmap **cmapp, pdf_xref *xref, fz_obj *stmref) error = pdf_loadsystemcmap(&usecmap, fz_toname(obj)); if (error) goto cleanup; - fz_setusecmap(cmap, usecmap); - fz_dropcmap(usecmap); + pdf_setusecmap(cmap, usecmap); + pdf_dropcmap(usecmap); } else if (fz_isindirect(obj)) { @@ -428,8 +1042,8 @@ pdf_loadembeddedcmap(fz_cmap **cmapp, pdf_xref *xref, fz_obj *stmref) error = pdf_loadembeddedcmap(&usecmap, xref, obj); if (error) goto cleanup; - fz_setusecmap(cmap, usecmap); - fz_dropcmap(usecmap); + pdf_setusecmap(cmap, usecmap); + pdf_dropcmap(usecmap); } pdf_logfont("}\n"); @@ -445,7 +1059,7 @@ pdf_loadembeddedcmap(fz_cmap **cmapp, pdf_xref *xref, fz_obj *stmref) cleanup: if (cmap) - fz_dropcmap(cmap); + pdf_dropcmap(cmap); fz_dropobj(stmobj); return error; } @@ -454,16 +1068,19 @@ cleanup: * Load predefined CMap from system */ fz_error * -pdf_loadsystemcmap(fz_cmap **cmapp, char *name) +pdf_loadsystemcmap(pdf_cmap **cmapp, char *name) { fz_error *error = nil; fz_stream *file; char *cmapdir; char *usecmapname; - fz_cmap *usecmap; - fz_cmap *cmap; + pdf_cmap *usecmap; + pdf_cmap *cmap; char path[1024]; + cmap = nil; + file = nil; + pdf_logfont("load system cmap %s {\n", name); cmapdir = getenv("CMAPDIR"); @@ -484,15 +1101,15 @@ pdf_loadsystemcmap(fz_cmap **cmapp, char *name) fz_dropstream(file); - usecmapname = fz_getusecmapname(cmap); - if (usecmapname) + usecmapname = cmap->usecmapname; + if (usecmapname[0]) { pdf_logfont("usecmap %s\n", usecmapname); error = pdf_loadsystemcmap(&usecmap, usecmapname); if (error) goto cleanup; - fz_setusecmap(cmap, usecmap); - fz_dropcmap(usecmap); + pdf_setusecmap(cmap, usecmap); + pdf_dropcmap(usecmap); } pdf_logfont("}\n"); @@ -502,7 +1119,7 @@ pdf_loadsystemcmap(fz_cmap **cmapp, char *name) cleanup: if (cmap) - fz_dropcmap(cmap); + pdf_dropcmap(cmap); if (file) fz_dropstream(file); return error; @@ -512,34 +1129,36 @@ cleanup: * Create an Identity-* CMap (for both 1 and 2-byte encodings) */ fz_error * -pdf_makeidentitycmap(fz_cmap **cmapp, int wmode, int bytes) +pdf_newidentitycmap(pdf_cmap **cmapp, int wmode, int bytes) { fz_error *error; - fz_cmap *cmap; + pdf_cmap *cmap; - error = fz_newcmap(&cmap); + error = pdf_newcmap(&cmap); if (error) return error; - error = fz_addcodespacerange(cmap, 0x0000, 0xffff, bytes); + sprintf(cmap->cmapname, "Identity-%c", wmode ? 'V' : 'H'); + + error = pdf_addcodespace(cmap, 0x0000, 0xffff, bytes); if (error) { - fz_dropcmap(cmap); + pdf_dropcmap(cmap); return error; } - error = fz_addcidrange(cmap, 0x0000, 0xffff, 0); + error = pdf_maprangetorange(cmap, 0x0000, 0xffff, 0); if (error) { - fz_dropcmap(cmap); + pdf_dropcmap(cmap); return error; } - error = fz_endcidrange(cmap); + error = pdf_sortcmap(cmap); if (error) { - fz_dropcmap(cmap); + pdf_dropcmap(cmap); return error; } - fz_setwmode(cmap, wmode); + pdf_setwmode(cmap, wmode); *cmapp = cmap; return nil; diff --git a/mupdf/pdf_font.c b/mupdf/pdf_font.c index 064988ca..0c465b8f 100644 --- a/mupdf/pdf_font.c +++ b/mupdf/pdf_font.c @@ -62,7 +62,7 @@ static inline int ftcidtogid(pdf_font *font, int cid) { if (font->tottfcmap) { - cid = fz_lookupcid(font->tottfcmap, cid); + cid = pdf_lookupcmap(font->tottfcmap, cid); return FT_Get_Char_Index(font->ftface, cid); } @@ -223,11 +223,11 @@ static void ftdropfont(fz_font *font) { pdf_font *pfont = (pdf_font*)font; if (pfont->encoding) - fz_dropcmap(pfont->encoding); + pdf_dropcmap(pfont->encoding); if (pfont->tottfcmap) - fz_dropcmap(pfont->tottfcmap); + pdf_dropcmap(pfont->tottfcmap); if (pfont->tounicode) - fz_dropcmap(pfont->tounicode); + pdf_dropcmap(pfont->tounicode); fz_free(pfont->cidtogid); fz_free(pfont->cidtoucs); if (pfont->ftface) @@ -509,7 +509,7 @@ loadsimplefont(pdf_font **fontp, pdf_xref *xref, fz_obj *dict, fz_obj *ref) } } - error = pdf_makeidentitycmap(&font->encoding, 0, 1); + error = pdf_newidentitycmap(&font->encoding, 0, 1); if (error) goto cleanup; @@ -676,9 +676,9 @@ loadcidfont(pdf_font **fontp, pdf_xref *xref, fz_obj *dict, fz_obj *ref, fz_obj { pdf_logfont("encoding /%s\n", fz_toname(encoding)); if (!strcmp(fz_toname(encoding), "Identity-H")) - error = pdf_makeidentitycmap(&font->encoding, 0, 2); + error = pdf_newidentitycmap(&font->encoding, 0, 2); else if (!strcmp(fz_toname(encoding), "Identity-V")) - error = pdf_makeidentitycmap(&font->encoding, 1, 2); + error = pdf_newidentitycmap(&font->encoding, 1, 2); else error = pdf_loadsystemcmap(&font->encoding, fz_toname(encoding)); } @@ -694,8 +694,8 @@ loadcidfont(pdf_font **fontp, pdf_xref *xref, fz_obj *dict, fz_obj *ref, fz_obj if (error) goto cleanup; - fz_setfontwmode((fz_font*)font, fz_getwmode(font->encoding)); - pdf_logfont("wmode %d\n", fz_getwmode(font->encoding)); + fz_setfontwmode((fz_font*)font, pdf_getwmode(font->encoding)); + pdf_logfont("wmode %d\n", pdf_getwmode(font->encoding)); if (kind == TRUETYPE) { @@ -820,7 +820,7 @@ loadcidfont(pdf_font **fontp, pdf_xref *xref, fz_obj *dict, fz_obj *ref, fz_obj * Vertical */ - if (fz_getwmode(font->encoding) == 1) + if (pdf_getwmode(font->encoding) == 1) { fz_obj *obj; int dw2y = 880; diff --git a/mupdf/pdf_interpret.c b/mupdf/pdf_interpret.c index e5ecf8ee..057ac6d1 100644 --- a/mupdf/pdf_interpret.c +++ b/mupdf/pdf_interpret.c @@ -291,46 +291,32 @@ runkeyword(pdf_csi *csi, pdf_xref *xref, fz_obj *rdb, char *buf) else if (!strcmp(buf, "MP")) { - fz_node *meta; if (csi->top != 1) goto syntaxerror; - error = fz_newmetanode(&meta, csi->stack[0], nil); - if (error) return error; - fz_insertnodelast(gstate->head, meta); } else if (!strcmp(buf, "DP")) { - fz_node *meta; if (csi->top != 2) goto syntaxerror; - error = fz_newmetanode(&meta, csi->stack[0], csi->stack[1]); - if (error) return error; - fz_insertnodelast(gstate->head, meta); } else if (!strcmp(buf, "BMC")) { - fz_node *meta; if (csi->top != 1) goto syntaxerror; - error = fz_newmetanode(&meta, csi->stack[0], nil); - if (error) return error; - fz_insertnodelast(gstate->head, meta); } else if (!strcmp(buf, "BDC")) { - fz_node *meta; if (csi->top != 2) goto syntaxerror; - error = fz_newmetanode(&meta, csi->stack[0], csi->stack[1]); - if (error) return error; - fz_insertnodelast(gstate->head, meta); } else if (!strcmp(buf, "EMC")) { + if (csi->top != 0) + goto syntaxerror; } else if (!strcmp(buf, "cm")) diff --git a/mupdf/pdf_store.c b/mupdf/pdf_store.c index d46cd628..f632657f 100644 --- a/mupdf/pdf_store.c +++ b/mupdf/pdf_store.c @@ -57,7 +57,7 @@ static void dropitem(pdf_itemkind kind, void *val) case PDF_KIMAGE: fz_dropimage(val); break; case PDF_KPATTERN: pdf_droppattern(val); break; case PDF_KSHADE: fz_dropshade(val); break; - case PDF_KCMAP: fz_dropcmap(val); break; + case PDF_KCMAP: pdf_dropcmap(val); break; case PDF_KFONT: fz_dropfont(val); break; } } @@ -112,7 +112,7 @@ pdf_storeitem(pdf_store *store, pdf_itemkind kind, fz_obj *key, void *val) case PDF_KIMAGE: fz_keepimage(val); break; case PDF_KPATTERN: pdf_keeppattern(val); break; case PDF_KSHADE: fz_keepshade(val); break; - case PDF_KCMAP: fz_keepcmap(val); break; + case PDF_KCMAP: pdf_keepcmap(val); break; case PDF_KFONT: fz_keepfont(val); break; } diff --git a/mupdf/pdf_type3.c b/mupdf/pdf_type3.c index 092db4de..b19a7308 100644 --- a/mupdf/pdf_type3.c +++ b/mupdf/pdf_type3.c @@ -11,7 +11,7 @@ t3dropfont(fz_font *font) int i; pdf_font *pfont = (pdf_font*)font; if (pfont->encoding) - fz_dropcmap(pfont->encoding); + pdf_dropcmap(pfont->encoding); for (i = 0; i < 256; i++) if (pfont->charprocs[i]) fz_droptree(pfont->charprocs[i]); @@ -188,7 +188,7 @@ pdf_loadtype3font(pdf_font **fontp, pdf_xref *xref, fz_obj *dict, fz_obj *ref) fz_dropobj(encoding); - error = pdf_makeidentitycmap(&font->encoding, 0, 1); + error = pdf_newidentitycmap(&font->encoding, 0, 1); if (error) goto cleanup; diff --git a/mupdf/pdf_unicode.c b/mupdf/pdf_unicode.c index 28100b78..7affd1b9 100644 --- a/mupdf/pdf_unicode.c +++ b/mupdf/pdf_unicode.c @@ -10,7 +10,7 @@ pdf_loadtounicode(pdf_font *font, pdf_xref *xref, char **strings, char *collection, fz_obj *cmapstm) { fz_error *error; - fz_cmap *cmap; + pdf_cmap *cmap; int cid; int ucs; int i; @@ -23,28 +23,31 @@ pdf_loadtounicode(pdf_font *font, pdf_xref *xref, if (error) return error; - error = fz_newcmap(&font->tounicode); + error = pdf_newcmap(&font->tounicode); if (error) goto cleanup; for (i = 0; i < (strings ? 256 : 65536); i++) { - cid = fz_lookupcid(font->encoding, i); + cid = pdf_lookupcmap(font->encoding, i); if (cid > 0) { - ucs = fz_lookupcid(cmap, i); - error = fz_addcidrange(font->tounicode, cid, cid, ucs); - if (error) - goto cleanup; + ucs = pdf_lookupcmap(cmap, i); + if (ucs > 0) + { + error = pdf_maprangetorange(font->tounicode, cid, cid, ucs); + if (error) + goto cleanup; + } } } - error = fz_endcidrange(font->tounicode); + error = pdf_sortcmap(font->tounicode); if (error) goto cleanup; cleanup: - fz_dropcmap(cmap); + pdf_dropcmap(cmap); return error; } @@ -68,6 +71,8 @@ pdf_loadtounicode(pdf_font *font, pdf_xref *xref, { pdf_logfont("tounicode strings\n"); + /* TODO use tounicode cmap here ... for one-to-many mappings */ + font->ncidtoucs = 256; font->cidtoucs = fz_malloc(256 * sizeof(unsigned short)); if (!font->cidtoucs) @@ -235,7 +240,7 @@ extracttext(pdf_textline **line, fz_node *node, fz_matrix ctm) box.y1 = MAX(0, MAX(vx.y, vy.y)) + y; if (font->tounicode) - c = fz_lookupcid(font->tounicode, g); + c = pdf_lookupcmap(font->tounicode, g); else if (g < font->ncidtoucs) c = font->cidtoucs[g]; else diff --git a/world/Jamfile b/world/Jamfile index b587c805..9f53ef0a 100644 --- a/world/Jamfile +++ b/world/Jamfile @@ -10,7 +10,6 @@ Library libworld : node_text.c node_tree.c - res_cmap.c res_colorspace.c res_font.c res_image.c diff --git a/world/node_misc2.c b/world/node_misc2.c index 8abcd4a5..6446d52f 100644 --- a/world/node_misc2.c +++ b/world/node_misc2.c @@ -162,7 +162,7 @@ fz_boundtransformnode(fz_transformnode *node, fz_matrix ctm) */ fz_error * -fz_newmetanode(fz_node **nodep, void *name, void *dict) +fz_newmetanode(fz_node **nodep, char *name, void *dict) { fz_metanode *node; @@ -172,13 +172,8 @@ fz_newmetanode(fz_node **nodep, void *name, void *dict) *nodep = (fz_node*)node; fz_initnode((fz_node*)node, FZ_NMETA); - node->name = nil; - node->dict = nil; - - if (name) - node->name = name; - if (dict) - node->dict = dict; + node->name = name; + node->dict = dict; return nil; } @@ -186,10 +181,8 @@ fz_newmetanode(fz_node **nodep, void *name, void *dict) void fz_dropmetanode(fz_metanode *node) { - if (node->name) - fz_warn("leaking meta node name"); if (node->dict) - fz_warn("leaking meta node dict"); + fz_warn("leaking meta node '%s'", node->name); } fz_rect diff --git a/world/node_toxml.c b/world/node_toxml.c index 2099c6a8..b7cd6c15 100644 --- a/world/node_toxml.c +++ b/world/node_toxml.c @@ -14,7 +14,7 @@ static void xmlmeta(fz_metanode *node, int level) fz_node *child; indent(level); - printf("<meta>\n"); + printf("<meta name=\"%s\">\n", node->name); for (child = node->super.first; child; child = child->next) xmlnode(child, level + 1); diff --git a/world/res_cmap.c b/world/res_cmap.c deleted file mode 100644 index bc2b4249..00000000 --- a/world/res_cmap.c +++ /dev/null @@ -1,467 +0,0 @@ -#include "fitz-base.h" -#include "fitz-world.h" - -typedef struct fz_range_s fz_range; - -enum { MAXCODESPACE = 10 }; -enum { SINGLE, RANGE, LOOKUP }; - -struct fz_range_s -{ - int low; - int high; - int flag; - int offset; -}; - -struct fz_cmap_s -{ - int refs; - char cmapname[32]; - - char usecmapname[32]; - fz_cmap *usecmap; - - int wmode; - - int ncspace; - struct { - int n; - unsigned char lo[4]; - unsigned char hi[4]; - } cspace[MAXCODESPACE]; - - int rlen, rcap; - fz_range *ranges; - - int tlen, tcap; - int *lookup; -}; - -fz_error * -fz_newcmap(fz_cmap **cmapp) -{ - fz_cmap *cmap; - - cmap = *cmapp = fz_malloc(sizeof(fz_cmap)); - if (!cmap) - return fz_outofmem; - - cmap->refs = 1; - strcpy(cmap->cmapname, ""); - - strcpy(cmap->usecmapname, ""); - cmap->usecmap = nil; - - cmap->wmode = 0; - - cmap->ncspace = 0; - - cmap->rlen = 0; - cmap->rcap = 0; - cmap->ranges = nil; - - cmap->tlen = 0; - cmap->tcap = 0; - cmap->lookup = nil; - - return nil; -} - -fz_cmap * -fz_keepcmap(fz_cmap *cmap) -{ - cmap->refs ++; - return cmap; -} - -void -fz_dropcmap(fz_cmap *cmap) -{ - if (--cmap->refs == 0) - { - if (cmap->usecmap) - fz_dropcmap(cmap->usecmap); - fz_free(cmap->ranges); - fz_free(cmap->lookup); - fz_free(cmap); - } -} - -char * -fz_getcmapname(fz_cmap *cmap) -{ - if (cmap->cmapname[0]) - return cmap->cmapname; - return nil; -} - -void -fz_setcmapname(fz_cmap *cmap, char *cmapname) -{ - strlcpy(cmap->cmapname, cmapname, sizeof cmap->cmapname); -} - -char * -fz_getusecmapname(fz_cmap *cmap) -{ - if (cmap->usecmapname[0]) - return cmap->usecmapname; - return nil; -} - -void -fz_setusecmapname(fz_cmap *cmap, char *usecmap) -{ - strlcpy(cmap->usecmapname, usecmap, sizeof cmap->usecmapname); -} - -fz_cmap * -fz_getusecmap(fz_cmap *cmap) -{ - return cmap->usecmap; -} - -void -fz_setusecmap(fz_cmap *cmap, fz_cmap *usecmap) -{ - int i; - - if (cmap->usecmap) - fz_dropcmap(cmap->usecmap); - cmap->usecmap = fz_keepcmap(usecmap); - - if (cmap->ncspace == 0) - { - cmap->ncspace = usecmap->ncspace; - for (i = 0; i < usecmap->ncspace; i++) - cmap->cspace[i] = usecmap->cspace[i]; - } -} - -void -fz_setwmode(fz_cmap *cmap, int wmode) -{ - cmap->wmode = wmode; -} - -int -fz_getwmode(fz_cmap *cmap) -{ - return cmap->wmode; -} - -fz_error * -fz_addcodespacerange(fz_cmap *cmap, unsigned lo, unsigned hi, int n) -{ - int i; - - if (cmap->ncspace + 1 == MAXCODESPACE) - return fz_throw("rangelimit: too many code space ranges"); - - cmap->cspace[cmap->ncspace].n = n; - - for (i = 0; i < n; i++) - { - int o = (n - i - 1) * 8; - cmap->cspace[cmap->ncspace].lo[i] = (lo >> o) & 0xFF; - cmap->cspace[cmap->ncspace].hi[i] = (hi >> o) & 0xFF; - } - - cmap->ncspace ++; - - return nil; -} - -fz_error * -fz_addcidrange(fz_cmap *cmap, int low, int high, int offset) -{ - if (cmap->rlen + 1 > cmap->rcap) - { - fz_range *newranges; - int newcap = cmap->rcap == 0 ? 256 : cmap->rcap * 2; - newranges = fz_realloc(cmap->ranges, newcap * sizeof(fz_range)); - if (!newranges) - return fz_outofmem; - cmap->rcap = newcap; - cmap->ranges = newranges; - } - - cmap->ranges[cmap->rlen].low = low; - cmap->ranges[cmap->rlen].high = high; - cmap->ranges[cmap->rlen].flag = high - low == 0 ? SINGLE : RANGE; - cmap->ranges[cmap->rlen].offset = offset; - cmap->rlen ++; - - return nil; -} - -static fz_error * -addlookup(fz_cmap *cmap, int value) -{ - if (cmap->tlen + 1 > cmap->tcap) - { - int newcap = cmap->tcap == 0 ? 256 : cmap->tcap * 2; - int *newlookup = fz_realloc(cmap->lookup, newcap * sizeof(int)); - if (!newlookup) - return fz_outofmem; - cmap->tcap = newcap; - cmap->lookup = newlookup; - } - - cmap->lookup[cmap->tlen++] = value; - - return nil; -} - -static int compare(const void *va, const void *vb) -{ - return ((const fz_range*)va)->low - ((const fz_range*)vb)->low; -} - -fz_error * -fz_endcidrange(fz_cmap *cmap) -{ - fz_error *error; - fz_range *newranges; - int *newlookup; - fz_range *a; /* last written range on output */ - fz_range *b; /* current range examined on input */ - - qsort(cmap->ranges, cmap->rlen, sizeof(fz_range), compare); - - a = cmap->ranges; - b = cmap->ranges + 1; - - while (b < cmap->ranges + cmap->rlen) - { - /* input contiguous */ - if (a->high + 1 == b->low) - { - /* output contiguous */ - if (a->high - a->low + a->offset + 1 == b->offset) - { - /* SR -> R and SS -> R and RR -> R and RS -> R */ - if (a->flag == SINGLE || a->flag == RANGE) - { - a->flag = RANGE; - a->high = b->high; - } - - /* LS -> L */ - else if (a->flag == LOOKUP && b->flag == SINGLE) - { - a->high = b->high; - error = addlookup(cmap, b->offset); - if (error) - return error; - } - - /* LR -> LR */ - else if (a->flag == LOOKUP && b->flag == RANGE) - { - *(++a) = *b; - } - } - - /* output separated */ - else - { - /* SS -> L */ - if (a->flag == SINGLE && b->flag == SINGLE) - { - a->flag = LOOKUP; - a->high = b->high; - - error = addlookup(cmap, a->offset); - if (error) - return error; - - error = addlookup(cmap, b->offset); - if (error) - return error; - - a->offset = cmap->tlen - 2; - } - - /* LS -> L */ - else if (a->flag == LOOKUP && b->flag == SINGLE) - { - a->high = b->high; - error = addlookup(cmap, b->offset); - if (error) - return error; - } - - /* XX -> XX */ - else - { - *(++a) = *b; - } - } - } - - /* input separated: XX -> XX */ - else - { - *(++a) = *b; - } - - b ++; - } - - cmap->rlen = a - cmap->ranges + 1; - - assert(cmap->rlen > 0); - - newranges = fz_realloc(cmap->ranges, cmap->rlen * sizeof(fz_range)); - if (!newranges) - return fz_outofmem; - cmap->rcap = cmap->rlen; - cmap->ranges = newranges; - - if (cmap->tlen) - { - newlookup = fz_realloc(cmap->lookup, cmap->tlen * sizeof(int)); - if (!newlookup) - return fz_outofmem; - cmap->tcap = cmap->tlen; - cmap->lookup = newlookup; - } - - return nil; -} - -fz_error * -fz_setcidlookup(fz_cmap *cmap, int map[256]) -{ - int i; - - cmap->rlen = cmap->rcap = 1; - cmap->ranges = fz_malloc(sizeof (fz_range)); - if (!cmap->ranges) { - return fz_outofmem; - } - - cmap->tlen = cmap->tcap = 256; - cmap->lookup = fz_malloc(sizeof (int) * 256); - if (!cmap->lookup) { - fz_free(cmap->ranges); - return fz_outofmem; - } - - cmap->ranges[0].low = 0; - cmap->ranges[0].high = 255; - cmap->ranges[0].flag = LOOKUP; - cmap->ranges[0].offset = 0; - - for (i = 0; i < 256; i++) - cmap->lookup[i] = map[i]; - - return nil; -} - -int -fz_lookupcid(fz_cmap *cmap, int cpt) -{ - int l = 0; - int r = cmap->rlen - 1; - int m; - - while (l <= r) - { - m = (l + r) >> 1; - if (cpt < cmap->ranges[m].low) - r = m - 1; - else if (cpt > cmap->ranges[m].high) - l = m + 1; - else - { - int i = cpt - cmap->ranges[m].low + cmap->ranges[m].offset; - if (cmap->ranges[m].flag == LOOKUP) - return cmap->lookup[i]; - return i; - } - } - - if (cmap->usecmap) - return fz_lookupcid(cmap->usecmap, cpt); - - return -1; -} - -unsigned char * -fz_decodecpt(fz_cmap *cmap, unsigned char *buf, int *cpt) -{ - int i, k; - - for (k = 0; k < cmap->ncspace; k++) - { - unsigned char *lo = cmap->cspace[k].lo; - unsigned char *hi = cmap->cspace[k].hi; - int n = cmap->cspace[k].n; - int c = 0; - - for (i = 0; i < n; i++) - { - if (lo[i] <= buf[i] && buf[i] <= hi[i]) - c = (c << 8) | buf[i]; - else - break; - } - - if (i == n) { - *cpt = c; - return buf + n; - } - } - - *cpt = 0; - return buf + 1; -} - -void -fz_debugcmap(fz_cmap *cmap) -{ - int i, k; - - printf("cmap $%p /%s {\n", cmap, cmap->cmapname); - - if (cmap->usecmapname[0]) - printf(" usecmap /%s\n", cmap->usecmapname); - if (cmap->usecmap) - printf(" usecmap $%p\n", cmap->usecmap); - - printf(" wmode %d\n", cmap->wmode); - - printf(" codespaces {\n"); - for (i = 0; i < cmap->ncspace; i++) - { - printf(" <"); - for (k = 0; k < cmap->cspace[i].n; k++) - printf("%02x", cmap->cspace[i].lo[k]); - printf("> <"); - for (k = 0; k < cmap->cspace[i].n; k++) - printf("%02x", cmap->cspace[i].hi[k]); - printf(">\n"); - } - printf(" }\n"); - - printf(" ranges (%d,%d) {\n", cmap->rlen, cmap->tlen); - for (i = 0; i < cmap->rlen; i++) - { - fz_range *r = &cmap->ranges[i]; - printf(" <%04x> <%04x> ", r->low, r->high); - if (r->flag == LOOKUP) - { - printf("[ "); - for (k = 0; k < r->high - r->low + 1; k++) - printf("%d ", cmap->lookup[r->offset + k]); - printf("]\n"); - } - else - printf("%d\n", r->offset); - } - printf(" }\n}\n"); -} - |