diff options
Diffstat (limited to 'mupdf/pdf_cmap.c')
-rw-r--r-- | mupdf/pdf_cmap.c | 546 |
1 files changed, 546 insertions, 0 deletions
diff --git a/mupdf/pdf_cmap.c b/mupdf/pdf_cmap.c new file mode 100644 index 00000000..5029ae4a --- /dev/null +++ b/mupdf/pdf_cmap.c @@ -0,0 +1,546 @@ +#include <fitz.h> +#include <mupdf.h> + +enum +{ + TUSECMAP = PDF_NTOKENS, + TBEGINCODESPACERANGE, + TENDCODESPACERANGE, + TBEGINBFCHAR, + TENDBFCHAR, + TBEGINBFRANGE, + TENDBFRANGE, + TBEGINCIDCHAR, + TENDCIDCHAR, + TBEGINCIDRANGE, + TENDCIDRANGE +}; + +static int tokenfromkeyword(char *key) +{ + if (!strcmp(key, "usecmap")) return TUSECMAP; + if (!strcmp(key, "begincodespacerange")) return TBEGINCODESPACERANGE; + if (!strcmp(key, "endcodespacerange")) return TENDCODESPACERANGE; + if (!strcmp(key, "beginbfchar")) return TBEGINBFCHAR; + if (!strcmp(key, "endbfchar")) return TENDBFCHAR; + if (!strcmp(key, "beginbfrange")) return TBEGINBFRANGE; + if (!strcmp(key, "endbfrange")) return TENDBFRANGE; + if (!strcmp(key, "begincidchar")) return TBEGINCIDCHAR; + if (!strcmp(key, "endcidchar")) return TENDCIDCHAR; + if (!strcmp(key, "begincidrange")) return TBEGINCIDRANGE; + if (!strcmp(key, "endcidrange")) return TENDCIDRANGE; + return PDF_TKEYWORD; +} + +static int codefromstring(unsigned char *buf, int len) +{ + int a = 0; + while (len--) + a = (a << 8) | *buf++; + return a; +} + +static int mylex(fz_file *file, char *buf, int n, int *sl) +{ + int token = pdf_lex(file, buf, n, sl); + if (token == PDF_TKEYWORD) + token = tokenfromkeyword(buf); + return token; +} + +static fz_error *parsecmapname(fz_cmap *cmap, fz_file *file) +{ + char buf[256]; + int token; + int len; + + token = mylex(file, buf, sizeof buf, &len); + if (token == PDF_TNAME) { + fz_setcmapname(cmap, buf); + return nil; + } + + return fz_throw("syntaxerror in CMap after /CMapName"); +} + +static fz_error *parsewmode(fz_cmap *cmap, fz_file *file) +{ + char buf[256]; + int token; + int len; + + token = mylex(file, buf, sizeof buf, &len); + if (token == PDF_TINT) { + fz_setwmode(cmap, atoi(buf)); + return nil; + } + + return fz_throw("syntaxerror in CMap after /WMode"); +} + +static fz_error *parsecodespacerange(fz_cmap *cmap, fz_file *file) +{ + char buf[256]; + int token; + int len; + fz_error *error; + int lo, hi; + + while (1) + { + token = mylex(file, buf, sizeof buf, &len); + + if (token == TENDCODESPACERANGE) + return nil; + + else if (token == PDF_TSTRING) + { + lo = codefromstring(buf, len); + token = mylex(file, buf, sizeof buf, &len); + if (token == PDF_TSTRING) + { + hi = codefromstring(buf, len); + error = fz_addcodespacerange(cmap, lo, hi, len); + if (error) + return error; + } + else break; + } + + else break; + } + + return fz_throw("syntaxerror in CMap codespacerange section"); +} + +static fz_error *parsecidrange(fz_cmap *cmap, fz_file *file) +{ + char buf[256]; + int token; + int len; + fz_error *error; + int lo, hi, dst; + + while (1) + { + token = mylex(file, buf, sizeof buf, &len); + + if (token == TENDCIDRANGE) + return nil; + + else if (token != PDF_TSTRING) + goto cleanup; + + lo = codefromstring(buf, len); + + token = mylex(file, buf, sizeof buf, &len); + if (token != PDF_TSTRING) + goto cleanup; + + hi = codefromstring(buf, len); + + token = mylex(file, buf, sizeof buf, &len); + if (token != PDF_TINT) + goto cleanup; + + dst = atoi(buf); + + error = fz_addcidrange(cmap, lo, hi, dst); + if (error) + return error; + } + +cleanup: + return fz_throw("syntaxerror in CMap cidrange section"); +} + +static fz_error *parsecidchar(fz_cmap *cmap, fz_file *file) +{ + char buf[256]; + int token; + int len; + fz_error *error; + int src, dst; + + while (1) + { + token = mylex(file, buf, sizeof buf, &len); + + if (token == TENDCIDCHAR) + return nil; + + else if (token != PDF_TSTRING) + goto cleanup; + + src = codefromstring(buf, len); + + token = mylex(file, buf, sizeof buf, &len); + if (token != PDF_TINT) + goto cleanup; + + dst = atoi(buf); + + error = fz_addcidrange(cmap, src, src, dst); + if (error) + return error; + } + +cleanup: + return fz_throw("syntaxerror in CMap cidchar section"); +} + +static fz_error *parsebfrange(fz_cmap *cmap, fz_file *file) +{ + char buf[256]; + int token; + int len; + fz_error *error; + int lo, hi, dst; + + while (1) + { + token = mylex(file, buf, sizeof buf, &len); + + if (token == TENDBFRANGE) + return nil; + + else if (token != PDF_TSTRING) + goto cleanup; + + lo = codefromstring(buf, len); + + token = mylex(file, buf, sizeof buf, &len); + if (token != PDF_TSTRING) + goto cleanup; + + hi = codefromstring(buf, len); + + token = mylex(file, buf, sizeof buf, &len); + /* Note: does not handle [ /Name /Name /Name ... ] */ + if (token != PDF_TSTRING) + goto cleanup; + + dst = codefromstring(buf, len); + + error = fz_addcidrange(cmap, lo, hi, dst); + if (error) + return error; + } + +cleanup: + return fz_throw("syntaxerror in CMap bfrange section"); +} + +static fz_error *parsebfchar(fz_cmap *cmap, fz_file *file) +{ + char buf[256]; + int token; + int len; + fz_error *error; + int src, dst; + + while (1) + { + token = mylex(file, buf, sizeof buf, &len); + + if (token == TENDBFCHAR) + return nil; + + else if (token != PDF_TSTRING) + goto cleanup; + + src = codefromstring(buf, len); + + token = mylex(file, buf, sizeof buf, &len); + /* Note: does not handle /dstName */ + if (token != PDF_TSTRING) + goto cleanup; + + dst = codefromstring(buf, len); + + error = fz_addcidrange(cmap, src, src, dst); + if (error) + return error; + } + +cleanup: + return fz_throw("syntaxerror in CMap bfchar section"); +} + +fz_error * +pdf_parsecmap(fz_cmap **cmapp, fz_file *file) +{ + fz_error *error; + fz_cmap *cmap; + char key[64]; + char buf[256]; + int token; + int len; + + error = fz_newcmap(&cmap); + if (error) + return error; + + strcpy(key, ".notdef"); + + while (1) + { + token = mylex(file, buf, sizeof buf, &len); + + if (token == PDF_TEOF) + break; + + else if (token == PDF_TERROR) + { + error = fz_throw("syntaxerror in CMap"); + goto cleanup; + } + + else if (token == PDF_TNAME) + { + if (!strcmp(buf, "CMapName")) + { + error = parsecmapname(cmap, file); + if (error) + goto cleanup; + } + else if (!strcmp(buf, "WMode")) + { + error = parsewmode(cmap, file); + if (error) + goto cleanup; + } + else + strlcpy(key, buf, sizeof key); + } + + else if (token == TUSECMAP) + { + fz_setusecmapname(cmap, key); + } + + else if (token == TBEGINCODESPACERANGE) + { + error = parsecodespacerange(cmap, file); + if (error) + goto cleanup; + } + + else if (token == TBEGINBFCHAR) + { + error = parsebfchar(cmap, file); + if (error) + goto cleanup; + } + + else if (token == TBEGINCIDCHAR) + { + error = parsecidchar(cmap, file); + if (error) + goto cleanup; + } + + else if (token == TBEGINBFRANGE) + { + error = parsebfrange(cmap, file); + if (error) + goto cleanup; + } + + else if (token == TBEGINCIDRANGE) + { + error = parsecidrange(cmap, file); + if (error) + goto cleanup; + } + + /* ignore everything else */ + } + + error = fz_endcidrange(cmap); + if (error) + goto cleanup; + + *cmapp = cmap; + return nil; + +cleanup: + fz_dropcmap(cmap); + return error; +} + +/* + * Load CMap stream in PDF file + */ +fz_error * +pdf_loadembeddedcmap(fz_cmap **cmapp, pdf_xref *xref, fz_obj *stmref) +{ + fz_obj *stmobj = stmref; + fz_error *error = nil; + fz_cmap *cmap = nil; + fz_cmap *usecmap; + fz_obj *wmode; + fz_obj *obj; + + if ((*cmapp = pdf_finditem(xref->store, PDF_KCMAP, stmref))) + { + fz_keepcmap(*cmapp); + return nil; + } + + pdf_logfont("load embedded cmap %d %d {\n", fz_tonum(stmref), fz_togen(stmref)); + + error = pdf_resolve(&stmobj, xref); + if (error) + return error; + + error = pdf_openstream(xref, fz_tonum(stmref), fz_togen(stmref)); + if (error) + goto cleanup; + + error = pdf_parsecmap(&cmap, xref->file); + if (error) + goto cleanup; + + pdf_closestream(xref); + + wmode = fz_dictgets(stmobj, "WMode"); + if (fz_isint(wmode)) + { + pdf_logfont("wmode %d\n", wmode); + fz_setwmode(cmap, fz_toint(wmode)); + } + + obj = fz_dictgets(stmobj, "UseCMap"); + if (fz_isname(obj)) + { + pdf_logfont("usecmap /%s\n", fz_toname(obj)); + error = pdf_loadsystemcmap(&usecmap, fz_toname(obj)); + if (error) + goto cleanup; + fz_setusecmap(cmap, usecmap); + fz_dropcmap(usecmap); + } + else if (fz_isindirect(obj)) + { + pdf_logfont("usecmap %d %d R\n", fz_tonum(obj), fz_togen(obj)); + error = pdf_loadembeddedcmap(&usecmap, xref, obj); + if (error) + goto cleanup; + fz_setusecmap(cmap, usecmap); + fz_dropcmap(usecmap); + } + + pdf_logfont("}\n"); + + error = pdf_storeitem(xref->store, PDF_KCMAP, stmref, cmap); + if (error) + goto cleanup; + + fz_dropobj(stmobj); + + *cmapp = cmap; + return nil; + +cleanup: + if (cmap) + fz_dropcmap(cmap); + fz_dropobj(stmobj); + return error; +} + +/* + * Load predefined CMap from system + */ +fz_error * +pdf_loadsystemcmap(fz_cmap **cmapp, char *name) +{ + fz_error *error = nil; + fz_file *file; + char *cmapdir; + char *usecmapname; + fz_cmap *usecmap; + fz_cmap *cmap; + char path[1024]; + + pdf_logfont("load system cmap %s {\n", name); + + cmapdir = getenv("CMAPDIR"); + if (!cmapdir) + return fz_throw("ioerror: CMAPDIR environment not set"); + + strlcpy(path, cmapdir, sizeof path); + strlcat(path, "/", sizeof path); + strlcat(path, name, sizeof path); + + error = fz_openfile(&file, path, FZ_READ); + if (error) + goto cleanup; + + error = pdf_parsecmap(&cmap, file); + if (error) + goto cleanup; + + fz_closefile(file); + + usecmapname = fz_getusecmapname(cmap); + if (usecmapname) + { + pdf_logfont("usecmap %s\n", usecmapname); + error = pdf_loadsystemcmap(&usecmap, usecmapname); + if (error) + goto cleanup; + fz_setusecmap(cmap, usecmap); + fz_dropcmap(usecmap); + } + + pdf_logfont("}\n"); + + *cmapp = cmap; + return nil; + +cleanup: + if (cmap) + fz_dropcmap(cmap); + if (file) + fz_closefile(file); + return error; +} + +/* + * Create an Identity-* CMap (for both 1 and 2-byte encodings) + */ +fz_error * +pdf_makeidentitycmap(fz_cmap **cmapp, int wmode, int bytes) +{ + fz_error *error; + fz_cmap *cmap; + + error = fz_newcmap(&cmap); + if (error) + return error; + + error = fz_addcodespacerange(cmap, 0x0000, 0xffff, bytes); + if (error) { + fz_dropcmap(cmap); + return error; + } + + error = fz_addcidrange(cmap, 0x0000, 0xffff, 0); + if (error) { + fz_dropcmap(cmap); + return error; + } + + error = fz_endcidrange(cmap); + if (error) { + fz_dropcmap(cmap); + return error; + } + + fz_setwmode(cmap, wmode); + + *cmapp = cmap; + return nil; +} + |