From 48b026e7500c5f7239e173d14d09b9e2e272e874 Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Wed, 5 Sep 2018 15:27:22 +0200 Subject: Regularize language and script names. Drop the unused 'serif' argument to the CJK lookup functions. Use the BCP 47 names for CJK scripts and languages: zh-Hant for traditional Chinese, zh-Hans for simplified Chinese, ja for Japanese, ko for Korean. The lookup function also allows commonly used language+country codes: zh-TW and zh-HK for traditional Chinese, zh-CN for simplified Chinese. --- source/fitz/font.c | 19 +++++++++---------- source/fitz/noto.c | 39 ++++++++++++++++++++++++++------------- source/pdf/pdf-font.c | 30 +++++++++++++++--------------- source/tools/murun.c | 20 ++++++++++---------- source/tools/pdfcreate.c | 29 ++++++++++++++--------------- 5 files changed, 74 insertions(+), 63 deletions(-) (limited to 'source') diff --git a/source/fitz/font.c b/source/fitz/font.c index f9609aec..733d91da 100644 --- a/source/fitz/font.c +++ b/source/fitz/font.c @@ -230,7 +230,7 @@ struct fz_font_context_s /* Cached fallback fonts */ fz_font *base14[14]; - fz_font *cjk[8]; + fz_font *cjk[4]; struct { fz_font *serif, *sans; } fallback[256]; fz_font *symbol1, *symbol2; fz_font *emoji; @@ -416,7 +416,7 @@ fz_font *fz_load_fallback_font(fz_context *ctx, int script, int language, int se *fontp = fz_load_system_fallback_font(ctx, script, language, serif, bold, italic); if (!*fontp) { - data = fz_lookup_noto_font(ctx, script, language, serif, &size, &subfont); + data = fz_lookup_noto_font(ctx, script, language, &size, &subfont); if (data) *fontp = fz_new_font_from_memory(ctx, NULL, data, size, subfont, 0); } @@ -697,20 +697,19 @@ fz_new_base14_font(fz_context *ctx, const char *name) } fz_font * -fz_new_cjk_font(fz_context *ctx, int ordering, int serif) +fz_new_cjk_font(fz_context *ctx, int ordering) { const unsigned char *data; int size, index; - int x = (ordering * 2) + !!serif; - if (x >= 0 && x < nelem(ctx->font->cjk)) + if (ordering >= 0 && ordering < nelem(ctx->font->cjk)) { - if (ctx->font->cjk[x]) - return fz_keep_font(ctx, ctx->font->cjk[x]); - data = fz_lookup_cjk_font(ctx, ordering, serif, &size, &index); + if (ctx->font->cjk[ordering]) + return fz_keep_font(ctx, ctx->font->cjk[ordering]); + data = fz_lookup_cjk_font(ctx, ordering, &size, &index); if (data) { - ctx->font->cjk[x] = fz_new_font_from_memory(ctx, NULL, data, size, index, 0); - return fz_keep_font(ctx, ctx->font->cjk[x]); + ctx->font->cjk[ordering] = fz_new_font_from_memory(ctx, NULL, data, size, index, 0); + return fz_keep_font(ctx, ctx->font->cjk[ordering]); } } fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find builtin CJK font"); diff --git a/source/fitz/noto.c b/source/fitz/noto.c index 8301929f..c3eea9d6 100644 --- a/source/fitz/noto.c +++ b/source/fitz/noto.c @@ -139,18 +139,18 @@ fz_lookup_builtin_font(fz_context *ctx, const char *name, int is_bold, int is_it } const unsigned char * -fz_lookup_cjk_font(fz_context *ctx, int ordering, int serif, int *size, int *subfont) +fz_lookup_cjk_font(fz_context *ctx, int ordering, int *size, int *subfont) { *subfont = 0; #ifndef TOFU_CJK #ifndef TOFU_CJK_EXT #ifndef TOFU_CJK_LANG switch (ordering) { - case FZ_ADOBE_JAPAN_1: *subfont=0; RETURN(han, SourceHanSerif_Regular_ttc); - case FZ_ADOBE_KOREA_1: *subfont=1; RETURN(han, SourceHanSerif_Regular_ttc); - case FZ_ADOBE_GB_1: *subfont=2; RETURN(han, SourceHanSerif_Regular_ttc); + case FZ_ADOBE_JAPAN: *subfont=0; RETURN(han, SourceHanSerif_Regular_ttc); + case FZ_ADOBE_KOREA: *subfont=1; RETURN(han, SourceHanSerif_Regular_ttc); + case FZ_ADOBE_GB: *subfont=2; RETURN(han, SourceHanSerif_Regular_ttc); default: - case FZ_ADOBE_CNS_1: *subfont=3; RETURN(han, SourceHanSerif_Regular_ttc); + case FZ_ADOBE_CNS: *subfont=3; RETURN(han, SourceHanSerif_Regular_ttc); } #else RETURN(droid, DroidSansFallbackFull_ttf); @@ -163,8 +163,21 @@ fz_lookup_cjk_font(fz_context *ctx, int ordering, int serif, int *size, int *sub #endif } +int +fz_lookup_cjk_ordering_by_language(const char *name) +{ + if (!strcmp(name, "zh-Hant")) return FZ_ADOBE_CNS; + if (!strcmp(name, "zh-TW")) return FZ_ADOBE_CNS; + if (!strcmp(name, "zh-HK")) return FZ_ADOBE_CNS; + if (!strcmp(name, "zh-Hans")) return FZ_ADOBE_GB; + if (!strcmp(name, "zh-CN")) return FZ_ADOBE_GB; + if (!strcmp(name, "ja")) return FZ_ADOBE_JAPAN; + if (!strcmp(name, "ko")) return FZ_ADOBE_KOREA; + return -1; +} + const unsigned char * -fz_lookup_noto_font(fz_context *ctx, int script, int language, int serif, int *size, int *subfont) +fz_lookup_noto_font(fz_context *ctx, int script, int language, int *size, int *subfont) { /* TODO: Noto(SansSyriacEstrangela); */ /* TODO: Noto(SansSyriacWestern); */ @@ -180,20 +193,20 @@ fz_lookup_noto_font(fz_context *ctx, int script, int language, int serif, int *s break; case UCDN_SCRIPT_HANGUL: - return fz_lookup_cjk_font(ctx, FZ_ADOBE_KOREA_1, serif, size, subfont); + return fz_lookup_cjk_font(ctx, FZ_ADOBE_KOREA, size, subfont); case UCDN_SCRIPT_HIRAGANA: case UCDN_SCRIPT_KATAKANA: - return fz_lookup_cjk_font(ctx, FZ_ADOBE_JAPAN_1, serif, size, subfont); + return fz_lookup_cjk_font(ctx, FZ_ADOBE_JAPAN, size, subfont); case UCDN_SCRIPT_BOPOMOFO: - return fz_lookup_cjk_font(ctx, FZ_ADOBE_CNS_1, serif, size, subfont); + return fz_lookup_cjk_font(ctx, FZ_ADOBE_CNS, size, subfont); case UCDN_SCRIPT_HAN: switch (language) { - case FZ_LANG_ja: return fz_lookup_cjk_font(ctx, FZ_ADOBE_JAPAN_1, serif, size, subfont); - case FZ_LANG_ko: return fz_lookup_cjk_font(ctx, FZ_ADOBE_KOREA_1, serif, size, subfont); - case FZ_LANG_zh_Hans: return fz_lookup_cjk_font(ctx, FZ_ADOBE_GB_1, serif, size, subfont); + case FZ_LANG_ja: return fz_lookup_cjk_font(ctx, FZ_ADOBE_JAPAN, size, subfont); + case FZ_LANG_ko: return fz_lookup_cjk_font(ctx, FZ_ADOBE_KOREA, size, subfont); + case FZ_LANG_zh_Hans: return fz_lookup_cjk_font(ctx, FZ_ADOBE_GB, size, subfont); default: - case FZ_LANG_zh_Hant: return fz_lookup_cjk_font(ctx, FZ_ADOBE_CNS_1, serif, size, subfont); + case FZ_LANG_zh_Hant: return fz_lookup_cjk_font(ctx, FZ_ADOBE_CNS, size, subfont); } case UCDN_SCRIPT_BRAILLE: break; /* no dedicated font; fallback to NotoSansSymbols will cover this */ diff --git a/source/pdf/pdf-font.c b/source/pdf/pdf-font.c index 3dae9884..a0df70fd 100644 --- a/source/pdf/pdf-font.c +++ b/source/pdf/pdf-font.c @@ -409,15 +409,15 @@ pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const cha if (!fontdesc->font) { const unsigned char *data; - int len; - int index; + int size; + int subfont; - data = fz_lookup_cjk_font(ctx, ros, serif, &len, &index); + data = fz_lookup_cjk_font(ctx, ros, &size, &subfont); if (!data) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find builtin CJK font"); /* A glyph bbox cache is too big for CJK fonts. */ - fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, index, 0); + fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0); } fontdesc->font->flags.ft_substitute = 1; @@ -451,13 +451,13 @@ pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontn if (collection) { if (!strcmp(collection, "Adobe-CNS1")) - pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS_1, serif); + pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif); else if (!strcmp(collection, "Adobe-GB1")) - pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB_1, serif); + pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif); else if (!strcmp(collection, "Adobe-Japan1")) - pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN_1, serif); + pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif); else if (!strcmp(collection, "Adobe-Korea1")) - pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA_1, serif); + pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif); else { if (strcmp(collection, "Adobe-Identity") != 0) @@ -2133,28 +2133,28 @@ pdf_add_cjk_font(fz_context *ctx, pdf_document *doc, fz_font *fzfont, int script switch (script) { - case FZ_ADOBE_CNS_1: /* traditional chinese */ + default: + script = FZ_ADOBE_CNS; + /* fall through */ + case FZ_ADOBE_CNS: /* traditional chinese */ basefont = serif ? "Ming" : "Fangti"; encoding = wmode ? "UniCNS-UTF16-V" : "UniCNS-UTF16-H"; ordering = "CNS1"; supplement = 7; break; - case FZ_ADOBE_GB_1: /* simplified chinese */ + case FZ_ADOBE_GB: /* simplified chinese */ basefont = serif ? "Song" : "Heiti"; encoding = wmode ? "UniGB-UTF16-V" : "UniGB-UTF16-H"; ordering = "GB1"; supplement = 5; break; - default: - script = FZ_ADOBE_JAPAN_1; - /* fall through */ - case FZ_ADOBE_JAPAN_1: + case FZ_ADOBE_JAPAN: basefont = serif ? "Mincho" : "Gothic"; encoding = wmode ? "UniJIS-UTF16-V" : "UniJIS-UTF16-H"; ordering = "Japan1"; supplement = 6; break; - case FZ_ADOBE_KOREA_1: + case FZ_ADOBE_KOREA: basefont = serif ? "Batang" : "Dotum"; encoding = wmode ? "UniKS-UTF16-V" : "UniKS-UTF16-H"; ordering = "Korea1"; diff --git a/source/tools/murun.c b/source/tools/murun.c index 810b32ce..961d6a7a 100644 --- a/source/tools/murun.c +++ b/source/tools/murun.c @@ -3285,9 +3285,12 @@ static void ffi_PDFDocument_addSimpleFont(js_State *J) pdf_obj *ind = NULL; int enc = PDF_SIMPLE_ENCODING_LATIN; - if (!strcmp(encname, "Latin")) enc = PDF_SIMPLE_ENCODING_LATIN; - else if (!strcmp(encname, "Greek")) enc = PDF_SIMPLE_ENCODING_GREEK; - else if (!strcmp(encname, "Cyrillic")) enc = PDF_SIMPLE_ENCODING_CYRILLIC; + if (!strcmp(encname, "Latin") || !strcmp(encname, "Latn")) + enc = PDF_SIMPLE_ENCODING_LATIN; + else if (!strcmp(encname, "Greek") || !strcmp(encname, "Grek")) + enc = PDF_SIMPLE_ENCODING_GREEK; + else if (!strcmp(encname, "Cyrillic") || !strcmp(encname, "Cyrl")) + enc = PDF_SIMPLE_ENCODING_CYRILLIC; fz_try(ctx) ind = pdf_add_simple_font(ctx, pdf, font, enc); @@ -3302,18 +3305,15 @@ static void ffi_PDFDocument_addCJKFont(js_State *J) fz_context *ctx = js_getcontext(J); pdf_document *pdf = js_touserdata(J, 0, "pdf_document"); fz_font *font = js_touserdata(J, 1, "fz_font"); - const char *on = js_tostring(J, 2); + const char *lang = js_tostring(J, 2); const char *wm = js_tostring(J, 3); const char *ss = js_tostring(J, 4); - int ord = FZ_ADOBE_JAPAN_1; + int ordering; int wmode = 0; int serif = 1; pdf_obj *ind = NULL; - if (!strcmp(on, "CNS1") || !strcmp(on, "TW") || !strcmp(on, "TC") || !strcmp(on, "Hant")) ord = FZ_ADOBE_CNS_1; - else if (!strcmp(on, "GB1") || !strcmp(on, "CN") || !strcmp(on, "SC") || !strcmp(on, "Hans")) ord = FZ_ADOBE_GB_1; - else if (!strcmp(on, "Korea1") || !strcmp(on, "KR") || !strcmp(on, "KO")) ord = FZ_ADOBE_KOREA_1; - else if (!strcmp(on, "Japan1") || !strcmp(on, "JP") || !strcmp(on, "JA")) ord = FZ_ADOBE_JAPAN_1; + ordering = fz_lookup_cjk_ordering_by_language(lang); if (!strcmp(wm, "V")) wmode = 1; @@ -3321,7 +3321,7 @@ static void ffi_PDFDocument_addCJKFont(js_State *J) serif = 0; fz_try(ctx) - ind = pdf_add_cjk_font(ctx, pdf, font, ord, wmode, serif); + ind = pdf_add_cjk_font(ctx, pdf, font, ordering, wmode, serif); fz_catch(ctx) rethrow(J); diff --git a/source/tools/pdfcreate.c b/source/tools/pdfcreate.c index 52d16dbe..043936e4 100644 --- a/source/tools/pdfcreate.c +++ b/source/tools/pdfcreate.c @@ -22,7 +22,7 @@ static void usage(void) "\t%%%%MediaBox LLX LLY URX URY\n" "\t%%%%Rotate Angle\n" "\t%%%%Font Name Filename (or base 14 font name)\n" - "\t%%%%CJKFont Name Ordering WMode Style (Ordering=CNS1|GB1|Japan1|Korea1, WMode=H|V, Style=serif|sans)\n" + "\t%%%%CJKFont Name Language WMode Style (Language=zh-Hant|zh-Hans|ja|ko, WMode=H|V, Style=serif|sans)\n" "\t%%%%Image Name Filename\n\n" ); fputs(fz_pdf_write_options_usage, stderr); @@ -55,9 +55,12 @@ static void add_font_res(pdf_obj *resources, char *name, char *path, char *encna enc = PDF_SIMPLE_ENCODING_LATIN; if (encname) { - if (!strcmp(encname, "Latin")) enc = PDF_SIMPLE_ENCODING_LATIN; - else if (!strcmp(encname, "Greek")) enc = PDF_SIMPLE_ENCODING_GREEK; - else if (!strcmp(encname, "Cyrillic")) enc = PDF_SIMPLE_ENCODING_CYRILLIC; + if (!strcmp(encname, "Latin") || !strcmp(encname, "Latn")) + enc = PDF_SIMPLE_ENCODING_LATIN; + else if (!strcmp(encname, "Greek") || !strcmp(encname, "Grek")) + enc = PDF_SIMPLE_ENCODING_GREEK; + else if (!strcmp(encname, "Cyrillic") || !strcmp(encname, "Cyrl")) + enc = PDF_SIMPLE_ENCODING_CYRILLIC; } ref = pdf_add_simple_font(ctx, doc, font, enc); @@ -67,18 +70,14 @@ static void add_font_res(pdf_obj *resources, char *name, char *path, char *encna fz_drop_font(ctx, font); } -static void add_cjkfont_res(pdf_obj *resources, char *name, char *on, char *wm, char *style) +static void add_cjkfont_res(pdf_obj *resources, char *name, char *lang, char *wm, char *style) { const unsigned char *data; int size, index, ordering, wmode, serif; fz_font *font; pdf_obj *subres, *ref; - if (!strcmp(on, "CNS1") || !strcmp(on, "TW") || !strcmp(on, "TC") || !strcmp(on, "Hant")) ordering = FZ_ADOBE_CNS_1; - else if (!strcmp(on, "GB1") || !strcmp(on, "CN") || !strcmp(on, "SC") || !strcmp(on, "Hans")) ordering = FZ_ADOBE_GB_1; - else if (!strcmp(on, "Japan1") || !strcmp(on, "JP") || !strcmp(on, "JA")) ordering = FZ_ADOBE_JAPAN_1; - else if (!strcmp(on, "Korea1") || !strcmp(on, "KR") || !strcmp(on, "KO")) ordering = FZ_ADOBE_KOREA_1; - else ordering = FZ_ADOBE_JAPAN_1; + ordering = fz_lookup_cjk_ordering_by_language(lang); if (wm && !strcmp(wm, "V")) wmode = 1; @@ -90,7 +89,7 @@ static void add_cjkfont_res(pdf_obj *resources, char *name, char *on, char *wm, else serif = 1; - data = fz_lookup_cjk_font(ctx, ordering, serif, &size, &index); + data = fz_lookup_cjk_font(ctx, ordering, &size, &index); font = fz_new_font_from_memory(ctx, NULL, data, size, index, 0); subres = pdf_dict_get(ctx, resources, PDF_NAME(Font)); @@ -134,7 +133,7 @@ The input is a raw content stream, with commands embedded in comments: %%MediaBox LLX LLY URX URY %%Rotate Angle %%Font Name Filename (or base 14 font name) [Encoding (Latin, Greek or Cyrillic)] -%%CJKFont Name Ordering WMode Style (Ordering=CNS1|GB1|Japan1|Korea1, WMode=H|V, Style=serif|sans) +%%CJKFont Name Language WMode Style (Language=zh-Hant|zh-Hans|ja|ko, WMode=H|V, Style=serif|sans) %%Image Name Filename */ static void create_page(char *input) @@ -183,12 +182,12 @@ static void create_page(char *input) else if (!strcmp(s, "%%CJKFont")) { char *name = fz_strsep(&p, " "); - char *ordering = fz_strsep(&p, " "); + char *lang = fz_strsep(&p, " "); char *wmode = fz_strsep(&p, " "); char *style = fz_strsep(&p, " "); - if (!name || !ordering) + if (!name || !lang) fz_throw(ctx, FZ_ERROR_GENERIC, "CJKFont directive missing arguments"); - add_cjkfont_res(resources, name, ordering, wmode, style); + add_cjkfont_res(resources, name, lang, wmode, style); } else if (!strcmp(s, "%%Image")) { -- cgit v1.2.3