summary | refs | log | tree | commit | diff
path: root/source
diff options
context:
space:
mode:
author Tor Andersson <tor.andersson@artifex.com> 2018-09-05 15:27:22 +0200
committer Tor Andersson <tor.andersson@artifex.com> 2018-09-21 14:21:05 +0200
commit 48b026e7500c5f7239e173d14d09b9e2e272e874 (patch)
tree d6231e9b0c39d7a6eb361eff3762855ea05102d8 /source
parent 60103f2af83eb04c1e74b32fa9d29c74bf01c118 (diff)
download mupdf-48b026e7500c5f7239e173d14d09b9e2e272e874.tar.xz
Regularize language and script names.
Drop the unused 'serif' argument to the CJK lookup functions. Use the BCP 47 names for CJK scripts and languages: zh-Hant for traditional Chinese, zh-Hans for simplified Chinese, ja for Japanese, ko for Korean. The lookup function also allows commonly used language+country codes: zh-TW and zh-HK for traditional Chinese, zh-CN for simplified Chinese.
Diffstat (limited to 'source')
-rw-r--r-- source/fitz/font.c 19
-rw-r--r-- source/fitz/noto.c 39
-rw-r--r-- source/pdf/pdf-font.c 30
-rw-r--r-- source/tools/murun.c 20
-rw-r--r-- source/tools/pdfcreate.c 29
5 files changed, 74 insertions, 63 deletions
diff --git a/source/fitz/font.c b/source/fitz/font.c
index f9609aec..733d91da 100644
--- a/source/fitz/font.c
+++ b/source/fitz/font.c
@@ -230,7 +230,7 @@ struct fz_font_context_s
/* Cached fallback fonts */
fz_font *base14[14];
- fz_font *cjk[8];
+ fz_font *cjk[4];
struct { fz_font *serif, *sans; } fallback[256];
fz_font *symbol1, *symbol2;
fz_font *emoji;
@@ -416,7 +416,7 @@ fz_font *fz_load_fallback_font(fz_context *ctx, int script, int language, int se
*fontp = fz_load_system_fallback_font(ctx, script, language, serif, bold, italic);
if (!*fontp)
{
- data = fz_lookup_noto_font(ctx, script, language, serif, &size, &subfont);
+ data = fz_lookup_noto_font(ctx, script, language, &size, &subfont);
if (data)
*fontp = fz_new_font_from_memory(ctx, NULL, data, size, subfont, 0);
}
@@ -697,20 +697,19 @@ fz_new_base14_font(fz_context *ctx, const char *name)
}
fz_font *
-fz_new_cjk_font(fz_context *ctx, int ordering, int serif)
+fz_new_cjk_font(fz_context *ctx, int ordering)
{
const unsigned char *data;
int size, index;
- int x = (ordering * 2) + !!serif;
- if (x >= 0 && x < nelem(ctx->font->cjk))
+ if (ordering >= 0 && ordering < nelem(ctx->font->cjk))
{
- if (ctx->font->cjk[x])
- return fz_keep_font(ctx, ctx->font->cjk[x]);
- data = fz_lookup_cjk_font(ctx, ordering, serif, &size, &index);
+ if (ctx->font->cjk[ordering])
+ return fz_keep_font(ctx, ctx->font->cjk[ordering]);
+ data = fz_lookup_cjk_font(ctx, ordering, &size, &index);
if (data)
{
- ctx->font->cjk[x] = fz_new_font_from_memory(ctx, NULL, data, size, index, 0);
- return fz_keep_font(ctx, ctx->font->cjk[x]);
+ ctx->font->cjk[ordering] = fz_new_font_from_memory(ctx, NULL, data, size, index, 0);
+ return fz_keep_font(ctx, ctx->font->cjk[ordering]);
}
}
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find builtin CJK font");
diff --git a/source/fitz/noto.c b/source/fitz/noto.c
index 8301929f..c3eea9d6 100644
--- a/source/fitz/noto.c
+++ b/source/fitz/noto.c
@@ -139,18 +139,18 @@ fz_lookup_builtin_font(fz_context *ctx, const char *name, int is_bold, int is_it
}
const unsigned char *
-fz_lookup_cjk_font(fz_context *ctx, int ordering, int serif, int *size, int *subfont)
+fz_lookup_cjk_font(fz_context *ctx, int ordering, int *size, int *subfont)
{
*subfont = 0;
#ifndef TOFU_CJK
#ifndef TOFU_CJK_EXT
#ifndef TOFU_CJK_LANG
switch (ordering) {
- case FZ_ADOBE_JAPAN_1: *subfont=0; RETURN(han, SourceHanSerif_Regular_ttc);
- case FZ_ADOBE_KOREA_1: *subfont=1; RETURN(han, SourceHanSerif_Regular_ttc);
- case FZ_ADOBE_GB_1: *subfont=2; RETURN(han, SourceHanSerif_Regular_ttc);
+ case FZ_ADOBE_JAPAN: *subfont=0; RETURN(han, SourceHanSerif_Regular_ttc);
+ case FZ_ADOBE_KOREA: *subfont=1; RETURN(han, SourceHanSerif_Regular_ttc);
+ case FZ_ADOBE_GB: *subfont=2; RETURN(han, SourceHanSerif_Regular_ttc);
default:
- case FZ_ADOBE_CNS_1: *subfont=3; RETURN(han, SourceHanSerif_Regular_ttc);
+ case FZ_ADOBE_CNS: *subfont=3; RETURN(han, SourceHanSerif_Regular_ttc);
}
#else
RETURN(droid, DroidSansFallbackFull_ttf);
@@ -163,8 +163,21 @@ fz_lookup_cjk_font(fz_context *ctx, int ordering, int serif, int *size, int *sub
#endif
}
+int
+fz_lookup_cjk_ordering_by_language(const char *name)
+{
+ if (!strcmp(name, "zh-Hant")) return FZ_ADOBE_CNS;
+ if (!strcmp(name, "zh-TW")) return FZ_ADOBE_CNS;
+ if (!strcmp(name, "zh-HK")) return FZ_ADOBE_CNS;
+ if (!strcmp(name, "zh-Hans")) return FZ_ADOBE_GB;
+ if (!strcmp(name, "zh-CN")) return FZ_ADOBE_GB;
+ if (!strcmp(name, "ja")) return FZ_ADOBE_JAPAN;
+ if (!strcmp(name, "ko")) return FZ_ADOBE_KOREA;
+ return -1;
+}
+
const unsigned char *
-fz_lookup_noto_font(fz_context *ctx, int script, int language, int serif, int *size, int *subfont)
+fz_lookup_noto_font(fz_context *ctx, int script, int language, int *size, int *subfont)
{
/* TODO: Noto(SansSyriacEstrangela); */
/* TODO: Noto(SansSyriacWestern); */
@@ -180,20 +193,20 @@ fz_lookup_noto_font(fz_context *ctx, int script, int language, int serif, int *s
break;
case UCDN_SCRIPT_HANGUL:
- return fz_lookup_cjk_font(ctx, FZ_ADOBE_KOREA_1, serif, size, subfont);
+ return fz_lookup_cjk_font(ctx, FZ_ADOBE_KOREA, size, subfont);
case UCDN_SCRIPT_HIRAGANA:
case UCDN_SCRIPT_KATAKANA:
- return fz_lookup_cjk_font(ctx, FZ_ADOBE_JAPAN_1, serif, size, subfont);
+ return fz_lookup_cjk_font(ctx, FZ_ADOBE_JAPAN, size, subfont);
case UCDN_SCRIPT_BOPOMOFO:
- return fz_lookup_cjk_font(ctx, FZ_ADOBE_CNS_1, serif, size, subfont);
+ return fz_lookup_cjk_font(ctx, FZ_ADOBE_CNS, size, subfont);
case UCDN_SCRIPT_HAN:
switch (language)
{
- case FZ_LANG_ja: return fz_lookup_cjk_font(ctx, FZ_ADOBE_JAPAN_1, serif, size, subfont);
- case FZ_LANG_ko: return fz_lookup_cjk_font(ctx, FZ_ADOBE_KOREA_1, serif, size, subfont);
- case FZ_LANG_zh_Hans: return fz_lookup_cjk_font(ctx, FZ_ADOBE_GB_1, serif, size, subfont);
+ case FZ_LANG_ja: return fz_lookup_cjk_font(ctx, FZ_ADOBE_JAPAN, size, subfont);
+ case FZ_LANG_ko: return fz_lookup_cjk_font(ctx, FZ_ADOBE_KOREA, size, subfont);
+ case FZ_LANG_zh_Hans: return fz_lookup_cjk_font(ctx, FZ_ADOBE_GB, size, subfont);
default:
- case FZ_LANG_zh_Hant: return fz_lookup_cjk_font(ctx, FZ_ADOBE_CNS_1, serif, size, subfont);
+ case FZ_LANG_zh_Hant: return fz_lookup_cjk_font(ctx, FZ_ADOBE_CNS, size, subfont);
}
case UCDN_SCRIPT_BRAILLE: break; /* no dedicated font; fallback to NotoSansSymbols will cover this */
diff --git a/source/pdf/pdf-font.c b/source/pdf/pdf-font.c
index 3dae9884..a0df70fd 100644
--- a/source/pdf/pdf-font.c
+++ b/source/pdf/pdf-font.c
@@ -409,15 +409,15 @@ pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const cha
if (!fontdesc->font)
{
const unsigned char *data;
- int len;
- int index;
+ int size;
+ int subfont;
- data = fz_lookup_cjk_font(ctx, ros, serif, &len, &index);
+ data = fz_lookup_cjk_font(ctx, ros, &size, &subfont);
if (!data)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find builtin CJK font");
/* A glyph bbox cache is too big for CJK fonts. */
- fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, index, 0);
+ fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0);
}
fontdesc->font->flags.ft_substitute = 1;
@@ -451,13 +451,13 @@ pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontn
if (collection)
{
if (!strcmp(collection, "Adobe-CNS1"))
- pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS_1, serif);
+ pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif);
else if (!strcmp(collection, "Adobe-GB1"))
- pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB_1, serif);
+ pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif);
else if (!strcmp(collection, "Adobe-Japan1"))
- pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN_1, serif);
+ pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif);
else if (!strcmp(collection, "Adobe-Korea1"))
- pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA_1, serif);
+ pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif);
else
{
if (strcmp(collection, "Adobe-Identity") != 0)
@@ -2133,28 +2133,28 @@ pdf_add_cjk_font(fz_context *ctx, pdf_document *doc, fz_font *fzfont, int script
switch (script)
{
- case FZ_ADOBE_CNS_1: /* traditional chinese */
+ default:
+ script = FZ_ADOBE_CNS;
+ /* fall through */
+ case FZ_ADOBE_CNS: /* traditional chinese */
basefont = serif ? "Ming" : "Fangti";
encoding = wmode ? "UniCNS-UTF16-V" : "UniCNS-UTF16-H";
ordering = "CNS1";
supplement = 7;
break;
- case FZ_ADOBE_GB_1: /* simplified chinese */
+ case FZ_ADOBE_GB: /* simplified chinese */
basefont = serif ? "Song" : "Heiti";
encoding = wmode ? "UniGB-UTF16-V" : "UniGB-UTF16-H";
ordering = "GB1";
supplement = 5;
break;
- default:
- script = FZ_ADOBE_JAPAN_1;
- /* fall through */
- case FZ_ADOBE_JAPAN_1:
+ case FZ_ADOBE_JAPAN:
basefont = serif ? "Mincho" : "Gothic";
encoding = wmode ? "UniJIS-UTF16-V" : "UniJIS-UTF16-H";
ordering = "Japan1";
supplement = 6;
break;
- case FZ_ADOBE_KOREA_1:
+ case FZ_ADOBE_KOREA:
basefont = serif ? "Batang" : "Dotum";
encoding = wmode ? "UniKS-UTF16-V" : "UniKS-UTF16-H";
ordering = "Korea1";
diff --git a/source/tools/murun.c b/source/tools/murun.c
index 810b32ce..961d6a7a 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -3285,9 +3285,12 @@ static void ffi_PDFDocument_addSimpleFont(js_State *J)
pdf_obj *ind = NULL;
int enc = PDF_SIMPLE_ENCODING_LATIN;
- if (!strcmp(encname, "Latin")) enc = PDF_SIMPLE_ENCODING_LATIN;
- else if (!strcmp(encname, "Greek")) enc = PDF_SIMPLE_ENCODING_GREEK;
- else if (!strcmp(encname, "Cyrillic")) enc = PDF_SIMPLE_ENCODING_CYRILLIC;
+ if (!strcmp(encname, "Latin") || !strcmp(encname, "Latn"))
+ enc = PDF_SIMPLE_ENCODING_LATIN;
+ else if (!strcmp(encname, "Greek") || !strcmp(encname, "Grek"))
+ enc = PDF_SIMPLE_ENCODING_GREEK;
+ else if (!strcmp(encname, "Cyrillic") || !strcmp(encname, "Cyrl"))
+ enc = PDF_SIMPLE_ENCODING_CYRILLIC;
fz_try(ctx)
ind = pdf_add_simple_font(ctx, pdf, font, enc);
@@ -3302,18 +3305,15 @@ static void ffi_PDFDocument_addCJKFont(js_State *J)
fz_context *ctx = js_getcontext(J);
pdf_document *pdf = js_touserdata(J, 0, "pdf_document");
fz_font *font = js_touserdata(J, 1, "fz_font");
- const char *on = js_tostring(J, 2);
+ const char *lang = js_tostring(J, 2);
const char *wm = js_tostring(J, 3);
const char *ss = js_tostring(J, 4);
- int ord = FZ_ADOBE_JAPAN_1;
+ int ordering;
int wmode = 0;
int serif = 1;
pdf_obj *ind = NULL;
- if (!strcmp(on, "CNS1") || !strcmp(on, "TW") || !strcmp(on, "TC") || !strcmp(on, "Hant")) ord = FZ_ADOBE_CNS_1;
- else if (!strcmp(on, "GB1") || !strcmp(on, "CN") || !strcmp(on, "SC") || !strcmp(on, "Hans")) ord = FZ_ADOBE_GB_1;
- else if (!strcmp(on, "Korea1") || !strcmp(on, "KR") || !strcmp(on, "KO")) ord = FZ_ADOBE_KOREA_1;
- else if (!strcmp(on, "Japan1") || !strcmp(on, "JP") || !strcmp(on, "JA")) ord = FZ_ADOBE_JAPAN_1;
+ ordering = fz_lookup_cjk_ordering_by_language(lang);
if (!strcmp(wm, "V"))
wmode = 1;
@@ -3321,7 +3321,7 @@ static void ffi_PDFDocument_addCJKFont(js_State *J)
serif = 0;
fz_try(ctx)
- ind = pdf_add_cjk_font(ctx, pdf, font, ord, wmode, serif);
+ ind = pdf_add_cjk_font(ctx, pdf, font, ordering, wmode, serif);
fz_catch(ctx)
rethrow(J);
diff --git a/source/tools/pdfcreate.c b/source/tools/pdfcreate.c
index 52d16dbe..043936e4 100644
--- a/source/tools/pdfcreate.c
+++ b/source/tools/pdfcreate.c
@@ -22,7 +22,7 @@ static void usage(void)
"\t%%%%MediaBox LLX LLY URX URY\n"
"\t%%%%Rotate Angle\n"
"\t%%%%Font Name Filename (or base 14 font name)\n"
- "\t%%%%CJKFont Name Ordering WMode Style (Ordering=CNS1|GB1|Japan1|Korea1, WMode=H|V, Style=serif|sans)\n"
+ "\t%%%%CJKFont Name Language WMode Style (Language=zh-Hant|zh-Hans|ja|ko, WMode=H|V, Style=serif|sans)\n"
"\t%%%%Image Name Filename\n\n"
);
fputs(fz_pdf_write_options_usage, stderr);
@@ -55,9 +55,12 @@ static void add_font_res(pdf_obj *resources, char *name, char *path, char *encna
enc = PDF_SIMPLE_ENCODING_LATIN;
if (encname)
{
- if (!strcmp(encname, "Latin")) enc = PDF_SIMPLE_ENCODING_LATIN;
- else if (!strcmp(encname, "Greek")) enc = PDF_SIMPLE_ENCODING_GREEK;
- else if (!strcmp(encname, "Cyrillic")) enc = PDF_SIMPLE_ENCODING_CYRILLIC;
+ if (!strcmp(encname, "Latin") || !strcmp(encname, "Latn"))
+ enc = PDF_SIMPLE_ENCODING_LATIN;
+ else if (!strcmp(encname, "Greek") || !strcmp(encname, "Grek"))
+ enc = PDF_SIMPLE_ENCODING_GREEK;
+ else if (!strcmp(encname, "Cyrillic") || !strcmp(encname, "Cyrl"))
+ enc = PDF_SIMPLE_ENCODING_CYRILLIC;
}
ref = pdf_add_simple_font(ctx, doc, font, enc);
@@ -67,18 +70,14 @@ static void add_font_res(pdf_obj *resources, char *name, char *path, char *encna
fz_drop_font(ctx, font);
}
-static void add_cjkfont_res(pdf_obj *resources, char *name, char *on, char *wm, char *style)
+static void add_cjkfont_res(pdf_obj *resources, char *name, char *lang, char *wm, char *style)
{
const unsigned char *data;
int size, index, ordering, wmode, serif;
fz_font *font;
pdf_obj *subres, *ref;
- if (!strcmp(on, "CNS1") || !strcmp(on, "TW") || !strcmp(on, "TC") || !strcmp(on, "Hant")) ordering = FZ_ADOBE_CNS_1;
- else if (!strcmp(on, "GB1") || !strcmp(on, "CN") || !strcmp(on, "SC") || !strcmp(on, "Hans")) ordering = FZ_ADOBE_GB_1;
- else if (!strcmp(on, "Japan1") || !strcmp(on, "JP") || !strcmp(on, "JA")) ordering = FZ_ADOBE_JAPAN_1;
- else if (!strcmp(on, "Korea1") || !strcmp(on, "KR") || !strcmp(on, "KO")) ordering = FZ_ADOBE_KOREA_1;
- else ordering = FZ_ADOBE_JAPAN_1;
+ ordering = fz_lookup_cjk_ordering_by_language(lang);
if (wm && !strcmp(wm, "V"))
wmode = 1;
@@ -90,7 +89,7 @@ static void add_cjkfont_res(pdf_obj *resources, char *name, char *on, char *wm,
else
serif = 1;
- data = fz_lookup_cjk_font(ctx, ordering, serif, &size, &index);
+ data = fz_lookup_cjk_font(ctx, ordering, &size, &index);
font = fz_new_font_from_memory(ctx, NULL, data, size, index, 0);
subres = pdf_dict_get(ctx, resources, PDF_NAME(Font));
@@ -134,7 +133,7 @@ The input is a raw content stream, with commands embedded in comments:
%%MediaBox LLX LLY URX URY
%%Rotate Angle
%%Font Name Filename (or base 14 font name) [Encoding (Latin, Greek or Cyrillic)]
-%%CJKFont Name Ordering WMode Style (Ordering=CNS1|GB1|Japan1|Korea1, WMode=H|V, Style=serif|sans)
+%%CJKFont Name Language WMode Style (Language=zh-Hant|zh-Hans|ja|ko, WMode=H|V, Style=serif|sans)
%%Image Name Filename
*/
static void create_page(char *input)
@@ -183,12 +182,12 @@ static void create_page(char *input)
else if (!strcmp(s, "%%CJKFont"))
{
char *name = fz_strsep(&p, " ");
- char *ordering = fz_strsep(&p, " ");
+ char *lang = fz_strsep(&p, " ");
char *wmode = fz_strsep(&p, " ");
char *style = fz_strsep(&p, " ");
- if (!name || !ordering)
+ if (!name || !lang)
fz_throw(ctx, FZ_ERROR_GENERIC, "CJKFont directive missing arguments");
- add_cjkfont_res(resources, name, ordering, wmode, style);
+ add_cjkfont_res(resources, name, lang, wmode, style);
}
else if (!strcmp(s, "%%Image"))
{