diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2016-06-23 13:41:53 +0200 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2016-06-23 16:10:29 +0100 |
commit | 6e48c939dc9913a6af747d5b6961624551c8d90d (patch) | |
tree | 4b3f69c237dc7af00606c87ac94cd2c4893da628 /source | |
parent | cf7b2cbbfe6192fbf697237735ab45bc951304e4 (diff) | |
download | mupdf-6e48c939dc9913a6af747d5b6961624551c8d90d.tar.xz |
epub: Use markup language when shaping and selecting fallback fonts.
Diffstat (limited to 'source')
-rw-r--r-- | source/fitz/font.c | 44 | ||||
-rw-r--r-- | source/fitz/noto.c | 247 | ||||
-rw-r--r-- | source/fitz/text.c | 41 | ||||
-rw-r--r-- | source/html/html-layout.c | 65 |
4 files changed, 228 insertions, 169 deletions
diff --git a/source/fitz/font.c b/source/fitz/font.c index 792e1875..b0248bd7 100644 --- a/source/fitz/font.c +++ b/source/fitz/font.c @@ -295,9 +295,10 @@ fz_font *fz_load_system_cjk_font(fz_context *ctx, const char *name, int ros, int return font; } -fz_font *fz_load_fallback_font(fz_context *ctx, int script, int serif, int bold, int italic) +fz_font *fz_load_fallback_font(fz_context *ctx, int script, int language, int serif, int bold, int italic) { const char *data; + int index; int size; if (script < 0 || script > nelem(ctx->font->fallback)) @@ -305,25 +306,42 @@ fz_font *fz_load_fallback_font(fz_context *ctx, int script, int serif, int bold, /* TODO: bold and italic */ + index = script; + if (script == UCDN_SCRIPT_HAN) + { + switch (language) + { + case FZ_LANG_ja: index = UCDN_LAST_SCRIPT + 1; break; + case FZ_LANG_ko: index = UCDN_LAST_SCRIPT + 2; break; + case FZ_LANG_zh_Hant: index = UCDN_LAST_SCRIPT + 3; break; + case FZ_LANG_zh_Hans: index = UCDN_LAST_SCRIPT + 4; break; + } + } + if (script == UCDN_SCRIPT_ARABIC) + { + if (language == FZ_LANG_ur || language == FZ_LANG_urd) + index = UCDN_LAST_SCRIPT + 5; + } + if (serif) { - if (ctx->font->fallback[script].serif) - return ctx->font->fallback[script].serif; - data = fz_lookup_noto_font(ctx, script, 1, &size); + if (ctx->font->fallback[index].serif) + return ctx->font->fallback[index].serif; + data = fz_lookup_noto_font(ctx, script, language, 1, &size); if (data) { - ctx->font->fallback[script].serif = fz_new_font_from_memory(ctx, NULL, data, size, 0, 0); - return ctx->font->fallback[script].serif; + ctx->font->fallback[index].serif = fz_new_font_from_memory(ctx, NULL, data, size, 0, 0); + return ctx->font->fallback[index].serif; } } - if (ctx->font->fallback[script].sans) - return ctx->font->fallback[script].sans; - data = fz_lookup_noto_font(ctx, script, 0, &size); + if (ctx->font->fallback[index].sans) + return ctx->font->fallback[index].sans; + data = fz_lookup_noto_font(ctx, script, language, 0, &size); if (data) { - ctx->font->fallback[script].sans = fz_new_font_from_memory(ctx, NULL, data, size, 0, 0); - return ctx->font->fallback[script].sans; + ctx->font->fallback[index].sans = fz_new_font_from_memory(ctx, NULL, data, size, 0, 0); + return ctx->font->fallback[index].sans; } return NULL; @@ -1488,7 +1506,7 @@ fz_encode_character(fz_context *ctx, fz_font *font, int ucs) /* FIXME: This should take language too eventually, to allow for fonts where we can select different * languages using opentype features. */ int -fz_encode_character_with_fallback(fz_context *ctx, fz_font *user_font, int unicode, int script, fz_font **out_font) +fz_encode_character_with_fallback(fz_context *ctx, fz_font *user_font, int unicode, int script, int language, fz_font **out_font) { fz_font *font; int gid; @@ -1500,7 +1518,7 @@ fz_encode_character_with_fallback(fz_context *ctx, fz_font *user_font, int unico if (script == 0) script = ucdn_get_script(unicode); - font = fz_load_fallback_font(ctx, script, user_font->is_serif, user_font->is_bold, user_font->is_italic); + font = fz_load_fallback_font(ctx, script, language, user_font->is_serif, user_font->is_bold, user_font->is_italic); if (font) { gid = fz_encode_character(ctx, font, unicode); diff --git a/source/fitz/noto.c b/source/fitz/noto.c index 70f3bb8f..894d7bd6 100644 --- a/source/fitz/noto.c +++ b/source/fitz/noto.c @@ -6,7 +6,7 @@ DroidSansFallback from Android for CJK. Charis SIL from SIL. - Define TOFU to skip all the Noto fonts except CJK. + Define TOFU to only include the Base14 and CJK fonts. Define TOFU_CJK to skip CJK font. Define TOFU_CJK_EXT to skip CJK Extension A support. @@ -15,6 +15,8 @@ Define TOFU_HISTORIC to skip ancient/historic scripts. Define TOFU_SYMBOL to skip symbol font. Define TOFU_SIL to skip the SIL fonts. + + Define TOFU_BASE14 to skip the Base 14 fonts (warning: makes PDF unusable). */ #ifdef NOTO_SMALL @@ -37,39 +39,39 @@ #endif #define RETURN(NAME) \ - do {\ + do { \ extern const int fz_font_ ## NAME ## _size; \ extern const char fz_font_ ## NAME []; \ - return *size = fz_font_ ## NAME ## _size, fz_font_ ## NAME;\ + return *size = fz_font_ ## NAME ## _size, fz_font_ ## NAME; \ } while (0) const char * fz_lookup_base14_font(fz_context *ctx, const char *name, int *size) { #ifndef TOFU_BASE14 - if (!strcmp(name, "Courier")) { RETURN(NimbusMonoPS_Regular_cff); } - if (!strcmp(name, "Courier-Oblique")) { RETURN(NimbusMonoPS_Italic_cff); } - if (!strcmp(name, "Courier-Bold")) { RETURN(NimbusMonoPS_Bold_cff); } - if (!strcmp(name, "Courier-BoldOblique")) { RETURN(NimbusMonoPS_BoldItalic_cff); } - if (!strcmp(name, "Helvetica")) { RETURN(NimbusSans_Regular_cff); } - if (!strcmp(name, "Helvetica-Oblique")) { RETURN(NimbusSans_Oblique_cff); } - if (!strcmp(name, "Helvetica-Bold")) { RETURN(NimbusSans_Bold_cff); } - if (!strcmp(name, "Helvetica-BoldOblique")) { RETURN(NimbusSans_BoldOblique_cff); } - if (!strcmp(name, "Times-Roman")) { RETURN(NimbusRoman_Regular_cff); } - if (!strcmp(name, "Times-Italic")) { RETURN(NimbusRoman_Italic_cff); } - if (!strcmp(name, "Times-Bold")) { RETURN(NimbusRoman_Bold_cff); } - if (!strcmp(name, "Times-BoldItalic")) { RETURN(NimbusRoman_BoldItalic_cff); } - if (!strcmp(name, "Symbol")) { RETURN(StandardSymbolsPS_cff); } - if (!strcmp(name, "ZapfDingbats")) { RETURN(Dingbats_cff); } + if (!strcmp(name, "Courier")) RETURN(NimbusMonoPS_Regular_cff); + if (!strcmp(name, "Courier-Oblique")) RETURN(NimbusMonoPS_Italic_cff); + if (!strcmp(name, "Courier-Bold")) RETURN(NimbusMonoPS_Bold_cff); + if (!strcmp(name, "Courier-BoldOblique")) RETURN(NimbusMonoPS_BoldItalic_cff); + if (!strcmp(name, "Helvetica")) RETURN(NimbusSans_Regular_cff); + if (!strcmp(name, "Helvetica-Oblique")) RETURN(NimbusSans_Oblique_cff); + if (!strcmp(name, "Helvetica-Bold")) RETURN(NimbusSans_Bold_cff); + if (!strcmp(name, "Helvetica-BoldOblique")) RETURN(NimbusSans_BoldOblique_cff); + if (!strcmp(name, "Times-Roman")) RETURN(NimbusRoman_Regular_cff); + if (!strcmp(name, "Times-Italic")) RETURN(NimbusRoman_Italic_cff); + if (!strcmp(name, "Times-Bold")) RETURN(NimbusRoman_Bold_cff); + if (!strcmp(name, "Times-BoldItalic")) RETURN(NimbusRoman_BoldItalic_cff); + if (!strcmp(name, "Symbol")) RETURN(StandardSymbolsPS_cff); + if (!strcmp(name, "ZapfDingbats")) RETURN(Dingbats_cff); #endif return *size = 0, NULL; } #define FAMILY(R, I, B, BI) \ if (!is_bold) { \ - if (!is_italic) { RETURN(R); } else { RETURN(I); } \ + if (!is_italic) RETURN(R); else RETURN(I); \ } else { \ - if (!is_italic) { RETURN(B); } else { RETURN(BI); } \ + if (!is_italic) RETURN(B); else RETURN(BI); \ } const char * @@ -140,20 +142,16 @@ fz_lookup_cjk_font(fz_context *ctx, int registry, int serif, int wmode, int *siz #endif } -#define Noto(SANS) { RETURN(Noto ## SANS ## _Regular_ttf); } break +#define Noto(SANS) RETURN(Noto ## SANS ## _Regular_ttf) #define Noto2(SANS,SERIF) \ - if (serif) { RETURN(Noto ## SERIF ## _Regular_ttf); } \ - else { RETURN(Noto ## SANS ## _Regular_ttf); } \ - break - -#define Noto3(SANS,SERIF,UNUSED) \ - Noto2(SANS,SERIF) + if (serif) { RETURN(Noto ## SERIF ## _Regular_ttf); } else { RETURN(Noto ## SANS ## _Regular_ttf); } const char * -fz_lookup_noto_font(fz_context *ctx, int script, int serif, int *size) +fz_lookup_noto_font(fz_context *ctx, int script, int language, int serif, int *size) { - /* Unused Noto fonts: NastaliqUrdu, SansSyriacEstrangela */ + /* TODO: Noto(SansSyriacEstrangela); */ + /* TODO: Noto(SansSyriacWestern); */ switch (script) { @@ -171,141 +169,152 @@ fz_lookup_noto_font(fz_context *ctx, int script, int serif, int *size) case UCDN_SCRIPT_BOPOMOFO: return fz_lookup_cjk_font(ctx, FZ_ADOBE_GB_1, serif, 0, size, NULL); case UCDN_SCRIPT_HAN: - return fz_lookup_cjk_font(ctx, FZ_ADOBE_GB_1, serif, 0, size, NULL); + switch (language) + { + case FZ_LANG_ja: return fz_lookup_cjk_font(ctx, FZ_ADOBE_JAPAN_1, serif, 0, size, NULL); + case FZ_LANG_ko: return fz_lookup_cjk_font(ctx, FZ_ADOBE_KOREA_1, serif, 0, size, NULL); + case FZ_LANG_zh_Hant: return fz_lookup_cjk_font(ctx, FZ_ADOBE_CNS_1, serif, 0, size, NULL); + default: + case FZ_LANG_zh_Hans: return fz_lookup_cjk_font(ctx, FZ_ADOBE_GB_1, serif, 0, size, NULL); + } #ifndef TOFU - -#ifndef TOFU_HISTORIC - case UCDN_SCRIPT_IMPERIAL_ARAMAIC: Noto(SansImperialAramaic); - case UCDN_SCRIPT_AVESTAN: Noto(SansAvestan); - case UCDN_SCRIPT_CARIAN: Noto(SansCarian); - case UCDN_SCRIPT_CYPRIOT: Noto(SansCypriot); - case UCDN_SCRIPT_EGYPTIAN_HIEROGLYPHS: Noto(SansEgyptianHieroglyphs); - case UCDN_SCRIPT_GLAGOLITIC: Noto(SansGlagolitic); - case UCDN_SCRIPT_GOTHIC: Noto(SansGothic); - case UCDN_SCRIPT_OLD_ITALIC: Noto(SansOldItalic); - case UCDN_SCRIPT_KHAROSHTHI: Noto(SansKharoshthi); - case UCDN_SCRIPT_KAITHI: Noto(SansKaithi); - case UCDN_SCRIPT_LINEAR_B: Noto(SansLinearB); - case UCDN_SCRIPT_LYCIAN: Noto(SansLycian); - case UCDN_SCRIPT_LYDIAN: Noto(SansLydian); - case UCDN_SCRIPT_OGHAM: Noto(SansOgham); - case UCDN_SCRIPT_OLD_TURKIC: Noto(SansOldTurkic); - case UCDN_SCRIPT_PHAGS_PA: Noto(SansPhagsPa); - case UCDN_SCRIPT_INSCRIPTIONAL_PAHLAVI: Noto(SansInscriptionalPahlavi); - case UCDN_SCRIPT_INSCRIPTIONAL_PARTHIAN: Noto(SansInscriptionalParthian); - case UCDN_SCRIPT_RUNIC: Noto(SansRunic); - case UCDN_SCRIPT_OLD_SOUTH_ARABIAN: Noto(SansOldSouthArabian); - case UCDN_SCRIPT_UGARITIC: Noto(SansUgaritic); - case UCDN_SCRIPT_OLD_PERSIAN: Noto(SansOldPersian); - case UCDN_SCRIPT_CUNEIFORM: Noto(SansCuneiform); - case UCDN_SCRIPT_COPTIC: Noto(SansCoptic); -#endif - case UCDN_SCRIPT_LATIN: Noto2(Sans, Serif); case UCDN_SCRIPT_GREEK: Noto2(Sans, Serif); case UCDN_SCRIPT_CYRILLIC: Noto2(Sans, Serif); + + case UCDN_SCRIPT_ARABIC: + if (language == FZ_LANG_ur || language == FZ_LANG_urd) + Noto(NastaliqUrdu); + Noto2(KufiArabic, NaskhArabic); + case UCDN_SCRIPT_ARMENIAN: Noto2(SansArmenian, SerifArmenian); - case UCDN_SCRIPT_HEBREW: Noto(SansHebrew); - case UCDN_SCRIPT_ARABIC: Noto3(KufiArabic, NaskhArabic, NastaliqUrdu); - case UCDN_SCRIPT_SYRIAC: Noto3(SansSyriacEastern, SansSyriacWestern, SansSyriacEstrangela); - case UCDN_SCRIPT_THAANA: Noto(SansThaana); - case UCDN_SCRIPT_DEVANAGARI: Noto(SansDevanagari); + case UCDN_SCRIPT_BALINESE: Noto(SansBalinese); + case UCDN_SCRIPT_BAMUM: Noto(SansBamum); + case UCDN_SCRIPT_BATAK: Noto(SansBatak); case UCDN_SCRIPT_BENGALI: Noto2(SansBengali, SerifBengali); - case UCDN_SCRIPT_GURMUKHI: Noto(SansGurmukhi); + case UCDN_SCRIPT_CANADIAN_ABORIGINAL: Noto(SansCanadianAboriginal); + case UCDN_SCRIPT_CHAM: Noto(SansCham); + case UCDN_SCRIPT_CHEROKEE: Noto(SansCherokee); + case UCDN_SCRIPT_DEVANAGARI: Noto(SansDevanagari); + case UCDN_SCRIPT_ETHIOPIC: Noto(SansEthiopic); + case UCDN_SCRIPT_GEORGIAN: Noto2(SansGeorgian, SerifGeorgian); case UCDN_SCRIPT_GUJARATI: Noto2(SansGujarati, SerifGujarati); - case UCDN_SCRIPT_ORIYA: Noto(SansOriya); - case UCDN_SCRIPT_TAMIL: Noto2(SansTamil, SerifTamil); - case UCDN_SCRIPT_TELUGU: Noto2(SansTelugu, SerifTelugu); + case UCDN_SCRIPT_GURMUKHI: Noto(SansGurmukhi); + case UCDN_SCRIPT_HEBREW: Noto(SansHebrew); + case UCDN_SCRIPT_JAVANESE: Noto(SansJavanese); case UCDN_SCRIPT_KANNADA: Noto2(SansKannada, SerifKannada); + case UCDN_SCRIPT_KAYAH_LI: Noto(SansKayahLi); + case UCDN_SCRIPT_KHMER: Noto2(SansKhmer, SerifKhmer); + case UCDN_SCRIPT_LAO: Noto2(SansLao, SerifLao); + case UCDN_SCRIPT_LEPCHA: Noto(SansLepcha); + case UCDN_SCRIPT_LIMBU: Noto(SansLimbu); + case UCDN_SCRIPT_LISU: Noto(SansLisu); case UCDN_SCRIPT_MALAYALAM: Noto2(SansMalayalam, SerifMalayalam); + case UCDN_SCRIPT_MANDAIC: Noto(SansMandaic); + case UCDN_SCRIPT_MEETEI_MAYEK: Noto(SansMeeteiMayek); + case UCDN_SCRIPT_MONGOLIAN: Noto(SansMongolian); + case UCDN_SCRIPT_MYANMAR: Noto(SansMyanmar); + case UCDN_SCRIPT_NEW_TAI_LUE: Noto(SansNewTaiLue); + case UCDN_SCRIPT_NKO: Noto(SansNKo); + case UCDN_SCRIPT_OL_CHIKI: Noto(SansOlChiki); + case UCDN_SCRIPT_ORIYA: Noto(SansOriya); + case UCDN_SCRIPT_SAURASHTRA: Noto(SansSaurashtra); case UCDN_SCRIPT_SINHALA: Noto(SansSinhala); + case UCDN_SCRIPT_SUNDANESE: Noto(SansSundanese); + case UCDN_SCRIPT_SYLOTI_NAGRI: Noto(SansSylotiNagri); + case UCDN_SCRIPT_SYRIAC: Noto(SansSyriacEastern); + case UCDN_SCRIPT_TAI_LE: Noto(SansTaiLe); + case UCDN_SCRIPT_TAI_THAM: Noto(SansTaiTham); + case UCDN_SCRIPT_TAI_VIET: Noto(SansTaiViet); + case UCDN_SCRIPT_TAMIL: Noto2(SansTamil, SerifTamil); + case UCDN_SCRIPT_TELUGU: Noto2(SansTelugu, SerifTelugu); + case UCDN_SCRIPT_THAANA: Noto(SansThaana); case UCDN_SCRIPT_THAI: Noto2(SansThai, SerifThai); - case UCDN_SCRIPT_LAO: Noto2(SansLao, SerifLao); case UCDN_SCRIPT_TIBETAN: Noto(SansTibetan); - case UCDN_SCRIPT_MYANMAR: Noto(SansMyanmar); - case UCDN_SCRIPT_GEORGIAN: Noto2(SansGeorgian, SerifGeorgian); - case UCDN_SCRIPT_ETHIOPIC: Noto(SansEthiopic); - case UCDN_SCRIPT_CHEROKEE: Noto(SansCherokee); - case UCDN_SCRIPT_CANADIAN_ABORIGINAL: Noto(SansCanadianAboriginal); - case UCDN_SCRIPT_KHMER: Noto2(SansKhmer, SerifKhmer); - case UCDN_SCRIPT_MONGOLIAN: Noto(SansMongolian); + case UCDN_SCRIPT_TIFINAGH: Noto(SansTifinagh); + case UCDN_SCRIPT_VAI: Noto(SansVai); case UCDN_SCRIPT_YI: Noto(SansYi); + +#ifndef TOFU_HISTORIC + case UCDN_SCRIPT_AVESTAN: Noto(SansAvestan); + case UCDN_SCRIPT_BRAHMI: Noto(SansBrahmi); + case UCDN_SCRIPT_BUGINESE: Noto(SansBuginese); + case UCDN_SCRIPT_BUHID: Noto(SansBuhid); + case UCDN_SCRIPT_CARIAN: Noto(SansCarian); + case UCDN_SCRIPT_COPTIC: Noto(SansCoptic); + case UCDN_SCRIPT_CUNEIFORM: Noto(SansCuneiform); + case UCDN_SCRIPT_CYPRIOT: Noto(SansCypriot); case UCDN_SCRIPT_DESERET: Noto(SansDeseret); - case UCDN_SCRIPT_TAGALOG: Noto(SansTagalog); + case UCDN_SCRIPT_EGYPTIAN_HIEROGLYPHS: Noto(SansEgyptianHieroglyphs); + case UCDN_SCRIPT_GLAGOLITIC: Noto(SansGlagolitic); + case UCDN_SCRIPT_GOTHIC: Noto(SansGothic); case UCDN_SCRIPT_HANUNOO: Noto(SansHanunoo); - case UCDN_SCRIPT_BUHID: Noto(SansBuhid); - case UCDN_SCRIPT_TAGBANWA: Noto(SansTagbanwa); - case UCDN_SCRIPT_LIMBU: Noto(SansLimbu); - case UCDN_SCRIPT_TAI_LE: Noto(SansTaiLe); - case UCDN_SCRIPT_SHAVIAN: Noto(SansShavian); + case UCDN_SCRIPT_IMPERIAL_ARAMAIC: Noto(SansImperialAramaic); + case UCDN_SCRIPT_INSCRIPTIONAL_PAHLAVI: Noto(SansInscriptionalPahlavi); + case UCDN_SCRIPT_INSCRIPTIONAL_PARTHIAN: Noto(SansInscriptionalParthian); + case UCDN_SCRIPT_KAITHI: Noto(SansKaithi); + case UCDN_SCRIPT_KHAROSHTHI: Noto(SansKharoshthi); + case UCDN_SCRIPT_LINEAR_B: Noto(SansLinearB); + case UCDN_SCRIPT_LYCIAN: Noto(SansLycian); + case UCDN_SCRIPT_LYDIAN: Noto(SansLydian); + case UCDN_SCRIPT_OGHAM: Noto(SansOgham); + case UCDN_SCRIPT_OLD_ITALIC: Noto(SansOldItalic); + case UCDN_SCRIPT_OLD_PERSIAN: Noto(SansOldPersian); + case UCDN_SCRIPT_OLD_SOUTH_ARABIAN: Noto(SansOldSouthArabian); + case UCDN_SCRIPT_OLD_TURKIC: Noto(SansOldTurkic); case UCDN_SCRIPT_OSMANYA: Noto(SansOsmanya); - case UCDN_SCRIPT_BUGINESE: Noto(SansBuginese); - case UCDN_SCRIPT_NEW_TAI_LUE: Noto(SansNewTaiLue); - case UCDN_SCRIPT_TIFINAGH: Noto(SansTifinagh); - case UCDN_SCRIPT_SYLOTI_NAGRI: Noto(SansSylotiNagri); - case UCDN_SCRIPT_BALINESE: Noto(SansBalinese); + case UCDN_SCRIPT_PHAGS_PA: Noto(SansPhagsPa); case UCDN_SCRIPT_PHOENICIAN: Noto(SansPhoenician); - case UCDN_SCRIPT_NKO: Noto(SansNKo); - case UCDN_SCRIPT_SUNDANESE: Noto(SansSundanese); - case UCDN_SCRIPT_LEPCHA: Noto(SansLepcha); - case UCDN_SCRIPT_OL_CHIKI: Noto(SansOlChiki); - case UCDN_SCRIPT_VAI: Noto(SansVai); - case UCDN_SCRIPT_SAURASHTRA: Noto(SansSaurashtra); - case UCDN_SCRIPT_KAYAH_LI: Noto(SansKayahLi); case UCDN_SCRIPT_REJANG: Noto(SansRejang); - case UCDN_SCRIPT_CHAM: Noto(SansCham); - case UCDN_SCRIPT_TAI_THAM: Noto(SansTaiTham); - case UCDN_SCRIPT_TAI_VIET: Noto(SansTaiViet); + case UCDN_SCRIPT_RUNIC: Noto(SansRunic); case UCDN_SCRIPT_SAMARITAN: Noto(SansSamaritan); - case UCDN_SCRIPT_LISU: Noto(SansLisu); - case UCDN_SCRIPT_BAMUM: Noto(SansBamum); - case UCDN_SCRIPT_JAVANESE: Noto(SansJavanese); - case UCDN_SCRIPT_MEETEI_MAYEK: Noto(SansMeeteiMayek); - case UCDN_SCRIPT_BATAK: Noto(SansBatak); - case UCDN_SCRIPT_BRAHMI: Noto(SansBrahmi); - case UCDN_SCRIPT_MANDAIC: Noto(SansMandaic); + case UCDN_SCRIPT_SHAVIAN: Noto(SansShavian); + case UCDN_SCRIPT_TAGALOG: Noto(SansTagalog); + case UCDN_SCRIPT_TAGBANWA: Noto(SansTagbanwa); + case UCDN_SCRIPT_UGARITIC: Noto(SansUgaritic); +#endif /* No fonts available for these scripts: */ + case UCDN_SCRIPT_BRAILLE: /* no dedicated font; fallback to NotoSansSymbols will cover this */ + case UCDN_SCRIPT_CHAKMA: break; + case UCDN_SCRIPT_MIAO: break; #ifndef TOFU_HISTORIC case UCDN_SCRIPT_AHOM: break; + case UCDN_SCRIPT_ANATOLIAN_HIEROGLYPHS: break; case UCDN_SCRIPT_BASSA_VAH: break; + case UCDN_SCRIPT_CAUCASIAN_ALBANIAN: break; + case UCDN_SCRIPT_DUPLOYAN: break; case UCDN_SCRIPT_ELBASAN: break; case UCDN_SCRIPT_GRANTHA: break; case UCDN_SCRIPT_HATRAN: break; - case UCDN_SCRIPT_ANATOLIAN_HIEROGLYPHS: break; - case UCDN_SCRIPT_OLD_HUNGARIAN: break; case UCDN_SCRIPT_KHOJKI: break; + case UCDN_SCRIPT_KHUDAWADI: break; case UCDN_SCRIPT_LINEAR_A: break; case UCDN_SCRIPT_MAHAJANI: break; case UCDN_SCRIPT_MANICHAEAN: break; + case UCDN_SCRIPT_MENDE_KIKAKUI: break; case UCDN_SCRIPT_MEROITIC_CURSIVE: break; case UCDN_SCRIPT_MEROITIC_HIEROGLYPHS: break; case UCDN_SCRIPT_MODI: break; + case UCDN_SCRIPT_MRO: break; case UCDN_SCRIPT_MULTANI: break; - case UCDN_SCRIPT_OLD_NORTH_ARABIAN: break; case UCDN_SCRIPT_NABATAEAN: break; - case UCDN_SCRIPT_PALMYRENE: break; + case UCDN_SCRIPT_OLD_HUNGARIAN: break; + case UCDN_SCRIPT_OLD_NORTH_ARABIAN: break; case UCDN_SCRIPT_OLD_PERMIC: break; + case UCDN_SCRIPT_PAHAWH_HMONG: break; + case UCDN_SCRIPT_PALMYRENE: break; + case UCDN_SCRIPT_PAU_CIN_HAU: break; case UCDN_SCRIPT_PSALTER_PAHLAVI: break; - case UCDN_SCRIPT_SIDDHAM: break; -#endif - case UCDN_SCRIPT_BRAILLE: break; /* no dedicated font */ - case UCDN_SCRIPT_CHAKMA: break; - case UCDN_SCRIPT_MIAO: break; case UCDN_SCRIPT_SHARADA: break; + case UCDN_SCRIPT_SIDDHAM: break; + case UCDN_SCRIPT_SIGNWRITING: break; case UCDN_SCRIPT_SORA_SOMPENG: break; case UCDN_SCRIPT_TAKRI: break; - case UCDN_SCRIPT_CAUCASIAN_ALBANIAN: break; - case UCDN_SCRIPT_DUPLOYAN: break; - case UCDN_SCRIPT_KHUDAWADI: break; - case UCDN_SCRIPT_MENDE_KIKAKUI: break; - case UCDN_SCRIPT_MRO: break; - case UCDN_SCRIPT_PAHAWH_HMONG: break; - case UCDN_SCRIPT_PAU_CIN_HAU: break; case UCDN_SCRIPT_TIRHUTA: break; case UCDN_SCRIPT_WARANG_CITI: break; - case UCDN_SCRIPT_SIGNWRITING: break; +#endif #endif } diff --git a/source/fitz/text.c b/source/fitz/text.c index 76838fe8..a0e2abc8 100644 --- a/source/fitz/text.c +++ b/source/fitz/text.c @@ -114,7 +114,7 @@ fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *tr while (*s) { s += fz_chartorune(&ucs, s); - gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, &font); + gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language); adv = fz_advance_glyph(ctx, font, gid, wmode); if (wmode == 0) @@ -227,6 +227,16 @@ fz_text_language fz_text_language_from_string(const char *str) if (str == NULL) return FZ_LANG_UNSET; + if (!strcmp(str, "zh-Hant") || + !strcmp(str, "zh-HK") || + !strcmp(str, "zh-MO") || + !strcmp(str, "zh-SG") || + !strcmp(str, "zh-TW")) + return FZ_LANG_zh_Hant; + if (!strcmp(str, "zh-Hans") || + !strcmp(str, "zh-CN")) + return FZ_LANG_zh_Hans; + /* 1st char */ if (str[0] >= 'a' && str[0] <= 'z') lang = str[0] - 'a' + 1; @@ -254,23 +264,30 @@ fz_text_language fz_text_language_from_string(const char *str) return lang; } -char *fz_string_from_text_language(char str[4], fz_text_language lang) +char *fz_string_from_text_language(char str[8], fz_text_language lang) { int c; - /* str is supposed to be at least 4 chars in size */ + /* str is supposed to be at least 8 chars in size */ if (str == NULL) return NULL; - c = lang % 27; - lang = lang / 27; - str[0] = c == 0 ? 0 : c - 1 + 'a'; - c = lang % 27; - lang = lang / 27; - str[1] = c == 0 ? 0 : c - 1 + 'a'; - c = lang % 27; - str[2] = c == 0 ? 0 : c - 1 + 'a'; - str[3] = 0; + if (lang == FZ_LANG_zh_Hant) + fz_strlcpy(str, "zh-Hant", 8); + else if (lang == FZ_LANG_zh_Hans) + fz_strlcpy(str, "zh-Hans", 8); + else + { + c = lang % 27; + lang = lang / 27; + str[0] = c == 0 ? 0 : c - 1 + 'a'; + c = lang % 27; + lang = lang / 27; + str[1] = c == 0 ? 0 : c - 1 + 'a'; + c = lang % 27; + str[2] = c == 0 ? 0 : c - 1 + 'a'; + str[3] = 0; + } return str; } diff --git a/source/html/html-layout.c b/source/html/html-layout.c index 95d6151a..e671c64d 100644 --- a/source/html/html-layout.c +++ b/source/html/html-layout.c @@ -154,6 +154,7 @@ static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html *top, fz_h flow->type = type; flow->expand = 0; flow->bidi_level = 0; + flow->markup_lang = 0; flow->breaks_line = 0; flow->box = inline_box; *top->flow_tail = flow; @@ -182,12 +183,13 @@ static void add_flow_shyphen(fz_context *ctx, fz_pool *pool, fz_html *top, fz_ht (void)add_flow(ctx, pool, top, inline_box, FLOW_SHYPHEN); } -static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html *top, fz_html *inline_box, const char *a, const char *b) +static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html *top, fz_html *inline_box, const char *a, const char *b, int lang) { fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_WORD); flow->content.text = fz_pool_alloc(ctx, pool, b - a + 1); memcpy(flow->content.text, a, b - a); flow->content.text[b - a] = 0; + flow->markup_lang = lang; } static void add_flow_image(fz_context *ctx, fz_pool *pool, fz_html *top, fz_html *inline_box, fz_image *img) @@ -223,7 +225,7 @@ static fz_html_flow *split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *fl return new_flow; } -static void flush_space(fz_context *ctx, fz_pool *pool, fz_html *flow, fz_html *inline_box, struct genstate *g) +static void flush_space(fz_context *ctx, fz_pool *pool, fz_html *flow, fz_html *inline_box, int lang, struct genstate *g) { static const char *space = " "; int bsp = inline_box->style.white_space & WS_ALLOW_BREAK_SPACE; @@ -234,7 +236,7 @@ static void flush_space(fz_context *ctx, fz_pool *pool, fz_html *flow, fz_html * if (bsp) add_flow_space(ctx, pool, flow, inline_box); else - add_flow_word(ctx, pool, flow, inline_box, space, space+1); + add_flow_word(ctx, pool, flow, inline_box, space, space+1, lang); } g->emit_white = 0; } @@ -276,7 +278,7 @@ static const char *pairbrk[29] = "_^^%%%^^^_______%%__^^^_____%", /* RI regional indicator */ }; -static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const char *text, struct genstate *g) +static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const char *text, int lang, struct genstate *g) { fz_html *flow; @@ -319,7 +321,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch if (bsp) add_flow_space(ctx, pool, flow, box); else - add_flow_word(ctx, pool, flow, box, space, space+1); + add_flow_word(ctx, pool, flow, box, space, space+1, lang); ++text; } g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a space */ @@ -329,7 +331,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch const char *prev, *mark = text; int c; - flush_space(ctx, pool, flow, box, g); + flush_space(ctx, pool, flow, box, lang, g); if (g->at_bol) g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; @@ -341,7 +343,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch if (c == 0xAD) /* soft hyphen */ { if (mark != prev) - add_flow_word(ctx, pool, flow, box, mark, prev); + add_flow_word(ctx, pool, flow, box, mark, prev, lang); add_flow_shyphen(ctx, pool, flow, box); mark = text; g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a soft hyphen */ @@ -361,7 +363,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch if (brk == '_') { if (mark != prev) - add_flow_word(ctx, pool, flow, box, mark, prev); + add_flow_word(ctx, pool, flow, box, mark, prev, lang); add_flow_sbreak(ctx, pool, flow, box); mark = prev; } @@ -371,7 +373,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch } } if (mark != text) - add_flow_word(ctx, pool, flow, box, mark, text); + add_flow_word(ctx, pool, flow, box, mark, text, lang); g->at_bol = 0; } @@ -420,12 +422,12 @@ static void generate_image(fz_context *ctx, fz_pool *pool, fz_html *box, fz_imag while (flow->type != BOX_FLOW) flow = flow->up; - flush_space(ctx, pool, flow, box, g); + flush_space(ctx, pool, flow, box, 0, g); if (!img) { const char *alt = "[image]"; - add_flow_word(ctx, pool, flow, box, alt, alt + 7); + add_flow_word(ctx, pool, flow, box, alt, alt + 7, 0); } else { @@ -578,7 +580,7 @@ static void insert_inline_box(fz_context *ctx, fz_pool *pool, fz_html *box, fz_h } static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top, - fz_css_match *up_match, int list_counter, int markup_dir, struct genstate *g) + fz_css_match *up_match, int list_counter, int markup_dir, int markup_lang, struct genstate *g) { fz_css_match match; fz_html *box; @@ -656,9 +658,11 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top, else if (display != DIS_NONE) { + const char *dir, *lang; int child_dir = markup_dir; + int child_lang = markup_lang; - const char *dir = fz_xml_att(node, "dir"); + dir = fz_xml_att(node, "dir"); if (dir) { if (!strcmp(dir, "auto")) @@ -671,6 +675,10 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top, child_dir = DEFAULT_DIR; } + lang = fz_xml_att(node, "lang"); + if (lang) + child_lang = fz_text_language_from_string(lang); + box = new_box(ctx, g->pool, child_dir); fz_apply_css_style(ctx, g->set, &box->style, &match); @@ -698,7 +706,7 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top, int child_counter = list_counter; if (!strcmp(tag, "ul") || !strcmp(tag, "ol")) child_counter = 0; - generate_boxes(ctx, fz_xml_down(node), box, &match, child_counter, child_dir, g); + generate_boxes(ctx, fz_xml_down(node), box, &match, child_counter, child_dir, child_lang, g); } } } @@ -721,11 +729,11 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top, /* Make sure not to recursively multiply font sizes. */ box->style.font_size.value = 1; box->style.font_size.unit = N_SCALE; - generate_text(ctx, g->pool, box, text, g); + generate_text(ctx, g->pool, box, text, markup_lang, g); } else { - generate_text(ctx, g->pool, top, text, g); + generate_text(ctx, g->pool, top, text, markup_lang, g); } } } @@ -760,6 +768,7 @@ typedef struct string_walker const char *s; fz_font *base_font; int script; + int language; fz_font *font; fz_font *next_font; hb_glyph_position_t *glyph_pos; @@ -813,7 +822,7 @@ static int quick_ligature(fz_context *ctx, string_walker *walker, unsigned int i return walker->glyph_info[i].codepoint; } -static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer_t *hb_buf, int rtl, fz_font *font, int script, const char *text) +static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer_t *hb_buf, int rtl, fz_font *font, int script, int language, const char *text) { walker->ctx = ctx; walker->hb_buf = hb_buf; @@ -823,6 +832,7 @@ static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer walker->s = text; walker->base_font = font; walker->script = script; + walker->language = language; walker->font = NULL; walker->next_font = NULL; } @@ -835,6 +845,7 @@ static int walk_string(string_walker *walker) FT_Face face; int fterr; int quickshape; + char lang[8]; walker->start = walker->end; walker->end = walker->s; @@ -850,7 +861,7 @@ static int walk_string(string_walker *walker) int c; walker->s += fz_chartorune(&c, walker->s); - (void)fz_encode_character_with_fallback(ctx, walker->base_font, c, walker->script, &walker->next_font); + (void)fz_encode_character_with_fallback(ctx, walker->base_font, c, walker->script, walker->language, &walker->next_font); if (walker->next_font != walker->font) { if (walker->font != NULL) @@ -876,8 +887,12 @@ static int walk_string(string_walker *walker) hb_buffer_clear_contents(walker->hb_buf); hb_buffer_set_direction(walker->hb_buf, walker->rtl ? HB_DIRECTION_RTL : HB_DIRECTION_LTR); - /* hb_buffer_set_script(hb_buf, hb_ucdn_script_translate(script)); */ - /* hb_buffer_set_language(hb_buf, hb_language_from_string("en", strlen("en"))); */ + /* hb_buffer_set_script(walker->hb_buf, hb_ucdn_script_translate(walker->script)); */ + if (walker->language) + { + fz_string_from_text_language(lang, walker->language); + hb_buffer_set_language(walker->hb_buf, hb_language_from_string(lang, strlen(lang))); + } /* hb_buffer_set_cluster_level(hb_buf, HB_BUFFER_CLUSTER_LEVEL_CHARACTERS); */ hb_buffer_add_utf8(walker->hb_buf, walker->start, walker->end - walker->start, 0, -1); @@ -955,7 +970,7 @@ static void measure_string(fz_context *ctx, fz_html_flow *node, hb_buffer_t *hb_ node->h = fz_from_css_number_scale(node->box->style.line_height, em, em, em); s = get_node_text(ctx, node); - init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->box->style.font, node->script, s); + init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->box->style.font, node->script, node->markup_lang, s); while (walk_string(&walker)) { int x = 0; @@ -1488,7 +1503,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p trm.f = y; s = get_node_text(ctx, node); - init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, style->font, node->script, s); + init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, style->font, node->script, node->markup_lang, s); while (walk_string(&walker)) { float node_scale = node->box->em / walker.scale; @@ -1726,7 +1741,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float while (*s) { s += fz_chartorune(&c, s); - g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font); + g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, FZ_LANG_UNSET, &font); w += fz_advance_glyph(ctx, font, g, 0) * box->em; } @@ -1736,7 +1751,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float while (*s) { s += fz_chartorune(&c, s); - g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font); + g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, FZ_LANG_UNSET, &font); fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_BIDI_NEUTRAL, FZ_LANG_UNSET); trm.e += fz_advance_glyph(ctx, font, g, 0) * box->em; } @@ -2301,7 +2316,7 @@ fz_parse_html(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const cha fz_apply_css_style(ctx, g.set, &box->style, &match); // TODO: transfer page margins out of this hacky box - generate_boxes(ctx, xml, box, &match, 0, DEFAULT_DIR, &g); + generate_boxes(ctx, xml, box, &match, 0, DEFAULT_DIR, FZ_LANG_UNSET, &g); fz_drop_css(ctx, g.css); fz_drop_xml(ctx, xml); |