From ab62bee35b271f4bf7c73a2cdb7539c20e68b70b Mon Sep 17 00:00:00 2001 From: Sebastian Rasmussen Date: Sat, 29 Oct 2016 14:21:27 +0800 Subject: Update UCDN database to Unicode 9.0.0. --- source/fitz/noto.c | 8 +- source/fitz/ucdn.c | 69 +- source/fitz/unicodedata_db.h | 4436 ++++++++++++++++++++++-------------------- 3 files changed, 2408 insertions(+), 2105 deletions(-) (limited to 'source') diff --git a/source/fitz/noto.c b/source/fitz/noto.c index 16840961..5d467a7c 100644 --- a/source/fitz/noto.c +++ b/source/fitz/noto.c @@ -288,13 +288,16 @@ fz_lookup_noto_font(fz_context *ctx, int script, int language, int serif, int *s #endif /* No fonts available for these scripts: */ - case UCDN_SCRIPT_BRAILLE: /* no dedicated font; fallback to NotoSansSymbols will cover this */ + case UCDN_SCRIPT_ADLAM: break; + case UCDN_SCRIPT_BRAILLE: break; /* no dedicated font; fallback to NotoSansSymbols will cover this */ case UCDN_SCRIPT_CHAKMA: break; case UCDN_SCRIPT_MIAO: break; + case UCDN_SCRIPT_NEWA: break; #ifndef TOFU_HISTORIC case UCDN_SCRIPT_AHOM: break; case UCDN_SCRIPT_ANATOLIAN_HIEROGLYPHS: break; case UCDN_SCRIPT_BASSA_VAH: break; + case UCDN_SCRIPT_BHAIKSUKI: break; case UCDN_SCRIPT_CAUCASIAN_ALBANIAN: break; case UCDN_SCRIPT_DUPLOYAN: break; case UCDN_SCRIPT_ELBASAN: break; @@ -305,6 +308,7 @@ fz_lookup_noto_font(fz_context *ctx, int script, int language, int serif, int *s case UCDN_SCRIPT_LINEAR_A: break; case UCDN_SCRIPT_MAHAJANI: break; case UCDN_SCRIPT_MANICHAEAN: break; + case UCDN_SCRIPT_MARCHEN: break; case UCDN_SCRIPT_MENDE_KIKAKUI: break; case UCDN_SCRIPT_MEROITIC_CURSIVE: break; case UCDN_SCRIPT_MEROITIC_HIEROGLYPHS: break; @@ -315,6 +319,7 @@ fz_lookup_noto_font(fz_context *ctx, int script, int language, int serif, int *s case UCDN_SCRIPT_OLD_HUNGARIAN: break; case UCDN_SCRIPT_OLD_NORTH_ARABIAN: break; case UCDN_SCRIPT_OLD_PERMIC: break; + case UCDN_SCRIPT_OSAGE: break; case UCDN_SCRIPT_PAHAWH_HMONG: break; case UCDN_SCRIPT_PALMYRENE: break; case UCDN_SCRIPT_PAU_CIN_HAU: break; @@ -324,6 +329,7 @@ fz_lookup_noto_font(fz_context *ctx, int script, int language, int serif, int *s case UCDN_SCRIPT_SIGNWRITING: break; case UCDN_SCRIPT_SORA_SOMPENG: break; case UCDN_SCRIPT_TAKRI: break; + case UCDN_SCRIPT_TANGUT: break; case UCDN_SCRIPT_TIRHUTA: break; case UCDN_SCRIPT_WARANG_CITI: break; #endif diff --git a/source/fitz/ucdn.c b/source/fitz/ucdn.c index d3dc0376..916d0ade 100644 --- a/source/fitz/ucdn.c +++ b/source/fitz/ucdn.c @@ -22,7 +22,6 @@ typedef struct { unsigned char bidi_class; unsigned char mirrored; unsigned char east_asian_width; - unsigned char normalization_check; unsigned char script; unsigned char linebreak_class; } UCDRecord; @@ -31,6 +30,11 @@ typedef struct { unsigned short from, to; } MirrorPair; +typedef struct { + unsigned short from, to; + unsigned char type; +} BracketPair; + typedef struct { unsigned int start; short count, index; @@ -56,11 +60,11 @@ static const UCDRecord *get_ucd_record(uint32_t code) if (code >= 0x110000) index = 0; else { - index = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1; + index = index0[code >> (SHIFT1+SHIFT2)] << SHIFT1; offset = (code >> SHIFT2) & ((1<= 0x110000) index = 0; else { - index = decomp_index0[code >> (DECOMP_SHIFT1+DECOMP_SHIFT2)] + index = decomp_index0[code >> (DECOMP_SHIFT1+DECOMP_SHIFT2)] << DECOMP_SHIFT1; offset = (code >> DECOMP_SHIFT2) & ((1<from - mpb->from; } +static int compare_bp(const void *a, const void *b) +{ + BracketPair *bpa = (BracketPair *)a; + BracketPair *bpb = (BracketPair *)b; + return bpa->from - bpb->from; +} + +static BracketPair *search_bp(uint32_t code) +{ + BracketPair bp = {0,0,2}; + BracketPair *res; + + bp.from = code; + res = (BracketPair *) bsearch(&bp, bracket_pairs, BIDI_BRACKET_LEN, + sizeof(BracketPair), compare_bp); + return res; +} + static int hangul_pair_decompose(uint32_t code, uint32_t *a, uint32_t *b) { int si = code - SBASE; @@ -228,9 +250,10 @@ int ucdn_get_resolved_linebreak_class(uint32_t code) case UCDN_LINEBREAK_CLASS_NL: return UCDN_LINEBREAK_CLASS_BK; - } - return record->linebreak_class; + default: + return record->linebreak_class; + } } uint32_t ucdn_mirror(uint32_t code) @@ -242,8 +265,8 @@ uint32_t ucdn_mirror(uint32_t code) return code; mp.from = code; - res = bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN, sizeof(MirrorPair), - compare_mp); + res = (MirrorPair *) bsearch(&mp, mirror_pairs, BIDI_MIRROR_LEN, + sizeof(MirrorPair), compare_mp); if (res == NULL) return code; @@ -251,6 +274,24 @@ uint32_t ucdn_mirror(uint32_t code) return res->to; } +uint32_t ucdn_paired_bracket(uint32_t code) +{ + BracketPair *res = search_bp(code); + if (res == NULL) + return code; + else + return res->to; +} + +int ucdn_paired_bracket_type(uint32_t code) +{ + BracketPair *res = search_bp(code); + if (res == NULL) + return UCDN_BIDI_PAIRED_BRACKET_TYPE_NONE; + else + return res->type; +} + int ucdn_decompose(uint32_t code, uint32_t *a, uint32_t *b) { const unsigned short *rec; @@ -289,11 +330,11 @@ int ucdn_compose(uint32_t *code, uint32_t a, uint32_t b) return 0; indexi = l * TOTAL_LAST + r; - index = comp_index0[indexi >> (COMP_SHIFT1+COMP_SHIFT2)] << COMP_SHIFT1; + index = comp_index0[indexi >> (COMP_SHIFT1+COMP_SHIFT2)] << COMP_SHIFT1; offset = (indexi >> COMP_SHIFT2) & ((1<