diff options
-rw-r--r-- | include/mupdf/fitz/text.h | 23 | ||||
-rw-r--r-- | platform/java/mupdf_native.c | 4 | ||||
-rw-r--r-- | source/fitz/text.c | 55 | ||||
-rw-r--r-- | source/html/html-layout.c | 2 | ||||
-rw-r--r-- | source/pdf/pdf-appearance.c | 2 | ||||
-rw-r--r-- | source/pdf/pdf-op-run.c | 4 | ||||
-rw-r--r-- | source/tools/murun.c | 4 | ||||
-rw-r--r-- | source/xps/xps-glyphs.c | 2 |
8 files changed, 85 insertions, 11 deletions
diff --git a/include/mupdf/fitz/text.h b/include/mupdf/fitz/text.h index 682517c4..84f72957 100644 --- a/include/mupdf/fitz/text.h +++ b/include/mupdf/fitz/text.h @@ -39,7 +39,7 @@ typedef enum fz_text_direction_e typedef enum fz_text_language_e { - fz_lang_unset = 0 + FZ_LANG_UNSET = 0 /* FIXME: Fill in more */ } fz_text_language; @@ -50,7 +50,7 @@ struct fz_text_span_s int wmode : 1; /* 0 horizontal, 1 vertical */ int bidi_level : 7; /* The bidirectional level of text */ int markup_dir : 2; /* The direction of text as marked in the original document */ - int language : 8; /* The language as marked in the original document */ + int language : 15; /* The language as marked in the original document */ int len, cap; fz_text_item *items; fz_text_span *next; @@ -72,4 +72,23 @@ fz_rect *fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_sta fz_text *fz_clone_text(fz_context *ctx, const fz_text *text); +/* + Convert ISO 639 (639-{1,2,3,5}) language specification + strings losslessly to a 15 bit fz_text_language code. + + No validation is carried out. Obviously invalid (out + of spec) codes will be mapped to FZ_LANG_UNSET, but + well-formed (but undefined) codes will be blithely + accepted. +*/ +fz_text_language fz_text_language_from_string(const char *str); + +/* + Recover ISO 639 (639-{1,2,3,5}) language specification + strings losslessly from a 15 bit fz_text_language code. + + No validation is carried out. See note above. 
+*/ +char *fz_string_from_text_language(char str[4], fz_text_language lang); + #endif diff --git a/platform/java/mupdf_native.c b/platform/java/mupdf_native.c index f47d9bf6..ee8efd2f 100644 --- a/platform/java/mupdf_native.c +++ b/platform/java/mupdf_native.c @@ -3038,7 +3038,7 @@ FUN(Text_showGlyph)(JNIEnv *env, jobject self, jobject font_, jobject matrix_, j return; fz_try(ctx) - fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode); + fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET); fz_catch(ctx) jni_rethrow(env, ctx); } @@ -3060,7 +3060,7 @@ FUN(Text_showString)(JNIEnv *env, jobject self, jobject font_, jobject matrix_, return; fz_try(ctx) - fz_show_string(ctx, text, font, &trm, string, wmode); + fz_show_string(ctx, text, font, &trm, string, wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET); fz_always(ctx) (*env)->ReleaseStringUTFChars(env, string_, string); fz_catch(ctx) diff --git a/source/fitz/text.c b/source/fitz/text.c index 29a4506e..512c671c 100644 --- a/source/fitz/text.c +++ b/source/fitz/text.c @@ -219,3 +219,58 @@ fz_clone_text(fz_context *ctx, const fz_text *text) return new_text; } + +fz_text_language fz_text_language_from_string(const char *str) +{ + fz_text_language lang; + + if (str == NULL) + return FZ_LANG_UNSET; + + /* 1st char */ + if (str[0] >= 'a' && str[0] <= 'z') + lang = str[0] - 'a' + 1; + else if (str[0] >= 'A' && str[0] <= 'Z') + lang = str[0] - 'A' + 1; + else + return 0; + + /* 2nd char */ + if (str[1] >= 'a' && str[1] <= 'z') + lang += 27*(str[1] - 'a' + 1); + else if (str[1] >= 'A' && str[1] <= 'Z') + lang += 27*(str[1] - 'A' + 1); + else + return 0; /* There are no valid 1 char language codes */ + + /* 3rd char */ + if (str[2] >= 'a' && str[2] <= 'z') + lang += 27*27*(str[2] - 'a' + 1); + else if (str[2] >= 'A' && str[2] <= 'Z') + lang += 27*27*(str[2] - 'A' + 1); + + /* We don't support iso 639-6 4 char codes, cos the standard + * has been withdrawn, and no one uses them. 
*/ + return lang; +} + +char *fz_string_from_text_language(char str[4], fz_text_language lang) +{ + int c; + + /* str is supposed to be at least 4 chars in size */ + if (str == NULL) + return NULL; + + c = lang % 27; + lang = lang / 27; + str[0] = c == 0 ? 0 : c - 1 + 'a'; + c = lang % 27; + lang = lang / 27; + str[1] = c == 0 ? 0 : c - 1 + 'a'; + c = lang % 27; + str[2] = c == 0 ? 0 : c - 1 + 'a'; + str[3] = 0; + + return str; +} diff --git a/source/html/html-layout.c b/source/html/html-layout.c index a74354d1..c274460b 100644 --- a/source/html/html-layout.c +++ b/source/html/html-layout.c @@ -1528,7 +1528,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float { s += fz_chartorune(&c, s); g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font); - fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_DIR_UNSET, fz_lang_unset); + fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_DIR_UNSET, FZ_LANG_UNSET); trm.e += fz_advance_glyph(ctx, font, g, 0) * box->em; } diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c index 9d1442e2..fdca224e 100644 --- a/source/pdf/pdf-appearance.c +++ b/source/pdf/pdf-appearance.c @@ -1918,7 +1918,7 @@ static void add_text(fz_context *ctx, font_info *font_rec, fz_text *text, char * str += n; str_len -= n; gid = fz_encode_character(ctx, font, ucs); - fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0, 0, FZ_DIR_UNSET, fz_lang_unset); + fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0, 0, FZ_DIR_UNSET, FZ_LANG_UNSET); tm.e += fz_advance_glyph(ctx, font, gid, 0) * font_rec->da_rec.font_size; } } diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c index 03bca556..962c1845 100644 --- a/source/pdf/pdf-op-run.c +++ b/source/pdf/pdf-op-run.c @@ -943,11 +943,11 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid) fz_union_rect(&pr->text_bbox, &bbox); /* add glyph to textobject */ - fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, 
ucsbuf[0], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset); + fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET); /* add filler glyphs for one-to-many unicode mapping */ for (i = 1; i < ucslen; i++) - fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset); + fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET); if (fontdesc->wmode == 0) { diff --git a/source/tools/murun.c b/source/tools/murun.c index 8ff59be7..69fac9bb 100644 --- a/source/tools/murun.c +++ b/source/tools/murun.c @@ -1935,7 +1935,7 @@ static void ffi_Text_showGlyph(js_State *J) int wmode = js_isdefined(J, 5) ? js_toboolean(J, 5) : 0; fz_try(ctx) - fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, fz_lang_unset); + fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET); fz_catch(ctx) rethrow(J); } @@ -1950,7 +1950,7 @@ static void ffi_Text_showString(js_State *J) int wmode = js_isdefined(J, 4) ? js_toboolean(J, 4) : 0; fz_try(ctx) - fz_show_string(ctx, text, font, &trm, s, wmode, 0, FZ_DIR_UNSET, fz_lang_unset); + fz_show_string(ctx, text, font, &trm, s, wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET); fz_catch(ctx) rethrow(J); diff --git a/source/xps/xps-glyphs.c b/source/xps/xps-glyphs.c index 91c53743..3d0ebfd0 100644 --- a/source/xps/xps-glyphs.c +++ b/source/xps/xps-glyphs.c @@ -452,7 +452,7 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm, } dir = bidi_level & 1 ? FZ_DIR_R2L : FZ_DIR_L2R; - fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways, bidi_level, dir, fz_lang_unset); + fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways, bidi_level, dir, FZ_LANG_UNSET); x += advance * 0.01f * size; } |