summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2016-03-11 16:10:51 +0000
committerRobin Watts <robin.watts@artifex.com>2016-03-11 16:48:03 +0000
commit3272c66cee887b997171854484fa8a8086884b12 (patch)
tree06c188493903c7d2684d0467c127b0c3efd64ed8
parenta3785935df081674d048655048984bcba09f8387 (diff)
downloadmupdf-3272c66cee887b997171854484fa8a8086884b12.tar.xz
Implement fz_text_language support functions.
Add code to convert between fz_text_language codes and ISO 639 language strings. No validation is carried out.
-rw-r--r--include/mupdf/fitz/text.h23
-rw-r--r--platform/java/mupdf_native.c4
-rw-r--r--source/fitz/text.c55
-rw-r--r--source/html/html-layout.c2
-rw-r--r--source/pdf/pdf-appearance.c2
-rw-r--r--source/pdf/pdf-op-run.c4
-rw-r--r--source/tools/murun.c4
-rw-r--r--source/xps/xps-glyphs.c2
8 files changed, 85 insertions, 11 deletions
diff --git a/include/mupdf/fitz/text.h b/include/mupdf/fitz/text.h
index 682517c4..84f72957 100644
--- a/include/mupdf/fitz/text.h
+++ b/include/mupdf/fitz/text.h
@@ -39,7 +39,7 @@ typedef enum fz_text_direction_e
typedef enum fz_text_language_e
{
- fz_lang_unset = 0
+ FZ_LANG_UNSET = 0
/* FIXME: Fill in more */
} fz_text_language;
@@ -50,7 +50,7 @@ struct fz_text_span_s
int wmode : 1; /* 0 horizontal, 1 vertical */
int bidi_level : 7; /* The bidirectional level of text */
int markup_dir : 2; /* The direction of text as marked in the original document */
- int language : 8; /* The language as marked in the original document */
+ int language : 15; /* The language as marked in the original document */
int len, cap;
fz_text_item *items;
fz_text_span *next;
@@ -72,4 +72,23 @@ fz_rect *fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_sta
fz_text *fz_clone_text(fz_context *ctx, const fz_text *text);
+/*
+ Convert ISO 639 (639-{1,2,3,5}) language specification
+ strings losslessly to a 15 bit fz_text_language code.
+
+ str: A 2 or 3 letter language code. Upper and lower case
+ letters are treated alike. NULL gives FZ_LANG_UNSET. Only
+ the first three characters are examined; a third character
+ that is not a letter is ignored, leaving a 2 letter code.
+
+ No validation is carried out. Obviously invalid (out
+ of spec) codes will be mapped to FZ_LANG_UNSET, but
+ well-formed (but undefined) codes will be blithely
+ accepted.
+*/
+fz_text_language fz_text_language_from_string(const char *str);
+
+/*
+ Recover ISO 639 (639-{1,2,3,5}) language specification
+ strings losslessly from a 15 bit fz_text_language code.
+
+ str: Caller supplied buffer of at least 4 chars. It receives
+ the code as a NUL terminated string of lower case letters.
+ If str is NULL, nothing is written and NULL is returned.
+
+ lang: The code to convert. FZ_LANG_UNSET gives an empty
+ string.
+
+ Returns str.
+
+ No validation is carried out. See note above.
+*/
+char *fz_string_from_text_language(char str[4], fz_text_language lang);
+
#endif
diff --git a/platform/java/mupdf_native.c b/platform/java/mupdf_native.c
index f47d9bf6..ee8efd2f 100644
--- a/platform/java/mupdf_native.c
+++ b/platform/java/mupdf_native.c
@@ -3038,7 +3038,7 @@ FUN(Text_showGlyph)(JNIEnv *env, jobject self, jobject font_, jobject matrix_, j
return;
fz_try(ctx)
- fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode);
+ fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET);
fz_catch(ctx)
jni_rethrow(env, ctx);
}
@@ -3060,7 +3060,7 @@ FUN(Text_showString)(JNIEnv *env, jobject self, jobject font_, jobject matrix_,
return;
fz_try(ctx)
- fz_show_string(ctx, text, font, &trm, string, wmode);
+ fz_show_string(ctx, text, font, &trm, string, wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET);
fz_always(ctx)
(*env)->ReleaseStringUTFChars(env, string_, string);
fz_catch(ctx)
diff --git a/source/fitz/text.c b/source/fitz/text.c
index 29a4506e..512c671c 100644
--- a/source/fitz/text.c
+++ b/source/fitz/text.c
@@ -219,3 +219,58 @@ fz_clone_text(fz_context *ctx, const fz_text *text)
return new_text;
}
+
+/*
+	Map an ASCII letter to its 1-based position in the alphabet,
+	ignoring case. Any other character (including the string
+	terminator) maps to 0.
+*/
+static int iso639_letter_index(char c)
+{
+	if (c >= 'a' && c <= 'z')
+		return c - 'a' + 1;
+	if (c >= 'A' && c <= 'Z')
+		return c - 'A' + 1;
+	return 0;
+}
+
+/*
+	Pack a 2 or 3 letter language code into a base-27 number:
+	the 1st letter is the lowest digit, the 2nd is weighted by 27
+	and the optional 3rd by 27*27, with 'a' = 1 ... 'z' = 26 and
+	0 meaning "no letter".
+
+	str: The language string; may be NULL. Letters are accepted
+	in either case. Characters beyond the third are never read.
+
+	Returns FZ_LANG_UNSET if str is NULL or if either of the first
+	two characters is not a letter; otherwise the packed code.
+*/
+fz_text_language fz_text_language_from_string(const char *str)
+{
+	int first, second, third;
+
+	if (str == NULL)
+		return FZ_LANG_UNSET;
+
+	/* 1st char */
+	first = iso639_letter_index(str[0]);
+	if (first == 0)
+		return FZ_LANG_UNSET;
+
+	/* 2nd char */
+	second = iso639_letter_index(str[1]);
+	if (second == 0)
+		return FZ_LANG_UNSET; /* There are no valid 1 char language codes */
+
+	/* 3rd char. It is optional: anything that is not a letter
+	 * (such as the terminator) simply contributes nothing.
+	 * Reading str[2] is safe because str[1] was a letter and so
+	 * cannot have been the terminator. */
+	third = iso639_letter_index(str[2]);
+
+	/* We don't support iso 639-6 4 char codes, cos the standard
+	 * has been withdrawn, and no one uses them. */
+	return (fz_text_language)(first + 27 * second + 27 * 27 * third);
+}
+
+/*
+	Unpack a fz_text_language code into its ISO 639 string.
+
+	str: Caller supplied buffer of at least 4 chars. It receives
+	the code as lower case letters, NUL terminated. If str is
+	NULL nothing is written.
+
+	lang: Base-27 packed code: the 1st letter is the lowest digit,
+	'a' = 1 ... 'z' = 26, and a 0 digit means "no letter".
+
+	Returns str (so NULL when str is NULL).
+*/
+char *fz_string_from_text_language(char str[4], fz_text_language lang)
+{
+	unsigned int code;
+	int i;
+
+	/* str is supposed to be at least 4 chars in size */
+	if (str == NULL)
+		return NULL;
+
+	/* Decode from the low 15 bits as an unsigned value. The
+	 * largest 3 letter code is 19682, which does not fit in the
+	 * positive range of a signed 15 bit field (max 16383). If the
+	 * code was stored in such a field and read back sign extended
+	 * (e.g. "zxx"), the remainders below would be negative and
+	 * produce non-letters. Masking restores the original value
+	 * and leaves every valid code unchanged. */
+	code = (unsigned int)lang & 0x7FFF;
+
+	for (i = 0; i < 3; i++)
+	{
+		unsigned int digit = code % 27;
+		code /= 27;
+		/* A zero digit stores a NUL, ending the string here. */
+		str[i] = digit == 0 ? 0 : (char)(digit - 1 + 'a');
+	}
+	str[3] = 0;
+
+	return str;
+}
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index a74354d1..c274460b 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -1528,7 +1528,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float
{
s += fz_chartorune(&c, s);
g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font);
- fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_DIR_UNSET, fz_lang_unset);
+ fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_DIR_UNSET, FZ_LANG_UNSET);
trm.e += fz_advance_glyph(ctx, font, g, 0) * box->em;
}
diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c
index 9d1442e2..fdca224e 100644
--- a/source/pdf/pdf-appearance.c
+++ b/source/pdf/pdf-appearance.c
@@ -1918,7 +1918,7 @@ static void add_text(fz_context *ctx, font_info *font_rec, fz_text *text, char *
str += n;
str_len -= n;
gid = fz_encode_character(ctx, font, ucs);
- fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0, 0, FZ_DIR_UNSET, fz_lang_unset);
+ fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0, 0, FZ_DIR_UNSET, FZ_LANG_UNSET);
tm.e += fz_advance_glyph(ctx, font, gid, 0) * font_rec->da_rec.font_size;
}
}
diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c
index 03bca556..962c1845 100644
--- a/source/pdf/pdf-op-run.c
+++ b/source/pdf/pdf-op-run.c
@@ -943,11 +943,11 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid)
fz_union_rect(&pr->text_bbox, &bbox);
/* add glyph to textobject */
- fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
+ fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET);
/* add filler glyphs for one-to-many unicode mapping */
for (i = 1; i < ucslen; i++)
- fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
+ fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET);
if (fontdesc->wmode == 0)
{
diff --git a/source/tools/murun.c b/source/tools/murun.c
index 8ff59be7..69fac9bb 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -1935,7 +1935,7 @@ static void ffi_Text_showGlyph(js_State *J)
int wmode = js_isdefined(J, 5) ? js_toboolean(J, 5) : 0;
fz_try(ctx)
- fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
+ fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET);
fz_catch(ctx)
rethrow(J);
}
@@ -1950,7 +1950,7 @@ static void ffi_Text_showString(js_State *J)
int wmode = js_isdefined(J, 4) ? js_toboolean(J, 4) : 0;
fz_try(ctx)
- fz_show_string(ctx, text, font, &trm, s, wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
+ fz_show_string(ctx, text, font, &trm, s, wmode, 0, FZ_DIR_UNSET, FZ_LANG_UNSET);
fz_catch(ctx)
rethrow(J);
diff --git a/source/xps/xps-glyphs.c b/source/xps/xps-glyphs.c
index 91c53743..3d0ebfd0 100644
--- a/source/xps/xps-glyphs.c
+++ b/source/xps/xps-glyphs.c
@@ -452,7 +452,7 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm,
}
dir = bidi_level & 1 ? FZ_DIR_R2L : FZ_DIR_L2R;
- fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways, bidi_level, dir, fz_lang_unset);
+ fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways, bidi_level, dir, FZ_LANG_UNSET);
x += advance * 0.01f * size;
}