summary | refs | log | tree | commit | diff
path: root/source/html
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2016-06-23 13:41:53 +0200
committer: Robin Watts <robin.watts@artifex.com> 2016-06-23 16:10:29 +0100
commit: 6e48c939dc9913a6af747d5b6961624551c8d90d (patch)
tree: 4b3f69c237dc7af00606c87ac94cd2c4893da628 /source/html
parent: cf7b2cbbfe6192fbf697237735ab45bc951304e4 (diff)
download: mupdf-6e48c939dc9913a6af747d5b6961624551c8d90d.tar.xz
epub: Use markup language when shaping and selecting fallback fonts.
Diffstat (limited to 'source/html')
-rw-r--r-- source/html/html-layout.c 65
1 files changed, 40 insertions, 25 deletions
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index 95d6151a..e671c64d 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -154,6 +154,7 @@ static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html *top, fz_h
flow->type = type;
flow->expand = 0;
flow->bidi_level = 0;
+ flow->markup_lang = 0;
flow->breaks_line = 0;
flow->box = inline_box;
*top->flow_tail = flow;
@@ -182,12 +183,13 @@ static void add_flow_shyphen(fz_context *ctx, fz_pool *pool, fz_html *top, fz_ht
(void)add_flow(ctx, pool, top, inline_box, FLOW_SHYPHEN);
}
-static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html *top, fz_html *inline_box, const char *a, const char *b)
+static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html *top, fz_html *inline_box, const char *a, const char *b, int lang)
{
fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_WORD);
flow->content.text = fz_pool_alloc(ctx, pool, b - a + 1);
memcpy(flow->content.text, a, b - a);
flow->content.text[b - a] = 0;
+ flow->markup_lang = lang;
}
static void add_flow_image(fz_context *ctx, fz_pool *pool, fz_html *top, fz_html *inline_box, fz_image *img)
@@ -223,7 +225,7 @@ static fz_html_flow *split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *fl
return new_flow;
}
-static void flush_space(fz_context *ctx, fz_pool *pool, fz_html *flow, fz_html *inline_box, struct genstate *g)
+static void flush_space(fz_context *ctx, fz_pool *pool, fz_html *flow, fz_html *inline_box, int lang, struct genstate *g)
{
static const char *space = " ";
int bsp = inline_box->style.white_space & WS_ALLOW_BREAK_SPACE;
@@ -234,7 +236,7 @@ static void flush_space(fz_context *ctx, fz_pool *pool, fz_html *flow, fz_html *
if (bsp)
add_flow_space(ctx, pool, flow, inline_box);
else
- add_flow_word(ctx, pool, flow, inline_box, space, space+1);
+ add_flow_word(ctx, pool, flow, inline_box, space, space+1, lang);
}
g->emit_white = 0;
}
@@ -276,7 +278,7 @@ static const char *pairbrk[29] =
"_^^%%%^^^_______%%__^^^_____%", /* RI regional indicator */
};
-static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const char *text, struct genstate *g)
+static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const char *text, int lang, struct genstate *g)
{
fz_html *flow;
@@ -319,7 +321,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch
if (bsp)
add_flow_space(ctx, pool, flow, box);
else
- add_flow_word(ctx, pool, flow, box, space, space+1);
+ add_flow_word(ctx, pool, flow, box, space, space+1, lang);
++text;
}
g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a space */
@@ -329,7 +331,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch
const char *prev, *mark = text;
int c;
- flush_space(ctx, pool, flow, box, g);
+ flush_space(ctx, pool, flow, box, lang, g);
if (g->at_bol)
g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ;
@@ -341,7 +343,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch
if (c == 0xAD) /* soft hyphen */
{
if (mark != prev)
- add_flow_word(ctx, pool, flow, box, mark, prev);
+ add_flow_word(ctx, pool, flow, box, mark, prev, lang);
add_flow_shyphen(ctx, pool, flow, box);
mark = text;
g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a soft hyphen */
@@ -361,7 +363,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch
if (brk == '_')
{
if (mark != prev)
- add_flow_word(ctx, pool, flow, box, mark, prev);
+ add_flow_word(ctx, pool, flow, box, mark, prev, lang);
add_flow_sbreak(ctx, pool, flow, box);
mark = prev;
}
@@ -371,7 +373,7 @@ static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const ch
}
}
if (mark != text)
- add_flow_word(ctx, pool, flow, box, mark, text);
+ add_flow_word(ctx, pool, flow, box, mark, text, lang);
g->at_bol = 0;
}
@@ -420,12 +422,12 @@ static void generate_image(fz_context *ctx, fz_pool *pool, fz_html *box, fz_imag
while (flow->type != BOX_FLOW)
flow = flow->up;
- flush_space(ctx, pool, flow, box, g);
+ flush_space(ctx, pool, flow, box, 0, g);
if (!img)
{
const char *alt = "[image]";
- add_flow_word(ctx, pool, flow, box, alt, alt + 7);
+ add_flow_word(ctx, pool, flow, box, alt, alt + 7, 0);
}
else
{
@@ -578,7 +580,7 @@ static void insert_inline_box(fz_context *ctx, fz_pool *pool, fz_html *box, fz_h
}
static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top,
- fz_css_match *up_match, int list_counter, int markup_dir, struct genstate *g)
+ fz_css_match *up_match, int list_counter, int markup_dir, int markup_lang, struct genstate *g)
{
fz_css_match match;
fz_html *box;
@@ -656,9 +658,11 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top,
else if (display != DIS_NONE)
{
+ const char *dir, *lang;
int child_dir = markup_dir;
+ int child_lang = markup_lang;
- const char *dir = fz_xml_att(node, "dir");
+ dir = fz_xml_att(node, "dir");
if (dir)
{
if (!strcmp(dir, "auto"))
@@ -671,6 +675,10 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top,
child_dir = DEFAULT_DIR;
}
+ lang = fz_xml_att(node, "lang");
+ if (lang)
+ child_lang = fz_text_language_from_string(lang);
+
box = new_box(ctx, g->pool, child_dir);
fz_apply_css_style(ctx, g->set, &box->style, &match);
@@ -698,7 +706,7 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top,
int child_counter = list_counter;
if (!strcmp(tag, "ul") || !strcmp(tag, "ol"))
child_counter = 0;
- generate_boxes(ctx, fz_xml_down(node), box, &match, child_counter, child_dir, g);
+ generate_boxes(ctx, fz_xml_down(node), box, &match, child_counter, child_dir, child_lang, g);
}
}
}
@@ -721,11 +729,11 @@ static void generate_boxes(fz_context *ctx, fz_xml *node, fz_html *top,
/* Make sure not to recursively multiply font sizes. */
box->style.font_size.value = 1;
box->style.font_size.unit = N_SCALE;
- generate_text(ctx, g->pool, box, text, g);
+ generate_text(ctx, g->pool, box, text, markup_lang, g);
}
else
{
- generate_text(ctx, g->pool, top, text, g);
+ generate_text(ctx, g->pool, top, text, markup_lang, g);
}
}
}
@@ -760,6 +768,7 @@ typedef struct string_walker
const char *s;
fz_font *base_font;
int script;
+ int language;
fz_font *font;
fz_font *next_font;
hb_glyph_position_t *glyph_pos;
@@ -813,7 +822,7 @@ static int quick_ligature(fz_context *ctx, string_walker *walker, unsigned int i
return walker->glyph_info[i].codepoint;
}
-static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer_t *hb_buf, int rtl, fz_font *font, int script, const char *text)
+static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer_t *hb_buf, int rtl, fz_font *font, int script, int language, const char *text)
{
walker->ctx = ctx;
walker->hb_buf = hb_buf;
@@ -823,6 +832,7 @@ static void init_string_walker(fz_context *ctx, string_walker *walker, hb_buffer
walker->s = text;
walker->base_font = font;
walker->script = script;
+ walker->language = language;
walker->font = NULL;
walker->next_font = NULL;
}
@@ -835,6 +845,7 @@ static int walk_string(string_walker *walker)
FT_Face face;
int fterr;
int quickshape;
+ char lang[8];
walker->start = walker->end;
walker->end = walker->s;
@@ -850,7 +861,7 @@ static int walk_string(string_walker *walker)
int c;
walker->s += fz_chartorune(&c, walker->s);
- (void)fz_encode_character_with_fallback(ctx, walker->base_font, c, walker->script, &walker->next_font);
+ (void)fz_encode_character_with_fallback(ctx, walker->base_font, c, walker->script, walker->language, &walker->next_font);
if (walker->next_font != walker->font)
{
if (walker->font != NULL)
@@ -876,8 +887,12 @@ static int walk_string(string_walker *walker)
hb_buffer_clear_contents(walker->hb_buf);
hb_buffer_set_direction(walker->hb_buf, walker->rtl ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
- /* hb_buffer_set_script(hb_buf, hb_ucdn_script_translate(script)); */
- /* hb_buffer_set_language(hb_buf, hb_language_from_string("en", strlen("en"))); */
+ /* hb_buffer_set_script(walker->hb_buf, hb_ucdn_script_translate(walker->script)); */
+ if (walker->language)
+ {
+ fz_string_from_text_language(lang, walker->language);
+ hb_buffer_set_language(walker->hb_buf, hb_language_from_string(lang, strlen(lang)));
+ }
/* hb_buffer_set_cluster_level(hb_buf, HB_BUFFER_CLUSTER_LEVEL_CHARACTERS); */
hb_buffer_add_utf8(walker->hb_buf, walker->start, walker->end - walker->start, 0, -1);
@@ -955,7 +970,7 @@ static void measure_string(fz_context *ctx, fz_html_flow *node, hb_buffer_t *hb_
node->h = fz_from_css_number_scale(node->box->style.line_height, em, em, em);
s = get_node_text(ctx, node);
- init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->box->style.font, node->script, s);
+ init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->box->style.font, node->script, node->markup_lang, s);
while (walk_string(&walker))
{
int x = 0;
@@ -1488,7 +1503,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
trm.f = y;
s = get_node_text(ctx, node);
- init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, style->font, node->script, s);
+ init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, style->font, node->script, node->markup_lang, s);
while (walk_string(&walker))
{
float node_scale = node->box->em / walker.scale;
@@ -1726,7 +1741,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float
while (*s)
{
s += fz_chartorune(&c, s);
- g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font);
+ g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, FZ_LANG_UNSET, &font);
w += fz_advance_glyph(ctx, font, g, 0) * box->em;
}
@@ -1736,7 +1751,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float
while (*s)
{
s += fz_chartorune(&c, s);
- g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font);
+ g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, FZ_LANG_UNSET, &font);
fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_BIDI_NEUTRAL, FZ_LANG_UNSET);
trm.e += fz_advance_glyph(ctx, font, g, 0) * box->em;
}
@@ -2301,7 +2316,7 @@ fz_parse_html(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const cha
fz_apply_css_style(ctx, g.set, &box->style, &match);
// TODO: transfer page margins out of this hacky box
- generate_boxes(ctx, xml, box, &match, 0, DEFAULT_DIR, &g);
+ generate_boxes(ctx, xml, box, &match, 0, DEFAULT_DIR, FZ_LANG_UNSET, &g);
fz_drop_css(ctx, g.css);
fz_drop_xml(ctx, xml);