Make HTML layout use harfbuzz for shaping.

fz_fonts gain a 'shaper' field that will be filled in as required. Use a void * rather than an hb_font_t to avoid polluting top level include space. Harfbuzz handles mirroring for us, so lose the 'mirror' fields. This simplifies our wrappers around the 'standard' bidi code in that we don't need to split fragments upon mirroring. We do need to split our fragments at script changes though as harfbuzz only operates on a single font at a time. Update the html flow structure so that each flow node contains details of the direction specified for it in the markup, the language specified for it in the markup and the script detected by the bidi code. Get the bidi code to pass out the script for each fragment as part of the callback and populate that field in the node. Ensure that we pass in the markup direction to the bidi splitting code as the 'base' direction. When feeding the bidi code, rather than feeding it paragraphs at a time, break those paragraphs if different parts of them have different marked up directions.
author: Robin Watts <robin.watts@artifex.com> 2016-01-29 14:10:53 +0000
committer: Robin Watts <robin.watts@artifex.com> 2016-02-04 13:21:11 +0000
commit: d96bd69b94c12906473a4721c2c2dc5941923253 (patch)
tree: 6101971a4f96dbb5a6c4d189e1dd8ae5cf62cbcf
parent: 5d840271f62c5a51bb83d561181de860086bb6be (diff)
download: mupdf-d96bd69b94c12906473a4721c2c2dc5941923253.tar.xz
6 files changed, 522 insertions, 211 deletions
diff --git a/include/mupdf/fitz/bidi.h b/include/mupdf/fitz/bidi.h
index dfa439f3..8428ffc1 100644
--- a/include/mupdf/fitz/bidi.h
+++ b/include/mupdf/fitz/bidi.h
@@ -10,8 +10,6 @@
  * Processes Unicode text by arranging the characters into an order suitable
  * for display. E.g. Hebrew text will be arranged from right-to-left and
  * any English within the text will remain in the left-to-right order.
- * Characters such as parenthesis will be substituted for their mirrored
- * equivalents if they are part of text which must be reversed.
  *
  * This is an implementation of the Unicode Bidirectional Algorithm which
  * can be found here: http://www.unicode.org/reports/tr9/ and is based
@@ -55,14 +53,15 @@ enum
  *				as right-to-left
  * @param	char_r2l	true if characters within block should be laid out
  *				as right-to-left
- * @param	mirror		The mirror code of the fragment if it exists
+ * @param       script          the script in use for this fragment (other than common
+ *                              or inherited)
  * @param	arg		data from caller of Bidi_fragmentText
  */
 typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment,
 					size_t fragmentLen,
 					int block_r2l,
 					int char_r2l,
-					uint32_t mirror,
+					int script,
 					void *arg);
 
 /**
diff --git a/include/mupdf/fitz/font.h b/include/mupdf/fitz/font.h
index 47866f9e..0de3e00e 100644
--- a/include/mupdf/fitz/font.h
+++ b/include/mupdf/fitz/font.h
@@ -68,6 +68,9 @@ struct fz_font_s
 
 	/* cached encoding lookup */
 	uint16_t *encoding_cache[256];
+
+	/* Shaping information */
+	void *shaper;
 };
 
 /* common CJK font collections */
diff --git a/include/mupdf/html.h b/include/mupdf/html.h
index 4969e15c..c734ea0d 100644
--- a/include/mupdf/html.h
+++ b/include/mupdf/html.h
@@ -221,11 +221,27 @@ enum
  */
 struct fz_html_flow_s
 {
+	/* What type of node */
 	unsigned int type : 2;
+
+	/* Whether this should expand during justification */
 	unsigned int expand : 1;
+
+	/* Whether the chars should be laid out r2l or l2r */
 	unsigned int char_r2l : 1;
+
+	/* Whether this block should stack with its neighbours r2l or l2r */
 	unsigned int block_r2l : 1;
-	unsigned int mirror : 1;
+
+	/* Whether the markup specifies a given direction. */
+	unsigned int markup_r2l : 2;
+
+	/* Whether the markup specifies a given language. */
+	unsigned int markup_lang : 8;
+
+	/* The script detected by the bidi code. */
+	unsigned int script : 8;
+
 	float x, y, w, h, em;
 	fz_css_style *style;
 	union {
diff --git a/source/fitz/bidi.c b/source/fitz/bidi.c
index e711e705..74548d4d 100644
--- a/source/fitz/bidi.c
+++ b/source/fitz/bidi.c
@@ -205,76 +205,83 @@ is_european_number(const uint32_t *str, unsigned int len)
 	return TRUE;
 }
 
+/* Split fragments into single scripts (or punctuation + single script) */
 static void
-do_callback(const uint32_t *fragment,
+split_at_script(const uint32_t *fragment,
 		size_t fragment_len,
 		int block_r2l,
-		uint32_t mirror,
+		int char_r2l,
 		void *arg,
 		fz_bidi_fragment_callback *callback)
 {
-	char char_r2l = block_r2l;
+	int script = UCDN_SCRIPT_COMMON;
+	size_t script_start, i;
 
-	char_r2l = block_r2l && !is_european_number(fragment, fragment_len);
-
-	(*callback)(fragment, fragment_len, block_r2l, char_r2l, mirror, arg);
-}
-
-/* Searches a RTL fragment for a mirror character
- * When it finds one it creates a separate fragment for the
- * character and the surrounding fragments. It passes the mirrored
- * character back through the callback.
- */
-static void
-create_fragment_mirrors(const uint32_t *text,
-		int len,
-		fz_bidi_fragment_callback *callback,
-		void *arg)
-{
-	int i;
-	int lastPtr;
-	uint32_t mirror;
-
-	assert(text != NULL);
-	assert(len > 0);
-	lastPtr = 0;
-	for (i = 0; i < len; i ++)
+	script_start = 0;
+	for (i = 0; i < fragment_len; i++)
 	{
-		mirror = ucdn_mirror(text[i]);
-		if (mirror != UNICODE_EOS)
+		int s = ucdn_get_script(fragment[i]);
+		if (s == UCDN_SCRIPT_COMMON || s == UCDN_SCRIPT_INHERITED)
 		{
-			/* create preceding fragment */
-			if (i > lastPtr)
-			{
-				do_callback(&text[lastPtr],
-						i - lastPtr,
-						TRUE,
-						UNICODE_EOS,
-						arg,
-						callback);
-				DBUGVF(("create mirror fragment for %x\n",(int)text[i]));
-			}
-			/* create mirror fragment */
-			do_callback(&text[i],
-					1,
-					TRUE,
-					mirror,
-					arg,
-					callback);
-			lastPtr = i + 1;
+			/* Punctuation etc. This is fine. */
+		}
+		else if (s == script)
+		{
+			/* Same script. Still fine. */
+		}
+		else if (script == UCDN_SCRIPT_COMMON || script == UCDN_SCRIPT_INHERITED)
+		{
+			/* First non punctuation thing. Set the script. */
+			script = s;
 		}
+		else
+		{
+			/* Change of script. Break the fragment. */
+			(*callback)(&fragment[script_start], i - script_start, block_r2l, char_r2l, script, arg);
+			script_start = i+1;
+			script = s;
+		}
+	}
+	if (script_start != fragment_len)
+	{
+		(*callback)(&fragment[script_start], fragment_len - script_start, block_r2l, char_r2l, script, arg);
 	}
+}
+
+static void
+detect_numbers(const uint32_t *fragment,
+		size_t fragment_len,
+		size_t start,
+		size_t end,
+		const fz_bidi_level *levels,
+		void *arg,
+		fz_bidi_fragment_callback *callback)
+{
+	int block_r2l = ODD(levels[start]);
+	int char_r2l = block_r2l;
 
-	if (lastPtr < len)
+	/* Check to see if we've got a number. Numbers should
+	 * never be block_r2l, so we can avoid the test. */
+	if (block_r2l || !is_european_number(&fragment[start], end-start))
 	{
-		/* create end fragment */
-		do_callback(&text[lastPtr],
-				len - lastPtr,
-				TRUE,
-				UNICODE_EOS,
+		/* No number, just split as normal */
+		split_at_script(&fragment[start],
+				end-start,
+				block_r2l,
+				char_r2l,
 				arg,
 				callback);
+		return;
 	}
+
+	/* We have a number. We have to check to see whether this
+	 * should be handled as a block_r2l thing. */
+	if (start != 0)
+		block_r2l = ODD(levels[start-1]);
+	if (block_r2l && end != fragment_len)
+		block_r2l = ODD(levels[end]);
+
+	split_at_script(&fragment[start], end-start, block_r2l, char_r2l, arg, callback);
 }
 
 /* Determines the character classes for all following
@@ -607,45 +614,25 @@ void fz_bidi_fragment_text(fz_context *ctx,
 				 * Create a text object for it, then start
 				 * a new fragment.
 				 */
-				if (ODD(levels[startOfFragment]) != 0)
-				{
-					/* if RTL check for mirrors and create sub-frags */
-					create_fragment_mirrors(&text[startOfFragment],
-							i - startOfFragment,
-							callback,
-							arg);
-				}
-				else
-				{
-					do_callback(&text[startOfFragment],
-							i - startOfFragment,
-							ODD(levels[startOfFragment]),
-							UNICODE_EOS,
-							arg,
-							callback);
-				}
+				detect_numbers(text,
+						textlen,
+						startOfFragment,
+						i,
+						levels,
+						arg,
+						callback);
 				startOfFragment = i;
 			}
 		}
 		/* Now i == textlen. Deal with the final (or maybe only) fragment. */
-		if (ODD(levels[startOfFragment]) != 0)
-		{
-			/* if RTL check for mirrors and create sub-frags */
-			create_fragment_mirrors(&text[startOfFragment],
-					i - startOfFragment,
-					callback,
-					arg);
-		}
-		else
-		{
-			/* otherwise create 1 fragment */
-			do_callback(&text[startOfFragment],
-					i - startOfFragment,
-					ODD(levels[startOfFragment]),
-					UNICODE_EOS,
-					arg,
-					callback);
-		}
+		/* otherwise create 1 fragment */
+		detect_numbers(text,
+				textlen,
+				startOfFragment,
+				i,
+				levels,
+				arg,
+				callback);
 	}
 	fz_always(ctx)
 	{
diff --git a/source/fitz/font.c b/source/fitz/font.c
index a7db05d4..e7dbc827 100644
--- a/source/fitz/font.c
+++ b/source/fitz/font.c
@@ -1,6 +1,9 @@
 #include "mupdf/fitz.h"
 
 #include <ft2build.h>
+#include "hb.h"
+#include "hb-ft.h"
+
 #include FT_FREETYPE_H
 #include FT_ADVANCES_H
 #include FT_STROKER_H
@@ -150,6 +153,7 @@ fz_drop_font(fz_context *ctx, fz_font *font)
 	fz_free(ctx, font->bbox_table);
 	fz_free(ctx, font->width_table);
 	fz_free(ctx, font->advance_cache);
+	hb_font_destroy(font->shaper);
 	fz_free(ctx, font);
 }
 
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index 3f6fe919..259dcbc5 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -1,5 +1,11 @@
 #include "mupdf/html.h"
 
+#include "hb.h"
+#include "hb-ft.h"
+#include <ft2build.h>
+
+#undef DEBUG_HARFBUZZ
+
 enum { T, R, B, L };
 
 static const char *default_css =
@@ -78,9 +84,9 @@ static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html *top, fz_c
 	fz_html_flow *flow = fz_pool_alloc(ctx, pool, sizeof *flow);
 	flow->type = type;
 	flow->expand = 0;
-	flow->char_r2l = 0;
-	flow->block_r2l = 0;
-	flow->mirror = 0;
+	flow->char_r2l = BIDI_LEFT_TO_RIGHT;
+	flow->block_r2l = BIDI_RIGHT_TO_LEFT;
+	flow->markup_r2l = BIDI_NEUTRAL;
 	flow->style = style;
 	*top->flow_tail = flow;
 	top->flow_tail = &flow->next;
@@ -536,27 +542,102 @@ static void measure_image(fz_context *ctx, fz_html_flow *node, float max_w, floa
 	node->h = node->content.image->h * s;
 }
 
-static void measure_word(fz_context *ctx, fz_html_flow *node, float em)
+static void measure_word(fz_context *ctx, fz_html_flow *node, float em, hb_buffer_t *hb_buf)
 {
-	fz_font *font;
-	const char *s;
-	int c, g;
-	float w;
+	fz_font *font, *next_font;
+	hb_glyph_position_t *glyph_pos;
+	unsigned int glyph_count, i;
+	int max_x, x;
+	const char *s, *start, *end;
+	FT_Face face;
+	int fterr;
+	int scale;
 
 	em = fz_from_css_number(node->style->font_size, em, em);
 	node->x = 0;
 	node->y = 0;
+	node->w = 0;
 	node->h = fz_from_css_number_scale(node->style->line_height, em, em, em);
 
-	w = 0;
-	s = node->content.text;
-	while (*s)
+	start = end = s = node->content.text;
+	font = NULL;
+	while (*start)
 	{
-		s += fz_chartorune(&c, s);
-		g = fz_encode_character_with_fallback(ctx, node->style->font, c, 0, &font);
-		w += fz_advance_glyph(ctx, font, g) * em;
+		/* Run through the string, encoding chars until we find one
+		 * that requires a different fallback font. */
+		while (*s)
+		{
+			int c;
+
+			s += fz_chartorune(&c, s);
+			(void)fz_encode_character_with_fallback(ctx, node->style->font, c, node->script, &next_font);
+			if (next_font != font)
+			{
+				if (font != NULL)
+					break;
+				font = next_font;
+			}
+			end = s;
+		}
+
+		fz_try(ctx)
+		{
+			hb_lock(ctx);
+
+			/* So, shape from start to end in font */
+			face = font->ft_face;
+			scale = face->units_per_EM;
+			fterr = FT_Set_Char_Size(face, scale, scale, 72, 72);
+			if (fterr)
+				fz_throw(ctx, FZ_ERROR_GENERIC, "Failure sizing font (%d)", fterr);
+
+			if (font->shaper == NULL)
+				font->shaper = (void *)hb_ft_font_create(face, NULL);
+
+			hb_buffer_clear_contents(hb_buf);
+			hb_buffer_set_direction(hb_buf, node->char_r2l ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
+			/* We don't know script or language, so leave them blank */
+			/* hb_buffer_set_script(hb_buf, HB_SCRIPT_LATIN); */
+			/* hb_buffer_set_language(hb_buf, hb_language_from_string("en", strlen("en"))); */
+
+			/* First put the text content into a harfbuzz buffer
+			 * labelled with the position within the word. */
+			hb_buffer_add_utf8(hb_buf, start, end - start, 0, -1);
+			hb_buffer_guess_segment_properties(hb_buf);
+
+			/* Now shape that buffer */
+			hb_shape(font->shaper, hb_buf, NULL, 0);
+
+			glyph_pos = hb_buffer_get_glyph_positions(hb_buf, &glyph_count);
+		}
+		fz_always(ctx)
+		{
+			hb_unlock(ctx);
+		}
+		fz_catch(ctx)
+		{
+			fz_rethrow(ctx);
+		}
+
+		max_x = 0;
+		x = 0;
+		for (i = 0; i < glyph_count; i++)
+		{
+			int lx;
+
+			x += glyph_pos[i].x_advance;
+			lx = x + glyph_pos[i].x_offset;
+			if (lx > max_x)
+				max_x = lx;
+		}
+
+		start = end;
+		end = s;
+		font = next_font;
+
+		node->w += max_x * em / scale;
 	}
-	node->w = w;
+
 	node->em = em;
 }
 
@@ -586,7 +667,7 @@ static float measure_line(fz_html_flow *node, fz_html_flow *end, float *baseline
 	return h;
 }
 
-static void layout_line(fz_context *ctx, float indent, float page_w, float line_w, int align, fz_html_flow *node, fz_html_flow *end, fz_html *box, float baseline)
+static void layout_line(fz_context *ctx, float indent, float page_w, float line_w, int align, fz_html_flow *start, fz_html_flow *end, fz_html *box, float baseline)
 {
 	float x = box->x + indent;
 	float y = box->y + box->h;
@@ -594,7 +675,8 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
 	float justify = 0;
 	float va;
 	int n = 0;
-	fz_html_flow *start, *mid;
+	fz_html_flow *node = start;
+	fz_html_flow *mid;
 
 	if (align == TA_JUSTIFY)
 	{
@@ -613,8 +695,7 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
 	/* We have the invariants that 1) start...mid are always laid out
 	 * correctly and 2) mid..node are the most recent set of right to left
 	 * blocks. */
-	start = node;
-	mid = node;
+	mid = start;
 	while (node != end)
 	{
 		float w = node->w + (node->type == FLOW_GLUE && node->expand ? justify : 0);
@@ -691,7 +772,7 @@ static void flush_line(fz_context *ctx, fz_html *box, float page_h, float page_w
 	box->h += line_h;
 }
 
-static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, float page_h)
+static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, float page_h, hb_buffer_t *hb_buf)
 {
 	fz_html_flow *node, *line, *mark;
 	float line_w;
@@ -729,7 +810,7 @@ static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, f
 		}
 		else
 		{
-			measure_word(ctx, node, em);
+			measure_word(ctx, node, em, hb_buf);
 		}
 	}
 
@@ -793,7 +874,7 @@ static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, f
 	}
 }
 
-static float layout_block(fz_context *ctx, fz_html *box, fz_html *top, float em, float page_h, float vertical)
+static float layout_block(fz_context *ctx, fz_html *box, fz_html *top, float em, float page_h, float vertical, hb_buffer_t *hb_buf)
 {
 	fz_html *child;
 	int first;
@@ -841,7 +922,7 @@ static float layout_block(fz_context *ctx, fz_html *box, fz_html *top, float em,
 	{
 		if (child->type == BOX_BLOCK)
 		{
-			vertical = layout_block(ctx, child, box, em, page_h, vertical);
+			vertical = layout_block(ctx, child, box, em, page_h, vertical, hb_buf);
 			if (first)
 			{
 				/* move collapsed parent/child top margins to parent */
@@ -863,7 +944,7 @@ static float layout_block(fz_context *ctx, fz_html *box, fz_html *top, float em,
 		}
 		else if (child->type == BOX_FLOW)
 		{
-			layout_flow(ctx, child, box, em, page_h);
+			layout_flow(ctx, child, box, em, page_h, hb_buf);
 			if (child->h > 0)
 			{
 				box->h += child->h;
@@ -897,15 +978,23 @@ static float layout_block(fz_context *ctx, fz_html *box, fz_html *top, float em,
 	return vertical;
 }
 
-static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm)
+static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm, hb_buffer_t *hb_buf)
 {
-	fz_font *font;
+	fz_font *font, *next_font;
 	fz_html_flow *node;
 	fz_text *text;
 	fz_matrix trm;
 	const char *s;
+	const char *t;
+	const char *start;
+	const char *end;
 	float color[3];
-	int c, g;
+	int c, scale, fterr;
+	float node_scale;
+	FT_Face face;
+	float w, lx, ly;
+
+	/* FIXME: HB_DIRECTION_TTB? */
 
 	for (node = box->flow_head; node; node = node->next)
 	{
@@ -922,6 +1011,15 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 
 		if (node->type == FLOW_WORD)
 		{
+			int idx;
+			unsigned int gp, glyph_count;
+			hb_glyph_info_t *glyph_info;
+			hb_glyph_position_t *glyph_pos;
+			float x, y;
+
+			if (node->content.text == NULL)
+				continue;
+
 			fz_scale(&trm, node->em, -node->em);
 
 			color[0] = node->style->color.r / 255.0f;
@@ -931,54 +1029,189 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 			/* TODO: reuse text object if color is unchanged */
 			text = fz_new_text(ctx);
 
-
-			trm.e = node->x;
-			trm.f = node->y;
-			s = node->content.text;
-			if (node->char_r2l)
+			x = node->x;
+			y = node->y;
+			w = node->w;
+			start = end = s = node->content.text;
+			font = NULL;
+			while (*start)
 			{
-				float w = 0;
-				const char *t = s;
+				/* Run through the string, encoding chars until we find one
+				 * that requires a different fallback font. */
+				while (*s)
+				{
+					int c;
 
-				while (*t)
+					s += fz_chartorune(&c, s);
+					(void)fz_encode_character_with_fallback(ctx, node->style->font, c, node->script, &next_font);
+					if (next_font != font)
+					{
+						if (font != NULL)
+							break;
+						font = next_font;
+					}
+					end = s;
+				}
+
+				fz_try(ctx)
+				{
+					hb_lock(ctx);
+
+					/* So, shape from start to end in font */
+					face = font->ft_face;
+					scale = face->units_per_EM;
+					fterr = FT_Set_Char_Size(face, scale, scale, 72, 72);
+					if (fterr)
+						fz_throw(ctx, FZ_ERROR_GENERIC, "Failure sizing font (%d)", fterr);
+
+					if (font->shaper == NULL)
+						font->shaper = (void *)hb_ft_font_create(face, NULL);
+
+					hb_buffer_clear_contents(hb_buf);
+					hb_buffer_set_direction(hb_buf, node->char_r2l ? HB_DIRECTION_RTL : HB_DIRECTION_LTR);
+					/* We don't know script or language, so leave them blank */
+					/* hb_buffer_set_script(hb_buf, HB_SCRIPT_LATIN); */
+					/* hb_buffer_set_language(hb_buf, hb_language_from_string("en", strlen("en"))); */
+
+					/* First put the text content into a harfbuzz buffer
+					 * labelled with the position within the word. */
+					hb_buffer_add_utf8(hb_buf, start, end - start, 0, -1);
+					hb_buffer_guess_segment_properties(hb_buf);
+
+					face = font->ft_face;
+					scale = face->units_per_EM;
+					fterr = FT_Set_Char_Size(face, scale, scale, 72, 72);
+					if (fterr)
+						fz_throw(ctx, FZ_ERROR_GENERIC, "Failure sizing font (%d)", fterr);
+
+					/* Now shape that buffer */
+					hb_shape(font->shaper, hb_buf, NULL, 0);
+
+					glyph_info = hb_buffer_get_glyph_infos(hb_buf, &glyph_count);
+					glyph_pos = hb_buffer_get_glyph_positions(hb_buf, &glyph_count);
+				}
+				fz_always(ctx)
+				{
+					hb_unlock(ctx);
+				}
+				fz_catch(ctx)
+				{
+					fz_rethrow(ctx);
+				}
+
+#ifdef DEBUG_HARFBUZZ
+				printf("fragment: ");
+				t = start;
+				while (t != end)
 				{
 					t += fz_chartorune(&c, t);
-					if (node->mirror)
-						c = ucdn_mirror(c);
-					g = fz_encode_character_with_fallback(ctx, node->style->font, c, 0, &font);
-					w += fz_advance_glyph(ctx, font, g) * node->em;
+					if (c >= 127)
+						printf("<%x>", c);
+					else
+						printf("%c", c);
+				}
+				printf("\n");
+#endif /* DEBUG_HARFBUZZ */
+
+				/* Now offset the glyph_info with the correct positions.
+				 * Harfbuzz always gives us the shaped glyphs for plotting in l2r
+				 * order. We however still want to send glyphs r2l rather than l2r
+				 * for r2l blocks so that text extraction works. So, regardless
+				 * of ordering we resolve the positions here. The nasty thing is
+				 * that we write the resolved positions back into the Harfbuzz
+				 * buffer with a change of type. */
+				node_scale = node->em / scale;
+
+				lx = 0;
+				ly = 0;
+				for (gp = 0; gp < glyph_count; gp++)
+				{
+					hb_glyph_position_t *p = &glyph_pos[gp];
+#ifdef DEBUG_HARFBUZZ
+					hb_glyph_info_t *g = &glyph_info[gp];
+
+					printf("glyph: %x(%d) @ %d %d + %d %d",
+						g->codepoint, g->cluster, p->x_offset, p->y_offset,
+						p->x_advance, p->y_advance);
+#endif /* DEBUG_HARFBUZZ */
+					*(float *)(&p->x_offset) = x + (lx + p->x_offset) * node_scale;
+					*(float *)(&p->y_offset) = y + (ly + p->y_offset) * node_scale;
+#ifdef DEBUG_HARFBUZZ
+					printf(" => %g %g\n", *(float *)(&p->x_offset), *(float *)(&p->y_offset));
+#endif /* DEBUG_HARFBUZZ */
+					lx += p->x_advance;
+					ly += p->y_advance;
 				}
 
-				trm.e += w;
-				while (*s)
+				if (node->char_r2l)
 				{
-					s += fz_chartorune(&c, s);
-					if (node->mirror)
-						c = ucdn_mirror(c);
-					g = fz_encode_character_with_fallback(ctx, node->style->font, c, 0, &font);
-					trm.e -= fz_advance_glyph(ctx, font, g) * node->em;
-					if (node->style->visibility == V_VISIBLE)
-						fz_add_text(ctx, text, font, 0, &trm, g, c);
+					w -= lx * node_scale;
+					for (gp = 0; gp < glyph_count; gp++)
+					{
+						hb_glyph_position_t *p = &glyph_pos[gp];
+						*(float *)(&p->x_offset) += w;
+					}
 				}
-				trm.e += w;
-			}
-			else
-			{
-				while (*s)
+				else
 				{
-					s += fz_chartorune(&c, s);
-					g = fz_encode_character_with_fallback(ctx, node->style->font, c, 0, &font);
-					if (node->style->visibility == V_VISIBLE)
-						fz_add_text(ctx, text, font, 0, &trm, g, c);
-					trm.e += fz_advance_glyph(ctx, font, g) * node->em;
+					x += node_scale * lx;
+					y += node_scale * ly;
 				}
-			}
 
-			if (text)
-			{
-				fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), color, 1);
-				fz_drop_text(ctx, text);
+				/* Now read the data back out again, and turn it into
+				 * glyph/ucs pairs to go to fz_text */
+				idx = 0;
+				t = start;
+				if (node->style->visibility == V_VISIBLE)
+				{
+					while (*t)
+					{
+						int l = fz_chartorune(&c, t);
+						t += l;
+
+						for (gp = 0; gp < glyph_count; gp++)
+						{
+							hb_glyph_info_t *g = &glyph_info[gp];
+							hb_glyph_position_t *p = &glyph_pos[gp];
+							if (g->cluster != idx)
+								continue;
+							trm.e = *(float *)&p->x_offset;
+							trm.f = *(float *)&p->y_offset;
+							fz_add_text(ctx, text, font, 0, &trm, g->codepoint, c);
+							break;
+						}
+						if (gp == glyph_count)
+						{
+							/* We failed to find a glyph for this codepoint, presumably
+							 * because we've been shaped away into another. We can't afford
+							 * to just drop the codepoint as this will upset text extraction.
+							 */
+							fz_add_text(ctx, text, font, 0, &trm, -1, c);
+						}
+						else
+						{
+							/* We've sent the codepoint and glyph. Make sure there aren't
+							 * more glyphs to come from the same codepoint. */
+							for (gp++ ;gp < glyph_count; gp++)
+							{
+								hb_glyph_info_t *g = &glyph_info[gp];
+								hb_glyph_position_t *p = &glyph_pos[gp];
+								if (g->cluster != idx)
+									continue;
+								trm.e = *(float *)&p->x_offset;
+								trm.f = *(float *)&p->y_offset;
+								fz_add_text(ctx, text, font, 0, &trm, g->codepoint, -1);
+							}
+						}
+						idx += l;
+					}
+				}
+				start = end;
+				end = s;
+				font = next_font;
 			}
+			fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), color, 1);
+			fz_drop_text(ctx, text);
 		}
 		else if (node->type == FLOW_IMAGE)
 		{
@@ -1179,7 +1412,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float
 	fz_drop_text(ctx, text);
 }
 
-static void draw_block_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm)
+static void draw_block_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm, hb_buffer_t *hb_buf)
 {
 	float x0, y0, x1, y1;
 
@@ -1215,8 +1448,8 @@ static void draw_block_box(fz_context *ctx, fz_html *box, float page_top, float
 	{
 		switch (box->type)
 		{
-		case BOX_BLOCK: draw_block_box(ctx, box, page_top, page_bot, dev, ctm); break;
-		case BOX_FLOW: draw_flow_box(ctx, box, page_top, page_bot, dev, ctm); break;
+		case BOX_BLOCK: draw_block_box(ctx, box, page_top, page_bot, dev, ctm, hb_buf); break;
+		case BOX_FLOW: draw_flow_box(ctx, box, page_top, page_bot, dev, ctm, hb_buf); break;
 		}
 	}
 }
@@ -1225,8 +1458,33 @@ void
 fz_draw_html(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *inctm)
 {
 	fz_matrix ctm = *inctm;
-	fz_pre_translate(&ctm, 0, -page_top);
-	draw_block_box(ctx, box, page_top, page_bot, dev, &ctm);
+	hb_buffer_t *hb_buf = NULL;
+	int unlocked = 0;
+
+	fz_var(hb_buf);
+	fz_var(unlocked);
+
+	hb_lock(ctx);
+
+	fz_try(ctx)
+	{
+		hb_buf = hb_buffer_create();
+		hb_unlock(ctx);
+		unlocked = 1;
+		fz_pre_translate(&ctm, 0, -page_top);
+		draw_block_box(ctx, box, page_top, page_bot, dev, &ctm, hb_buf);
+	}
+	fz_always(ctx)
+	{
+		if (unlocked)
+			hb_lock(ctx);
+		hb_buffer_destroy(hb_buf);
+		hb_unlock(ctx);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
 }
 
 static char *concat_text(fz_context *ctx, fz_xml *root)
@@ -1413,12 +1671,36 @@ void
 fz_layout_html(fz_context *ctx, fz_html *box, float w, float h, float em)
 {
 	fz_html page_box;
+	hb_buffer_t *hb_buf = NULL;
+	int unlocked = 0;
+
+	fz_var(hb_buf);
+	fz_var(unlocked);
 
-	init_box(ctx, &page_box);
-	page_box.w = w;
-	page_box.h = 0;
+	hb_lock(ctx);
 
-	layout_block(ctx, box, &page_box, em, h, 0);
+	fz_try(ctx)
+	{
+		hb_buf = hb_buffer_create();
+		unlocked = 1;
+		hb_unlock(ctx);
+		init_box(ctx, &page_box);
+		page_box.w = w;
+		page_box.h = 0;
+
+		layout_block(ctx, box, &page_box, em, h, 0, hb_buf);
+	}
+	fz_always(ctx)
+	{
+		if (unlocked)
+			hb_lock(ctx);
+		hb_buffer_destroy(hb_buf);
+		hb_unlock(ctx);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
 }
 
 typedef struct
@@ -1453,7 +1735,7 @@ static void newFragCb(const uint32_t *fragment,
 			size_t fragment_len,
 			int block_r2l,
 			int char_r2l,
-			uint32_t mirror,
+			int script,
 			void *arg)
 {
 	bidi_data *data = (bidi_data *)arg;
@@ -1490,74 +1772,94 @@ static void newFragCb(const uint32_t *fragment,
 		/* This flow box is entirely contained within this fragment. */
 		data->flow->block_r2l = block_r2l;
 		data->flow->char_r2l = char_r2l;
-		if (mirror != 0)
-			data->flow->mirror = 1;
+		data->flow->script = script;
 		data->flow = data->flow->next;
 		fragment_offset += len;
 		fragment_len -= len;
 	}
 }
 
+static int
+dirn_matches(int dirn, int dirn2)
+{
+	return (dirn == BIDI_NEUTRAL || dirn2 == BIDI_NEUTRAL || dirn == dirn2);
+}
+
 static void
 detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_bidi_direction *baseDir, fz_html_flow *flow)
 {
 	fz_html_flow *end = flow;
 	const char *text;
 	bidi_data data;
+	fz_bidi_direction dirn;
 
-	/* Stage 1: Gather the text from the flow up into a single buffer */
-	buffer->len = 0;
 	while (end)
 	{
-		size_t len;
-		int broken = 0;
+		dirn = BIDI_NEUTRAL;
 
-		switch (end->type)
+		/* Gather the text from the flow up into a single buffer (at
+		 * least, as much of it as has the same direction markup). */
+		buffer->len = 0;
+		while (end && dirn_matches(dirn, end->markup_r2l))
 		{
-		case FLOW_WORD:
-			len = utf8len(end->content.text);
-			text = end->content.text;
-			break;
-		case FLOW_GLUE:
-			len = 1;
-			text = " ";
-			break;
-		case FLOW_BREAK:
-		case FLOW_IMAGE:
-			broken = 1;
-			break;
-		}
+			size_t len;
+			int broken = 0;
 
-		if (broken)
-			break;
+			dirn = end->markup_r2l;
 
-		/* Make sure the buffer is large enough */
-		if (buffer->len + len > buffer->cap)
-		{
-			size_t newcap = buffer->cap * 2;
-			if (newcap == 0)
-				newcap = 128; /* Sensible small default */
-			buffer->data = fz_resize_array(ctx, buffer->data, newcap, sizeof(uint32_t));
-			buffer->cap = newcap;
+			switch (end->type)
+			{
+			case FLOW_WORD:
+				len = utf8len(end->content.text);
+				text = end->content.text;
+				break;
+			case FLOW_GLUE:
+				len = 1;
+				text = " ";
+				break;
+			case FLOW_BREAK:
+			case FLOW_IMAGE:
+				broken = 1;
+				break;
+			}
+
+			if (broken)
+				break;
+
+			/* Make sure the buffer is large enough */
+			if (buffer->len + len > buffer->cap)
+			{
+				size_t newcap = buffer->cap * 2;
+				if (newcap == 0)
+					newcap = 128; /* Sensible small default */
+				buffer->data = fz_resize_array(ctx, buffer->data, newcap, sizeof(uint32_t));
+				buffer->cap = newcap;
+			}
+
+			/* Expand the utf8 text into Unicode and store it in the buffer */
+			while (*text)
+			{
+				int rune;
+				text += fz_chartorune(&rune, text);
+				buffer->data[buffer->len++] = rune;
+			}
+
+			end = end->next;
 		}
 
-		/* Expand the utf8 text into Unicode and store it in the buffer */
-		while (*text)
+		/* Detect directionality for the buffer */
+		data.ctx = ctx;
+		data.pool = pool;
+		data.flow = flow;
+		data.buffer = buffer;
+		fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &dirn, &newFragCb, &data, 0 /* Flags */);
+
+		/* Set the default flow of the box to be the first non NEUTRAL thing we find */
+		if (*baseDir == BIDI_NEUTRAL)
 		{
-			int rune;
-			text += fz_chartorune(&rune, text);
-			buffer->data[buffer->len++] = rune;
+			*baseDir = dirn;
 		}
-
-		end = end->next;
 	}
-
-	/* Detect directionality for the buffer */
-	data.ctx = ctx;
-	data.pool = pool;
-	data.flow = flow;
-	data.buffer = buffer;
-	fz_bidi_fragment_text(ctx, buffer->data, buffer->len, baseDir, &newFragCb, &data, 0 /* Flags */);
 }
 
 static void
author	Robin Watts <robin.watts@artifex.com>	2016-01-29 14:10:53 +0000
committer	Robin Watts <robin.watts@artifex.com>	2016-02-04 13:21:11 +0000
commit	d96bd69b94c12906473a4721c2c2dc5941923253 (patch)
tree	6101971a4f96dbb5a6c4d189e1dd8ae5cf62cbcf
parent	5d840271f62c5a51bb83d561181de860086bb6be (diff)
download	mupdf-d96bd69b94c12906473a4721c2c2dc5941923253.tar.xz