Rejig Bidirectional and Text code.

We move to using bidirectional "levels" throughout. This should give us better behaviour vis-a-vis nested l2r/r2l text. It also allows us to carry xps levels throughout with no loss of information, and avoids the need to special-case numbers. We accordingly carry more information into fz_text: as well as wmode, each text span now also holds additional details about the text. These are the bidirectional level of the text (either as derived from the bidi code, or taken from the original document, e.g. xps), the directionality of the text as specified in the original document (e.g. html), and the language of the text (if specified in the original document).
author: Robin Watts <Robin.Watts@artifex.com> 2016-03-02 08:03:53 -0800
committer: Robin Watts <robin.watts@artifex.com> 2016-03-11 11:57:48 +0000
commit: a3785935df081674d048655048984bcba09f8387 (patch)
tree: 31f6a63292d9f3d11be9c4b7003001c700b9b3c5
parent: c5b80367dfcd3d3df09068b7f31119a400cfe241 (diff)
download: mupdf-a3785935df081674d048655048984bcba09f8387.tar.xz
11 files changed, 165 insertions, 221 deletions
diff --git a/include/mupdf/fitz/bidi.h b/include/mupdf/fitz/bidi.h
index 8428ffc1..f458c498 100644
--- a/include/mupdf/fitz/bidi.h
+++ b/include/mupdf/fitz/bidi.h
@@ -49,18 +49,16 @@ enum
  *
  * @param	fragment	first character in fragment
  * @param	fragmentLen	number of characters in fragment
- * @param	block_r2l	true if block should concatenate with other blocks
- *				as right-to-left
- * @param	char_r2l	true if characters within block should be laid out
- *				as right-to-left
+ * @param	bidiLevel	The bidirectional level for this text. The bottom bit
+ *				will be set iff block should concatenate with other
+ *				blocks as right-to-left
  * @param       script          the script in use for this fragment (other than common
  *                              or inherited)
  * @param	arg		data from caller of Bidi_fragmentText
  */
 typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment,
 					size_t fragmentLen,
-					int block_r2l,
-					int char_r2l,
+					int bidiLevel,
 					int script,
 					void *arg);
 
@@ -72,9 +70,9 @@ typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment,
  *			0123456789
  *			rrlllrrrrr,
  * we'll invoke callback with:
- *			&text[0], length == 2, rightToLeft ==  true
- *			&text[2], length == 3, rightToLeft == false
- *			&text[5], length == 5, rightToLeft ==  true.
+ *			&text[0], length == 2
+ *			&text[2], length == 3
+ *			&text[5], length == 5
  *
  * @param[in] text	start of Unicode sequence
  * @param[in] textlen   number of Unicodes to analyse
diff --git a/include/mupdf/fitz/text.h b/include/mupdf/fitz/text.h
index 2d96ce02..682517c4 100644
--- a/include/mupdf/fitz/text.h
+++ b/include/mupdf/fitz/text.h
@@ -29,11 +29,28 @@ struct fz_text_item_s
 	int ucs; /* -1 for one ucs to many gid mappings */
 };
 
+typedef enum fz_text_direction_e
+{
+	/* There are various possible 'directions' for text */
+	FZ_DIR_UNSET = 0,	/* Unset (or Neutral). All PDF text is sent as this. */
+	FZ_DIR_R2L = 1,		/* Text is r2l */
+	FZ_DIR_L2R = 2		/* Text is l2r */
+} fz_text_direction;
+
+typedef enum fz_text_language_e
+{
+	fz_lang_unset = 0
+	/* FIXME: Fill in more */
+} fz_text_language;
+
 struct fz_text_span_s
 {
 	fz_font *font;
 	fz_matrix trm;
-	int wmode;
+	int wmode : 1;		/* 0 horizontal, 1 vertical */
+	int bidi_level : 7;	/* The bidirectional level of text */
+	int markup_dir : 2;	/* The direction of text as marked in the original document */
+	int language : 8;	/* The language as marked in the original document */
 	int len, cap;
 	fz_text_item *items;
 	fz_text_span *next;
@@ -49,8 +66,8 @@ fz_text *fz_new_text(fz_context *ctx);
 fz_text *fz_keep_text(fz_context *ctx, const fz_text *text);
 void fz_drop_text(fz_context *ctx, const fz_text *text);
 
-void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode);
-void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode);
+void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language);
+void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language);
 fz_rect *fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, const fz_matrix *ctm, fz_rect *r);
 
 fz_text *fz_clone_text(fz_context *ctx, const fz_text *text);
diff --git a/include/mupdf/html.h b/include/mupdf/html.h
index c3668501..be9733e9 100644
--- a/include/mupdf/html.h
+++ b/include/mupdf/html.h
@@ -185,7 +185,7 @@ struct fz_html_s
 	float em;
 	fz_html *up, *down, *last, *next;
 	fz_html_flow *flow_head, **flow_tail;
-	fz_bidi_direction flow_dir;
+	int flow_dir;
 	fz_css_style style;
 	int list_item;
 	int is_first_flow; /* for text-indent */
@@ -202,25 +202,6 @@ enum
 	FLOW_SHYPHEN = 5
 };
 
-/* We have to recognise the distinction between render direction
- * and layout direction. For most strings render direction and
- * logical direction are the same.
- *
- * Char direction determines whether a string 'ABC' appears as
- * ABC or CBA.
- *
- * Block direction determines how fragments are attached together.
- * 'ABC' and 'DEF' with r2l char and block directions will
- * appear as 'FEDCBA'. With l2r char and block it will appear
- * as 'ABCDEF'.
- *
- * The reason for the distinction is that we can have logical
- * strings like 'ABC0123DEF', where 'ABC' and 'DEF' are in r2l
- * scripts. The bidirectional code breaks this down into 3 fragments
- * 'ABC' '0123' 'DEF', where all three are r2l, but digits need to
- * be rendered left to right. i.e. the desired result is:
- * FED0123CBA, rather than FED3210CBA.
- */
 struct fz_html_flow_s
 {
 	/* What type of node */
@@ -229,14 +210,11 @@ struct fz_html_flow_s
 	/* Whether this should expand during justification */
 	unsigned int expand : 1;
 
-	/* Whether the chars should be laid out r2l or l2r */
-	unsigned int char_r2l : 1;
+	/* Direction setting for text - UAX#9 says 125 is the max */
+	unsigned int bidi_level : 7;
 
-	/* Whether this block should stack with its neighbours r2l or l2r */
-	unsigned int block_r2l : 1;
-
-	/* Whether the markup specifies a given direction. */
-	unsigned int markup_r2l : 2;
+	/* Direction for text set in original document */
+	unsigned int markup_dir : 2;
 
 	/* Whether the markup specifies a given language. */
 	unsigned int markup_lang : 8;
diff --git a/source/fitz/bidi.c b/source/fitz/bidi.c
index 74548d4d..979d2f1c 100644
--- a/source/fitz/bidi.c
+++ b/source/fitz/bidi.c
@@ -170,47 +170,11 @@ static fz_bidi_chartype class_from_ch_n(uint32_t ch)
 	return from_ch_ws;
 }
 
-static int
-is_european_number(const uint32_t *str, unsigned int len)
-{
-	const uint32_t *end = str + len;
-
-	for ( ; str != end; str++)
-	{
-		const uint32_t u = *str;
-		if ((u >= UNICODE_RTL_START && u < UNICODE_ARABIC_INDIC_DIGIT_ZERO) ||
-			(u > UNICODE_ARABIC_INDIC_DIGIT_NINE && u < UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO) ||
-			(u > UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE && u <= UNICODE_RTL_END))
-		{
-			/* This is just a normal RTL character or accent */
-			return FALSE;
-		}
-		else if (!((u >= UNICODE_DIGIT_ZERO && u <= UNICODE_DIGIT_NINE) ||
-			(u == UNICODE_SUPERSCRIPT_TWO) ||
-			(u == UNICODE_SUPERSCRIPT_THREE) ||
-			(u == UNICODE_SUPERSCRIPT_ONE) ||
-			(u >= UNICODE_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_ARABIC_INDIC_DIGIT_NINE) ||
-			(u >= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE) ||
-			(u == UNICODE_SUPERSCRIPT_ZERO) ||
-			(u >= UNICODE_SUPERSCRIPT_FOUR && u <= UNICODE_SUPERSCRIPT_NINE) ||
-			(u >= UNICODE_SUBSCRIPT_ZERO && u <= UNICODE_SUBSCRIPT_NINE) ||
-			(u >= UNICODE_CIRCLED_DIGIT_ONE && u <= UNICODE_NUMBER_TWENTY_FULL_STOP) ||
-			(u == UNICODE_CIRCLED_DIGIT_ZERO) ||
-			(u >= UNICODE_FULLWIDTH_DIGIT_ZERO && u <= UNICODE_FULLWIDTH_DIGIT_NINE) ||
-			(u == UNICODE_ZERO_WIDTH_NON_JOINER)))
-		{
-			return FALSE;
-		}
-	}
-	return TRUE;
-}
-
 /* Split fragments into single scripts (or punctation + single script) */
 static void
 split_at_script(const uint32_t *fragment,
 		size_t fragment_len,
-		int block_r2l,
-		int char_r2l,
+		int level,
 		void *arg,
 		fz_bidi_fragment_callback *callback)
 {
@@ -237,53 +201,17 @@ split_at_script(const uint32_t *fragment,
 		else
 		{
 			/* Change of script. Break the fragment. */
-			(*callback)(&fragment[script_start], i - script_start, block_r2l, char_r2l, script, arg);
+			(*callback)(&fragment[script_start], i - script_start, level, script, arg);
 			script_start = i+1;
 			script = s;
 		}
 	}
 	if (script_start != fragment_len)
 	{
-		(*callback)(&fragment[script_start], fragment_len - script_start, block_r2l, char_r2l, script, arg);
+		(*callback)(&fragment[script_start], fragment_len - script_start, level, script, arg);
 	}
 }
 
-static void
-detect_numbers(const uint32_t *fragment,
-		size_t fragment_len,
-		size_t start,
-		size_t end,
-		const fz_bidi_level *levels,
-		void *arg,
-		fz_bidi_fragment_callback *callback)
-{
-	int block_r2l = ODD(levels[start]);
-	int char_r2l = block_r2l;
-
-	/* Check to see if we've got a number. Numbers should
-	 * never be block_r2l, so we can avoid the test. */
-	if (block_r2l || !is_european_number(&fragment[start], end-start))
-	{
-		/* No number, just split as normal */
-		split_at_script(&fragment[start],
-				end-start,
-				block_r2l,
-				char_r2l,
-				arg,
-				callback);
-		return;
-	}
-
-	/* We have a number. We have to check to see whether this
-	 * should be handled as a block_r2l thing. */
-	if (start != 0)
-		block_r2l = ODD(levels[start-1]);
-	if (block_r2l && end != fragment_len)
-		block_r2l = ODD(levels[end]);
-
-	split_at_script(&fragment[start], end-start, block_r2l, char_r2l, arg, callback);
-}
-
 /* Determines the character classes for all following
  * passes of the algorithm. A character class is basically the type of Bidi
  * behaviour that the character exhibits.
@@ -614,11 +542,9 @@ void fz_bidi_fragment_text(fz_context *ctx,
 				 * Create a text object for it, then start
 				 * a new fragment.
 				 */
-				detect_numbers(text,
-						textlen,
-						startOfFragment,
-						i,
-						levels,
+				split_at_script(&text[startOfFragment],
+						i - startOfFragment,
+						levels[startOfFragment],
 						arg,
 						callback);
 				startOfFragment = i;
@@ -626,11 +552,9 @@ void fz_bidi_fragment_text(fz_context *ctx,
 		}
 		/* Now i == textlen. Deal with the final (or maybe only) fragment. */
 		/* otherwise create 1 fragment */
-		detect_numbers(text,
-				textlen,
-				startOfFragment,
-				i,
-				levels,
+		split_at_script(&text[startOfFragment],
+				i - startOfFragment,
+				levels[startOfFragment],
 				arg,
 				callback);
 	}
diff --git a/source/fitz/font.c b/source/fitz/font.c
index 1bac6efc..8f598be1 100644
--- a/source/fitz/font.c
+++ b/source/fitz/font.c
@@ -1442,6 +1442,8 @@ fz_encode_character(fz_context *ctx, fz_font *font, int ucs)
 	return ucs;
 }
 
+/* FIXME: This should take language too eventually, to allow for fonts where we can select different
+ * languages using opentype features. */
 int
 fz_encode_character_with_fallback(fz_context *ctx, fz_font *user_font, int unicode, int script, fz_font **out_font)
 {
diff --git a/source/fitz/text.c b/source/fitz/text.c
index 735b3a5b..29a4506e 100644
--- a/source/fitz/text.c
+++ b/source/fitz/text.c
@@ -37,11 +37,14 @@ fz_drop_text(fz_context *ctx, const fz_text *textc)
 }
 
 static fz_text_span *
-fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, const fz_matrix *trm)
+fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language, const fz_matrix *trm)
 {
 	fz_text_span *span = fz_malloc_struct(ctx, fz_text_span);
 	span->font = fz_keep_font(ctx, font);
 	span->wmode = wmode;
+	span->bidi_level = bidi_level;
+	span->markup_dir = markup_dir;
+	span->language = language;
 	span->trm = *trm;
 	span->trm.e = 0;
 	span->trm.f = 0;
@@ -49,20 +52,23 @@ fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, const fz_matrix *trm
 }
 
 static fz_text_span *
-fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, const fz_matrix *trm)
+fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language, const fz_matrix *trm)
 {
 	if (!text->tail)
 	{
-		text->head = text->tail = fz_new_text_span(ctx, font, wmode, trm);
+		text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
 	}
 	else if (text->tail->font != font ||
 		text->tail->wmode != wmode ||
+		text->tail->bidi_level != bidi_level ||
+		text->tail->markup_dir != markup_dir ||
+		text->tail->language != language ||
 		text->tail->trm.a != trm->a ||
 		text->tail->trm.b != trm->b ||
 		text->tail->trm.c != trm->c ||
 		text->tail->trm.d != trm->d)
 	{
-		text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, trm);
+		text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
 	}
 	return text->tail;
 }
@@ -80,14 +86,14 @@ fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n)
 }
 
 void
-fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int gid, int ucs, int wmode)
+fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int gid, int ucs, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language lang)
 {
 	fz_text_span *span;
 
 	if (text->refs != 1)
 		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot modify shared text objects");
 
-	span = fz_add_text_span(ctx, text, font, wmode, trm);
+	span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm);
 
 	fz_grow_text_span(ctx, span, 1);
 
@@ -99,7 +105,7 @@ fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *tr
 }
 
 void
-fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *trm, const char *s, int wmode)
+fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language)
 {
 	fz_font *font;
 	int gid, ucs;
@@ -109,7 +115,7 @@ fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *tr
 	{
 		s += fz_chartorune(&ucs, s);
 		gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, &font);
-		fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode);
+		fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language);
 		adv = fz_advance_glyph(ctx, font, gid, wmode);
 		if (wmode == 0)
 			fz_pre_translate(trm, adv, 0);
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index 2255cb83..a74354d1 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -106,9 +106,8 @@ static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html *top, fz_c
 	fz_html_flow *flow = fz_pool_alloc(ctx, pool, sizeof *flow);
 	flow->type = type;
 	flow->expand = 0;
-	flow->char_r2l = BIDI_LEFT_TO_RIGHT;
-	flow->block_r2l = BIDI_LEFT_TO_RIGHT;
-	flow->markup_r2l = BIDI_NEUTRAL;
+	flow->bidi_level = 0;
+	flow->markup_dir = FZ_DIR_UNSET;
 	flow->breaks_line = 0;
 	flow->style = style;
 	*top->flow_tail = flow;
@@ -381,7 +380,7 @@ static void init_box(fz_context *ctx, fz_html *box)
 
 	box->flow_head = NULL;
 	box->flow_tail = &box->flow_head;
-	box->flow_dir = BIDI_NEUTRAL;
+	box->flow_dir = FZ_DIR_UNSET;
 
 	fz_default_css_style(ctx, &box->style);
 }
@@ -757,7 +756,7 @@ static void measure_string(fz_context *ctx, fz_html_flow *node, float em, hb_buf
 	node->h = fz_from_css_number_scale(node->style->line_height, em, em, em);
 
 	s = get_node_text(ctx, node);
-	init_string_walker(ctx, &walker, hb_buf, node->char_r2l, node->style->font, node->script, s);
+	init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->style->font, node->script, s);
 	while (walk_string(&walker))
 	{
 		max_x = 0;
@@ -810,31 +809,86 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
 	float slop = page_w - line_w;
 	float justify = 0;
 	float va;
-	int n = 0;
-	fz_html_flow *node = start;
-	fz_html_flow *mid;
+	int n, i;
+	fz_html_flow *node;
+	fz_html_flow **reorder;
+	unsigned int min_level, max_level;
+
+	/* Count the number of nodes on the line */
+	for(i = 0, n = 0, node = start; node != end; node = node->next)
+	{
+		n++;
+		if (node->type == FLOW_SPACE && node->expand && !node->breaks_line)
+			i++;
+	}
 
 	if (align == TA_JUSTIFY)
 	{
-		fz_html_flow *it;
-		for (it = node; it != end; it = it->next)
-			if (it->type == FLOW_SPACE && it->expand && !it->breaks_line)
-				++n;
-		justify = slop / n;
+		justify = slop / i;
 	}
 	else if (align == TA_RIGHT)
 		x += slop;
 	else if (align == TA_CENTER)
 		x += slop / 2;
 
-	/* The line data as supplied is start...end. */
-	/* We have the invariants that 1) start...mid are always laid out
-	 * correctly and 2) mid..node are the most recent set of right to left
-	 * blocks. */
-	mid = start;
-	while (node != end)
+	/* We need a block to hold the node pointers while we reorder */
+	reorder = fz_malloc_array(ctx, n, sizeof(*reorder));
+	min_level = start->bidi_level;
+	max_level = start->bidi_level;
+	for(i = 0, node = start; node != end; i++, node = node->next)
 	{
-		float w = node->w;
+		reorder[i] = node;
+		if (node->bidi_level < min_level)
+			min_level = node->bidi_level;
+		if (node->bidi_level > max_level)
+			max_level = node->bidi_level;
+	}
+
+	/* Do we need to do any reordering? */
+	if (min_level != max_level || (min_level & 1))
+	{
+		/* The lowest level we swap is always a r2l one */
+		min_level |= 1;
+		/* Each time around the loop we swap runs of fragments that have
+		 * levels >= max_level (and decrement max_level). */
+		do
+		{
+			int start = 0;
+			int end;
+			do
+			{
+				/* Skip until we find a level that's >= max_level */
+				while (start < n && reorder[start]->bidi_level < max_level)
+					start++;
+				/* If start >= n-1 then no more runs. */
+				if (start >= n-1)
+					break;
+				/* Find the end of the match */
+				i = start+1;
+				while (i < n && reorder[i]->bidi_level >= max_level)
+					i++;
+				/* Reverse from start to i-1 */
+				end = i-1;
+				while (start < end)
+				{
+					fz_html_flow *t = reorder[start];
+					reorder[start++] = reorder[end];
+					reorder[end--] = t;
+				}
+				start = i+1;
+			}
+			while (start < n);
+			max_level--;
+		}
+		while (max_level >= min_level);
+	}
+
+	for (i = 0; i < n; i++)
+	{
+		float w;
+
+		node = reorder[i];
+		w = node->w;
 
 		if (node->type == FLOW_SPACE && node->breaks_line)
 			w = 0;
@@ -845,30 +899,7 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
 		else if (node->type == FLOW_SHYPHEN && node->breaks_line)
 			w = node->w;
 
-		if (node->block_r2l)
-		{
-			float old_x = x;
-			if (mid != node)
-			{
-				/* We have met a r2l block, and have just had at least
-				 * one other r2l block. Move all the r2l blocks that
-				 * we've just had further right, and position this one
-				 * on the left. */
-				fz_html_flow *temp = mid;
-				while (temp != node)
-				{
-					old_x = temp->x;
-					temp->x += w;
-					temp = temp->next;
-				}
-			}
-			node->x = old_x;
-		}
-		else
-		{
-			node->x = x;
-			mid = node->next;
-		}
+		node->x = x;
 		x += w;
 
 		switch (node->style->vertical_align)
@@ -895,8 +926,9 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
 			node->y = y + baseline - node->h;
 		else
 			node->y = y + baseline + va;
-		node = node->next;
 	}
+
+	fz_free(ctx, reorder);
 }
 
 static void find_accumulated_margins(fz_context *ctx, fz_html *box, float *w, float *h)
@@ -933,7 +965,7 @@ static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, f
 	indent = box->is_first_flow ? fz_from_css_number(top->style.text_indent, em, top->w) : 0;
 	align = top->style.text_align;
 
-	if (box->flow_dir == BIDI_RIGHT_TO_LEFT)
+	if (box->flow_dir == FZ_DIR_R2L)
 	{
 		if (align == TA_LEFT)
 			align = TA_RIGHT;
@@ -1197,7 +1229,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 			w = node->w;
 
 			s = get_node_text(ctx, node);
-			init_string_walker(ctx, &walker, hb_buf, node->char_r2l, node->style->font, node->script, s);
+			init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->style->font, node->script, s);
 			while (walk_string(&walker))
 			{
 				const char *t;
@@ -1248,7 +1280,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 					ly += p->y_advance;
 				}
 
-				if (node->char_r2l)
+				if (node->bidi_level & 1)
 				{
 					w -= lx * node_scale;
 					for (gp = 0; gp < walker.glyph_count; gp++)
@@ -1282,7 +1314,9 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 								continue;
 							trm.e = *(float *)&p->x_offset;
 							trm.f = *(float *)&p->y_offset;
-							fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, c, 0);
+							fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, c, 0,
+								node->bidi_level, node->markup_dir,
+								node->markup_lang);
 							break;
 						}
 						if (gp == walker.glyph_count)
@@ -1291,7 +1325,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 							 * because we've been shaped away into another. We can't afford
 							 * to just drop the codepoint as this will upset text extraction.
 							 */
-							fz_show_glyph(ctx, text, walker.font, &trm, -1, c, 0);
+							fz_show_glyph(ctx, text, walker.font, &trm, -1, c, 0, node->bidi_level, node->markup_dir, node->markup_lang);
 						}
 						else
 						{
@@ -1305,7 +1339,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 									continue;
 								trm.e = *(float *)&p->x_offset;
 								trm.f = *(float *)&p->y_offset;
-								fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, -1, 0);
+								fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, -1, 0, node->bidi_level, node->markup_dir, node->markup_lang);
 							}
 						}
 						idx += l;
@@ -1494,7 +1528,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float
 	{
 		s += fz_chartorune(&c, s);
 		g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font);
-		fz_show_glyph(ctx, text, font, &trm, g, c, 0);
+		fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_DIR_UNSET, fz_lang_unset);
 		trm.e += fz_advance_glyph(ctx, font, g, 0) * box->em;
 	}
 
@@ -1817,22 +1851,15 @@ typedef struct
 	uni_buf *buffer;
 } bidi_data;
 
-static void newFragCb(const uint32_t *fragment,
+static void fragment_cb(const uint32_t *fragment,
 			size_t fragment_len,
-			int block_r2l,
-			int char_r2l,
+			int bidi_level,
 			int script,
 			void *arg)
 {
 	bidi_data *data = (bidi_data *)arg;
 	size_t fragment_offset = fragment - data->buffer->data;
 
-	/* The Picsel code used to (effectively) do:
-	 * if (fragment_offset == 0) char_r2l = block_r2l;
-	 * but that makes no sense to me. All that could do is stop
-	 * a european number being treated as l2r because it was the
-	 * first thing on a line. */
-
 	/* We are guaranteed that fragmentOffset will be at the beginning
 	 * of flow. */
 	while (fragment_len > 0)
@@ -1861,8 +1888,7 @@ static void newFragCb(const uint32_t *fragment,
 		}
 
 		/* This flow box is entirely contained within this fragment. */
-		data->flow->block_r2l = block_r2l;
-		data->flow->char_r2l = char_r2l;
+		data->flow->bidi_level = bidi_level;
 		data->flow->script = script;
 		data->flow = data->flow->next;
 		fragment_offset += len;
@@ -1870,34 +1896,31 @@ static void newFragCb(const uint32_t *fragment,
 	}
 }
 
-static int
-dirn_matches(int dirn, int dirn2)
-{
-	return (dirn == BIDI_NEUTRAL || dirn2 == BIDI_NEUTRAL || dirn == dirn2);
-}
-
 static void
-detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_bidi_direction *baseDir, fz_html_flow *flow)
+detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, int baseDir, fz_html_flow *flow)
 {
 	fz_html_flow *end = flow;
 	const char *text;
 	bidi_data data;
-	fz_bidi_direction dirn;
+	fz_bidi_direction bidi_dir = BIDI_NEUTRAL;
+
+	if (baseDir == FZ_DIR_L2R)
+		bidi_dir = BIDI_LEFT_TO_RIGHT;
+	else if (baseDir == FZ_DIR_R2L)
+		bidi_dir = BIDI_RIGHT_TO_LEFT;
 
 	while (end)
 	{
-		dirn = BIDI_NEUTRAL;
+		int level = end->bidi_level;
 
 		/* Gather the text from the flow up into a single buffer (at
 		 * least, as much of it as has the same direction markup). */
 		buffer->len = 0;
-		while (end && dirn_matches(dirn, end->markup_r2l))
+		while (end && (level & 1) == (end->bidi_level & 1))
 		{
 			size_t len;
 			int broken = 0;
 
-			dirn = end->markup_r2l;
-
 			switch (end->type)
 			{
 			case FLOW_WORD:
@@ -1948,13 +1971,7 @@ detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_b
 		data.pool = pool;
 		data.flow = flow;
 		data.buffer = buffer;
-		fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &dirn, &newFragCb, &data, 0 /* Flags */);
-
-		/* Set the default flow of the box to be the first non NEUTRAL thing we find */
-		if (*baseDir == BIDI_NEUTRAL)
-		{
-			*baseDir = dirn;
-		}
+		fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &bidi_dir, &fragment_cb, &data, 0 /* Flags */);
 	}
 }
 
@@ -1964,7 +1981,7 @@ detect_box_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_ht
 	while (box)
 	{
 		if (box->flow_head)
-			detect_flow_directionality(ctx, pool, buffer, &box->flow_dir, box->flow_head);
+			detect_flow_directionality(ctx, pool, buffer, box->flow_dir, box->flow_head);
 		detect_box_directionality(ctx, pool, buffer, box->down);
 		box = box->next;
 	}
diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c
index 375b3b1d..9d1442e2 100644
--- a/source/pdf/pdf-appearance.c
+++ b/source/pdf/pdf-appearance.c
@@ -1918,7 +1918,7 @@ static void add_text(fz_context *ctx, font_info *font_rec, fz_text *text, char *
 		str += n;
 		str_len -= n;
 		gid = fz_encode_character(ctx, font, ucs);
-		fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0);
+		fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0, 0, FZ_DIR_UNSET, fz_lang_unset);
 		tm.e += fz_advance_glyph(ctx, font, gid, 0) * font_rec->da_rec.font_size;
 	}
 }
diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c
index 8c9f0639..03bca556 100644
--- a/source/pdf/pdf-op-run.c
+++ b/source/pdf/pdf-op-run.c
@@ -943,11 +943,11 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid)
 	fz_union_rect(&pr->text_bbox, &bbox);
 
 	/* add glyph to textobject */
-	fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode);
+	fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
 
 	/* add filler glyphs for one-to-many unicode mapping */
 	for (i = 1; i < ucslen; i++)
-		fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode);
+		fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
 
 	if (fontdesc->wmode == 0)
 	{
diff --git a/source/tools/murun.c b/source/tools/murun.c
index 29e52578..8ff59be7 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -1935,7 +1935,7 @@ static void ffi_Text_showGlyph(js_State *J)
 	int wmode = js_isdefined(J, 5) ? js_toboolean(J, 5) : 0;
 
 	fz_try(ctx)
-		fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode);
+		fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
 	fz_catch(ctx)
 		rethrow(J);
 }
@@ -1950,7 +1950,7 @@ static void ffi_Text_showString(js_State *J)
 	int wmode = js_isdefined(J, 4) ? js_toboolean(J, 4) : 0;
 
 	fz_try(ctx)
-		fz_show_string(ctx, text, font, &trm, s, wmode);
+		fz_show_string(ctx, text, font, &trm, s, wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
 	fz_catch(ctx)
 		rethrow(J);
 
diff --git a/source/xps/xps-glyphs.c b/source/xps/xps-glyphs.c
index 624276f8..91c53743 100644
--- a/source/xps/xps-glyphs.c
+++ b/source/xps/xps-glyphs.c
@@ -408,6 +408,7 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm,
 			float u_offset = 0;
 			float v_offset = 0;
 			float advance;
+			int dir;
 
 			if (is && *is)
 				is = xps_parse_glyph_index(is, &glyph_index);
@@ -450,7 +451,8 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm,
 				tm.f = y - v_offset;
 			}
 
-			fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways);
+			dir = bidi_level & 1 ? FZ_DIR_R2L : FZ_DIR_L2R;
+			fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways, bidi_level, dir, fz_lang_unset);
 
 			x += advance * 0.01f * size;
 		}
author	Robin Watts <Robin.Watts@artifex.com>	2016-03-02 08:03:53 -0800
committer	Robin Watts <robin.watts@artifex.com>	2016-03-11 11:57:48 +0000
commit	a3785935df081674d048655048984bcba09f8387 (patch)
tree	31f6a63292d9f3d11be9c4b7003001c700b9b3c5
parent	c5b80367dfcd3d3df09068b7f31119a400cfe241 (diff)
download	mupdf-a3785935df081674d048655048984bcba09f8387.tar.xz