11 files changed, 165 insertions, 221 deletions
diff --git a/include/mupdf/fitz/bidi.h b/include/mupdf/fitz/bidi.h
index 8428ffc1..f458c498 100644
--- a/include/mupdf/fitz/bidi.h
+++ b/include/mupdf/fitz/bidi.h
@@ -49,18 +49,16 @@ enum
  *
  * @param	fragment	first character in fragment
  * @param	fragmentLen	number of characters in fragment
- * @param	block_r2l	true if block should concatenate with other blocks
- *				as right-to-left
- * @param	char_r2l	true if characters within block should be laid out
- *				as right-to-left
+ * @param	bidiLevel	The bidirectional level for this text. The bottom bit
+ *				will be set iff block should concatenate with other
+ *				blocks as right-to-left
  * @param       script          the script in use for this fragment (other than common
  *                              or inherited)
  * @param	arg		data from caller of Bidi_fragmentText
  */
 typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment,
 					size_t fragmentLen,
-					int block_r2l,
-					int char_r2l,
+					int bidiLevel,
 					int script,
 					void *arg);
 
@@ -72,9 +70,9 @@ typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment,
  *			0123456789
  *			rrlllrrrrr,
  * we'll invoke callback with:
- *			&text[0], length == 2, rightToLeft ==  true
- *			&text[2], length == 3, rightToLeft == false
- *			&text[5], length == 5, rightToLeft ==  true.
+ *			&text[0], length == 2, bidiLevel odd (right-to-left)
+ *			&text[2], length == 3, bidiLevel even (left-to-right)
+ *			&text[5], length == 5, bidiLevel odd (right-to-left)
  *
  * @param[in] text	start of Unicode sequence
  * @param[in] textlen   number of Unicodes to analyse
diff --git a/include/mupdf/fitz/text.h b/include/mupdf/fitz/text.h
index 2d96ce02..682517c4 100644
--- a/include/mupdf/fitz/text.h
+++ b/include/mupdf/fitz/text.h
@@ -29,11 +29,28 @@ struct fz_text_item_s
 	int ucs; /* -1 for one ucs to many gid mappings */
 };
 
+typedef enum fz_text_direction_e
+{
+	/* Direction of text as marked in the document. Values must fit the 2-bit markup_dir bitfields. */
+	FZ_DIR_UNSET = 0,	/* Unset (or Neutral). All PDF text is sent as this. */
+	FZ_DIR_R2L = 1,		/* Text is right-to-left (r2l) */
+	FZ_DIR_L2R = 2		/* Text is left-to-right (l2r) */
+} fz_text_direction;
+
+typedef enum fz_text_language_e
+{
+	fz_lang_unset = 0
+	/* FIXME: Fill in more */
+} fz_text_language;
+
 struct fz_text_span_s
 {
 	fz_font *font;
 	fz_matrix trm;
-	int wmode;
+	unsigned int wmode : 1;		/* 0 horizontal, 1 vertical. Unsigned: a signed 1-bit field cannot hold 1 */
+	unsigned int bidi_level : 7;	/* The bidirectional level of text (UAX#9 allows up to 125; odd = r2l) */
+	unsigned int markup_dir : 2;	/* The direction of text as marked in the original document (fz_text_direction, 0..2) */
+	unsigned int language : 8;	/* The language as marked in the original document (fz_text_language) */
 	int len, cap;
 	fz_text_item *items;
 	fz_text_span *next;
@@ -49,8 +66,8 @@ fz_text *fz_new_text(fz_context *ctx);
 fz_text *fz_keep_text(fz_context *ctx, const fz_text *text);
 void fz_drop_text(fz_context *ctx, const fz_text *text);
 
-void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode);
-void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode);
+void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language);
+void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language);
 fz_rect *fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, const fz_matrix *ctm, fz_rect *r);
 
 fz_text *fz_clone_text(fz_context *ctx, const fz_text *text);
diff --git a/include/mupdf/html.h b/include/mupdf/html.h
index c3668501..be9733e9 100644
--- a/include/mupdf/html.h
+++ b/include/mupdf/html.h
@@ -185,7 +185,7 @@ struct fz_html_s
 	float em;
 	fz_html *up, *down, *last, *next;
 	fz_html_flow *flow_head, **flow_tail;
-	fz_bidi_direction flow_dir;
+	int flow_dir;
 	fz_css_style style;
 	int list_item;
 	int is_first_flow; /* for text-indent */
@@ -202,25 +202,6 @@ enum
 	FLOW_SHYPHEN = 5
 };
 
-/* We have to recognise the distinction between render direction
- * and layout direction. For most strings render direction and
- * logical direction are the same.
- *
- * Char direction determines whether a string 'ABC' appears as
- * ABC or CBA.
- *
- * Block direction determines how fragments are attached together.
- * 'ABC' and 'DEF' with r2l char and block directions will
- * appear as 'FEDCBA'. With l2r char and block it will appear
- * as 'ABCDEF'.
- *
- * The reason for the distinction is that we can have logical
- * strings like 'ABC0123DEF', where 'ABC' and 'DEF' are in r2l
- * scripts. The bidirectional code breaks this down into 3 fragments
- * 'ABC' '0123' 'DEF', where all three are r2l, but digits need to
- * be rendered left to right. i.e. the desired result is:
- * FED0123CBA, rather than FED3210CBA.
- */
 struct fz_html_flow_s
 {
 	/* What type of node */
@@ -229,14 +210,11 @@ struct fz_html_flow_s
 	/* Whether this should expand during justification */
 	unsigned int expand : 1;
 
-	/* Whether the chars should be laid out r2l or l2r */
-	unsigned int char_r2l : 1;
+	/* Bidirectional embedding level of the text (odd = r2l); UAX#9 caps levels at 125, so 7 bits suffice */
+	unsigned int bidi_level : 7;
 
-	/* Whether this block should stack with its neighbours r2l or l2r */
-	unsigned int block_r2l : 1;
-
-	/* Whether the markup specifies a given direction. */
-	unsigned int markup_r2l : 2;
+	/* Direction for text set in original document */
+	unsigned int markup_dir : 2;
 
 	/* Whether the markup specifies a given language. */
 	unsigned int markup_lang : 8;
diff --git a/source/fitz/bidi.c b/source/fitz/bidi.c
index 74548d4d..979d2f1c 100644
--- a/source/fitz/bidi.c
+++ b/source/fitz/bidi.c
@@ -170,47 +170,11 @@ static fz_bidi_chartype class_from_ch_n(uint32_t ch)
 	return from_ch_ws;
 }
 
-static int
-is_european_number(const uint32_t *str, unsigned int len)
-{
-	const uint32_t *end = str + len;
-
-	for ( ; str != end; str++)
-	{
-		const uint32_t u = *str;
-		if ((u >= UNICODE_RTL_START && u < UNICODE_ARABIC_INDIC_DIGIT_ZERO) ||
-			(u > UNICODE_ARABIC_INDIC_DIGIT_NINE && u < UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO) ||
-			(u > UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE && u <= UNICODE_RTL_END))
-		{
-			/* This is just a normal RTL character or accent */
-			return FALSE;
-		}
-		else if (!((u >= UNICODE_DIGIT_ZERO && u <= UNICODE_DIGIT_NINE) ||
-			(u == UNICODE_SUPERSCRIPT_TWO) ||
-			(u == UNICODE_SUPERSCRIPT_THREE) ||
-			(u == UNICODE_SUPERSCRIPT_ONE) ||
-			(u >= UNICODE_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_ARABIC_INDIC_DIGIT_NINE) ||
-			(u >= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE) ||
-			(u == UNICODE_SUPERSCRIPT_ZERO) ||
-			(u >= UNICODE_SUPERSCRIPT_FOUR && u <= UNICODE_SUPERSCRIPT_NINE) ||
-			(u >= UNICODE_SUBSCRIPT_ZERO && u <= UNICODE_SUBSCRIPT_NINE) ||
-			(u >= UNICODE_CIRCLED_DIGIT_ONE && u <= UNICODE_NUMBER_TWENTY_FULL_STOP) ||
-			(u == UNICODE_CIRCLED_DIGIT_ZERO) ||
-			(u >= UNICODE_FULLWIDTH_DIGIT_ZERO && u <= UNICODE_FULLWIDTH_DIGIT_NINE) ||
-			(u == UNICODE_ZERO_WIDTH_NON_JOINER)))
-		{
-			return FALSE;
-		}
-	}
-	return TRUE;
-}
-
 /* Split fragments into single scripts (or punctation + single script) */
 static void
 split_at_script(const uint32_t *fragment,
 		size_t fragment_len,
-		int block_r2l,
-		int char_r2l,
+		int level,
 		void *arg,
 		fz_bidi_fragment_callback *callback)
 {
@@ -237,53 +201,17 @@ split_at_script(const uint32_t *fragment,
 		else
 		{
 			/* Change of script. Break the fragment. */
-			(*callback)(&fragment[script_start], i - script_start, block_r2l, char_r2l, script, arg);
+			(*callback)(&fragment[script_start], i - script_start, level, script, arg);
 			script_start = i+1;
 			script = s;
 		}
 	}
 	if (script_start != fragment_len)
 	{
-		(*callback)(&fragment[script_start], fragment_len - script_start, block_r2l, char_r2l, script, arg);
+		(*callback)(&fragment[script_start], fragment_len - script_start, level, script, arg);
 	}
 }
 
-static void
-detect_numbers(const uint32_t *fragment,
-		size_t fragment_len,
-		size_t start,
-		size_t end,
-		const fz_bidi_level *levels,
-		void *arg,
-		fz_bidi_fragment_callback *callback)
-{
-	int block_r2l = ODD(levels[start]);
-	int char_r2l = block_r2l;
-
-	/* Check to see if we've got a number. Numbers should
-	 * never be block_r2l, so we can avoid the test. */
-	if (block_r2l || !is_european_number(&fragment[start], end-start))
-	{
-		/* No number, just split as normal */
-		split_at_script(&fragment[start],
-				end-start,
-				block_r2l,
-				char_r2l,
-				arg,
-				callback);
-		return;
-	}
-
-	/* We have a number. We have to check to see whether this
-	 * should be handled as a block_r2l thing. */
-	if (start != 0)
-		block_r2l = ODD(levels[start-1]);
-	if (block_r2l && end != fragment_len)
-		block_r2l = ODD(levels[end]);
-
-	split_at_script(&fragment[start], end-start, block_r2l, char_r2l, arg, callback);
-}
-
 /* Determines the character classes for all following
  * passes of the algorithm. A character class is basically the type of Bidi
  * behaviour that the character exhibits.
@@ -614,11 +542,9 @@ void fz_bidi_fragment_text(fz_context *ctx,
 				 * Create a text object for it, then start
 				 * a new fragment.
 				 */
-				detect_numbers(text,
-						textlen,
-						startOfFragment,
-						i,
-						levels,
+				split_at_script(&text[startOfFragment],
+						i - startOfFragment,
+						levels[startOfFragment],
 						arg,
 						callback);
 				startOfFragment = i;
@@ -626,11 +552,9 @@ void fz_bidi_fragment_text(fz_context *ctx,
 		}
 		/* Now i == textlen. Deal with the final (or maybe only) fragment. */
 		/* otherwise create 1 fragment */
-		detect_numbers(text,
-				textlen,
-				startOfFragment,
-				i,
-				levels,
+		split_at_script(&text[startOfFragment],
+				i - startOfFragment,
+				levels[startOfFragment],
 				arg,
 				callback);
 	}
diff --git a/source/fitz/font.c b/source/fitz/font.c
index 1bac6efc..8f598be1 100644
--- a/source/fitz/font.c
+++ b/source/fitz/font.c
@@ -1442,6 +1442,8 @@ fz_encode_character(fz_context *ctx, fz_font *font, int ucs)
 	return ucs;
 }
 
+/* FIXME: This should take language too eventually, to allow for fonts where we can select different
+ * languages using opentype features. */
 int
 fz_encode_character_with_fallback(fz_context *ctx, fz_font *user_font, int unicode, int script, fz_font **out_font)
 {
diff --git a/source/fitz/text.c b/source/fitz/text.c
index 735b3a5b..29a4506e 100644
--- a/source/fitz/text.c
+++ b/source/fitz/text.c
@@ -37,11 +37,14 @@ fz_drop_text(fz_context *ctx, const fz_text *textc)
 }
 
 static fz_text_span *
-fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, const fz_matrix *trm)
+fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language, const fz_matrix *trm)
 {
 	fz_text_span *span = fz_malloc_struct(ctx, fz_text_span);
 	span->font = fz_keep_font(ctx, font);
 	span->wmode = wmode;
+	span->bidi_level = bidi_level;
+	span->markup_dir = markup_dir;
+	span->language = language;
 	span->trm = *trm;
 	span->trm.e = 0;
 	span->trm.f = 0;
@@ -49,20 +52,23 @@ fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, const fz_matrix *trm
 }
 
 static fz_text_span *
-fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, const fz_matrix *trm)
+fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language, const fz_matrix *trm)
 {
 	if (!text->tail)
 	{
-		text->head = text->tail = fz_new_text_span(ctx, font, wmode, trm);
+		text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
 	}
 	else if (text->tail->font != font ||
 		text->tail->wmode != wmode ||
+		text->tail->bidi_level != bidi_level ||
+		text->tail->markup_dir != markup_dir ||
+		text->tail->language != language ||
 		text->tail->trm.a != trm->a ||
 		text->tail->trm.b != trm->b ||
 		text->tail->trm.c != trm->c ||
 		text->tail->trm.d != trm->d)
 	{
-		text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, trm);
+		text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
 	}
 	return text->tail;
 }
@@ -80,14 +86,14 @@ fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n)
 }
 
 void
-fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int gid, int ucs, int wmode)
+fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int gid, int ucs, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language lang)
 {
 	fz_text_span *span;
 
 	if (text->refs != 1)
 		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot modify shared text objects");
 
-	span = fz_add_text_span(ctx, text, font, wmode, trm);
+	span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm);
 
 	fz_grow_text_span(ctx, span, 1);
 
@@ -99,7 +105,7 @@ fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *tr
 }
 
 void
-fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *trm, const char *s, int wmode)
+fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language)
 {
 	fz_font *font;
 	int gid, ucs;
@@ -109,7 +115,7 @@ fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *tr
 	{
 		s += fz_chartorune(&ucs, s);
 		gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, &font);
-		fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode);
+		fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language);
 		adv = fz_advance_glyph(ctx, font, gid, wmode);
 		if (wmode == 0)
 			fz_pre_translate(trm, adv, 0);
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index 2255cb83..a74354d1 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -106,9 +106,8 @@ static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html *top, fz_c
 	fz_html_flow *flow = fz_pool_alloc(ctx, pool, sizeof *flow);
 	flow->type = type;
 	flow->expand = 0;
-	flow->char_r2l = BIDI_LEFT_TO_RIGHT;
-	flow->block_r2l = BIDI_LEFT_TO_RIGHT;
-	flow->markup_r2l = BIDI_NEUTRAL;
+	flow->bidi_level = 0;
+	flow->markup_dir = FZ_DIR_UNSET;
 	flow->breaks_line = 0;
 	flow->style = style;
 	*top->flow_tail = flow;
@@ -381,7 +380,7 @@ static void init_box(fz_context *ctx, fz_html *box)
 
 	box->flow_head = NULL;
 	box->flow_tail = &box->flow_head;
-	box->flow_dir = BIDI_NEUTRAL;
+	box->flow_dir = FZ_DIR_UNSET;
 
 	fz_default_css_style(ctx, &box->style);
 }
@@ -757,7 +756,7 @@ static void measure_string(fz_context *ctx, fz_html_flow *node, float em, hb_buf
 	node->h = fz_from_css_number_scale(node->style->line_height, em, em, em);
 
 	s = get_node_text(ctx, node);
-	init_string_walker(ctx, &walker, hb_buf, node->char_r2l, node->style->font, node->script, s);
+	init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->style->font, node->script, s);
 	while (walk_string(&walker))
 	{
 		max_x = 0;
@@ -810,31 +809,86 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
 	float slop = page_w - line_w;
 	float justify = 0;
 	float va;
-	int n = 0;
-	fz_html_flow *node = start;
-	fz_html_flow *mid;
+	int n, i;
+	fz_html_flow *node;
+	fz_html_flow **reorder;
+	unsigned int min_level, max_level;
+
+	/* Count the number of nodes on the line */
+	for(i = 0, n = 0, node = start; node != end; node = node->next)
+	{
+		n++;
+		if (node->type == FLOW_SPACE && node->expand && !node->breaks_line)
+			i++;
+	}
 
 	if (align == TA_JUSTIFY)
 	{
-		fz_html_flow *it;
-		for (it = node; it != end; it = it->next)
-			if (it->type == FLOW_SPACE && it->expand && !it->breaks_line)
-				++n;
-		justify = slop / n;
+		justify = slop / i;
 	}
 	else if (align == TA_RIGHT)
 		x += slop;
 	else if (align == TA_CENTER)
 		x += slop / 2;
 
-	/* The line data as supplied is start...end. */
-	/* We have the invariants that 1) start...mid are always laid out
-	 * correctly and 2) mid..node are the most recent set of right to left
-	 * blocks. */
-	mid = start;
-	while (node != end)
+	/* We need a block to hold the node pointers while we reorder */
+	reorder = fz_malloc_array(ctx, n, sizeof(*reorder));
+	min_level = start->bidi_level;
+	max_level = start->bidi_level;
+	for(i = 0, node = start; node != end; i++, node = node->next)
 	{
-		float w = node->w;
+		reorder[i] = node;
+		if (node->bidi_level < min_level)
+			min_level = node->bidi_level;
+		if (node->bidi_level > max_level)
+			max_level = node->bidi_level;
+	}
+
+	/* Reordering is only needed if levels differ, or if the whole line is r2l (odd level) */
+	if (min_level != max_level || (min_level & 1))
+	{
+		/* The lowest level we swap is always a r2l (odd) one */
+		min_level |= 1;
+		/* Each time around the loop we reverse runs of fragments that have
+		 * levels >= max_level (and then decrement max_level). */
+		do
+		{
+			int run_start = 0; /* index into reorder[]; named so as not to shadow 'start' */
+			int run_end; /* last index of the current run; does not shadow 'end' */
+			do
+			{
+				/* Skip until we find a level that's >= max_level */
+				while (run_start < n && reorder[run_start]->bidi_level < max_level)
+					run_start++;
+				/* If run_start >= n-1 then no run of 2 or more remains to reverse. */
+				if (run_start >= n-1)
+					break;
+				/* Find the end of the run: i is one past its last entry */
+				i = run_start+1;
+				while (i < n && reorder[i]->bidi_level >= max_level)
+					i++;
+				/* Reverse from run_start to i-1 */
+				run_end = i-1;
+				while (run_start < run_end)
+				{
+					fz_html_flow *t = reorder[run_start];
+					reorder[run_start++] = reorder[run_end];
+					reorder[run_end--] = t;
+				}
+				run_start = i+1; /* reorder[i] (if any) is below max_level, so skip it */
+			}
+			while (run_start < n);
+			max_level--;
+		}
+		while (max_level >= min_level);
+	}
+
+	for (i = 0; i < n; i++)
+	{
+		float w;
+
+		node = reorder[i];
+		w = node->w;
 
 		if (node->type == FLOW_SPACE && node->breaks_line)
 			w = 0;
@@ -845,30 +899,7 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
 		else if (node->type == FLOW_SHYPHEN && node->breaks_line)
 			w = node->w;
 
-		if (node->block_r2l)
-		{
-			float old_x = x;
-			if (mid != node)
-			{
-				/* We have met a r2l block, and have just had at least
-				 * one other r2l block. Move all the r2l blocks that
-				 * we've just had further right, and position this one
-				 * on the left. */
-				fz_html_flow *temp = mid;
-				while (temp != node)
-				{
-					old_x = temp->x;
-					temp->x += w;
-					temp = temp->next;
-				}
-			}
-			node->x = old_x;
-		}
-		else
-		{
-			node->x = x;
-			mid = node->next;
-		}
+		node->x = x;
 		x += w;
 
 		switch (node->style->vertical_align)
@@ -895,8 +926,9 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
 			node->y = y + baseline - node->h;
 		else
 			node->y = y + baseline + va;
-		node = node->next;
 	}
+
+	fz_free(ctx, reorder);
 }
 
 static void find_accumulated_margins(fz_context *ctx, fz_html *box, float *w, float *h)
@@ -933,7 +965,7 @@ static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, f
 	indent = box->is_first_flow ? fz_from_css_number(top->style.text_indent, em, top->w) : 0;
 	align = top->style.text_align;
 
-	if (box->flow_dir == BIDI_RIGHT_TO_LEFT)
+	if (box->flow_dir == FZ_DIR_R2L)
 	{
 		if (align == TA_LEFT)
 			align = TA_RIGHT;
@@ -1197,7 +1229,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 			w = node->w;
 
 			s = get_node_text(ctx, node);
-			init_string_walker(ctx, &walker, hb_buf, node->char_r2l, node->style->font, node->script, s);
+			init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->style->font, node->script, s);
 			while (walk_string(&walker))
 			{
 				const char *t;
@@ -1248,7 +1280,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 					ly += p->y_advance;
 				}
 
-				if (node->char_r2l)
+				if (node->bidi_level & 1)
 				{
 					w -= lx * node_scale;
 					for (gp = 0; gp < walker.glyph_count; gp++)
@@ -1282,7 +1314,9 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 								continue;
 							trm.e = *(float *)&p->x_offset;
 							trm.f = *(float *)&p->y_offset;
-							fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, c, 0);
+							fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, c, 0,
+								node->bidi_level, node->markup_dir,
+								node->markup_lang);
 							break;
 						}
 						if (gp == walker.glyph_count)
@@ -1291,7 +1325,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 							 * because we've been shaped away into another. We can't afford
 							 * to just drop the codepoint as this will upset text extraction.
 							 */
-							fz_show_glyph(ctx, text, walker.font, &trm, -1, c, 0);
+							fz_show_glyph(ctx, text, walker.font, &trm, -1, c, 0, node->bidi_level, node->markup_dir, node->markup_lang);
 						}
 						else
 						{
@@ -1305,7 +1339,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
 									continue;
 								trm.e = *(float *)&p->x_offset;
 								trm.f = *(float *)&p->y_offset;
-								fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, -1, 0);
+								fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, -1, 0, node->bidi_level, node->markup_dir, node->markup_lang);
 							}
 						}
 						idx += l;
@@ -1494,7 +1528,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float
 	{
 		s += fz_chartorune(&c, s);
 		g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font);
-		fz_show_glyph(ctx, text, font, &trm, g, c, 0);
+		fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_DIR_UNSET, fz_lang_unset);
 		trm.e += fz_advance_glyph(ctx, font, g, 0) * box->em;
 	}
 
@@ -1817,22 +1851,15 @@ typedef struct
 	uni_buf *buffer;
 } bidi_data;
 
-static void newFragCb(const uint32_t *fragment,
+static void fragment_cb(const uint32_t *fragment,
 			size_t fragment_len,
-			int block_r2l,
-			int char_r2l,
+			int bidi_level,
 			int script,
 			void *arg)
 {
 	bidi_data *data = (bidi_data *)arg;
 	size_t fragment_offset = fragment - data->buffer->data;
 
-	/* The Picsel code used to (effectively) do:
-	 * if (fragment_offset == 0) char_r2l = block_r2l;
-	 * but that makes no sense to me. All that could do is stop
-	 * a european number being treated as l2r because it was the
-	 * first thing on a line. */
-
 	/* We are guaranteed that fragmentOffset will be at the beginning
 	 * of flow. */
 	while (fragment_len > 0)
@@ -1861,8 +1888,7 @@ static void newFragCb(const uint32_t *fragment,
 		}
 
 		/* This flow box is entirely contained within this fragment. */
-		data->flow->block_r2l = block_r2l;
-		data->flow->char_r2l = char_r2l;
+		data->flow->bidi_level = bidi_level;
 		data->flow->script = script;
 		data->flow = data->flow->next;
 		fragment_offset += len;
@@ -1870,34 +1896,31 @@ static void newFragCb(const uint32_t *fragment,
 	}
 }
 
-static int
-dirn_matches(int dirn, int dirn2)
-{
-	return (dirn == BIDI_NEUTRAL || dirn2 == BIDI_NEUTRAL || dirn == dirn2);
-}
-
 static void
-detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_bidi_direction *baseDir, fz_html_flow *flow)
+detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, int baseDir, fz_html_flow *flow)
 {
 	fz_html_flow *end = flow;
 	const char *text;
 	bidi_data data;
-	fz_bidi_direction dirn;
+	fz_bidi_direction bidi_dir = BIDI_NEUTRAL;
+
+	if (baseDir == FZ_DIR_L2R)
+		bidi_dir = BIDI_LEFT_TO_RIGHT;
+	else if (baseDir == FZ_DIR_R2L)
+		bidi_dir = BIDI_RIGHT_TO_LEFT;
 
 	while (end)
 	{
-		dirn = BIDI_NEUTRAL;
+		int level = end->bidi_level;
 
 		/* Gather the text from the flow up into a single buffer (at
 		 * least, as much of it as has the same direction markup). */
 		buffer->len = 0;
-		while (end && dirn_matches(dirn, end->markup_r2l))
+		while (end && (level & 1) == (end->bidi_level & 1))
 		{
 			size_t len;
 			int broken = 0;
 
-			dirn = end->markup_r2l;
-
 			switch (end->type)
 			{
 			case FLOW_WORD:
@@ -1948,13 +1971,7 @@ detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_b
 		data.pool = pool;
 		data.flow = flow;
 		data.buffer = buffer;
-		fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &dirn, &newFragCb, &data, 0 /* Flags */);
-
-		/* Set the default flow of the box to be the first non NEUTRAL thing we find */
-		if (*baseDir == BIDI_NEUTRAL)
-		{
-			*baseDir = dirn;
-		}
+		fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &bidi_dir, &fragment_cb, &data, 0 /* Flags */);
 	}
 }
 
@@ -1964,7 +1981,7 @@ detect_box_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_ht
 	while (box)
 	{
 		if (box->flow_head)
-			detect_flow_directionality(ctx, pool, buffer, &box->flow_dir, box->flow_head);
+			detect_flow_directionality(ctx, pool, buffer, box->flow_dir, box->flow_head);
 		detect_box_directionality(ctx, pool, buffer, box->down);
 		box = box->next;
 	}
diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c
index 375b3b1d..9d1442e2 100644
--- a/source/pdf/pdf-appearance.c
+++ b/source/pdf/pdf-appearance.c
@@ -1918,7 +1918,7 @@ static void add_text(fz_context *ctx, font_info *font_rec, fz_text *text, char *
 		str += n;
 		str_len -= n;
 		gid = fz_encode_character(ctx, font, ucs);
-		fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0);
+		fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0, 0, FZ_DIR_UNSET, fz_lang_unset);
 		tm.e += fz_advance_glyph(ctx, font, gid, 0) * font_rec->da_rec.font_size;
 	}
 }
diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c
index 8c9f0639..03bca556 100644
--- a/source/pdf/pdf-op-run.c
+++ b/source/pdf/pdf-op-run.c
@@ -943,11 +943,11 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid)
 	fz_union_rect(&pr->text_bbox, &bbox);
 
 	/* add glyph to textobject */
-	fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode);
+	fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
 
 	/* add filler glyphs for one-to-many unicode mapping */
 	for (i = 1; i < ucslen; i++)
-		fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode);
+		fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
 
 	if (fontdesc->wmode == 0)
 	{
diff --git a/source/tools/murun.c b/source/tools/murun.c
index 29e52578..8ff59be7 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -1935,7 +1935,7 @@ static void ffi_Text_showGlyph(js_State *J)
 	int wmode = js_isdefined(J, 5) ? js_toboolean(J, 5) : 0;
 
 	fz_try(ctx)
-		fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode);
+		fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
 	fz_catch(ctx)
 		rethrow(J);
 }
@@ -1950,7 +1950,7 @@ static void ffi_Text_showString(js_State *J)
 	int wmode = js_isdefined(J, 4) ? js_toboolean(J, 4) : 0;
 
 	fz_try(ctx)
-		fz_show_string(ctx, text, font, &trm, s, wmode);
+		fz_show_string(ctx, text, font, &trm, s, wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
 	fz_catch(ctx)
 		rethrow(J);
 
diff --git a/source/xps/xps-glyphs.c b/source/xps/xps-glyphs.c
index 624276f8..91c53743 100644
--- a/source/xps/xps-glyphs.c
+++ b/source/xps/xps-glyphs.c
@@ -408,6 +408,7 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm,
 			float u_offset = 0;
 			float v_offset = 0;
 			float advance;
+			int dir;
 
 			if (is && *is)
 				is = xps_parse_glyph_index(is, &glyph_index);
@@ -450,7 +451,8 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm,
 				tm.f = y - v_offset;
 			}
 
-			fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways);
+			dir = bidi_level & 1 ? FZ_DIR_R2L : FZ_DIR_L2R;
+			fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways, bidi_level, dir, fz_lang_unset);
 
 			x += advance * 0.01f * size;
 		}