summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/mupdf/fitz/bidi.h16
-rw-r--r--include/mupdf/fitz/text.h23
-rw-r--r--include/mupdf/html.h32
-rw-r--r--source/fitz/bidi.c94
-rw-r--r--source/fitz/font.c2
-rw-r--r--source/fitz/text.c22
-rw-r--r--source/html/html-layout.c183
-rw-r--r--source/pdf/pdf-appearance.c2
-rw-r--r--source/pdf/pdf-op-run.c4
-rw-r--r--source/tools/murun.c4
-rw-r--r--source/xps/xps-glyphs.c4
11 files changed, 165 insertions, 221 deletions
diff --git a/include/mupdf/fitz/bidi.h b/include/mupdf/fitz/bidi.h
index 8428ffc1..f458c498 100644
--- a/include/mupdf/fitz/bidi.h
+++ b/include/mupdf/fitz/bidi.h
@@ -49,18 +49,16 @@ enum
*
* @param fragment first character in fragment
* @param fragmentLen number of characters in fragment
- * @param block_r2l true if block should concatenate with other blocks
- * as right-to-left
- * @param char_r2l true if characters within block should be laid out
- * as right-to-left
+ * @param bidiLevel The bidirectional level for this text. The bottom bit
+ * will be set iff block should concatenate with other
+ * blocks as right-to-left
* @param script the script in use for this fragment (other than common
* or inherited)
* @param arg data from caller of Bidi_fragmentText
*/
typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment,
size_t fragmentLen,
- int block_r2l,
- int char_r2l,
+ int bidiLevel,
int script,
void *arg);
@@ -72,9 +70,9 @@ typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment,
* 0123456789
* rrlllrrrrr,
* we'll invoke callback with:
- * &text[0], length == 2, rightToLeft == true
- * &text[2], length == 3, rightToLeft == false
- * &text[5], length == 5, rightToLeft == true.
+ * &text[0], length == 2, bidiLevel odd (right-to-left)
+ * &text[2], length == 3, bidiLevel even (left-to-right)
+ * &text[5], length == 5, bidiLevel odd (right-to-left)
*
* @param[in] text start of Unicode sequence
* @param[in] textlen number of Unicodes to analyse
diff --git a/include/mupdf/fitz/text.h b/include/mupdf/fitz/text.h
index 2d96ce02..682517c4 100644
--- a/include/mupdf/fitz/text.h
+++ b/include/mupdf/fitz/text.h
@@ -29,11 +29,28 @@ struct fz_text_item_s
int ucs; /* -1 for one ucs to many gid mappings */
};
+typedef enum fz_text_direction_e
+{
+ /* Direction of a run of text as marked in the original document.
+  * Values are stored in 2-bit 'markup_dir' bit-fields, so every
+  * enumerator must stay within the range 0..3. */
+ FZ_DIR_UNSET = 0, /* Unset (or Neutral). All PDF text is sent as this. */
+ FZ_DIR_R2L = 1, /* Text is right-to-left (r2l) */
+ FZ_DIR_L2R = 2 /* Text is left-to-right (l2r) */
+} fz_text_direction;
+
+typedef enum fz_text_language_e
+{ /* Language of a run of text as marked in the original document. */
+ fz_lang_unset = 0 /* No language specified; the only value defined so far. */
+ /* FIXME: Fill in more. Values are stored in the 8-bit 'language'
+  * bit-field of fz_text_span, so they must fit in that width. */
+} fz_text_language;
+
struct fz_text_span_s
{
fz_font *font;
fz_matrix trm;
- int wmode;
+ /* These are deliberately unsigned bit-fields. Whether a plain 'int'
+  * bit-field is signed is implementation-defined; where it is signed,
+  * a 1-bit wmode cannot hold 1, a 2-bit markup_dir cannot hold
+  * FZ_DIR_L2R (2), and a 7-bit bidi_level stops at 63 although UAX#9
+  * allows levels up to 125. The stored value would then never compare
+  * equal to the value passed to fz_add_text_span, so a new span would
+  * be started for every glyph. */
+ unsigned int wmode : 1; /* 0 horizontal, 1 vertical */
+ unsigned int bidi_level : 7; /* The bidirectional level of text (bottom bit set means right-to-left) */
+ unsigned int markup_dir : 2; /* The direction of text as marked in the original document (an fz_text_direction value) */
+ unsigned int language : 8; /* The language as marked in the original document (an fz_text_language value) */
int len, cap;
fz_text_item *items;
fz_text_span *next;
@@ -49,8 +66,8 @@ fz_text *fz_new_text(fz_context *ctx);
fz_text *fz_keep_text(fz_context *ctx, const fz_text *text);
void fz_drop_text(fz_context *ctx, const fz_text *text);
-void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode);
-void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode);
+void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language);
+void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language);
fz_rect *fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, const fz_matrix *ctm, fz_rect *r);
fz_text *fz_clone_text(fz_context *ctx, const fz_text *text);
diff --git a/include/mupdf/html.h b/include/mupdf/html.h
index c3668501..be9733e9 100644
--- a/include/mupdf/html.h
+++ b/include/mupdf/html.h
@@ -185,7 +185,7 @@ struct fz_html_s
float em;
fz_html *up, *down, *last, *next;
fz_html_flow *flow_head, **flow_tail;
- fz_bidi_direction flow_dir;
+ int flow_dir;
fz_css_style style;
int list_item;
int is_first_flow; /* for text-indent */
@@ -202,25 +202,6 @@ enum
FLOW_SHYPHEN = 5
};
-/* We have to recognise the distinction between render direction
- * and layout direction. For most strings render direction and
- * logical direction are the same.
- *
- * Char direction determines whether a string 'ABC' appears as
- * ABC or CBA.
- *
- * Block direction determines how fragments are attached together.
- * 'ABC' and 'DEF' with r2l char and block directions will
- * appear as 'FEDCBA'. With l2r char and block it will appear
- * as 'ABCDEF'.
- *
- * The reason for the distinction is that we can have logical
- * strings like 'ABC0123DEF', where 'ABC' and 'DEF' are in r2l
- * scripts. The bidirectional code breaks this down into 3 fragments
- * 'ABC' '0123' 'DEF', where all three are r2l, but digits need to
- * be rendered left to right. i.e. the desired result is:
- * FED0123CBA, rather than FED3210CBA.
- */
struct fz_html_flow_s
{
/* What type of node */
@@ -229,14 +210,11 @@ struct fz_html_flow_s
/* Whether this should expand during justification */
unsigned int expand : 1;
- /* Whether the chars should be laid out r2l or l2r */
- unsigned int char_r2l : 1;
+ /* Direction setting for text - UAX#9 says 125 is the max */
+ unsigned int bidi_level : 7;
- /* Whether this block should stack with its neighbours r2l or l2r */
- unsigned int block_r2l : 1;
-
- /* Whether the markup specifies a given direction. */
- unsigned int markup_r2l : 2;
+ /* Direction for text set in original document */
+ unsigned int markup_dir : 2;
/* Whether the markup specifies a given language. */
unsigned int markup_lang : 8;
diff --git a/source/fitz/bidi.c b/source/fitz/bidi.c
index 74548d4d..979d2f1c 100644
--- a/source/fitz/bidi.c
+++ b/source/fitz/bidi.c
@@ -170,47 +170,11 @@ static fz_bidi_chartype class_from_ch_n(uint32_t ch)
return from_ch_ws;
}
-static int
-is_european_number(const uint32_t *str, unsigned int len)
-{
- const uint32_t *end = str + len;
-
- for ( ; str != end; str++)
- {
- const uint32_t u = *str;
- if ((u >= UNICODE_RTL_START && u < UNICODE_ARABIC_INDIC_DIGIT_ZERO) ||
- (u > UNICODE_ARABIC_INDIC_DIGIT_NINE && u < UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO) ||
- (u > UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE && u <= UNICODE_RTL_END))
- {
- /* This is just a normal RTL character or accent */
- return FALSE;
- }
- else if (!((u >= UNICODE_DIGIT_ZERO && u <= UNICODE_DIGIT_NINE) ||
- (u == UNICODE_SUPERSCRIPT_TWO) ||
- (u == UNICODE_SUPERSCRIPT_THREE) ||
- (u == UNICODE_SUPERSCRIPT_ONE) ||
- (u >= UNICODE_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_ARABIC_INDIC_DIGIT_NINE) ||
- (u >= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE) ||
- (u == UNICODE_SUPERSCRIPT_ZERO) ||
- (u >= UNICODE_SUPERSCRIPT_FOUR && u <= UNICODE_SUPERSCRIPT_NINE) ||
- (u >= UNICODE_SUBSCRIPT_ZERO && u <= UNICODE_SUBSCRIPT_NINE) ||
- (u >= UNICODE_CIRCLED_DIGIT_ONE && u <= UNICODE_NUMBER_TWENTY_FULL_STOP) ||
- (u == UNICODE_CIRCLED_DIGIT_ZERO) ||
- (u >= UNICODE_FULLWIDTH_DIGIT_ZERO && u <= UNICODE_FULLWIDTH_DIGIT_NINE) ||
- (u == UNICODE_ZERO_WIDTH_NON_JOINER)))
- {
- return FALSE;
- }
- }
- return TRUE;
-}
-
/* Split fragments into single scripts (or punctuation + single script) */
static void
split_at_script(const uint32_t *fragment,
size_t fragment_len,
- int block_r2l,
- int char_r2l,
+ int level,
void *arg,
fz_bidi_fragment_callback *callback)
{
@@ -237,53 +201,17 @@ split_at_script(const uint32_t *fragment,
else
{
/* Change of script. Break the fragment. */
- (*callback)(&fragment[script_start], i - script_start, block_r2l, char_r2l, script, arg);
+ (*callback)(&fragment[script_start], i - script_start, level, script, arg);
script_start = i+1;
script = s;
}
}
if (script_start != fragment_len)
{
- (*callback)(&fragment[script_start], fragment_len - script_start, block_r2l, char_r2l, script, arg);
+ (*callback)(&fragment[script_start], fragment_len - script_start, level, script, arg);
}
}
-static void
-detect_numbers(const uint32_t *fragment,
- size_t fragment_len,
- size_t start,
- size_t end,
- const fz_bidi_level *levels,
- void *arg,
- fz_bidi_fragment_callback *callback)
-{
- int block_r2l = ODD(levels[start]);
- int char_r2l = block_r2l;
-
- /* Check to see if we've got a number. Numbers should
- * never be block_r2l, so we can avoid the test. */
- if (block_r2l || !is_european_number(&fragment[start], end-start))
- {
- /* No number, just split as normal */
- split_at_script(&fragment[start],
- end-start,
- block_r2l,
- char_r2l,
- arg,
- callback);
- return;
- }
-
- /* We have a number. We have to check to see whether this
- * should be handled as a block_r2l thing. */
- if (start != 0)
- block_r2l = ODD(levels[start-1]);
- if (block_r2l && end != fragment_len)
- block_r2l = ODD(levels[end]);
-
- split_at_script(&fragment[start], end-start, block_r2l, char_r2l, arg, callback);
-}
-
/* Determines the character classes for all following
* passes of the algorithm. A character class is basically the type of Bidi
* behaviour that the character exhibits.
@@ -614,11 +542,9 @@ void fz_bidi_fragment_text(fz_context *ctx,
* Create a text object for it, then start
* a new fragment.
*/
- detect_numbers(text,
- textlen,
- startOfFragment,
- i,
- levels,
+ split_at_script(&text[startOfFragment],
+ i - startOfFragment,
+ levels[startOfFragment],
arg,
callback);
startOfFragment = i;
@@ -626,11 +552,9 @@ void fz_bidi_fragment_text(fz_context *ctx,
}
/* Now i == textlen. Deal with the final (or maybe only) fragment. */
/* otherwise create 1 fragment */
- detect_numbers(text,
- textlen,
- startOfFragment,
- i,
- levels,
+ split_at_script(&text[startOfFragment],
+ i - startOfFragment,
+ levels[startOfFragment],
arg,
callback);
}
diff --git a/source/fitz/font.c b/source/fitz/font.c
index 1bac6efc..8f598be1 100644
--- a/source/fitz/font.c
+++ b/source/fitz/font.c
@@ -1442,6 +1442,8 @@ fz_encode_character(fz_context *ctx, fz_font *font, int ucs)
return ucs;
}
+/* FIXME: This should take language too eventually, to allow for fonts where we can select different
+ * languages using opentype features. */
int
fz_encode_character_with_fallback(fz_context *ctx, fz_font *user_font, int unicode, int script, fz_font **out_font)
{
diff --git a/source/fitz/text.c b/source/fitz/text.c
index 735b3a5b..29a4506e 100644
--- a/source/fitz/text.c
+++ b/source/fitz/text.c
@@ -37,11 +37,14 @@ fz_drop_text(fz_context *ctx, const fz_text *textc)
}
static fz_text_span *
-fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, const fz_matrix *trm)
+fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language, const fz_matrix *trm)
{
fz_text_span *span = fz_malloc_struct(ctx, fz_text_span);
span->font = fz_keep_font(ctx, font);
span->wmode = wmode;
+ span->bidi_level = bidi_level;
+ span->markup_dir = markup_dir;
+ span->language = language;
span->trm = *trm;
span->trm.e = 0;
span->trm.f = 0;
@@ -49,20 +52,23 @@ fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, const fz_matrix *trm
}
/* Return the span that the next glyph should be appended to.
 *
 * The current tail span is reused when its font (compared by pointer),
 * wmode, bidi_level, markup_dir, language and the a/b/c/d components of
 * its matrix all equal the values passed in; otherwise a new span is
 * created and linked on as the new tail. The translation components
 * (e/f) of trm are not compared.
 *
 * The returned pointer is always text->tail.
 */
static fz_text_span *
-fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, const fz_matrix *trm)
+fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language, const fz_matrix *trm)
{
/* Empty text object: the first span becomes both head and tail. */
if (!text->tail)
{
- text->head = text->tail = fz_new_text_span(ctx, font, wmode, trm);
+ text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
}
/* Any attribute mismatch with the tail span forces a new span. The
 * comparisons against wmode, bidi_level, markup_dir and language rely on
 * the span's bit-fields reading back exactly the value that was stored. */
else if (text->tail->font != font ||
text->tail->wmode != wmode ||
+ text->tail->bidi_level != bidi_level || /* a change of bidi level starts a new span */
+ text->tail->markup_dir != markup_dir || /* as does a change of marked-up direction */
+ text->tail->language != language || /* or of marked-up language */
text->tail->trm.a != trm->a ||
text->tail->trm.b != trm->b ||
text->tail->trm.c != trm->c ||
text->tail->trm.d != trm->d)
{
/* Link the new span after the old tail, then advance the tail to it. */
- text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, trm);
+ text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm);
}
return text->tail;
}
@@ -80,14 +86,14 @@ fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n)
}
void
-fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int gid, int ucs, int wmode)
+fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int gid, int ucs, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language lang)
{
fz_text_span *span;
if (text->refs != 1)
fz_throw(ctx, FZ_ERROR_GENERIC, "cannot modify shared text objects");
- span = fz_add_text_span(ctx, text, font, wmode, trm);
+ span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm);
fz_grow_text_span(ctx, span, 1);
@@ -99,7 +105,7 @@ fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *tr
}
void
-fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *trm, const char *s, int wmode)
+fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language)
{
fz_font *font;
int gid, ucs;
@@ -109,7 +115,7 @@ fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *tr
{
s += fz_chartorune(&ucs, s);
gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, &font);
- fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode);
+ fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language);
adv = fz_advance_glyph(ctx, font, gid, wmode);
if (wmode == 0)
fz_pre_translate(trm, adv, 0);
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index 2255cb83..a74354d1 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -106,9 +106,8 @@ static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html *top, fz_c
fz_html_flow *flow = fz_pool_alloc(ctx, pool, sizeof *flow);
flow->type = type;
flow->expand = 0;
- flow->char_r2l = BIDI_LEFT_TO_RIGHT;
- flow->block_r2l = BIDI_LEFT_TO_RIGHT;
- flow->markup_r2l = BIDI_NEUTRAL;
+ flow->bidi_level = 0;
+ flow->markup_dir = FZ_DIR_UNSET;
flow->breaks_line = 0;
flow->style = style;
*top->flow_tail = flow;
@@ -381,7 +380,7 @@ static void init_box(fz_context *ctx, fz_html *box)
box->flow_head = NULL;
box->flow_tail = &box->flow_head;
- box->flow_dir = BIDI_NEUTRAL;
+ box->flow_dir = FZ_DIR_UNSET;
fz_default_css_style(ctx, &box->style);
}
@@ -757,7 +756,7 @@ static void measure_string(fz_context *ctx, fz_html_flow *node, float em, hb_buf
node->h = fz_from_css_number_scale(node->style->line_height, em, em, em);
s = get_node_text(ctx, node);
- init_string_walker(ctx, &walker, hb_buf, node->char_r2l, node->style->font, node->script, s);
+ init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->style->font, node->script, s);
while (walk_string(&walker))
{
max_x = 0;
@@ -810,31 +809,86 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
float slop = page_w - line_w;
float justify = 0;
float va;
- int n = 0;
- fz_html_flow *node = start;
- fz_html_flow *mid;
+ int n, i;
+ fz_html_flow *node;
+ fz_html_flow **reorder;
+ unsigned int min_level, max_level;
+
+ /* Count the number of nodes on the line */
+ for(i = 0, n = 0, node = start; node != end; node = node->next)
+ {
+ n++;
+ if (node->type == FLOW_SPACE && node->expand && !node->breaks_line)
+ i++;
+ }
if (align == TA_JUSTIFY)
{
- fz_html_flow *it;
- for (it = node; it != end; it = it->next)
- if (it->type == FLOW_SPACE && it->expand && !it->breaks_line)
- ++n;
- justify = slop / n;
+ justify = slop / i;
}
else if (align == TA_RIGHT)
x += slop;
else if (align == TA_CENTER)
x += slop / 2;
- /* The line data as supplied is start...end. */
- /* We have the invariants that 1) start...mid are always laid out
- * correctly and 2) mid..node are the most recent set of right to left
- * blocks. */
- mid = start;
- while (node != end)
+ /* We need a block to hold the node pointers while we reorder */
+ reorder = fz_malloc_array(ctx, n, sizeof(*reorder));
+ min_level = start->bidi_level;
+ max_level = start->bidi_level;
+ for(i = 0, node = start; node != end; i++, node = node->next)
{
- float w = node->w;
+ reorder[i] = node;
+ if (node->bidi_level < min_level)
+ min_level = node->bidi_level;
+ if (node->bidi_level > max_level)
+ max_level = node->bidi_level;
+ }
+
+ /* Do we need to do any reordering? */
+ if (min_level != max_level || (min_level & 1))
+ {
+ /* The lowest level we swap is always a r2l one */
+ min_level |= 1;
+ /* Each time around the loop we swap runs of fragments that have
+ * levels >= max_level (and decrement max_level). */
+ do
+ {
+ int start = 0;
+ int end;
+ do
+ {
+ /* Skip until we find a level that's >= max_level */
+ while (start < n && reorder[start]->bidi_level < max_level)
+ start++;
+ /* If start >= n-1 then no more runs. */
+ if (start >= n-1)
+ break;
+ /* Find the end of the match */
+ i = start+1;
+ while (i < n && reorder[i]->bidi_level >= max_level)
+ i++;
+ /* Reverse from start to i-1 */
+ end = i-1;
+ while (start < end)
+ {
+ fz_html_flow *t = reorder[start];
+ reorder[start++] = reorder[end];
+ reorder[end--] = t;
+ }
+ start = i+1;
+ }
+ while (start < n);
+ max_level--;
+ }
+ while (max_level >= min_level);
+ }
+
+ for (i = 0; i < n; i++)
+ {
+ float w;
+
+ node = reorder[i];
+ w = node->w;
if (node->type == FLOW_SPACE && node->breaks_line)
w = 0;
@@ -845,30 +899,7 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
else if (node->type == FLOW_SHYPHEN && node->breaks_line)
w = node->w;
- if (node->block_r2l)
- {
- float old_x = x;
- if (mid != node)
- {
- /* We have met a r2l block, and have just had at least
- * one other r2l block. Move all the r2l blocks that
- * we've just had further right, and position this one
- * on the left. */
- fz_html_flow *temp = mid;
- while (temp != node)
- {
- old_x = temp->x;
- temp->x += w;
- temp = temp->next;
- }
- }
- node->x = old_x;
- }
- else
- {
- node->x = x;
- mid = node->next;
- }
+ node->x = x;
x += w;
switch (node->style->vertical_align)
@@ -895,8 +926,9 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_
node->y = y + baseline - node->h;
else
node->y = y + baseline + va;
- node = node->next;
}
+
+ fz_free(ctx, reorder);
}
static void find_accumulated_margins(fz_context *ctx, fz_html *box, float *w, float *h)
@@ -933,7 +965,7 @@ static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, f
indent = box->is_first_flow ? fz_from_css_number(top->style.text_indent, em, top->w) : 0;
align = top->style.text_align;
- if (box->flow_dir == BIDI_RIGHT_TO_LEFT)
+ if (box->flow_dir == FZ_DIR_R2L)
{
if (align == TA_LEFT)
align = TA_RIGHT;
@@ -1197,7 +1229,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
w = node->w;
s = get_node_text(ctx, node);
- init_string_walker(ctx, &walker, hb_buf, node->char_r2l, node->style->font, node->script, s);
+ init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->style->font, node->script, s);
while (walk_string(&walker))
{
const char *t;
@@ -1248,7 +1280,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
ly += p->y_advance;
}
- if (node->char_r2l)
+ if (node->bidi_level & 1)
{
w -= lx * node_scale;
for (gp = 0; gp < walker.glyph_count; gp++)
@@ -1282,7 +1314,9 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
continue;
trm.e = *(float *)&p->x_offset;
trm.f = *(float *)&p->y_offset;
- fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, c, 0);
+ fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, c, 0,
+ node->bidi_level, node->markup_dir,
+ node->markup_lang);
break;
}
if (gp == walker.glyph_count)
@@ -1291,7 +1325,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
* because we've been shaped away into another. We can't afford
* to just drop the codepoint as this will upset text extraction.
*/
- fz_show_glyph(ctx, text, walker.font, &trm, -1, c, 0);
+ fz_show_glyph(ctx, text, walker.font, &trm, -1, c, 0, node->bidi_level, node->markup_dir, node->markup_lang);
}
else
{
@@ -1305,7 +1339,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p
continue;
trm.e = *(float *)&p->x_offset;
trm.f = *(float *)&p->y_offset;
- fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, -1, 0);
+ fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, -1, 0, node->bidi_level, node->markup_dir, node->markup_lang);
}
}
idx += l;
@@ -1494,7 +1528,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float
{
s += fz_chartorune(&c, s);
g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font);
- fz_show_glyph(ctx, text, font, &trm, g, c, 0);
+ fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_DIR_UNSET, fz_lang_unset);
trm.e += fz_advance_glyph(ctx, font, g, 0) * box->em;
}
@@ -1817,22 +1851,15 @@ typedef struct
uni_buf *buffer;
} bidi_data;
-static void newFragCb(const uint32_t *fragment,
+static void fragment_cb(const uint32_t *fragment,
size_t fragment_len,
- int block_r2l,
- int char_r2l,
+ int bidi_level,
int script,
void *arg)
{
bidi_data *data = (bidi_data *)arg;
size_t fragment_offset = fragment - data->buffer->data;
- /* The Picsel code used to (effectively) do:
- * if (fragment_offset == 0) char_r2l = block_r2l;
- * but that makes no sense to me. All that could do is stop
- * a european number being treated as l2r because it was the
- * first thing on a line. */
-
/* We are guaranteed that fragmentOffset will be at the beginning
* of flow. */
while (fragment_len > 0)
@@ -1861,8 +1888,7 @@ static void newFragCb(const uint32_t *fragment,
}
/* This flow box is entirely contained within this fragment. */
- data->flow->block_r2l = block_r2l;
- data->flow->char_r2l = char_r2l;
+ data->flow->bidi_level = bidi_level;
data->flow->script = script;
data->flow = data->flow->next;
fragment_offset += len;
@@ -1870,34 +1896,31 @@ static void newFragCb(const uint32_t *fragment,
}
}
-static int
-dirn_matches(int dirn, int dirn2)
-{
- return (dirn == BIDI_NEUTRAL || dirn2 == BIDI_NEUTRAL || dirn == dirn2);
-}
-
static void
-detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_bidi_direction *baseDir, fz_html_flow *flow)
+detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, int baseDir, fz_html_flow *flow)
{
fz_html_flow *end = flow;
const char *text;
bidi_data data;
- fz_bidi_direction dirn;
+ fz_bidi_direction bidi_dir = BIDI_NEUTRAL;
+
+ if (baseDir == FZ_DIR_L2R)
+ bidi_dir = BIDI_LEFT_TO_RIGHT;
+ else if (baseDir == FZ_DIR_R2L)
+ bidi_dir = BIDI_RIGHT_TO_LEFT;
while (end)
{
- dirn = BIDI_NEUTRAL;
+ int level = end->bidi_level;
/* Gather the text from the flow up into a single buffer (at
* least, as much of it as has the same direction markup). */
buffer->len = 0;
- while (end && dirn_matches(dirn, end->markup_r2l))
+ while (end && (level & 1) == (end->bidi_level & 1))
{
size_t len;
int broken = 0;
- dirn = end->markup_r2l;
-
switch (end->type)
{
case FLOW_WORD:
@@ -1948,13 +1971,7 @@ detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_b
data.pool = pool;
data.flow = flow;
data.buffer = buffer;
- fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &dirn, &newFragCb, &data, 0 /* Flags */);
-
- /* Set the default flow of the box to be the first non NEUTRAL thing we find */
- if (*baseDir == BIDI_NEUTRAL)
- {
- *baseDir = dirn;
- }
+ fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &bidi_dir, &fragment_cb, &data, 0 /* Flags */);
}
}
@@ -1964,7 +1981,7 @@ detect_box_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_ht
while (box)
{
if (box->flow_head)
- detect_flow_directionality(ctx, pool, buffer, &box->flow_dir, box->flow_head);
+ detect_flow_directionality(ctx, pool, buffer, box->flow_dir, box->flow_head);
detect_box_directionality(ctx, pool, buffer, box->down);
box = box->next;
}
diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c
index 375b3b1d..9d1442e2 100644
--- a/source/pdf/pdf-appearance.c
+++ b/source/pdf/pdf-appearance.c
@@ -1918,7 +1918,7 @@ static void add_text(fz_context *ctx, font_info *font_rec, fz_text *text, char *
str += n;
str_len -= n;
gid = fz_encode_character(ctx, font, ucs);
- fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0);
+ fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0, 0, FZ_DIR_UNSET, fz_lang_unset);
tm.e += fz_advance_glyph(ctx, font, gid, 0) * font_rec->da_rec.font_size;
}
}
diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c
index 8c9f0639..03bca556 100644
--- a/source/pdf/pdf-op-run.c
+++ b/source/pdf/pdf-op-run.c
@@ -943,11 +943,11 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid)
fz_union_rect(&pr->text_bbox, &bbox);
/* add glyph to textobject */
- fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode);
+ fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
/* add filler glyphs for one-to-many unicode mapping */
for (i = 1; i < ucslen; i++)
- fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode);
+ fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
if (fontdesc->wmode == 0)
{
diff --git a/source/tools/murun.c b/source/tools/murun.c
index 29e52578..8ff59be7 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -1935,7 +1935,7 @@ static void ffi_Text_showGlyph(js_State *J)
int wmode = js_isdefined(J, 5) ? js_toboolean(J, 5) : 0;
fz_try(ctx)
- fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode);
+ fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
fz_catch(ctx)
rethrow(J);
}
@@ -1950,7 +1950,7 @@ static void ffi_Text_showString(js_State *J)
int wmode = js_isdefined(J, 4) ? js_toboolean(J, 4) : 0;
fz_try(ctx)
- fz_show_string(ctx, text, font, &trm, s, wmode);
+ fz_show_string(ctx, text, font, &trm, s, wmode, 0, FZ_DIR_UNSET, fz_lang_unset);
fz_catch(ctx)
rethrow(J);
diff --git a/source/xps/xps-glyphs.c b/source/xps/xps-glyphs.c
index 624276f8..91c53743 100644
--- a/source/xps/xps-glyphs.c
+++ b/source/xps/xps-glyphs.c
@@ -408,6 +408,7 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm,
float u_offset = 0;
float v_offset = 0;
float advance;
+ int dir;
if (is && *is)
is = xps_parse_glyph_index(is, &glyph_index);
@@ -450,7 +451,8 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm,
tm.f = y - v_offset;
}
- fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways);
+ dir = bidi_level & 1 ? FZ_DIR_R2L : FZ_DIR_L2R;
+ fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways, bidi_level, dir, fz_lang_unset);
x += advance * 0.01f * size;
}