diff options
-rw-r--r-- | include/mupdf/fitz/bidi.h | 16 | ||||
-rw-r--r-- | include/mupdf/fitz/text.h | 23 | ||||
-rw-r--r-- | include/mupdf/html.h | 32 | ||||
-rw-r--r-- | source/fitz/bidi.c | 94 | ||||
-rw-r--r-- | source/fitz/font.c | 2 | ||||
-rw-r--r-- | source/fitz/text.c | 22 | ||||
-rw-r--r-- | source/html/html-layout.c | 183 | ||||
-rw-r--r-- | source/pdf/pdf-appearance.c | 2 | ||||
-rw-r--r-- | source/pdf/pdf-op-run.c | 4 | ||||
-rw-r--r-- | source/tools/murun.c | 4 | ||||
-rw-r--r-- | source/xps/xps-glyphs.c | 4 |
11 files changed, 165 insertions, 221 deletions
diff --git a/include/mupdf/fitz/bidi.h b/include/mupdf/fitz/bidi.h index 8428ffc1..f458c498 100644 --- a/include/mupdf/fitz/bidi.h +++ b/include/mupdf/fitz/bidi.h @@ -49,18 +49,16 @@ enum * * @param fragment first character in fragment * @param fragmentLen number of characters in fragment - * @param block_r2l true if block should concatenate with other blocks - * as right-to-left - * @param char_r2l true if characters within block should be laid out - * as right-to-left + * @param bidiLevel The bidirectional level for this text. The bottom bit + * will be set iff block should concatenate with other + * blocks as right-to-left * @param script the script in use for this fragment (other than common * or inherited) * @param arg data from caller of Bidi_fragmentText */ typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment, size_t fragmentLen, - int block_r2l, - int char_r2l, + int bidiLevel, int script, void *arg); @@ -72,9 +70,9 @@ typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment, * 0123456789 * rrlllrrrrr, * we'll invoke callback with: - * &text[0], length == 2, rightToLeft == true - * &text[2], length == 3, rightToLeft == false - * &text[5], length == 5, rightToLeft == true. + * &text[0], length == 2 + * &text[2], length == 3 + * &text[5], length == 5 * * @param[in] text start of Unicode sequence * @param[in] textlen number of Unicodes to analyse diff --git a/include/mupdf/fitz/text.h b/include/mupdf/fitz/text.h index 2d96ce02..682517c4 100644 --- a/include/mupdf/fitz/text.h +++ b/include/mupdf/fitz/text.h @@ -29,11 +29,28 @@ struct fz_text_item_s int ucs; /* -1 for one ucs to many gid mappings */ }; +typedef enum fz_text_direction_e +{ + /* There are various possible 'directions' for text */ + FZ_DIR_UNSET = 0, /* Unset (or Neutral). All PDF text is sent as this. */ + FZ_DIR_R2L = 1, /* Text is r2l */ + FZ_DIR_L2R = 2 /* Text is l2r */ +} fz_text_direction; + +typedef enum fz_text_language_e +{ + fz_lang_unset = 0 + /* FIXME: Fill in more */ +} fz_text_language; + struct fz_text_span_s { fz_font *font; fz_matrix trm; - int wmode; + int wmode : 1; /* 0 horizontal, 1 vertical */ + int bidi_level : 7; /* The bidirectional level of text */ + int markup_dir : 2; /* The direction of text as marked in the original document */ + int language : 8; /* The language as marked in the original document */ int len, cap; fz_text_item *items; fz_text_span *next; @@ -49,8 +66,8 @@ fz_text *fz_new_text(fz_context *ctx); fz_text *fz_keep_text(fz_context *ctx, const fz_text *text); void fz_drop_text(fz_context *ctx, const fz_text *text); -void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode); -void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode); +void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language); +void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language); fz_rect *fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, const fz_matrix *ctm, fz_rect *r); fz_text *fz_clone_text(fz_context *ctx, const fz_text *text); diff --git a/include/mupdf/html.h b/include/mupdf/html.h index c3668501..be9733e9 100644 --- a/include/mupdf/html.h +++ b/include/mupdf/html.h @@ -185,7 +185,7 @@ struct fz_html_s float em; fz_html *up, *down, *last, *next; fz_html_flow *flow_head, **flow_tail; - fz_bidi_direction flow_dir; + int flow_dir; fz_css_style style; int list_item; int is_first_flow; /* for text-indent */ @@ -202,25 +202,6 @@ enum FLOW_SHYPHEN = 5 }; -/* We have to recognise the distinction between render direction - * and layout direction. For most strings render direction and - * logical direction are the same. - * - * Char direction determines whether a string 'ABC' appears as - * ABC or CBA. - * - * Block direction determines how fragments are attached together. - * 'ABC' and 'DEF' with r2l char and block directions will - * appear as 'FEDCBA'. With l2r char and block it will appear - * as 'ABCDEF'. - * - * The reason for the distinction is that we can have logical - * strings like 'ABC0123DEF', where 'ABC' and 'DEF' are in r2l - * scripts. The bidirectional code breaks this down into 3 fragments - * 'ABC' '0123' 'DEF', where all three are r2l, but digits need to - * be rendered left to right. i.e. the desired result is: - * FED0123CBA, rather than FED3210CBA. - */ struct fz_html_flow_s { /* What type of node */ @@ -229,14 +210,11 @@ struct fz_html_flow_s /* Whether this should expand during justification */ unsigned int expand : 1; - /* Whether the chars should be laid out r2l or l2r */ - unsigned int char_r2l : 1; + /* Direction setting for text - UAX#9 says 125 is the max */ + unsigned int bidi_level : 7; - /* Whether this block should stack with its neighbours r2l or l2r */ - unsigned int block_r2l : 1; - - /* Whether the markup specifies a given direction. */ - unsigned int markup_r2l : 2; + /* Direction for text set in original document */ + unsigned int markup_dir : 2; /* Whether the markup specifies a given language. */ unsigned int markup_lang : 8; diff --git a/source/fitz/bidi.c b/source/fitz/bidi.c index 74548d4d..979d2f1c 100644 --- a/source/fitz/bidi.c +++ b/source/fitz/bidi.c @@ -170,47 +170,11 @@ static fz_bidi_chartype class_from_ch_n(uint32_t ch) return from_ch_ws; } -static int -is_european_number(const uint32_t *str, unsigned int len) -{ - const uint32_t *end = str + len; - - for ( ; str != end; str++) - { - const uint32_t u = *str; - if ((u >= UNICODE_RTL_START && u < UNICODE_ARABIC_INDIC_DIGIT_ZERO) || - (u > UNICODE_ARABIC_INDIC_DIGIT_NINE && u < UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO) || - (u > UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE && u <= UNICODE_RTL_END)) - { - /* This is just a normal RTL character or accent */ - return FALSE; - } - else if (!((u >= UNICODE_DIGIT_ZERO && u <= UNICODE_DIGIT_NINE) || - (u == UNICODE_SUPERSCRIPT_TWO) || - (u == UNICODE_SUPERSCRIPT_THREE) || - (u == UNICODE_SUPERSCRIPT_ONE) || - (u >= UNICODE_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_ARABIC_INDIC_DIGIT_NINE) || - (u >= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO && u <= UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE) || - (u == UNICODE_SUPERSCRIPT_ZERO) || - (u >= UNICODE_SUPERSCRIPT_FOUR && u <= UNICODE_SUPERSCRIPT_NINE) || - (u >= UNICODE_SUBSCRIPT_ZERO && u <= UNICODE_SUBSCRIPT_NINE) || - (u >= UNICODE_CIRCLED_DIGIT_ONE && u <= UNICODE_NUMBER_TWENTY_FULL_STOP) || - (u == UNICODE_CIRCLED_DIGIT_ZERO) || - (u >= UNICODE_FULLWIDTH_DIGIT_ZERO && u <= UNICODE_FULLWIDTH_DIGIT_NINE) || - (u == UNICODE_ZERO_WIDTH_NON_JOINER))) - { - return FALSE; - } - } - return TRUE; -} - /* Split fragments into single scripts (or punctation + single script) */ static void split_at_script(const uint32_t *fragment, size_t fragment_len, - int block_r2l, - int char_r2l, + int level, void *arg, fz_bidi_fragment_callback *callback) { @@ -237,53 +201,17 @@ split_at_script(const uint32_t *fragment, else { /* Change of script. Break the fragment. */ - (*callback)(&fragment[script_start], i - script_start, block_r2l, char_r2l, script, arg); + (*callback)(&fragment[script_start], i - script_start, level, script, arg); script_start = i+1; script = s; } } if (script_start != fragment_len) { - (*callback)(&fragment[script_start], fragment_len - script_start, block_r2l, char_r2l, script, arg); + (*callback)(&fragment[script_start], fragment_len - script_start, level, script, arg); } } -static void -detect_numbers(const uint32_t *fragment, - size_t fragment_len, - size_t start, - size_t end, - const fz_bidi_level *levels, - void *arg, - fz_bidi_fragment_callback *callback) -{ - int block_r2l = ODD(levels[start]); - int char_r2l = block_r2l; - - /* Check to see if we've got a number. Numbers should - * never be block_r2l, so we can avoid the test. */ - if (block_r2l || !is_european_number(&fragment[start], end-start)) - { - /* No number, just split as normal */ - split_at_script(&fragment[start], - end-start, - block_r2l, - char_r2l, - arg, - callback); - return; - } - - /* We have a number. We have to check to see whether this - * should be handled as a block_r2l thing. */ - if (start != 0) - block_r2l = ODD(levels[start-1]); - if (block_r2l && end != fragment_len) - block_r2l = ODD(levels[end]); - - split_at_script(&fragment[start], end-start, block_r2l, char_r2l, arg, callback); -} - /* Determines the character classes for all following * passes of the algorithm. A character class is basically the type of Bidi * behaviour that the character exhibits. @@ -614,11 +542,9 @@ void fz_bidi_fragment_text(fz_context *ctx, * Create a text object for it, then start * a new fragment. */ - detect_numbers(text, - textlen, - startOfFragment, - i, - levels, + split_at_script(&text[startOfFragment], + i - startOfFragment, + levels[startOfFragment], arg, callback); startOfFragment = i; @@ -626,11 +552,9 @@ void fz_bidi_fragment_text(fz_context *ctx, } /* Now i == textlen. Deal with the final (or maybe only) fragment. */ /* otherwise create 1 fragment */ - detect_numbers(text, - textlen, - startOfFragment, - i, - levels, + split_at_script(&text[startOfFragment], + i - startOfFragment, + levels[startOfFragment], arg, callback); } diff --git a/source/fitz/font.c b/source/fitz/font.c index 1bac6efc..8f598be1 100644 --- a/source/fitz/font.c +++ b/source/fitz/font.c @@ -1442,6 +1442,8 @@ fz_encode_character(fz_context *ctx, fz_font *font, int ucs) return ucs; } +/* FIXME: This should take language too eventually, to allow for fonts where we can select different + * languages using opentype features. */ int fz_encode_character_with_fallback(fz_context *ctx, fz_font *user_font, int unicode, int script, fz_font **out_font) { diff --git a/source/fitz/text.c b/source/fitz/text.c index 735b3a5b..29a4506e 100644 --- a/source/fitz/text.c +++ b/source/fitz/text.c @@ -37,11 +37,14 @@ fz_drop_text(fz_context *ctx, const fz_text *textc) } static fz_text_span * -fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, const fz_matrix *trm) +fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language, const fz_matrix *trm) { fz_text_span *span = fz_malloc_struct(ctx, fz_text_span); span->font = fz_keep_font(ctx, font); span->wmode = wmode; + span->bidi_level = bidi_level; + span->markup_dir = markup_dir; + span->language = language; span->trm = *trm; span->trm.e = 0; span->trm.f = 0; @@ -49,20 +52,23 @@ fz_new_text_span(fz_context *ctx, fz_font *font, int wmode, const fz_matrix *trm } static fz_text_span * -fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, const fz_matrix *trm) +fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language, const fz_matrix *trm) { if (!text->tail) { - text->head = text->tail = fz_new_text_span(ctx, font, wmode, trm); + text->head = text->tail = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); } else if (text->tail->font != font || text->tail->wmode != wmode || + text->tail->bidi_level != bidi_level || + text->tail->markup_dir != markup_dir || + text->tail->language != language || text->tail->trm.a != trm->a || text->tail->trm.b != trm->b || text->tail->trm.c != trm->c || text->tail->trm.d != trm->d) { - text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, trm); + text->tail = text->tail->next = fz_new_text_span(ctx, font, wmode, bidi_level, markup_dir, language, trm); } return text->tail; } @@ -80,14 +86,14 @@ fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n) } void -fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int gid, int ucs, int wmode) +fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int gid, int ucs, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language lang) { fz_text_span *span; if (text->refs != 1) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot modify shared text objects"); - span = fz_add_text_span(ctx, text, font, wmode, trm); + span = fz_add_text_span(ctx, text, font, wmode, bidi_level, markup_dir, lang, trm); fz_grow_text_span(ctx, span, 1); @@ -99,7 +105,7 @@ fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *tr } void -fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *trm, const char *s, int wmode) +fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language) { fz_font *font; int gid, ucs; @@ -109,7 +115,7 @@ fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *tr { s += fz_chartorune(&ucs, s); gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, &font); - fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode); + fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language); adv = fz_advance_glyph(ctx, font, gid, wmode); if (wmode == 0) fz_pre_translate(trm, adv, 0); diff --git a/source/html/html-layout.c b/source/html/html-layout.c index 2255cb83..a74354d1 100644 --- a/source/html/html-layout.c +++ b/source/html/html-layout.c @@ -106,9 +106,8 @@ static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html *top, fz_c fz_html_flow *flow = fz_pool_alloc(ctx, pool, sizeof *flow); flow->type = type; flow->expand = 0; - flow->char_r2l = BIDI_LEFT_TO_RIGHT; - flow->block_r2l = BIDI_LEFT_TO_RIGHT; - flow->markup_r2l = BIDI_NEUTRAL; + flow->bidi_level = 0; + flow->markup_dir = FZ_DIR_UNSET; flow->breaks_line = 0; flow->style = style; *top->flow_tail = flow; @@ -381,7 +380,7 @@ static void init_box(fz_context *ctx, fz_html *box) box->flow_head = NULL; box->flow_tail = &box->flow_head; - box->flow_dir = BIDI_NEUTRAL; + box->flow_dir = FZ_DIR_UNSET; fz_default_css_style(ctx, &box->style); } @@ -757,7 +756,7 @@ static void measure_string(fz_context *ctx, fz_html_flow *node, float em, hb_buf node->h = fz_from_css_number_scale(node->style->line_height, em, em, em); s = get_node_text(ctx, node); - init_string_walker(ctx, &walker, hb_buf, node->char_r2l, node->style->font, node->script, s); + init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->style->font, node->script, s); while (walk_string(&walker)) { max_x = 0; @@ -810,31 +809,86 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_ float slop = page_w - line_w; float justify = 0; float va; - int n = 0; - fz_html_flow *node = start; - fz_html_flow *mid; + int n, i; + fz_html_flow *node; + fz_html_flow **reorder; + unsigned int min_level, max_level; + + /* Count the number of nodes on the line */ + for(i = 0, n = 0, node = start; node != end; node = node->next) + { + n++; + if (node->type == FLOW_SPACE && node->expand && !node->breaks_line) + i++; + } if (align == TA_JUSTIFY) { - fz_html_flow *it; - for (it = node; it != end; it = it->next) - if (it->type == FLOW_SPACE && it->expand && !it->breaks_line) - ++n; - justify = slop / n; + justify = slop / i; } else if (align == TA_RIGHT) x += slop; else if (align == TA_CENTER) x += slop / 2; - /* The line data as supplied is start...end. */ - /* We have the invariants that 1) start...mid are always laid out - * correctly and 2) mid..node are the most recent set of right to left - * blocks. */ - mid = start; - while (node != end) + /* We need a block to hold the node pointers while we reorder */ + reorder = fz_malloc_array(ctx, n, sizeof(*reorder)); + min_level = start->bidi_level; + max_level = start->bidi_level; + for(i = 0, node = start; node != end; i++, node = node->next) { - float w = node->w; + reorder[i] = node; + if (node->bidi_level < min_level) + min_level = node->bidi_level; + if (node->bidi_level > max_level) + max_level = node->bidi_level; + } + + /* Do we need to do any reordering? */ + if (min_level != max_level || (min_level & 1)) + { + /* The lowest level we swap is always a r2l one */ + min_level |= 1; + /* Each time around the loop we swap runs of fragments that have + * levels >= max_level (and decrement max_level). */ + do + { + int start = 0; + int end; + do + { + /* Skip until we find a level that's >= max_level */ + while (start < n && reorder[start]->bidi_level < max_level) + start++; + /* If start >= n-1 then no more runs. */ + if (start >= n-1) + break; + /* Find the end of the match */ + i = start+1; + while (i < n && reorder[i]->bidi_level >= max_level) + i++; + /* Reverse from start to i-1 */ + end = i-1; + while (start < end) + { + fz_html_flow *t = reorder[start]; + reorder[start++] = reorder[end]; + reorder[end--] = t; + } + start = i+1; + } + while (start < n); + max_level--; + } + while (max_level >= min_level); + } + + for (i = 0; i < n; i++) + { + float w; + + node = reorder[i]; + w = node->w; if (node->type == FLOW_SPACE && node->breaks_line) w = 0; @@ -845,30 +899,7 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_ else if (node->type == FLOW_SHYPHEN && node->breaks_line) w = node->w; - if (node->block_r2l) - { - float old_x = x; - if (mid != node) - { - /* We have met a r2l block, and have just had at least - * one other r2l block. Move all the r2l blocks that - * we've just had further right, and position this one - * on the left. */ - fz_html_flow *temp = mid; - while (temp != node) - { - old_x = temp->x; - temp->x += w; - temp = temp->next; - } - } - node->x = old_x; - } - else - { - node->x = x; - mid = node->next; - } + node->x = x; x += w; switch (node->style->vertical_align) @@ -895,8 +926,9 @@ static void layout_line(fz_context *ctx, float indent, float page_w, float line_ node->y = y + baseline - node->h; else node->y = y + baseline + va; - node = node->next; } + + fz_free(ctx, reorder); } static void find_accumulated_margins(fz_context *ctx, fz_html *box, float *w, float *h) @@ -933,7 +965,7 @@ static void layout_flow(fz_context *ctx, fz_html *box, fz_html *top, float em, f indent = box->is_first_flow ? fz_from_css_number(top->style.text_indent, em, top->w) : 0; align = top->style.text_align; - if (box->flow_dir == BIDI_RIGHT_TO_LEFT) + if (box->flow_dir == FZ_DIR_R2L) { if (align == TA_LEFT) align = TA_RIGHT; @@ -1197,7 +1229,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p w = node->w; s = get_node_text(ctx, node); - init_string_walker(ctx, &walker, hb_buf, node->char_r2l, node->style->font, node->script, s); + init_string_walker(ctx, &walker, hb_buf, node->bidi_level & 1, node->style->font, node->script, s); while (walk_string(&walker)) { const char *t; @@ -1248,7 +1280,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p ly += p->y_advance; } - if (node->char_r2l) + if (node->bidi_level & 1) { w -= lx * node_scale; for (gp = 0; gp < walker.glyph_count; gp++) @@ -1282,7 +1314,9 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p continue; trm.e = *(float *)&p->x_offset; trm.f = *(float *)&p->y_offset; - fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, c, 0); + fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, c, 0, + node->bidi_level, node->markup_dir, + node->markup_lang); break; } if (gp == walker.glyph_count) @@ -1291,7 +1325,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p * because we've been shaped away into another. We can't afford * to just drop the codepoint as this will upset text extraction. */ - fz_show_glyph(ctx, text, walker.font, &trm, -1, c, 0); + fz_show_glyph(ctx, text, walker.font, &trm, -1, c, 0, node->bidi_level, node->markup_dir, node->markup_lang); } else { @@ -1305,7 +1339,7 @@ static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float p continue; trm.e = *(float *)&p->x_offset; trm.f = *(float *)&p->y_offset; - fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, -1, 0); + fz_show_glyph(ctx, text, walker.font, &trm, g->codepoint, -1, 0, node->bidi_level, node->markup_dir, node->markup_lang); } } idx += l; @@ -1494,7 +1528,7 @@ static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float { s += fz_chartorune(&c, s); g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font); - fz_show_glyph(ctx, text, font, &trm, g, c, 0); + fz_show_glyph(ctx, text, font, &trm, g, c, 0, 0, FZ_DIR_UNSET, fz_lang_unset); trm.e += fz_advance_glyph(ctx, font, g, 0) * box->em; } @@ -1817,22 +1851,15 @@ typedef struct uni_buf *buffer; } bidi_data; -static void newFragCb(const uint32_t *fragment, +static void fragment_cb(const uint32_t *fragment, size_t fragment_len, - int block_r2l, - int char_r2l, + int bidi_level, int script, void *arg) { bidi_data *data = (bidi_data *)arg; size_t fragment_offset = fragment - data->buffer->data; - /* The Picsel code used to (effectively) do: - * if (fragment_offset == 0) char_r2l = block_r2l; - * but that makes no sense to me. All that could do is stop - * a european number being treated as l2r because it was the - * first thing on a line. */ - /* We are guaranteed that fragmentOffset will be at the beginning * of flow. */ while (fragment_len > 0) @@ -1861,8 +1888,7 @@ static void newFragCb(const uint32_t *fragment, } /* This flow box is entirely contained within this fragment. */ - data->flow->block_r2l = block_r2l; - data->flow->char_r2l = char_r2l; + data->flow->bidi_level = bidi_level; data->flow->script = script; data->flow = data->flow->next; fragment_offset += len; @@ -1870,34 +1896,31 @@ static void newFragCb(const uint32_t *fragment, } } -static int -dirn_matches(int dirn, int dirn2) -{ - return (dirn == BIDI_NEUTRAL || dirn2 == BIDI_NEUTRAL || dirn == dirn2); -} - static void -detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_bidi_direction *baseDir, fz_html_flow *flow) +detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, int baseDir, fz_html_flow *flow) { fz_html_flow *end = flow; const char *text; bidi_data data; - fz_bidi_direction dirn; + fz_bidi_direction bidi_dir = BIDI_NEUTRAL; + + if (baseDir == FZ_DIR_L2R) + bidi_dir = BIDI_LEFT_TO_RIGHT; + else if (baseDir == FZ_DIR_R2L) + bidi_dir = BIDI_RIGHT_TO_LEFT; while (end) { - dirn = BIDI_NEUTRAL; + int level = end->bidi_level; /* Gather the text from the flow up into a single buffer (at * least, as much of it as has the same direction markup). */ buffer->len = 0; - while (end && dirn_matches(dirn, end->markup_r2l)) + while (end && (level & 1) == (end->bidi_level & 1)) { size_t len; int broken = 0; - dirn = end->markup_r2l; - switch (end->type) { case FLOW_WORD: @@ -1948,13 +1971,7 @@ detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_b data.pool = pool; data.flow = flow; data.buffer = buffer; - fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &dirn, &newFragCb, &data, 0 /* Flags */); - - /* Set the default flow of the box to be the first non NEUTRAL thing we find */ - if (*baseDir == BIDI_NEUTRAL) - { - *baseDir = dirn; - } + fz_bidi_fragment_text(ctx, buffer->data, buffer->len, &bidi_dir, &fragment_cb, &data, 0 /* Flags */); } } @@ -1964,7 +1981,7 @@ detect_box_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_ht while (box) { if (box->flow_head) - detect_flow_directionality(ctx, pool, buffer, &box->flow_dir, box->flow_head); + detect_flow_directionality(ctx, pool, buffer, box->flow_dir, box->flow_head); detect_box_directionality(ctx, pool, buffer, box->down); box = box->next; } diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c index 375b3b1d..9d1442e2 100644 --- a/source/pdf/pdf-appearance.c +++ b/source/pdf/pdf-appearance.c @@ -1918,7 +1918,7 @@ static void add_text(fz_context *ctx, font_info *font_rec, fz_text *text, char * str += n; str_len -= n; gid = fz_encode_character(ctx, font, ucs); - fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0); + fz_show_glyph(ctx, text, font, &tm, gid, ucs, 0, 0, FZ_DIR_UNSET, fz_lang_unset); tm.e += fz_advance_glyph(ctx, font, gid, 0) * font_rec->da_rec.font_size; } } diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c index 8c9f0639..03bca556 100644 --- a/source/pdf/pdf-op-run.c +++ b/source/pdf/pdf-op-run.c @@ -943,11 +943,11 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid) fz_union_rect(&pr->text_bbox, &bbox); /* add glyph to textobject */ - fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode); + fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, gid, ucsbuf[0], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset); /* add filler glyphs for one-to-many unicode mapping */ for (i = 1; i < ucslen; i++) - fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode); + fz_show_glyph(ctx, pr->text, fontdesc->font, &trm, -1, ucsbuf[i], fontdesc->wmode, 0, FZ_DIR_UNSET, fz_lang_unset); if (fontdesc->wmode == 0) { diff --git a/source/tools/murun.c b/source/tools/murun.c index 29e52578..8ff59be7 100644 --- a/source/tools/murun.c +++ b/source/tools/murun.c @@ -1935,7 +1935,7 @@ static void ffi_Text_showGlyph(js_State *J) int wmode = js_isdefined(J, 5) ? js_toboolean(J, 5) : 0; fz_try(ctx) - fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode); + fz_show_glyph(ctx, text, font, &trm, glyph, unicode, wmode, 0, FZ_DIR_UNSET, fz_lang_unset); fz_catch(ctx) rethrow(J); } @@ -1950,7 +1950,7 @@ static void ffi_Text_showString(js_State *J) int wmode = js_isdefined(J, 4) ? js_toboolean(J, 4) : 0; fz_try(ctx) - fz_show_string(ctx, text, font, &trm, s, wmode); + fz_show_string(ctx, text, font, &trm, s, wmode, 0, FZ_DIR_UNSET, fz_lang_unset); fz_catch(ctx) rethrow(J); diff --git a/source/xps/xps-glyphs.c b/source/xps/xps-glyphs.c index 624276f8..91c53743 100644 --- a/source/xps/xps-glyphs.c +++ b/source/xps/xps-glyphs.c @@ -408,6 +408,7 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm, float u_offset = 0; float v_offset = 0; float advance; + int dir; if (is && *is) is = xps_parse_glyph_index(is, &glyph_index); @@ -450,7 +451,8 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm, tm.f = y - v_offset; } - fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways); + dir = bidi_level & 1 ? FZ_DIR_R2L : FZ_DIR_L2R; + fz_show_glyph(ctx, text, font, &tm, glyph_index, char_code, is_sideways, bidi_level, dir, fz_lang_unset); x += advance * 0.01f * size; } |