summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorRobin Watts <Robin.Watts@artifex.com>2016-03-02 08:03:53 -0800
committerRobin Watts <robin.watts@artifex.com>2016-03-11 11:57:48 +0000
commita3785935df081674d048655048984bcba09f8387 (patch)
tree31f6a63292d9f3d11be9c4b7003001c700b9b3c5 /include
parentc5b80367dfcd3d3df09068b7f31119a400cfe241 (diff)
downloadmupdf-a3785935df081674d048655048984bcba09f8387.tar.xz
Rejig Bidirectional and Text code.
We move to using bidirectional "levels" throughout. This should give us better behaviour vis-a-vis nested l2r/r2l text. This also allows us to carry xps levels throughout with no loss of information, and avoids the need to special case numbers. We accordingly carry more information into fz_text. As well as wmode, we now also hold additional details about the text spans: the bidirectional level of the text (either as derived from the bidi code, or from the original document, e.g. xps), the directionality of the text as specified in the original document (e.g. html), and the language of the text (if specified in the original document).
Diffstat (limited to 'include')
-rw-r--r--include/mupdf/fitz/bidi.h16
-rw-r--r--include/mupdf/fitz/text.h23
-rw-r--r--include/mupdf/html.h32
3 files changed, 32 insertions, 39 deletions
diff --git a/include/mupdf/fitz/bidi.h b/include/mupdf/fitz/bidi.h
index 8428ffc1..f458c498 100644
--- a/include/mupdf/fitz/bidi.h
+++ b/include/mupdf/fitz/bidi.h
@@ -49,18 +49,16 @@ enum
*
* @param fragment first character in fragment
* @param fragmentLen number of characters in fragment
- * @param block_r2l true if block should concatenate with other blocks
- * as right-to-left
- * @param char_r2l true if characters within block should be laid out
- * as right-to-left
+ * @param bidiLevel The bidirectional level for this text. The bottom bit
+ * will be set iff block should concatenate with other
+ * blocks as right-to-left
* @param script the script in use for this fragment (other than common
* or inherited)
* @param arg data from caller of Bidi_fragmentText
*/
typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment,
size_t fragmentLen,
- int block_r2l,
- int char_r2l,
+ int bidiLevel,
int script,
void *arg);
@@ -72,9 +70,9 @@ typedef void (fz_bidi_fragment_callback)(const uint32_t *fragment,
* 0123456789
* rrlllrrrrr,
* we'll invoke callback with:
- * &text[0], length == 2, rightToLeft == true
- * &text[2], length == 3, rightToLeft == false
- * &text[5], length == 5, rightToLeft == true.
+ * &text[0], length == 2
+ * &text[2], length == 3
+ * &text[5], length == 5
*
* @param[in] text start of Unicode sequence
* @param[in] textlen number of Unicodes to analyse
diff --git a/include/mupdf/fitz/text.h b/include/mupdf/fitz/text.h
index 2d96ce02..682517c4 100644
--- a/include/mupdf/fitz/text.h
+++ b/include/mupdf/fitz/text.h
@@ -29,11 +29,28 @@ struct fz_text_item_s
int ucs; /* -1 for one ucs to many gid mappings */
};
+typedef enum fz_text_direction_e
+{
+ /* There are various possible 'directions' for text */
+ FZ_DIR_UNSET = 0, /* Unset (or Neutral). All PDF text is sent as this. */
+ FZ_DIR_R2L = 1, /* Text is r2l */
+ FZ_DIR_L2R = 2 /* Text is l2r */
+} fz_text_direction;
+
+typedef enum fz_text_language_e
+{
+ fz_lang_unset = 0
+ /* FIXME: Fill in more */
+} fz_text_language;
+
struct fz_text_span_s
{
fz_font *font;
fz_matrix trm;
- int wmode;
+ int wmode : 1; /* 0 horizontal, 1 vertical */
+ int bidi_level : 7; /* The bidirectional level of text */
+ int markup_dir : 2; /* The direction of text as marked in the original document */
+ int language : 8; /* The language as marked in the original document */
int len, cap;
fz_text_item *items;
fz_text_span *next;
@@ -49,8 +66,8 @@ fz_text *fz_new_text(fz_context *ctx);
fz_text *fz_keep_text(fz_context *ctx, const fz_text *text);
void fz_drop_text(fz_context *ctx, const fz_text *text);
-void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode);
-void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode);
+void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language);
+void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_text_direction markup_dir, fz_text_language language);
fz_rect *fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, const fz_matrix *ctm, fz_rect *r);
fz_text *fz_clone_text(fz_context *ctx, const fz_text *text);
diff --git a/include/mupdf/html.h b/include/mupdf/html.h
index c3668501..be9733e9 100644
--- a/include/mupdf/html.h
+++ b/include/mupdf/html.h
@@ -185,7 +185,7 @@ struct fz_html_s
float em;
fz_html *up, *down, *last, *next;
fz_html_flow *flow_head, **flow_tail;
- fz_bidi_direction flow_dir;
+ int flow_dir;
fz_css_style style;
int list_item;
int is_first_flow; /* for text-indent */
@@ -202,25 +202,6 @@ enum
FLOW_SHYPHEN = 5
};
-/* We have to recognise the distinction between render direction
- * and layout direction. For most strings render direction and
- * logical direction are the same.
- *
- * Char direction determines whether a string 'ABC' appears as
- * ABC or CBA.
- *
- * Block direction determines how fragments are attached together.
- * 'ABC' and 'DEF' with r2l char and block directions will
- * appear as 'FEDCBA'. With l2r char and block it will appear
- * as 'ABCDEF'.
- *
- * The reason for the distinction is that we can have logical
- * strings like 'ABC0123DEF', where 'ABC' and 'DEF' are in r2l
- * scripts. The bidirectional code breaks this down into 3 fragments
- * 'ABC' '0123' 'DEF', where all three are r2l, but digits need to
- * be rendered left to right. i.e. the desired result is:
- * FED0123CBA, rather than FED3210CBA.
- */
struct fz_html_flow_s
{
/* What type of node */
@@ -229,14 +210,11 @@ struct fz_html_flow_s
/* Whether this should expand during justification */
unsigned int expand : 1;
- /* Whether the chars should be laid out r2l or l2r */
- unsigned int char_r2l : 1;
+ /* Direction setting for text - UAX#9 says 125 is the max */
+ unsigned int bidi_level : 7;
- /* Whether this block should stack with its neighbours r2l or l2r */
- unsigned int block_r2l : 1;
-
- /* Whether the markup specifies a given direction. */
- unsigned int markup_r2l : 2;
+ /* Direction for text set in original document */
+ unsigned int markup_dir : 2;
/* Whether the markup specifies a given language. */
unsigned int markup_lang : 8;