summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/fitz/bidi.c222
-rw-r--r--source/html/html-layout.c195
2 files changed, 199 insertions, 218 deletions
diff --git a/source/fitz/bidi.c b/source/fitz/bidi.c
index a3bbbc1d..59d1578a 100644
--- a/source/fitz/bidi.c
+++ b/source/fitz/bidi.c
@@ -35,7 +35,6 @@
#include "mupdf/fitz.h"
#include "bidi-impl.h" /* standard bidi code interface */
-#include "bidi-internal.h"
/*
* Macros...
@@ -548,7 +547,6 @@ Bidi_CharType Bidi_classFromChN(uint16_t ch)
}
-static
int Bidi_isEuropeanNumber(const uint16_t *str, unsigned int len)
{
const uint16_t *end = str + len;
@@ -650,32 +648,6 @@ uint16_t Bidi_mirrorChar(const uint16_t u)
return UNICODE_EOS;
}
-#ifdef BIDI_LINE_AT_A_TIME
-/** Subsitutes characters with their mirrored equivalents, e.g. swaps
- * '(' with ')'. Implements rule L4 of the uint16_t Bidirectional
- * algorithm. Mirrored characters are found using a binary search through
- * the lookup table bidiOptMirrorList.
- */
-static
-void Bidi_mirror(uint16_t *text,
- const Bidi_Level *poslevel,
- int len)
-{
- int i;
- uint16_t out;
-
- for (i = 0; i < len; i ++)
- {
- if (ODD(poslevel[i]) == 0)
- continue;
- out = Bidi_mirrorChar((const uint16_t)text[i]);
- if(out != UNICODE_EOS)
- text[i] = out;
- }
-}
-#endif
-
-
/** Searches a RTL fragment for a mirror character
* When it finds one it creates a separate fragment for the
* character and the surrounding fragments. It passes the mirrored
@@ -766,11 +738,15 @@ void Bidi_classifyCharacters(const uint16_t *text,
);
}
fprintf(stderr, "\nTypes: ");
+#endif
for (i = 0; i < len; i++)
{
types[i] = Bidi_classFromChN(text[i]);
+#ifdef DEBUG_BIDI_VERBOSE
fprintf(stderr, "%c", charFromTypes[(int)types[i]]);
+#endif
}
+#ifdef DEBUG_BIDI_VERBOSE
fprintf(stderr, "\n");
#endif
}
@@ -1117,193 +1093,3 @@ void Bidi_fragmentText(fz_context *ctx,
fz_rethrow(ctx);
}
}
-
-
-
-static void newFragCb(const uint16_t *fragment,
- size_t fragmentLen,
- int rightToLeft,
- uint16_t mirror,
- void *arg)
-{
- size_t fragOffset;
- int isRtlNumber = FALSE;
- Bidi_ProcessLine_fragData *fragData = arg;
-
- assert(fragData != NULL);
-
- fragOffset = fragment - fragData->entireText;
-
- if((fragOffset != 0) &&
- Bidi_isEuropeanNumber(fragment,fragmentLen))
- {
- /* fragment contains digits only */
- isRtlNumber = TRUE;
- }
- fragData->callersCallback(fragOffset,
- fragmentLen,
- rightToLeft,
- mirror,
- fragData->callersData,
- isRtlNumber);
-}
-
-
-
-/* Currently this creates a text representation of all the objects
- * on a line, then passes it to Bidi_fragmentText. In future, we
- * should create the levels array directly from the objects supplied.
- */
-Bidi_Direction Bidi_processLine(fz_context *ctx,
- Bidi_PL_NextObj_Callback nextObjCb,
- Bidi_PL_Fragment_Callback fragmentCb,
- void *callerData,
- int bidiFlag,
- int *more)
-{
- uint16_t *entireText = NULL;
- size_t entireLength = 0;
- size_t lengthIncrement = 100;
- size_t freeSpace = 0;
- const uint16_t *objText = NULL;
- size_t objLength = 0;
- Bidi_Direction explicitDirection = Bidi_Neutral;
-
- fz_var(entireText);
-
- fz_try(ctx)
- {
- do
- {
- /* Call the client-supplied function to get the next object. */
- nextObjCb(callerData, &objText, &objLength, more, &explicitDirection);
- if (objText == NULL || objLength == 0)
- {
- /* End of sequence, or caller is giving us rubbish. */
- break;
- }
-
- if (freeSpace < objLength)
- {
- /* We need a bigger buffer. */
- uint16_t *newBuffer = NULL;
- size_t increment = (objLength < lengthIncrement) ?
- lengthIncrement : objLength;
- /* Accommodate the string terminator that ustrncat will write */
- if (entireText == NULL)
- {
- /* First Time. Add extra byte */
- entireLength += increment + 1;
- }
- else
- {
- entireLength += increment;
- }
- newBuffer = fz_resize_array(ctx, entireText, entireLength, sizeof(uint16_t));
- if (entireText == NULL)
- {
- /* First time. Pretend buffer holds an empty string. */
- newBuffer[0] = UNICODE_EOS;
- }
- entireText = newBuffer;
- freeSpace += increment;
- }
- ustrncat (entireText, objText, objLength);
-
- freeSpace -= objLength;
- }
- while (objText != NULL);
-
- if (entireText != NULL)
- {
- size_t i;
- size_t length = entireLength - 1 - freeSpace;
- Bidi_ProcessLine_fragData fragData;
- /* replace TAB character with SPACE for html Documents */
- if((bidiFlag & Bidi_replaceTab) != 0)
- {
- for (i = 0; i < length; i++)
- {
- if (entireText[i]=='\t')
- {
- entireText[i] = ' ';
- }
- }
- }
-
- fragData.entireText = entireText;
- fragData.callersCallback = fragmentCb;
- fragData.callersData = callerData;
- Bidi_fragmentText(ctx,
- entireText,
- length, &explicitDirection,
- newFragCb, &fragData,
- bidiFlag);
- }
- }
- fz_always(ctx)
- {
- fz_free(ctx, entireText);
- }
- fz_catch(ctx)
- {
- fz_rethrow(ctx);
- }
-
- return explicitDirection;
-}
-
-
-#ifdef BIDI_LINE_AT_A_TIME
-/* This changes the given text by reordering and possibly mirroring
- * characters. Suitable only for a single line of text.
- */
-void Bidi_processText(fz_context *ctx,
- uint16_t *text,
- Bidi_Direction baseDirection,
- Bidi_Direction outputDirection,
- int len)
-{
- Bidi_Level *levels;
-
- if (text == NULL || len == 0)
- {
- return;
- }
-
- levels = createLevels(ctx, text, (size_t)len, &baseDirection, TRUE, 0);
-
- assert (levels != NULL);
-
- /* resolve mirrored characters, e.g. swap '(' with ')' */
- Bidi_mirror(text, levels, len);
-
- (void)Bidi_reorder(baseDirection, text, levels, len);
-
- /* reverse the string */
- if (outputDirection == Bidi_RightToLeft)
- ustrnreverse(text, len);
-
-#ifdef DEBUG_BIDI_VERBOSE
- fprintf(stderr, "Done: ");
- {
- int i;
- for (i = 0; i < len; i++)
- {
- /* So that we can actually sort of read the debug string, any
- * non-ascii characters are replaced with a 1-digit hash
- * value from 0-9, making non-english characters appear
- * as numbers
- */
- fprintf(stderr, "%c", (text[i] <= 127 && text[i ]>= 32)?
- text[i]
- :(char)((text[i] % 9) + 48)
- );
- }
- fprintf(stderr, "\n\n");
- }
-#endif
-
- fz_free(ctx, levels);
-}
-#endif
diff --git a/source/html/html-layout.c b/source/html/html-layout.c
index ad3dd621..5bfcac85 100644
--- a/source/html/html-layout.c
+++ b/source/html/html-layout.c
@@ -116,6 +116,33 @@ static void add_flow_image(fz_context *ctx, fz_pool *pool, fz_html *top, fz_css_
add_flow_glue(ctx, pool, top, style, "", 0);
}
+static fz_html_flow *split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset)
+{
+ fz_html_flow *new_flow;
+ char *text;
+ size_t len;
+
+ if (offset == 0)
+ return flow;
+ new_flow = fz_pool_alloc(ctx, pool, sizeof *flow);
+ *new_flow = *flow;
+ new_flow->next = flow->next;
+ flow->next = new_flow;
+
+ text = flow->text;
+ while (*text && offset)
+ {
+ int rune;
+ text += fz_chartorune(&rune, text);
+ offset--;
+ }
+ len = strlen(text);
+ new_flow->text = fz_pool_alloc(ctx, pool, len+1);
+ strcpy(new_flow->text, text);
+ *text = 0;
+ return new_flow;
+}
+
static int iscjk(int c)
{
if (c >= 0x3200 && c <= 0x9FFF) return 1; /* CJK Blocks */
@@ -1347,6 +1374,172 @@ fz_layout_html(fz_context *ctx, fz_html *box, float w, float h, float em)
layout_block(ctx, box, &page_box, em, h, 0);
}
+typedef struct
+{
+ uint16_t *buffer;
+ size_t max;
+ size_t len;
+} uni_buf;
+
+typedef struct
+{
+ fz_context *ctx;
+ fz_pool *pool;
+ fz_html_flow *flow;
+ uni_buf *buffer;
+} bidi_data;
+
+static size_t utf8len(const char *text)
+{
+ size_t len = 0;
+
+ while (*text)
+ {
+ int rune;
+ text += fz_chartorune(&rune, text);
+ len++;
+ }
+ return len;
+}
+
+static void newFragCb(const uint16_t *fragment,
+ size_t fragmentLen,
+ int rightToLeft,
+ uint16_t mirror,
+ void *arg)
+{
+ bidi_data *data = (bidi_data *)arg;
+ size_t fragmentOffset = fragment - data->buffer->buffer;
+ int charDirR2L = rightToLeft;
+
+ if((fragmentOffset != 0) &&
+ Bidi_isEuropeanNumber(fragment, fragmentLen))
+ {
+ /* fragment contains digits only */
+ charDirR2L = 0;
+ }
+
+ /* We are guaranteed that fragmentOffset will be at the beginning
+ * of flow. */
+ while (fragmentLen > 0)
+ {
+ size_t len;
+
+ if (data->flow->type == FLOW_GLUE)
+ {
+ len = 1;
+ }
+ else
+ {
+ /* Must be text */
+ len = utf8len(data->flow->text);
+ if (len > fragmentLen)
+ {
+ /* We need to split this flow box */
+ (void)split_flow(data->ctx, data->pool, data->flow, fragmentLen);
+ len = utf8len(data->flow->text);
+ }
+ }
+
+ /* This flow box is entirely contained within this fragment. */
+ data->flow->block_r2l = rightToLeft;
+ data->flow->char_r2l = charDirR2L;
+ data->flow = data->flow->next;
+ fragmentOffset += len;
+ fragmentLen -= len;
+ }
+}
+
+static void
+detect_flow_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_html_flow *flow)
+{
+ fz_html_flow *end = flow;
+ const char *text;
+ bidi_data data;
+ Bidi_Direction baseDir = -1;
+
+ /* Stage 1: Gather the text from the flow up into a single buffer */
+ buffer->len = 0;
+ while (end)
+ {
+ size_t len;
+ int broken = 0;
+
+ switch (end->type)
+ {
+ case FLOW_WORD:
+ len = utf8len(end->text);
+ text = end->text;
+ break;
+ case FLOW_GLUE:
+ len = 1;
+ text = " ";
+ break;
+ case FLOW_BREAK:
+ case FLOW_IMAGE:
+ broken = 1;
+ break;
+ }
+
+ if (broken)
+ break;
+
+ /* Make sure the buffer is large enough */
+ if (buffer->len + len > buffer->max)
+ {
+ size_t newmax = buffer->max * 2;
+ if (newmax == 0)
+ newmax = 128; /* Sensible small default */
+ buffer->buffer = fz_resize_array(ctx, buffer->buffer, newmax, sizeof(uint16_t));
+ buffer->max = newmax;
+ }
+
+ /* Expand the utf8 text into Unicode and store it in the buffer */
+ while (*text)
+ {
+ int rune;
+ text += fz_chartorune(&rune, text);
+ buffer->buffer[buffer->len++] = (uint16_t)rune;
+ }
+
+ end = end->next;
+ }
+
+ /* Detect directionality for the buffer */
+ data.ctx = ctx;
+ data.pool = pool;
+ data.flow = flow;
+ data.buffer = buffer;
+ Bidi_fragmentText(ctx,
+ buffer->buffer,
+ buffer->len,
+ &baseDir,
+ newFragCb, &data,
+ 0 /* Flags */);
+}
+
+static void
+detect_box_directionality(fz_context *ctx, fz_pool *pool, uni_buf *buffer, fz_html *box)
+{
+ while (box)
+ {
+ if (box->flow_head)
+ {
+ detect_flow_directionality(ctx, pool, buffer, box->flow_head);
+ }
+ detect_box_directionality(ctx, pool, buffer, box->down);
+ box = box->next;
+ }
+}
+
+static void
+detect_directionality(fz_context *ctx, fz_pool *pool, fz_html *box)
+{
+ uni_buf buffer = { NULL };
+
+ detect_box_directionality(ctx, pool, &buffer, box);
+}
+
fz_html *
fz_parse_html(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_buffer *buf, const char *user_css)
{
@@ -1380,5 +1573,7 @@ fz_parse_html(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const cha
fz_drop_css(ctx, css);
fz_drop_xml(ctx, xml);
+ detect_directionality(ctx, pool, box);
+
return box;
}