summaryrefslogtreecommitdiff
path: root/source/fitz/bidi.c
diff options
context:
space:
mode:
author: Robin Watts <robin.watts@artifex.com> 2016-11-16 12:13:51 +0000
committer: Robin Watts <robin.watts@artifex.com> 2016-11-16 12:25:52 +0000
commit: 0c3e1ee387c995f37aa53f358825f334c06324ec (patch)
tree: 6b42f4ea01f1376e24716da82f2d91f1d0ace89e /source/fitz/bidi.c
parent: 141870506d8809723e933963e9e001aca839cdfc (diff)
download: mupdf-0c3e1ee387c995f37aa53f358825f334c06324ec.tar.xz
Bug 697301: Fix "crash" in epub.
This is actually an assertion failure rather than a true crash. It is caused by a paragraph separator in the text. The Unicode Bidirectional Algorithm says we should operate paragraph by paragraph, and its example code includes a routine to split the text at paragraph markers, changing their type to boundary neutral as it goes. The use of this routine was left "as an exercise for the reader" in the example code, so we simply hook it up here.
Diffstat (limited to 'source/fitz/bidi.c')
-rw-r--r-- source/fitz/bidi.c 40
1 files changed, 29 insertions, 11 deletions
diff --git a/source/fitz/bidi.c b/source/fitz/bidi.c
index 4436b615..b31ca749 100644
--- a/source/fitz/bidi.c
+++ b/source/fitz/bidi.c
@@ -414,9 +414,12 @@ create_levels(fz_context *ctx,
int resolveWhiteSpace,
int flags)
{
- fz_bidi_level *levels;
+ fz_bidi_level *levels , *plevels;
fz_bidi_chartype *types = NULL;
+ fz_bidi_chartype *ptypes;
fz_bidi_level baseLevel;
+ const uint32_t *ptext;
+ size_t plen, remaining;
levels = fz_malloc(ctx, len * sizeof(*levels));
@@ -463,18 +466,33 @@ create_levels(fz_context *ctx,
*/
classify_quoted_blocks(text, types, len);
- /* Work out the levels and character types... */
- (void)fz_bidi_resolve_explicit(baseLevel, BDI_N, types, levels, len, 0);
- fz_bidi_resolve_weak(ctx, baseLevel, types, levels, len);
- fz_bidi_resolve_neutrals(baseLevel,types, levels, len);
- fz_bidi_resolve_implicit(types, levels, len);
+ /* Work one paragraph at a time. */
+ plevels = levels;
+ ptypes = types;
+ ptext = text;
+ remaining = len;
+ while (remaining)
+ {
+ plen = fz_bidi_resolve_paragraphs(ptypes, remaining);
+
+ /* Work out the levels and character types... */
+ (void)fz_bidi_resolve_explicit(baseLevel, BDI_N, ptypes, plevels, plen, 0);
+ fz_bidi_resolve_weak(ctx, baseLevel, ptypes, plevels, plen);
+ fz_bidi_resolve_neutrals(baseLevel, ptypes, plevels, plen);
+ fz_bidi_resolve_implicit(ptypes, plevels, plen);
- classify_characters(text, types, len, BIDI_CLASSIFY_WHITE_SPACE);
+ classify_characters(ptext, ptypes, plen, BIDI_CLASSIFY_WHITE_SPACE);
- if (resolveWhiteSpace)
- {
- /* resolve whitespace */
- fz_bidi_resolve_whitespace(baseLevel, types, levels, len);
+ if (resolveWhiteSpace)
+ {
+ /* resolve whitespace */
+ fz_bidi_resolve_whitespace(baseLevel, ptypes, plevels, plen);
+ }
+
+ plevels += plen;
+ ptypes += plen;
+ ptext += plen;
+ remaining -= plen;
}
/* The levels buffer now has odd and even numbers indicating