summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--android/jni/mupdf.c2
-rw-r--r--apps/mudraw.c2
-rw-r--r--fitz/fitz.h2
-rw-r--r--fitz/text_extract.c90
-rw-r--r--fitz/text_paragraph.c16
5 files changed, 94 insertions, 18 deletions
diff --git a/android/jni/mupdf.c b/android/jni/mupdf.c
index 46ddb334..d5e4d5fa 100644
--- a/android/jni/mupdf.c
+++ b/android/jni/mupdf.c
@@ -1347,7 +1347,7 @@ JNI_FN(MuPDFCore_textAsHtml)(JNIEnv * env, jobject thiz)
fz_free_device(dev);
dev = NULL;
- fz_text_analysis(ctx, sheet, text);
+ fz_analyze_text(ctx, sheet, text);
buf = fz_new_buffer(ctx, 256);
out = fz_new_output_with_buffer(ctx, buf);
diff --git a/apps/mudraw.c b/apps/mudraw.c
index 78ce484f..0b99114d 100644
--- a/apps/mudraw.c
+++ b/apps/mudraw.c
@@ -397,7 +397,7 @@ static void drawpage(fz_context *ctx, fz_document *doc, int pagenum)
}
else if (showtext == TEXT_HTML)
{
- fz_text_analysis(ctx, sheet, text);
+ fz_analyze_text(ctx, sheet, text);
fz_print_text_page_html(ctx, out, text);
}
else if (showtext == TEXT_PLAIN)
diff --git a/fitz/fitz.h b/fitz/fitz.h
index 30075c5d..6e9d3515 100644
--- a/fitz/fitz.h
+++ b/fitz/fitz.h
@@ -2097,7 +2097,7 @@ void fz_free_text_sheet(fz_context *ctx, fz_text_sheet *sheet);
fz_text_page *fz_new_text_page(fz_context *ctx, const fz_rect *mediabox);
void fz_free_text_page(fz_context *ctx, fz_text_page *page);
-void fz_text_analysis(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page);
+void fz_analyze_text(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page);
/*
Generic output streams - generalise between outputting to a file,
diff --git a/fitz/text_extract.c b/fitz/text_extract.c
index 44acdc48..e32de5ab 100644
--- a/fitz/text_extract.c
+++ b/fitz/text_extract.c
@@ -1,4 +1,5 @@
#include "fitz-internal.h"
+#include "ucdn.h"
/* Extract text into an unsorted span soup. */
@@ -858,6 +859,92 @@ fz_text_fill_image(fz_device *dev, fz_image *img, const fz_matrix *ctm, float al
fz_text_fill_image_mask(dev, img, ctm, NULL, NULL, alpha);
}
+/* Run direction for a bidi class: 1 for L, -1 for R/AL, curdir for weak/neutral, else 0. */
+static int
+fz_bidi_direction(int bidiclass, int curdir)
+{
+	int dir = 0;
+	switch (bidiclass)
+	{
+	case UCDN_BIDI_CLASS_L: /* strong */
+		dir = 1;
+		break;
+	case UCDN_BIDI_CLASS_R: /* strong */
+	case UCDN_BIDI_CLASS_AL:
+		dir = -1;
+		break;
+	case UCDN_BIDI_CLASS_EN: /* weak */
+	case UCDN_BIDI_CLASS_ES:
+	case UCDN_BIDI_CLASS_ET:
+	case UCDN_BIDI_CLASS_AN:
+	case UCDN_BIDI_CLASS_CS:
+	case UCDN_BIDI_CLASS_NSM:
+	case UCDN_BIDI_CLASS_BN:
+	case UCDN_BIDI_CLASS_B: /* neutral */
+	case UCDN_BIDI_CLASS_S:
+	case UCDN_BIDI_CLASS_WS:
+	case UCDN_BIDI_CLASS_ON:
+		dir = curdir; /* weak and neutral classes keep the caller's current direction */
+		break;
+	default: /* embedding, override, pop ... we don't support them */
+		break;
+	}
+	return dir;
+}
+
+/* Reverse span->text[a..b) in place, but only when dir is -1; otherwise do nothing. */
+static void
+fz_bidi_reorder_run(fz_text_span *span, int a, int b, int dir)
+{
+	int lo, hi;
+
+	if (dir != -1)
+		return;
+
+	/* Swap inwards from both ends; an empty or single-char run never enters the loop. */
+	for (lo = a, hi = b - 1; lo < hi; lo++, hi--)
+	{
+		fz_text_char tmp = span->text[lo];
+		span->text[lo] = span->text[hi];
+		span->text[hi] = tmp;
+	}
+}
+
+/* Split the span into runs of equal direction and hand each run to fz_bidi_reorder_run. */
+static void
+fz_bidi_reorder_span(fz_text_span *span)
+{
+	int start = 0; /* index of the first character of the run being collected */
+	int rundir = 1; /* direction of that run; the initial value is 1 */
+	int i, dir;
+
+	for (i = 0; i < span->len; i++)
+	{
+		dir = fz_bidi_direction(ucdn_get_bidi_class(span->text[i].c), rundir);
+		if (dir == rundir)
+			continue;
+		fz_bidi_reorder_run(span, start, i, rundir); /* the run ended just before i */
+		rundir = dir;
+		start = i;
+	}
+	fz_bidi_reorder_run(span, start, i, rundir); /* flush the trailing run */
+}
+
+/* Run fz_bidi_reorder_span over every span of every line in each text block of the page. */
+static void
+fz_bidi_reorder_text_page(fz_context *ctx, fz_text_page *page)
+{
+	fz_page_block *pageblock;
+	fz_text_block *block;
+	fz_text_line *line;
+	fz_text_span *span;
+	for (pageblock = page->blocks; pageblock < page->blocks + page->len; pageblock++) /* step the block cursor, not page */
+		if (pageblock->type == FZ_PAGE_BLOCK_TEXT)
+			for (block = pageblock->u.text, line = block->lines; line < block->lines + block->len; line++)
+				for (span = line->first_span; span; span = span->next)
+					fz_bidi_reorder_span(span);
+}
+
static void
fz_text_free_user(fz_device *dev)
{
@@ -872,7 +959,8 @@ fz_text_free_user(fz_device *dev)
/* TODO: smart sorting of blocks in reading order */
/* TODO: unicode NFC normalization */
- /* TODO: bidi logical reordering */
+
+ fz_bidi_reorder_text_page(ctx, tdev->page);
fz_free(dev->ctx, tdev);
}
diff --git a/fitz/text_paragraph.c b/fitz/text_paragraph.c
index 893b0aa4..c0fc42d2 100644
--- a/fitz/text_paragraph.c
+++ b/fitz/text_paragraph.c
@@ -988,8 +988,8 @@ dehyphenate(fz_text_span *s1, fz_text_span *s2)
s2->spacing = 0;
}
-static void
-fz_text_analysis_paragraph(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
+void
+fz_analyze_text(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
{
fz_text_line *line;
fz_text_span *span;
@@ -1498,15 +1498,3 @@ force_paragraph:
}
}
}
-
-static void
-fz_text_analysis_rtl(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
-{
-}
-
-void
-fz_text_analysis(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
-{
- fz_text_analysis_paragraph(ctx, sheet, page);
- fz_text_analysis_rtl(ctx, sheet, page);
-}