From caafb37ddf3d58f4d37d8c7b535021ff18d57861 Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Wed, 13 Jul 2016 19:36:39 +0100 Subject: Bug 696699: Fix Text extraction mediabox information. Since the removal of the begin_page device function, structured text extraction has been unable to correctly establish the mediabox for extracted pages. Update the fz_new_stext_page call to take this mediabox information. This is an API change, but hopefully most people are calling fz_new_stext_page_from_page or fz_new_stext_page_from_display_list which are updated here to cope. Update all the apps/tools to behave properly. --- platform/android/viewer/jni/mupdf.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'platform/android/viewer') diff --git a/platform/android/viewer/jni/mupdf.c b/platform/android/viewer/jni/mupdf.c index b70d3f1a..91a002e3 100644 --- a/platform/android/viewer/jni/mupdf.c +++ b/platform/android/viewer/jni/mupdf.c @@ -1251,13 +1251,15 @@ JNI_FN(MuPDFCore_searchPage)(JNIEnv * env, jobject thiz, jstring jtext) fz_try(ctx) { + fz_rect mediabox; + if (glo->hit_bbox == NULL) glo->hit_bbox = fz_malloc_array(ctx, MAX_SEARCH_HITS, sizeof(*glo->hit_bbox)); zoom = glo->resolution / 72; fz_scale(&ctm, zoom, zoom); sheet = fz_new_stext_sheet(ctx); - text = fz_new_stext_page(ctx); + text = fz_new_stext_page(ctx, fz_bound_page(ctx, page, &mediabox)); dev = fz_new_stext_device(ctx, sheet, text); fz_run_page(ctx, pc->page, dev, &ctm, NULL); fz_drop_device(ctx, dev); @@ -1342,12 +1344,13 @@ JNI_FN(MuPDFCore_text)(JNIEnv * env, jobject thiz) fz_try(ctx) { + fz_rect mediabox; int b, l, s, c; zoom = glo->resolution / 72; fz_scale(&ctm, zoom, zoom); sheet = fz_new_stext_sheet(ctx); - text = fz_new_stext_page(ctx); + text = fz_new_stext_page(ctx, fz_bound_page(ctx, page, &mediabox)); dev = fz_new_stext_device(ctx, sheet, text); fz_run_page(ctx, pc->page, dev, &ctm, NULL); fz_drop_device(ctx, dev); @@ -1451,11 +1454,12 @@ JNI_FN(MuPDFCore_textAsHtml)(JNIEnv * env, jobject thiz) fz_try(ctx) { + fz_rect mediabox; int b, l, s, c; ctm = fz_identity; sheet = fz_new_stext_sheet(ctx); - text = fz_new_stext_page(ctx); + text = fz_new_stext_page(ctx, fz_bound_page(ctx, page, &mediabox)); dev = fz_new_stext_device(ctx, sheet, text); fz_run_page(ctx, pc->page, dev, &ctm, NULL); fz_drop_device(ctx, dev); -- cgit v1.2.3