diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2016-07-06 21:45:00 +0200 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2016-07-08 12:04:51 +0200 |
commit | af3386764c57b83c8de1cdd81fb35f97afc25fd6 (patch) | |
tree | c877f28d9d51f7789d67a2b537245bdde2cf7dff | |
parent | 174cc9758d4dd7e8e3bd295f61271c3216e0cd3d (diff) | |
download | mupdf-af3386764c57b83c8de1cdd81fb35f97afc25fd6.tar.xz |
js: Add wrapper for fz_stext_page to search, select and copy text.
-rw-r--r-- | docs/mutool/run.html | 24 | ||||
-rw-r--r-- | source/tools/murun.c | 143 |
2 files changed, 167 insertions, 0 deletions
diff --git a/docs/mutool/run.html b/docs/mutool/run.html index c6bc80e5..9cf4cdb9 100644 --- a/docs/mutool/run.html +++ b/docs/mutool/run.html @@ -193,6 +193,8 @@ If skipAnnotations is true, ignore annotations. If alpha is true, the page will be drawn on a transparent background, otherwise white. <dt>Page#toDisplayList(skipAnnotations) <dd>Record the contents on the page into a DisplayList. +<dt>Page#toStructuredText() +<dd>Extract the text on the page into a StructuredText object. <dt>Page#search(needle) <dd>Search for 'needle' text on the page, and return an array with rectangles of all matches found. <dt>Page#getAnnotations() @@ -215,6 +217,23 @@ If alpha is true, the page will be drawn on a transparent background, otherwise </dl> <h2> +StructuredText +</h2> + +<p> +StructuredText objects hold text from a page that has been analyzed and grouped into blocks, lines and spans. +</p> + +<dl> +<dt>StructuredText#search(needle) +<dd>Search the text for all instances of 'needle', and return an array with rectangles of all matches found. +<dt>StructuredText#highlight(rect) +<dd>Return an array with rectangles needed to highlight a selection defined by the two corners of the rectangle. +<dt>StructuredText#copy(rect) +<dd>Return the text from the selection defined by the two corners of the rectangle. +</dl> + +<h2> ColorSpace </h2> @@ -297,6 +316,11 @@ will keep all the graphics required in memory, so will increase the amount of me <dd>Play back the recorded device calls onto the device. <dt>DisplayList#toPixmap(transform, colorspace, alpha) <dd>Render display list to a pixmap. If alpha is true, it will render to a transparent background, otherwise white. +<dt>DisplayList#toStructuredText(skipAnnotations) +<dd>Extract the text in the display list into a StructuredText object. +<dt>DisplayList#search(needle) +<dd>Search the display list text for all instances of 'needle', and return an array with rectangles of all matches found. +<dd> </dl> <dl> diff --git a/source/tools/murun.c b/source/tools/murun.c index 8a892372..a3eacaa8 100644 --- a/source/tools/murun.c +++ b/source/tools/murun.c @@ -247,6 +247,12 @@ static void ffi_gc_fz_display_list(js_State *J, void *list) fz_drop_display_list(ctx, list); } +static void ffi_gc_fz_stext_page(js_State *J, void *text) +{ + fz_context *ctx = js_getcontext(J); + fz_drop_stext_page(ctx, text); +} + static void ffi_gc_fz_device(js_State *J, void *device) { fz_context *ctx = js_getcontext(J); @@ -1544,6 +1550,29 @@ static void ffi_Page_toPixmap(js_State *J) js_newuserdata(J, "fz_pixmap", pixmap, ffi_gc_fz_pixmap); } +static void ffi_Page_toStructuredText(js_State *J) +{ + fz_context *ctx = js_getcontext(J); + fz_page *page = js_touserdata(J, 0, "fz_page"); + fz_stext_sheet *sheet = NULL; + fz_stext_page *text; + + fz_var(sheet); + + fz_try(ctx) + { + sheet = fz_new_stext_sheet(ctx); + text = fz_new_stext_page_from_page(ctx, page, sheet); + } + fz_always(ctx) + fz_drop_stext_sheet(ctx, sheet); + fz_catch(ctx) + rethrow(J); + + js_getregistry(J, "fz_stext_page"); + js_newuserdata(J, "fz_stext_page", text, ffi_gc_fz_stext_page); +} + static void ffi_Page_search(js_State *J) { fz_context *ctx = js_getcontext(J); @@ -2211,6 +2240,109 @@ static void ffi_DisplayList_toPixmap(js_State *J) js_newuserdata(J, "fz_pixmap", pixmap, ffi_gc_fz_pixmap); } +static void ffi_DisplayList_toStructuredText(js_State *J) +{ + fz_context *ctx = js_getcontext(J); + fz_display_list *list = js_touserdata(J, 0, "fz_display_list"); + fz_stext_sheet *sheet = NULL; + fz_stext_page *text; + + fz_var(sheet); + + fz_try(ctx) + { + sheet = fz_new_stext_sheet(ctx); + text = fz_new_stext_page_from_display_list(ctx, list, sheet); + } + fz_always(ctx) + fz_drop_stext_sheet(ctx, sheet); + fz_catch(ctx) + rethrow(J); + + js_getregistry(J, "fz_stext_page"); + js_newuserdata(J, "fz_stext_page", text, ffi_gc_fz_stext_page); +} + +static void ffi_DisplayList_search(js_State *J) +{ + fz_context *ctx = js_getcontext(J); + fz_display_list *list = js_touserdata(J, 0, "fz_display_list"); + const char *needle = js_tostring(J, 1); + fz_rect hits[256]; + int i, n; + + fz_try(ctx) + n = fz_search_display_list(ctx, list, needle, hits, nelem(hits)); + fz_catch(ctx) + rethrow(J); + + js_newarray(J); + for (i = 0; i < n; ++i) { + ffi_pushrect(J, hits[i]); + js_setindex(J, -2, i); + } +} + +static void ffi_StructuredText_search(js_State *J) +{ + fz_context *ctx = js_getcontext(J); + fz_stext_page *text = js_touserdata(J, 0, "fz_stext_page"); + const char *needle = js_tostring(J, 1); + fz_rect hits[256]; + int i, n; + + fz_try(ctx) + n = fz_search_stext_page(ctx, text, needle, hits, nelem(hits)); + fz_catch(ctx) + rethrow(J); + + js_newarray(J); + for (i = 0; i < n; ++i) { + ffi_pushrect(J, hits[i]); + js_setindex(J, -2, i); + } +} + +static void ffi_StructuredText_highlight(js_State *J) +{ + fz_context *ctx = js_getcontext(J); + fz_stext_page *text = js_touserdata(J, 0, "fz_stext_page"); + fz_rect rect = ffi_torect(J, 1); + fz_rect hits[256]; + int i, n; + + fz_try(ctx) + n = fz_highlight_selection(ctx, text, rect, hits, nelem(hits)); + fz_catch(ctx) + rethrow(J); + + js_newarray(J); + for (i = 0; i < n; ++i) { + ffi_pushrect(J, hits[i]); + js_setindex(J, -2, i); + } +} + +static void ffi_StructuredText_copy(js_State *J) +{ + fz_context *ctx = js_getcontext(J); + fz_stext_page *text = js_touserdata(J, 0, "fz_stext_page"); + fz_rect rect = ffi_torect(J, 1); + char *s; + + fz_try(ctx) + s = fz_copy_selection(ctx, text, rect); + fz_catch(ctx) + rethrow(J); + + js_pushstring(J, s); + + fz_try(ctx) + fz_free(ctx, s); + fz_catch(ctx) + rethrow(J); +} + static void ffi_new_DisplayListDevice(js_State *J) { fz_context *ctx = js_getcontext(J); @@ -3223,6 +3355,7 @@ int murun_main(int argc, char **argv) jsB_propfun(J, "Page.run", ffi_Page_run, 3); jsB_propfun(J, "Page.toPixmap", ffi_Page_toPixmap, 4); jsB_propfun(J, "Page.toDisplayList", ffi_Page_toDisplayList, 1); + jsB_propfun(J, "Page.toStructuredText", ffi_Page_toStructuredText, 0); jsB_propfun(J, "Page.search", ffi_Page_search, 0); jsB_propfun(J, "Page.getAnnotations", ffi_Page_getAnnotations, 0); } @@ -3339,11 +3472,21 @@ int murun_main(int argc, char **argv) { jsB_propfun(J, "DisplayList.run", ffi_DisplayList_run, 2); jsB_propfun(J, "DisplayList.toPixmap", ffi_DisplayList_toPixmap, 3); + jsB_propfun(J, "DisplayList.toStructuredText", ffi_DisplayList_toStructuredText, 0); + jsB_propfun(J, "DisplayList.search", ffi_DisplayList_search, 1); } js_setregistry(J, "fz_display_list"); js_newobject(J); { + jsB_propfun(J, "StructuredText.search", ffi_StructuredText_search, 1); + jsB_propfun(J, "StructuredText.highlight", ffi_StructuredText_highlight, 1); + jsB_propfun(J, "StructuredText.copy", ffi_StructuredText_copy, 1); + } + js_setregistry(J, "fz_stext_page"); + + js_newobject(J); + { jsB_propfun(J, "Pixmap.bound", ffi_Pixmap_bound, 0); jsB_propfun(J, "Pixmap.clear", ffi_Pixmap_clear, 1); |