summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/mutool/run.html24
-rw-r--r--source/tools/murun.c143
2 files changed, 167 insertions, 0 deletions
diff --git a/docs/mutool/run.html b/docs/mutool/run.html
index c6bc80e5..9cf4cdb9 100644
--- a/docs/mutool/run.html
+++ b/docs/mutool/run.html
@@ -193,6 +193,8 @@ If skipAnnotations is true, ignore annotations.
If alpha is true, the page will be drawn on a transparent background, otherwise white.
<dt>Page#toDisplayList(skipAnnotations)
<dd>Record the contents on the page into a DisplayList.
+<dt>Page#toStructuredText()
+<dd>Extract the text on the page into a StructuredText object.
<dt>Page#search(needle)
<dd>Search for 'needle' text on the page, and return an array with rectangles of all matches found.
<dt>Page#getAnnotations()
@@ -215,6 +217,23 @@ If alpha is true, the page will be drawn on a transparent background, otherwise
</dl>
<h2>
+StructuredText
+</h2>
+
+<p>
+StructuredText objects hold text from a page that has been analyzed and grouped into blocks, lines and spans.
+</p>
+
+<dl>
+<dt>StructuredText#search(needle)
+<dd>Search the text for all instances of 'needle', and return an array with rectangles of all matches found.
+<dt>StructuredText#highlight(rect)
+<dd>Return an array with rectangles needed to highlight a selection defined by the two corners of the rectangle.
+<dt>StructuredText#copy(rect)
+<dd>Return the text from the selection defined by the two corners of the rectangle.
+</dl>
+
+<h2>
ColorSpace
</h2>
@@ -297,6 +316,11 @@ will keep all the graphics required in memory, so will increase the amount of me
<dd>Play back the recorded device calls onto the device.
<dt>DisplayList#toPixmap(transform, colorspace, alpha)
<dd>Render display list to a pixmap. If alpha is true, it will render to a transparent background, otherwise white.
+<dt>DisplayList#toStructuredText()
+<dd>Extract the text in the display list into a StructuredText object.
+<dt>DisplayList#search(needle)
+<dd>Search the display list text for all instances of 'needle', and return an array with rectangles of all matches found.
</dl>
<dl>
diff --git a/source/tools/murun.c b/source/tools/murun.c
index 8a892372..a3eacaa8 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -247,6 +247,12 @@ static void ffi_gc_fz_display_list(js_State *J, void *list)
fz_drop_display_list(ctx, list);
}
+/*
+ * Release callback handed to js_newuserdata for "fz_stext_page" objects:
+ * drops the fz_stext_page passed in as 'text'.
+ */
+static void ffi_gc_fz_stext_page(js_State *J, void *text)
+{
+	fz_drop_stext_page(js_getcontext(J), text);
+}
+
static void ffi_gc_fz_device(js_State *J, void *device)
{
fz_context *ctx = js_getcontext(J);
@@ -1544,6 +1550,29 @@ static void ffi_Page_toPixmap(js_State *J)
js_newuserdata(J, "fz_pixmap", pixmap, ffi_gc_fz_pixmap);
}
+/*
+ * Page#toStructuredText()
+ *
+ * Builds a structured text page from the fz_page in slot 0 and pushes it
+ * as "fz_stext_page" userdata. The temporary style sheet is dropped
+ * whether or not extraction succeeds; fitz errors are rethrown to JS.
+ */
+static void ffi_Page_toStructuredText(js_State *J)
+{
+	fz_context *ctx = js_getcontext(J);
+	fz_page *page = js_touserdata(J, 0, "fz_page");
+	fz_stext_sheet *styles = NULL;
+	fz_stext_page *stext;
+
+	/* styles is assigned inside fz_try and read again in fz_always */
+	fz_var(styles);
+
+	fz_try(ctx)
+	{
+		styles = fz_new_stext_sheet(ctx);
+		stext = fz_new_stext_page_from_page(ctx, page, styles);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_stext_sheet(ctx, styles);
+	}
+	fz_catch(ctx)
+	{
+		rethrow(J);
+	}
+
+	/* fetch the registered "fz_stext_page" object, then wrap the result */
+	js_getregistry(J, "fz_stext_page");
+	js_newuserdata(J, "fz_stext_page", stext, ffi_gc_fz_stext_page);
+}
+
static void ffi_Page_search(js_State *J)
{
fz_context *ctx = js_getcontext(J);
@@ -2211,6 +2240,109 @@ static void ffi_DisplayList_toPixmap(js_State *J)
js_newuserdata(J, "fz_pixmap", pixmap, ffi_gc_fz_pixmap);
}
+/*
+ * DisplayList#toStructuredText()
+ *
+ * Builds a structured text page from the fz_display_list in slot 0 and
+ * pushes it as "fz_stext_page" userdata. The temporary style sheet is
+ * dropped on both the success and the error path; fitz errors are
+ * rethrown to JS.
+ */
+static void ffi_DisplayList_toStructuredText(js_State *J)
+{
+	fz_context *ctx = js_getcontext(J);
+	fz_display_list *list = js_touserdata(J, 0, "fz_display_list");
+	fz_stext_sheet *styles = NULL;
+	fz_stext_page *stext;
+
+	/* styles is assigned inside fz_try and read again in fz_always */
+	fz_var(styles);
+
+	fz_try(ctx)
+	{
+		styles = fz_new_stext_sheet(ctx);
+		stext = fz_new_stext_page_from_display_list(ctx, list, styles);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_stext_sheet(ctx, styles);
+	}
+	fz_catch(ctx)
+	{
+		rethrow(J);
+	}
+
+	/* fetch the registered "fz_stext_page" object, then wrap the result */
+	js_getregistry(J, "fz_stext_page");
+	js_newuserdata(J, "fz_stext_page", stext, ffi_gc_fz_stext_page);
+}
+
+/*
+ * DisplayList#search(needle)
+ *
+ * Runs fz_search_display_list on the display list in slot 0 with the
+ * string in slot 1, and pushes a new array holding one rectangle per
+ * reported hit. The hit buffer holds at most 256 rectangles.
+ */
+static void ffi_DisplayList_search(js_State *J)
+{
+	fz_context *ctx = js_getcontext(J);
+	fz_display_list *list = js_touserdata(J, 0, "fz_display_list");
+	const char *needle = js_tostring(J, 1);
+	fz_rect found[256];
+	int count, k;
+
+	fz_try(ctx)
+	{
+		count = fz_search_display_list(ctx, list, needle, found, nelem(found));
+	}
+	fz_catch(ctx)
+	{
+		rethrow(J);
+	}
+
+	js_newarray(J);
+	for (k = 0; k < count; ++k)
+	{
+		/* the rect sits on top of the stack, the array just below it */
+		ffi_pushrect(J, found[k]);
+		js_setindex(J, -2, k);
+	}
+}
+
+/*
+ * StructuredText#search(needle)
+ *
+ * Runs fz_search_stext_page on the structured text page in slot 0 with
+ * the string in slot 1, and pushes a new array holding one rectangle per
+ * reported hit. The hit buffer holds at most 256 rectangles.
+ */
+static void ffi_StructuredText_search(js_State *J)
+{
+	fz_context *ctx = js_getcontext(J);
+	fz_stext_page *text = js_touserdata(J, 0, "fz_stext_page");
+	const char *needle = js_tostring(J, 1);
+	fz_rect found[256];
+	int count, k;
+
+	fz_try(ctx)
+	{
+		count = fz_search_stext_page(ctx, text, needle, found, nelem(found));
+	}
+	fz_catch(ctx)
+	{
+		rethrow(J);
+	}
+
+	js_newarray(J);
+	for (k = 0; k < count; ++k)
+	{
+		/* the rect sits on top of the stack, the array just below it */
+		ffi_pushrect(J, found[k]);
+		js_setindex(J, -2, k);
+	}
+}
+
+/*
+ * StructuredText#highlight(rect)
+ *
+ * Passes the rectangle in slot 1 to fz_highlight_selection for the
+ * structured text page in slot 0, and pushes a new array holding the
+ * rectangles it reports. The buffer holds at most 256 rectangles.
+ */
+static void ffi_StructuredText_highlight(js_State *J)
+{
+	fz_context *ctx = js_getcontext(J);
+	fz_stext_page *text = js_touserdata(J, 0, "fz_stext_page");
+	fz_rect selection = ffi_torect(J, 1);
+	fz_rect quads[256];
+	int count, k;
+
+	fz_try(ctx)
+	{
+		count = fz_highlight_selection(ctx, text, selection, quads, nelem(quads));
+	}
+	fz_catch(ctx)
+	{
+		rethrow(J);
+	}
+
+	js_newarray(J);
+	for (k = 0; k < count; ++k)
+	{
+		/* the rect sits on top of the stack, the array just below it */
+		ffi_pushrect(J, quads[k]);
+		js_setindex(J, -2, k);
+	}
+}
+
+/*
+ * StructuredText#copy(rect)
+ *
+ * Passes the rectangle in slot 1 to fz_copy_selection for the structured
+ * text page in slot 0 and pushes the returned text as a JS string.
+ * The C string returned by fz_copy_selection is owned by this function
+ * and is freed on every path, including when pushing the string fails.
+ */
+static void ffi_StructuredText_copy(js_State *J)
+{
+	fz_context *ctx = js_getcontext(J);
+	fz_stext_page *text = js_touserdata(J, 0, "fz_stext_page");
+	fz_rect rect = ffi_torect(J, 1);
+	char *s = NULL;
+
+	fz_try(ctx)
+		s = fz_copy_selection(ctx, text, rect);
+	fz_catch(ctx)
+		rethrow(J);
+
+	/*
+	 * js_pushstring may throw a JS exception (for example when it runs
+	 * out of memory); without this guard the copied text would leak.
+	 */
+	if (js_try(J)) {
+		fz_free(ctx, s);
+		js_throw(J);
+	}
+	js_pushstring(J, s);
+	js_endtry(J);
+
+	/* freeing does not raise a fitz error, so no fz_try is needed here */
+	fz_free(ctx, s);
+}
+
static void ffi_new_DisplayListDevice(js_State *J)
{
fz_context *ctx = js_getcontext(J);
@@ -3223,6 +3355,7 @@ int murun_main(int argc, char **argv)
jsB_propfun(J, "Page.run", ffi_Page_run, 3);
jsB_propfun(J, "Page.toPixmap", ffi_Page_toPixmap, 4);
jsB_propfun(J, "Page.toDisplayList", ffi_Page_toDisplayList, 1);
+ jsB_propfun(J, "Page.toStructuredText", ffi_Page_toStructuredText, 0);
jsB_propfun(J, "Page.search", ffi_Page_search, 0);
jsB_propfun(J, "Page.getAnnotations", ffi_Page_getAnnotations, 0);
}
@@ -3339,11 +3472,21 @@ int murun_main(int argc, char **argv)
{
jsB_propfun(J, "DisplayList.run", ffi_DisplayList_run, 2);
jsB_propfun(J, "DisplayList.toPixmap", ffi_DisplayList_toPixmap, 3);
+ jsB_propfun(J, "DisplayList.toStructuredText", ffi_DisplayList_toStructuredText, 0);
+ jsB_propfun(J, "DisplayList.search", ffi_DisplayList_search, 1);
}
js_setregistry(J, "fz_display_list");
js_newobject(J);
{
+ jsB_propfun(J, "StructuredText.search", ffi_StructuredText_search, 1);
+ jsB_propfun(J, "StructuredText.highlight", ffi_StructuredText_highlight, 1);
+ jsB_propfun(J, "StructuredText.copy", ffi_StructuredText_copy, 1);
+ }
+ js_setregistry(J, "fz_stext_page");
+
+ js_newobject(J);
+ {
jsB_propfun(J, "Pixmap.bound", ffi_Pixmap_bound, 0);
jsB_propfun(J, "Pixmap.clear", ffi_Pixmap_clear, 1);