summary refs log tree commit diff
path: root/source
diff options
context:
space:
mode:
author Sebastian Rasmussen <sebras@gmail.com> 2016-10-24 21:41:55 +0800
committer Sebastian Rasmussen <sebras@gmail.com> 2016-10-26 22:36:00 +0800
commit e7875fcd16a80d880c33b49c9142bce1d870e9a0 (patch)
tree dd2b940bdadb021059d4e1ef21ba70f5e66dee65 /source
parent 04760747563826024dabbfdee2f2ad3a38fa3bab (diff)
download mupdf-e7875fcd16a80d880c33b49c9142bce1d870e9a0.tar.xz
Introduce options for structured text.
Diffstat (limited to 'source')
-rw-r--r-- source/fitz/stext-device.c 32
-rw-r--r-- source/fitz/util.c 12
-rw-r--r-- source/tools/muconvert.c 1
-rw-r--r-- source/tools/murun.c 12
4 files changed, 42 insertions, 15 deletions
diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c
index 588c1454..4019fb24 100644
--- a/source/fitz/stext-device.c
+++ b/source/fitz/stext-device.c
@@ -30,9 +30,15 @@ struct fz_stext_device_s
span_soup *spans;
fz_stext_span *cur_span;
int lastchar;
- int options;
+ int flags;
};
+const char *fz_stext_options_usage =
+ "Structured text output options:\n"
+ "\tpreserve-ligatures: do not expand all ligatures into constituent characters\n"
+ "\tpreserve-whitespace: do not convert all whitespace characters into spaces\n"
+ "\n";
+
static fz_rect *
add_point_to_rect(fz_rect *a, const fz_point *p)
{
@@ -728,7 +734,7 @@ fz_add_stext_char(fz_context *ctx, fz_stext_device *dev, fz_stext_style *style,
if (c == -1)
return;
- if (!(dev->options & FZ_STEXT_PRESERVE_LIGATURES))
+ if (!(dev->flags & FZ_STEXT_PRESERVE_LIGATURES))
switch (c)
{
case 0xFB00: /* ff */
@@ -760,7 +766,7 @@ fz_add_stext_char(fz_context *ctx, fz_stext_device *dev, fz_stext_style *style,
return;
}
- if (!(dev->options & FZ_STEXT_PRESERVE_WHITESPACE))
+ if (!(dev->flags & FZ_STEXT_PRESERVE_WHITESPACE))
switch (c)
{
case 0x0009: /* tab */
@@ -1074,8 +1080,23 @@ fz_stext_drop_device(fz_context *ctx, fz_device *dev)
tdev->spans = NULL;
}
+fz_stext_options *
+fz_parse_stext_options(fz_context *ctx, fz_stext_options *opts, const char *string)
+{
+ const char *val;
+
+ memset(opts, 0, sizeof *opts);
+
+ if (fz_has_option(ctx, string, "preserve-ligatures", &val) && fz_option_eq(val, "yes"))
+ opts->flags |= FZ_STEXT_PRESERVE_LIGATURES;
+ if (fz_has_option(ctx, string, "preserve-whitespace", &val) && fz_option_eq(val, "yes"))
+ opts->flags |= FZ_STEXT_PRESERVE_WHITESPACE;
+
+ return opts;
+}
+
fz_device *
-fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page, int options)
+fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page, const fz_stext_options *opts)
{
fz_stext_device *dev = fz_new_device(ctx, sizeof *dev);
@@ -1097,7 +1118,8 @@ fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page,
dev->spans = NULL;
dev->cur_span = NULL;
dev->lastchar = ' ';
- dev->options = options;
+ if (opts)
+ dev->flags = opts->flags;
return (fz_device*)dev;
}
diff --git a/source/fitz/util.c b/source/fitz/util.c
index 6e0982e7..be8502a5 100644
--- a/source/fitz/util.c
+++ b/source/fitz/util.c
@@ -265,7 +265,7 @@ fz_new_pixmap_from_page_number(fz_context *ctx, fz_document *doc, int number, co
}
fz_stext_page *
-fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_stext_sheet *sheet, int options)
+fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_stext_sheet *sheet, const fz_stext_options *options)
{
fz_stext_page *text;
fz_device *dev;
@@ -295,7 +295,7 @@ fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_s
}
fz_stext_page *
-fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *sheet, int options)
+fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *sheet, const fz_stext_options *options)
{
fz_stext_page *text;
fz_device *dev;
@@ -325,7 +325,7 @@ fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *shee
}
fz_stext_page *
-fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_stext_sheet *sheet, int options)
+fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_stext_sheet *sheet, const fz_stext_options *options)
{
fz_page *page;
fz_stext_page *text;
@@ -478,7 +478,7 @@ fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text, const fz_rec
}
fz_buffer *
-fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf, int options)
+fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf, const fz_stext_options *options)
{
fz_stext_sheet *sheet;
fz_stext_page *text;
@@ -499,7 +499,7 @@ fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz
}
fz_buffer *
-fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf, int options)
+fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf, const fz_stext_options *options)
{
fz_stext_sheet *sheet;
fz_stext_page *text;
@@ -520,7 +520,7 @@ fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int
}
fz_buffer *
-fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf, int options)
+fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf, const fz_stext_options *options)
{
fz_page *page;
fz_buffer *buf;
diff --git a/source/tools/muconvert.c b/source/tools/muconvert.c
index 13afee5d..a62a0dd3 100644
--- a/source/tools/muconvert.c
+++ b/source/tools/muconvert.c
@@ -44,6 +44,7 @@ static void usage(void)
"\n"
);
fputs(fz_draw_options_usage, stderr);
+ fputs(fz_stext_options_usage, stderr);
fputs(fz_cbz_write_options_usage, stderr);
fputs(fz_png_write_options_usage, stderr);
#if FZ_ENABLE_PDF
diff --git a/source/tools/murun.c b/source/tools/murun.c
index 898ed456..fc887105 100644
--- a/source/tools/murun.c
+++ b/source/tools/murun.c
@@ -1734,15 +1734,17 @@ static void ffi_Page_toStructuredText(js_State *J)
{
fz_context *ctx = js_getcontext(J);
fz_page *page = ffi_topage(J, 0);
- int options = js_tointeger(J, 1);
+ const char *options = js_iscoercible(J, 1) ? js_tostring(J, 1) : NULL;
fz_stext_sheet *sheet = NULL;
+ fz_stext_options so;
fz_stext_page *text;
fz_var(sheet);
fz_try(ctx) {
sheet = fz_new_stext_sheet(ctx);
- text = fz_new_stext_page_from_page(ctx, page, sheet, options);
+ fz_parse_stext_options(ctx, &so, options);
+ text = fz_new_stext_page_from_page(ctx, page, sheet, &so);
}
fz_always(ctx)
fz_drop_stext_sheet(ctx, sheet);
@@ -2573,15 +2575,17 @@ static void ffi_DisplayList_toStructuredText(js_State *J)
{
fz_context *ctx = js_getcontext(J);
fz_display_list *list = js_touserdata(J, 0, "fz_display_list");
- int options = js_tointeger(J, 1);
+ const char *options = js_iscoercible(J, 1) ? js_tostring(J, 1) : NULL;
fz_stext_sheet *sheet = NULL;
+ fz_stext_options so;
fz_stext_page *text;
fz_var(sheet);
fz_try(ctx) {
sheet = fz_new_stext_sheet(ctx);
- text = fz_new_stext_page_from_display_list(ctx, list, sheet, options);
+ fz_parse_stext_options(ctx, &so, options);
+ text = fz_new_stext_page_from_display_list(ctx, list, sheet, &so);
}
fz_always(ctx)
fz_drop_stext_sheet(ctx, sheet);