summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author Sebastian Rasmussen <sebras@gmail.com> 2016-08-03 03:40:33 +0800
committer Sebastian Rasmussen <sebras@gmail.com> 2016-09-08 18:53:00 +0800
commit 0c61b5737fd5b8fc03ac1457b2dc85033677e8f0 (patch)
tree 3576120d2ed62e5a8c581ace77e425c6098f5157 /include
parent dc2c77351a2b3188c971551b1231cf480dad9986 (diff)
download mupdf-0c61b5737fd5b8fc03ac1457b2dc85033677e8f0.tar.xz
Add options to control heuristics in structured text.
Diffstat (limited to 'include')
-rw-r--r-- include/mupdf/fitz/structured-text.h 29
-rw-r--r-- include/mupdf/fitz/util.h 12
2 files changed, 32 insertions, 9 deletions
diff --git a/include/mupdf/fitz/structured-text.h b/include/mupdf/fitz/structured-text.h
index e4199272..a12c3cc9 100644
--- a/include/mupdf/fitz/structured-text.h
+++ b/include/mupdf/fitz/structured-text.h
@@ -28,6 +28,25 @@ typedef struct fz_stext_sheet_s fz_stext_sheet;
typedef struct fz_stext_page_s fz_stext_page;
/*
+ FZ_STEXT_PRESERVE_LIGATURES: If this option is activated ligatures
+ are passed through to the application in their original form. If
+ this option is deactivated ligatures are expanded into their
+ constituent parts, e.g. the ligature ffi is expanded into three
+ separate characters f, f and i.
+
+ FZ_STEXT_PRESERVE_WHITESPACE: If this option is activated whitespace
+ is passed through to the application in its original form. If this
+ option is deactivated any type of horizontal whitespace (including
+ horizontal tabs) will be replaced with space characters of variable
+ width.
+*/
+enum
+{
+ FZ_STEXT_PRESERVE_LIGATURES = 1,
+ FZ_STEXT_PRESERVE_WHITESPACE = 2,
+};
+
+/*
fz_stext_sheet: A text sheet contains a list of distinct text styles
used on a page (or a series of pages).
*/
@@ -280,9 +299,13 @@ char *fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect);
page: The text page to which content should be added. This will
usually be a newly created (empty) text page, but it can be one
- containing data already (for example when merging multiple pages, or
- watermarking).
+ containing data already (for example when merging multiple pages,
+ or watermarking).
+
+ options: Mask of heuristic options to activate. If 0 is given the
+ default is to activate both FZ_STEXT_PRESERVE_LIGATURES and
+ FZ_STEXT_PRESERVE_WHITESPACE.
*/
-fz_device *fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page);
+fz_device *fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page, int options);
#endif
diff --git a/include/mupdf/fitz/util.h b/include/mupdf/fitz/util.h
index 9f982699..f8dec50a 100644
--- a/include/mupdf/fitz/util.h
+++ b/include/mupdf/fitz/util.h
@@ -38,9 +38,9 @@ fz_pixmap *fz_new_pixmap_from_annot(fz_context *ctx, fz_annot *annot, const fz_m
/*
fz_new_stext_page_from_page: Extract structured text from a page. The sheet must not be NULL.
*/
-fz_stext_page *fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *sheet);
-fz_stext_page *fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_stext_sheet *sheet);
-fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_stext_sheet *sheet);
+fz_stext_page *fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *sheet, int options);
+fz_stext_page *fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_stext_sheet *sheet, int options);
+fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_stext_sheet *sheet, int options);
/*
fz_new_buffer_from_stext_page: Convert structured text into plain text, cropped by the selection rectangle.
@@ -48,9 +48,9 @@ fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_l
otherwise '\n'.
*/
fz_buffer *fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text, const fz_rect *sel, int crlf);
-fz_buffer *fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf);
-fz_buffer *fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf);
-fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf);
+fz_buffer *fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf, int options);
+fz_buffer *fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf, int options);
+fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf, int options);
/*
fz_search_page: Search for the 'needle' text on the page.