diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/mupdf/fitz/structured-text.h | 29 | ||||
-rw-r--r-- | include/mupdf/fitz/util.h | 12 |
2 files changed, 32 insertions, 9 deletions
diff --git a/include/mupdf/fitz/structured-text.h b/include/mupdf/fitz/structured-text.h index e4199272..a12c3cc9 100644 --- a/include/mupdf/fitz/structured-text.h +++ b/include/mupdf/fitz/structured-text.h @@ -28,6 +28,25 @@ typedef struct fz_stext_sheet_s fz_stext_sheet; typedef struct fz_stext_page_s fz_stext_page; /* + FZ_STEXT_PRESERVE_LIGATURES: If this option is activated ligatures + are passed through to the application in their original form. If + this option is deactivated ligatures are expanded into their + constituent parts, e.g. the ligature ffi is expanded into three + separate characters f, f and i. + + FZ_STEXT_PRESERVE_WHITESPACE: If this option is activated whitespace + is passed through to the application in its original form. If this + option is deactivated any type of horizontal whitespace (including + horizontal tabs) will be replaced with space characters of variable + width. +*/ +enum +{ + FZ_STEXT_PRESERVE_LIGATURES = 1, + FZ_STEXT_PRESERVE_WHITESPACE = 2, +}; + +/* fz_stext_sheet: A text sheet contains a list of distinct text styles used on a page (or a series of pages). */ @@ -280,9 +299,13 @@ char *fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect); page: The text page to which content should be added. This will usually be a newly created (empty) text page, but it can be one - containing data already (for example when merging multiple pages, or - watermarking). + containing data already (for example when merging multiple pages, + or watermarking). + + options: Mask of heuristic options to activate. If 0 is given the + default is to activate both FZ_STEXT_PRESERVE_LIGATURES and + FZ_STEXT_PRESERVE_WHITESPACE. 
*/ -fz_device *fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page); +fz_device *fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page, int options); #endif diff --git a/include/mupdf/fitz/util.h b/include/mupdf/fitz/util.h index 9f982699..f8dec50a 100644 --- a/include/mupdf/fitz/util.h +++ b/include/mupdf/fitz/util.h @@ -38,9 +38,9 @@ fz_pixmap *fz_new_pixmap_from_annot(fz_context *ctx, fz_annot *annot, const fz_m /* fz_new_stext_page_from_page: Extract structured text from a page. The sheet must not be NULL. */ -fz_stext_page *fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *sheet); -fz_stext_page *fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_stext_sheet *sheet); -fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_stext_sheet *sheet); +fz_stext_page *fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *sheet, int options); +fz_stext_page *fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_stext_sheet *sheet, int options); +fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_stext_sheet *sheet, int options); /* fz_new_buffer_from_stext_page: Convert structured text into plain text, cropped by the selection rectangle. @@ -48,9 +48,9 @@ fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_l otherwise '\n'. 
*/ fz_buffer *fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text, const fz_rect *sel, int crlf); -fz_buffer *fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf); -fz_buffer *fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf); -fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf); +fz_buffer *fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf, int options); +fz_buffer *fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf, int options); +fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf, int options); /* fz_search_page: Search for the 'needle' text on the page. |