From e7875fcd16a80d880c33b49c9142bce1d870e9a0 Mon Sep 17 00:00:00 2001 From: Sebastian Rasmussen Date: Mon, 24 Oct 2016 21:41:55 +0800 Subject: Introduce options for structured text. --- include/mupdf/fitz/structured-text.h | 22 ++++++++++++++++++---- include/mupdf/fitz/util.h | 12 ++++++------ 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/mupdf/fitz/structured-text.h b/include/mupdf/fitz/structured-text.h index a12c3cc9..061fc83c 100644 --- a/include/mupdf/fitz/structured-text.h +++ b/include/mupdf/fitz/structured-text.h @@ -201,6 +201,8 @@ struct fz_char_and_box_s fz_rect bbox; }; +extern const char *fz_stext_options_usage; + fz_char_and_box *fz_stext_char_at(fz_context *ctx, fz_char_and_box *cab, fz_stext_page *page, int idx); /* @@ -285,6 +287,20 @@ int fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect, f */ char *fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect); +/* + struct fz_stext_options: Options for configuring a structured text device. +*/ +typedef struct fz_stext_options_s fz_stext_options; + +struct fz_stext_options_s +{ + int flags; +}; +/* + fz_parse_stext_options: Parse stext device options from a comma separated key-value string. +*/ +fz_stext_options *fz_parse_stext_options(fz_context *ctx, fz_stext_options *opts, const char *string); + /* fz_new_stext_device: Create a device to extract the text on a page. @@ -302,10 +318,8 @@ char *fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect); containing data already (for example when merging multiple pages, or watermarking). - options: Mask of heuristic options to activate. If 0 is given the - default is to activate both FZ_STEXT_PRESERVE_LIGATURES and - FZ_STEXT_PRESERVE_WHITESPACE. + options: Options to configure the stext device. 
*/ -fz_device *fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page, int options); +fz_device *fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page, const fz_stext_options *options); #endif diff --git a/include/mupdf/fitz/util.h b/include/mupdf/fitz/util.h index f8dec50a..a9a0d59d 100644 --- a/include/mupdf/fitz/util.h +++ b/include/mupdf/fitz/util.h @@ -38,9 +38,9 @@ fz_pixmap *fz_new_pixmap_from_annot(fz_context *ctx, fz_annot *annot, const fz_m /* fz_new_stext_page_from_page: Extract structured text from a page. The sheet must not be NULL. */ -fz_stext_page *fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *sheet, int options); -fz_stext_page *fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_stext_sheet *sheet, int options); -fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_stext_sheet *sheet, int options); +fz_stext_page *fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, fz_stext_sheet *sheet, const fz_stext_options *options); +fz_stext_page *fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_stext_sheet *sheet, const fz_stext_options *options); +fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_stext_sheet *sheet, const fz_stext_options *options); /* fz_new_buffer_from_stext_page: Convert structured text into plain text, cropped by the selection rectangle. @@ -48,9 +48,9 @@ fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_l otherwise '\n'. 
*/ fz_buffer *fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text, const fz_rect *sel, int crlf); -fz_buffer *fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf, int options); -fz_buffer *fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf, int options); -fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf, int options); +fz_buffer *fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf, const fz_stext_options *options); +fz_buffer *fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf, const fz_stext_options *options); +fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf, const fz_stext_options *options); /* fz_search_page: Search for the 'needle' text on the page. -- cgit v1.2.3