diff options
-rw-r--r-- | include/mupdf/fitz.h | 2 | ||||
-rw-r--r-- | include/mupdf/fitz/display-list.h | 5 | ||||
-rw-r--r-- | include/mupdf/fitz/util.h | 62 | ||||
-rw-r--r-- | platform/win32/libmupdf.vcproj | 8 | ||||
-rw-r--r-- | source/fitz/list-device.c | 14 | ||||
-rw-r--r-- | source/fitz/util.c | 454 |
6 files changed, 545 insertions, 0 deletions
diff --git a/include/mupdf/fitz.h b/include/mupdf/fitz.h index c5aa2f4c..02f02dde 100644 --- a/include/mupdf/fitz.h +++ b/include/mupdf/fitz.h @@ -51,6 +51,8 @@ #include "mupdf/fitz/write-document.h" +#include "mupdf/fitz/util.h" + /* Output formats */ #include "mupdf/fitz/output-pnm.h" #include "mupdf/fitz/output-png.h" diff --git a/include/mupdf/fitz/display-list.h b/include/mupdf/fitz/display-list.h index 85346624..dc8b9ecb 100644 --- a/include/mupdf/fitz/display-list.h +++ b/include/mupdf/fitz/display-list.h @@ -88,4 +88,9 @@ fz_display_list *fz_keep_display_list(fz_context *ctx, fz_display_list *list); */ void fz_drop_display_list(fz_context *ctx, fz_display_list *list); +/* + fz_bound_display_list: Return the bounding box of the pages recorded in a display list. +*/ +fz_rect *fz_bound_display_list(fz_context *ctx, fz_display_list *list, fz_rect *bounds); + #endif diff --git a/include/mupdf/fitz/util.h b/include/mupdf/fitz/util.h new file mode 100644 index 00000000..fc66b47e --- /dev/null +++ b/include/mupdf/fitz/util.h @@ -0,0 +1,62 @@ +#ifndef MUPDF_FITZ_UTIL_H +#define MUPDF_FITZ_UTIL_H + +#include "mupdf/fitz/system.h" +#include "mupdf/fitz/context.h" +#include "mupdf/fitz/math.h" +#include "mupdf/fitz/document.h" +#include "mupdf/fitz/pixmap.h" +#include "mupdf/fitz/structured-text.h" +#include "mupdf/fitz/buffer.h" + +/* + fz_new_display_list_from_page: Create a display list with the contents of a page. +*/ +fz_display_list *fz_new_display_list_from_page(fz_context *ctx, fz_page *page); +fz_display_list *fz_new_display_list_from_page_number(fz_context *ctx, fz_document *doc, int number); + +/* + fz_new_pixmap_from_page: Render the page to a pixmap using the transform and colorspace. 
+*/ +fz_pixmap *fz_new_pixmap_from_page(fz_context *ctx, fz_page *page, const fz_matrix *ctm, fz_colorspace *cs); +fz_pixmap *fz_new_pixmap_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_matrix *ctm, fz_colorspace *cs); +fz_pixmap *fz_new_pixmap_from_display_list(fz_context *ctx, fz_display_list *list, const fz_matrix *ctm, fz_colorspace *cs, int opaque); + +/* + fz_new_pixmap_from_page_contents: Render the page contents without annotations to an opaque pixmap. +*/ +fz_pixmap *fz_new_pixmap_from_page_contents(fz_context *ctx, fz_page *page, const fz_matrix *ctm, fz_colorspace *cs); + +/* + fz_new_pixmap_from_annot: Render an annotation to a transparent pixmap, + suitable for blending on top of the opaque pixmap returned by fz_new_pixmap_from_page_contents. +*/ +fz_pixmap *fz_new_pixmap_from_annot(fz_context *ctx, fz_page *page, fz_annot *annot, const fz_matrix *ctm, fz_colorspace *cs); + +/* + fz_new_text_page_from_page: Extract structured text from a page. The sheet must not be NULL. +*/ +fz_text_page *fz_new_text_page_from_page(fz_context *ctx, fz_page *page, fz_text_sheet *sheet); +fz_text_page *fz_new_text_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_text_sheet *sheet); +fz_text_page *fz_new_text_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_text_sheet *sheet); + +/* + fz_new_buffer_from_text_page: Convert structured text into plain text, cropped by the selection rectangle. + Use fz_infinite_rect to extract all the text on the page. If 'crlf' is true, lines are separated by '\r\n', + otherwise '\n'. 
+*/ +fz_buffer *fz_new_buffer_from_text_page(fz_context *ctx, fz_text_page *text, const fz_rect *sel, int crlf); +fz_buffer *fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf); +fz_buffer *fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf); +fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf); + +/* + fz_search_page: Search for the 'needle' text on the page. + Record the hits in the hit_bbox array and return the number of hits. + Will stop looking once it has filled hit_max rectangles. +*/ +int fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_rect *hit_bbox, int hit_max); +int fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char *needle, fz_rect *hit_bbox, int hit_max); +int fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, fz_rect *hit_bbox, int hit_max); + +#endif diff --git a/platform/win32/libmupdf.vcproj b/platform/win32/libmupdf.vcproj index b12c624f..d2fdf8f6 100644 --- a/platform/win32/libmupdf.vcproj +++ b/platform/win32/libmupdf.vcproj @@ -1058,6 +1058,10 @@ > </File> <File + RelativePath="..\..\source\fitz\util.c" + > + </File> + <File RelativePath="..\..\source\fitz\xml.c" > </File> @@ -1541,6 +1545,10 @@ > </File> <File + RelativePath="..\..\include\mupdf\fitz\util.h" + > + </File> + <File RelativePath="..\..\include\mupdf\fitz\version.h" > </File> diff --git a/source/fitz/list-device.c b/source/fitz/list-device.c index 0a282018..ea61b3d2 100644 --- a/source/fitz/list-device.c +++ b/source/fitz/list-device.c @@ -115,6 +115,7 @@ struct fz_display_list_s { fz_storable storable; fz_display_node *list; + fz_rect mediabox; int max; int len; }; @@ -637,9 +638,14 @@ fz_append_display_node( static void fz_list_begin_page(fz_context *ctx, fz_device *dev, const fz_rect *mediabox, const fz_matrix *ctm) { + fz_list_device *writer = 
(fz_list_device *)dev; + fz_display_list *list = writer->list; fz_rect rect = *mediabox; fz_transform_rect(&rect, ctm); + + fz_union_rect(&list->mediabox, &rect); + fz_append_display_node( ctx, dev, @@ -1405,6 +1411,7 @@ fz_new_display_list(fz_context *ctx) fz_display_list *list = fz_malloc_struct(ctx, fz_display_list); FZ_INIT_STORABLE(list, 1, fz_drop_display_list_imp); list->list = NULL; + list->mediabox = fz_empty_rect; list->max = 0; list->len = 0; return list; @@ -1422,6 +1429,13 @@ fz_drop_display_list(fz_context *ctx, fz_display_list *list) fz_drop_storable(ctx, &list->storable); } +fz_rect * +fz_bound_display_list(fz_context *ctx, fz_display_list *list, fz_rect *bounds) +{ + *bounds = list->mediabox; + return bounds; +} + void fz_run_display_list(fz_context *ctx, fz_display_list *list, fz_device *dev, const fz_matrix *top_ctm, const fz_rect *scissor, fz_cookie *cookie) { diff --git a/source/fitz/util.c b/source/fitz/util.c new file mode 100644 index 00000000..6e001d56 --- /dev/null +++ b/source/fitz/util.c @@ -0,0 +1,454 @@ +#include "mupdf/fitz.h" + +fz_display_list * +fz_new_display_list_from_page(fz_context *ctx, fz_page *page) +{ + fz_display_list *list; + fz_device *dev; + + list = fz_new_display_list(ctx); + + fz_try(ctx) + { + dev = fz_new_list_device(ctx, list); + fz_run_page(ctx, page, dev, &fz_identity, NULL); + } + fz_always(ctx) + { + fz_drop_device(ctx, dev); + } + fz_catch(ctx) + { + fz_drop_display_list(ctx, list); + fz_rethrow(ctx); + } + + return list; +} + +fz_display_list * +fz_new_display_list_from_page_number(fz_context *ctx, fz_document *doc, int number) +{ + fz_page *page; + fz_display_list *list; + + page = fz_load_page(ctx, doc, number); + fz_try(ctx) + list = fz_new_display_list_from_page(ctx, page); + fz_always(ctx) + fz_drop_page(ctx, page); + fz_catch(ctx) + fz_rethrow(ctx); + return list; +} + +fz_pixmap * +fz_new_pixmap_from_display_list(fz_context *ctx, fz_display_list *list, const fz_matrix *ctm, fz_colorspace *cs, int 
background) +{ + fz_rect rect; + fz_irect irect; + fz_pixmap *pix; + fz_device *dev; + + fz_bound_display_list(ctx, list, &rect); + fz_transform_rect(&rect, ctm); + fz_round_rect(&irect, &rect); + + pix = fz_new_pixmap_with_bbox(ctx, cs, &irect); + if (background) + fz_clear_pixmap_with_value(ctx, pix, 0xFF); + else + fz_clear_pixmap(ctx, pix); + + fz_try(ctx) + { + dev = fz_new_draw_device(ctx, pix); + fz_run_display_list(ctx, list, dev, ctm, NULL, NULL); + } + fz_always(ctx) + { + fz_drop_device(ctx, dev); + } + fz_catch(ctx) + { + fz_drop_pixmap(ctx, pix); + fz_rethrow(ctx); + } + + return pix; +} + +fz_pixmap * +fz_new_pixmap_from_page_contents(fz_context *ctx, fz_page *page, const fz_matrix *ctm, fz_colorspace *cs) +{ + fz_rect rect; + fz_irect irect; + fz_pixmap *pix; + fz_device *dev; + + fz_bound_page(ctx, page, &rect); + fz_transform_rect(&rect, ctm); + fz_round_rect(&irect, &rect); + + pix = fz_new_pixmap_with_bbox(ctx, cs, &irect); + fz_clear_pixmap_with_value(ctx, pix, 0xFF); + + fz_try(ctx) + { + dev = fz_new_draw_device(ctx, pix); + fz_run_page_contents(ctx, page, dev, ctm, NULL); + } + fz_always(ctx) + { + fz_drop_device(ctx, dev); + } + fz_catch(ctx) + { + fz_drop_pixmap(ctx, pix); + fz_rethrow(ctx); + } + + return pix; +} + +fz_pixmap * +fz_new_pixmap_from_annot(fz_context *ctx, fz_page *page, fz_annot *annot, const fz_matrix *ctm, fz_colorspace *cs) +{ + fz_rect rect; + fz_irect irect; + fz_pixmap *pix; + fz_device *dev; + + fz_bound_annot(ctx, page, annot, &rect); + fz_transform_rect(&rect, ctm); + fz_round_rect(&irect, &rect); + + pix = fz_new_pixmap_with_bbox(ctx, cs, &irect); + fz_clear_pixmap(ctx, pix); + + fz_try(ctx) + { + dev = fz_new_draw_device(ctx, pix); + fz_run_annot(ctx, page, annot, dev, ctm, NULL); + } + fz_always(ctx) + { + fz_drop_device(ctx, dev); + } + fz_catch(ctx) + { + fz_drop_pixmap(ctx, pix); + fz_rethrow(ctx); + } + + return pix; +} + +fz_pixmap * +fz_new_pixmap_from_page(fz_context *ctx, fz_page *page, const fz_matrix 
*ctm, fz_colorspace *cs) +{ + fz_rect rect; + fz_irect irect; + fz_pixmap *pix; + fz_device *dev; + + fz_bound_page(ctx, page, &rect); + fz_transform_rect(&rect, ctm); + fz_round_rect(&irect, &rect); + + pix = fz_new_pixmap_with_bbox(ctx, cs, &irect); + fz_clear_pixmap_with_value(ctx, pix, 0xFF); + + fz_try(ctx) + { + dev = fz_new_draw_device(ctx, pix); + fz_run_page(ctx, page, dev, ctm, NULL); + } + fz_always(ctx) + { + fz_drop_device(ctx, dev); + } + fz_catch(ctx) + { + fz_drop_pixmap(ctx, pix); + fz_rethrow(ctx); + } + + return pix; +} + +fz_pixmap * +fz_new_pixmap_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_matrix *ctm, fz_colorspace *cs) +{ + fz_page *page; + fz_pixmap *pix; + + page = fz_load_page(ctx, doc, number); + fz_try(ctx) + pix = fz_new_pixmap_from_page(ctx, page, ctm, cs); + fz_always(ctx) + fz_drop_page(ctx, page); + fz_catch(ctx) + fz_rethrow(ctx); + return pix; +} + +fz_text_page * +fz_new_text_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_text_sheet *sheet) +{ + fz_text_page *text; + fz_device *dev; + + text = fz_new_text_page(ctx); + fz_try(ctx) + { + dev = fz_new_text_device(ctx, sheet, text); + fz_run_display_list(ctx, list, dev, &fz_identity, NULL, NULL); + } + fz_always(ctx) + { + fz_drop_device(ctx, dev); + } + fz_catch(ctx) + { + fz_drop_text_page(ctx, text); + fz_rethrow(ctx); + } + + return text; +} + +fz_text_page * +fz_new_text_page_from_page(fz_context *ctx, fz_page *page, fz_text_sheet *sheet) +{ + fz_text_page *text; + fz_device *dev; + + text = fz_new_text_page(ctx); + fz_try(ctx) + { + dev = fz_new_text_device(ctx, sheet, text); + fz_run_page(ctx, page, dev, &fz_identity, NULL); + } + fz_always(ctx) + { + fz_drop_device(ctx, dev); + } + fz_catch(ctx) + { + fz_drop_text_page(ctx, text); + fz_rethrow(ctx); + } + + return text; +} + +fz_text_page * +fz_new_text_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_text_sheet *sheet) +{ + fz_page *page; + fz_text_page 
*text; + + page = fz_load_page(ctx, doc, number); + fz_try(ctx) + text = fz_new_text_page_from_page(ctx, page, sheet); + fz_always(ctx) + fz_drop_page(ctx, page); + fz_catch(ctx) + fz_rethrow(ctx); + return text; +} + +int +fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, fz_rect *hit_bbox, int hit_max) +{ + fz_text_sheet *sheet; + fz_text_page *text; + int count; + + sheet = fz_new_text_sheet(ctx); + fz_try(ctx) + { + text = fz_new_text_page_from_display_list(ctx, list, sheet); + count = fz_search_text_page(ctx, text, needle, hit_bbox, hit_max); + } + fz_always(ctx) + fz_drop_text_sheet(ctx, sheet); + fz_catch(ctx) + fz_rethrow(ctx); + fz_drop_text_page(ctx, text); + return count; +} + +int +fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_rect *hit_bbox, int hit_max) +{ + fz_text_sheet *sheet; + fz_text_page *text; + int count; + + sheet = fz_new_text_sheet(ctx); + fz_try(ctx) + { + text = fz_new_text_page_from_page(ctx, page, sheet); + count = fz_search_text_page(ctx, text, needle, hit_bbox, hit_max); + } + fz_always(ctx) + fz_drop_text_sheet(ctx, sheet); + fz_catch(ctx) + fz_rethrow(ctx); + fz_drop_text_page(ctx, text); + return count; +} + +int +fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char *needle, fz_rect *hit_bbox, int hit_max) +{ + fz_page *page; + int count; + + page = fz_load_page(ctx, doc, number); + fz_try(ctx) + count = fz_search_page(ctx, page, needle, hit_bbox, hit_max); + fz_always(ctx) + fz_drop_page(ctx, page); + fz_catch(ctx) + fz_rethrow(ctx); + return count; +} + +fz_buffer * +fz_new_buffer_from_text_page(fz_context *ctx, fz_text_page *text, const fz_rect *sel, int crlf) +{ + fz_buffer *buf; + fz_rect hitbox; + float x0, y0, x1, y1; + int block_num; + int need_newline; + int i; + + need_newline = 0; + + if (fz_is_infinite_rect(sel)) + { + x0 = y0 = INT_MIN; + x1 = y1 = INT_MAX; + } + else + { + x0 = sel->x0; + y0 = sel->y0; + x1 = sel->x1; + y1 = sel->y1; 
+ } + + buf = fz_new_buffer(ctx, 256); + fz_try(ctx) + { + for (block_num = 0; block_num < text->len; block_num++) + { + fz_text_line *line; + fz_text_block *block; + fz_text_span *span; + + if (text->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + + block = text->blocks[block_num].u.text; + for (line = block->lines; line < block->lines + block->len; line++) + { + int saw_text = 0; + for (span = line->first_span; span; span = span->next) + { + for (i = 0; i < span->len; i++) + { + fz_text_char_bbox(ctx, &hitbox, span, i); + int c = span->text[i].c; + if (c < 32) + c = '?'; + if (hitbox.x1 >= x0 && hitbox.x0 <= x1 && hitbox.y1 >= y0 && hitbox.y0 <= y1) + { + saw_text = 1; + if (need_newline) + { + if (crlf) + fz_write_buffer_rune(ctx, buf, '\r'); + fz_write_buffer_rune(ctx, buf, '\n'); + need_newline = 0; + } + fz_write_buffer_rune(ctx, buf, c); + } + } + } + + if (saw_text) + need_newline = 1; + } + } + } + fz_catch(ctx) + { + fz_drop_buffer(ctx, buf); + fz_rethrow(ctx); + } + + return buf; +} + +fz_buffer * +fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf) +{ + fz_text_sheet *sheet; + fz_text_page *text; + fz_buffer *buf; + + sheet = fz_new_text_sheet(ctx); + fz_try(ctx) + { + text = fz_new_text_page_from_display_list(ctx, list, sheet); + buf = fz_new_buffer_from_text_page(ctx, text, sel, crlf); + } + fz_always(ctx) + fz_drop_text_sheet(ctx, sheet); + fz_catch(ctx) + fz_rethrow(ctx); + fz_drop_text_page(ctx, text); + return buf; +} + +fz_buffer * +fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf) +{ + fz_text_sheet *sheet; + fz_text_page *text; + fz_buffer *buf; + + sheet = fz_new_text_sheet(ctx); + fz_try(ctx) + { + text = fz_new_text_page_from_page(ctx, page, sheet); + buf = fz_new_buffer_from_text_page(ctx, text, sel, crlf); + } + fz_always(ctx) + fz_drop_text_sheet(ctx, sheet); + fz_catch(ctx) + fz_rethrow(ctx); + fz_drop_text_page(ctx, text); + return buf; +} 
+ +fz_buffer * +fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf) +{ + fz_page *page; + fz_buffer *buf; + + page = fz_load_page(ctx, doc, number); + fz_try(ctx) + buf = fz_new_buffer_from_page(ctx, page, sel, crlf); + fz_always(ctx) + fz_drop_page(ctx, page); + fz_catch(ctx) + fz_rethrow(ctx); + return buf; +} |