summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTor Andersson <tor.andersson@artifex.com>2015-09-09 16:21:31 +0200
committerTor Andersson <tor.andersson@artifex.com>2015-09-14 16:31:00 +0200
commit6a4444a3f769c3a3334e0e8454602aa0891a3c82 (patch)
tree54e06d505722853a3246cd7455fd3ada0ce803c5
parente856e8047b2f520df4d8a999899ea3c9089ff4d3 (diff)
downloadmupdf-6a4444a3f769c3a3334e0e8454602aa0891a3c82.tar.xz
Add utility functions to help reduce device creation boilerplate.
-rw-r--r--include/mupdf/fitz.h2
-rw-r--r--include/mupdf/fitz/display-list.h5
-rw-r--r--include/mupdf/fitz/util.h62
-rw-r--r--platform/win32/libmupdf.vcproj8
-rw-r--r--source/fitz/list-device.c14
-rw-r--r--source/fitz/util.c454
6 files changed, 545 insertions, 0 deletions
diff --git a/include/mupdf/fitz.h b/include/mupdf/fitz.h
index c5aa2f4c..02f02dde 100644
--- a/include/mupdf/fitz.h
+++ b/include/mupdf/fitz.h
@@ -51,6 +51,8 @@
#include "mupdf/fitz/write-document.h"
+#include "mupdf/fitz/util.h"
+
/* Output formats */
#include "mupdf/fitz/output-pnm.h"
#include "mupdf/fitz/output-png.h"
diff --git a/include/mupdf/fitz/display-list.h b/include/mupdf/fitz/display-list.h
index 85346624..dc8b9ecb 100644
--- a/include/mupdf/fitz/display-list.h
+++ b/include/mupdf/fitz/display-list.h
@@ -88,4 +88,9 @@ fz_display_list *fz_keep_display_list(fz_context *ctx, fz_display_list *list);
*/
void fz_drop_display_list(fz_context *ctx, fz_display_list *list);
+/*
+ fz_bound_display_list: Return the bounding box of the pages recorded in a display list.
+*/
+fz_rect *fz_bound_display_list(fz_context *ctx, fz_display_list *list, fz_rect *bounds);
+
#endif
diff --git a/include/mupdf/fitz/util.h b/include/mupdf/fitz/util.h
new file mode 100644
index 00000000..fc66b47e
--- /dev/null
+++ b/include/mupdf/fitz/util.h
@@ -0,0 +1,62 @@
+#ifndef MUPDF_FITZ_UTIL_H
+#define MUPDF_FITZ_UTIL_H
+
+#include "mupdf/fitz/system.h"
+#include "mupdf/fitz/context.h"
+#include "mupdf/fitz/math.h"
+#include "mupdf/fitz/document.h"
+#include "mupdf/fitz/pixmap.h"
+#include "mupdf/fitz/structured-text.h"
+#include "mupdf/fitz/buffer.h"
+
+/*
+ fz_new_display_list_from_page: Create a display list with the contents of a page.
+*/
+fz_display_list *fz_new_display_list_from_page(fz_context *ctx, fz_page *page);
+fz_display_list *fz_new_display_list_from_page_number(fz_context *ctx, fz_document *doc, int number);
+
+/*
+ fz_new_pixmap_from_page: Render the page to a pixmap using the transform and colorspace.
+*/
+fz_pixmap *fz_new_pixmap_from_page(fz_context *ctx, fz_page *page, const fz_matrix *ctm, fz_colorspace *cs);
+fz_pixmap *fz_new_pixmap_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_matrix *ctm, fz_colorspace *cs);
+fz_pixmap *fz_new_pixmap_from_display_list(fz_context *ctx, fz_display_list *list, const fz_matrix *ctm, fz_colorspace *cs, int opaque);
+
+/*
+ fz_new_pixmap_from_page_contents: Render the page contents without annotations to an opaque pixmap.
+*/
+fz_pixmap *fz_new_pixmap_from_page_contents(fz_context *ctx, fz_page *page, const fz_matrix *ctm, fz_colorspace *cs);
+
+/*
+ fz_new_pixmap_from_annot: Render an annotation to a transparent pixmap,
+ suitable for blending on top of the opaque pixmap returned by fz_new_pixmap_from_page_contents.
+*/
+fz_pixmap *fz_new_pixmap_from_annot(fz_context *ctx, fz_page *page, fz_annot *annot, const fz_matrix *ctm, fz_colorspace *cs);
+
+/*
+ fz_new_text_page_from_page: Extract structured text from a page. The sheet must not be NULL.
+*/
+fz_text_page *fz_new_text_page_from_page(fz_context *ctx, fz_page *page, fz_text_sheet *sheet);
+fz_text_page *fz_new_text_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_text_sheet *sheet);
+fz_text_page *fz_new_text_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_text_sheet *sheet);
+
+/*
+ fz_new_buffer_from_text_page: Convert structured text into plain text, cropped by the selection rectangle.
+ Use fz_infinite_rect to extract all the text on the page. If 'crlf' is true, lines are separated by '\r\n',
+ otherwise '\n'.
+*/
+fz_buffer *fz_new_buffer_from_text_page(fz_context *ctx, fz_text_page *text, const fz_rect *sel, int crlf);
+fz_buffer *fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf);
+fz_buffer *fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf);
+fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf);
+
+/*
+ fz_search_page: Search for the 'needle' text on the page.
+ Record the hits in the hit_bbox array and return the number of hits.
+ Will stop looking once it has filled hit_max rectangles.
+*/
+int fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_rect *hit_bbox, int hit_max);
+int fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char *needle, fz_rect *hit_bbox, int hit_max);
+int fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, fz_rect *hit_bbox, int hit_max);
+
+#endif
diff --git a/platform/win32/libmupdf.vcproj b/platform/win32/libmupdf.vcproj
index b12c624f..d2fdf8f6 100644
--- a/platform/win32/libmupdf.vcproj
+++ b/platform/win32/libmupdf.vcproj
@@ -1058,6 +1058,10 @@
>
</File>
<File
+ RelativePath="..\..\source\fitz\util.c"
+ >
+ </File>
+ <File
RelativePath="..\..\source\fitz\xml.c"
>
</File>
@@ -1541,6 +1545,10 @@
>
</File>
<File
+ RelativePath="..\..\include\mupdf\fitz\util.h"
+ >
+ </File>
+ <File
RelativePath="..\..\include\mupdf\fitz\version.h"
>
</File>
diff --git a/source/fitz/list-device.c b/source/fitz/list-device.c
index 0a282018..ea61b3d2 100644
--- a/source/fitz/list-device.c
+++ b/source/fitz/list-device.c
@@ -115,6 +115,7 @@ struct fz_display_list_s
{
fz_storable storable;
fz_display_node *list;
+ fz_rect mediabox;
int max;
int len;
};
@@ -637,9 +638,14 @@ fz_append_display_node(
static void
fz_list_begin_page(fz_context *ctx, fz_device *dev, const fz_rect *mediabox, const fz_matrix *ctm)
{
+ fz_list_device *writer = (fz_list_device *)dev;
+ fz_display_list *list = writer->list;
fz_rect rect = *mediabox;
fz_transform_rect(&rect, ctm);
+
+ fz_union_rect(&list->mediabox, &rect);
+
fz_append_display_node(
ctx,
dev,
@@ -1405,6 +1411,7 @@ fz_new_display_list(fz_context *ctx)
fz_display_list *list = fz_malloc_struct(ctx, fz_display_list);
FZ_INIT_STORABLE(list, 1, fz_drop_display_list_imp);
list->list = NULL;
+ list->mediabox = fz_empty_rect;
list->max = 0;
list->len = 0;
return list;
@@ -1422,6 +1429,13 @@ fz_drop_display_list(fz_context *ctx, fz_display_list *list)
fz_drop_storable(ctx, &list->storable);
}
+/*
+	fz_bound_display_list: Copy the mediabox stored in the display list
+	into 'bounds' and return 'bounds' so the call can be chained.
+*/
+fz_rect *
+fz_bound_display_list(fz_context *ctx, fz_display_list *list, fz_rect *bounds)
+{
+	const fz_rect *recorded = &list->mediabox;
+
+	*bounds = *recorded;
+	return bounds;
+}
+
void
fz_run_display_list(fz_context *ctx, fz_display_list *list, fz_device *dev, const fz_matrix *top_ctm, const fz_rect *scissor, fz_cookie *cookie)
{
diff --git a/source/fitz/util.c b/source/fitz/util.c
new file mode 100644
index 00000000..6e001d56
--- /dev/null
+++ b/source/fitz/util.c
@@ -0,0 +1,454 @@
+#include "mupdf/fitz.h"
+
+/*
+	fz_new_display_list_from_page: Create a display list and fill it by
+	running the page through a list device with the identity transform.
+
+	Returns the new display list. If anything throws while recording,
+	the list is dropped and the error is rethrown.
+*/
+fz_display_list *
+fz_new_display_list_from_page(fz_context *ctx, fz_page *page)
+{
+	fz_display_list *list;
+	fz_device *dev = NULL;
+
+	/* dev is assigned inside fz_try and read in fz_always. It starts as
+	 * NULL and is registered with fz_var so the cleanup never sees an
+	 * indeterminate pointer when fz_new_list_device itself throws. */
+	fz_var(dev);
+
+	list = fz_new_display_list(ctx);
+
+	fz_try(ctx)
+	{
+		dev = fz_new_list_device(ctx, list);
+		fz_run_page(ctx, page, dev, &fz_identity, NULL);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_device(ctx, dev);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_display_list(ctx, list);
+		fz_rethrow(ctx);
+	}
+
+	return list;
+}
+
+/*
+	fz_new_display_list_from_page_number: Load page 'number' from 'doc',
+	record it into a new display list, and drop the page again.
+
+	The page is dropped whether or not recording succeeds; errors are
+	rethrown to the caller.
+*/
+fz_display_list *
+fz_new_display_list_from_page_number(fz_context *ctx, fz_document *doc, int number)
+{
+	fz_page *loaded = fz_load_page(ctx, doc, number);
+	fz_display_list *result;
+
+	fz_try(ctx)
+	{
+		result = fz_new_display_list_from_page(ctx, loaded);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_page(ctx, loaded);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	return result;
+}
+
+/*
+	fz_new_pixmap_from_display_list: Render a display list to a new pixmap.
+
+	The pixmap covers the display list bounds transformed by 'ctm' and
+	rounded to integer coordinates. If 'background' is non-zero the pixmap
+	is cleared with the value 0xFF before drawing, otherwise it is cleared
+	with fz_clear_pixmap.
+
+	Returns the rendered pixmap. On error the pixmap is dropped and the
+	error is rethrown.
+*/
+fz_pixmap *
+fz_new_pixmap_from_display_list(fz_context *ctx, fz_display_list *list, const fz_matrix *ctm, fz_colorspace *cs, int background)
+{
+	fz_rect rect;
+	fz_irect irect;
+	fz_pixmap *pix;
+	fz_device *dev = NULL;
+
+	/* dev is assigned inside fz_try and read in fz_always. It starts as
+	 * NULL and is registered with fz_var so the cleanup never sees an
+	 * indeterminate pointer when fz_new_draw_device itself throws. */
+	fz_var(dev);
+
+	fz_bound_display_list(ctx, list, &rect);
+	fz_transform_rect(&rect, ctm);
+	fz_round_rect(&irect, &rect);
+
+	pix = fz_new_pixmap_with_bbox(ctx, cs, &irect);
+	if (background)
+		fz_clear_pixmap_with_value(ctx, pix, 0xFF);
+	else
+		fz_clear_pixmap(ctx, pix);
+
+	fz_try(ctx)
+	{
+		dev = fz_new_draw_device(ctx, pix);
+		fz_run_display_list(ctx, list, dev, ctm, NULL, NULL);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_device(ctx, dev);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_pixmap(ctx, pix);
+		fz_rethrow(ctx);
+	}
+
+	return pix;
+}
+
+/*
+	fz_new_pixmap_from_page_contents: Render the page contents (via
+	fz_run_page_contents) to a new pixmap.
+
+	The pixmap covers the page bounds transformed by 'ctm' and rounded to
+	integer coordinates, and is cleared with the value 0xFF before drawing.
+
+	Returns the rendered pixmap. On error the pixmap is dropped and the
+	error is rethrown.
+*/
+fz_pixmap *
+fz_new_pixmap_from_page_contents(fz_context *ctx, fz_page *page, const fz_matrix *ctm, fz_colorspace *cs)
+{
+	fz_rect rect;
+	fz_irect irect;
+	fz_pixmap *pix;
+	fz_device *dev = NULL;
+
+	/* dev is assigned inside fz_try and read in fz_always. It starts as
+	 * NULL and is registered with fz_var so the cleanup never sees an
+	 * indeterminate pointer when fz_new_draw_device itself throws. */
+	fz_var(dev);
+
+	fz_bound_page(ctx, page, &rect);
+	fz_transform_rect(&rect, ctm);
+	fz_round_rect(&irect, &rect);
+
+	pix = fz_new_pixmap_with_bbox(ctx, cs, &irect);
+	fz_clear_pixmap_with_value(ctx, pix, 0xFF);
+
+	fz_try(ctx)
+	{
+		dev = fz_new_draw_device(ctx, pix);
+		fz_run_page_contents(ctx, page, dev, ctm, NULL);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_device(ctx, dev);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_pixmap(ctx, pix);
+		fz_rethrow(ctx);
+	}
+
+	return pix;
+}
+
+/*
+	fz_new_pixmap_from_annot: Render a single annotation to a new pixmap.
+
+	The pixmap covers the annotation bounds transformed by 'ctm' and
+	rounded to integer coordinates, and is cleared with fz_clear_pixmap
+	before drawing.
+
+	Returns the rendered pixmap. On error the pixmap is dropped and the
+	error is rethrown.
+*/
+fz_pixmap *
+fz_new_pixmap_from_annot(fz_context *ctx, fz_page *page, fz_annot *annot, const fz_matrix *ctm, fz_colorspace *cs)
+{
+	fz_rect rect;
+	fz_irect irect;
+	fz_pixmap *pix;
+	fz_device *dev = NULL;
+
+	/* dev is assigned inside fz_try and read in fz_always. It starts as
+	 * NULL and is registered with fz_var so the cleanup never sees an
+	 * indeterminate pointer when fz_new_draw_device itself throws. */
+	fz_var(dev);
+
+	fz_bound_annot(ctx, page, annot, &rect);
+	fz_transform_rect(&rect, ctm);
+	fz_round_rect(&irect, &rect);
+
+	pix = fz_new_pixmap_with_bbox(ctx, cs, &irect);
+	fz_clear_pixmap(ctx, pix);
+
+	fz_try(ctx)
+	{
+		dev = fz_new_draw_device(ctx, pix);
+		fz_run_annot(ctx, page, annot, dev, ctm, NULL);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_device(ctx, dev);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_pixmap(ctx, pix);
+		fz_rethrow(ctx);
+	}
+
+	return pix;
+}
+
+/*
+	fz_new_pixmap_from_page: Render a page (via fz_run_page) to a new
+	pixmap using the given transform and colorspace.
+
+	The pixmap covers the page bounds transformed by 'ctm' and rounded to
+	integer coordinates, and is cleared with the value 0xFF before drawing.
+
+	Returns the rendered pixmap. On error the pixmap is dropped and the
+	error is rethrown.
+*/
+fz_pixmap *
+fz_new_pixmap_from_page(fz_context *ctx, fz_page *page, const fz_matrix *ctm, fz_colorspace *cs)
+{
+	fz_rect rect;
+	fz_irect irect;
+	fz_pixmap *pix;
+	fz_device *dev = NULL;
+
+	/* dev is assigned inside fz_try and read in fz_always. It starts as
+	 * NULL and is registered with fz_var so the cleanup never sees an
+	 * indeterminate pointer when fz_new_draw_device itself throws. */
+	fz_var(dev);
+
+	fz_bound_page(ctx, page, &rect);
+	fz_transform_rect(&rect, ctm);
+	fz_round_rect(&irect, &rect);
+
+	pix = fz_new_pixmap_with_bbox(ctx, cs, &irect);
+	fz_clear_pixmap_with_value(ctx, pix, 0xFF);
+
+	fz_try(ctx)
+	{
+		dev = fz_new_draw_device(ctx, pix);
+		fz_run_page(ctx, page, dev, ctm, NULL);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_device(ctx, dev);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_pixmap(ctx, pix);
+		fz_rethrow(ctx);
+	}
+
+	return pix;
+}
+
+/*
+	fz_new_pixmap_from_page_number: Load page 'number' from 'doc', render
+	it with fz_new_pixmap_from_page, and drop the page again.
+
+	The page is dropped whether or not rendering succeeds; errors are
+	rethrown to the caller.
+*/
+fz_pixmap *
+fz_new_pixmap_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_matrix *ctm, fz_colorspace *cs)
+{
+	fz_page *loaded = fz_load_page(ctx, doc, number);
+	fz_pixmap *rendered;
+
+	fz_try(ctx)
+	{
+		rendered = fz_new_pixmap_from_page(ctx, loaded, ctm, cs);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_page(ctx, loaded);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	return rendered;
+}
+
+/*
+	fz_new_text_page_from_display_list: Create a text page and fill it by
+	running the display list through a text device with the identity
+	transform.
+
+	Returns the new text page. On error the text page is dropped and the
+	error is rethrown.
+*/
+fz_text_page *
+fz_new_text_page_from_display_list(fz_context *ctx, fz_display_list *list, fz_text_sheet *sheet)
+{
+	fz_text_page *text;
+	fz_device *dev = NULL;
+
+	/* dev is assigned inside fz_try and read in fz_always. It starts as
+	 * NULL and is registered with fz_var so the cleanup never sees an
+	 * indeterminate pointer when fz_new_text_device itself throws. */
+	fz_var(dev);
+
+	text = fz_new_text_page(ctx);
+	fz_try(ctx)
+	{
+		dev = fz_new_text_device(ctx, sheet, text);
+		fz_run_display_list(ctx, list, dev, &fz_identity, NULL, NULL);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_device(ctx, dev);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_text_page(ctx, text);
+		fz_rethrow(ctx);
+	}
+
+	return text;
+}
+
+/*
+	fz_new_text_page_from_page: Create a text page and fill it by running
+	the page through a text device with the identity transform.
+
+	Returns the new text page. On error the text page is dropped and the
+	error is rethrown.
+*/
+fz_text_page *
+fz_new_text_page_from_page(fz_context *ctx, fz_page *page, fz_text_sheet *sheet)
+{
+	fz_text_page *text;
+	fz_device *dev = NULL;
+
+	/* dev is assigned inside fz_try and read in fz_always. It starts as
+	 * NULL and is registered with fz_var so the cleanup never sees an
+	 * indeterminate pointer when fz_new_text_device itself throws. */
+	fz_var(dev);
+
+	text = fz_new_text_page(ctx);
+	fz_try(ctx)
+	{
+		dev = fz_new_text_device(ctx, sheet, text);
+		fz_run_page(ctx, page, dev, &fz_identity, NULL);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_device(ctx, dev);
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_text_page(ctx, text);
+		fz_rethrow(ctx);
+	}
+
+	return text;
+}
+
+/*
+	fz_new_text_page_from_page_number: Load page 'number' from 'doc',
+	extract its text with fz_new_text_page_from_page, and drop the page.
+
+	The page is dropped whether or not extraction succeeds; errors are
+	rethrown to the caller.
+*/
+fz_text_page *
+fz_new_text_page_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_text_sheet *sheet)
+{
+	fz_page *loaded = fz_load_page(ctx, doc, number);
+	fz_text_page *extracted;
+
+	fz_try(ctx)
+	{
+		extracted = fz_new_text_page_from_page(ctx, loaded, sheet);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_page(ctx, loaded);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	return extracted;
+}
+
+/*
+	fz_search_display_list: Extract the text of a display list into a
+	temporary text page and search it for 'needle'.
+
+	Hits are written by fz_search_text_page into 'hit_bbox' (at most
+	'hit_max' entries are passed through); its return value is returned.
+	The temporary text sheet and text page are released on both the
+	success and the error path; errors are rethrown.
+*/
+int
+fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, fz_rect *hit_bbox, int hit_max)
+{
+	fz_text_sheet *sheet;
+	fz_text_page *text = NULL;
+	int count = 0;
+
+	/* text is assigned inside fz_try and read in fz_always. */
+	fz_var(text);
+
+	sheet = fz_new_text_sheet(ctx);
+	fz_try(ctx)
+	{
+		text = fz_new_text_page_from_display_list(ctx, list, sheet);
+		count = fz_search_text_page(ctx, text, needle, hit_bbox, hit_max);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_text_sheet(ctx, sheet);
+		/* Dropping here (not after fz_catch) keeps the text page from
+		 * leaking when the search throws. text is still NULL if the
+		 * extraction itself failed. */
+		if (text != NULL)
+			fz_drop_text_page(ctx, text);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	return count;
+}
+
+/*
+	fz_search_page: Extract the text of a page into a temporary text page
+	and search it for 'needle'.
+
+	Hits are written by fz_search_text_page into 'hit_bbox' (at most
+	'hit_max' entries are passed through); its return value is returned.
+	The temporary text sheet and text page are released on both the
+	success and the error path; errors are rethrown.
+*/
+int
+fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_rect *hit_bbox, int hit_max)
+{
+	fz_text_sheet *sheet;
+	fz_text_page *text = NULL;
+	int count = 0;
+
+	/* text is assigned inside fz_try and read in fz_always. */
+	fz_var(text);
+
+	sheet = fz_new_text_sheet(ctx);
+	fz_try(ctx)
+	{
+		text = fz_new_text_page_from_page(ctx, page, sheet);
+		count = fz_search_text_page(ctx, text, needle, hit_bbox, hit_max);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_text_sheet(ctx, sheet);
+		/* Dropping here (not after fz_catch) keeps the text page from
+		 * leaking when the search throws. text is still NULL if the
+		 * extraction itself failed. */
+		if (text != NULL)
+			fz_drop_text_page(ctx, text);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	return count;
+}
+
+/*
+	fz_search_page_number: Load page 'number' from 'doc', search it with
+	fz_search_page, and drop the page again.
+
+	Returns the value produced by fz_search_page. The page is dropped
+	whether or not the search succeeds; errors are rethrown.
+*/
+int
+fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char *needle, fz_rect *hit_bbox, int hit_max)
+{
+	fz_page *loaded = fz_load_page(ctx, doc, number);
+	int hits;
+
+	fz_try(ctx)
+	{
+		hits = fz_search_page(ctx, loaded, needle, hit_bbox, hit_max);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_page(ctx, loaded);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	return hits;
+}
+
+/*
+	fz_new_buffer_from_text_page: Convert structured text into plain text,
+	keeping only characters whose bounding box touches the selection
+	rectangle 'sel'.
+
+	An infinite 'sel' selects everything. Only blocks of type
+	FZ_PAGE_BLOCK_TEXT are visited. Character codes below 32 are written
+	as '?'. A line break ("\r\n" if 'crlf' is non-zero, otherwise "\n") is
+	written between lines that contributed text, never after the last one.
+
+	Returns the new buffer. On error the buffer is dropped and the error
+	is rethrown.
+*/
+fz_buffer *
+fz_new_buffer_from_text_page(fz_context *ctx, fz_text_page *text, const fz_rect *sel, int crlf)
+{
+	fz_buffer *buf;
+	fz_rect hitbox;
+	float x0, y0, x1, y1;
+	int block_num;
+	int need_newline;
+	int i;
+
+	need_newline = 0;
+
+	if (fz_is_infinite_rect(sel))
+	{
+		x0 = y0 = INT_MIN;
+		x1 = y1 = INT_MAX;
+	}
+	else
+	{
+		x0 = sel->x0;
+		y0 = sel->y0;
+		x1 = sel->x1;
+		y1 = sel->y1;
+	}
+
+	buf = fz_new_buffer(ctx, 256);
+	fz_try(ctx)
+	{
+		for (block_num = 0; block_num < text->len; block_num++)
+		{
+			fz_text_line *line;
+			fz_text_block *block;
+			fz_text_span *span;
+
+			if (text->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+				continue;
+
+			block = text->blocks[block_num].u.text;
+			for (line = block->lines; line < block->lines + block->len; line++)
+			{
+				int saw_text = 0;
+				for (span = line->first_span; span; span = span->next)
+				{
+					for (i = 0; i < span->len; i++)
+					{
+						/* Declared at the top of the block: the
+						 * rest of this file keeps declarations
+						 * ahead of statements (C89 style). */
+						int c;
+
+						fz_text_char_bbox(ctx, &hitbox, span, i);
+						c = span->text[i].c;
+						if (c < 32)
+							c = '?';
+						if (hitbox.x1 >= x0 && hitbox.x0 <= x1 && hitbox.y1 >= y0 && hitbox.y0 <= y1)
+						{
+							saw_text = 1;
+							/* The break owed by the previous
+							 * line is emitted lazily, just
+							 * before the next selected char. */
+							if (need_newline)
+							{
+								if (crlf)
+									fz_write_buffer_rune(ctx, buf, '\r');
+								fz_write_buffer_rune(ctx, buf, '\n');
+								need_newline = 0;
+							}
+							fz_write_buffer_rune(ctx, buf, c);
+						}
+					}
+				}
+
+				if (saw_text)
+					need_newline = 1;
+			}
+		}
+	}
+	fz_catch(ctx)
+	{
+		fz_drop_buffer(ctx, buf);
+		fz_rethrow(ctx);
+	}
+
+	return buf;
+}
+
+/*
+	fz_new_buffer_from_display_list: Extract the text of a display list
+	into a temporary text page and convert it to a plain text buffer with
+	fz_new_buffer_from_text_page ('sel' and 'crlf' are passed through).
+
+	Returns the new buffer. The temporary text sheet and text page are
+	released on both the success and the error path; errors are rethrown.
+*/
+fz_buffer *
+fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_rect *sel, int crlf)
+{
+	fz_text_sheet *sheet;
+	fz_text_page *text = NULL;
+	fz_buffer *buf = NULL;
+
+	/* text is assigned inside fz_try and read in fz_always. */
+	fz_var(text);
+
+	sheet = fz_new_text_sheet(ctx);
+	fz_try(ctx)
+	{
+		text = fz_new_text_page_from_display_list(ctx, list, sheet);
+		buf = fz_new_buffer_from_text_page(ctx, text, sel, crlf);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_text_sheet(ctx, sheet);
+		/* Dropping here (not after fz_catch) keeps the text page from
+		 * leaking when the buffer conversion throws. text is still
+		 * NULL if the extraction itself failed. */
+		if (text != NULL)
+			fz_drop_text_page(ctx, text);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	return buf;
+}
+
+/*
+	fz_new_buffer_from_page: Extract the text of a page into a temporary
+	text page and convert it to a plain text buffer with
+	fz_new_buffer_from_text_page ('sel' and 'crlf' are passed through).
+
+	Returns the new buffer. The temporary text sheet and text page are
+	released on both the success and the error path; errors are rethrown.
+*/
+fz_buffer *
+fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_rect *sel, int crlf)
+{
+	fz_text_sheet *sheet;
+	fz_text_page *text = NULL;
+	fz_buffer *buf = NULL;
+
+	/* text is assigned inside fz_try and read in fz_always. */
+	fz_var(text);
+
+	sheet = fz_new_text_sheet(ctx);
+	fz_try(ctx)
+	{
+		text = fz_new_text_page_from_page(ctx, page, sheet);
+		buf = fz_new_buffer_from_text_page(ctx, text, sel, crlf);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_text_sheet(ctx, sheet);
+		/* Dropping here (not after fz_catch) keeps the text page from
+		 * leaking when the buffer conversion throws. text is still
+		 * NULL if the extraction itself failed. */
+		if (text != NULL)
+			fz_drop_text_page(ctx, text);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	return buf;
+}
+
+/*
+	fz_new_buffer_from_page_number: Load page 'number' from 'doc', convert
+	its text to a buffer with fz_new_buffer_from_page, and drop the page.
+
+	The page is dropped whether or not the conversion succeeds; errors are
+	rethrown to the caller.
+*/
+fz_buffer *
+fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_rect *sel, int crlf)
+{
+	fz_page *loaded = fz_load_page(ctx, doc, number);
+	fz_buffer *contents;
+
+	fz_try(ctx)
+	{
+		contents = fz_new_buffer_from_page(ctx, loaded, sel, crlf);
+	}
+	fz_always(ctx)
+	{
+		fz_drop_page(ctx, loaded);
+	}
+	fz_catch(ctx)
+	{
+		fz_rethrow(ctx);
+	}
+
+	return contents;
+}