summary | refs | log | tree | commit | diff
path: root/fitz/text_extract.c
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2013-06-12 16:28:14 +0200
committer: Tor Andersson <tor.andersson@artifex.com> 2013-06-12 16:28:14 +0200
commit: b86aa63432a2436563bdcc398577ec4884883434 (patch)
tree: 749002e3de9d52943898a47f8c57d34c6a09b188 /fitz/text_extract.c
parent: b975f1b82a061db61124d1cf0cd55ab60c22dc8e (diff)
download: mupdf-b86aa63432a2436563bdcc398577ec4884883434.tar.xz
Support begin/end page calls in text extraction device.
Diffstat (limited to 'fitz/text_extract.c')
-rw-r--r-- fitz/text_extract.c | 61
1 file changed, 39 insertions, 22 deletions
diff --git a/fitz/text_extract.c b/fitz/text_extract.c
index 45e0395a..bf50e6a4 100644
--- a/fitz/text_extract.c
+++ b/fitz/text_extract.c
@@ -429,13 +429,14 @@ fz_lookup_text_style(fz_context *ctx, fz_text_sheet *sheet, fz_text *text, const
}
fz_text_page *
-fz_new_text_page(fz_context *ctx, const fz_rect *mediabox)
+fz_new_text_page(fz_context *ctx)
{
fz_text_page *page = fz_malloc(ctx, sizeof(*page));
- page->mediabox = *mediabox;
+ page->mediabox = fz_empty_rect;
page->len = 0;
page->cap = 0;
page->blocks = NULL;
+ page->next = NULL;
return page;
}
@@ -946,34 +947,48 @@ fz_bidi_reorder_text_page(fz_context *ctx, fz_text_page *page)
}
static void
-fz_text_free_user(fz_device *dev)
+fz_text_begin_page(fz_device *dev, const fz_rect *mediabox, const fz_matrix *ctm)
{
fz_context *ctx = dev->ctx;
fz_text_device *tdev = dev->user;
- fz_try(ctx)
+ if (tdev->page->len)
{
+ tdev->page->next = fz_new_text_page(ctx);
+ tdev->page = tdev->page->next;
+ }
- add_span_to_soup(tdev->spans, tdev->cur_span);
- tdev->cur_span = NULL;
+ tdev->page->mediabox = *mediabox;
+ fz_transform_rect(&tdev->page->mediabox, ctm);
- strain_soup(ctx, tdev);
+ tdev->spans = new_span_soup(ctx);
+}
- /* TODO: smart sorting of blocks in reading order */
- /* TODO: unicode NFC normalization */
+static void
+fz_text_end_page(fz_device *dev)
+{
+ fz_context *ctx = dev->ctx;
+ fz_text_device *tdev = dev->user;
- fz_bidi_reorder_text_page(ctx, tdev->page);
- }
- fz_always(ctx)
- {
- free_span_soup(tdev->spans);
- fz_free(dev->ctx, tdev);
- }
- fz_catch(ctx)
- {
- /* TODO: mark fz_free_device as "doesn't throw" (else rethrowing would
- have to be caught/rethrown again in fz_free_device) */
- }
+ add_span_to_soup(tdev->spans, tdev->cur_span);
+ tdev->cur_span = NULL;
+
+ strain_soup(ctx, tdev);
+ free_span_soup(tdev->spans);
+ tdev->spans = NULL;
+
+ /* TODO: smart sorting of blocks in reading order */
+ /* TODO: unicode NFC normalization */
+
+ fz_bidi_reorder_text_page(ctx, tdev->page);
+}
+
+static void
+fz_text_free_user(fz_device *dev)
+{
+ fz_text_device *tdev = dev->user;
+ free_span_soup(tdev->spans);
+ fz_free(dev->ctx, tdev);
}
fz_device *
@@ -984,12 +999,14 @@ fz_new_text_device(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
fz_text_device *tdev = fz_malloc_struct(ctx, fz_text_device);
tdev->sheet = sheet;
tdev->page = page;
- tdev->spans = new_span_soup(ctx);
+ tdev->spans = NULL;
tdev->cur_span = NULL;
tdev->lastchar = ' ';
dev = fz_new_device(ctx, tdev);
dev->hints = FZ_IGNORE_IMAGE | FZ_IGNORE_SHADE;
+ dev->begin_page = fz_text_begin_page;
+ dev->end_page = fz_text_end_page;
dev->free_user = fz_text_free_user;
dev->fill_text = fz_text_fill_text;
dev->stroke_text = fz_text_stroke_text;