diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2013-06-12 16:28:14 +0200 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2013-06-12 16:28:14 +0200 |
commit | b86aa63432a2436563bdcc398577ec4884883434 (patch) | |
tree | 749002e3de9d52943898a47f8c57d34c6a09b188 /fitz/text_extract.c | |
parent | b975f1b82a061db61124d1cf0cd55ab60c22dc8e (diff) | |
download | mupdf-b86aa63432a2436563bdcc398577ec4884883434.tar.xz |
Support begin/end page calls in text extraction device.
Diffstat (limited to 'fitz/text_extract.c')
-rw-r--r-- | fitz/text_extract.c | 61 |
1 file changed, 39 insertions, 22 deletions
```diff
diff --git a/fitz/text_extract.c b/fitz/text_extract.c
index 45e0395a..bf50e6a4 100644
--- a/fitz/text_extract.c
+++ b/fitz/text_extract.c
@@ -429,13 +429,14 @@ fz_lookup_text_style(fz_context *ctx, fz_text_sheet *sheet, fz_text *text, const
 }
 
 fz_text_page *
-fz_new_text_page(fz_context *ctx, const fz_rect *mediabox)
+fz_new_text_page(fz_context *ctx)
 {
 	fz_text_page *page = fz_malloc(ctx, sizeof(*page));
-	page->mediabox = *mediabox;
+	page->mediabox = fz_empty_rect;
 	page->len = 0;
 	page->cap = 0;
 	page->blocks = NULL;
+	page->next = NULL;
 	return page;
 }
 
@@ -946,34 +947,48 @@ fz_bidi_reorder_text_page(fz_context *ctx, fz_text_page *page)
 }
 
 static void
-fz_text_free_user(fz_device *dev)
+fz_text_begin_page(fz_device *dev, const fz_rect *mediabox, const fz_matrix *ctm)
 {
 	fz_context *ctx = dev->ctx;
 	fz_text_device *tdev = dev->user;
 
-	fz_try(ctx)
+	if (tdev->page->len)
 	{
+		tdev->page->next = fz_new_text_page(ctx);
+		tdev->page = tdev->page->next;
+	}
 
-		add_span_to_soup(tdev->spans, tdev->cur_span);
-		tdev->cur_span = NULL;
+	tdev->page->mediabox = *mediabox;
+	fz_transform_rect(&tdev->page->mediabox, ctm);
 
-		strain_soup(ctx, tdev);
+	tdev->spans = new_span_soup(ctx);
+}
 
-		/* TODO: smart sorting of blocks in reading order */
-		/* TODO: unicode NFC normalization */
+static void
+fz_text_end_page(fz_device *dev)
+{
+	fz_context *ctx = dev->ctx;
+	fz_text_device *tdev = dev->user;
 
-		fz_bidi_reorder_text_page(ctx, tdev->page);
-	}
-	fz_always(ctx)
-	{
-		free_span_soup(tdev->spans);
-		fz_free(dev->ctx, tdev);
-	}
-	fz_catch(ctx)
-	{
-		/* TODO: mark fz_free_device as "doesn't throw" (else rethrowing would
-		have to be caught/rethrown again in fz_free_device) */
-	}
+	add_span_to_soup(tdev->spans, tdev->cur_span);
+	tdev->cur_span = NULL;
+
+	strain_soup(ctx, tdev);
+	free_span_soup(tdev->spans);
+	tdev->spans = NULL;
+
+	/* TODO: smart sorting of blocks in reading order */
+	/* TODO: unicode NFC normalization */
+
+	fz_bidi_reorder_text_page(ctx, tdev->page);
+}
+
+static void
+fz_text_free_user(fz_device *dev)
+{
+	fz_text_device *tdev = dev->user;
+	free_span_soup(tdev->spans);
+	fz_free(dev->ctx, tdev);
 }
 
 fz_device *
@@ -984,12 +999,14 @@ fz_new_text_device(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
 	fz_text_device *tdev = fz_malloc_struct(ctx, fz_text_device);
 	tdev->sheet = sheet;
 	tdev->page = page;
-	tdev->spans = new_span_soup(ctx);
+	tdev->spans = NULL;
 	tdev->cur_span = NULL;
 	tdev->lastchar = ' ';
 
 	dev = fz_new_device(ctx, tdev);
 	dev->hints = FZ_IGNORE_IMAGE | FZ_IGNORE_SHADE;
+	dev->begin_page = fz_text_begin_page;
+	dev->end_page = fz_text_end_page;
 	dev->free_user = fz_text_free_user;
 	dev->fill_text = fz_text_fill_text;
 	dev->stroke_text = fz_text_stroke_text;
```