From 1047ef939a09d0ed3b43224402740704ec1befc9 Mon Sep 17 00:00:00 2001 From: Robin Watts Date: Thu, 4 Apr 2013 17:01:32 +0100 Subject: Tweak fz_text_page to include image records. Extract such records as part of the text device. --- fitz/dev_text.c | 521 ++++++++++++++++++++++++++++++++++-------------------- fitz/doc_search.c | 45 +++-- fitz/fitz.h | 39 +++- 3 files changed, 397 insertions(+), 208 deletions(-) (limited to 'fitz') diff --git a/fitz/dev_text.c b/fitz/dev_text.c index 8d6d34e8..baa8f78b 100644 --- a/fitz/dev_text.c +++ b/fitz/dev_text.c @@ -165,12 +165,16 @@ push_span(fz_context *ctx, fz_text_device *tdev, fz_text_span *span, int new_lin fz_text_line *line; fz_text_block *block; fz_text_page *page = tdev->page; + int prev_not_text = 0; - if (new_line) + if (page->len == 0 || page->blocks[page->len-1].type != FZ_PAGE_BLOCK_TEXT) + prev_not_text = 1; + + if (new_line || prev_not_text) { float size = fz_matrix_expansion(&span->transform); /* So, a new line. Part of the same block or not? 
*/ - if (distance == 0 || distance > size * 1.5 || distance < -size * PARAGRAPH_DIST || page->len == 0) + if (distance == 0 || distance > size * 1.5 || distance < -size * PARAGRAPH_DIST || page->len == 0 || prev_not_text) { /* New block */ if (page->len == page->cap) @@ -179,16 +183,19 @@ push_span(fz_context *ctx, fz_text_device *tdev, fz_text_span *span, int new_lin page->blocks = fz_resize_array(ctx, page->blocks, newcap, sizeof(*page->blocks)); page->cap = newcap; } - page->blocks[page->len].cap = 0; - page->blocks[page->len].len = 0; - page->blocks[page->len].lines = 0; - page->blocks[page->len].bbox = fz_empty_rect; + block = fz_malloc_struct(ctx, fz_text_block); + page->blocks[page->len].type = FZ_PAGE_BLOCK_TEXT; + page->blocks[page->len].u.text = block; + block->cap = 0; + block->len = 0; + block->lines = 0; + block->bbox = fz_empty_rect; page->len++; distance = 0; } /* New line */ - block = &page->blocks[page->len-1]; + block = page->blocks[page->len-1].u.text; if (block->len == block->cap) { int newcap = (block->cap ? 
block->cap*2 : 4); @@ -204,7 +211,7 @@ push_span(fz_context *ctx, fz_text_device *tdev, fz_text_span *span, int new_lin } /* Find last line and append to it */ - block = &page->blocks[page->len-1]; + block = page->blocks[page->len-1].u.text; line = &block->lines[block->len-1]; if (line->len == line->cap) @@ -455,16 +462,41 @@ fz_free_text_line_contents(fz_context *ctx, fz_text_line *line) fz_free(ctx, line->spans); } +static void +fz_free_text_block(fz_context *ctx, fz_text_block *block) +{ + fz_text_line *line; + if (block == NULL) + return; + for (line = block->lines; line < block->lines + block->len; line++) + fz_free_text_line_contents(ctx, line); + fz_free(ctx, block->lines); +} + +static void +fz_free_image_block(fz_context *ctx, fz_image_block *block) +{ + if (block == NULL) + return; + fz_drop_image(ctx, block->image); + fz_drop_colorspace(ctx, block->cspace); +} + void fz_free_text_page(fz_context *ctx, fz_text_page *page) { - fz_text_block *block; - fz_text_line *line; + fz_page_block *block; for (block = page->blocks; block < page->blocks + page->len; block++) { - for (line = block->lines; line < block->lines + block->len; line++) - fz_free_text_line_contents(ctx, line); - fz_free(ctx, block->lines); + switch(block->type) + { + case FZ_PAGE_BLOCK_TEXT: + fz_free_text_block(ctx, block->u.text); + break; + case FZ_PAGE_BLOCK_IMAGE: + fz_free_image_block(ctx, block->u.image); + break; + } } fz_free(ctx, page->blocks); fz_free(ctx, page); @@ -798,6 +830,43 @@ fz_text_ignore_text(fz_device *dev, fz_text *text, const fz_matrix *ctm) fz_text_extract(dev->ctx, tdev, text, ctm, style); } +static void +fz_text_fill_image_mask(fz_device *dev, fz_image *img, const fz_matrix *ctm, + fz_colorspace *cspace, float *color, float alpha) +{ + fz_text_device *tdev = dev->user; + fz_text_page *page = tdev->page; + fz_image_block *block; + fz_context *ctx = dev->ctx; + + /* If the alpha is less than 50% then it's probably a watermark or + * effect or something. 
Skip it */ + if (alpha < 0.5) + return; + + /* New block */ + if (page->len == page->cap) + { + int newcap = (page->cap ? page->cap*2 : 4); + page->blocks = fz_resize_array(ctx, page->blocks, newcap, sizeof(*page->blocks)); + page->cap = newcap; + } + block = fz_malloc_struct(ctx, fz_image_block); + page->blocks[page->len].type = FZ_PAGE_BLOCK_IMAGE; + page->blocks[page->len].u.image = block; + block->image = fz_keep_image(ctx, img); + block->cspace = fz_keep_colorspace(ctx, cspace); + if (cspace) + memcpy(block->colors, color, sizeof(block->colors[0])*cspace->n); + page->len++; +} + +static void +fz_text_fill_image(fz_device *dev, fz_image *img, const fz_matrix *ctm, float alpha) +{ + fz_text_fill_image_mask(dev, img, ctm, NULL, NULL, alpha); +} + static void fz_text_free_user(fz_device *dev) { @@ -837,6 +906,8 @@ fz_new_text_device(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page) dev->clip_text = fz_text_clip_text; dev->clip_stroke_text = fz_text_clip_stroke_text; dev->ignore_text = fz_text_ignore_text; + dev->fill_image = fz_text_fill_image; + dev->fill_image_mask = fz_text_fill_image_mask; return dev; } @@ -912,7 +983,6 @@ fz_print_text_page_html(fz_context *ctx, fz_output *out, fz_text_page *page) { int block_n, line_n, span_n, ch_n; fz_text_style *style = NULL; - fz_text_block *block; fz_text_line *line; void *last_region = NULL; @@ -920,122 +990,129 @@ fz_print_text_page_html(fz_context *ctx, fz_output *out, fz_text_page *page) for (block_n = 0; block_n < page->len; block_n++) { - block = &page->blocks[block_n]; - fz_printf(out, "

\n"); - for (line_n = 0; line_n < block->len; line_n++) + switch(page->blocks[block_n].type) { - int lastcol=-1; - line = &block->lines[line_n]; - style = NULL; - - if (line->region != last_region) - { - if (last_region) - fz_printf(out, "

"); - fz_printf(out, "
"); - last_region = line->region; - } - fz_printf(out, "
region) - fz_printf(out, " region=\"%x\"", line->region); -#endif - fz_printf(out, ">"); - for (span_n = 0; span_n < line->len; span_n++) + case FZ_PAGE_BLOCK_TEXT: + { + fz_text_block * block = page->blocks[block_n].u.text; + fz_printf(out, "

\n"); + for (line_n = 0; line_n < block->len; line_n++) { - fz_text_span *span = line->spans[span_n]; - float size = fz_matrix_expansion(&span->transform); - float base_offset = span->base_offset / size; + int lastcol=-1; + line = &block->lines[line_n]; + style = NULL; - if (lastcol != span->column) + if (line->region != last_region) { - if (lastcol >= 0) - { + if (last_region) fz_printf(out, "

"); - } - /* If we skipped any columns then output some spacer spans */ - while (lastcol < span->column-1) + fz_printf(out, "
"); + last_region = line->region; + } + fz_printf(out, "
region) + fz_printf(out, " region=\"%x\"", line->region); +#endif + fz_printf(out, ">"); + for (span_n = 0; span_n < line->len; span_n++) + { + fz_text_span *span = line->spans[span_n]; + float size = fz_matrix_expansion(&span->transform); + float base_offset = span->base_offset / size; + + if (lastcol != span->column) { - fz_printf(out, "
"); + if (lastcol >= 0) + { + fz_printf(out, "
"); + } + /* If we skipped any columns then output some spacer spans */ + while (lastcol < span->column-1) + { + fz_printf(out, "
"); + lastcol++; + } lastcol++; - } - lastcol++; - /* Now output the span to contain this entire column */ - fz_printf(out, "
len; sn++) + /* Now output the span to contain this entire column */ + fz_printf(out, "
spans[sn]->column != lastcol) - break; + int sn; + for (sn = span_n+1; sn < line->len; sn++) + { + if (line->spans[sn]->column != lastcol) + break; + } + fz_printf(out, "width:%g%%;align:%s", span->column_width, (span->align == 0 ? "left" : (span->align == 1 ? "center" : "right"))); } - fz_printf(out, "width:%g%%;align:%s", span->column_width, (span->align == 0 ? "left" : (span->align == 1 ? "center" : "right"))); + if (span->indent > 1) + fz_printf(out, ";padding-left:1em;text-indent:-1em"); + if (span->indent < -1) + fz_printf(out, ";text-indent:1em"); + fz_printf(out, "\">"); } - if (span->indent > 1) - fz_printf(out, ";padding-left:1em;text-indent:-1em"); - if (span->indent < -1) - fz_printf(out, ";text-indent:1em"); - fz_printf(out, "\">"); - } #ifdef DEBUG_INTERNALS - fz_printf(out, "column) - fz_printf(out, " col=\"%x\"", span->column); - fz_printf(out, ">"); + fz_printf(out, "column) + fz_printf(out, " col=\"%x\"", span->column); + fz_printf(out, ">"); #endif - if (span->spacing >= 1) - fz_printf(out, " "); - if (base_offset > SUBSCRIPT_OFFSET) - fz_printf(out, ""); - else if (base_offset < SUPERSCRIPT_OFFSET) - fz_printf(out, ""); - for (ch_n = 0; ch_n < span->len; ch_n++) - { - fz_text_char *ch = &span->text[ch_n]; - if (style != ch->style) + if (span->spacing >= 1) + fz_printf(out, " "); + if (base_offset > SUBSCRIPT_OFFSET) + fz_printf(out, ""); + else if (base_offset < SUPERSCRIPT_OFFSET) + fz_printf(out, ""); + for (ch_n = 0; ch_n < span->len; ch_n++) { - if (style) - fz_print_style_end(out, style); - fz_print_style_begin(out, ch->style); - style = ch->style; - } + fz_text_char *ch = &span->text[ch_n]; + if (style != ch->style) + { + if (style) + fz_print_style_end(out, style); + fz_print_style_begin(out, ch->style); + style = ch->style; + } - if (ch->c == '<') - fz_printf(out, "<"); - else if (ch->c == '>') - fz_printf(out, ">"); - else if (ch->c == '&') - fz_printf(out, "&"); - else if (ch->c >= 32 && ch->c <= 127) - fz_printf(out, "%c", ch->c); - 
else - fz_printf(out, "&#x%x;", ch->c); - } - if (style) - { - fz_print_style_end(out, style); - style = NULL; - } - if (base_offset > SUBSCRIPT_OFFSET) - fz_printf(out, ""); - else if (base_offset < SUPERSCRIPT_OFFSET) - fz_printf(out, ""); + if (ch->c == '<') + fz_printf(out, "<"); + else if (ch->c == '>') + fz_printf(out, ">"); + else if (ch->c == '&') + fz_printf(out, "&"); + else if (ch->c >= 32 && ch->c <= 127) + fz_printf(out, "%c", ch->c); + else + fz_printf(out, "&#x%x;", ch->c); + } + if (style) + { + fz_print_style_end(out, style); + style = NULL; + } + if (base_offset > SUBSCRIPT_OFFSET) + fz_printf(out, ""); + else if (base_offset < SUPERSCRIPT_OFFSET) + fz_printf(out, ""); #ifdef DEBUG_INTERNALS - fz_printf(out, ""); + fz_printf(out, ""); #endif + } + /* Close our floating span */ + fz_printf(out, "
"); + /* Close the line */ + fz_printf(out, "
"); + fz_printf(out, "\n"); } - /* Close our floating span */ + /* Close the metaline */ fz_printf(out, "
"); -#ifdef DEBUG_INTERNALS -#endif - /* Close the line */ - fz_printf(out, "
"); - fz_printf(out, "\n"); + last_region = NULL; + fz_printf(out, "

\n"); + break; + } + case FZ_PAGE_BLOCK_IMAGE: + break; } - /* Close the metaline */ - fz_printf(out, ""); - last_region = NULL; - fz_printf(out, "

\n"); } fz_printf(out, "\n"); @@ -1044,69 +1121,82 @@ fz_print_text_page_html(fz_context *ctx, fz_output *out, fz_text_page *page) void fz_print_text_page_xml(fz_context *ctx, fz_output *out, fz_text_page *page) { - fz_text_block *block; - fz_text_line *line; - char *s; + int block_n; fz_printf(out, "\n"); - for (block = page->blocks; block < page->blocks + page->len; block++) + for (block_n = 0; block_n < page->len; block_n++) { - fz_printf(out, "\n", - block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1); - for (line = block->lines; line < block->lines + block->len; line++) + switch(page->blocks[block_n].type) { - int span_num; - fz_printf(out, "\n", - line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1); - for (span_num = 0; span_num < line->len; span_num++) + case FZ_PAGE_BLOCK_TEXT: + { + fz_text_block *block = page->blocks[block_n].u.text; + fz_text_line *line; + char *s; + + fz_printf(out, "\n", + block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1); + for (line = block->lines; line < block->lines + block->len; line++) { - fz_text_span *span = line->spans[span_num]; - fz_text_style *style = NULL; - int char_num; - for (char_num = 0; char_num < span->len; char_num++) + int span_num; + fz_printf(out, "\n", + line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1); + for (span_num = 0; span_num < line->len; span_num++) { - fz_text_char *ch = &span->text[char_num]; - if (ch->style != style) + fz_text_span *span = line->spans[span_num]; + fz_text_style *style = NULL; + int char_num; + for (char_num = 0; char_num < span->len; char_num++) { - if (style) + fz_text_char *ch = &span->text[char_num]; + if (ch->style != style) { - fz_printf(out, "\n"); + if (style) + { + fz_printf(out, "\n"); + } + style = ch->style; + s = strchr(style->font->name, '+'); + s = s ? 
s + 1 : style->font->name; + fz_printf(out, "\n", + span->bbox.x0, span->bbox.y0, span->bbox.x1, span->bbox.y1, + s, style->size); } - style = ch->style; - s = strchr(style->font->name, '+'); - s = s ? s + 1 : style->font->name; - fz_printf(out, "\n", - span->bbox.x0, span->bbox.y0, span->bbox.x1, span->bbox.y1, - s, style->size); - } - { - fz_rect rect; - fz_text_char_bbox(&rect, span, char_num); - fz_printf(out, "p.x, ch->p.y); - } - switch (ch->c) - { - case '<': fz_printf(out, "<"); break; - case '>': fz_printf(out, ">"); break; - case '&': fz_printf(out, "&"); break; - case '"': fz_printf(out, """); break; - case '\'': fz_printf(out, "'"); break; - default: - if (ch->c >= 32 && ch->c <= 127) - fz_printf(out, "%c", ch->c); - else - fz_printf(out, "&#x%x;", ch->c); - break; + { + fz_rect rect; + fz_text_char_bbox(&rect, span, char_num); + fz_printf(out, "p.x, ch->p.y); + } + switch (ch->c) + { + case '<': fz_printf(out, "<"); break; + case '>': fz_printf(out, ">"); break; + case '&': fz_printf(out, "&"); break; + case '"': fz_printf(out, """); break; + case '\'': fz_printf(out, "'"); break; + default: + if (ch->c >= 32 && ch->c <= 127) + fz_printf(out, "%c", ch->c); + else + fz_printf(out, "&#x%x;", ch->c); + break; + } + fz_printf(out, "\"/>\n"); } - fz_printf(out, "\"/>\n"); + if (style) + fz_printf(out, "\n"); } - if (style) - fz_printf(out, "\n"); + fz_printf(out, "\n"); } - fz_printf(out, "\n"); + fz_printf(out, "\n"); + break; + } + case FZ_PAGE_BLOCK_IMAGE: + { + break; } - fz_printf(out, "\n"); + } } fz_printf(out, "\n"); } @@ -1114,30 +1204,41 @@ fz_print_text_page_xml(fz_context *ctx, fz_output *out, fz_text_page *page) void fz_print_text_page(fz_context *ctx, fz_output *out, fz_text_page *page) { - fz_text_block *block; - fz_text_line *line; - fz_text_char *ch; - char utf[10]; - int i, n; + int block_n; - for (block = page->blocks; block < page->blocks + page->len; block++) + for (block_n = 0; block_n < page->len; block_n++) { - for (line = 
block->lines; line < block->lines + block->len; line++) + switch(page->blocks[block_n].type) { - int span_num; - for (span_num = 0; span_num < line->len; span_num++) + case FZ_PAGE_BLOCK_TEXT: + { + fz_text_block *block = page->blocks[block_n].u.text; + fz_text_line *line; + fz_text_char *ch; + char utf[10]; + int i, n; + + for (line = block->lines; line < block->lines + block->len; line++) { - fz_text_span *span = line->spans[span_num]; - for (ch = span->text; ch < span->text + span->len; ch++) + int span_num; + for (span_num = 0; span_num < line->len; span_num++) { - n = fz_runetochar(utf, ch->c); - for (i = 0; i < n; i++) - fz_printf(out, "%c", utf[i]); + fz_text_span *span = line->spans[span_num]; + for (ch = span->text; ch < span->text + span->len; ch++) + { + n = fz_runetochar(utf, ch->c); + for (i = 0; i < n; i++) + fz_printf(out, "%c", utf[i]); + } } + fz_printf(out, "\n"); } fz_printf(out, "\n"); + break; + } + case FZ_PAGE_BLOCK_IMAGE: + break; } - fz_printf(out, "\n"); } } @@ -1280,6 +1381,7 @@ static void split_block(fz_context *ctx, fz_text_page *page, int block_num, int linenum) { int split_len; + fz_text_block *block, *block2; if (page->len == page->cap) { @@ -1291,17 +1393,22 @@ split_block(fz_context *ctx, fz_text_page *page, int block_num, int linenum) memmove(page->blocks+block_num+1, page->blocks+block_num, (page->len - block_num)*sizeof(*page->blocks)); page->len++; - split_len = page->blocks[block_num].len - linenum; - page->blocks[block_num+1].bbox = page->blocks[block_num].bbox; /* FIXME! 
*/ - page->blocks[block_num+1].cap = 0; - page->blocks[block_num+1].len = 0; - page->blocks[block_num+1].lines = NULL; - page->blocks[block_num+1].lines = fz_malloc_array(ctx, split_len, sizeof(fz_text_line)); - page->blocks[block_num+1].cap = page->blocks[block_num+1].len; - page->blocks[block_num+1].len = split_len; - page->blocks[block_num].len = linenum; - memcpy(page->blocks[block_num+1].lines, page->blocks[block_num].lines + linenum, split_len * sizeof(fz_text_line)); - page->blocks[block_num+1].lines[0].distance = 0; + block2 = fz_malloc_struct(ctx, fz_text_block); + block = page->blocks[block_num].u.text; + + page->blocks[block_num+1].type = FZ_PAGE_BLOCK_TEXT; + page->blocks[block_num+1].u.text = block2; + split_len = block->len - linenum; + block2->bbox = block->bbox; /* FIXME! */ + block2->cap = 0; + block2->len = 0; + block2->lines = NULL; + block2->lines = fz_malloc_array(ctx, split_len, sizeof(fz_text_line)); + block2->cap = block2->len; + block2->len = split_len; + block->len = linenum; + memcpy(block2->lines, block->lines + linenum, split_len * sizeof(fz_text_line)); + block2->lines[0].distance = 0; } static inline int @@ -2115,7 +2222,6 @@ dehyphenate(fz_text_span *s1, fz_text_span *s2) void fz_text_analysis(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page) { - fz_text_block *block; fz_text_line *line; line_heights *lh; region_masks *rms; @@ -2128,8 +2234,14 @@ fz_text_analysis(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page) /* Step 1: Gather the line height information */ lh = new_line_heights(ctx); - for (block = page->blocks; block < page->blocks + page->len; block++) + for (block_num = 0; block_num < page->len; block_num++) { + fz_text_block *block; + + if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + block = page->blocks[block_num].u.text; + for (line = block->lines; line < block->lines + block->len; line++) { /* For every style in the line, add lineheight to the @@ -2209,7 +2321,12 @@ list_entry: for 
(block_num = 0; block_num < page->len; block_num++) { int line_num; - block = &page->blocks[block_num]; + fz_text_block *block; + + if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + block = page->blocks[block_num].u.text; + for (line_num = 0; line_num < block->len; line_num++) { /* For every style in the line, check to see if lineheight @@ -2280,8 +2397,14 @@ force_paragraph: rms = new_region_masks(ctx); /* Step 1: Form the region masks and store them into a list with the * normalised baseline vectors. */ - for (block = page->blocks; block < page->blocks + page->len; block++) + for (block_num = 0; block_num < page->len; block_num++) { + fz_text_block *block; + + if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + block = page->blocks[block_num].u.text; + for (line = block->lines; line < block->lines + block->len; line++) { fz_point blv; @@ -2362,8 +2485,14 @@ force_paragraph: * which region mask. */ { region_mask *prev_match = NULL; - for (block = page->blocks; block < page->blocks + page->len; block++) + for (block_num = 0; block_num < page->len; block_num++) { + fz_text_block *block; + + if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + block = page->blocks[block_num].u.text; + for (line = block->lines; line < block->lines + block->len; line++) { fz_point blv; @@ -2449,12 +2578,18 @@ force_paragraph: /* Step 7: Collate lines within a block that share the same region * mask. */ - for (block = page->blocks; block < page->blocks + page->len; block++) + for (block_num = 0; block_num < page->len; block_num++) { int line_num; int prev_line_num; int last_from = -1; + fz_text_block *block; + + if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + block = page->blocks[block_num].u.text; + /* First merge lines. This may leave empty lines behind. 
*/ for (prev_line_num = 0, line_num = 1; line_num < block->len; line_num++) { diff --git a/fitz/doc_search.c b/fitz/doc_search.c index 1421cbd0..2d4233c2 100644 --- a/fitz/doc_search.c +++ b/fitz/doc_search.c @@ -10,11 +10,17 @@ static inline int fz_tolower(int c) fz_char_and_box *fz_text_char_at(fz_char_and_box *cab, fz_text_page *page, int idx) { - fz_text_block *block; - fz_text_line *line; - int ofs = 0; - for (block = page->blocks; block < page->blocks + page->len; block++) + int block_num; + + for (block_num = 0; block_num < page->len; block_num++) { + fz_text_line *line; + int ofs = 0; + fz_text_block *block; + + if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + block = page->blocks[block_num].u.text; for (line = block->lines; line < block->lines + block->len; line++) { int span_num; @@ -60,11 +66,17 @@ static fz_rect *bboxat(fz_text_page *page, int idx, fz_rect *bbox) static int textlen(fz_text_page *page) { - fz_text_block *block; - fz_text_line *line; int len = 0; - for (block = page->blocks; block < page->blocks + page->len; block++) + int block_num; + + for (block_num = 0; block_num < page->len; block_num++) { + fz_text_block *block; + fz_text_line *line; + + if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + block = page->blocks[block_num].u.text; for (line = block->lines; line < block->lines + block->len; line++) { int span_num; @@ -149,7 +161,7 @@ fz_highlight_selection(fz_context *ctx, fz_text_page *page, fz_rect rect, fz_rec fz_rect linebox, charbox; fz_text_block *block; fz_text_line *line; - int i, hit_count; + int i, block_num, hit_count; float x0 = rect.x0; float x1 = rect.x1; @@ -158,8 +170,11 @@ fz_highlight_selection(fz_context *ctx, fz_text_page *page, fz_rect rect, fz_rec hit_count = 0; - for (block = page->blocks; block < page->blocks + page->len; block++) + for (block_num = 0; block_num < page->len; block_num++) { + if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + block = 
page->blocks[block_num].u.text; for (line = block->lines; line < block->lines + block->len; line++) { int span_num; @@ -198,9 +213,7 @@ fz_copy_selection(fz_context *ctx, fz_text_page *page, fz_rect rect) { fz_buffer *buffer; fz_rect hitbox; - fz_text_block *block; - fz_text_line *line; - int c, i, seen = 0; + int c, i, block_num, seen = 0; char *s; float x0 = rect.x0; @@ -210,8 +223,14 @@ fz_copy_selection(fz_context *ctx, fz_text_page *page, fz_rect rect) buffer = fz_new_buffer(ctx, 1024); - for (block = page->blocks; block < page->blocks + page->len; block++) + for (block_num = 0; block_num < page->len; block_num++) { + fz_text_block *block; + fz_text_line *line; + + if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT) + continue; + block = page->blocks[block_num].u.text; for (line = block->lines; line < block->lines + block->len; line++) { int span_num; diff --git a/fitz/fitz.h b/fitz/fitz.h index 45abe6d6..ff2fa57e 100644 --- a/fitz/fitz.h +++ b/fitz/fitz.h @@ -1849,6 +1849,8 @@ typedef struct fz_text_char_s fz_text_char; typedef struct fz_text_span_s fz_text_span; typedef struct fz_text_line_s fz_text_line; typedef struct fz_text_block_s fz_text_block; +typedef struct fz_image_block_s fz_image_block; +typedef struct fz_page_block_s fz_page_block; typedef struct fz_text_sheet_s fz_text_sheet; typedef struct fz_text_page_s fz_text_page; @@ -1881,14 +1883,33 @@ struct fz_text_style_s }; /* - fz_text_page: A text page is a list of blocks of text, together with + fz_text_page: A text page is a list of page blocks, together with an overall bounding box. */ struct fz_text_page_s { fz_rect mediabox; int len, cap; - fz_text_block *blocks; + fz_page_block *blocks; +}; + +/* + fz_page_block: A page block is a typed block pointer. 
+*/ +struct fz_page_block_s +{ + int type; + union + { + fz_text_block *text; + fz_image_block *image; + } u; +}; + +enum +{ + FZ_PAGE_BLOCK_TEXT = 0, + FZ_PAGE_BLOCK_IMAGE = 1 }; /* @@ -1905,6 +1926,20 @@ struct fz_text_block_s enum { FZ_MAX_COLORS = 32 }; +/* + fz_image_block: An image block is an image, together with its bounding + box and matrix, and the colorspace and colors recorded for it. A + collection of blocks makes up a page. +*/ +struct fz_image_block_s +{ + fz_rect bbox; + fz_matrix mat; + fz_image *image; + fz_colorspace *cspace; + float colors[FZ_MAX_COLORS]; +}; + /* fz_text_line: A text line is a list of text spans, with the same baseline. In typical cases this should correspond (as expected) to -- cgit v1.2.3