summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Robin Watts <robin.watts@artifex.com> 2013-04-04 17:01:32 +0100
committer Robin Watts <robin.watts@artifex.com> 2013-04-25 17:12:18 +0100
commit 1047ef939a09d0ed3b43224402740704ec1befc9 (patch)
tree 88c16586fb57fd97e57c647a95828a2f3fb7785e
parent ec3c3c846820c0be29f107e731dc2f7616d1fb6c (diff)
download mupdf-1047ef939a09d0ed3b43224402740704ec1befc9.tar.xz
Tweak fz_text_page to include image records.
Extract such records as part of the text device.
-rw-r--r-- apps/pdfapp.c 24
-rw-r--r-- fitz/dev_text.c 521
-rw-r--r-- fitz/doc_search.c 45
-rw-r--r-- fitz/fitz.h 39
4 files changed, 415 insertions, 214 deletions
diff --git a/apps/pdfapp.c b/apps/pdfapp.c
index 99cb386b..c3b7d54f 100644
--- a/apps/pdfapp.c
+++ b/apps/pdfapp.c
@@ -761,11 +761,17 @@ void pdfapp_gotopage(pdfapp_t *app, int number)
static int textlen(fz_text_page *page)
{
- fz_text_block *block;
- fz_text_line *line;
int len = 0;
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ int block_num;
+
+ for (block_num = 0; block_num < page->len; block_num++)
{
+ fz_text_line *line;
+ fz_text_block *block;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
for (line = block->lines; line < block->lines + block->len; line++)
{
int span_num;
@@ -1593,10 +1599,9 @@ void pdfapp_oncopy(pdfapp_t *app, unsigned short *ucsbuf, int ucslen)
fz_rect hitbox;
fz_matrix ctm;
fz_text_page *page = app->page_text;
- fz_text_block *block;
- fz_text_line *line;
int c, i, p;
int seen = 0;
+ int block_num;
int x0 = app->selr.x0;
int x1 = app->selr.x1;
@@ -1607,8 +1612,15 @@ void pdfapp_oncopy(pdfapp_t *app, unsigned short *ucsbuf, int ucslen)
p = 0;
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ for (block_num = 0; block_num < page->len; block_num++)
{
+ fz_text_line *line;
+ fz_text_block *block;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
+
for (line = block->lines; line < block->lines + block->len; line++)
{
int span_num;
diff --git a/fitz/dev_text.c b/fitz/dev_text.c
index 8d6d34e8..baa8f78b 100644
--- a/fitz/dev_text.c
+++ b/fitz/dev_text.c
@@ -165,12 +165,16 @@ push_span(fz_context *ctx, fz_text_device *tdev, fz_text_span *span, int new_lin
fz_text_line *line;
fz_text_block *block;
fz_text_page *page = tdev->page;
+ int prev_not_text = 0;
- if (new_line)
+ if (page->len == 0 || page->blocks[page->len-1].type != FZ_PAGE_BLOCK_TEXT)
+ prev_not_text = 1;
+
+ if (new_line || prev_not_text)
{
float size = fz_matrix_expansion(&span->transform);
/* So, a new line. Part of the same block or not? */
- if (distance == 0 || distance > size * 1.5 || distance < -size * PARAGRAPH_DIST || page->len == 0)
+ if (distance == 0 || distance > size * 1.5 || distance < -size * PARAGRAPH_DIST || page->len == 0 || prev_not_text)
{
/* New block */
if (page->len == page->cap)
@@ -179,16 +183,19 @@ push_span(fz_context *ctx, fz_text_device *tdev, fz_text_span *span, int new_lin
page->blocks = fz_resize_array(ctx, page->blocks, newcap, sizeof(*page->blocks));
page->cap = newcap;
}
- page->blocks[page->len].cap = 0;
- page->blocks[page->len].len = 0;
- page->blocks[page->len].lines = 0;
- page->blocks[page->len].bbox = fz_empty_rect;
+ block = fz_malloc_struct(ctx, fz_text_block);
+ page->blocks[page->len].type = FZ_PAGE_BLOCK_TEXT;
+ page->blocks[page->len].u.text = block;
+ block->cap = 0;
+ block->len = 0;
+ block->lines = 0;
+ block->bbox = fz_empty_rect;
page->len++;
distance = 0;
}
/* New line */
- block = &page->blocks[page->len-1];
+ block = page->blocks[page->len-1].u.text;
if (block->len == block->cap)
{
int newcap = (block->cap ? block->cap*2 : 4);
@@ -204,7 +211,7 @@ push_span(fz_context *ctx, fz_text_device *tdev, fz_text_span *span, int new_lin
}
/* Find last line and append to it */
- block = &page->blocks[page->len-1];
+ block = page->blocks[page->len-1].u.text;
line = &block->lines[block->len-1];
if (line->len == line->cap)
@@ -455,16 +462,41 @@ fz_free_text_line_contents(fz_context *ctx, fz_text_line *line)
fz_free(ctx, line->spans);
}
+/*
+	fz_free_text_block: Free a text block, its array of lines and the
+	contents of every line in it.
+
+	block: The block to free. May be NULL, in which case nothing is done.
+*/
+static void
+fz_free_text_block(fz_context *ctx, fz_text_block *block)
+{
+	fz_text_line *line;
+	if (block == NULL)
+		return;
+	for (line = block->lines; line < block->lines + block->len; line++)
+		fz_free_text_line_contents(ctx, line);
+	fz_free(ctx, block->lines);
+	/* Text blocks are allocated one at a time with fz_malloc_struct and
+	 * are only pointed to from the page's block array, so the struct
+	 * itself has to be released here or it leaks. */
+	fz_free(ctx, block);
+}
+
+/*
+	fz_free_image_block: Free an image block, dropping the references
+	it holds on its image and colorspace.
+
+	block: The block to free. May be NULL, in which case nothing is done.
+*/
+static void
+fz_free_image_block(fz_context *ctx, fz_image_block *block)
+{
+	if (block == NULL)
+		return;
+	fz_drop_image(ctx, block->image);
+	fz_drop_colorspace(ctx, block->cspace);
+	/* Image blocks are allocated with fz_malloc_struct when the image is
+	 * recorded; free the struct too, not just what it refers to. */
+	fz_free(ctx, block);
+}
+
void
fz_free_text_page(fz_context *ctx, fz_text_page *page)
{
- fz_text_block *block;
- fz_text_line *line;
+ fz_page_block *block;
for (block = page->blocks; block < page->blocks + page->len; block++)
{
- for (line = block->lines; line < block->lines + block->len; line++)
- fz_free_text_line_contents(ctx, line);
- fz_free(ctx, block->lines);
+ switch(block->type)
+ {
+ case FZ_PAGE_BLOCK_TEXT:
+ fz_free_text_block(ctx, block->u.text);
+ break;
+ case FZ_PAGE_BLOCK_IMAGE:
+ fz_free_image_block(ctx, block->u.image);
+ break;
+ }
}
fz_free(ctx, page->blocks);
fz_free(ctx, page);
@@ -799,6 +831,43 @@ fz_text_ignore_text(fz_device *dev, fz_text *text, const fz_matrix *ctm)
}
+/*
+	fz_text_fill_image_mask: Record an image drawn on the page as a new
+	image block appended to the text page's list of blocks.
+
+	img: The image. A reference is kept in the new block.
+
+	ctm: The transform supplied with the image. Copied into the block.
+
+	cspace, color: Colorspace and color values supplied with the image.
+	When cspace is non-NULL a reference is kept and cspace->n values
+	are copied from color. Both may be NULL.
+
+	alpha: Images drawn with an alpha below 0.5 are not recorded.
+*/
static void
+fz_text_fill_image_mask(fz_device *dev, fz_image *img, const fz_matrix *ctm,
+		fz_colorspace *cspace, float *color, float alpha)
+{
+	fz_text_device *tdev = dev->user;
+	fz_text_page *page = tdev->page;
+	fz_image_block *block;
+	fz_context *ctx = dev->ctx;
+
+	/* If the alpha is less than 50% then it's probably a watermark or
+	 * effect or something. Skip it */
+	if (alpha < 0.5)
+		return;
+
+	/* New block: grow the page's block array if it is full. */
+	if (page->len == page->cap)
+	{
+		int newcap = (page->cap ? page->cap*2 : 4);
+		page->blocks = fz_resize_array(ctx, page->blocks, newcap, sizeof(*page->blocks));
+		page->cap = newcap;
+	}
+	block = fz_malloc_struct(ctx, fz_image_block);
+	page->blocks[page->len].type = FZ_PAGE_BLOCK_IMAGE;
+	page->blocks[page->len].u.image = block;
+	block->image = fz_keep_image(ctx, img);
+	block->cspace = fz_keep_colorspace(ctx, cspace);
+	if (cspace)
+		memcpy(block->colors, color, sizeof(block->colors[0])*cspace->n);
+	/* Remember the transform the image was drawn with. Previously ctm
+	 * was ignored and block->mat was never assigned. */
+	if (ctm)
+		block->mat = *ctm;
+	/* NOTE(review): block->bbox is still not filled in here. */
+	page->len++;
+}
+
+/*
+	fz_text_fill_image: Record a plain image by handing it to
+	fz_text_fill_image_mask with no colorspace and no color values.
+*/
+static void
+fz_text_fill_image(fz_device *dev, fz_image *img, const fz_matrix *ctm, float alpha)
+{
+	fz_colorspace *no_cspace = NULL;
+	float *no_color = NULL;
+
+	fz_text_fill_image_mask(dev, img, ctm, no_cspace, no_color, alpha);
+}
+
+static void
fz_text_free_user(fz_device *dev)
{
fz_context *ctx = dev->ctx;
@@ -837,6 +906,8 @@ fz_new_text_device(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
dev->clip_text = fz_text_clip_text;
dev->clip_stroke_text = fz_text_clip_stroke_text;
dev->ignore_text = fz_text_ignore_text;
+ dev->fill_image = fz_text_fill_image;
+ dev->fill_image_mask = fz_text_fill_image_mask;
return dev;
}
@@ -912,7 +983,6 @@ fz_print_text_page_html(fz_context *ctx, fz_output *out, fz_text_page *page)
{
int block_n, line_n, span_n, ch_n;
fz_text_style *style = NULL;
- fz_text_block *block;
fz_text_line *line;
void *last_region = NULL;
@@ -920,122 +990,129 @@ fz_print_text_page_html(fz_context *ctx, fz_output *out, fz_text_page *page)
for (block_n = 0; block_n < page->len; block_n++)
{
- block = &page->blocks[block_n];
- fz_printf(out, "<div class=\"block\"><p>\n");
- for (line_n = 0; line_n < block->len; line_n++)
+ switch(page->blocks[block_n].type)
{
- int lastcol=-1;
- line = &block->lines[line_n];
- style = NULL;
-
- if (line->region != last_region)
- {
- if (last_region)
- fz_printf(out, "</div>");
- fz_printf(out, "<div class=\"metaline\">");
- last_region = line->region;
- }
- fz_printf(out, "<div class=\"line\"");
-#ifdef DEBUG_INTERNALS
- if (line->region)
- fz_printf(out, " region=\"%x\"", line->region);
-#endif
- fz_printf(out, ">");
- for (span_n = 0; span_n < line->len; span_n++)
+ case FZ_PAGE_BLOCK_TEXT:
+ {
+ fz_text_block * block = page->blocks[block_n].u.text;
+ fz_printf(out, "<div class=\"block\"><p>\n");
+ for (line_n = 0; line_n < block->len; line_n++)
{
- fz_text_span *span = line->spans[span_n];
- float size = fz_matrix_expansion(&span->transform);
- float base_offset = span->base_offset / size;
+ int lastcol=-1;
+ line = &block->lines[line_n];
+ style = NULL;
- if (lastcol != span->column)
+ if (line->region != last_region)
{
- if (lastcol >= 0)
- {
+ if (last_region)
fz_printf(out, "</div>");
- }
- /* If we skipped any columns then output some spacer spans */
- while (lastcol < span->column-1)
+ fz_printf(out, "<div class=\"metaline\">");
+ last_region = line->region;
+ }
+ fz_printf(out, "<div class=\"line\"");
+#ifdef DEBUG_INTERNALS
+ if (line->region)
+ fz_printf(out, " region=\"%x\"", line->region);
+#endif
+ fz_printf(out, ">");
+ for (span_n = 0; span_n < line->len; span_n++)
+ {
+ fz_text_span *span = line->spans[span_n];
+ float size = fz_matrix_expansion(&span->transform);
+ float base_offset = span->base_offset / size;
+
+ if (lastcol != span->column)
{
- fz_printf(out, "<div class=\"cell\"></div>");
+ if (lastcol >= 0)
+ {
+ fz_printf(out, "</div>");
+ }
+ /* If we skipped any columns then output some spacer spans */
+ while (lastcol < span->column-1)
+ {
+ fz_printf(out, "<div class=\"cell\"></div>");
+ lastcol++;
+ }
lastcol++;
- }
- lastcol++;
- /* Now output the span to contain this entire column */
- fz_printf(out, "<div class=\"cell\" style=\"");
- {
- int sn;
- for (sn = span_n+1; sn < line->len; sn++)
+ /* Now output the span to contain this entire column */
+ fz_printf(out, "<div class=\"cell\" style=\"");
{
- if (line->spans[sn]->column != lastcol)
- break;
+ int sn;
+ for (sn = span_n+1; sn < line->len; sn++)
+ {
+ if (line->spans[sn]->column != lastcol)
+ break;
+ }
+ fz_printf(out, "width:%g%%;align:%s", span->column_width, (span->align == 0 ? "left" : (span->align == 1 ? "center" : "right")));
}
- fz_printf(out, "width:%g%%;align:%s", span->column_width, (span->align == 0 ? "left" : (span->align == 1 ? "center" : "right")));
+ if (span->indent > 1)
+ fz_printf(out, ";padding-left:1em;text-indent:-1em");
+ if (span->indent < -1)
+ fz_printf(out, ";text-indent:1em");
+ fz_printf(out, "\">");
}
- if (span->indent > 1)
- fz_printf(out, ";padding-left:1em;text-indent:-1em");
- if (span->indent < -1)
- fz_printf(out, ";text-indent:1em");
- fz_printf(out, "\">");
- }
#ifdef DEBUG_INTERNALS
- fz_printf(out, "<span class=\"internal_span\"");
- if (span->column)
- fz_printf(out, " col=\"%x\"", span->column);
- fz_printf(out, ">");
+ fz_printf(out, "<span class=\"internal_span\"");
+ if (span->column)
+ fz_printf(out, " col=\"%x\"", span->column);
+ fz_printf(out, ">");
#endif
- if (span->spacing >= 1)
- fz_printf(out, " ");
- if (base_offset > SUBSCRIPT_OFFSET)
- fz_printf(out, "<sub>");
- else if (base_offset < SUPERSCRIPT_OFFSET)
- fz_printf(out, "<sup>");
- for (ch_n = 0; ch_n < span->len; ch_n++)
- {
- fz_text_char *ch = &span->text[ch_n];
- if (style != ch->style)
+ if (span->spacing >= 1)
+ fz_printf(out, " ");
+ if (base_offset > SUBSCRIPT_OFFSET)
+ fz_printf(out, "<sub>");
+ else if (base_offset < SUPERSCRIPT_OFFSET)
+ fz_printf(out, "<sup>");
+ for (ch_n = 0; ch_n < span->len; ch_n++)
{
- if (style)
- fz_print_style_end(out, style);
- fz_print_style_begin(out, ch->style);
- style = ch->style;
- }
+ fz_text_char *ch = &span->text[ch_n];
+ if (style != ch->style)
+ {
+ if (style)
+ fz_print_style_end(out, style);
+ fz_print_style_begin(out, ch->style);
+ style = ch->style;
+ }
- if (ch->c == '<')
- fz_printf(out, "&lt;");
- else if (ch->c == '>')
- fz_printf(out, "&gt;");
- else if (ch->c == '&')
- fz_printf(out, "&amp;");
- else if (ch->c >= 32 && ch->c <= 127)
- fz_printf(out, "%c", ch->c);
- else
- fz_printf(out, "&#x%x;", ch->c);
- }
- if (style)
- {
- fz_print_style_end(out, style);
- style = NULL;
- }
- if (base_offset > SUBSCRIPT_OFFSET)
- fz_printf(out, "</sub>");
- else if (base_offset < SUPERSCRIPT_OFFSET)
- fz_printf(out, "</sup>");
+ if (ch->c == '<')
+ fz_printf(out, "&lt;");
+ else if (ch->c == '>')
+ fz_printf(out, "&gt;");
+ else if (ch->c == '&')
+ fz_printf(out, "&amp;");
+ else if (ch->c >= 32 && ch->c <= 127)
+ fz_printf(out, "%c", ch->c);
+ else
+ fz_printf(out, "&#x%x;", ch->c);
+ }
+ if (style)
+ {
+ fz_print_style_end(out, style);
+ style = NULL;
+ }
+ if (base_offset > SUBSCRIPT_OFFSET)
+ fz_printf(out, "</sub>");
+ else if (base_offset < SUPERSCRIPT_OFFSET)
+ fz_printf(out, "</sup>");
#ifdef DEBUG_INTERNALS
- fz_printf(out, "</span>");
+ fz_printf(out, "</span>");
#endif
+ }
+ /* Close our floating span */
+ fz_printf(out, "</div>");
+ /* Close the line */
+ fz_printf(out, "</div>");
+ fz_printf(out, "\n");
}
- /* Close our floating span */
+ /* Close the metaline */
fz_printf(out, "</div>");
-#ifdef DEBUG_INTERNALS
-#endif
- /* Close the line */
- fz_printf(out, "</div>");
- fz_printf(out, "\n");
+ last_region = NULL;
+ fz_printf(out, "</p></div>\n");
+ break;
+ }
+ case FZ_PAGE_BLOCK_IMAGE:
+ break;
}
- /* Close the metaline */
- fz_printf(out, "</div>");
- last_region = NULL;
- fz_printf(out, "</p></div>\n");
}
fz_printf(out, "</div>\n");
@@ -1044,69 +1121,82 @@ fz_print_text_page_html(fz_context *ctx, fz_output *out, fz_text_page *page)
void
fz_print_text_page_xml(fz_context *ctx, fz_output *out, fz_text_page *page)
{
- fz_text_block *block;
- fz_text_line *line;
- char *s;
+ int block_n;
fz_printf(out, "<page>\n");
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ for (block_n = 0; block_n < page->len; block_n++)
{
- fz_printf(out, "<block bbox=\"%g %g %g %g\">\n",
- block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
- for (line = block->lines; line < block->lines + block->len; line++)
+ switch(page->blocks[block_n].type)
{
- int span_num;
- fz_printf(out, "<line bbox=\"%g %g %g %g\">\n",
- line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1);
- for (span_num = 0; span_num < line->len; span_num++)
+ case FZ_PAGE_BLOCK_TEXT:
+ {
+ fz_text_block *block = page->blocks[block_n].u.text;
+ fz_text_line *line;
+ char *s;
+
+ fz_printf(out, "<block bbox=\"%g %g %g %g\">\n",
+ block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
+ for (line = block->lines; line < block->lines + block->len; line++)
{
- fz_text_span *span = line->spans[span_num];
- fz_text_style *style = NULL;
- int char_num;
- for (char_num = 0; char_num < span->len; char_num++)
+ int span_num;
+ fz_printf(out, "<line bbox=\"%g %g %g %g\">\n",
+ line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1);
+ for (span_num = 0; span_num < line->len; span_num++)
{
- fz_text_char *ch = &span->text[char_num];
- if (ch->style != style)
+ fz_text_span *span = line->spans[span_num];
+ fz_text_style *style = NULL;
+ int char_num;
+ for (char_num = 0; char_num < span->len; char_num++)
{
- if (style)
+ fz_text_char *ch = &span->text[char_num];
+ if (ch->style != style)
{
- fz_printf(out, "</span>\n");
+ if (style)
+ {
+ fz_printf(out, "</span>\n");
+ }
+ style = ch->style;
+ s = strchr(style->font->name, '+');
+ s = s ? s + 1 : style->font->name;
+ fz_printf(out, "<span bbox=\"%g %g %g %g\" font=\"%s\" size=\"%g\">\n",
+ span->bbox.x0, span->bbox.y0, span->bbox.x1, span->bbox.y1,
+ s, style->size);
}
- style = ch->style;
- s = strchr(style->font->name, '+');
- s = s ? s + 1 : style->font->name;
- fz_printf(out, "<span bbox=\"%g %g %g %g\" font=\"%s\" size=\"%g\">\n",
- span->bbox.x0, span->bbox.y0, span->bbox.x1, span->bbox.y1,
- s, style->size);
- }
- {
- fz_rect rect;
- fz_text_char_bbox(&rect, span, char_num);
- fz_printf(out, "<char bbox=\"%g %g %g %g\" x=\"%g\" y=\"%g\" c=\"",
- rect.x0, rect.y0, rect.x1, rect.y1, ch->p.x, ch->p.y);
- }
- switch (ch->c)
- {
- case '<': fz_printf(out, "&lt;"); break;
- case '>': fz_printf(out, "&gt;"); break;
- case '&': fz_printf(out, "&amp;"); break;
- case '"': fz_printf(out, "&quot;"); break;
- case '\'': fz_printf(out, "&apos;"); break;
- default:
- if (ch->c >= 32 && ch->c <= 127)
- fz_printf(out, "%c", ch->c);
- else
- fz_printf(out, "&#x%x;", ch->c);
- break;
+ {
+ fz_rect rect;
+ fz_text_char_bbox(&rect, span, char_num);
+ fz_printf(out, "<char bbox=\"%g %g %g %g\" x=\"%g\" y=\"%g\" c=\"",
+ rect.x0, rect.y0, rect.x1, rect.y1, ch->p.x, ch->p.y);
+ }
+ switch (ch->c)
+ {
+ case '<': fz_printf(out, "&lt;"); break;
+ case '>': fz_printf(out, "&gt;"); break;
+ case '&': fz_printf(out, "&amp;"); break;
+ case '"': fz_printf(out, "&quot;"); break;
+ case '\'': fz_printf(out, "&apos;"); break;
+ default:
+ if (ch->c >= 32 && ch->c <= 127)
+ fz_printf(out, "%c", ch->c);
+ else
+ fz_printf(out, "&#x%x;", ch->c);
+ break;
+ }
+ fz_printf(out, "\"/>\n");
}
- fz_printf(out, "\"/>\n");
+ if (style)
+ fz_printf(out, "</span>\n");
}
- if (style)
- fz_printf(out, "</span>\n");
+ fz_printf(out, "</line>\n");
}
- fz_printf(out, "</line>\n");
+ fz_printf(out, "</block>\n");
+ break;
+ }
+ case FZ_PAGE_BLOCK_IMAGE:
+ {
+ break;
}
- fz_printf(out, "</block>\n");
+ }
}
fz_printf(out, "</page>\n");
}
@@ -1114,30 +1204,41 @@ fz_print_text_page_xml(fz_context *ctx, fz_output *out, fz_text_page *page)
void
fz_print_text_page(fz_context *ctx, fz_output *out, fz_text_page *page)
{
- fz_text_block *block;
- fz_text_line *line;
- fz_text_char *ch;
- char utf[10];
- int i, n;
+ int block_n;
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ for (block_n = 0; block_n < page->len; block_n++)
{
- for (line = block->lines; line < block->lines + block->len; line++)
+ switch(page->blocks[block_n].type)
{
- int span_num;
- for (span_num = 0; span_num < line->len; span_num++)
+ case FZ_PAGE_BLOCK_TEXT:
+ {
+ fz_text_block *block = page->blocks[block_n].u.text;
+ fz_text_line *line;
+ fz_text_char *ch;
+ char utf[10];
+ int i, n;
+
+ for (line = block->lines; line < block->lines + block->len; line++)
{
- fz_text_span *span = line->spans[span_num];
- for (ch = span->text; ch < span->text + span->len; ch++)
+ int span_num;
+ for (span_num = 0; span_num < line->len; span_num++)
{
- n = fz_runetochar(utf, ch->c);
- for (i = 0; i < n; i++)
- fz_printf(out, "%c", utf[i]);
+ fz_text_span *span = line->spans[span_num];
+ for (ch = span->text; ch < span->text + span->len; ch++)
+ {
+ n = fz_runetochar(utf, ch->c);
+ for (i = 0; i < n; i++)
+ fz_printf(out, "%c", utf[i]);
+ }
}
+ fz_printf(out, "\n");
}
fz_printf(out, "\n");
+ break;
+ }
+ case FZ_PAGE_BLOCK_IMAGE:
+ break;
}
- fz_printf(out, "\n");
}
}
@@ -1280,6 +1381,7 @@ static void
split_block(fz_context *ctx, fz_text_page *page, int block_num, int linenum)
{
int split_len;
+ fz_text_block *block, *block2;
if (page->len == page->cap)
{
@@ -1291,17 +1393,22 @@ split_block(fz_context *ctx, fz_text_page *page, int block_num, int linenum)
memmove(page->blocks+block_num+1, page->blocks+block_num, (page->len - block_num)*sizeof(*page->blocks));
page->len++;
- split_len = page->blocks[block_num].len - linenum;
- page->blocks[block_num+1].bbox = page->blocks[block_num].bbox; /* FIXME! */
- page->blocks[block_num+1].cap = 0;
- page->blocks[block_num+1].len = 0;
- page->blocks[block_num+1].lines = NULL;
- page->blocks[block_num+1].lines = fz_malloc_array(ctx, split_len, sizeof(fz_text_line));
- page->blocks[block_num+1].cap = page->blocks[block_num+1].len;
- page->blocks[block_num+1].len = split_len;
- page->blocks[block_num].len = linenum;
- memcpy(page->blocks[block_num+1].lines, page->blocks[block_num].lines + linenum, split_len * sizeof(fz_text_line));
- page->blocks[block_num+1].lines[0].distance = 0;
+ block2 = fz_malloc_struct(ctx, fz_text_block);
+ block = page->blocks[block_num].u.text;
+
+ page->blocks[block_num+1].type = FZ_PAGE_BLOCK_TEXT;
+ page->blocks[block_num+1].u.text = block2;
+ split_len = block->len - linenum;
+ block2->bbox = block->bbox; /* FIXME! */
+ block2->cap = 0;
+ block2->len = 0;
+ block2->lines = NULL;
+ block2->lines = fz_malloc_array(ctx, split_len, sizeof(fz_text_line));
+ block2->cap = block2->len;
+ block2->len = split_len;
+ block->len = linenum;
+ memcpy(block2->lines, block->lines + linenum, split_len * sizeof(fz_text_line));
+ block2->lines[0].distance = 0;
}
static inline int
@@ -2115,7 +2222,6 @@ dehyphenate(fz_text_span *s1, fz_text_span *s2)
void
fz_text_analysis(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
{
- fz_text_block *block;
fz_text_line *line;
line_heights *lh;
region_masks *rms;
@@ -2128,8 +2234,14 @@ fz_text_analysis(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page)
/* Step 1: Gather the line height information */
lh = new_line_heights(ctx);
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ for (block_num = 0; block_num < page->len; block_num++)
{
+ fz_text_block *block;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
+
for (line = block->lines; line < block->lines + block->len; line++)
{
/* For every style in the line, add lineheight to the
@@ -2209,7 +2321,12 @@ list_entry:
for (block_num = 0; block_num < page->len; block_num++)
{
int line_num;
- block = &page->blocks[block_num];
+ fz_text_block *block;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
+
for (line_num = 0; line_num < block->len; line_num++)
{
/* For every style in the line, check to see if lineheight
@@ -2280,8 +2397,14 @@ force_paragraph:
rms = new_region_masks(ctx);
/* Step 1: Form the region masks and store them into a list with the
* normalised baseline vectors. */
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ for (block_num = 0; block_num < page->len; block_num++)
{
+ fz_text_block *block;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
+
for (line = block->lines; line < block->lines + block->len; line++)
{
fz_point blv;
@@ -2362,8 +2485,14 @@ force_paragraph:
* which region mask. */
{
region_mask *prev_match = NULL;
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ for (block_num = 0; block_num < page->len; block_num++)
{
+ fz_text_block *block;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
+
for (line = block->lines; line < block->lines + block->len; line++)
{
fz_point blv;
@@ -2449,12 +2578,18 @@ force_paragraph:
/* Step 7: Collate lines within a block that share the same region
* mask. */
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ for (block_num = 0; block_num < page->len; block_num++)
{
int line_num;
int prev_line_num;
int last_from = -1;
+ fz_text_block *block;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
+
/* First merge lines. This may leave empty lines behind. */
for (prev_line_num = 0, line_num = 1; line_num < block->len; line_num++)
{
diff --git a/fitz/doc_search.c b/fitz/doc_search.c
index 1421cbd0..2d4233c2 100644
--- a/fitz/doc_search.c
+++ b/fitz/doc_search.c
@@ -10,11 +10,17 @@ static inline int fz_tolower(int c)
fz_char_and_box *fz_text_char_at(fz_char_and_box *cab, fz_text_page *page, int idx)
{
- fz_text_block *block;
- fz_text_line *line;
- int ofs = 0;
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ int block_num;
+
+ for (block_num = 0; block_num < page->len; block_num++)
{
+ fz_text_line *line;
+ int ofs = 0;
+ fz_text_block *block;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
for (line = block->lines; line < block->lines + block->len; line++)
{
int span_num;
@@ -60,11 +66,17 @@ static fz_rect *bboxat(fz_text_page *page, int idx, fz_rect *bbox)
static int textlen(fz_text_page *page)
{
- fz_text_block *block;
- fz_text_line *line;
int len = 0;
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ int block_num;
+
+ for (block_num = 0; block_num < page->len; block_num++)
{
+ fz_text_block *block;
+ fz_text_line *line;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
for (line = block->lines; line < block->lines + block->len; line++)
{
int span_num;
@@ -149,7 +161,7 @@ fz_highlight_selection(fz_context *ctx, fz_text_page *page, fz_rect rect, fz_rec
fz_rect linebox, charbox;
fz_text_block *block;
fz_text_line *line;
- int i, hit_count;
+ int i, block_num, hit_count;
float x0 = rect.x0;
float x1 = rect.x1;
@@ -158,8 +170,11 @@ fz_highlight_selection(fz_context *ctx, fz_text_page *page, fz_rect rect, fz_rec
hit_count = 0;
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ for (block_num = 0; block_num < page->len; block_num++)
{
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
for (line = block->lines; line < block->lines + block->len; line++)
{
int span_num;
@@ -198,9 +213,7 @@ fz_copy_selection(fz_context *ctx, fz_text_page *page, fz_rect rect)
{
fz_buffer *buffer;
fz_rect hitbox;
- fz_text_block *block;
- fz_text_line *line;
- int c, i, seen = 0;
+ int c, i, block_num, seen = 0;
char *s;
float x0 = rect.x0;
@@ -210,8 +223,14 @@ fz_copy_selection(fz_context *ctx, fz_text_page *page, fz_rect rect)
buffer = fz_new_buffer(ctx, 1024);
- for (block = page->blocks; block < page->blocks + page->len; block++)
+ for (block_num = 0; block_num < page->len; block_num++)
{
+ fz_text_block *block;
+ fz_text_line *line;
+
+ if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
+ continue;
+ block = page->blocks[block_num].u.text;
for (line = block->lines; line < block->lines + block->len; line++)
{
int span_num;
diff --git a/fitz/fitz.h b/fitz/fitz.h
index 45abe6d6..ff2fa57e 100644
--- a/fitz/fitz.h
+++ b/fitz/fitz.h
@@ -1849,6 +1849,8 @@ typedef struct fz_text_char_s fz_text_char;
typedef struct fz_text_span_s fz_text_span;
typedef struct fz_text_line_s fz_text_line;
typedef struct fz_text_block_s fz_text_block;
+typedef struct fz_image_block_s fz_image_block;
+typedef struct fz_page_block_s fz_page_block;
typedef struct fz_text_sheet_s fz_text_sheet;
typedef struct fz_text_page_s fz_text_page;
@@ -1881,14 +1883,33 @@ struct fz_text_style_s
};
/*
- fz_text_page: A text page is a list of blocks of text, together with
+ fz_text_page: A text page is a list of page blocks, together with
an overall bounding box.
*/
struct fz_text_page_s
{
fz_rect mediabox;
int len, cap;
- fz_text_block *blocks;
+ fz_page_block *blocks;
+};
+
+/*
+ fz_page_block: A page block is a typed block pointer. The type
+ field says which member of the union u holds the pointer.
+*/
+struct fz_page_block_s
+{
+ int type; /* FZ_PAGE_BLOCK_TEXT or FZ_PAGE_BLOCK_IMAGE */
+ union
+ {
+ fz_text_block *text; /* Used when type is FZ_PAGE_BLOCK_TEXT */
+ fz_image_block *image; /* Used when type is FZ_PAGE_BLOCK_IMAGE */
+ } u;
+};
+
+enum /* Values for the type field of fz_page_block */
+{
+ FZ_PAGE_BLOCK_TEXT = 0, /* u.text points to a fz_text_block */
+ FZ_PAGE_BLOCK_IMAGE = 1 /* u.image points to a fz_image_block */
};
/*
@@ -1906,6 +1927,20 @@ struct fz_text_block_s
enum { FZ_MAX_COLORS = 32 };
/*
+ fz_image_block: An image block records an image drawn on the page,
+ together with the colorspace and color values supplied with it
+ (if any). A collection of blocks makes up a page.
+*/
+struct fz_image_block_s
+{
+ fz_rect bbox; /* NOTE(review): presumably the bounds of the image on the page - confirm */
+ fz_matrix mat; /* NOTE(review): presumably the transform the image is drawn with - confirm */
+ fz_image *image; /* Reference kept by fz_text_fill_image_mask, dropped by fz_free_image_block */
+ fz_colorspace *cspace; /* Reference kept/dropped like image; NULL when no colorspace was supplied */
+ float colors[FZ_MAX_COLORS]; /* First cspace->n entries are copied in when cspace is non-NULL */
+};
+
+/*
fz_text_line: A text line is a list of text spans, with the same
baseline. In typical cases this should correspond (as expected) to
complete lines of text. A collection of lines makes up a block.