diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2013-06-19 15:29:44 +0200 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2013-06-20 16:45:35 +0200 |
commit | 0a927854a10e1e6b9770a81e2e1d9f3093631757 (patch) | |
tree | 3d65d820d9fdba2d0d394d99c36290c851b78ca0 /source/fitz/stext-device.c | |
parent | 1ae8f19179c5f0f8c6352b3c7855465325d5449a (diff) | |
download | mupdf-0a927854a10e1e6b9770a81e2e1d9f3093631757.tar.xz |
Rearrange source files.
Diffstat (limited to 'source/fitz/stext-device.c')
-rw-r--r-- | source/fitz/stext-device.c | 1027 |
1 files changed, 1027 insertions, 0 deletions
diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c new file mode 100644 index 00000000..89cf8566 --- /dev/null +++ b/source/fitz/stext-device.c @@ -0,0 +1,1027 @@ +#include "mupdf/fitz.h" +#include "ucdn.h" + +/* Extract text into an unsorted span soup. */ + +#define LINE_DIST 0.9f +#define SPACE_DIST 0.2f +#define SPACE_MAX_DIST 0.8f +#define PARAGRAPH_DIST 0.5f + +#undef DEBUG_SPANS +#undef DEBUG_INTERNALS +#undef DEBUG_LINE_HEIGHTS +#undef DEBUG_MASKS +#undef DEBUG_ALIGN +#undef DEBUG_INDENTS + +#include <ft2build.h> +#include FT_FREETYPE_H +#include FT_ADVANCES_H + +typedef struct fz_text_device_s fz_text_device; + +typedef struct span_soup_s span_soup; + +struct fz_text_device_s +{ + fz_text_sheet *sheet; + fz_text_page *page; + span_soup *spans; + fz_text_span *cur_span; + int lastchar; +}; + +static fz_rect * +add_point_to_rect(fz_rect *a, const fz_point *p) +{ + if (p->x < a->x0) + a->x0 = p->x; + if (p->x > a->x1) + a->x1 = p->x; + if (p->y < a->y0) + a->y0 = p->y; + if (p->y > a->y1) + a->y1 = p->y; + return a; +} + +fz_rect * +fz_text_char_bbox(fz_rect *bbox, fz_text_span *span, int i) +{ + fz_point a, d; + const fz_point *max; + fz_text_char *ch; + + if (!span || i >= span->len) + { + *bbox = fz_empty_rect; + } + ch = &span->text[i]; + if (i == span->len-1) + max = &span->max; + else + max = &span->text[i+1].p; + a.x = 0; + a.y = span->ascender_max; + fz_transform_vector(&a, &span->transform); + d.x = 0; + d.y = span->descender_min; + fz_transform_vector(&d, &span->transform); + bbox->x0 = bbox->x1 = ch->p.x + a.x; + bbox->y0 = bbox->y1 = ch->p.y + a.y; + a.x += max->x; + a.y += max->y; + add_point_to_rect(bbox, &a); + a.x = ch->p.x + d.x; + a.y = ch->p.y + d.y; + add_point_to_rect(bbox, &a); + a.x = max->x + d.x; + a.y = max->y + d.y; + add_point_to_rect(bbox, &a); + return bbox; +} + +static void +add_bbox_to_span(fz_text_span *span) +{ + fz_point a, d; + fz_rect *bbox = &span->bbox; + + if (!span) + return; + a.x = 0; + a.y = span->ascender_max; + fz_transform_vector(&a, &span->transform); + d.x = 0; + d.y = span->descender_min; + fz_transform_vector(&d, &span->transform); + bbox->x0 = bbox->x1 = span->min.x + a.x; + bbox->y0 = bbox->y1 = span->min.y + a.y; + a.x += span->max.x; + a.y += span->max.y; + add_point_to_rect(bbox, &a); + a.x = span->min.x + d.x; + a.y = span->min.y + d.y; + add_point_to_rect(bbox, &a); + a.x = span->max.x + d.x; + a.y = span->max.y + d.y; + add_point_to_rect(bbox, &a); +} + +struct span_soup_s +{ + fz_context *ctx; + int len, cap; + fz_text_span **spans; +}; + +static span_soup * +new_span_soup(fz_context *ctx) +{ + span_soup *soup = fz_malloc_struct(ctx, span_soup); + soup->ctx = ctx; + soup->len = 0; + soup->cap = 0; + soup->spans = NULL; + return soup; +} + +static void +free_span_soup(span_soup *soup) +{ + int i; + + if (soup == NULL) + return; + for (i = 0; i < soup->len; i++) + { + fz_free(soup->ctx, soup->spans[i]); + } + fz_free(soup->ctx, soup->spans); + fz_free(soup->ctx, soup); +} + +static void +add_span_to_soup(span_soup *soup, fz_text_span *span) +{ + if (span == NULL) + return; + if (soup->len == soup->cap) + { + int newcap = (soup->cap ? soup->cap * 2 : 16); + soup->spans = fz_resize_array(soup->ctx, soup->spans, newcap, sizeof(*soup->spans)); + soup->cap = newcap; + } + add_bbox_to_span(span); + soup->spans[soup->len++] = span; +} + +static fz_text_line * +push_span(fz_context *ctx, fz_text_device *tdev, fz_text_span *span, int new_line, float distance) +{ + fz_text_line *line; + fz_text_block *block; + fz_text_page *page = tdev->page; + int prev_not_text = 0; + + if (page->len == 0 || page->blocks[page->len-1].type != FZ_PAGE_BLOCK_TEXT) + prev_not_text = 1; + + if (new_line || prev_not_text) + { + float size = fz_matrix_expansion(&span->transform); + /* So, a new line. Part of the same block or not? */ + if (distance == 0 || distance > size * 1.5 || distance < -size * PARAGRAPH_DIST || page->len == 0 || prev_not_text) + { + /* New block */ + if (page->len == page->cap) + { + int newcap = (page->cap ? page->cap*2 : 4); + page->blocks = fz_resize_array(ctx, page->blocks, newcap, sizeof(*page->blocks)); + page->cap = newcap; + } + block = fz_malloc_struct(ctx, fz_text_block); + page->blocks[page->len].type = FZ_PAGE_BLOCK_TEXT; + page->blocks[page->len].u.text = block; + block->cap = 0; + block->len = 0; + block->lines = 0; + block->bbox = fz_empty_rect; + page->len++; + distance = 0; + } + + /* New line */ + block = page->blocks[page->len-1].u.text; + if (block->len == block->cap) + { + int newcap = (block->cap ? block->cap*2 : 4); + block->lines = fz_resize_array(ctx, block->lines, newcap, sizeof(*block->lines)); + block->cap = newcap; + } + block->lines[block->len].first_span = NULL; + block->lines[block->len].last_span = NULL; + block->lines[block->len].distance = distance; + block->lines[block->len].bbox = fz_empty_rect; + block->len++; + } + + /* Find last line and append to it */ + block = page->blocks[page->len-1].u.text; + line = &block->lines[block->len-1]; + + fz_union_rect(&block->lines[block->len-1].bbox, &span->bbox); + fz_union_rect(&block->bbox, &span->bbox); + span->base_offset = (new_line ? 0 : distance); + + if (!line->first_span) + { + line->first_span = line->last_span = span; + span->next = NULL; + } + else + { + line->last_span->next = span; + line->last_span = span; + } + + return line; +} + +#if defined(DEBUG_SPANS) || defined(DEBUG_ALIGN) || defined(DEBUG_INDENTS) +static void +dump_span(fz_text_span *s) +{ + int i; + for (i=0; i < s->len; i++) + { + printf("%c", s->text[i].c); + } +} +#endif + +#ifdef DEBUG_ALIGN +static void +dump_line(fz_text_line *line) +{ + int i; + for (i=0; i < line->len; i++) + { + fz_text_span *s = line->spans[i]; + if (s->spacing > 1) + printf(" "); + dump_span(s); + } + printf("\n"); +} +#endif + +static void +strain_soup(fz_context *ctx, fz_text_device *tdev) +{ + span_soup *soup = tdev->spans; + fz_text_line *last_line = NULL; + fz_text_span *last_span = NULL; + int span_num; + + /* Really dumb implementation to match what we had before */ + for (span_num=0; span_num < soup->len; span_num++) + { + fz_text_span *span = soup->spans[span_num]; + int new_line = 1; + float distance = 0; + float spacing = 0; + soup->spans[span_num] = NULL; + if (last_span) + { + /* If we have a last_span, we must have a last_line */ + /* Do span and last_line share the same baseline? */ + fz_point p, q, perp_r; + float dot; + float size = fz_matrix_expansion(&span->transform); + +#ifdef DEBUG_SPANS + { + printf("Comparing: \""); + dump_span(last_span); + printf("\" and \""); + dump_span(span); + printf("\"\n"); + } +#endif + + p.x = last_line->first_span->max.x - last_line->first_span->min.x; + p.y = last_line->first_span->max.y - last_line->first_span->min.y; + fz_normalize_vector(&p); + q.x = span->max.x - span->min.x; + q.y = span->max.y - span->min.y; + fz_normalize_vector(&q); +#ifdef DEBUG_SPANS + printf("last_span=%g %g -> %g %g = %g %g\n", last_span->min.x, last_span->min.y, last_span->max.x, last_span->max.y, p.x, p.y); + printf("span =%g %g -> %g %g = %g %g\n", span->min.x, span->min.y, span->max.x, span->max.y, q.x, q.y); +#endif + perp_r.y = last_line->first_span->min.x - span->min.x; + perp_r.x = -(last_line->first_span->min.y - span->min.y); + /* Check if p and q are parallel. If so, then this + * line is parallel with the last one. */ + dot = p.x * q.x + p.y * q.y; + if (fabsf(dot) > 0.9995) + { + /* If we take the dot product of normalised(p) and + * perp(r), we get the perpendicular distance from + * one line to the next (assuming they are parallel). */ + distance = p.x * perp_r.x + p.y * perp_r.y; + /* We allow 'small' distances of baseline changes + * to cope with super/subscript. FIXME: We should + * gather subscript/superscript information here. */ + new_line = (fabsf(distance) > size * LINE_DIST); + } + else + { + new_line = 1; + distance = 0; + } + if (!new_line) + { + fz_point delta; + + delta.x = span->min.x - last_span->max.x; + delta.y = span->min.y - last_span->max.y; + + spacing = (p.x * delta.x + p.y * delta.y); + spacing = fabsf(spacing); + /* Only allow changes in baseline (subscript/superscript etc) + * when the spacing is small. */ + if (spacing * fabsf(distance) > size * LINE_DIST && fabsf(distance) > size * 0.1f) + { + new_line = 1; + distance = 0; + spacing = 0; + } + else + { + spacing /= size * SPACE_DIST; + /* Apply the same logic here as when we're adding chars to build spans. */ + if (spacing >= 1 && spacing < (SPACE_MAX_DIST/SPACE_DIST)) + spacing = 1; + } + } +#ifdef DEBUG_SPANS + printf("dot=%g new_line=%d distance=%g size=%g spacing=%g\n", dot, new_line, distance, size, spacing); +#endif + } + span->spacing = spacing; + last_line = push_span(ctx, tdev, span, new_line, distance); + last_span = span; + } +} + +fz_text_sheet * +fz_new_text_sheet(fz_context *ctx) +{ + fz_text_sheet *sheet = fz_malloc(ctx, sizeof *sheet); + sheet->maxid = 0; + sheet->style = NULL; + return sheet; +} + +void +fz_free_text_sheet(fz_context *ctx, fz_text_sheet *sheet) +{ + fz_text_style *style; + + if (sheet == NULL) + return; + + style = sheet->style; + while (style) + { + fz_text_style *next = style->next; + fz_drop_font(ctx, style->font); + fz_free(ctx, style); + style = next; + } + fz_free(ctx, sheet); +} + +static fz_text_style * +fz_lookup_text_style_imp(fz_context *ctx, fz_text_sheet *sheet, + float size, fz_font *font, int wmode, int script) +{ + fz_text_style *style; + + for (style = sheet->style; style; style = style->next) + { + if (style->font == font && + style->size == size && + style->wmode == wmode && + style->script == script) /* FIXME: others */ + { + return style; + } + } + + /* Better make a new one and add it to our list */ + style = fz_malloc(ctx, sizeof *style); + style->id = sheet->maxid++; + style->font = fz_keep_font(ctx, font); + style->size = size; + style->wmode = wmode; + style->script = script; + style->next = sheet->style; + sheet->style = style; + return style; +} + +static fz_text_style * +fz_lookup_text_style(fz_context *ctx, fz_text_sheet *sheet, fz_text *text, const fz_matrix *ctm, + fz_colorspace *colorspace, float *color, float alpha, fz_stroke_state *stroke) +{ + float size = 1.0f; + fz_font *font = text ? text->font : NULL; + int wmode = text ? text->wmode : 0; + if (ctm && text) + { + fz_matrix tm = text->trm; + fz_matrix trm; + tm.e = 0; + tm.f = 0; + fz_concat(&trm, &tm, ctm); + size = fz_matrix_expansion(&trm); + } + return fz_lookup_text_style_imp(ctx, sheet, size, font, wmode, 0); +} + +fz_text_page * +fz_new_text_page(fz_context *ctx) +{ + fz_text_page *page = fz_malloc(ctx, sizeof(*page)); + page->mediabox = fz_empty_rect; + page->len = 0; + page->cap = 0; + page->blocks = NULL; + page->next = NULL; + return page; +} + +static void +fz_free_text_line_contents(fz_context *ctx, fz_text_line *line) +{ + fz_text_span *span, *next; + for (span = line->first_span; span; span=next) + { + next = span->next; + fz_free(ctx, span->text); + fz_free(ctx, span); + } +} + +static void +fz_free_text_block(fz_context *ctx, fz_text_block *block) +{ + fz_text_line *line; + if (block == NULL) + return; + for (line = block->lines; line < block->lines + block->len; line++) + fz_free_text_line_contents(ctx, line); + fz_free(ctx, block->lines); + fz_free(ctx, block); +} + +static void +fz_free_image_block(fz_context *ctx, fz_image_block *block) +{ + if (block == NULL) + return; + fz_drop_image(ctx, block->image); + fz_drop_colorspace(ctx, block->cspace); + fz_free(ctx, block); +} + +void +fz_free_text_page(fz_context *ctx, fz_text_page *page) +{ + fz_page_block *block; + if (page == NULL) + return; + for (block = page->blocks; block < page->blocks + page->len; block++) + { + switch (block->type) + { + case FZ_PAGE_BLOCK_TEXT: + fz_free_text_block(ctx, block->u.text); + break; + case FZ_PAGE_BLOCK_IMAGE: + fz_free_image_block(ctx, block->u.image); + break; + } + } + fz_free(ctx, page->blocks); + fz_free(ctx, page); +} + +static fz_text_span * +fz_new_text_span(fz_context *ctx, const fz_point *p, int wmode, const fz_matrix *trm) +{ + fz_text_span *span = fz_malloc_struct(ctx, fz_text_span); + span->ascender_max = 0; + span->descender_min = 0; + span->cap = 0; + span->len = 0; + span->min = *p; + span->max = *p; + span->wmode = wmode; + span->transform.a = trm->a; + span->transform.b = trm->b; + span->transform.c = trm->c; + span->transform.d = trm->d; + span->transform.e = 0; + span->transform.f = 0; + span->text = NULL; + span->next = NULL; + return span; +} + +static void +add_char_to_span(fz_context *ctx, fz_text_span *span, int c, fz_point *p, fz_point *max, fz_text_style *style) +{ + if (span->len == span->cap) + { + int newcap = (span->cap ? span->cap * 2 : 16); + span->text = fz_resize_array(ctx, span->text, newcap, sizeof(fz_text_char)); + span->cap = newcap; + span->bbox = fz_empty_rect; + } + span->max = *max; + if (style->ascender > span->ascender_max) + span->ascender_max = style->ascender; + if (style->descender < span->descender_min) + span->descender_min = style->descender; + span->text[span->len].c = c; + span->text[span->len].p = *p; + span->text[span->len].style = style; + span->len++; +} + +static void +fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style, int c, fz_matrix *trm, float adv, int wmode) +{ + int can_append = 1; + int add_space = 0; + fz_point dir, ndir, p, q; + float size; + fz_point delta; + float spacing = 0; + float base_offset = 0; + + if (wmode == 0) + { + dir.x = 1; + dir.y = 0; + } + else + { + dir.x = 0; + dir.y = 1; + } + fz_transform_vector(&dir, trm); + ndir = dir; + fz_normalize_vector(&ndir); + /* dir = direction vector for motion. ndir = normalised(dir) */ + + size = fz_matrix_expansion(trm); + + if (dev->cur_span == NULL || + trm->a != dev->cur_span->transform.a || trm->b != dev->cur_span->transform.b || + trm->c != dev->cur_span->transform.c || trm->d != dev->cur_span->transform.d) + { + /* If the matrix has changed (or if we don't have a span at + * all), then we can't append. */ +#ifdef DEBUG_SPANS + printf("Transform changed\n"); +#endif + can_append = 0; + } + else + { + /* Calculate how far we've moved since the end of the current + * span. */ + delta.x = trm->e - dev->cur_span->max.x; + delta.y = trm->f - dev->cur_span->max.y; + + /* The transform has not changed, so we know we're in the same + * direction. Calculate 2 distances; how far off the previous + * baseline we are, together with how far along the baseline + * we are from the expected position. */ + spacing = ndir.x * delta.x + ndir.y * delta.y; + base_offset = -ndir.y * delta.x + ndir.x * delta.y; + + spacing /= size * SPACE_DIST; + spacing = fabsf(spacing); + if (fabsf(base_offset) < size * 0.1) + { + /* Only a small amount off the baseline - we'll take this */ + if (spacing < 1.0) + { + /* Motion is in line, and small. */ + } + else if (spacing >= 1 && spacing < (SPACE_MAX_DIST/SPACE_DIST)) + { + /* Motion is in line, but large enough + * to warrant us adding a space */ + if (dev->lastchar != ' ' && wmode == 0) + add_space = 1; + } + else + { + /* Motion is in line, but too large - split to a new span */ + can_append = 0; + } + } + else + { + can_append = 0; + spacing = 0; + } + } + +#ifdef DEBUG_SPANS + printf("%c%c append=%d space=%d size=%g spacing=%g base_offset=%g\n", dev->lastchar, c, can_append, add_space, size, spacing, base_offset); +#endif + + p.x = trm->e; + p.y = trm->f; + if (can_append == 0) + { + /* Start a new span */ + add_span_to_soup(dev->spans, dev->cur_span); + dev->cur_span = NULL; + dev->cur_span = fz_new_text_span(ctx, &p, wmode, trm); + dev->cur_span->spacing = 0; + } + if (add_space) + { + q.x = - 0.2f; + q.y = 0; + fz_transform_point(&q, trm); + add_char_to_span(ctx, dev->cur_span, ' ', &p, &q, style); + } + /* Advance the matrix */ + q.x = trm->e += adv * dir.x; + q.y = trm->f += adv * dir.y; + add_char_to_span(ctx, dev->cur_span, c, &p, &q, style); +} + +static void +fz_add_text_char(fz_context *ctx, fz_text_device *dev, fz_text_style *style, int c, fz_matrix *trm, float adv, int wmode) +{ + switch (c) + { + case -1: /* ignore when one unicode character maps to multiple glyphs */ + break; + case 0xFB00: /* ff */ + fz_add_text_char_imp(ctx, dev, style, 'f', trm, adv/2, wmode); + fz_add_text_char_imp(ctx, dev, style, 'f', trm, adv/2, wmode); + break; + case 0xFB01: /* fi */ + fz_add_text_char_imp(ctx, dev, style, 'f', trm, adv/2, wmode); + fz_add_text_char_imp(ctx, dev, style, 'i', trm, adv/2, wmode); + break; + case 0xFB02: /* fl */ + fz_add_text_char_imp(ctx, dev, style, 'f', trm, adv/2, wmode); + fz_add_text_char_imp(ctx, dev, style, 'l', trm, adv/2, wmode); + break; + case 0xFB03: /* ffi */ + fz_add_text_char_imp(ctx, dev, style, 'f', trm, adv/3, wmode); + fz_add_text_char_imp(ctx, dev, style, 'f', trm, adv/3, wmode); + fz_add_text_char_imp(ctx, dev, style, 'i', trm, adv/3, wmode); + break; + case 0xFB04: /* ffl */ + fz_add_text_char_imp(ctx, dev, style, 'f', trm, adv/3, wmode); + fz_add_text_char_imp(ctx, dev, style, 'f', trm, adv/3, wmode); + fz_add_text_char_imp(ctx, dev, style, 'l', trm, adv/3, wmode); + break; + case 0xFB05: /* long st */ + case 0xFB06: /* st */ + fz_add_text_char_imp(ctx, dev, style, 's', trm, adv/2, wmode); + fz_add_text_char_imp(ctx, dev, style, 't', trm, adv/2, wmode); + break; + default: + fz_add_text_char_imp(ctx, dev, style, c, trm, adv, wmode); + break; + } +} + +static void +fz_text_extract(fz_context *ctx, fz_text_device *dev, fz_text *text, const fz_matrix *ctm, fz_text_style *style) +{ + fz_font *font = text->font; + FT_Face face = font->ft_face; + fz_matrix tm = text->trm; + fz_matrix trm; + float adv; + float ascender = 1; + float descender = 0; + int multi; + int i, j, err; + + if (text->len == 0) + return; + + if (font->ft_face) + { + fz_lock(ctx, FZ_LOCK_FREETYPE); + err = FT_Set_Char_Size(font->ft_face, 64, 64, 72, 72); + if (err) + fz_warn(ctx, "freetype set character size: %s", ft_error_string(err)); + ascender = (float)face->ascender / face->units_per_EM; + descender = (float)face->descender / face->units_per_EM; + fz_unlock(ctx, FZ_LOCK_FREETYPE); + } + else if (font->t3procs && !fz_is_empty_rect(&font->bbox)) + { + ascender = font->bbox.y1; + descender = font->bbox.y0; + } + style->ascender = ascender; + style->descender = descender; + + tm.e = 0; + tm.f = 0; + fz_concat(&trm, &tm, ctm); + + for (i = 0; i < text->len; i++) + { + /* Calculate new pen location and delta */ + tm.e = text->items[i].x; + tm.f = text->items[i].y; + fz_concat(&trm, &tm, ctm); + + /* Calculate bounding box and new pen position based on font metrics */ + if (font->ft_face) + { + FT_Fixed ftadv = 0; + int mask = FT_LOAD_NO_BITMAP | FT_LOAD_NO_HINTING | FT_LOAD_IGNORE_TRANSFORM; + + /* TODO: freetype returns broken vertical metrics */ + /* if (text->wmode) mask |= FT_LOAD_VERTICAL_LAYOUT; */ + + fz_lock(ctx, FZ_LOCK_FREETYPE); + err = FT_Set_Char_Size(font->ft_face, 64, 64, 72, 72); + if (err) + fz_warn(ctx, "freetype set character size: %s", ft_error_string(err)); + FT_Get_Advance(font->ft_face, text->items[i].gid, mask, &ftadv); + adv = ftadv / 65536.0f; + fz_unlock(ctx, FZ_LOCK_FREETYPE); + } + else + { + adv = font->t3widths[text->items[i].gid]; + } + + /* Check for one glyph to many char mapping */ + for (j = i + 1; j < text->len; j++) + if (text->items[j].gid >= 0) + break; + multi = j - i; + + if (multi == 1) + { + fz_add_text_char(ctx, dev, style, text->items[i].ucs, &trm, adv, text->wmode); + } + else + { + for (j = 0; j < multi; j++) + { + fz_add_text_char(ctx, dev, style, text->items[i + j].ucs, &trm, adv/multi, text->wmode); + } + i += j - 1; + } + + dev->lastchar = text->items[i].ucs; + } +} + +static void +fz_text_fill_text(fz_device *dev, fz_text *text, const fz_matrix *ctm, + fz_colorspace *colorspace, float *color, float alpha) +{ + fz_text_device *tdev = dev->user; + fz_text_style *style; + style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, ctm, colorspace, color, alpha, NULL); + fz_text_extract(dev->ctx, tdev, text, ctm, style); +} + +static void +fz_text_stroke_text(fz_device *dev, fz_text *text, fz_stroke_state *stroke, const fz_matrix *ctm, + fz_colorspace *colorspace, float *color, float alpha) +{ + fz_text_device *tdev = dev->user; + fz_text_style *style; + style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, ctm, colorspace, color, alpha, stroke); + fz_text_extract(dev->ctx, tdev, text, ctm, style); +} + +static void +fz_text_clip_text(fz_device *dev, fz_text *text, const fz_matrix *ctm, int accumulate) +{ + fz_text_device *tdev = dev->user; + fz_text_style *style; + style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, ctm, NULL, NULL, 0, NULL); + fz_text_extract(dev->ctx, tdev, text, ctm, style); +} + +static void +fz_text_clip_stroke_text(fz_device *dev, fz_text *text, fz_stroke_state *stroke, const fz_matrix *ctm) +{ + fz_text_device *tdev = dev->user; + fz_text_style *style; + style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, ctm, NULL, NULL, 0, stroke); + fz_text_extract(dev->ctx, tdev, text, ctm, style); +} + +static void +fz_text_ignore_text(fz_device *dev, fz_text *text, const fz_matrix *ctm) +{ + fz_text_device *tdev = dev->user; + fz_text_style *style; + style = fz_lookup_text_style(dev->ctx, tdev->sheet, text, ctm, NULL, NULL, 0, NULL); + fz_text_extract(dev->ctx, tdev, text, ctm, style); +} + +static void +fz_text_fill_image_mask(fz_device *dev, fz_image *img, const fz_matrix *ctm, + fz_colorspace *cspace, float *color, float alpha) +{ + fz_text_device *tdev = dev->user; + fz_text_page *page = tdev->page; + fz_image_block *block; + fz_context *ctx = dev->ctx; + + /* If the alpha is less than 50% then it's probably a watermark or + * effect or something. Skip it */ + if (alpha < 0.5) + return; + + /* New block */ + if (page->len == page->cap) + { + int newcap = (page->cap ? page->cap*2 : 4); + page->blocks = fz_resize_array(ctx, page->blocks, newcap, sizeof(*page->blocks)); + page->cap = newcap; + } + block = fz_malloc_struct(ctx, fz_image_block); + page->blocks[page->len].type = FZ_PAGE_BLOCK_IMAGE; + page->blocks[page->len].u.image = block; + block->image = fz_keep_image(ctx, img); + block->cspace = fz_keep_colorspace(ctx, cspace); + if (cspace) + memcpy(block->colors, color, sizeof(block->colors[0])*cspace->n); + page->len++; +} + +static void +fz_text_fill_image(fz_device *dev, fz_image *img, const fz_matrix *ctm, float alpha) +{ + fz_text_fill_image_mask(dev, img, ctm, NULL, NULL, alpha); +} + +static int +fz_bidi_direction(int bidiclass, int curdir) +{ + switch (bidiclass) + { + /* strong */ + case UCDN_BIDI_CLASS_L: return 1; + case UCDN_BIDI_CLASS_R: return -1; + case UCDN_BIDI_CLASS_AL: return -1; + + /* weak */ + case UCDN_BIDI_CLASS_EN: + case UCDN_BIDI_CLASS_ES: + case UCDN_BIDI_CLASS_ET: + case UCDN_BIDI_CLASS_AN: + case UCDN_BIDI_CLASS_CS: + case UCDN_BIDI_CLASS_NSM: + case UCDN_BIDI_CLASS_BN: + return curdir; + + /* neutral */ + case UCDN_BIDI_CLASS_B: + case UCDN_BIDI_CLASS_S: + case UCDN_BIDI_CLASS_WS: + case UCDN_BIDI_CLASS_ON: + return curdir; + + /* embedding, override, pop ... we don't support them */ + default: + return 0; + } +} + +static void +fz_bidi_reorder_run(fz_text_span *span, int a, int b, int dir) +{ + if (a < b && dir == -1) + { + fz_text_char c; + int m = a + (b - a) / 2; + while (a < m) + { + b--; + c = span->text[a]; + span->text[a] = span->text[b]; + span->text[b] = c; + a++; + } + } +} + +static void +fz_bidi_reorder_span(fz_text_span *span) +{ + int a, b, dir, curdir; + + a = 0; + curdir = 1; + for (b = 0; b < span->len; b++) + { + dir = fz_bidi_direction(ucdn_get_bidi_class(span->text[b].c), curdir); + if (dir != curdir) + { + fz_bidi_reorder_run(span, a, b, curdir); + curdir = dir; + a = b; + } + } + fz_bidi_reorder_run(span, a, b, curdir); +} + +static void +fz_bidi_reorder_text_page(fz_context *ctx, fz_text_page *page) +{ + fz_page_block *pageblock; + fz_text_block *block; + fz_text_line *line; + fz_text_span *span; + + for (pageblock = page->blocks; pageblock < page->blocks + page->len; pageblock++) + if (pageblock->type == FZ_PAGE_BLOCK_TEXT) + for (block = pageblock->u.text, line = block->lines; line < block->lines + block->len; line++) + for (span = line->first_span; span; span = span->next) + fz_bidi_reorder_span(span); +} + +static void +fz_text_begin_page(fz_device *dev, const fz_rect *mediabox, const fz_matrix *ctm) +{ + fz_context *ctx = dev->ctx; + fz_text_device *tdev = dev->user; + + if (tdev->page->len) + { + tdev->page->next = fz_new_text_page(ctx); + tdev->page = tdev->page->next; + } + + tdev->page->mediabox = *mediabox; + fz_transform_rect(&tdev->page->mediabox, ctm); + + tdev->spans = new_span_soup(ctx); +} + +static void +fz_text_end_page(fz_device *dev) +{ + fz_context *ctx = dev->ctx; + fz_text_device *tdev = dev->user; + + add_span_to_soup(tdev->spans, tdev->cur_span); + tdev->cur_span = NULL; + + strain_soup(ctx, tdev); + free_span_soup(tdev->spans); + tdev->spans = NULL; + + /* TODO: smart sorting of blocks in reading order */ + /* TODO: unicode NFC normalization */ + + fz_bidi_reorder_text_page(ctx, tdev->page); +} + +static void +fz_text_free_user(fz_device *dev) +{ + fz_text_device *tdev = dev->user; + free_span_soup(tdev->spans); + fz_free(dev->ctx, tdev); +} + +fz_device * +fz_new_text_device(fz_context *ctx, fz_text_sheet *sheet, fz_text_page *page) +{ + fz_device *dev; + + fz_text_device *tdev = fz_malloc_struct(ctx, fz_text_device); + tdev->sheet = sheet; + tdev->page = page; + tdev->spans = NULL; + tdev->cur_span = NULL; + tdev->lastchar = ' '; + + dev = fz_new_device(ctx, tdev); + dev->hints = FZ_IGNORE_IMAGE | FZ_IGNORE_SHADE; + dev->begin_page = fz_text_begin_page; + dev->end_page = fz_text_end_page; + dev->free_user = fz_text_free_user; + dev->fill_text = fz_text_fill_text; + dev->stroke_text = fz_text_stroke_text; + dev->clip_text = fz_text_clip_text; + dev->clip_stroke_text = fz_text_clip_stroke_text; + dev->ignore_text = fz_text_ignore_text; + dev->fill_image = fz_text_fill_image; + dev->fill_image_mask = fz_text_fill_image_mask; + + return dev; +} |