From 626ea2ea771735492c9a4350ae02b26ea09d1423 Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Tue, 1 Aug 2017 18:15:23 +0200 Subject: Simplify stext structure and device. * Use pool allocator and linked lists for all levels. * Remove separate fz_stext_sheet struct. * Remove unused 'script' style. * Remove 'span' level items. * Detect visual/logical RTL layouts. * Detect indented paragraphs. --- source/fitz/stext-device.c | 1047 ++++++++++++++------------------------------ 1 file changed, 330 insertions(+), 717 deletions(-) (limited to 'source/fitz/stext-device.c') diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c index 73fa309e..166f5aa0 100644 --- a/source/fitz/stext-device.c +++ b/source/fitz/stext-device.c @@ -4,36 +4,25 @@ #include #include -/* Extract text into an unsorted span soup. */ +#include /* for debug printing */ + +/* Extract text into blocks and lines. */ #define LINE_DIST 0.9f #define SPACE_DIST 0.15f #define SPACE_MAX_DIST 0.8f #define PARAGRAPH_DIST 0.5f -#include /* for debug printing */ -#undef DEBUG_SPANS -#undef DEBUG_INTERNALS -#undef DEBUG_LINE_HEIGHTS -#undef DEBUG_MASKS -#undef DEBUG_ALIGN -#undef DEBUG_INDENTS - -#include -#include FT_FREETYPE_H -#include FT_ADVANCES_H - typedef struct fz_stext_device_s fz_stext_device; -typedef struct span_soup_s span_soup; - struct fz_stext_device_s { fz_device super; - fz_stext_sheet *sheet; fz_stext_page *page; - span_soup *spans; - fz_stext_span *cur_span; + fz_point pen, start; + fz_matrix trm; + int new_obj; + int curdir; int lastchar; int flags; }; @@ -42,553 +31,235 @@ const char *fz_stext_options_usage = "Structured text output options:\n" "\tpreserve-ligatures: do not expand all ligatures into constituent characters\n" "\tpreserve-whitespace: do not convert all whitespace characters into spaces\n" + "\tpreserve-images: keep images in output\n" "\n"; -static fz_rect * -add_point_to_rect(fz_rect *a, const fz_point *p) +fz_rect * +fz_stext_char_bbox(fz_context *ctx, fz_rect *bbox, fz_stext_line *line, fz_stext_char *ch) { - if (p->x < a->x0) - a->x0 = p->x; - if (p->x > a->x1) - a->x1 = p->x; - if (p->y < a->y0) - a->y0 = p->y; - if (p->y > a->y1) - a->y1 = p->y; - return a; + *bbox = ch->bbox; + return bbox; } -fz_rect * -fz_stext_char_bbox(fz_context *ctx, fz_rect *bbox, fz_stext_span *span, int i) +fz_stext_page * +fz_new_stext_page(fz_context *ctx, const fz_rect *mediabox) { - fz_point a, d; - const fz_point *max; - fz_stext_char *ch; - - if (!span || i >= span->len) - { - *bbox = fz_empty_rect; - return bbox; - } - ch = &span->text[i]; - if (i == span->len-1) - max = &span->max; - else - max = &span->text[i+1].p; - if (span->wmode == 0) + fz_pool *pool = fz_new_pool(ctx); + fz_stext_page *page; + fz_try(ctx) { - a.x = 0; - a.y = span->ascender_max; - d.x = 0; - d.y = span->descender_min; + page = fz_pool_alloc(ctx, pool, sizeof(*page)); + page->pool = pool; + page->mediabox = *mediabox; + page->first_block = NULL; + page->last_block = NULL; } - else + fz_catch(ctx) { - a.x = span->ascender_max; - a.y = 0; - d.x = span->descender_min; - d.y = 0; + fz_drop_pool(ctx, pool); + fz_rethrow(ctx); } - fz_transform_vector(&a, &span->transform); - fz_transform_vector(&d, &span->transform); - bbox->x0 = bbox->x1 = ch->p.x + a.x; - bbox->y0 = bbox->y1 = ch->p.y + a.y; - a.x += max->x; - a.y += max->y; - add_point_to_rect(bbox, &a); - a.x = ch->p.x + d.x; - a.y = ch->p.y + d.y; - add_point_to_rect(bbox, &a); - a.x = max->x + d.x; - a.y = max->y + d.y; - add_point_to_rect(bbox, &a); - return bbox; + return page; } -static void -add_bbox_to_span(fz_stext_span *span) +void +fz_drop_stext_page(fz_context *ctx, fz_stext_page *page) { - fz_point a, d; - fz_rect *bbox = &span->bbox; - - if (!span) - return; - if (span->wmode == 0) - { - a.x = 0; - a.y = span->ascender_max; - d.x = 0; - d.y = span->descender_min; - } - else + if (page) { - a.x = span->ascender_max; - a.y = 0; - d.x = span->descender_min; - d.y = 0; + fz_stext_block *block; + for (block = page->first_block; block; block = block->next) + if (block->type == FZ_STEXT_BLOCK_IMAGE) + fz_drop_image(ctx, block->u.i.image); + fz_drop_pool(ctx, page->pool); } - fz_transform_vector(&a, &span->transform); - fz_transform_vector(&d, &span->transform); - bbox->x0 = bbox->x1 = span->min.x + a.x; - bbox->y0 = bbox->y1 = span->min.y + a.y; - a.x += span->max.x; - a.y += span->max.y; - add_point_to_rect(bbox, &a); - a.x = span->min.x + d.x; - a.y = span->min.y + d.y; - add_point_to_rect(bbox, &a); - a.x = span->max.x + d.x; - a.y = span->max.y + d.y; - add_point_to_rect(bbox, &a); } -struct span_soup_s -{ - int len, cap; - fz_stext_span **spans; -}; - -static span_soup * -new_span_soup(fz_context *ctx) +static fz_stext_block * +add_block_to_page(fz_context *ctx, fz_stext_page *page) { - span_soup *soup = fz_malloc_struct(ctx, span_soup); - soup->len = 0; - soup->cap = 0; - soup->spans = NULL; - return soup; + fz_stext_block *block = fz_pool_alloc(ctx, page->pool, sizeof *page->first_block); + if (!page->first_block) + page->first_block = page->last_block = block; + else + { + page->last_block->next = block; + page->last_block = block; + } + return block; } -static void -free_span_soup(fz_context *ctx, span_soup *soup) +static fz_stext_block * +add_text_block_to_page(fz_context *ctx, fz_stext_page *page) { - int i; - - if (soup == NULL) - return; - for (i = 0; i < soup->len; i++) - { - fz_free(ctx, soup->spans[i]); - } - fz_free(ctx, soup->spans); - fz_free(ctx, soup); + fz_stext_block *block = add_block_to_page(ctx, page); + block->type = FZ_STEXT_BLOCK_TEXT; + return block; } -static void -add_span_to_soup(fz_context *ctx, span_soup *soup, fz_stext_span *span) +static fz_stext_block * +add_image_block_to_page(fz_context *ctx, fz_stext_page *page, const fz_matrix *ctm, fz_image *image) { - if (span == NULL) - return; - if (soup->len == soup->cap) - { - int newcap = (soup->cap ? soup->cap * 2 : 16); - soup->spans = fz_resize_array(ctx, soup->spans, newcap, sizeof(*soup->spans)); - soup->cap = newcap; - } - add_bbox_to_span(span); - soup->spans[soup->len++] = span; + fz_stext_block *block = add_block_to_page(ctx, page); + block->type = FZ_STEXT_BLOCK_IMAGE; + block->u.i.transform = *ctm; + block->u.i.image = fz_keep_image(ctx, image); + block->bbox.x0 = 0; + block->bbox.y0 = 0; + block->bbox.x1 = 1; + block->bbox.y1 = 1; + fz_transform_rect(&block->bbox, ctm); + return block; } static fz_stext_line * -push_span(fz_context *ctx, fz_stext_device *tdev, fz_stext_span *span, int new_line, float distance) +add_line_to_block(fz_context *ctx, fz_stext_page *page, fz_stext_block *block, int wmode) { - fz_stext_line *line; - fz_stext_block *block; - fz_stext_page *page = tdev->page; - int prev_not_text = 0; - - if (page->len == 0 || page->blocks[page->len-1].type != FZ_PAGE_BLOCK_TEXT) - prev_not_text = 1; - - if (new_line || prev_not_text) - { - float size = fz_matrix_expansion(&span->transform); - /* So, a new line. Part of the same block or not? */ - if (distance == 0 || distance > size * 1.5f || distance < -size * PARAGRAPH_DIST || page->len == 0 || prev_not_text) - { - /* New block */ - if (page->len == page->cap) - { - int newcap = (page->cap ? page->cap*2 : 4); - page->blocks = fz_resize_array(ctx, page->blocks, newcap, sizeof(*page->blocks)); - page->cap = newcap; - } - block = fz_malloc_struct(ctx, fz_stext_block); - page->blocks[page->len].type = FZ_PAGE_BLOCK_TEXT; - page->blocks[page->len].u.text = block; - block->cap = 0; - block->len = 0; - block->lines = 0; - block->bbox = fz_empty_rect; - page->len++; - distance = 0; - } - - /* New line */ - block = page->blocks[page->len-1].u.text; - if (block->len == block->cap) - { - int newcap = (block->cap ? block->cap*2 : 4); - block->lines = fz_resize_array(ctx, block->lines, newcap, sizeof(*block->lines)); - block->cap = newcap; - } - block->lines[block->len].first_span = NULL; - block->lines[block->len].last_span = NULL; - block->lines[block->len].distance = distance; - block->lines[block->len].bbox = fz_empty_rect; - block->len++; - } - - /* Find last line and append to it */ - block = page->blocks[page->len-1].u.text; - line = &block->lines[block->len-1]; - - fz_union_rect(&block->lines[block->len-1].bbox, &span->bbox); - fz_union_rect(&block->bbox, &span->bbox); - span->base_offset = (new_line ? 0 : distance); - - if (!line->first_span) - { - line->first_span = line->last_span = span; - span->next = NULL; - } + fz_stext_line *line = fz_pool_alloc(ctx, page->pool, sizeof *block->u.t.first_line); + if (!block->u.t.first_line) + block->u.t.first_line = block->u.t.last_line = line; else { - line->last_span->next = span; - line->last_span = span; + block->u.t.last_line->next = line; + block->u.t.last_line = line; } + line->wmode = wmode; + return line; } -#if defined(DEBUG_SPANS) || defined(DEBUG_ALIGN) || defined(DEBUG_INDENTS) -static void -dump_span(fz_stext_span *s) +static float min4(float a, float b, float c, float d) { - int i; - for (i=0; i < s->len; i++) - { - printf("%c", s->text[i].c); - } + return fz_min(fz_min(a, b), fz_min(c, d)); } -#endif -#ifdef DEBUG_ALIGN -static void -dump_line(fz_stext_line *line) +static float max4(float a, float b, float c, float d) { - int i; - for (i=0; i < line->len; i++) - { - fz_stext_span *s = line->spans[i]; - if (s->spacing > 1) - printf(" "); - dump_span(s); - } - printf("\n"); + return fz_max(fz_max(a, b), fz_max(c, d)); } -#endif -static void -strain_soup(fz_context *ctx, fz_stext_device *tdev) +static fz_stext_char * +add_char_to_line(fz_context *ctx, fz_stext_page *page, fz_stext_line *line, const fz_matrix *trm, fz_font *font, float size, int c, fz_point *p, fz_point *q, int rtl) { - span_soup *soup = tdev->spans; - fz_stext_line *last_line = NULL; - fz_stext_span *last_span = NULL; - int span_num; - - if (soup == NULL) - return; + fz_stext_char *ch = fz_pool_alloc(ctx, page->pool, sizeof *line->first_char); + fz_point a, d; - /* Really dumb implementation to match what we had before */ - for (span_num=0; span_num < soup->len; span_num++) + if (!line->first_char) + line->first_char = line->last_char = ch; + else { - fz_stext_span *span = soup->spans[span_num]; - int new_line = 1; - float distance = 0; - float spacing = 0; - soup->spans[span_num] = NULL; - if (last_span) - { - /* If we have a last_span, we must have a last_line */ - /* Do span and last_line share the same baseline? */ - fz_point p, q, perp_r; - float dot; - float size = fz_matrix_expansion(&span->transform); - -#ifdef DEBUG_SPANS - { - printf("Comparing: \""); - dump_span(last_span); - printf("\" and \""); - dump_span(span); - printf("\"\n"); - } -#endif - - p.x = last_line->first_span->max.x - last_line->first_span->min.x; - p.y = last_line->first_span->max.y - last_line->first_span->min.y; - fz_normalize_vector(&p); - q.x = span->max.x - span->min.x; - q.y = span->max.y - span->min.y; - fz_normalize_vector(&q); -#ifdef DEBUG_SPANS - printf("last_span=%g %g -> %g %g = %g %g\n", last_span->min.x, last_span->min.y, last_span->max.x, last_span->max.y, p.x, p.y); - printf("span =%g %g -> %g %g = %g %g\n", span->min.x, span->min.y, span->max.x, span->max.y, q.x, q.y); -#endif - perp_r.y = last_line->first_span->min.x - span->min.x; - perp_r.x = -(last_line->first_span->min.y - span->min.y); - /* Check if p and q are parallel. If so, then this - * line is parallel with the last one. */ - dot = p.x * q.x + p.y * q.y; - if (fabsf(dot) > 0.9995f) - { - /* If we take the dot product of normalised(p) and - * perp(r), we get the perpendicular distance from - * one line to the next (assuming they are parallel). */ - distance = p.x * perp_r.x + p.y * perp_r.y; - /* We allow 'small' distances of baseline changes - * to cope with super/subscript. FIXME: We should - * gather subscript/superscript information here. */ - new_line = (fabsf(distance) > size * LINE_DIST); - } - else - { - new_line = 1; - distance = 0; - } - if (!new_line) - { - fz_point delta; - - delta.x = span->min.x - last_span->max.x; - delta.y = span->min.y - last_span->max.y; - - spacing = (p.x * delta.x + p.y * delta.y); - spacing = fabsf(spacing); - /* Only allow changes in baseline (subscript/superscript etc) - * when the spacing is small. */ - if (spacing * fabsf(distance) > size * LINE_DIST && fabsf(distance) > size * 0.1f) - { - new_line = 1; - distance = 0; - spacing = 0; - } - else - { - spacing /= size * SPACE_DIST; - /* Apply the same logic here as when we're adding chars to build spans. */ - if (spacing >= 1 && spacing < (SPACE_MAX_DIST/SPACE_DIST)) - spacing = 1; - } - } -#ifdef DEBUG_SPANS - printf("dot=%g new_line=%d distance=%g size=%g spacing=%g\n", dot, new_line, distance, size, spacing); -#endif - } - span->spacing = spacing; - last_line = push_span(ctx, tdev, span, new_line, distance); - last_span = span; + line->last_char->next = ch; + line->last_char = ch; } -} - -fz_stext_sheet * -fz_new_stext_sheet(fz_context *ctx) -{ - fz_stext_sheet *sheet = fz_malloc(ctx, sizeof *sheet); - sheet->maxid = 0; - sheet->style = NULL; - return sheet; -} -void -fz_drop_stext_sheet(fz_context *ctx, fz_stext_sheet *sheet) -{ - fz_stext_style *style; + ch->c = c; + ch->rtl = rtl; + ch->origin = *p; + ch->size = size; + ch->font = font; /* TODO: keep and drop */ - if (sheet == NULL) - return; - - style = sheet->style; - while (style) + if (line->wmode == 0) { - fz_stext_style *next = style->next; - fz_drop_font(ctx, style->font); - fz_free(ctx, style); - style = next; + a.x = 0; + d.x = 0; + a.y = fz_font_ascender(ctx, font); + d.y = fz_font_descender(ctx, font); } - fz_free(ctx, sheet); -} - -static fz_stext_style * -fz_lookup_stext_style_imp(fz_context *ctx, fz_stext_sheet *sheet, - float size, fz_font *font, int wmode, int script) -{ - fz_stext_style *style; - - for (style = sheet->style; style; style = style->next) + else { - if (style->font == font && - style->size == size && - style->wmode == wmode && - style->script == script) /* FIXME: others */ - { - return style; - } + fz_rect *bbox = fz_font_bbox(ctx, font); + a.x = bbox->x1; + d.x = bbox->x0; + a.y = 0; + d.y = 0; } + fz_transform_vector(&a, trm); + fz_transform_vector(&d, trm); - /* Better make a new one and add it to our list */ - style = fz_malloc(ctx, sizeof *style); - style->id = sheet->maxid++; - style->font = fz_keep_font(ctx, font); - style->size = size; - style->wmode = wmode; - style->script = script; - style->next = sheet->style; - sheet->style = style; - return style; -} + ch->bbox.x0 = min4(p->x + a.x, q->x + a.x, p->x + d.x, q->x + d.x); + ch->bbox.x1 = max4(p->x + a.x, q->x + a.x, p->x + d.x, q->x + d.x); + ch->bbox.y0 = min4(p->y + a.y, q->y + a.y, p->y + d.y, q->y + d.y); + ch->bbox.y1 = max4(p->y + a.y, q->y + a.y, p->y + d.y, q->y + d.y); -static fz_stext_style * -fz_lookup_stext_style(fz_context *ctx, fz_stext_sheet *sheet, fz_text_span *span, const fz_matrix *ctm, - fz_colorspace *colorspace, const float *color, float alpha, const fz_stroke_state *stroke) -{ - float size = 1.0f; - fz_font *font = span ? span->font : NULL; - int wmode = span ? span->wmode : 0; - if (ctm && span) + if (fz_is_empty_rect(&line->bbox)) + line->bbox = ch->bbox; + else { - fz_matrix tm = span->trm; - fz_matrix trm; - tm.e = 0; - tm.f = 0; - fz_concat(&trm, &tm, ctm); - size = fz_matrix_expansion(&trm); + line->bbox.x0 = fz_min(line->bbox.x0, ch->bbox.x0); + line->bbox.y0 = fz_min(line->bbox.y0, ch->bbox.y0); + line->bbox.x1 = fz_min(line->bbox.x1, ch->bbox.x1); + line->bbox.y1 = fz_min(line->bbox.y1, ch->bbox.y1); } - return fz_lookup_stext_style_imp(ctx, sheet, size, font, wmode, 0); -} -fz_stext_page * -fz_new_stext_page(fz_context *ctx, const fz_rect *mediabox) -{ - fz_stext_page *page = fz_malloc(ctx, sizeof(*page)); - page->mediabox = *mediabox; - page->len = 0; - page->cap = 0; - page->blocks = NULL; - page->next = NULL; - return page; + return ch; } -static void -fz_drop_stext_line_contents(fz_context *ctx, fz_stext_line *line) +static int +direction_from_bidi_class(int bidiclass, int curdir) { - fz_stext_span *span, *next; - for (span = line->first_span; span; span=next) + switch (bidiclass) { - next = span->next; - fz_free(ctx, span->text); - fz_free(ctx, span); - } -} + /* strong */ + case UCDN_BIDI_CLASS_L: return 1; + case UCDN_BIDI_CLASS_R: return -1; + case UCDN_BIDI_CLASS_AL: return -1; -static void -fz_drop_stext_block(fz_context *ctx, fz_stext_block *block) -{ - fz_stext_line *line; - if (block == NULL) - return; - for (line = block->lines; line < block->lines + block->len; line++) - fz_drop_stext_line_contents(ctx, line); - fz_free(ctx, block->lines); - fz_free(ctx, block); -} + /* weak */ + case UCDN_BIDI_CLASS_EN: + case UCDN_BIDI_CLASS_ES: + case UCDN_BIDI_CLASS_ET: + case UCDN_BIDI_CLASS_AN: + case UCDN_BIDI_CLASS_CS: + case UCDN_BIDI_CLASS_NSM: + case UCDN_BIDI_CLASS_BN: + return curdir; -static void -fz_drop_image_block(fz_context *ctx, fz_image_block *block) -{ - if (block == NULL) - return; - fz_drop_image(ctx, block->image); - fz_drop_colorspace(ctx, block->cspace); - fz_free(ctx, block); -} + /* neutral */ + case UCDN_BIDI_CLASS_B: + case UCDN_BIDI_CLASS_S: + case UCDN_BIDI_CLASS_WS: + case UCDN_BIDI_CLASS_ON: + return curdir; -void -fz_drop_stext_page(fz_context *ctx, fz_stext_page *page) -{ - fz_page_block *block; - if (page == NULL) - return; - for (block = page->blocks; block < page->blocks + page->len; block++) - { - switch (block->type) - { - case FZ_PAGE_BLOCK_TEXT: - fz_drop_stext_block(ctx, block->u.text); - break; - case FZ_PAGE_BLOCK_IMAGE: - fz_drop_image_block(ctx, block->u.image); - break; - } + /* embedding, override, pop ... we don't support them */ + default: + return 0; } - fz_free(ctx, page->blocks); - fz_free(ctx, page); } -static fz_stext_span * -fz_new_stext_span(fz_context *ctx, const fz_point *p, int wmode, const fz_matrix *trm) +static int +sign_eq(float x, float y) { - fz_stext_span *span = fz_malloc_struct(ctx, fz_stext_span); - span->ascender_max = 0; - span->descender_min = 0; - span->cap = 0; - span->len = 0; - span->min = *p; - span->max = *p; - span->wmode = wmode; - span->transform.a = trm->a; - span->transform.b = trm->b; - span->transform.c = trm->c; - span->transform.d = trm->d; - span->transform.e = 0; - span->transform.f = 0; - span->text = NULL; - span->next = NULL; - return span; + return (x < 0 && y < 0) || (x > 0 && y > 0) || (x == 0 && y == 0); } -static void -add_char_to_span(fz_context *ctx, fz_stext_span *span, int c, fz_point *p, fz_point *max, fz_stext_style *style) +static int +mat_sign_eq(const fz_matrix *x, const fz_matrix *y) { - if (span->len == span->cap) - { - int newcap = (span->cap ? span->cap * 2 : 16); - span->text = fz_resize_array(ctx, span->text, newcap, sizeof(fz_stext_char)); - span->cap = newcap; - span->bbox = fz_empty_rect; - } - span->max = *max; - if (style->ascender > span->ascender_max) - span->ascender_max = style->ascender; - if (style->descender < span->descender_min) - span->descender_min = style->descender; - span->text[span->len].c = c; - span->text[span->len].p = *p; - span->text[span->len].style = style; - span->len++; + return sign_eq(x->a, y->a) && sign_eq(x->b, y->b) && sign_eq(x->c, y->c) && sign_eq(x->d, y->d); } static void -fz_add_stext_char_imp(fz_context *ctx, fz_stext_device *dev, fz_stext_style *style, int c, int glyph, fz_matrix *trm, float adv, int wmode) +fz_add_stext_char_imp(fz_context *ctx, fz_stext_device *dev, fz_font *font, int c, int glyph, fz_matrix *trm, float adv, int wmode) { - int can_append = 1; + fz_stext_page *page = dev->page; + fz_stext_block *cur_block; + fz_stext_line *cur_line; + + int new_para = 0; + int new_line = 1; int add_space = 0; - fz_point dir, ndir, p, q, r; + fz_point dir, ndir, p, q; float size; fz_point delta; float spacing = 0; float base_offset = 0; + int rtl = 0; + + dev->curdir = direction_from_bidi_class(ucdn_get_bidi_class(c), dev->curdir); + /* dir = direction vector for motion. ndir = normalised(dir) */ if (wmode == 0) { dir.x = 1; @@ -602,17 +273,16 @@ fz_add_stext_char_imp(fz_context *ctx, fz_stext_device *dev, fz_stext_style *sty fz_transform_vector(&dir, trm); ndir = dir; fz_normalize_vector(&ndir); - /* dir = direction vector for motion. ndir = normalised(dir) */ size = fz_matrix_expansion(trm); /* We need to identify where glyphs 'start' (p) and 'stop' (q). - * Each glyph holds it's 'start' position, and the next glyph in the - * span (or span->max if there is no next glyph) holds it's 'end' + * Each glyph holds its 'start' position, and the next glyph in the + * span (or span->max if there is no next glyph) holds its 'end' * position. * * For both horizontal and vertical motion, trm->{e,f} gives the - * bottom left corner of the glyph. + * origin (usually the bottom left) of the glyph. * * In horizontal mode: * + p is bottom left. @@ -636,37 +306,38 @@ fz_add_stext_char_imp(fz_context *ctx, fz_stext_device *dev, fz_stext_style *sty q.y = trm->f; } - if (glyph < 0) + /* Find current position to enter new text. */ + cur_block = page->last_block; + if (cur_block && cur_block->type != FZ_STEXT_BLOCK_TEXT) + cur_block = NULL; + cur_line = cur_block ? cur_block->u.t.last_line : NULL; + + if (cur_line && glyph < 0) { - /* Don't reset 'pen' to start of no-glyph characters in cluster */ - if (dev->cur_span) - q = dev->cur_span->max; - goto no_glyph; + /* Don't advance pen or break lines for no-glyph characters in a cluster */ + add_char_to_line(ctx, page, cur_line, trm, font, size, c, &dev->pen, &dev->pen, 0); + dev->lastchar = c; + return; } - if (dev->cur_span == NULL || - trm->a != dev->cur_span->transform.a || trm->b != dev->cur_span->transform.b || - trm->c != dev->cur_span->transform.c || trm->d != dev->cur_span->transform.d || - dev->cur_span->wmode != wmode) + if (cur_line == NULL || !mat_sign_eq(trm, &dev->trm) || cur_line->wmode != wmode) { - /* If the matrix has changed, or the wmode is different (or - * if we don't have a span at all), then we can't append. */ -#ifdef DEBUG_SPANS - printf("Transform/WMode changed\n"); -#endif - can_append = 0; + /* If the matrix has changed rotation, or the wmode is different (or if we don't have a line at all), + * then we can't append to the current block/line. */ + new_para = 1; + new_line = 1; } else { - delta.x = q.x - dev->cur_span->max.x; - delta.y = q.y - dev->cur_span->max.y; + /* Detect fake bold where text is printed twice in the same place. */ + delta.x = q.x - dev->pen.x; + delta.y = q.y - dev->pen.y; if (delta.x < FLT_EPSILON && delta.y < FLT_EPSILON && c == dev->lastchar) return; - /* Calculate how far we've moved since the end of the current - * span. */ - delta.x = p.x - dev->cur_span->max.x; - delta.y = p.y - dev->cur_span->max.y; + /* Calculate how far we've moved since the last character. */ + delta.x = p.x - dev->pen.x; + delta.y = p.y - dev->pen.y; /* The transform has not changed, so we know we're in the same * direction. Calculate 2 distances; how far off the previous @@ -675,102 +346,129 @@ fz_add_stext_char_imp(fz_context *ctx, fz_stext_device *dev, fz_stext_style *sty spacing = ndir.x * delta.x + ndir.y * delta.y; base_offset = -ndir.y * delta.x + ndir.x * delta.y; - spacing /= size * SPACE_DIST; - if (fabsf(base_offset) < size * 0.1f) + /* Only a small amount off the baseline - we'll take this */ + if (fabsf(base_offset) < size * 0.8f) { - /* Only a small amount off the baseline - we'll take this */ - if (fabsf(spacing) < 1.0f) + /* LTR or neutral character */ + if (dev->curdir >= 0) { - /* Motion is in line, and small. */ - } - else if (spacing >= 1 && spacing < (SPACE_MAX_DIST/SPACE_DIST)) - { - /* Motion is in line, but large enough - * to warrant us adding a space */ - if (dev->lastchar != ' ' && wmode == 0) - add_space = 1; + if (fabs(spacing) < size * SPACE_DIST) + { + /* Motion is in line, and small. */ + new_line = 0; + } + else if (spacing >= size * SPACE_DIST && spacing < size * SPACE_MAX_DIST) + { + /* Motion is in line, but large enough to warrant us adding a space. */ + if (dev->lastchar != ' ' && wmode == 0) + add_space = 1; + new_line = 0; + } + else + { + /* Motion is in line, but large enough to warrant splitting to a new line */ + new_line = 1; + } } + + /* RTL character -- disable space character and column detection heuristics */ else { - /* Motion is in line, but too large - split to a new span */ - can_append = 0; + new_line = 0; + if (spacing > size * SPACE_DIST || spacing < 0) + rtl = 0; /* backward (or big jump to 'right' side) means logical order */ + else + rtl = 1; /* visual order, we need to reverse in a post process pass */ } } + + /* Enough for a new line, but not enough for a new paragraph */ + else if (fabsf(base_offset) < size * 1.3f) + { + /* Check indent to spot text-indent style paragraphs */ + if (wmode == 0 && cur_line && dev->new_obj) + if (fabsf(p.x - dev->start.x) > size * 0.5f) + new_para = 1; + new_line = 1; + } + + /* Way off the baseline - open a new paragraph */ else { - can_append = 0; -#ifdef DEBUG_SPANS - spacing = 0; -#endif + new_para = 1; + new_line = 1; } } -#ifdef DEBUG_SPANS - printf("%c%c append=%d space=%d size=%g spacing=%g base_offset=%g\n", dev->lastchar, c, can_append, add_space, size, spacing, base_offset); -#endif + /* Start a new block (but only at the beginning of a text object) */ + if (new_para || !cur_block) + { + cur_block = add_text_block_to_page(ctx, page); + cur_line = cur_block->u.t.last_line; + } - /* Start a new span */ - if (!can_append) + /* Start a new line */ + if (new_line || !cur_line) { - add_span_to_soup(ctx, dev->spans, dev->cur_span); - dev->cur_span = NULL; - dev->cur_span = fz_new_stext_span(ctx, &p, wmode, trm); - dev->cur_span->spacing = 0; + cur_line = add_line_to_block(ctx, page, cur_block, wmode); + dev->start = p; } /* Add synthetic space */ if (add_space) - { - /* We know we always have a cur_span here */ - r = dev->cur_span->max; - add_char_to_span(ctx, dev->cur_span, ' ', &r, &p, style); - } + add_char_to_line(ctx, page, cur_line, trm, font, size, ' ', &dev->pen, &p, rtl); -no_glyph: - add_char_to_span(ctx, dev->cur_span, c, &p, &q, style); + add_char_to_line(ctx, page, cur_line, trm, font, size, c, &p, &q, rtl); dev->lastchar = c; + dev->pen = q; + + dev->new_obj = 0; + dev->trm = *trm; } static void -fz_add_stext_char(fz_context *ctx, fz_stext_device *dev, fz_stext_style *style, int c, int glyph, fz_matrix *trm, float adv, int wmode) +fz_add_stext_char(fz_context *ctx, fz_stext_device *dev, fz_font *font, int c, int glyph, fz_matrix *trm, float adv, int wmode) { /* ignore when one unicode character maps to multiple glyphs */ if (c == -1) return; if (!(dev->flags & FZ_STEXT_PRESERVE_LIGATURES)) + { switch (c) { case 0xFB00: /* ff */ - fz_add_stext_char_imp(ctx, dev, style, 'f', glyph, trm, adv, wmode); - fz_add_stext_char_imp(ctx, dev, style, 'f', -1, trm, 0, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'f', -1, trm, 0, wmode); return; case 0xFB01: /* fi */ - fz_add_stext_char_imp(ctx, dev, style, 'f', glyph, trm, adv, wmode); - fz_add_stext_char_imp(ctx, dev, style, 'i', -1, trm, 0, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'i', -1, trm, 0, wmode); return; case 0xFB02: /* fl */ - fz_add_stext_char_imp(ctx, dev, style, 'f', glyph, trm, adv, wmode); - fz_add_stext_char_imp(ctx, dev, style, 'l', -1, trm, 0, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'l', -1, trm, 0, wmode); return; case 0xFB03: /* ffi */ - fz_add_stext_char_imp(ctx, dev, style, 'f', glyph, trm, adv, wmode); - fz_add_stext_char_imp(ctx, dev, style, 'f', -1, trm, 0, wmode); - fz_add_stext_char_imp(ctx, dev, style, 'i', -1, trm, 0, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'f', -1, trm, 0, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'i', -1, trm, 0, wmode); return; case 0xFB04: /* ffl */ - fz_add_stext_char_imp(ctx, dev, style, 'f', glyph, trm, adv, wmode); - fz_add_stext_char_imp(ctx, dev, style, 'f', -1, trm, 0, wmode); - fz_add_stext_char_imp(ctx, dev, style, 'l', -1, trm, 0, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'f', glyph, trm, adv, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'f', -1, trm, 0, wmode); + fz_add_stext_char_imp(ctx, dev, font, 'l', -1, trm, 0, wmode); return; case 0xFB05: /* long st */ case 0xFB06: /* st */ - fz_add_stext_char_imp(ctx, dev, style, 's', glyph, trm, adv, wmode); - fz_add_stext_char_imp(ctx, dev, style, 't', -1, trm, 0, wmode); + fz_add_stext_char_imp(ctx, dev, font, 's', glyph, trm, adv, wmode); + fz_add_stext_char_imp(ctx, dev, font, 't', -1, trm, 0, wmode); return; } + } if (!(dev->flags & FZ_STEXT_PRESERVE_WHITESPACE)) + { switch (c) { case 0x0009: /* tab */ @@ -794,56 +492,23 @@ fz_add_stext_char(fz_context *ctx, fz_stext_device *dev, fz_stext_style *style, case 0x3000: /* ideographic space */ c = ' '; } + } - fz_add_stext_char_imp(ctx, dev, style, c, glyph, trm, adv, wmode); + fz_add_stext_char_imp(ctx, dev, font, c, glyph, trm, adv, wmode); } static void -fz_stext_extract(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, const fz_matrix *ctm, fz_stext_style *style) +fz_stext_extract(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, const fz_matrix *ctm) { fz_font *font = span->font; - FT_Face face = fz_font_ft_face(ctx, font); - fz_buffer **t3procs = fz_font_t3_procs(ctx, font); - fz_rect *bbox = fz_font_bbox(ctx, font); fz_matrix tm = span->trm; fz_matrix trm; float adv; - float ascender = 1; - float descender = 0; - int i, err; + int i; if (span->len == 0) return; - if (dev->spans == NULL) - dev->spans = new_span_soup(ctx); - - if (style->wmode == 0) - { - if (face) - { - fz_lock(ctx, FZ_LOCK_FREETYPE); - err = FT_Set_Char_Size(face, 64, 64, 72, 72); - if (err) - fz_warn(ctx, "freetype set character size: %s", ft_error_string(err)); - ascender = (float)face->ascender / face->units_per_EM; - descender = (float)face->descender / face->units_per_EM; - fz_unlock(ctx, FZ_LOCK_FREETYPE); - } - else if (t3procs && !fz_is_empty_rect(bbox)) - { - ascender = bbox->y1; - descender = bbox->y0; - } - } - else - { - ascender = bbox->x1; - descender = bbox->x0; - } - style->ascender = ascender; - style->descender = descender; - tm.e = 0; tm.f = 0; fz_concat(&trm, &tm, ctm); @@ -857,11 +522,11 @@ fz_stext_extract(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, cons /* Calculate bounding box and new pen position based on font metrics */ if (span->items[i].gid >= 0) - adv = fz_advance_glyph(ctx, font, span->items[i].gid, style->wmode); + adv = fz_advance_glyph(ctx, font, span->items[i].gid, span->wmode); else adv = 0; - fz_add_stext_char(ctx, dev, style, span->items[i].ucs, span->items[i].gid, &trm, adv, span->wmode); + fz_add_stext_char(ctx, dev, font, span->items[i].ucs, span->items[i].gid, &trm, adv, span->wmode); } } @@ -870,13 +535,10 @@ fz_stext_fill_text(fz_context *ctx, fz_device *dev, const fz_text *text, const f fz_colorspace *colorspace, const float *color, float alpha, const fz_color_params *color_params) { fz_stext_device *tdev = (fz_stext_device*)dev; - fz_stext_style *style; fz_text_span *span; + tdev->new_obj = 1; for (span = text->head; span; span = span->next) - { - style = fz_lookup_stext_style(ctx, tdev->sheet, span, ctm, colorspace, color, alpha, NULL); - fz_stext_extract(ctx, tdev, span, ctm, style); - } + fz_stext_extract(ctx, tdev, span, ctm); } static void @@ -884,94 +546,61 @@ fz_stext_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_colorspace *colorspace, const float *color, float alpha, const fz_color_params *color_params) { fz_stext_device *tdev = (fz_stext_device*)dev; - fz_stext_style *style; fz_text_span *span; + tdev->new_obj = 1; for (span = text->head; span; span = span->next) - { - style = fz_lookup_stext_style(ctx, tdev->sheet, span, ctm, colorspace, color, alpha, stroke); - fz_stext_extract(ctx, tdev, span, ctm, style); - } + fz_stext_extract(ctx, tdev, span, ctm); } static void fz_stext_clip_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_matrix *ctm, const fz_rect *scissor) { fz_stext_device *tdev = (fz_stext_device*)dev; - fz_stext_style *style; fz_text_span *span; + tdev->new_obj = 1; for (span = text->head; span; span = span->next) - { - style = fz_lookup_stext_style(ctx, tdev->sheet, span, ctm, NULL, NULL, 0, NULL); - fz_stext_extract(ctx, tdev, span, ctm, style); - } + fz_stext_extract(ctx, tdev, span, ctm); } static void fz_stext_clip_stroke_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_stroke_state *stroke, const fz_matrix *ctm, const fz_rect *scissor) { fz_stext_device *tdev = (fz_stext_device*)dev; - fz_stext_style *style; fz_text_span *span; + tdev->new_obj = 1; for (span = text->head; span; span = span->next) - { - style = fz_lookup_stext_style(ctx, tdev->sheet, span, ctm, NULL, NULL, 0, stroke); - fz_stext_extract(ctx, tdev, span, ctm, style); - } + fz_stext_extract(ctx, tdev, span, ctm); } static void fz_stext_ignore_text(fz_context *ctx, fz_device *dev, const fz_text *text, const fz_matrix *ctm) { fz_stext_device *tdev = (fz_stext_device*)dev; - fz_stext_style *style; fz_text_span *span; + tdev->new_obj = 1; for (span = text->head; span; span = span->next) - { - style = fz_lookup_stext_style(ctx, tdev->sheet, span, ctm, NULL, NULL, 0, NULL); - fz_stext_extract(ctx, tdev, span, ctm, style); - } + fz_stext_extract(ctx, tdev, span, ctm); } +/* Images and shadings */ + static void -fz_stext_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, const fz_matrix *ctm, - fz_colorspace *cspace, const float *color, float alpha, const fz_color_params *color_params) +fz_stext_fill_image(fz_context *ctx, fz_device *dev, fz_image *img, const fz_matrix *ctm, float alpha, const fz_color_params *color_params) { fz_stext_device *tdev = (fz_stext_device*)dev; - fz_stext_page *page = tdev->page; - fz_image_block *block; - /* If the alpha is less than 50% then it's probably a watermark or - * effect or something. Skip it */ + /* If the alpha is less than 50% then it's probably a watermark or effect or something. Skip it. */ if (alpha < 0.5f) return; - /* New block */ - if (page->len == page->cap) - { - int newcap = (page->cap ? page->cap*2 : 4); - page->blocks = fz_resize_array(ctx, page->blocks, newcap, sizeof(*page->blocks)); - page->cap = newcap; - } - block = fz_malloc_struct(ctx, fz_image_block); - page->blocks[page->len].type = FZ_PAGE_BLOCK_IMAGE; - page->blocks[page->len].u.image = block; - block->image = fz_keep_image(ctx, img); - block->cspace = fz_keep_colorspace(ctx, cspace); - if (cspace) - memcpy(block->colors, color, sizeof(block->colors[0])*fz_colorspace_n(ctx, cspace)); - block->mat = *ctm; - block->bbox.x0 = 0; - block->bbox.y0 = 0; - block->bbox.x1 = 1; - block->bbox.y1 = 1; - fz_transform_rect(&block->bbox, ctm); - page->len++; + add_image_block_to_page(ctx, tdev->page, ctm, img); } static void -fz_stext_fill_image(fz_context *ctx, fz_device *dev, fz_image *img, const fz_matrix *ctm, float alpha, const fz_color_params *color_params) +fz_stext_fill_image_mask(fz_context *ctx, fz_device *dev, fz_image *img, const fz_matrix *ctm, + fz_colorspace *cspace, const float *color, float alpha, const fz_color_params *color_params) { - fz_stext_fill_image_mask(ctx, dev, img, ctm, NULL, NULL, alpha, color_params); + fz_stext_fill_image(ctx, dev, img, ctm, alpha, color_params); } static fz_image * @@ -1025,103 +654,89 @@ fz_stext_fill_shade(fz_context *ctx, fz_device *dev, fz_shade *shade, const fz_m fz_rethrow(ctx); } -static int -direction_from_bidi_class(int bidiclass, int curdir) -{ - switch (bidiclass) - { - /* strong */ - case UCDN_BIDI_CLASS_L: return 1; - case UCDN_BIDI_CLASS_R: return -1; - case UCDN_BIDI_CLASS_AL: return -1; - - /* weak */ - case UCDN_BIDI_CLASS_EN: - case UCDN_BIDI_CLASS_ES: - case UCDN_BIDI_CLASS_ET: - case UCDN_BIDI_CLASS_AN: - case UCDN_BIDI_CLASS_CS: - case UCDN_BIDI_CLASS_NSM: - case UCDN_BIDI_CLASS_BN: - return curdir; - - /* neutral */ - case UCDN_BIDI_CLASS_B: - case UCDN_BIDI_CLASS_S: - case UCDN_BIDI_CLASS_WS: - case UCDN_BIDI_CLASS_ON: - return curdir; - - /* embedding, override, pop ... we don't support them */ - default: - return 0; - } -} +/* RTL visual to logical order pass */ static void -fz_bidi_reorder_run(fz_stext_span *span, int a, int b, int dir) +fz_bidi_reorder_run(fz_stext_char *a, fz_stext_char *b, int dir) { if (a < b && dir == -1) { - fz_stext_char c; - int m = a + (b - a) / 2; + fz_stext_char tmp; + fz_stext_char *m = a + (b - a) / 2; while (a < m) { b--; - c = span->text[a]; - span->text[a] = span->text[b]; - span->text[b] = c; + + tmp.c = a->c; + tmp.origin = a->origin; + tmp.bbox = a->bbox; + tmp.size = a->size; + tmp.font = a->font; + + a->c = b->c; + a->origin = b->origin; + a->bbox = b->bbox; + a->size = b->size; + a->font = b->font; + + b->c = tmp.c; + b->origin = tmp.origin; + b->bbox = tmp.bbox; + b->size = tmp.size; + b->font = tmp.font; + a++; } } } static void -fz_bidi_reorder_span(fz_stext_span *span) +fz_bidi_reorder_line(fz_stext_line *line) { - int a, b, dir, curdir; + fz_stext_char *a, *b; + int dir, curdir; - a = 0; - curdir = 1; - for (b = 0; b < span->len; b++) + a = line->first_char; + curdir = 0; + for (b = line->first_char; b; b = b->next) { - dir = direction_from_bidi_class(ucdn_get_bidi_class(span->text[b].c), curdir); + dir = b->rtl; if (dir != curdir) { - fz_bidi_reorder_run(span, a, b, curdir); + fz_bidi_reorder_run(a, b, curdir); curdir = dir; a = b; } } - fz_bidi_reorder_run(span, a, b, curdir); + fz_bidi_reorder_run(a, b, curdir); } static void fz_bidi_reorder_stext_page(fz_context *ctx, fz_stext_page *page) { - fz_page_block *pageblock; fz_stext_block *block; fz_stext_line *line; - fz_stext_span *span; - for (pageblock = page->blocks; pageblock < page->blocks + page->len; pageblock++) - if (pageblock->type == FZ_PAGE_BLOCK_TEXT) - for (block = pageblock->u.text, line = block->lines; line < block->lines + block->len; line++) - for (span = line->first_span; span; span = span->next) - fz_bidi_reorder_span(span); + for (block = page->first_block; block; block = block->next) + if (block->type == FZ_STEXT_BLOCK_TEXT) + for (line = block->u.t.first_line; line; line = line->next) + fz_bidi_reorder_line(line); } static void fz_stext_close_device(fz_context *ctx, fz_device *dev) { fz_stext_device *tdev = (fz_stext_device*)dev; + fz_stext_page *page = tdev->page; + fz_stext_block *block; + fz_stext_line *line; - add_span_to_soup(ctx, tdev->spans, tdev->cur_span); - tdev->cur_span = NULL; - - strain_soup(ctx, tdev); + for (block = page->first_block; block; block = block->next) + if (block->type == FZ_STEXT_BLOCK_TEXT) + for (line = block->u.t.first_line; line; line = line->next) + fz_union_rect(&block->bbox, &line->bbox); - /* TODO: smart sorting of blocks in reading order */ + /* TODO: smart sorting of blocks and lines in reading order */ /* TODO: unicode NFC normalization */ fz_bidi_reorder_stext_page(ctx, tdev->page); @@ -1130,9 +745,6 @@ fz_stext_close_device(fz_context *ctx, fz_device *dev) static void fz_stext_drop_device(fz_context *ctx, fz_device *dev) { - fz_stext_device *tdev = (fz_stext_device*)dev; - free_span_soup(ctx, tdev->spans); - tdev->spans = NULL; } fz_stext_options * @@ -1153,7 +765,7 @@ fz_parse_stext_options(fz_context *ctx, fz_stext_options *opts, const char *stri } fz_device * -fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page, const fz_stext_options *opts) +fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options *opts) { fz_stext_device *dev = fz_new_derived_device(ctx, fz_stext_device); @@ -1174,11 +786,12 @@ fz_new_stext_device(fz_context *ctx, fz_stext_sheet *sheet, fz_stext_page *page, dev->super.fill_image_mask = fz_stext_fill_image_mask; } - dev->sheet = sheet; dev->page = page; - dev->spans = NULL; - dev->cur_span = NULL; + dev->pen.x = 0; + dev->pen.y = 0; + dev->trm = fz_identity; dev->lastchar = ' '; + dev->curdir = 1; return (fz_device*)dev; } -- cgit v1.2.3