diff options
author | Robin Watts <robin.watts@artifex.com> | 2015-04-06 13:29:41 +0100 |
---|---|---|
committer | Robin Watts <robin.watts@artifex.com> | 2015-04-07 13:22:15 +0100 |
commit | 88586d2282a70aad99887d0209c55b0bccfcb86f (patch) | |
tree | fef3dba439bc396b898ab3365e9e596e3bb8a6fd | |
parent | d47fae804572cd0a6cc78e60020e65334a088c0b (diff) | |
download | mupdf-88586d2282a70aad99887d0209c55b0bccfcb86f.tar.xz |
Structured text extraction; improve glyph bounding box calculations.
In vertical motion mode, when calculating bboxes we should use
horizontal rather than vertical displacements from the 'axis of
movement'.
In horizontal mode, we displace by 'ascender' and 'descender'.
Those concepts don't rotate with the motion mode, so repurpose
those fields to hold bbox.x0 and bbox.x1 in vertical mode.
-rw-r--r-- | include/mupdf/fitz/structured-text.h | 6 | ||||
-rw-r--r-- | source/fitz/stext-device.c | 75 |
2 files changed, 58 insertions, 23 deletions
diff --git a/include/mupdf/fitz/structured-text.h b/include/mupdf/fitz/structured-text.h index df34a678..e31276ef 100644 --- a/include/mupdf/fitz/structured-text.h +++ b/include/mupdf/fitz/structured-text.h @@ -49,6 +49,9 @@ struct fz_text_style_s float size; int wmode; int script; + /* Ascender and Descender only have the conventional sense in + * horizontal mode; in vertical mode they are rotated too - they are + * the maximum and minimum bounds respectively. */ float ascender; float descender; /* etc... */ @@ -141,6 +144,9 @@ struct fz_text_span_s fz_point max; /* Device space */ int wmode; /* 0 for horizontal, 1 for vertical */ fz_matrix transform; /* e and f are always 0 here */ + /* Ascender_max and Descender_min only have the conventional sense in + * horizontal mode; in vertical mode they are rotated too - they are + * the maximum and minimum bounds respectively. */ float ascender_max; /* Document space */ float descender_min; /* Document space */ fz_rect bbox; /* Device space */ diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c index 3ecc3302..8cc4d2ed 100644 --- a/source/fitz/stext-device.c +++ b/source/fitz/stext-device.c @@ -64,11 +64,21 @@ fz_text_char_bbox(fz_context *ctx, fz_rect *bbox, fz_text_span *span, int i) max = &span->max; else max = &span->text[i+1].p; - a.x = 0; - a.y = span->ascender_max; + if (span->wmode == 0) + { + a.x = 0; + a.y = span->ascender_max; + d.x = 0; + d.y = span->descender_min; + } + else + { + a.x = span->ascender_max; + a.y = 0; + d.x = span->descender_min; + d.y = 0; + } fz_transform_vector(&a, &span->transform); - d.x = 0; - d.y = span->descender_min; fz_transform_vector(&d, &span->transform); bbox->x0 = bbox->x1 = ch->p.x + a.x; bbox->y0 = bbox->y1 = ch->p.y + a.y; @@ -92,11 +102,21 @@ add_bbox_to_span(fz_text_span *span) if (!span) return; - a.x = 0; - a.y = span->ascender_max; + if (span->wmode == 0) + { + a.x = 0; + a.y = span->ascender_max; + d.x = 0; + d.y = span->descender_min; + } + else + { + 
a.x = span->ascender_max; + a.y = 0; + d.x = span->descender_min; + d.y = 0; + } fz_transform_vector(&a, &span->transform); - d.x = 0; - d.y = span->descender_min; fz_transform_vector(&d, &span->transform); bbox->x0 = bbox->x1 = span->min.x + a.x; bbox->y0 = bbox->y1 = span->min.y + a.y; @@ -577,12 +597,13 @@ fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style, if (dev->cur_span == NULL || trm->a != dev->cur_span->transform.a || trm->b != dev->cur_span->transform.b || - trm->c != dev->cur_span->transform.c || trm->d != dev->cur_span->transform.d) + trm->c != dev->cur_span->transform.c || trm->d != dev->cur_span->transform.d || + dev->cur_span->wmode != wmode) { - /* If the matrix has changed (or if we don't have a span at - * all), then we can't append. */ + /* If the matrix has changed, or the wmode is different (or + * if we don't have a span at all), then we can't append. */ #ifdef DEBUG_SPANS - printf("Transform changed\n"); + printf("Transform/WMode changed\n"); #endif can_append = 0; } @@ -714,20 +735,28 @@ fz_text_extract(fz_context *ctx, fz_text_device *dev, fz_text *text, const fz_ma if (text->len == 0) return; - if (font->ft_face) + if (style->wmode == 0) { - fz_lock(ctx, FZ_LOCK_FREETYPE); - err = FT_Set_Char_Size(font->ft_face, 64, 64, 72, 72); - if (err) - fz_warn(ctx, "freetype set character size: %s", ft_error_string(err)); - ascender = (float)face->ascender / face->units_per_EM; - descender = (float)face->descender / face->units_per_EM; - fz_unlock(ctx, FZ_LOCK_FREETYPE); + if (font->ft_face) + { + fz_lock(ctx, FZ_LOCK_FREETYPE); + err = FT_Set_Char_Size(font->ft_face, 64, 64, 72, 72); + if (err) + fz_warn(ctx, "freetype set character size: %s", ft_error_string(err)); + ascender = (float)face->ascender / face->units_per_EM; + descender = (float)face->descender / face->units_per_EM; + fz_unlock(ctx, FZ_LOCK_FREETYPE); + } + else if (font->t3procs && !fz_is_empty_rect(&font->bbox)) + { + ascender = font->bbox.y1; + 
descender = font->bbox.y0; + } } - else if (font->t3procs && !fz_is_empty_rect(&font->bbox)) + else { - ascender = font->bbox.y1; - descender = font->bbox.y0; + ascender = font->bbox.x1; + descender = font->bbox.x0; } style->ascender = ascender; style->descender = descender; |