summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Robin Watts <robin.watts@artifex.com> 2015-04-06 13:29:41 +0100
committer Robin Watts <robin.watts@artifex.com> 2015-04-07 13:22:15 +0100
commit 88586d2282a70aad99887d0209c55b0bccfcb86f (patch)
tree fef3dba439bc396b898ab3365e9e596e3bb8a6fd
parent d47fae804572cd0a6cc78e60020e65334a088c0b (diff)
download mupdf-88586d2282a70aad99887d0209c55b0bccfcb86f.tar.xz
Structured text extraction; improve glyph bounding box calculations.
In vertical motion mode, when calculating bboxes we should use horizontal rather than vertical displacements from the 'axis of movement'. In horizontal mode, we displace by 'ascender' and 'descender'. Those concepts don't rotate with the motion mode, so repurpose those fields to hold bbox.x0 and bbox.x1 in vertical mode.
-rw-r--r-- include/mupdf/fitz/structured-text.h 6
-rw-r--r-- source/fitz/stext-device.c 75
2 files changed, 58 insertions, 23 deletions
diff --git a/include/mupdf/fitz/structured-text.h b/include/mupdf/fitz/structured-text.h
index df34a678..e31276ef 100644
--- a/include/mupdf/fitz/structured-text.h
+++ b/include/mupdf/fitz/structured-text.h
@@ -49,6 +49,9 @@ struct fz_text_style_s
float size;
int wmode;
int script;
+ /* Ascender and Descender only have the conventional sense in
+ * horizontal mode; in vertical mode they are rotated too - they are
+ * the maximum and minimum bounds respectively. */
float ascender;
float descender;
/* etc... */
@@ -141,6 +144,9 @@ struct fz_text_span_s
fz_point max; /* Device space */
int wmode; /* 0 for horizontal, 1 for vertical */
fz_matrix transform; /* e and f are always 0 here */
+ /* Ascender_max and Descender_min only have the conventional sense in
+ * horizontal mode; in vertical mode they are rotated too - they are
+ * the maximum and minimum bounds respectively. */
float ascender_max; /* Document space */
float descender_min; /* Document space */
fz_rect bbox; /* Device space */
diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c
index 3ecc3302..8cc4d2ed 100644
--- a/source/fitz/stext-device.c
+++ b/source/fitz/stext-device.c
@@ -64,11 +64,21 @@ fz_text_char_bbox(fz_context *ctx, fz_rect *bbox, fz_text_span *span, int i)
max = &span->max;
else
max = &span->text[i+1].p;
- a.x = 0;
- a.y = span->ascender_max;
+ if (span->wmode == 0)
+ {
+ a.x = 0;
+ a.y = span->ascender_max;
+ d.x = 0;
+ d.y = span->descender_min;
+ }
+ else
+ {
+ a.x = span->ascender_max;
+ a.y = 0;
+ d.x = span->descender_min;
+ d.y = 0;
+ }
fz_transform_vector(&a, &span->transform);
- d.x = 0;
- d.y = span->descender_min;
fz_transform_vector(&d, &span->transform);
bbox->x0 = bbox->x1 = ch->p.x + a.x;
bbox->y0 = bbox->y1 = ch->p.y + a.y;
@@ -92,11 +102,21 @@ add_bbox_to_span(fz_text_span *span)
if (!span)
return;
- a.x = 0;
- a.y = span->ascender_max;
+ if (span->wmode == 0)
+ {
+ a.x = 0;
+ a.y = span->ascender_max;
+ d.x = 0;
+ d.y = span->descender_min;
+ }
+ else
+ {
+ a.x = span->ascender_max;
+ a.y = 0;
+ d.x = span->descender_min;
+ d.y = 0;
+ }
fz_transform_vector(&a, &span->transform);
- d.x = 0;
- d.y = span->descender_min;
fz_transform_vector(&d, &span->transform);
bbox->x0 = bbox->x1 = span->min.x + a.x;
bbox->y0 = bbox->y1 = span->min.y + a.y;
@@ -577,12 +597,13 @@ fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style,
if (dev->cur_span == NULL ||
trm->a != dev->cur_span->transform.a || trm->b != dev->cur_span->transform.b ||
- trm->c != dev->cur_span->transform.c || trm->d != dev->cur_span->transform.d)
+ trm->c != dev->cur_span->transform.c || trm->d != dev->cur_span->transform.d ||
+ dev->cur_span->wmode != wmode)
{
- /* If the matrix has changed (or if we don't have a span at
- * all), then we can't append. */
+ /* If the matrix has changed, or the wmode is different (or
+ * if we don't have a span at all), then we can't append. */
#ifdef DEBUG_SPANS
- printf("Transform changed\n");
+ printf("Transform/WMode changed\n");
#endif
can_append = 0;
}
@@ -714,20 +735,28 @@ fz_text_extract(fz_context *ctx, fz_text_device *dev, fz_text *text, const fz_ma
if (text->len == 0)
return;
- if (font->ft_face)
+ if (style->wmode == 0)
{
- fz_lock(ctx, FZ_LOCK_FREETYPE);
- err = FT_Set_Char_Size(font->ft_face, 64, 64, 72, 72);
- if (err)
- fz_warn(ctx, "freetype set character size: %s", ft_error_string(err));
- ascender = (float)face->ascender / face->units_per_EM;
- descender = (float)face->descender / face->units_per_EM;
- fz_unlock(ctx, FZ_LOCK_FREETYPE);
+ if (font->ft_face)
+ {
+ fz_lock(ctx, FZ_LOCK_FREETYPE);
+ err = FT_Set_Char_Size(font->ft_face, 64, 64, 72, 72);
+ if (err)
+ fz_warn(ctx, "freetype set character size: %s", ft_error_string(err));
+ ascender = (float)face->ascender / face->units_per_EM;
+ descender = (float)face->descender / face->units_per_EM;
+ fz_unlock(ctx, FZ_LOCK_FREETYPE);
+ }
+ else if (font->t3procs && !fz_is_empty_rect(&font->bbox))
+ {
+ ascender = font->bbox.y1;
+ descender = font->bbox.y0;
+ }
}
- else if (font->t3procs && !fz_is_empty_rect(&font->bbox))
+ else
{
- ascender = font->bbox.y1;
- descender = font->bbox.y0;
+ ascender = font->bbox.x1;
+ descender = font->bbox.x0;
}
style->ascender = ascender;
style->descender = descender;