summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobin Watts <robin.watts@artifex.com>2015-04-06 16:37:49 +0100
committerRobin Watts <robin.watts@artifex.com>2015-04-07 13:22:16 +0100
commit2b36d199c5e9cbf6b94f547ec9e3def61ab17233 (patch)
treeb878b73cead8e77f14254b0c080dea5eba312b26
parent88586d2282a70aad99887d0209c55b0bccfcb86f (diff)
downloadmupdf-2b36d199c5e9cbf6b94f547ec9e3def61ab17233.tar.xz
Fix structured text extraction in vertical mode.
When advancing a glyph in vertical mode, it should advance down the page. The origin of the glyph as supplied is bottom left, not top right - allow for this in calculations. Previously glyphs were not being collated into spans because of this.
-rw-r--r--source/fitz/stext-device.c50
1 files changed, 37 insertions, 13 deletions
diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c
index 8cc4d2ed..6ca26912 100644
--- a/source/fitz/stext-device.c
+++ b/source/fitz/stext-device.c
@@ -572,7 +572,7 @@ fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style,
{
int can_append = 1;
int add_space = 0;
- fz_point dir, ndir, p, q;
+ fz_point dir, ndir, p, q, r, match;
float size;
fz_point delta;
float spacing = 0;
@@ -586,7 +586,7 @@ fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style,
else
{
dir.x = 0;
- dir.y = 1;
+ dir.y = -1;
}
fz_transform_vector(&dir, trm);
ndir = dir;
@@ -595,6 +595,35 @@ fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style,
size = fz_matrix_expansion(trm);
+ /* We need to identify where glyphs 'start' (p) and 'stop' (q).
+ * Each glyph holds its 'start' position, and the next glyph in the
+ * span (or span->max if there is no next glyph) holds its 'end'
+ * position.
+ *
+ * For both horizontal and vertical motion, trm->{e,f} gives the
+ * bottom left corner of the glyph.
+ * In horizontal mode:
+ * + p is bottom left.
+ * + q is the bottom right
+ * In vertical mode:
+ * + p is top left (where it advanced from)
+ * + q is bottom left
+ */
+ if (wmode == 0)
+ {
+ p.x = trm->e;
+ p.y = trm->f;
+ q.x = trm->e + adv * dir.x;
+ q.y = trm->f + adv * dir.y;
+ }
+ else
+ {
+ p.x = trm->e - adv * dir.x;
+ p.y = trm->f - adv * dir.y;
+ q.x = trm->e;
+ q.y = trm->f;
+ }
+
if (dev->cur_span == NULL ||
trm->a != dev->cur_span->transform.a || trm->b != dev->cur_span->transform.b ||
trm->c != dev->cur_span->transform.c || trm->d != dev->cur_span->transform.d ||
@@ -611,8 +640,8 @@ fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style,
{
/* Calculate how far we've moved since the end of the current
* span. */
- delta.x = trm->e - dev->cur_span->max.x;
- delta.y = trm->f - dev->cur_span->max.y;
+ delta.x = p.x - dev->cur_span->max.x;
+ delta.y = p.y - dev->cur_span->max.y;
/* The transform has not changed, so we know we're in the same
* direction. Calculate 2 distances; how far off the previous
@@ -656,8 +685,6 @@ fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style,
printf("%c%c append=%d space=%d size=%g spacing=%g base_offset=%g\n", dev->lastchar, c, can_append, add_space, size, spacing, base_offset);
#endif
- p.x = trm->e;
- p.y = trm->f;
if (can_append == 0)
{
/* Start a new span */
@@ -668,14 +695,11 @@ fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style,
}
if (add_space)
{
- q.x = - 0.2f;
- q.y = 0;
- fz_transform_point(&q, trm);
- add_char_to_span(ctx, dev->cur_span, ' ', &p, &q, style);
+ r.x = - 0.2f;
+ r.y = 0;
+ fz_transform_point(&r, trm);
+ add_char_to_span(ctx, dev->cur_span, ' ', &p, &r, style);
}
- /* Advance the matrix */
- q.x = trm->e += adv * dir.x;
- q.y = trm->f += adv * dir.y;
add_char_to_span(ctx, dev->cur_span, c, &p, &q, style);
}