summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2018-11-06 15:41:25 +0100
committer: Tor Andersson <tor.andersson@artifex.com> 2018-11-07 14:06:24 +0100
commit: c4f6ee0e5edfa95a5b9030356f683c12226d91d4 (patch)
tree: 9633a080fa0f4f638aacaf241217d2743080fbb6 /source
parent: 95cc62bdac939c2b65af0323362cd1ac50084e8c (diff)
download: mupdf-c4f6ee0e5edfa95a5b9030356f683c12226d91d4.tar.xz
Fix 700030: Tweak text extraction space adding heuristics.
Ignore space-sized backward motions. Assume that these motions are either extreme levels of kerning or a sign that something else fishy is going on.
Diffstat (limited to 'source')
-rw-r--r-- source/fitz/stext-device.c 19
1 file changed, 12 insertions, 7 deletions
diff --git a/source/fitz/stext-device.c b/source/fitz/stext-device.c
index 0ba944d4..e8f04e1f 100644
--- a/source/fitz/stext-device.c
+++ b/source/fitz/stext-device.c
@@ -317,20 +317,25 @@ fz_add_stext_char_imp(fz_context *ctx, fz_stext_device *dev, fz_font *font, int
{
if (fabsf(spacing) < size * SPACE_DIST)
{
- /* Motion is in line, and small. */
+ /* Motion is in line and small enough to ignore. */
new_line = 0;
}
- else if (spacing >= size * SPACE_DIST && spacing < size * SPACE_MAX_DIST)
+ else if (fabsf(spacing) > size * SPACE_MAX_DIST)
{
- /* Motion is in line, but large enough to warrant us adding a space. */
- if (dev->lastchar != ' ' && wmode == 0)
- add_space = 1;
+ /* Motion is in line and large enough to warrant splitting to a new line */
+ new_line = 1;
+ }
+ else if (spacing < 0)
+ {
+ /* Motion is backward in line! Ignore this odd spacing. */
new_line = 0;
}
else
{
- /* Motion is in line, but large enough to warrant splitting to a new line */
- new_line = 1;
+ /* Motion is forward in line and large enough to warrant us adding a space. */
+ if (dev->lastchar != ' ' && wmode == 0)
+ add_space = 1;
+ new_line = 0;
}
}