summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2016-08-25 12:44:13 +0200
committer: Tor Andersson <tor.andersson@artifex.com> 2016-08-30 16:55:25 +0200
commit: 85ee87997e3ee4eb579084f92d109b9b78dcf9c7 (patch)
tree: 25bf20b8d2d5acf7a4ff61c52fdd41528f48da6d
parent: bf32163059811c822c46e2e17142f517cf9a0bac (diff)
download: mupdf-85ee87997e3ee4eb579084f92d109b9b78dcf9c7.tar.xz
Use U+FFFD instead of '?' for bad encodings in text extraction.
-rw-r--r-- source/fitz/stext-search.c 2
-rw-r--r-- source/fitz/util.c 2
-rw-r--r-- source/pdf/pdf-op-run.c 2
-rw-r--r-- source/pdf/pdf-unicode.c 2
-rw-r--r-- source/xps/xps-glyphs.c 2
5 files changed, 5 insertions, 5 deletions
diff --git a/source/fitz/stext-search.c b/source/fitz/stext-search.c
index 4afbe6b7..2246f627 100644
--- a/source/fitz/stext-search.c
+++ b/source/fitz/stext-search.c
@@ -259,7 +259,7 @@ fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect)
fz_stext_char_bbox(ctx, &hitbox, span, i);
c = span->text[i].c;
if (c < 32)
- c = '?';
+ c = 0xFFFD;
if (hitbox.x1 >= x0 && hitbox.x0 <= x1 && hitbox.y1 >= y0 && hitbox.y0 <= y1)
{
fz_write_buffer_rune(ctx, buffer, c);
diff --git a/source/fitz/util.c b/source/fitz/util.c
index fc49d84f..4c7b3232 100644
--- a/source/fitz/util.c
+++ b/source/fitz/util.c
@@ -447,7 +447,7 @@ fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text, const fz_rec
fz_stext_char_bbox(ctx, &hitbox, span, i);
c = span->text[i].c;
if (c < 32)
- c = '?';
+ c = 0xFFFD;
if (hitbox.x1 >= x0 && hitbox.x0 <= x1 && hitbox.y1 >= y0 && hitbox.y0 <= y1)
{
saw_text = 1;
diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c
index 96084cdf..aad0aebf 100644
--- a/source/pdf/pdf-op-run.c
+++ b/source/pdf/pdf-op-run.c
@@ -902,7 +902,7 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid)
}
if (ucslen == 0 || (ucslen == 1 && ucsbuf[0] == 0))
{
- ucsbuf[0] = '?';
+ ucsbuf[0] = 0xFFFD;
ucslen = 1;
}
diff --git a/source/pdf/pdf-unicode.c b/source/pdf/pdf-unicode.c
index ca84341d..65bda460 100644
--- a/source/pdf/pdf-unicode.c
+++ b/source/pdf/pdf-unicode.c
@@ -96,7 +96,7 @@ pdf_load_to_unicode(fz_context *ctx, pdf_document *doc, pdf_font_desc *font,
if (strings[cpt])
font->cid_to_ucs[cpt] = pdf_lookup_agl(strings[cpt]);
else
- font->cid_to_ucs[cpt] = '?';
+ font->cid_to_ucs[cpt] = 0xFFFD; /* replacement character */
}
}
diff --git a/source/xps/xps-glyphs.c b/source/xps/xps-glyphs.c
index a5d3f196..407953e8 100644
--- a/source/xps/xps-glyphs.c
+++ b/source/xps/xps-glyphs.c
@@ -389,7 +389,7 @@ xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm,
while ((us && un > 0) || (is && *is))
{
- int char_code = '?';
+ int char_code = 0xFFFD;
int code_count = 1;
int glyph_count = 1;