diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2016-08-25 12:44:13 +0200 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2016-08-30 16:55:25 +0200 |
commit | 85ee87997e3ee4eb579084f92d109b9b78dcf9c7 (patch) | |
tree | 25bf20b8d2d5acf7a4ff61c52fdd41528f48da6d /source/pdf | |
parent | bf32163059811c822c46e2e17142f517cf9a0bac (diff) | |
download | mupdf-85ee87997e3ee4eb579084f92d109b9b78dcf9c7.tar.xz |
Use U+FFFD instead of '?' for bad encodings in text extraction.
Diffstat (limited to 'source/pdf')
-rw-r--r-- | source/pdf/pdf-op-run.c | 2 | ||||
-rw-r--r-- | source/pdf/pdf-unicode.c | 2 |
2 files changed, 2 insertions, 2 deletions
diff --git a/source/pdf/pdf-op-run.c b/source/pdf/pdf-op-run.c
index 96084cdf..aad0aebf 100644
--- a/source/pdf/pdf-op-run.c
+++ b/source/pdf/pdf-op-run.c
@@ -902,7 +902,7 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid)
 	}
 	if (ucslen == 0 || (ucslen == 1 && ucsbuf[0] == 0))
 	{
-		ucsbuf[0] = '?';
+		ucsbuf[0] = 0xFFFD;
 		ucslen = 1;
 	}
 
diff --git a/source/pdf/pdf-unicode.c b/source/pdf/pdf-unicode.c
index ca84341d..65bda460 100644
--- a/source/pdf/pdf-unicode.c
+++ b/source/pdf/pdf-unicode.c
@@ -96,7 +96,7 @@ pdf_load_to_unicode(fz_context *ctx, pdf_document *doc, pdf_font_desc *font,
 			if (strings[cpt])
 				font->cid_to_ucs[cpt] = pdf_lookup_agl(strings[cpt]);
 			else
-				font->cid_to_ucs[cpt] = '?';
+				font->cid_to_ucs[cpt] = 0xFFFD; /* replacement character */
 		}
 	}
 