summaryrefslogtreecommitdiff
path: root/source/pdf
diff options
context:
space:
mode:
author: Tor Andersson <tor.andersson@artifex.com> 2018-06-29 14:28:46 +0200
committer: Tor Andersson <tor.andersson@artifex.com> 2018-07-05 15:32:34 +0200
commit: f48db730bc063809c8c86b4434a91facd2c4846c (patch)
tree: b7d17f43e7cb161ddfb72382df820cc88cedebed /source/pdf
parent: f61d5f7cada6fa2eb0c457e4bf7d820a4aeb004e (diff)
download: mupdf-f48db730bc063809c8c86b4434a91facd2c4846c.tar.xz
Use real WinAnsi encoding when writing appearance stream text.
Diffstat (limited to 'source/pdf')
-rw-r--r-- source/pdf/pdf-appearance.c | 12
-rw-r--r-- source/pdf/pdf-encoding.c | 19
-rw-r--r-- source/pdf/pdf-encodings.h | 28
3 files changed, 53 insertions, 6 deletions
diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c
index b231b6b5..faea5b33 100644
--- a/source/pdf/pdf-appearance.c
+++ b/source/pdf/pdf-appearance.c
@@ -674,8 +674,8 @@ measure_simple_string(fz_context *ctx, fz_font *font, const char *text)
{
int c, g;
text += fz_chartorune(&c, text);
- /* WinAnsi is close enough to Latin-1 to not matter. Use middle dot for unencodable characters. */
- if (c >= 256) c = REPLACEMENT;
+ c = pdf_winansi_from_unicode(c);
+ if (c < 0) c = REPLACEMENT;
g = fz_encode_character(ctx, font, c);
w += fz_advance_glyph(ctx, font, g, 0);
}
@@ -690,8 +690,8 @@ write_simple_string(fz_context *ctx, fz_buffer *buf, const char *a, const char *
{
int c;
a += fz_chartorune(&c, a);
- /* WinAnsi is close enough to Latin-1 to not matter. Use middle dot for unencodable characters. */
- if (c >= 256) c = REPLACEMENT;
+ c = pdf_winansi_from_unicode(c);
+ if (c < 0) c = REPLACEMENT;
if (c == '(' || c == ')' || c == '\\')
fz_append_byte(ctx, buf, '\\');
fz_append_byte(ctx, buf, c);
@@ -872,8 +872,8 @@ write_comb_string(fz_context *ctx, fz_buffer *buf, const char *a, const char *b,
int c, g;
a += fz_chartorune(&c, a);
- /* WinAnsi is close enough to Latin-1 to not matter. Use middle dot for unencodable characters. */
- if (c >= 256) c = REPLACEMENT;
+ c = pdf_winansi_from_unicode(c);
+ if (c < 0) c = REPLACEMENT;
g = fz_encode_character(ctx, font, c);
gw = fz_advance_glyph(ctx, font, g, 0) * 1000;
diff --git a/source/pdf/pdf-encoding.c b/source/pdf/pdf-encoding.c
index 04874457..f4fe584c 100644
--- a/source/pdf/pdf-encoding.c
+++ b/source/pdf/pdf-encoding.c
@@ -123,3 +123,22 @@ int pdf_greek_from_unicode(int u)
}
return -1;
}
+
+int pdf_winansi_from_unicode(int u) /* Look up the WinAnsi code for code point u; yields -1 when u >= 128 has no table entry. */
+{
+ int l = 0; /* lower bound of the index range still being searched */
+ int r = nelem(winansi_from_unicode) - 1; /* upper bound; assumes nelem() gives the array's element count (defined elsewhere) */
+ if (u < 128) /* values below 128 never consult the table */
+ return u; /* passed through unchanged -- this includes a negative u */
+ while (l <= r) /* bisection over winansi_from_unicode, keyed on its .u field */
+ {
+ int m = (l + r) >> 1; /* midpoint of the remaining range */
+ if (u < winansi_from_unicode[m].u)
+ r = m - 1; /* target can only be in the lower half */
+ else if (u > winansi_from_unicode[m].u)
+ l = m + 1; /* target can only be in the upper half */
+ else
+ return winansi_from_unicode[m].c; /* exact match: hand back the paired code */
+ }
+ return -1; /* range exhausted without a match */
+}
diff --git a/source/pdf/pdf-encodings.h b/source/pdf/pdf-encodings.h
index 3c5bedfd..f9e84c49 100644
--- a/source/pdf/pdf-encodings.h
+++ b/source/pdf/pdf-encodings.h
@@ -341,3 +341,31 @@ static const struct { unsigned short u, c; } iso8859_7_from_unicode[] = {
{0x03cc,252}, {0x03cd,253}, {0x03ce,254}, {0x2015,175}, {0x2018,161},
{0x2019,162}, {0x20ac,164},
};
+
+static const struct { unsigned short u, c; } winansi_from_unicode[] = { /* {code point u, WinAnsi code c} pairs, strictly ascending by u (first entry 0x00a0); pdf_winansi_from_unicode bisects on u, so keep this order */
+ {0x00a0,160}, {0x00a1,161}, {0x00a2,162}, {0x00a3,163}, {0x00a4,164},
+ {0x00a5,165}, {0x00a6,166}, {0x00a7,167}, {0x00a8,168}, {0x00a9,169},
+ {0x00aa,170}, {0x00ab,171}, {0x00ac,172}, {0x00ad,173}, {0x00ae,174},
+ {0x00af,175}, {0x00b0,176}, {0x00b1,177}, {0x00b2,178}, {0x00b3,179},
+ {0x00b4,180}, {0x00b5,181}, {0x00b6,182}, {0x00b7,183}, {0x00b8,184},
+ {0x00b9,185}, {0x00ba,186}, {0x00bb,187}, {0x00bc,188}, {0x00bd,189},
+ {0x00be,190}, {0x00bf,191}, {0x00c0,192}, {0x00c1,193}, {0x00c2,194},
+ {0x00c3,195}, {0x00c4,196}, {0x00c5,197}, {0x00c6,198}, {0x00c7,199},
+ {0x00c8,200}, {0x00c9,201}, {0x00ca,202}, {0x00cb,203}, {0x00cc,204},
+ {0x00cd,205}, {0x00ce,206}, {0x00cf,207}, {0x00d0,208}, {0x00d1,209},
+ {0x00d2,210}, {0x00d3,211}, {0x00d4,212}, {0x00d5,213}, {0x00d6,214},
+ {0x00d7,215}, {0x00d8,216}, {0x00d9,217}, {0x00da,218}, {0x00db,219},
+ {0x00dc,220}, {0x00dd,221}, {0x00de,222}, {0x00df,223}, {0x00e0,224},
+ {0x00e1,225}, {0x00e2,226}, {0x00e3,227}, {0x00e4,228}, {0x00e5,229},
+ {0x00e6,230}, {0x00e7,231}, {0x00e8,232}, {0x00e9,233}, {0x00ea,234},
+ {0x00eb,235}, {0x00ec,236}, {0x00ed,237}, {0x00ee,238}, {0x00ef,239},
+ {0x00f0,240}, {0x00f1,241}, {0x00f2,242}, {0x00f3,243}, {0x00f4,244},
+ {0x00f5,245}, {0x00f6,246}, {0x00f7,247}, {0x00f8,248}, {0x00f9,249},
+ {0x00fa,250}, {0x00fb,251}, {0x00fc,252}, {0x00fd,253}, {0x00fe,254},
+ {0x00ff,255}, {0x0152,140}, {0x0153,156}, {0x0160,138}, {0x0161,154},
+ {0x0178,159}, {0x017d,142}, {0x017e,158}, {0x0192,131}, {0x02c6,136},
+ {0x02dc,152}, {0x2013,150}, {0x2014,151}, {0x2018,145}, {0x2019,146},
+ {0x201a,130}, {0x201c,147}, {0x201d,148}, {0x201e,132}, {0x2020,134},
+ {0x2021,135}, {0x2022,149}, {0x2026,133}, {0x2030,137}, {0x2039,139},
+ {0x203a,155}, {0x20ac,128}, {0x2122,153},
+};