From f48db730bc063809c8c86b4434a91facd2c4846c Mon Sep 17 00:00:00 2001 From: Tor Andersson Date: Fri, 29 Jun 2018 14:28:46 +0200 Subject: Use real WinAnsi encoding when writing appearance stream text. --- source/pdf/pdf-appearance.c | 12 ++++++------ source/pdf/pdf-encoding.c | 19 +++++++++++++++++++ source/pdf/pdf-encodings.h | 28 ++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 6 deletions(-) (limited to 'source') diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c index b231b6b5..faea5b33 100644 --- a/source/pdf/pdf-appearance.c +++ b/source/pdf/pdf-appearance.c @@ -674,8 +674,8 @@ measure_simple_string(fz_context *ctx, fz_font *font, const char *text) { int c, g; text += fz_chartorune(&c, text); - /* WinAnsi is close enough to Latin-1 to not matter. Use middle dot for unencodable characters. */ - if (c >= 256) c = REPLACEMENT; + c = pdf_winansi_from_unicode(c); + if (c < 0) c = REPLACEMENT; g = fz_encode_character(ctx, font, c); w += fz_advance_glyph(ctx, font, g, 0); } @@ -690,8 +690,8 @@ write_simple_string(fz_context *ctx, fz_buffer *buf, const char *a, const char * { int c; a += fz_chartorune(&c, a); - /* WinAnsi is close enough to Latin-1 to not matter. Use middle dot for unencodable characters. */ - if (c >= 256) c = REPLACEMENT; + c = pdf_winansi_from_unicode(c); + if (c < 0) c = REPLACEMENT; if (c == '(' || c == ')' || c == '\\') fz_append_byte(ctx, buf, '\\'); fz_append_byte(ctx, buf, c); @@ -872,8 +872,8 @@ write_comb_string(fz_context *ctx, fz_buffer *buf, const char *a, const char *b, int c, g; a += fz_chartorune(&c, a); - /* WinAnsi is close enough to Latin-1 to not matter. Use middle dot for unencodable characters. 
/*
 * Unicode -> WinAnsi lookup support.
 *
 * In the patch this was taken from, the table is added to
 * source/pdf/pdf-encodings.h and the lookup function to
 * source/pdf/pdf-encoding.c. The call sites changed by the same patch
 * (in source/pdf/pdf-appearance.c) use it as:
 *
 *	c = pdf_winansi_from_unicode(c);
 *	if (c < 0) c = REPLACEMENT;
 */

/*
 * Pairs of (Unicode code point, WinAnsi byte value) for every mapping at
 * or above U+00A0 plus the scattered code points that WinAnsi places in
 * the 128..159 byte range.
 *
 * The entries MUST stay sorted by ascending 'u': the lookup below is a
 * binary search over this array.
 */
static const struct { unsigned short u, c; } winansi_from_unicode[] = {
	{0x00a0,160}, {0x00a1,161}, {0x00a2,162}, {0x00a3,163}, {0x00a4,164},
	{0x00a5,165}, {0x00a6,166}, {0x00a7,167}, {0x00a8,168}, {0x00a9,169},
	{0x00aa,170}, {0x00ab,171}, {0x00ac,172}, {0x00ad,173}, {0x00ae,174},
	{0x00af,175}, {0x00b0,176}, {0x00b1,177}, {0x00b2,178}, {0x00b3,179},
	{0x00b4,180}, {0x00b5,181}, {0x00b6,182}, {0x00b7,183}, {0x00b8,184},
	{0x00b9,185}, {0x00ba,186}, {0x00bb,187}, {0x00bc,188}, {0x00bd,189},
	{0x00be,190}, {0x00bf,191}, {0x00c0,192}, {0x00c1,193}, {0x00c2,194},
	{0x00c3,195}, {0x00c4,196}, {0x00c5,197}, {0x00c6,198}, {0x00c7,199},
	{0x00c8,200}, {0x00c9,201}, {0x00ca,202}, {0x00cb,203}, {0x00cc,204},
	{0x00cd,205}, {0x00ce,206}, {0x00cf,207}, {0x00d0,208}, {0x00d1,209},
	{0x00d2,210}, {0x00d3,211}, {0x00d4,212}, {0x00d5,213}, {0x00d6,214},
	{0x00d7,215}, {0x00d8,216}, {0x00d9,217}, {0x00da,218}, {0x00db,219},
	{0x00dc,220}, {0x00dd,221}, {0x00de,222}, {0x00df,223}, {0x00e0,224},
	{0x00e1,225}, {0x00e2,226}, {0x00e3,227}, {0x00e4,228}, {0x00e5,229},
	{0x00e6,230}, {0x00e7,231}, {0x00e8,232}, {0x00e9,233}, {0x00ea,234},
	{0x00eb,235}, {0x00ec,236}, {0x00ed,237}, {0x00ee,238}, {0x00ef,239},
	{0x00f0,240}, {0x00f1,241}, {0x00f2,242}, {0x00f3,243}, {0x00f4,244},
	{0x00f5,245}, {0x00f6,246}, {0x00f7,247}, {0x00f8,248}, {0x00f9,249},
	{0x00fa,250}, {0x00fb,251}, {0x00fc,252}, {0x00fd,253}, {0x00fe,254},
	{0x00ff,255}, {0x0152,140}, {0x0153,156}, {0x0160,138}, {0x0161,154},
	{0x0178,159}, {0x017d,142}, {0x017e,158}, {0x0192,131}, {0x02c6,136},
	{0x02dc,152}, {0x2013,150}, {0x2014,151}, {0x2018,145}, {0x2019,146},
	{0x201a,130}, {0x201c,147}, {0x201d,148}, {0x201e,132}, {0x2020,134},
	{0x2021,135}, {0x2022,149}, {0x2026,133}, {0x2030,137}, {0x2039,139},
	{0x203a,155}, {0x20ac,128}, {0x2122,153},
};

/*
 * Map a Unicode code point to its WinAnsi byte value.
 *
 * u: the code point to convert.
 *
 * Returns the byte value (0..255) when the code point is representable,
 * or -1 when it is not. Values 0..127 are returned unchanged; everything
 * else is looked up in winansi_from_unicode[]. Code points 128..159 have
 * no table entry and therefore yield -1.
 */
int pdf_winansi_from_unicode(int u)
{
	int lo, hi;

	/*
	 * Negative values are not code points. Report them with the same -1
	 * sentinel as every other failure instead of letting them fall
	 * through the "below 128" fast path and be echoed back unchanged.
	 */
	if (u < 0)
		return -1;

	/* The low half is passed through without consulting the table. */
	if (u < 128)
		return u;

	lo = 0;
	hi = (int)(sizeof winansi_from_unicode / sizeof winansi_from_unicode[0]) - 1;

	/* Binary search; relies on the table being sorted by 'u'. */
	while (lo <= hi)
	{
		int mid = lo + (hi - lo) / 2;
		int key = winansi_from_unicode[mid].u;

		if (u < key)
			hi = mid - 1;
		else if (u > key)
			lo = mid + 1;
		else
			return winansi_from_unicode[mid].c;
	}

	return -1;
}