summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/mupdf/fitz/font.h23
-rw-r--r--include/mupdf/pdf/font.h7
-rw-r--r--platform/win32/libmupdf.vcproj4
-rw-r--r--scripts/8859-1.TXT292
-rw-r--r--scripts/CP1250.TXT274
-rw-r--r--scripts/CP1251.TXT274
-rw-r--r--scripts/makeencoding.py47
-rw-r--r--source/fitz/encodings.c33
-rw-r--r--source/fitz/encodings.h3784
-rw-r--r--source/pdf/pdf-appearance.c6
-rw-r--r--source/pdf/pdf-encoding.c59
-rw-r--r--source/pdf/pdf-encodings.h203
-rw-r--r--source/pdf/pdf-font.c12
13 files changed, 4735 insertions, 283 deletions
diff --git a/include/mupdf/fitz/font.h b/include/mupdf/fitz/font.h
index 4565983c..421f477a 100644
--- a/include/mupdf/fitz/font.h
+++ b/include/mupdf/fitz/font.h
@@ -9,6 +9,29 @@
/* forward declaration for circular dependency */
struct fz_device_s;
+/* Various font encoding tables and lookup functions */
+
+extern const char *fz_glyph_name_from_iso8859_1[256];
+extern const char *fz_glyph_name_from_iso8859_7[256];
+extern const char *fz_glyph_name_from_koi8u[256];
+extern const char *fz_glyph_name_from_windows_1250[256];
+extern const char *fz_glyph_name_from_windows_1251[256];
+extern const char *fz_glyph_name_from_windows_1252[256];
+
+extern unsigned short fz_unicode_from_iso8859_1[256];
+extern unsigned short fz_unicode_from_iso8859_7[256];
+extern unsigned short fz_unicode_from_koi8u[256];
+extern unsigned short fz_unicode_from_windows_1250[256];
+extern unsigned short fz_unicode_from_windows_1251[256];
+extern unsigned short fz_unicode_from_windows_1252[256];
+
+int fz_iso8859_1_from_unicode(int u);
+int fz_iso8859_7_from_unicode(int u);
+int fz_koi8u_from_unicode(int u);
+int fz_windows_1250_from_unicode(int u);
+int fz_windows_1251_from_unicode(int u);
+int fz_windows_1252_from_unicode(int u);
+
/*
An abstract font handle.
*/
diff --git a/include/mupdf/pdf/font.h b/include/mupdf/pdf/font.h
index 812377e5..d386e87c 100644
--- a/include/mupdf/pdf/font.h
+++ b/include/mupdf/pdf/font.h
@@ -30,13 +30,6 @@ extern const char *pdf_mac_expert[256];
extern const char *pdf_win_ansi[256];
extern const char *pdf_standard[256];
-extern const char *pdf_glyph_name_from_koi8u[256];
-extern const char *pdf_glyph_name_from_iso8859_7[256];
-
-int pdf_cyrillic_from_unicode(int u);
-int pdf_greek_from_unicode(int u);
-int pdf_winansi_from_unicode(int u);
-
typedef struct pdf_font_desc_s pdf_font_desc;
typedef struct pdf_hmtx_s pdf_hmtx;
typedef struct pdf_vmtx_s pdf_vmtx;
diff --git a/platform/win32/libmupdf.vcproj b/platform/win32/libmupdf.vcproj
index 18823802..ebef2db0 100644
--- a/platform/win32/libmupdf.vcproj
+++ b/platform/win32/libmupdf.vcproj
@@ -1900,6 +1900,10 @@
>
</File>
<File
+ RelativePath="..\..\source\fitz\encodings.c"
+ >
+ </File>
+ <File
RelativePath="..\..\source\fitz\error.c"
>
</File>
diff --git a/scripts/8859-1.TXT b/scripts/8859-1.TXT
new file mode 100644
index 00000000..3a55afef
--- /dev/null
+++ b/scripts/8859-1.TXT
@@ -0,0 +1,292 @@
+# 8859-1.TXT
+# Date: 2015-12-02 20:19:00 GMT [KW]
+# © 2015 Unicode®, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# Name: ISO/IEC 8859-1:1998 to Unicode
+# Unicode version: 3.0
+# Table version: 2.0
+# Table format: Format A
+# Date: 1999 July 27 (header updated: 2015 December 02)
+# Authors: Ken Whistler <ken@unicode.org>
+#
+# General notes:
+#
+# This table contains the data the Unicode Consortium has on how
+# ISO/IEC 8859-1:1998 characters map into Unicode.
+#
+# Format: Three tab-separated columns
+# Column #1 is the ISO/IEC 8859-1 code (in hex as 0xXX)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 the Unicode name (follows a comment sign, '#')
+#
+# The entries are in ISO/IEC 8859-1 order.
+#
+# Version history
+# 1.0 version: updates 0.1 version by adding mappings for all
+# control characters.
+# 2.0 version: updates to copyright notice and terms of use; no
+# changes to character mappings
+#
+# Updated versions of this file may be found in:
+# http://www.unicode.org/Public/MAPPINGS/
+#
+# Any comments or problems, contact us at:
+# http://www.unicode.org/reporting.html
+#
+0x00 0x0000 # NULL
+0x01 0x0001 # START OF HEADING
+0x02 0x0002 # START OF TEXT
+0x03 0x0003 # END OF TEXT
+0x04 0x0004 # END OF TRANSMISSION
+0x05 0x0005 # ENQUIRY
+0x06 0x0006 # ACKNOWLEDGE
+0x07 0x0007 # BELL
+0x08 0x0008 # BACKSPACE
+0x09 0x0009 # HORIZONTAL TABULATION
+0x0A 0x000A # LINE FEED
+0x0B 0x000B # VERTICAL TABULATION
+0x0C 0x000C # FORM FEED
+0x0D 0x000D # CARRIAGE RETURN
+0x0E 0x000E # SHIFT OUT
+0x0F 0x000F # SHIFT IN
+0x10 0x0010 # DATA LINK ESCAPE
+0x11 0x0011 # DEVICE CONTROL ONE
+0x12 0x0012 # DEVICE CONTROL TWO
+0x13 0x0013 # DEVICE CONTROL THREE
+0x14 0x0014 # DEVICE CONTROL FOUR
+0x15 0x0015 # NEGATIVE ACKNOWLEDGE
+0x16 0x0016 # SYNCHRONOUS IDLE
+0x17 0x0017 # END OF TRANSMISSION BLOCK
+0x18 0x0018 # CANCEL
+0x19 0x0019 # END OF MEDIUM
+0x1A 0x001A # SUBSTITUTE
+0x1B 0x001B # ESCAPE
+0x1C 0x001C # FILE SEPARATOR
+0x1D 0x001D # GROUP SEPARATOR
+0x1E 0x001E # RECORD SEPARATOR
+0x1F 0x001F # UNIT SEPARATOR
+0x20 0x0020 # SPACE
+0x21 0x0021 # EXCLAMATION MARK
+0x22 0x0022 # QUOTATION MARK
+0x23 0x0023 # NUMBER SIGN
+0x24 0x0024 # DOLLAR SIGN
+0x25 0x0025 # PERCENT SIGN
+0x26 0x0026 # AMPERSAND
+0x27 0x0027 # APOSTROPHE
+0x28 0x0028 # LEFT PARENTHESIS
+0x29 0x0029 # RIGHT PARENTHESIS
+0x2A 0x002A # ASTERISK
+0x2B 0x002B # PLUS SIGN
+0x2C 0x002C # COMMA
+0x2D 0x002D # HYPHEN-MINUS
+0x2E 0x002E # FULL STOP
+0x2F 0x002F # SOLIDUS
+0x30 0x0030 # DIGIT ZERO
+0x31 0x0031 # DIGIT ONE
+0x32 0x0032 # DIGIT TWO
+0x33 0x0033 # DIGIT THREE
+0x34 0x0034 # DIGIT FOUR
+0x35 0x0035 # DIGIT FIVE
+0x36 0x0036 # DIGIT SIX
+0x37 0x0037 # DIGIT SEVEN
+0x38 0x0038 # DIGIT EIGHT
+0x39 0x0039 # DIGIT NINE
+0x3A 0x003A # COLON
+0x3B 0x003B # SEMICOLON
+0x3C 0x003C # LESS-THAN SIGN
+0x3D 0x003D # EQUALS SIGN
+0x3E 0x003E # GREATER-THAN SIGN
+0x3F 0x003F # QUESTION MARK
+0x40 0x0040 # COMMERCIAL AT
+0x41 0x0041 # LATIN CAPITAL LETTER A
+0x42 0x0042 # LATIN CAPITAL LETTER B
+0x43 0x0043 # LATIN CAPITAL LETTER C
+0x44 0x0044 # LATIN CAPITAL LETTER D
+0x45 0x0045 # LATIN CAPITAL LETTER E
+0x46 0x0046 # LATIN CAPITAL LETTER F
+0x47 0x0047 # LATIN CAPITAL LETTER G
+0x48 0x0048 # LATIN CAPITAL LETTER H
+0x49 0x0049 # LATIN CAPITAL LETTER I
+0x4A 0x004A # LATIN CAPITAL LETTER J
+0x4B 0x004B # LATIN CAPITAL LETTER K
+0x4C 0x004C # LATIN CAPITAL LETTER L
+0x4D 0x004D # LATIN CAPITAL LETTER M
+0x4E 0x004E # LATIN CAPITAL LETTER N
+0x4F 0x004F # LATIN CAPITAL LETTER O
+0x50 0x0050 # LATIN CAPITAL LETTER P
+0x51 0x0051 # LATIN CAPITAL LETTER Q
+0x52 0x0052 # LATIN CAPITAL LETTER R
+0x53 0x0053 # LATIN CAPITAL LETTER S
+0x54 0x0054 # LATIN CAPITAL LETTER T
+0x55 0x0055 # LATIN CAPITAL LETTER U
+0x56 0x0056 # LATIN CAPITAL LETTER V
+0x57 0x0057 # LATIN CAPITAL LETTER W
+0x58 0x0058 # LATIN CAPITAL LETTER X
+0x59 0x0059 # LATIN CAPITAL LETTER Y
+0x5A 0x005A # LATIN CAPITAL LETTER Z
+0x5B 0x005B # LEFT SQUARE BRACKET
+0x5C 0x005C # REVERSE SOLIDUS
+0x5D 0x005D # RIGHT SQUARE BRACKET
+0x5E 0x005E # CIRCUMFLEX ACCENT
+0x5F 0x005F # LOW LINE
+0x60 0x0060 # GRAVE ACCENT
+0x61 0x0061 # LATIN SMALL LETTER A
+0x62 0x0062 # LATIN SMALL LETTER B
+0x63 0x0063 # LATIN SMALL LETTER C
+0x64 0x0064 # LATIN SMALL LETTER D
+0x65 0x0065 # LATIN SMALL LETTER E
+0x66 0x0066 # LATIN SMALL LETTER F
+0x67 0x0067 # LATIN SMALL LETTER G
+0x68 0x0068 # LATIN SMALL LETTER H
+0x69 0x0069 # LATIN SMALL LETTER I
+0x6A 0x006A # LATIN SMALL LETTER J
+0x6B 0x006B # LATIN SMALL LETTER K
+0x6C 0x006C # LATIN SMALL LETTER L
+0x6D 0x006D # LATIN SMALL LETTER M
+0x6E 0x006E # LATIN SMALL LETTER N
+0x6F 0x006F # LATIN SMALL LETTER O
+0x70 0x0070 # LATIN SMALL LETTER P
+0x71 0x0071 # LATIN SMALL LETTER Q
+0x72 0x0072 # LATIN SMALL LETTER R
+0x73 0x0073 # LATIN SMALL LETTER S
+0x74 0x0074 # LATIN SMALL LETTER T
+0x75 0x0075 # LATIN SMALL LETTER U
+0x76 0x0076 # LATIN SMALL LETTER V
+0x77 0x0077 # LATIN SMALL LETTER W
+0x78 0x0078 # LATIN SMALL LETTER X
+0x79 0x0079 # LATIN SMALL LETTER Y
+0x7A 0x007A # LATIN SMALL LETTER Z
+0x7B 0x007B # LEFT CURLY BRACKET
+0x7C 0x007C # VERTICAL LINE
+0x7D 0x007D # RIGHT CURLY BRACKET
+0x7E 0x007E # TILDE
+0x7F 0x007F # DELETE
+0x80 0x0080 # <control>
+0x81 0x0081 # <control>
+0x82 0x0082 # <control>
+0x83 0x0083 # <control>
+0x84 0x0084 # <control>
+0x85 0x0085 # <control>
+0x86 0x0086 # <control>
+0x87 0x0087 # <control>
+0x88 0x0088 # <control>
+0x89 0x0089 # <control>
+0x8A 0x008A # <control>
+0x8B 0x008B # <control>
+0x8C 0x008C # <control>
+0x8D 0x008D # <control>
+0x8E 0x008E # <control>
+0x8F 0x008F # <control>
+0x90 0x0090 # <control>
+0x91 0x0091 # <control>
+0x92 0x0092 # <control>
+0x93 0x0093 # <control>
+0x94 0x0094 # <control>
+0x95 0x0095 # <control>
+0x96 0x0096 # <control>
+0x97 0x0097 # <control>
+0x98 0x0098 # <control>
+0x99 0x0099 # <control>
+0x9A 0x009A # <control>
+0x9B 0x009B # <control>
+0x9C 0x009C # <control>
+0x9D 0x009D # <control>
+0x9E 0x009E # <control>
+0x9F 0x009F # <control>
+0xA0 0x00A0 # NO-BREAK SPACE
+0xA1 0x00A1 # INVERTED EXCLAMATION MARK
+0xA2 0x00A2 # CENT SIGN
+0xA3 0x00A3 # POUND SIGN
+0xA4 0x00A4 # CURRENCY SIGN
+0xA5 0x00A5 # YEN SIGN
+0xA6 0x00A6 # BROKEN BAR
+0xA7 0x00A7 # SECTION SIGN
+0xA8 0x00A8 # DIAERESIS
+0xA9 0x00A9 # COPYRIGHT SIGN
+0xAA 0x00AA # FEMININE ORDINAL INDICATOR
+0xAB 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC # NOT SIGN
+0xAD 0x00AD # SOFT HYPHEN
+0xAE 0x00AE # REGISTERED SIGN
+0xAF 0x00AF # MACRON
+0xB0 0x00B0 # DEGREE SIGN
+0xB1 0x00B1 # PLUS-MINUS SIGN
+0xB2 0x00B2 # SUPERSCRIPT TWO
+0xB3 0x00B3 # SUPERSCRIPT THREE
+0xB4 0x00B4 # ACUTE ACCENT
+0xB5 0x00B5 # MICRO SIGN
+0xB6 0x00B6 # PILCROW SIGN
+0xB7 0x00B7 # MIDDLE DOT
+0xB8 0x00B8 # CEDILLA
+0xB9 0x00B9 # SUPERSCRIPT ONE
+0xBA 0x00BA # MASCULINE ORDINAL INDICATOR
+0xBB 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x00BC # VULGAR FRACTION ONE QUARTER
+0xBD 0x00BD # VULGAR FRACTION ONE HALF
+0xBE 0x00BE # VULGAR FRACTION THREE QUARTERS
+0xBF 0x00BF # INVERTED QUESTION MARK
+0xC0 0x00C0 # LATIN CAPITAL LETTER A WITH GRAVE
+0xC1 0x00C1 # LATIN CAPITAL LETTER A WITH ACUTE
+0xC2 0x00C2 # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3 0x00C3 # LATIN CAPITAL LETTER A WITH TILDE
+0xC4 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
+0xC6 0x00C6 # LATIN CAPITAL LETTER AE
+0xC7 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8 0x00C8 # LATIN CAPITAL LETTER E WITH GRAVE
+0xC9 0x00C9 # LATIN CAPITAL LETTER E WITH ACUTE
+0xCA 0x00CA # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0xCB 0x00CB # LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC 0x00CC # LATIN CAPITAL LETTER I WITH GRAVE
+0xCD 0x00CD # LATIN CAPITAL LETTER I WITH ACUTE
+0xCE 0x00CE # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS
+0xD0 0x00D0 # LATIN CAPITAL LETTER ETH (Icelandic)
+0xD1 0x00D1 # LATIN CAPITAL LETTER N WITH TILDE
+0xD2 0x00D2 # LATIN CAPITAL LETTER O WITH GRAVE
+0xD3 0x00D3 # LATIN CAPITAL LETTER O WITH ACUTE
+0xD4 0x00D4 # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5 0x00D5 # LATIN CAPITAL LETTER O WITH TILDE
+0xD6 0x00D6 # LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7 0x00D7 # MULTIPLICATION SIGN
+0xD8 0x00D8 # LATIN CAPITAL LETTER O WITH STROKE
+0xD9 0x00D9 # LATIN CAPITAL LETTER U WITH GRAVE
+0xDA 0x00DA # LATIN CAPITAL LETTER U WITH ACUTE
+0xDB 0x00DB # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xDC 0x00DC # LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD 0x00DD # LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE 0x00DE # LATIN CAPITAL LETTER THORN (Icelandic)
+0xDF 0x00DF # LATIN SMALL LETTER SHARP S (German)
+0xE0 0x00E0 # LATIN SMALL LETTER A WITH GRAVE
+0xE1 0x00E1 # LATIN SMALL LETTER A WITH ACUTE
+0xE2 0x00E2 # LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3 0x00E3 # LATIN SMALL LETTER A WITH TILDE
+0xE4 0x00E4 # LATIN SMALL LETTER A WITH DIAERESIS
+0xE5 0x00E5 # LATIN SMALL LETTER A WITH RING ABOVE
+0xE6 0x00E6 # LATIN SMALL LETTER AE
+0xE7 0x00E7 # LATIN SMALL LETTER C WITH CEDILLA
+0xE8 0x00E8 # LATIN SMALL LETTER E WITH GRAVE
+0xE9 0x00E9 # LATIN SMALL LETTER E WITH ACUTE
+0xEA 0x00EA # LATIN SMALL LETTER E WITH CIRCUMFLEX
+0xEB 0x00EB # LATIN SMALL LETTER E WITH DIAERESIS
+0xEC 0x00EC # LATIN SMALL LETTER I WITH GRAVE
+0xED 0x00ED # LATIN SMALL LETTER I WITH ACUTE
+0xEE 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS
+0xF0 0x00F0 # LATIN SMALL LETTER ETH (Icelandic)
+0xF1 0x00F1 # LATIN SMALL LETTER N WITH TILDE
+0xF2 0x00F2 # LATIN SMALL LETTER O WITH GRAVE
+0xF3 0x00F3 # LATIN SMALL LETTER O WITH ACUTE
+0xF4 0x00F4 # LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5 0x00F5 # LATIN SMALL LETTER O WITH TILDE
+0xF6 0x00F6 # LATIN SMALL LETTER O WITH DIAERESIS
+0xF7 0x00F7 # DIVISION SIGN
+0xF8 0x00F8 # LATIN SMALL LETTER O WITH STROKE
+0xF9 0x00F9 # LATIN SMALL LETTER U WITH GRAVE
+0xFA 0x00FA # LATIN SMALL LETTER U WITH ACUTE
+0xFB 0x00FB # LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xFC 0x00FC # LATIN SMALL LETTER U WITH DIAERESIS
+0xFD 0x00FD # LATIN SMALL LETTER Y WITH ACUTE
+0xFE 0x00FE # LATIN SMALL LETTER THORN (Icelandic)
+0xFF 0x00FF # LATIN SMALL LETTER Y WITH DIAERESIS
diff --git a/scripts/CP1250.TXT b/scripts/CP1250.TXT
new file mode 100644
index 00000000..6bfab938
--- /dev/null
+++ b/scripts/CP1250.TXT
@@ -0,0 +1,274 @@
+#
+# Name: cp1250 to Unicode table
+# Unicode version: 2.0
+# Table version: 2.01
+# Table format: Format A
+# Date: 04/15/98
+#
+# Contact: Shawn.Steele@microsoft.com
+#
+# General notes: none
+#
+# Format: Three tab-separated columns
+# Column #1 is the cp1250 code (in hex)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 is the Unicode name (follows a comment sign, '#')
+#
+# The entries are in cp1250 order
+#
+0x00 0x0000 #NULL
+0x01 0x0001 #START OF HEADING
+0x02 0x0002 #START OF TEXT
+0x03 0x0003 #END OF TEXT
+0x04 0x0004 #END OF TRANSMISSION
+0x05 0x0005 #ENQUIRY
+0x06 0x0006 #ACKNOWLEDGE
+0x07 0x0007 #BELL
+0x08 0x0008 #BACKSPACE
+0x09 0x0009 #HORIZONTAL TABULATION
+0x0A 0x000A #LINE FEED
+0x0B 0x000B #VERTICAL TABULATION
+0x0C 0x000C #FORM FEED
+0x0D 0x000D #CARRIAGE RETURN
+0x0E 0x000E #SHIFT OUT
+0x0F 0x000F #SHIFT IN
+0x10 0x0010 #DATA LINK ESCAPE
+0x11 0x0011 #DEVICE CONTROL ONE
+0x12 0x0012 #DEVICE CONTROL TWO
+0x13 0x0013 #DEVICE CONTROL THREE
+0x14 0x0014 #DEVICE CONTROL FOUR
+0x15 0x0015 #NEGATIVE ACKNOWLEDGE
+0x16 0x0016 #SYNCHRONOUS IDLE
+0x17 0x0017 #END OF TRANSMISSION BLOCK
+0x18 0x0018 #CANCEL
+0x19 0x0019 #END OF MEDIUM
+0x1A 0x001A #SUBSTITUTE
+0x1B 0x001B #ESCAPE
+0x1C 0x001C #FILE SEPARATOR
+0x1D 0x001D #GROUP SEPARATOR
+0x1E 0x001E #RECORD SEPARATOR
+0x1F 0x001F #UNIT SEPARATOR
+0x20 0x0020 #SPACE
+0x21 0x0021 #EXCLAMATION MARK
+0x22 0x0022 #QUOTATION MARK
+0x23 0x0023 #NUMBER SIGN
+0x24 0x0024 #DOLLAR SIGN
+0x25 0x0025 #PERCENT SIGN
+0x26 0x0026 #AMPERSAND
+0x27 0x0027 #APOSTROPHE
+0x28 0x0028 #LEFT PARENTHESIS
+0x29 0x0029 #RIGHT PARENTHESIS
+0x2A 0x002A #ASTERISK
+0x2B 0x002B #PLUS SIGN
+0x2C 0x002C #COMMA
+0x2D 0x002D #HYPHEN-MINUS
+0x2E 0x002E #FULL STOP
+0x2F 0x002F #SOLIDUS
+0x30 0x0030 #DIGIT ZERO
+0x31 0x0031 #DIGIT ONE
+0x32 0x0032 #DIGIT TWO
+0x33 0x0033 #DIGIT THREE
+0x34 0x0034 #DIGIT FOUR
+0x35 0x0035 #DIGIT FIVE
+0x36 0x0036 #DIGIT SIX
+0x37 0x0037 #DIGIT SEVEN
+0x38 0x0038 #DIGIT EIGHT
+0x39 0x0039 #DIGIT NINE
+0x3A 0x003A #COLON
+0x3B 0x003B #SEMICOLON
+0x3C 0x003C #LESS-THAN SIGN
+0x3D 0x003D #EQUALS SIGN
+0x3E 0x003E #GREATER-THAN SIGN
+0x3F 0x003F #QUESTION MARK
+0x40 0x0040 #COMMERCIAL AT
+0x41 0x0041 #LATIN CAPITAL LETTER A
+0x42 0x0042 #LATIN CAPITAL LETTER B
+0x43 0x0043 #LATIN CAPITAL LETTER C
+0x44 0x0044 #LATIN CAPITAL LETTER D
+0x45 0x0045 #LATIN CAPITAL LETTER E
+0x46 0x0046 #LATIN CAPITAL LETTER F
+0x47 0x0047 #LATIN CAPITAL LETTER G
+0x48 0x0048 #LATIN CAPITAL LETTER H
+0x49 0x0049 #LATIN CAPITAL LETTER I
+0x4A 0x004A #LATIN CAPITAL LETTER J
+0x4B 0x004B #LATIN CAPITAL LETTER K
+0x4C 0x004C #LATIN CAPITAL LETTER L
+0x4D 0x004D #LATIN CAPITAL LETTER M
+0x4E 0x004E #LATIN CAPITAL LETTER N
+0x4F 0x004F #LATIN CAPITAL LETTER O
+0x50 0x0050 #LATIN CAPITAL LETTER P
+0x51 0x0051 #LATIN CAPITAL LETTER Q
+0x52 0x0052 #LATIN CAPITAL LETTER R
+0x53 0x0053 #LATIN CAPITAL LETTER S
+0x54 0x0054 #LATIN CAPITAL LETTER T
+0x55 0x0055 #LATIN CAPITAL LETTER U
+0x56 0x0056 #LATIN CAPITAL LETTER V
+0x57 0x0057 #LATIN CAPITAL LETTER W
+0x58 0x0058 #LATIN CAPITAL LETTER X
+0x59 0x0059 #LATIN CAPITAL LETTER Y
+0x5A 0x005A #LATIN CAPITAL LETTER Z
+0x5B 0x005B #LEFT SQUARE BRACKET
+0x5C 0x005C #REVERSE SOLIDUS
+0x5D 0x005D #RIGHT SQUARE BRACKET
+0x5E 0x005E #CIRCUMFLEX ACCENT
+0x5F 0x005F #LOW LINE
+0x60 0x0060 #GRAVE ACCENT
+0x61 0x0061 #LATIN SMALL LETTER A
+0x62 0x0062 #LATIN SMALL LETTER B
+0x63 0x0063 #LATIN SMALL LETTER C
+0x64 0x0064 #LATIN SMALL LETTER D
+0x65 0x0065 #LATIN SMALL LETTER E
+0x66 0x0066 #LATIN SMALL LETTER F
+0x67 0x0067 #LATIN SMALL LETTER G
+0x68 0x0068 #LATIN SMALL LETTER H
+0x69 0x0069 #LATIN SMALL LETTER I
+0x6A 0x006A #LATIN SMALL LETTER J
+0x6B 0x006B #LATIN SMALL LETTER K
+0x6C 0x006C #LATIN SMALL LETTER L
+0x6D 0x006D #LATIN SMALL LETTER M
+0x6E 0x006E #LATIN SMALL LETTER N
+0x6F 0x006F #LATIN SMALL LETTER O
+0x70 0x0070 #LATIN SMALL LETTER P
+0x71 0x0071 #LATIN SMALL LETTER Q
+0x72 0x0072 #LATIN SMALL LETTER R
+0x73 0x0073 #LATIN SMALL LETTER S
+0x74 0x0074 #LATIN SMALL LETTER T
+0x75 0x0075 #LATIN SMALL LETTER U
+0x76 0x0076 #LATIN SMALL LETTER V
+0x77 0x0077 #LATIN SMALL LETTER W
+0x78 0x0078 #LATIN SMALL LETTER X
+0x79 0x0079 #LATIN SMALL LETTER Y
+0x7A 0x007A #LATIN SMALL LETTER Z
+0x7B 0x007B #LEFT CURLY BRACKET
+0x7C 0x007C #VERTICAL LINE
+0x7D 0x007D #RIGHT CURLY BRACKET
+0x7E 0x007E #TILDE
+0x7F 0x007F #DELETE
+0x80 0x20AC #EURO SIGN
+0x81 #UNDEFINED
+0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
+0x83 #UNDEFINED
+0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
+0x85 0x2026 #HORIZONTAL ELLIPSIS
+0x86 0x2020 #DAGGER
+0x87 0x2021 #DOUBLE DAGGER
+0x88 #UNDEFINED
+0x89 0x2030 #PER MILLE SIGN
+0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON
+0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x8C 0x015A #LATIN CAPITAL LETTER S WITH ACUTE
+0x8D 0x0164 #LATIN CAPITAL LETTER T WITH CARON
+0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON
+0x8F 0x0179 #LATIN CAPITAL LETTER Z WITH ACUTE
+0x90 #UNDEFINED
+0x91 0x2018 #LEFT SINGLE QUOTATION MARK
+0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
+0x93 0x201C #LEFT DOUBLE QUOTATION MARK
+0x94 0x201D #RIGHT DOUBLE QUOTATION MARK
+0x95 0x2022 #BULLET
+0x96 0x2013 #EN DASH
+0x97 0x2014 #EM DASH
+0x98 #UNDEFINED
+0x99 0x2122 #TRADE MARK SIGN
+0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON
+0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x9C 0x015B #LATIN SMALL LETTER S WITH ACUTE
+0x9D 0x0165 #LATIN SMALL LETTER T WITH CARON
+0x9E 0x017E #LATIN SMALL LETTER Z WITH CARON
+0x9F 0x017A #LATIN SMALL LETTER Z WITH ACUTE
+0xA0 0x00A0 #NO-BREAK SPACE
+0xA1 0x02C7 #CARON
+0xA2 0x02D8 #BREVE
+0xA3 0x0141 #LATIN CAPITAL LETTER L WITH STROKE
+0xA4 0x00A4 #CURRENCY SIGN
+0xA5 0x0104 #LATIN CAPITAL LETTER A WITH OGONEK
+0xA6 0x00A6 #BROKEN BAR
+0xA7 0x00A7 #SECTION SIGN
+0xA8 0x00A8 #DIAERESIS
+0xA9 0x00A9 #COPYRIGHT SIGN
+0xAA 0x015E #LATIN CAPITAL LETTER S WITH CEDILLA
+0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC #NOT SIGN
+0xAD 0x00AD #SOFT HYPHEN
+0xAE 0x00AE #REGISTERED SIGN
+0xAF 0x017B #LATIN CAPITAL LETTER Z WITH DOT ABOVE
+0xB0 0x00B0 #DEGREE SIGN
+0xB1 0x00B1 #PLUS-MINUS SIGN
+0xB2 0x02DB #OGONEK
+0xB3 0x0142 #LATIN SMALL LETTER L WITH STROKE
+0xB4 0x00B4 #ACUTE ACCENT
+0xB5 0x00B5 #MICRO SIGN
+0xB6 0x00B6 #PILCROW SIGN
+0xB7 0x00B7 #MIDDLE DOT
+0xB8 0x00B8 #CEDILLA
+0xB9 0x0105 #LATIN SMALL LETTER A WITH OGONEK
+0xBA 0x015F #LATIN SMALL LETTER S WITH CEDILLA
+0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x013D #LATIN CAPITAL LETTER L WITH CARON
+0xBD 0x02DD #DOUBLE ACUTE ACCENT
+0xBE 0x013E #LATIN SMALL LETTER L WITH CARON
+0xBF 0x017C #LATIN SMALL LETTER Z WITH DOT ABOVE
+0xC0 0x0154 #LATIN CAPITAL LETTER R WITH ACUTE
+0xC1 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE
+0xC2 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0xC3 0x0102 #LATIN CAPITAL LETTER A WITH BREVE
+0xC4 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS
+0xC5 0x0139 #LATIN CAPITAL LETTER L WITH ACUTE
+0xC6 0x0106 #LATIN CAPITAL LETTER C WITH ACUTE
+0xC7 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA
+0xC8 0x010C #LATIN CAPITAL LETTER C WITH CARON
+0xC9 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE
+0xCA 0x0118 #LATIN CAPITAL LETTER E WITH OGONEK
+0xCB 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS
+0xCC 0x011A #LATIN CAPITAL LETTER E WITH CARON
+0xCD 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE
+0xCE 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0xCF 0x010E #LATIN CAPITAL LETTER D WITH CARON
+0xD0 0x0110 #LATIN CAPITAL LETTER D WITH STROKE
+0xD1 0x0143 #LATIN CAPITAL LETTER N WITH ACUTE
+0xD2 0x0147 #LATIN CAPITAL LETTER N WITH CARON
+0xD3 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE
+0xD4 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xD5 0x0150 #LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0xD6 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS
+0xD7 0x00D7 #MULTIPLICATION SIGN
+0xD8 0x0158 #LATIN CAPITAL LETTER R WITH CARON
+0xD9 0x016E #LATIN CAPITAL LETTER U WITH RING ABOVE
+0xDA 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE
+0xDB 0x0170 #LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0xDC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS
+0xDD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE
+0xDE 0x0162 #LATIN CAPITAL LETTER T WITH CEDILLA
+0xDF 0x00DF #LATIN SMALL LETTER SHARP S
+0xE0 0x0155 #LATIN SMALL LETTER R WITH ACUTE
+0xE1 0x00E1 #LATIN SMALL LETTER A WITH ACUTE
+0xE2 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX
+0xE3 0x0103 #LATIN SMALL LETTER A WITH BREVE
+0xE4 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS
+0xE5 0x013A #LATIN SMALL LETTER L WITH ACUTE
+0xE6 0x0107 #LATIN SMALL LETTER C WITH ACUTE
+0xE7 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA
+0xE8 0x010D #LATIN SMALL LETTER C WITH CARON
+0xE9 0x00E9 #LATIN SMALL LETTER E WITH ACUTE
+0xEA 0x0119 #LATIN SMALL LETTER E WITH OGONEK
+0xEB 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS
+0xEC 0x011B #LATIN SMALL LETTER E WITH CARON
+0xED 0x00ED #LATIN SMALL LETTER I WITH ACUTE
+0xEE 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX
+0xEF 0x010F #LATIN SMALL LETTER D WITH CARON
+0xF0 0x0111 #LATIN SMALL LETTER D WITH STROKE
+0xF1 0x0144 #LATIN SMALL LETTER N WITH ACUTE
+0xF2 0x0148 #LATIN SMALL LETTER N WITH CARON
+0xF3 0x00F3 #LATIN SMALL LETTER O WITH ACUTE
+0xF4 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xF5 0x0151 #LATIN SMALL LETTER O WITH DOUBLE ACUTE
+0xF6 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS
+0xF7 0x00F7 #DIVISION SIGN
+0xF8 0x0159 #LATIN SMALL LETTER R WITH CARON
+0xF9 0x016F #LATIN SMALL LETTER U WITH RING ABOVE
+0xFA 0x00FA #LATIN SMALL LETTER U WITH ACUTE
+0xFB 0x0171 #LATIN SMALL LETTER U WITH DOUBLE ACUTE
+0xFC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS
+0xFD 0x00FD #LATIN SMALL LETTER Y WITH ACUTE
+0xFE 0x0163 #LATIN SMALL LETTER T WITH CEDILLA
+0xFF 0x02D9 #DOT ABOVE
diff --git a/scripts/CP1251.TXT b/scripts/CP1251.TXT
new file mode 100644
index 00000000..4d9b3558
--- /dev/null
+++ b/scripts/CP1251.TXT
@@ -0,0 +1,274 @@
+#
+# Name: cp1251 to Unicode table
+# Unicode version: 2.0
+# Table version: 2.01
+# Table format: Format A
+# Date: 04/15/98
+#
+# Contact: Shawn.Steele@microsoft.com
+#
+# General notes: none
+#
+# Format: Three tab-separated columns
+# Column #1 is the cp1251 code (in hex)
+# Column #2 is the Unicode (in hex as 0xXXXX)
+# Column #3 is the Unicode name (follows a comment sign, '#')
+#
+# The entries are in cp1251 order
+#
+0x00 0x0000 #NULL
+0x01 0x0001 #START OF HEADING
+0x02 0x0002 #START OF TEXT
+0x03 0x0003 #END OF TEXT
+0x04 0x0004 #END OF TRANSMISSION
+0x05 0x0005 #ENQUIRY
+0x06 0x0006 #ACKNOWLEDGE
+0x07 0x0007 #BELL
+0x08 0x0008 #BACKSPACE
+0x09 0x0009 #HORIZONTAL TABULATION
+0x0A 0x000A #LINE FEED
+0x0B 0x000B #VERTICAL TABULATION
+0x0C 0x000C #FORM FEED
+0x0D 0x000D #CARRIAGE RETURN
+0x0E 0x000E #SHIFT OUT
+0x0F 0x000F #SHIFT IN
+0x10 0x0010 #DATA LINK ESCAPE
+0x11 0x0011 #DEVICE CONTROL ONE
+0x12 0x0012 #DEVICE CONTROL TWO
+0x13 0x0013 #DEVICE CONTROL THREE
+0x14 0x0014 #DEVICE CONTROL FOUR
+0x15 0x0015 #NEGATIVE ACKNOWLEDGE
+0x16 0x0016 #SYNCHRONOUS IDLE
+0x17 0x0017 #END OF TRANSMISSION BLOCK
+0x18 0x0018 #CANCEL
+0x19 0x0019 #END OF MEDIUM
+0x1A 0x001A #SUBSTITUTE
+0x1B 0x001B #ESCAPE
+0x1C 0x001C #FILE SEPARATOR
+0x1D 0x001D #GROUP SEPARATOR
+0x1E 0x001E #RECORD SEPARATOR
+0x1F 0x001F #UNIT SEPARATOR
+0x20 0x0020 #SPACE
+0x21 0x0021 #EXCLAMATION MARK
+0x22 0x0022 #QUOTATION MARK
+0x23 0x0023 #NUMBER SIGN
+0x24 0x0024 #DOLLAR SIGN
+0x25 0x0025 #PERCENT SIGN
+0x26 0x0026 #AMPERSAND
+0x27 0x0027 #APOSTROPHE
+0x28 0x0028 #LEFT PARENTHESIS
+0x29 0x0029 #RIGHT PARENTHESIS
+0x2A 0x002A #ASTERISK
+0x2B 0x002B #PLUS SIGN
+0x2C 0x002C #COMMA
+0x2D 0x002D #HYPHEN-MINUS
+0x2E 0x002E #FULL STOP
+0x2F 0x002F #SOLIDUS
+0x30 0x0030 #DIGIT ZERO
+0x31 0x0031 #DIGIT ONE
+0x32 0x0032 #DIGIT TWO
+0x33 0x0033 #DIGIT THREE
+0x34 0x0034 #DIGIT FOUR
+0x35 0x0035 #DIGIT FIVE
+0x36 0x0036 #DIGIT SIX
+0x37 0x0037 #DIGIT SEVEN
+0x38 0x0038 #DIGIT EIGHT
+0x39 0x0039 #DIGIT NINE
+0x3A 0x003A #COLON
+0x3B 0x003B #SEMICOLON
+0x3C 0x003C #LESS-THAN SIGN
+0x3D 0x003D #EQUALS SIGN
+0x3E 0x003E #GREATER-THAN SIGN
+0x3F 0x003F #QUESTION MARK
+0x40 0x0040 #COMMERCIAL AT
+0x41 0x0041 #LATIN CAPITAL LETTER A
+0x42 0x0042 #LATIN CAPITAL LETTER B
+0x43 0x0043 #LATIN CAPITAL LETTER C
+0x44 0x0044 #LATIN CAPITAL LETTER D
+0x45 0x0045 #LATIN CAPITAL LETTER E
+0x46 0x0046 #LATIN CAPITAL LETTER F
+0x47 0x0047 #LATIN CAPITAL LETTER G
+0x48 0x0048 #LATIN CAPITAL LETTER H
+0x49 0x0049 #LATIN CAPITAL LETTER I
+0x4A 0x004A #LATIN CAPITAL LETTER J
+0x4B 0x004B #LATIN CAPITAL LETTER K
+0x4C 0x004C #LATIN CAPITAL LETTER L
+0x4D 0x004D #LATIN CAPITAL LETTER M
+0x4E 0x004E #LATIN CAPITAL LETTER N
+0x4F 0x004F #LATIN CAPITAL LETTER O
+0x50 0x0050 #LATIN CAPITAL LETTER P
+0x51 0x0051 #LATIN CAPITAL LETTER Q
+0x52 0x0052 #LATIN CAPITAL LETTER R
+0x53 0x0053 #LATIN CAPITAL LETTER S
+0x54 0x0054 #LATIN CAPITAL LETTER T
+0x55 0x0055 #LATIN CAPITAL LETTER U
+0x56 0x0056 #LATIN CAPITAL LETTER V
+0x57 0x0057 #LATIN CAPITAL LETTER W
+0x58 0x0058 #LATIN CAPITAL LETTER X
+0x59 0x0059 #LATIN CAPITAL LETTER Y
+0x5A 0x005A #LATIN CAPITAL LETTER Z
+0x5B 0x005B #LEFT SQUARE BRACKET
+0x5C 0x005C #REVERSE SOLIDUS
+0x5D 0x005D #RIGHT SQUARE BRACKET
+0x5E 0x005E #CIRCUMFLEX ACCENT
+0x5F 0x005F #LOW LINE
+0x60 0x0060 #GRAVE ACCENT
+0x61 0x0061 #LATIN SMALL LETTER A
+0x62 0x0062 #LATIN SMALL LETTER B
+0x63 0x0063 #LATIN SMALL LETTER C
+0x64 0x0064 #LATIN SMALL LETTER D
+0x65 0x0065 #LATIN SMALL LETTER E
+0x66 0x0066 #LATIN SMALL LETTER F
+0x67 0x0067 #LATIN SMALL LETTER G
+0x68 0x0068 #LATIN SMALL LETTER H
+0x69 0x0069 #LATIN SMALL LETTER I
+0x6A 0x006A #LATIN SMALL LETTER J
+0x6B 0x006B #LATIN SMALL LETTER K
+0x6C 0x006C #LATIN SMALL LETTER L
+0x6D 0x006D #LATIN SMALL LETTER M
+0x6E 0x006E #LATIN SMALL LETTER N
+0x6F 0x006F #LATIN SMALL LETTER O
+0x70 0x0070 #LATIN SMALL LETTER P
+0x71 0x0071 #LATIN SMALL LETTER Q
+0x72 0x0072 #LATIN SMALL LETTER R
+0x73 0x0073 #LATIN SMALL LETTER S
+0x74 0x0074 #LATIN SMALL LETTER T
+0x75 0x0075 #LATIN SMALL LETTER U
+0x76 0x0076 #LATIN SMALL LETTER V
+0x77 0x0077 #LATIN SMALL LETTER W
+0x78 0x0078 #LATIN SMALL LETTER X
+0x79 0x0079 #LATIN SMALL LETTER Y
+0x7A 0x007A #LATIN SMALL LETTER Z
+0x7B 0x007B #LEFT CURLY BRACKET
+0x7C 0x007C #VERTICAL LINE
+0x7D 0x007D #RIGHT CURLY BRACKET
+0x7E 0x007E #TILDE
+0x7F 0x007F #DELETE
+0x80 0x0402 #CYRILLIC CAPITAL LETTER DJE
+0x81 0x0403 #CYRILLIC CAPITAL LETTER GJE
+0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
+0x83 0x0453 #CYRILLIC SMALL LETTER GJE
+0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
+0x85 0x2026 #HORIZONTAL ELLIPSIS
+0x86 0x2020 #DAGGER
+0x87 0x2021 #DOUBLE DAGGER
+0x88 0x20AC #EURO SIGN
+0x89 0x2030 #PER MILLE SIGN
+0x8A 0x0409 #CYRILLIC CAPITAL LETTER LJE
+0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+0x8C 0x040A #CYRILLIC CAPITAL LETTER NJE
+0x8D 0x040C #CYRILLIC CAPITAL LETTER KJE
+0x8E 0x040B #CYRILLIC CAPITAL LETTER TSHE
+0x8F 0x040F #CYRILLIC CAPITAL LETTER DZHE
+0x90 0x0452 #CYRILLIC SMALL LETTER DJE
+0x91 0x2018 #LEFT SINGLE QUOTATION MARK
+0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
+0x93 0x201C #LEFT DOUBLE QUOTATION MARK
+0x94 0x201D #RIGHT DOUBLE QUOTATION MARK
+0x95 0x2022 #BULLET
+0x96 0x2013 #EN DASH
+0x97 0x2014 #EM DASH
+0x98 #UNDEFINED
+0x99 0x2122 #TRADE MARK SIGN
+0x9A 0x0459 #CYRILLIC SMALL LETTER LJE
+0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+0x9C 0x045A #CYRILLIC SMALL LETTER NJE
+0x9D 0x045C #CYRILLIC SMALL LETTER KJE
+0x9E 0x045B #CYRILLIC SMALL LETTER TSHE
+0x9F 0x045F #CYRILLIC SMALL LETTER DZHE
+0xA0 0x00A0 #NO-BREAK SPACE
+0xA1 0x040E #CYRILLIC CAPITAL LETTER SHORT U
+0xA2 0x045E #CYRILLIC SMALL LETTER SHORT U
+0xA3 0x0408 #CYRILLIC CAPITAL LETTER JE
+0xA4 0x00A4 #CURRENCY SIGN
+0xA5 0x0490 #CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0xA6 0x00A6 #BROKEN BAR
+0xA7 0x00A7 #SECTION SIGN
+0xA8 0x0401 #CYRILLIC CAPITAL LETTER IO
+0xA9 0x00A9 #COPYRIGHT SIGN
+0xAA 0x0404 #CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0xAB 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xAC 0x00AC #NOT SIGN
+0xAD 0x00AD #SOFT HYPHEN
+0xAE 0x00AE #REGISTERED SIGN
+0xAF 0x0407 #CYRILLIC CAPITAL LETTER YI
+0xB0 0x00B0 #DEGREE SIGN
+0xB1 0x00B1 #PLUS-MINUS SIGN
+0xB2 0x0406 #CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0xB3 0x0456 #CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+0xB4 0x0491 #CYRILLIC SMALL LETTER GHE WITH UPTURN
+0xB5 0x00B5 #MICRO SIGN
+0xB6 0x00B6 #PILCROW SIGN
+0xB7 0x00B7 #MIDDLE DOT
+0xB8 0x0451 #CYRILLIC SMALL LETTER IO
+0xB9 0x2116 #NUMERO SIGN
+0xBA 0x0454 #CYRILLIC SMALL LETTER UKRAINIAN IE
+0xBB 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0xBC 0x0458 #CYRILLIC SMALL LETTER JE
+0xBD 0x0405 #CYRILLIC CAPITAL LETTER DZE
+0xBE 0x0455 #CYRILLIC SMALL LETTER DZE
+0xBF 0x0457 #CYRILLIC SMALL LETTER YI
+0xC0 0x0410 #CYRILLIC CAPITAL LETTER A
+0xC1 0x0411 #CYRILLIC CAPITAL LETTER BE
+0xC2 0x0412 #CYRILLIC CAPITAL LETTER VE
+0xC3 0x0413 #CYRILLIC CAPITAL LETTER GHE
+0xC4 0x0414 #CYRILLIC CAPITAL LETTER DE
+0xC5 0x0415 #CYRILLIC CAPITAL LETTER IE
+0xC6 0x0416 #CYRILLIC CAPITAL LETTER ZHE
+0xC7 0x0417 #CYRILLIC CAPITAL LETTER ZE
+0xC8 0x0418 #CYRILLIC CAPITAL LETTER I
+0xC9 0x0419 #CYRILLIC CAPITAL LETTER SHORT I
+0xCA 0x041A #CYRILLIC CAPITAL LETTER KA
+0xCB 0x041B #CYRILLIC CAPITAL LETTER EL
+0xCC 0x041C #CYRILLIC CAPITAL LETTER EM
+0xCD 0x041D #CYRILLIC CAPITAL LETTER EN
+0xCE 0x041E #CYRILLIC CAPITAL LETTER O
+0xCF 0x041F #CYRILLIC CAPITAL LETTER PE
+0xD0 0x0420 #CYRILLIC CAPITAL LETTER ER
+0xD1 0x0421 #CYRILLIC CAPITAL LETTER ES
+0xD2 0x0422 #CYRILLIC CAPITAL LETTER TE
+0xD3 0x0423 #CYRILLIC CAPITAL LETTER U
+0xD4 0x0424 #CYRILLIC CAPITAL LETTER EF
+0xD5 0x0425 #CYRILLIC CAPITAL LETTER HA
+0xD6 0x0426 #CYRILLIC CAPITAL LETTER TSE
+0xD7 0x0427 #CYRILLIC CAPITAL LETTER CHE
+0xD8 0x0428 #CYRILLIC CAPITAL LETTER SHA
+0xD9 0x0429 #CYRILLIC CAPITAL LETTER SHCHA
+0xDA 0x042A #CYRILLIC CAPITAL LETTER HARD SIGN
+0xDB 0x042B #CYRILLIC CAPITAL LETTER YERU
+0xDC 0x042C #CYRILLIC CAPITAL LETTER SOFT SIGN
+0xDD 0x042D #CYRILLIC CAPITAL LETTER E
+0xDE 0x042E #CYRILLIC CAPITAL LETTER YU
+0xDF 0x042F #CYRILLIC CAPITAL LETTER YA
+0xE0 0x0430 #CYRILLIC SMALL LETTER A
+0xE1 0x0431 #CYRILLIC SMALL LETTER BE
+0xE2 0x0432 #CYRILLIC SMALL LETTER VE
+0xE3 0x0433 #CYRILLIC SMALL LETTER GHE
+0xE4 0x0434 #CYRILLIC SMALL LETTER DE
+0xE5 0x0435 #CYRILLIC SMALL LETTER IE
+0xE6 0x0436 #CYRILLIC SMALL LETTER ZHE
+0xE7 0x0437 #CYRILLIC SMALL LETTER ZE
+0xE8 0x0438 #CYRILLIC SMALL LETTER I
+0xE9 0x0439 #CYRILLIC SMALL LETTER SHORT I
+0xEA 0x043A #CYRILLIC SMALL LETTER KA
+0xEB 0x043B #CYRILLIC SMALL LETTER EL
+0xEC 0x043C #CYRILLIC SMALL LETTER EM
+0xED 0x043D #CYRILLIC SMALL LETTER EN
+0xEE 0x043E #CYRILLIC SMALL LETTER O
+0xEF 0x043F #CYRILLIC SMALL LETTER PE
+0xF0 0x0440 #CYRILLIC SMALL LETTER ER
+0xF1 0x0441 #CYRILLIC SMALL LETTER ES
+0xF2 0x0442 #CYRILLIC SMALL LETTER TE
+0xF3 0x0443 #CYRILLIC SMALL LETTER U
+0xF4 0x0444 #CYRILLIC SMALL LETTER EF
+0xF5 0x0445 #CYRILLIC SMALL LETTER HA
+0xF6 0x0446 #CYRILLIC SMALL LETTER TSE
+0xF7 0x0447 #CYRILLIC SMALL LETTER CHE
+0xF8 0x0448 #CYRILLIC SMALL LETTER SHA
+0xF9 0x0449 #CYRILLIC SMALL LETTER SHCHA
+0xFA 0x044A #CYRILLIC SMALL LETTER HARD SIGN
+0xFB 0x044B #CYRILLIC SMALL LETTER YERU
+0xFC 0x044C #CYRILLIC SMALL LETTER SOFT SIGN
+0xFD 0x044D #CYRILLIC SMALL LETTER E
+0xFE 0x044E #CYRILLIC SMALL LETTER YU
+0xFF 0x044F #CYRILLIC SMALL LETTER YA
diff --git a/scripts/makeencoding.py b/scripts/makeencoding.py
index 3656a64b..cf24b57c 100644
--- a/scripts/makeencoding.py
+++ b/scripts/makeencoding.py
@@ -1,17 +1,37 @@
# Convert unicode mapping table to C arrays mapping glyph names and unicode values.
#
# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-U.TXT
+# ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT
# ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT
+# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT
+# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
#
+BANNED = [ # glyph names that the glyphlist.txt loader below never assigns to a code point
+ "controlSTX", "controlSOT", "controlETX", "controlEOT", "controlENQ", # control-character names
+ "controlACK", "controlBEL", "controlBS", "controlHT", "controlLF",
+ "controlVT", "controlFF", "controlCR", "controlSO", "controlSI",
+ "controlDLE", "controlDC1", "controlDC2", "controlDC3", "controlDC4",
+ "controlNAK", "controlSYN", "controlETB", "controlCAN", "controlEM",
+ "controlSUB", "controlESC", "controlFS", "controlGS", "controlRS",
+ "controlUS",
+ "SF100000", "SF110000", "SF010000", "SF030000", "SF020000", "SF040000", # SFxxxxxx names (presumably box-drawing aliases; confirm against glyphlist.txt)
+ "SF080000", "SF090000", "SF060000", "SF070000", "SF050000", "SF430000",
+ "SF240000", "SF510000", "SF390000", "SF250000", "SF500000", "SF490000",
+ "SF380000", "SF280000", "SF260000", "SF360000", "SF370000", "SF420000",
+ "SF190000", "SF230000", "SF410000", "SF450000", "SF460000", "SF400000",
+ "SF540000", "SF440000",
+]
+
glyphs = {}
for line in open("scripts/glyphlist.txt").readlines():
if line[0] != '#':
n, u = line.rstrip().split(';')
if len(u) == 4:
u = int(u, base=16)
- glyphs[u] = n
+ if u not in glyphs and n not in BANNED:
+ glyphs[u] = n
def load_table(fn):
table = [0] * 256
@@ -25,15 +45,21 @@ def load_table(fn):
return table
def dump_table(name, table):
- print "const char *pdf_glyph_name_from_%s[%d] = {" % (name, len(table))
+ print "unsigned short fz_unicode_from_%s[256] = {" % name
+ for u in table:
+ print '\t%d,' % u
+ print "};"
+ print
+
+ print "const char *fz_glyph_name_from_%s[%d] = {" % (name, len(table))
for u in table:
if u in glyphs:
- print '"%s",' % glyphs[u]
+ print '\t"%s",' % glyphs[u]
else:
- print '_notdef,'
+ print '\t_notdef,'
print "};"
print
- print "static const struct { unsigned short u, c; } %s_from_unicode[] = {" % name
+
rev = []
i = 0
for u in table:
@@ -42,11 +68,16 @@ def dump_table(name, table):
rev += ['{0x%04x,%d},' % (u, i)]
i = i + 1
rev.sort()
+
+ print "static const struct { unsigned short u, c; } %s_from_unicode[] = {" % name
for s in rev:
- print s
+ print "\t" + s
print "};"
print
-dump_table("koi8u", load_table("scripts/KOI8-U.TXT"))
+dump_table("iso8859_1", load_table("scripts/8859-1.TXT"))
dump_table("iso8859_7", load_table("scripts/8859-7.TXT"))
-dump_table("winansi", load_table("scripts/CP1252.TXT"))
+dump_table("koi8u", load_table("scripts/KOI8-U.TXT"))
+dump_table("windows_1250", load_table("scripts/CP1250.TXT"))
+dump_table("windows_1251", load_table("scripts/CP1251.TXT"))
+dump_table("windows_1252", load_table("scripts/CP1252.TXT"))
diff --git a/source/fitz/encodings.c b/source/fitz/encodings.c
new file mode 100644
index 00000000..004ae90f
--- /dev/null
+++ b/source/fitz/encodings.c
@@ -0,0 +1,33 @@
+#include "mupdf/fitz.h"
+#include "mupdf/pdf.h"
+
+#include "encodings.h"
+
+#include <string.h>
+#include <stdlib.h>
+
+/*
+	FROM_UNICODE(ENC) expands to the complete body of a function with the
+	signature 'int f(int u)' that converts the unicode code point 'u' to
+	the corresponding byte value in the encoding ENC.
+
+	The lookup uses the ENC##_from_unicode table from encodings.h, which
+	the generator script emits sorted by its 'u' member so that it can be
+	binary searched.
+
+	Returns the encoded byte value, or -1 if 'u' has no mapping in the
+	encoding. Negative input is never a valid code point and also
+	yields -1.
+*/
+#define FROM_UNICODE(ENC) \
+{ \
+	int l = 0; \
+	int r = nelem(ENC##_from_unicode) - 1; \
+	/* Reject negative input instead of passing it through as a "byte". */ \
+	if (u < 0) \
+		return -1; \
+	/* Code points below 128 are returned unchanged without a table lookup. */ \
+	if (u < 128) \
+		return u; \
+	/* Binary search over the closed index range [l, r]. */ \
+	while (l <= r) \
+	{ \
+		int m = (l + r) >> 1; \
+		if (u < ENC##_from_unicode[m].u) \
+			r = m - 1; \
+		else if (u > ENC##_from_unicode[m].u) \
+			l = m + 1; \
+		else \
+			return ENC##_from_unicode[m].c; \
+	} \
+	return -1; \
+}
+
+/* One reverse lookup function per supported encoding; see FROM_UNICODE for the contract. */
+int fz_iso8859_1_from_unicode(int u) FROM_UNICODE(iso8859_1)
+int fz_iso8859_7_from_unicode(int u) FROM_UNICODE(iso8859_7)
+int fz_koi8u_from_unicode(int u) FROM_UNICODE(koi8u)
+int fz_windows_1250_from_unicode(int u) FROM_UNICODE(windows_1250)
+int fz_windows_1251_from_unicode(int u) FROM_UNICODE(windows_1251)
+int fz_windows_1252_from_unicode(int u) FROM_UNICODE(windows_1252)
diff --git a/source/fitz/encodings.h b/source/fitz/encodings.h
new file mode 100644
index 00000000..89f81e8f
--- /dev/null
+++ b/source/fitz/encodings.h
@@ -0,0 +1,3784 @@
+#define _notdef NULL /* entry used in the glyph name tables for codes that have no glyph name */
+
+unsigned short fz_unicode_from_iso8859_1[256] = {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47,
+ 48,
+ 49,
+ 50,
+ 51,
+ 52,
+ 53,
+ 54,
+ 55,
+ 56,
+ 57,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 63,
+ 64,
+ 65,
+ 66,
+ 67,
+ 68,
+ 69,
+ 70,
+ 71,
+ 72,
+ 73,
+ 74,
+ 75,
+ 76,
+ 77,
+ 78,
+ 79,
+ 80,
+ 81,
+ 82,
+ 83,
+ 84,
+ 85,
+ 86,
+ 87,
+ 88,
+ 89,
+ 90,
+ 91,
+ 92,
+ 93,
+ 94,
+ 95,
+ 96,
+ 97,
+ 98,
+ 99,
+ 100,
+ 101,
+ 102,
+ 103,
+ 104,
+ 105,
+ 106,
+ 107,
+ 108,
+ 109,
+ 110,
+ 111,
+ 112,
+ 113,
+ 114,
+ 115,
+ 116,
+ 117,
+ 118,
+ 119,
+ 120,
+ 121,
+ 122,
+ 123,
+ 124,
+ 125,
+ 126,
+ 127,
+ 128,
+ 129,
+ 130,
+ 131,
+ 132,
+ 133,
+ 134,
+ 135,
+ 136,
+ 137,
+ 138,
+ 139,
+ 140,
+ 141,
+ 142,
+ 143,
+ 144,
+ 145,
+ 146,
+ 147,
+ 148,
+ 149,
+ 150,
+ 151,
+ 152,
+ 153,
+ 154,
+ 155,
+ 156,
+ 157,
+ 158,
+ 159,
+ 160,
+ 161,
+ 162,
+ 163,
+ 164,
+ 165,
+ 166,
+ 167,
+ 168,
+ 169,
+ 170,
+ 171,
+ 172,
+ 173,
+ 174,
+ 175,
+ 176,
+ 177,
+ 178,
+ 179,
+ 180,
+ 181,
+ 182,
+ 183,
+ 184,
+ 185,
+ 186,
+ 187,
+ 188,
+ 189,
+ 190,
+ 191,
+ 192,
+ 193,
+ 194,
+ 195,
+ 196,
+ 197,
+ 198,
+ 199,
+ 200,
+ 201,
+ 202,
+ 203,
+ 204,
+ 205,
+ 206,
+ 207,
+ 208,
+ 209,
+ 210,
+ 211,
+ 212,
+ 213,
+ 214,
+ 215,
+ 216,
+ 217,
+ 218,
+ 219,
+ 220,
+ 221,
+ 222,
+ 223,
+ 224,
+ 225,
+ 226,
+ 227,
+ 228,
+ 229,
+ 230,
+ 231,
+ 232,
+ 233,
+ 234,
+ 235,
+ 236,
+ 237,
+ 238,
+ 239,
+ 240,
+ 241,
+ 242,
+ 243,
+ 244,
+ 245,
+ 246,
+ 247,
+ 248,
+ 249,
+ 250,
+ 251,
+ 252,
+ 253,
+ 254,
+ 255,
+};
+
+const char *fz_glyph_name_from_iso8859_1[256] = {
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "space",
+ "exclam",
+ "quotedbl",
+ "numbersign",
+ "dollar",
+ "percent",
+ "ampersand",
+ "quotesingle",
+ "parenleft",
+ "parenright",
+ "asterisk",
+ "plus",
+ "comma",
+ "hyphen",
+ "period",
+ "slash",
+ "zero",
+ "one",
+ "two",
+ "three",
+ "four",
+ "five",
+ "six",
+ "seven",
+ "eight",
+ "nine",
+ "colon",
+ "semicolon",
+ "less",
+ "equal",
+ "greater",
+ "question",
+ "at",
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "bracketleft",
+ "backslash",
+ "bracketright",
+ "asciicircum",
+ "underscore",
+ "grave",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ "braceleft",
+ "bar",
+ "braceright",
+ "asciitilde",
+ "controlDEL",
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "nbspace",
+ "exclamdown",
+ "cent",
+ "sterling",
+ "currency",
+ "yen",
+ "brokenbar",
+ "section",
+ "dieresis",
+ "copyright",
+ "ordfeminine",
+ "guillemotleft",
+ "logicalnot",
+ "sfthyphen",
+ "registered",
+ "macron",
+ "degree",
+ "plusminus",
+ "twosuperior",
+ "threesuperior",
+ "acute",
+ "mu",
+ "paragraph",
+ "middot",
+ "cedilla",
+ "onesuperior",
+ "ordmasculine",
+ "guillemotright",
+ "onequarter",
+ "onehalf",
+ "threequarters",
+ "questiondown",
+ "Agrave",
+ "Aacute",
+ "Acircumflex",
+ "Atilde",
+ "Adieresis",
+ "Aring",
+ "AE",
+ "Ccedilla",
+ "Egrave",
+ "Eacute",
+ "Ecircumflex",
+ "Edieresis",
+ "Igrave",
+ "Iacute",
+ "Icircumflex",
+ "Idieresis",
+ "Eth",
+ "Ntilde",
+ "Ograve",
+ "Oacute",
+ "Ocircumflex",
+ "Otilde",
+ "Odieresis",
+ "multiply",
+ "Oslash",
+ "Ugrave",
+ "Uacute",
+ "Ucircumflex",
+ "Udieresis",
+ "Yacute",
+ "Thorn",
+ "germandbls",
+ "agrave",
+ "aacute",
+ "acircumflex",
+ "atilde",
+ "adieresis",
+ "aring",
+ "ae",
+ "ccedilla",
+ "egrave",
+ "eacute",
+ "ecircumflex",
+ "edieresis",
+ "igrave",
+ "iacute",
+ "icircumflex",
+ "idieresis",
+ "eth",
+ "ntilde",
+ "ograve",
+ "oacute",
+ "ocircumflex",
+ "otilde",
+ "odieresis",
+ "divide",
+ "oslash",
+ "ugrave",
+ "uacute",
+ "ucircumflex",
+ "udieresis",
+ "yacute",
+ "thorn",
+ "ydieresis",
+};
+
+static const struct { unsigned short u, c; } iso8859_1_from_unicode[] = {
+ {0x00a0,160},
+ {0x00a1,161},
+ {0x00a2,162},
+ {0x00a3,163},
+ {0x00a4,164},
+ {0x00a5,165},
+ {0x00a6,166},
+ {0x00a7,167},
+ {0x00a8,168},
+ {0x00a9,169},
+ {0x00aa,170},
+ {0x00ab,171},
+ {0x00ac,172},
+ {0x00ad,173},
+ {0x00ae,174},
+ {0x00af,175},
+ {0x00b0,176},
+ {0x00b1,177},
+ {0x00b2,178},
+ {0x00b3,179},
+ {0x00b4,180},
+ {0x00b5,181},
+ {0x00b6,182},
+ {0x00b7,183},
+ {0x00b8,184},
+ {0x00b9,185},
+ {0x00ba,186},
+ {0x00bb,187},
+ {0x00bc,188},
+ {0x00bd,189},
+ {0x00be,190},
+ {0x00bf,191},
+ {0x00c0,192},
+ {0x00c1,193},
+ {0x00c2,194},
+ {0x00c3,195},
+ {0x00c4,196},
+ {0x00c5,197},
+ {0x00c6,198},
+ {0x00c7,199},
+ {0x00c8,200},
+ {0x00c9,201},
+ {0x00ca,202},
+ {0x00cb,203},
+ {0x00cc,204},
+ {0x00cd,205},
+ {0x00ce,206},
+ {0x00cf,207},
+ {0x00d0,208},
+ {0x00d1,209},
+ {0x00d2,210},
+ {0x00d3,211},
+ {0x00d4,212},
+ {0x00d5,213},
+ {0x00d6,214},
+ {0x00d7,215},
+ {0x00d8,216},
+ {0x00d9,217},
+ {0x00da,218},
+ {0x00db,219},
+ {0x00dc,220},
+ {0x00dd,221},
+ {0x00de,222},
+ {0x00df,223},
+ {0x00e0,224},
+ {0x00e1,225},
+ {0x00e2,226},
+ {0x00e3,227},
+ {0x00e4,228},
+ {0x00e5,229},
+ {0x00e6,230},
+ {0x00e7,231},
+ {0x00e8,232},
+ {0x00e9,233},
+ {0x00ea,234},
+ {0x00eb,235},
+ {0x00ec,236},
+ {0x00ed,237},
+ {0x00ee,238},
+ {0x00ef,239},
+ {0x00f0,240},
+ {0x00f1,241},
+ {0x00f2,242},
+ {0x00f3,243},
+ {0x00f4,244},
+ {0x00f5,245},
+ {0x00f6,246},
+ {0x00f7,247},
+ {0x00f8,248},
+ {0x00f9,249},
+ {0x00fa,250},
+ {0x00fb,251},
+ {0x00fc,252},
+ {0x00fd,253},
+ {0x00fe,254},
+ {0x00ff,255},
+};
+
+unsigned short fz_unicode_from_iso8859_7[256] = {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47,
+ 48,
+ 49,
+ 50,
+ 51,
+ 52,
+ 53,
+ 54,
+ 55,
+ 56,
+ 57,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 63,
+ 64,
+ 65,
+ 66,
+ 67,
+ 68,
+ 69,
+ 70,
+ 71,
+ 72,
+ 73,
+ 74,
+ 75,
+ 76,
+ 77,
+ 78,
+ 79,
+ 80,
+ 81,
+ 82,
+ 83,
+ 84,
+ 85,
+ 86,
+ 87,
+ 88,
+ 89,
+ 90,
+ 91,
+ 92,
+ 93,
+ 94,
+ 95,
+ 96,
+ 97,
+ 98,
+ 99,
+ 100,
+ 101,
+ 102,
+ 103,
+ 104,
+ 105,
+ 106,
+ 107,
+ 108,
+ 109,
+ 110,
+ 111,
+ 112,
+ 113,
+ 114,
+ 115,
+ 116,
+ 117,
+ 118,
+ 119,
+ 120,
+ 121,
+ 122,
+ 123,
+ 124,
+ 125,
+ 126,
+ 127,
+ 128,
+ 129,
+ 130,
+ 131,
+ 132,
+ 133,
+ 134,
+ 135,
+ 136,
+ 137,
+ 138,
+ 139,
+ 140,
+ 141,
+ 142,
+ 143,
+ 144,
+ 145,
+ 146,
+ 147,
+ 148,
+ 149,
+ 150,
+ 151,
+ 152,
+ 153,
+ 154,
+ 155,
+ 156,
+ 157,
+ 158,
+ 159,
+ 160,
+ 8216,
+ 8217,
+ 163,
+ 8364,
+ 8367,
+ 166,
+ 167,
+ 168,
+ 169,
+ 890,
+ 171,
+ 172,
+ 173,
+ 0,
+ 8213,
+ 176,
+ 177,
+ 178,
+ 179,
+ 900,
+ 901,
+ 902,
+ 183,
+ 904,
+ 905,
+ 906,
+ 187,
+ 908,
+ 189,
+ 910,
+ 911,
+ 912,
+ 913,
+ 914,
+ 915,
+ 916,
+ 917,
+ 918,
+ 919,
+ 920,
+ 921,
+ 922,
+ 923,
+ 924,
+ 925,
+ 926,
+ 927,
+ 928,
+ 929,
+ 0,
+ 931,
+ 932,
+ 933,
+ 934,
+ 935,
+ 936,
+ 937,
+ 938,
+ 939,
+ 940,
+ 941,
+ 942,
+ 943,
+ 944,
+ 945,
+ 946,
+ 947,
+ 948,
+ 949,
+ 950,
+ 951,
+ 952,
+ 953,
+ 954,
+ 955,
+ 956,
+ 957,
+ 958,
+ 959,
+ 960,
+ 961,
+ 962,
+ 963,
+ 964,
+ 965,
+ 966,
+ 967,
+ 968,
+ 969,
+ 970,
+ 971,
+ 972,
+ 973,
+ 974,
+ 0,
+};
+
+const char *fz_glyph_name_from_iso8859_7[256] = {
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "space",
+ "exclam",
+ "quotedbl",
+ "numbersign",
+ "dollar",
+ "percent",
+ "ampersand",
+ "quotesingle",
+ "parenleft",
+ "parenright",
+ "asterisk",
+ "plus",
+ "comma",
+ "hyphen",
+ "period",
+ "slash",
+ "zero",
+ "one",
+ "two",
+ "three",
+ "four",
+ "five",
+ "six",
+ "seven",
+ "eight",
+ "nine",
+ "colon",
+ "semicolon",
+ "less",
+ "equal",
+ "greater",
+ "question",
+ "at",
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "bracketleft",
+ "backslash",
+ "bracketright",
+ "asciicircum",
+ "underscore",
+ "grave",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ "braceleft",
+ "bar",
+ "braceright",
+ "asciitilde",
+ "controlDEL",
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "nbspace",
+ "quoteleft",
+ "quoteright",
+ "sterling",
+ "Euro",
+ _notdef,
+ "brokenbar",
+ "section",
+ "dieresis",
+ "copyright",
+ "ypogegrammeni",
+ "guillemotleft",
+ "logicalnot",
+ "sfthyphen",
+ _notdef,
+ "afii00208",
+ "degree",
+ "plusminus",
+ "twosuperior",
+ "threesuperior",
+ "tonos",
+ "dialytikatonos",
+ "Alphatonos",
+ "middot",
+ "Epsilontonos",
+ "Etatonos",
+ "Iotatonos",
+ "guillemotright",
+ "Omicrontonos",
+ "onehalf",
+ "Upsilontonos",
+ "Omegatonos",
+ "iotadieresistonos",
+ "Alpha",
+ "Beta",
+ "Gamma",
+ "Deltagreek",
+ "Epsilon",
+ "Zeta",
+ "Eta",
+ "Theta",
+ "Iota",
+ "Kappa",
+ "Lambda",
+ "Mu",
+ "Nu",
+ "Xi",
+ "Omicron",
+ "Pi",
+ "Rho",
+ _notdef,
+ "Sigma",
+ "Tau",
+ "Upsilon",
+ "Phi",
+ "Chi",
+ "Psi",
+ "Omegagreek",
+ "Iotadieresis",
+ "Upsilondieresis",
+ "alphatonos",
+ "epsilontonos",
+ "etatonos",
+ "iotatonos",
+ "upsilondieresistonos",
+ "alpha",
+ "beta",
+ "gamma",
+ "delta",
+ "epsilon",
+ "zeta",
+ "eta",
+ "theta",
+ "iota",
+ "kappa",
+ "lambda",
+ "mugreek",
+ "nu",
+ "xi",
+ "omicron",
+ "pi",
+ "rho",
+ "sigma1",
+ "sigma",
+ "tau",
+ "upsilon",
+ "phi",
+ "chi",
+ "psi",
+ "omega",
+ "iotadieresis",
+ "upsilondieresis",
+ "omicrontonos",
+ "upsilontonos",
+ "omegatonos",
+ _notdef,
+};
+
+static const struct { unsigned short u, c; } iso8859_7_from_unicode[] = {
+ {0x00a0,160},
+ {0x00a3,163},
+ {0x00a6,166},
+ {0x00a7,167},
+ {0x00a8,168},
+ {0x00a9,169},
+ {0x00ab,171},
+ {0x00ac,172},
+ {0x00ad,173},
+ {0x00b0,176},
+ {0x00b1,177},
+ {0x00b2,178},
+ {0x00b3,179},
+ {0x00b7,183},
+ {0x00bb,187},
+ {0x00bd,189},
+ {0x037a,170},
+ {0x0384,180},
+ {0x0385,181},
+ {0x0386,182},
+ {0x0388,184},
+ {0x0389,185},
+ {0x038a,186},
+ {0x038c,188},
+ {0x038e,190},
+ {0x038f,191},
+ {0x0390,192},
+ {0x0391,193},
+ {0x0392,194},
+ {0x0393,195},
+ {0x0394,196},
+ {0x0395,197},
+ {0x0396,198},
+ {0x0397,199},
+ {0x0398,200},
+ {0x0399,201},
+ {0x039a,202},
+ {0x039b,203},
+ {0x039c,204},
+ {0x039d,205},
+ {0x039e,206},
+ {0x039f,207},
+ {0x03a0,208},
+ {0x03a1,209},
+ {0x03a3,211},
+ {0x03a4,212},
+ {0x03a5,213},
+ {0x03a6,214},
+ {0x03a7,215},
+ {0x03a8,216},
+ {0x03a9,217},
+ {0x03aa,218},
+ {0x03ab,219},
+ {0x03ac,220},
+ {0x03ad,221},
+ {0x03ae,222},
+ {0x03af,223},
+ {0x03b0,224},
+ {0x03b1,225},
+ {0x03b2,226},
+ {0x03b3,227},
+ {0x03b4,228},
+ {0x03b5,229},
+ {0x03b6,230},
+ {0x03b7,231},
+ {0x03b8,232},
+ {0x03b9,233},
+ {0x03ba,234},
+ {0x03bb,235},
+ {0x03bc,236},
+ {0x03bd,237},
+ {0x03be,238},
+ {0x03bf,239},
+ {0x03c0,240},
+ {0x03c1,241},
+ {0x03c2,242},
+ {0x03c3,243},
+ {0x03c4,244},
+ {0x03c5,245},
+ {0x03c6,246},
+ {0x03c7,247},
+ {0x03c8,248},
+ {0x03c9,249},
+ {0x03ca,250},
+ {0x03cb,251},
+ {0x03cc,252},
+ {0x03cd,253},
+ {0x03ce,254},
+ {0x2015,175},
+ {0x2018,161},
+ {0x2019,162},
+ {0x20ac,164},
+};
+
+unsigned short fz_unicode_from_koi8u[256] = {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47,
+ 48,
+ 49,
+ 50,
+ 51,
+ 52,
+ 53,
+ 54,
+ 55,
+ 56,
+ 57,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 63,
+ 64,
+ 65,
+ 66,
+ 67,
+ 68,
+ 69,
+ 70,
+ 71,
+ 72,
+ 73,
+ 74,
+ 75,
+ 76,
+ 77,
+ 78,
+ 79,
+ 80,
+ 81,
+ 82,
+ 83,
+ 84,
+ 85,
+ 86,
+ 87,
+ 88,
+ 89,
+ 90,
+ 91,
+ 92,
+ 93,
+ 94,
+ 95,
+ 96,
+ 97,
+ 98,
+ 99,
+ 100,
+ 101,
+ 102,
+ 103,
+ 104,
+ 105,
+ 106,
+ 107,
+ 108,
+ 109,
+ 110,
+ 111,
+ 112,
+ 113,
+ 114,
+ 115,
+ 116,
+ 117,
+ 118,
+ 119,
+ 120,
+ 121,
+ 122,
+ 123,
+ 124,
+ 125,
+ 126,
+ 127,
+ 9472,
+ 9474,
+ 9484,
+ 9488,
+ 9492,
+ 9496,
+ 9500,
+ 9508,
+ 9516,
+ 9524,
+ 9532,
+ 9600,
+ 9604,
+ 9608,
+ 9612,
+ 9616,
+ 9617,
+ 9618,
+ 9619,
+ 8992,
+ 9632,
+ 8729,
+ 8730,
+ 8776,
+ 8804,
+ 8805,
+ 160,
+ 8993,
+ 176,
+ 178,
+ 183,
+ 247,
+ 9552,
+ 9553,
+ 9554,
+ 1105,
+ 1108,
+ 9556,
+ 1110,
+ 1111,
+ 9559,
+ 9560,
+ 9561,
+ 9562,
+ 9563,
+ 1169,
+ 9565,
+ 9566,
+ 9567,
+ 9568,
+ 9569,
+ 1025,
+ 1028,
+ 9571,
+ 1030,
+ 1031,
+ 9574,
+ 9575,
+ 9576,
+ 9577,
+ 9578,
+ 1168,
+ 9580,
+ 169,
+ 1102,
+ 1072,
+ 1073,
+ 1094,
+ 1076,
+ 1077,
+ 1092,
+ 1075,
+ 1093,
+ 1080,
+ 1081,
+ 1082,
+ 1083,
+ 1084,
+ 1085,
+ 1086,
+ 1087,
+ 1103,
+ 1088,
+ 1089,
+ 1090,
+ 1091,
+ 1078,
+ 1074,
+ 1100,
+ 1099,
+ 1079,
+ 1096,
+ 1101,
+ 1097,
+ 1095,
+ 1098,
+ 1070,
+ 1040,
+ 1041,
+ 1062,
+ 1044,
+ 1045,
+ 1060,
+ 1043,
+ 1061,
+ 1048,
+ 1049,
+ 1050,
+ 1051,
+ 1052,
+ 1053,
+ 1054,
+ 1055,
+ 1071,
+ 1056,
+ 1057,
+ 1058,
+ 1059,
+ 1046,
+ 1042,
+ 1068,
+ 1067,
+ 1047,
+ 1064,
+ 1069,
+ 1065,
+ 1063,
+ 1066,
+};
+
+const char *fz_glyph_name_from_koi8u[256] = {
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "space",
+ "exclam",
+ "quotedbl",
+ "numbersign",
+ "dollar",
+ "percent",
+ "ampersand",
+ "quotesingle",
+ "parenleft",
+ "parenright",
+ "asterisk",
+ "plus",
+ "comma",
+ "hyphen",
+ "period",
+ "slash",
+ "zero",
+ "one",
+ "two",
+ "three",
+ "four",
+ "five",
+ "six",
+ "seven",
+ "eight",
+ "nine",
+ "colon",
+ "semicolon",
+ "less",
+ "equal",
+ "greater",
+ "question",
+ "at",
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "bracketleft",
+ "backslash",
+ "bracketright",
+ "asciicircum",
+ "underscore",
+ "grave",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ "braceleft",
+ "bar",
+ "braceright",
+ "asciitilde",
+ "controlDEL",
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "upblock",
+ "dnblock",
+ "block",
+ "lfblock",
+ "rtblock",
+ "ltshade",
+ "shade",
+ "dkshade",
+ "integraltop",
+ "blacksquare",
+ "bulletoperator",
+ "radical",
+ "approxequal",
+ "lessequal",
+ "greaterequal",
+ "nbspace",
+ "integralbottom",
+ "degree",
+ "twosuperior",
+ "middot",
+ "divide",
+ _notdef,
+ _notdef,
+ _notdef,
+ "afii10071",
+ "afii10101",
+ _notdef,
+ "afii10103",
+ "afii10104",
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "afii10098",
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "Iocyrillic",
+ "Ecyrillic",
+ _notdef,
+ "Icyrillic",
+ "Yicyrillic",
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "Gheupturncyrillic",
+ _notdef,
+ "copyright",
+ "afii10096",
+ "acyrillic",
+ "afii10066",
+ "afii10088",
+ "afii10069",
+ "afii10070",
+ "afii10086",
+ "afii10068",
+ "afii10087",
+ "afii10074",
+ "afii10075",
+ "afii10076",
+ "afii10077",
+ "afii10078",
+ "afii10079",
+ "afii10080",
+ "afii10081",
+ "afii10097",
+ "afii10082",
+ "afii10083",
+ "afii10084",
+ "afii10085",
+ "afii10072",
+ "afii10067",
+ "afii10094",
+ "afii10093",
+ "afii10073",
+ "afii10090",
+ "afii10095",
+ "afii10091",
+ "afii10089",
+ "afii10092",
+ "IUcyrillic",
+ "Acyrillic",
+ "Becyrillic",
+ "Tsecyrillic",
+ "Decyrillic",
+ "Iecyrillic",
+ "Efcyrillic",
+ "Gecyrillic",
+ "Khacyrillic",
+ "Iicyrillic",
+ "Iishortcyrillic",
+ "Kacyrillic",
+ "Elcyrillic",
+ "Emcyrillic",
+ "Encyrillic",
+ "Ocyrillic",
+ "Pecyrillic",
+ "IAcyrillic",
+ "Ercyrillic",
+ "Escyrillic",
+ "Tecyrillic",
+ "Ucyrillic",
+ "Zhecyrillic",
+ "Vecyrillic",
+ "Softsigncyrillic",
+ "Yericyrillic",
+ "Zecyrillic",
+ "Shacyrillic",
+ "Ereversedcyrillic",
+ "Shchacyrillic",
+ "Checyrillic",
+ "Hardsigncyrillic",
+};
+
+static const struct { unsigned short u, c; } koi8u_from_unicode[] = {
+ {0x00a0,154},
+ {0x00a9,191},
+ {0x00b0,156},
+ {0x00b2,157},
+ {0x00b7,158},
+ {0x00f7,159},
+ {0x0401,179},
+ {0x0404,180},
+ {0x0406,182},
+ {0x0407,183},
+ {0x0410,225},
+ {0x0411,226},
+ {0x0412,247},
+ {0x0413,231},
+ {0x0414,228},
+ {0x0415,229},
+ {0x0416,246},
+ {0x0417,250},
+ {0x0418,233},
+ {0x0419,234},
+ {0x041a,235},
+ {0x041b,236},
+ {0x041c,237},
+ {0x041d,238},
+ {0x041e,239},
+ {0x041f,240},
+ {0x0420,242},
+ {0x0421,243},
+ {0x0422,244},
+ {0x0423,245},
+ {0x0424,230},
+ {0x0425,232},
+ {0x0426,227},
+ {0x0427,254},
+ {0x0428,251},
+ {0x0429,253},
+ {0x042a,255},
+ {0x042b,249},
+ {0x042c,248},
+ {0x042d,252},
+ {0x042e,224},
+ {0x042f,241},
+ {0x0430,193},
+ {0x0431,194},
+ {0x0432,215},
+ {0x0433,199},
+ {0x0434,196},
+ {0x0435,197},
+ {0x0436,214},
+ {0x0437,218},
+ {0x0438,201},
+ {0x0439,202},
+ {0x043a,203},
+ {0x043b,204},
+ {0x043c,205},
+ {0x043d,206},
+ {0x043e,207},
+ {0x043f,208},
+ {0x0440,210},
+ {0x0441,211},
+ {0x0442,212},
+ {0x0443,213},
+ {0x0444,198},
+ {0x0445,200},
+ {0x0446,195},
+ {0x0447,222},
+ {0x0448,219},
+ {0x0449,221},
+ {0x044a,223},
+ {0x044b,217},
+ {0x044c,216},
+ {0x044d,220},
+ {0x044e,192},
+ {0x044f,209},
+ {0x0451,163},
+ {0x0454,164},
+ {0x0456,166},
+ {0x0457,167},
+ {0x0490,189},
+ {0x0491,173},
+ {0x2219,149},
+ {0x221a,150},
+ {0x2248,151},
+ {0x2264,152},
+ {0x2265,153},
+ {0x2320,147},
+ {0x2321,155},
+ {0x2580,139},
+ {0x2584,140},
+ {0x2588,141},
+ {0x258c,142},
+ {0x2590,143},
+ {0x2591,144},
+ {0x2592,145},
+ {0x2593,146},
+ {0x25a0,148},
+};
+
+unsigned short fz_unicode_from_windows_1250[256] = {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47,
+ 48,
+ 49,
+ 50,
+ 51,
+ 52,
+ 53,
+ 54,
+ 55,
+ 56,
+ 57,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 63,
+ 64,
+ 65,
+ 66,
+ 67,
+ 68,
+ 69,
+ 70,
+ 71,
+ 72,
+ 73,
+ 74,
+ 75,
+ 76,
+ 77,
+ 78,
+ 79,
+ 80,
+ 81,
+ 82,
+ 83,
+ 84,
+ 85,
+ 86,
+ 87,
+ 88,
+ 89,
+ 90,
+ 91,
+ 92,
+ 93,
+ 94,
+ 95,
+ 96,
+ 97,
+ 98,
+ 99,
+ 100,
+ 101,
+ 102,
+ 103,
+ 104,
+ 105,
+ 106,
+ 107,
+ 108,
+ 109,
+ 110,
+ 111,
+ 112,
+ 113,
+ 114,
+ 115,
+ 116,
+ 117,
+ 118,
+ 119,
+ 120,
+ 121,
+ 122,
+ 123,
+ 124,
+ 125,
+ 126,
+ 127,
+ 8364,
+ 0,
+ 8218,
+ 0,
+ 8222,
+ 8230,
+ 8224,
+ 8225,
+ 0,
+ 8240,
+ 352,
+ 8249,
+ 346,
+ 356,
+ 381,
+ 377,
+ 0,
+ 8216,
+ 8217,
+ 8220,
+ 8221,
+ 8226,
+ 8211,
+ 8212,
+ 0,
+ 8482,
+ 353,
+ 8250,
+ 347,
+ 357,
+ 382,
+ 378,
+ 160,
+ 711,
+ 728,
+ 321,
+ 164,
+ 260,
+ 166,
+ 167,
+ 168,
+ 169,
+ 350,
+ 171,
+ 172,
+ 173,
+ 174,
+ 379,
+ 176,
+ 177,
+ 731,
+ 322,
+ 180,
+ 181,
+ 182,
+ 183,
+ 184,
+ 261,
+ 351,
+ 187,
+ 317,
+ 733,
+ 318,
+ 380,
+ 340,
+ 193,
+ 194,
+ 258,
+ 196,
+ 313,
+ 262,
+ 199,
+ 268,
+ 201,
+ 280,
+ 203,
+ 282,
+ 205,
+ 206,
+ 270,
+ 272,
+ 323,
+ 327,
+ 211,
+ 212,
+ 336,
+ 214,
+ 215,
+ 344,
+ 366,
+ 218,
+ 368,
+ 220,
+ 221,
+ 354,
+ 223,
+ 341,
+ 225,
+ 226,
+ 259,
+ 228,
+ 314,
+ 263,
+ 231,
+ 269,
+ 233,
+ 281,
+ 235,
+ 283,
+ 237,
+ 238,
+ 271,
+ 273,
+ 324,
+ 328,
+ 243,
+ 244,
+ 337,
+ 246,
+ 247,
+ 345,
+ 367,
+ 250,
+ 369,
+ 252,
+ 253,
+ 355,
+ 729,
+};
+
+const char *fz_glyph_name_from_windows_1250[256] = {
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "space",
+ "exclam",
+ "quotedbl",
+ "numbersign",
+ "dollar",
+ "percent",
+ "ampersand",
+ "quotesingle",
+ "parenleft",
+ "parenright",
+ "asterisk",
+ "plus",
+ "comma",
+ "hyphen",
+ "period",
+ "slash",
+ "zero",
+ "one",
+ "two",
+ "three",
+ "four",
+ "five",
+ "six",
+ "seven",
+ "eight",
+ "nine",
+ "colon",
+ "semicolon",
+ "less",
+ "equal",
+ "greater",
+ "question",
+ "at",
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "bracketleft",
+ "backslash",
+ "bracketright",
+ "asciicircum",
+ "underscore",
+ "grave",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ "braceleft",
+ "bar",
+ "braceright",
+ "asciitilde",
+ "controlDEL",
+ "Euro",
+ _notdef,
+ "quotesinglbase",
+ _notdef,
+ "quotedblbase",
+ "ellipsis",
+ "dagger",
+ "daggerdbl",
+ _notdef,
+ "perthousand",
+ "Scaron",
+ "guilsinglleft",
+ "Sacute",
+ "Tcaron",
+ "Zcaron",
+ "Zacute",
+ _notdef,
+ "quoteleft",
+ "quoteright",
+ "quotedblleft",
+ "quotedblright",
+ "bullet",
+ "endash",
+ "emdash",
+ _notdef,
+ "trademark",
+ "scaron",
+ "guilsinglright",
+ "sacute",
+ "tcaron",
+ "zcaron",
+ "zacute",
+ "nbspace",
+ "caron",
+ "breve",
+ "Lslash",
+ "currency",
+ "Aogonek",
+ "brokenbar",
+ "section",
+ "dieresis",
+ "copyright",
+ "Scedilla",
+ "guillemotleft",
+ "logicalnot",
+ "sfthyphen",
+ "registered",
+ "Zdot",
+ "degree",
+ "plusminus",
+ "ogonek",
+ "lslash",
+ "acute",
+ "mu",
+ "paragraph",
+ "middot",
+ "cedilla",
+ "aogonek",
+ "scedilla",
+ "guillemotright",
+ "Lcaron",
+ "hungarumlaut",
+ "lcaron",
+ "zdot",
+ "Racute",
+ "Aacute",
+ "Acircumflex",
+ "Abreve",
+ "Adieresis",
+ "Lacute",
+ "Cacute",
+ "Ccedilla",
+ "Ccaron",
+ "Eacute",
+ "Eogonek",
+ "Edieresis",
+ "Ecaron",
+ "Iacute",
+ "Icircumflex",
+ "Dcaron",
+ "Dcroat",
+ "Nacute",
+ "Ncaron",
+ "Oacute",
+ "Ocircumflex",
+ "Odblacute",
+ "Odieresis",
+ "multiply",
+ "Rcaron",
+ "Uring",
+ "Uacute",
+ "Udblacute",
+ "Udieresis",
+ "Yacute",
+ "Tcedilla",
+ "germandbls",
+ "racute",
+ "aacute",
+ "acircumflex",
+ "abreve",
+ "adieresis",
+ "lacute",
+ "cacute",
+ "ccedilla",
+ "ccaron",
+ "eacute",
+ "eogonek",
+ "edieresis",
+ "ecaron",
+ "iacute",
+ "icircumflex",
+ "dcaron",
+ "dcroat",
+ "nacute",
+ "ncaron",
+ "oacute",
+ "ocircumflex",
+ "odblacute",
+ "odieresis",
+ "divide",
+ "rcaron",
+ "uring",
+ "uacute",
+ "udblacute",
+ "udieresis",
+ "yacute",
+ "tcedilla",
+ "dotaccent",
+};
+
+static const struct { unsigned short u, c; } windows_1250_from_unicode[] = {
+ {0x00a0,160},
+ {0x00a4,164},
+ {0x00a6,166},
+ {0x00a7,167},
+ {0x00a8,168},
+ {0x00a9,169},
+ {0x00ab,171},
+ {0x00ac,172},
+ {0x00ad,173},
+ {0x00ae,174},
+ {0x00b0,176},
+ {0x00b1,177},
+ {0x00b4,180},
+ {0x00b5,181},
+ {0x00b6,182},
+ {0x00b7,183},
+ {0x00b8,184},
+ {0x00bb,187},
+ {0x00c1,193},
+ {0x00c2,194},
+ {0x00c4,196},
+ {0x00c7,199},
+ {0x00c9,201},
+ {0x00cb,203},
+ {0x00cd,205},
+ {0x00ce,206},
+ {0x00d3,211},
+ {0x00d4,212},
+ {0x00d6,214},
+ {0x00d7,215},
+ {0x00da,218},
+ {0x00dc,220},
+ {0x00dd,221},
+ {0x00df,223},
+ {0x00e1,225},
+ {0x00e2,226},
+ {0x00e4,228},
+ {0x00e7,231},
+ {0x00e9,233},
+ {0x00eb,235},
+ {0x00ed,237},
+ {0x00ee,238},
+ {0x00f3,243},
+ {0x00f4,244},
+ {0x00f6,246},
+ {0x00f7,247},
+ {0x00fa,250},
+ {0x00fc,252},
+ {0x00fd,253},
+ {0x0102,195},
+ {0x0103,227},
+ {0x0104,165},
+ {0x0105,185},
+ {0x0106,198},
+ {0x0107,230},
+ {0x010c,200},
+ {0x010d,232},
+ {0x010e,207},
+ {0x010f,239},
+ {0x0110,208},
+ {0x0111,240},
+ {0x0118,202},
+ {0x0119,234},
+ {0x011a,204},
+ {0x011b,236},
+ {0x0139,197},
+ {0x013a,229},
+ {0x013d,188},
+ {0x013e,190},
+ {0x0141,163},
+ {0x0142,179},
+ {0x0143,209},
+ {0x0144,241},
+ {0x0147,210},
+ {0x0148,242},
+ {0x0150,213},
+ {0x0151,245},
+ {0x0154,192},
+ {0x0155,224},
+ {0x0158,216},
+ {0x0159,248},
+ {0x015a,140},
+ {0x015b,156},
+ {0x015e,170},
+ {0x015f,186},
+ {0x0160,138},
+ {0x0161,154},
+ {0x0162,222},
+ {0x0163,254},
+ {0x0164,141},
+ {0x0165,157},
+ {0x016e,217},
+ {0x016f,249},
+ {0x0170,219},
+ {0x0171,251},
+ {0x0179,143},
+ {0x017a,159},
+ {0x017b,175},
+ {0x017c,191},
+ {0x017d,142},
+ {0x017e,158},
+ {0x02c7,161},
+ {0x02d8,162},
+ {0x02d9,255},
+ {0x02db,178},
+ {0x02dd,189},
+ {0x2013,150},
+ {0x2014,151},
+ {0x2018,145},
+ {0x2019,146},
+ {0x201a,130},
+ {0x201c,147},
+ {0x201d,148},
+ {0x201e,132},
+ {0x2020,134},
+ {0x2021,135},
+ {0x2022,149},
+ {0x2026,133},
+ {0x2030,137},
+ {0x2039,139},
+ {0x203a,155},
+ {0x20ac,128},
+ {0x2122,153},
+};
+
+unsigned short fz_unicode_from_windows_1251[256] = {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47,
+ 48,
+ 49,
+ 50,
+ 51,
+ 52,
+ 53,
+ 54,
+ 55,
+ 56,
+ 57,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 63,
+ 64,
+ 65,
+ 66,
+ 67,
+ 68,
+ 69,
+ 70,
+ 71,
+ 72,
+ 73,
+ 74,
+ 75,
+ 76,
+ 77,
+ 78,
+ 79,
+ 80,
+ 81,
+ 82,
+ 83,
+ 84,
+ 85,
+ 86,
+ 87,
+ 88,
+ 89,
+ 90,
+ 91,
+ 92,
+ 93,
+ 94,
+ 95,
+ 96,
+ 97,
+ 98,
+ 99,
+ 100,
+ 101,
+ 102,
+ 103,
+ 104,
+ 105,
+ 106,
+ 107,
+ 108,
+ 109,
+ 110,
+ 111,
+ 112,
+ 113,
+ 114,
+ 115,
+ 116,
+ 117,
+ 118,
+ 119,
+ 120,
+ 121,
+ 122,
+ 123,
+ 124,
+ 125,
+ 126,
+ 127,
+ 1026,
+ 1027,
+ 8218,
+ 1107,
+ 8222,
+ 8230,
+ 8224,
+ 8225,
+ 8364,
+ 8240,
+ 1033,
+ 8249,
+ 1034,
+ 1036,
+ 1035,
+ 1039,
+ 1106,
+ 8216,
+ 8217,
+ 8220,
+ 8221,
+ 8226,
+ 8211,
+ 8212,
+ 0,
+ 8482,
+ 1113,
+ 8250,
+ 1114,
+ 1116,
+ 1115,
+ 1119,
+ 160,
+ 1038,
+ 1118,
+ 1032,
+ 164,
+ 1168,
+ 166,
+ 167,
+ 1025,
+ 169,
+ 1028,
+ 171,
+ 172,
+ 173,
+ 174,
+ 1031,
+ 176,
+ 177,
+ 1030,
+ 1110,
+ 1169,
+ 181,
+ 182,
+ 183,
+ 1105,
+ 8470,
+ 1108,
+ 187,
+ 1112,
+ 1029,
+ 1109,
+ 1111,
+ 1040,
+ 1041,
+ 1042,
+ 1043,
+ 1044,
+ 1045,
+ 1046,
+ 1047,
+ 1048,
+ 1049,
+ 1050,
+ 1051,
+ 1052,
+ 1053,
+ 1054,
+ 1055,
+ 1056,
+ 1057,
+ 1058,
+ 1059,
+ 1060,
+ 1061,
+ 1062,
+ 1063,
+ 1064,
+ 1065,
+ 1066,
+ 1067,
+ 1068,
+ 1069,
+ 1070,
+ 1071,
+ 1072,
+ 1073,
+ 1074,
+ 1075,
+ 1076,
+ 1077,
+ 1078,
+ 1079,
+ 1080,
+ 1081,
+ 1082,
+ 1083,
+ 1084,
+ 1085,
+ 1086,
+ 1087,
+ 1088,
+ 1089,
+ 1090,
+ 1091,
+ 1092,
+ 1093,
+ 1094,
+ 1095,
+ 1096,
+ 1097,
+ 1098,
+ 1099,
+ 1100,
+ 1101,
+ 1102,
+ 1103,
+};
+
+const char *fz_glyph_name_from_windows_1251[256] = {
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "space",
+ "exclam",
+ "quotedbl",
+ "numbersign",
+ "dollar",
+ "percent",
+ "ampersand",
+ "quotesingle",
+ "parenleft",
+ "parenright",
+ "asterisk",
+ "plus",
+ "comma",
+ "hyphen",
+ "period",
+ "slash",
+ "zero",
+ "one",
+ "two",
+ "three",
+ "four",
+ "five",
+ "six",
+ "seven",
+ "eight",
+ "nine",
+ "colon",
+ "semicolon",
+ "less",
+ "equal",
+ "greater",
+ "question",
+ "at",
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "bracketleft",
+ "backslash",
+ "bracketright",
+ "asciicircum",
+ "underscore",
+ "grave",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ "braceleft",
+ "bar",
+ "braceright",
+ "asciitilde",
+ "controlDEL",
+ "Djecyrillic",
+ "Gjecyrillic",
+ "quotesinglbase",
+ "afii10100",
+ "quotedblbase",
+ "ellipsis",
+ "dagger",
+ "daggerdbl",
+ "Euro",
+ "perthousand",
+ "Ljecyrillic",
+ "guilsinglleft",
+ "Njecyrillic",
+ "Kjecyrillic",
+ "Tshecyrillic",
+ "Dzhecyrillic",
+ "afii10099",
+ "quoteleft",
+ "quoteright",
+ "quotedblleft",
+ "quotedblright",
+ "bullet",
+ "endash",
+ "emdash",
+ _notdef,
+ "trademark",
+ "afii10106",
+ "guilsinglright",
+ "afii10107",
+ "afii10109",
+ "afii10108",
+ "afii10193",
+ "nbspace",
+ "Ushortcyrillic",
+ "afii10110",
+ "Jecyrillic",
+ "currency",
+ "Gheupturncyrillic",
+ "brokenbar",
+ "section",
+ "Iocyrillic",
+ "copyright",
+ "Ecyrillic",
+ "guillemotleft",
+ "logicalnot",
+ "sfthyphen",
+ "registered",
+ "Yicyrillic",
+ "degree",
+ "plusminus",
+ "Icyrillic",
+ "afii10103",
+ "afii10098",
+ "mu",
+ "paragraph",
+ "middot",
+ "afii10071",
+ "afii61352",
+ "afii10101",
+ "guillemotright",
+ "afii10105",
+ "Dzecyrillic",
+ "afii10102",
+ "afii10104",
+ "Acyrillic",
+ "Becyrillic",
+ "Vecyrillic",
+ "Gecyrillic",
+ "Decyrillic",
+ "Iecyrillic",
+ "Zhecyrillic",
+ "Zecyrillic",
+ "Iicyrillic",
+ "Iishortcyrillic",
+ "Kacyrillic",
+ "Elcyrillic",
+ "Emcyrillic",
+ "Encyrillic",
+ "Ocyrillic",
+ "Pecyrillic",
+ "Ercyrillic",
+ "Escyrillic",
+ "Tecyrillic",
+ "Ucyrillic",
+ "Efcyrillic",
+ "Khacyrillic",
+ "Tsecyrillic",
+ "Checyrillic",
+ "Shacyrillic",
+ "Shchacyrillic",
+ "Hardsigncyrillic",
+ "Yericyrillic",
+ "Softsigncyrillic",
+ "Ereversedcyrillic",
+ "IUcyrillic",
+ "IAcyrillic",
+ "acyrillic",
+ "afii10066",
+ "afii10067",
+ "afii10068",
+ "afii10069",
+ "afii10070",
+ "afii10072",
+ "afii10073",
+ "afii10074",
+ "afii10075",
+ "afii10076",
+ "afii10077",
+ "afii10078",
+ "afii10079",
+ "afii10080",
+ "afii10081",
+ "afii10082",
+ "afii10083",
+ "afii10084",
+ "afii10085",
+ "afii10086",
+ "afii10087",
+ "afii10088",
+ "afii10089",
+ "afii10090",
+ "afii10091",
+ "afii10092",
+ "afii10093",
+ "afii10094",
+ "afii10095",
+ "afii10096",
+ "afii10097",
+};
+
+static const struct { unsigned short u, c; } windows_1251_from_unicode[] = {
+ {0x00a0,160},
+ {0x00a4,164},
+ {0x00a6,166},
+ {0x00a7,167},
+ {0x00a9,169},
+ {0x00ab,171},
+ {0x00ac,172},
+ {0x00ad,173},
+ {0x00ae,174},
+ {0x00b0,176},
+ {0x00b1,177},
+ {0x00b5,181},
+ {0x00b6,182},
+ {0x00b7,183},
+ {0x00bb,187},
+ {0x0401,168},
+ {0x0402,128},
+ {0x0403,129},
+ {0x0404,170},
+ {0x0405,189},
+ {0x0406,178},
+ {0x0407,175},
+ {0x0408,163},
+ {0x0409,138},
+ {0x040a,140},
+ {0x040b,142},
+ {0x040c,141},
+ {0x040e,161},
+ {0x040f,143},
+ {0x0410,192},
+ {0x0411,193},
+ {0x0412,194},
+ {0x0413,195},
+ {0x0414,196},
+ {0x0415,197},
+ {0x0416,198},
+ {0x0417,199},
+ {0x0418,200},
+ {0x0419,201},
+ {0x041a,202},
+ {0x041b,203},
+ {0x041c,204},
+ {0x041d,205},
+ {0x041e,206},
+ {0x041f,207},
+ {0x0420,208},
+ {0x0421,209},
+ {0x0422,210},
+ {0x0423,211},
+ {0x0424,212},
+ {0x0425,213},
+ {0x0426,214},
+ {0x0427,215},
+ {0x0428,216},
+ {0x0429,217},
+ {0x042a,218},
+ {0x042b,219},
+ {0x042c,220},
+ {0x042d,221},
+ {0x042e,222},
+ {0x042f,223},
+ {0x0430,224},
+ {0x0431,225},
+ {0x0432,226},
+ {0x0433,227},
+ {0x0434,228},
+ {0x0435,229},
+ {0x0436,230},
+ {0x0437,231},
+ {0x0438,232},
+ {0x0439,233},
+ {0x043a,234},
+ {0x043b,235},
+ {0x043c,236},
+ {0x043d,237},
+ {0x043e,238},
+ {0x043f,239},
+ {0x0440,240},
+ {0x0441,241},
+ {0x0442,242},
+ {0x0443,243},
+ {0x0444,244},
+ {0x0445,245},
+ {0x0446,246},
+ {0x0447,247},
+ {0x0448,248},
+ {0x0449,249},
+ {0x044a,250},
+ {0x044b,251},
+ {0x044c,252},
+ {0x044d,253},
+ {0x044e,254},
+ {0x044f,255},
+ {0x0451,184},
+ {0x0452,144},
+ {0x0453,131},
+ {0x0454,186},
+ {0x0455,190},
+ {0x0456,179},
+ {0x0457,191},
+ {0x0458,188},
+ {0x0459,154},
+ {0x045a,156},
+ {0x045b,158},
+ {0x045c,157},
+ {0x045e,162},
+ {0x045f,159},
+ {0x0490,165},
+ {0x0491,180},
+ {0x2013,150},
+ {0x2014,151},
+ {0x2018,145},
+ {0x2019,146},
+ {0x201a,130},
+ {0x201c,147},
+ {0x201d,148},
+ {0x201e,132},
+ {0x2020,134},
+ {0x2021,135},
+ {0x2022,149},
+ {0x2026,133},
+ {0x2030,137},
+ {0x2039,139},
+ {0x203a,155},
+ {0x20ac,136},
+ {0x2116,185},
+ {0x2122,153},
+};
+
+unsigned short fz_unicode_from_windows_1252[256] = {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ 11,
+ 12,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30,
+ 31,
+ 32,
+ 33,
+ 34,
+ 35,
+ 36,
+ 37,
+ 38,
+ 39,
+ 40,
+ 41,
+ 42,
+ 43,
+ 44,
+ 45,
+ 46,
+ 47,
+ 48,
+ 49,
+ 50,
+ 51,
+ 52,
+ 53,
+ 54,
+ 55,
+ 56,
+ 57,
+ 58,
+ 59,
+ 60,
+ 61,
+ 62,
+ 63,
+ 64,
+ 65,
+ 66,
+ 67,
+ 68,
+ 69,
+ 70,
+ 71,
+ 72,
+ 73,
+ 74,
+ 75,
+ 76,
+ 77,
+ 78,
+ 79,
+ 80,
+ 81,
+ 82,
+ 83,
+ 84,
+ 85,
+ 86,
+ 87,
+ 88,
+ 89,
+ 90,
+ 91,
+ 92,
+ 93,
+ 94,
+ 95,
+ 96,
+ 97,
+ 98,
+ 99,
+ 100,
+ 101,
+ 102,
+ 103,
+ 104,
+ 105,
+ 106,
+ 107,
+ 108,
+ 109,
+ 110,
+ 111,
+ 112,
+ 113,
+ 114,
+ 115,
+ 116,
+ 117,
+ 118,
+ 119,
+ 120,
+ 121,
+ 122,
+ 123,
+ 124,
+ 125,
+ 126,
+ 127,
+ 8364,
+ 0,
+ 8218,
+ 402,
+ 8222,
+ 8230,
+ 8224,
+ 8225,
+ 710,
+ 8240,
+ 352,
+ 8249,
+ 338,
+ 0,
+ 381,
+ 0,
+ 0,
+ 8216,
+ 8217,
+ 8220,
+ 8221,
+ 8226,
+ 8211,
+ 8212,
+ 732,
+ 8482,
+ 353,
+ 8250,
+ 339,
+ 0,
+ 382,
+ 376,
+ 160,
+ 161,
+ 162,
+ 163,
+ 164,
+ 165,
+ 166,
+ 167,
+ 168,
+ 169,
+ 170,
+ 171,
+ 172,
+ 173,
+ 174,
+ 175,
+ 176,
+ 177,
+ 178,
+ 179,
+ 180,
+ 181,
+ 182,
+ 183,
+ 184,
+ 185,
+ 186,
+ 187,
+ 188,
+ 189,
+ 190,
+ 191,
+ 192,
+ 193,
+ 194,
+ 195,
+ 196,
+ 197,
+ 198,
+ 199,
+ 200,
+ 201,
+ 202,
+ 203,
+ 204,
+ 205,
+ 206,
+ 207,
+ 208,
+ 209,
+ 210,
+ 211,
+ 212,
+ 213,
+ 214,
+ 215,
+ 216,
+ 217,
+ 218,
+ 219,
+ 220,
+ 221,
+ 222,
+ 223,
+ 224,
+ 225,
+ 226,
+ 227,
+ 228,
+ 229,
+ 230,
+ 231,
+ 232,
+ 233,
+ 234,
+ 235,
+ 236,
+ 237,
+ 238,
+ 239,
+ 240,
+ 241,
+ 242,
+ 243,
+ 244,
+ 245,
+ 246,
+ 247,
+ 248,
+ 249,
+ 250,
+ 251,
+ 252,
+ 253,
+ 254,
+ 255,
+};
+
+const char *fz_glyph_name_from_windows_1252[256] = {
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ _notdef,
+ "space",
+ "exclam",
+ "quotedbl",
+ "numbersign",
+ "dollar",
+ "percent",
+ "ampersand",
+ "quotesingle",
+ "parenleft",
+ "parenright",
+ "asterisk",
+ "plus",
+ "comma",
+ "hyphen",
+ "period",
+ "slash",
+ "zero",
+ "one",
+ "two",
+ "three",
+ "four",
+ "five",
+ "six",
+ "seven",
+ "eight",
+ "nine",
+ "colon",
+ "semicolon",
+ "less",
+ "equal",
+ "greater",
+ "question",
+ "at",
+ "A",
+ "B",
+ "C",
+ "D",
+ "E",
+ "F",
+ "G",
+ "H",
+ "I",
+ "J",
+ "K",
+ "L",
+ "M",
+ "N",
+ "O",
+ "P",
+ "Q",
+ "R",
+ "S",
+ "T",
+ "U",
+ "V",
+ "W",
+ "X",
+ "Y",
+ "Z",
+ "bracketleft",
+ "backslash",
+ "bracketright",
+ "asciicircum",
+ "underscore",
+ "grave",
+ "a",
+ "b",
+ "c",
+ "d",
+ "e",
+ "f",
+ "g",
+ "h",
+ "i",
+ "j",
+ "k",
+ "l",
+ "m",
+ "n",
+ "o",
+ "p",
+ "q",
+ "r",
+ "s",
+ "t",
+ "u",
+ "v",
+ "w",
+ "x",
+ "y",
+ "z",
+ "braceleft",
+ "bar",
+ "braceright",
+ "asciitilde",
+ "controlDEL",
+ "Euro",
+ _notdef,
+ "quotesinglbase",
+ "florin",
+ "quotedblbase",
+ "ellipsis",
+ "dagger",
+ "daggerdbl",
+ "circumflex",
+ "perthousand",
+ "Scaron",
+ "guilsinglleft",
+ "OE",
+ _notdef,
+ "Zcaron",
+ _notdef,
+ _notdef,
+ "quoteleft",
+ "quoteright",
+ "quotedblleft",
+ "quotedblright",
+ "bullet",
+ "endash",
+ "emdash",
+ "tilde",
+ "trademark",
+ "scaron",
+ "guilsinglright",
+ "oe",
+ _notdef,
+ "zcaron",
+ "Ydieresis",
+ "nbspace",
+ "exclamdown",
+ "cent",
+ "sterling",
+ "currency",
+ "yen",
+ "brokenbar",
+ "section",
+ "dieresis",
+ "copyright",
+ "ordfeminine",
+ "guillemotleft",
+ "logicalnot",
+ "sfthyphen",
+ "registered",
+ "macron",
+ "degree",
+ "plusminus",
+ "twosuperior",
+ "threesuperior",
+ "acute",
+ "mu",
+ "paragraph",
+ "middot",
+ "cedilla",
+ "onesuperior",
+ "ordmasculine",
+ "guillemotright",
+ "onequarter",
+ "onehalf",
+ "threequarters",
+ "questiondown",
+ "Agrave",
+ "Aacute",
+ "Acircumflex",
+ "Atilde",
+ "Adieresis",
+ "Aring",
+ "AE",
+ "Ccedilla",
+ "Egrave",
+ "Eacute",
+ "Ecircumflex",
+ "Edieresis",
+ "Igrave",
+ "Iacute",
+ "Icircumflex",
+ "Idieresis",
+ "Eth",
+ "Ntilde",
+ "Ograve",
+ "Oacute",
+ "Ocircumflex",
+ "Otilde",
+ "Odieresis",
+ "multiply",
+ "Oslash",
+ "Ugrave",
+ "Uacute",
+ "Ucircumflex",
+ "Udieresis",
+ "Yacute",
+ "Thorn",
+ "germandbls",
+ "agrave",
+ "aacute",
+ "acircumflex",
+ "atilde",
+ "adieresis",
+ "aring",
+ "ae",
+ "ccedilla",
+ "egrave",
+ "eacute",
+ "ecircumflex",
+ "edieresis",
+ "igrave",
+ "iacute",
+ "icircumflex",
+ "idieresis",
+ "eth",
+ "ntilde",
+ "ograve",
+ "oacute",
+ "ocircumflex",
+ "otilde",
+ "odieresis",
+ "divide",
+ "oslash",
+ "ugrave",
+ "uacute",
+ "ucircumflex",
+ "udieresis",
+ "yacute",
+ "thorn",
+ "ydieresis",
+};
+
+static const struct { unsigned short u, c; } windows_1252_from_unicode[] = {
+ {0x00a0,160},
+ {0x00a1,161},
+ {0x00a2,162},
+ {0x00a3,163},
+ {0x00a4,164},
+ {0x00a5,165},
+ {0x00a6,166},
+ {0x00a7,167},
+ {0x00a8,168},
+ {0x00a9,169},
+ {0x00aa,170},
+ {0x00ab,171},
+ {0x00ac,172},
+ {0x00ad,173},
+ {0x00ae,174},
+ {0x00af,175},
+ {0x00b0,176},
+ {0x00b1,177},
+ {0x00b2,178},
+ {0x00b3,179},
+ {0x00b4,180},
+ {0x00b5,181},
+ {0x00b6,182},
+ {0x00b7,183},
+ {0x00b8,184},
+ {0x00b9,185},
+ {0x00ba,186},
+ {0x00bb,187},
+ {0x00bc,188},
+ {0x00bd,189},
+ {0x00be,190},
+ {0x00bf,191},
+ {0x00c0,192},
+ {0x00c1,193},
+ {0x00c2,194},
+ {0x00c3,195},
+ {0x00c4,196},
+ {0x00c5,197},
+ {0x00c6,198},
+ {0x00c7,199},
+ {0x00c8,200},
+ {0x00c9,201},
+ {0x00ca,202},
+ {0x00cb,203},
+ {0x00cc,204},
+ {0x00cd,205},
+ {0x00ce,206},
+ {0x00cf,207},
+ {0x00d0,208},
+ {0x00d1,209},
+ {0x00d2,210},
+ {0x00d3,211},
+ {0x00d4,212},
+ {0x00d5,213},
+ {0x00d6,214},
+ {0x00d7,215},
+ {0x00d8,216},
+ {0x00d9,217},
+ {0x00da,218},
+ {0x00db,219},
+ {0x00dc,220},
+ {0x00dd,221},
+ {0x00de,222},
+ {0x00df,223},
+ {0x00e0,224},
+ {0x00e1,225},
+ {0x00e2,226},
+ {0x00e3,227},
+ {0x00e4,228},
+ {0x00e5,229},
+ {0x00e6,230},
+ {0x00e7,231},
+ {0x00e8,232},
+ {0x00e9,233},
+ {0x00ea,234},
+ {0x00eb,235},
+ {0x00ec,236},
+ {0x00ed,237},
+ {0x00ee,238},
+ {0x00ef,239},
+ {0x00f0,240},
+ {0x00f1,241},
+ {0x00f2,242},
+ {0x00f3,243},
+ {0x00f4,244},
+ {0x00f5,245},
+ {0x00f6,246},
+ {0x00f7,247},
+ {0x00f8,248},
+ {0x00f9,249},
+ {0x00fa,250},
+ {0x00fb,251},
+ {0x00fc,252},
+ {0x00fd,253},
+ {0x00fe,254},
+ {0x00ff,255},
+ {0x0152,140},
+ {0x0153,156},
+ {0x0160,138},
+ {0x0161,154},
+ {0x0178,159},
+ {0x017d,142},
+ {0x017e,158},
+ {0x0192,131},
+ {0x02c6,136},
+ {0x02dc,152},
+ {0x2013,150},
+ {0x2014,151},
+ {0x2018,145},
+ {0x2019,146},
+ {0x201a,130},
+ {0x201c,147},
+ {0x201d,148},
+ {0x201e,132},
+ {0x2020,134},
+ {0x2021,135},
+ {0x2022,149},
+ {0x2026,133},
+ {0x2030,137},
+ {0x2039,139},
+ {0x203a,155},
+ {0x20ac,128},
+ {0x2122,153},
+};
diff --git a/source/pdf/pdf-appearance.c b/source/pdf/pdf-appearance.c
index c18d5860..a3067e16 100644
--- a/source/pdf/pdf-appearance.c
+++ b/source/pdf/pdf-appearance.c
@@ -673,7 +673,7 @@ measure_simple_string(fz_context *ctx, fz_font *font, const char *text)
{
int c, g;
text += fz_chartorune(&c, text);
- c = pdf_winansi_from_unicode(c);
+ c = fz_windows_1252_from_unicode(c);
if (c < 0) c = REPLACEMENT;
g = fz_encode_character(ctx, font, c);
w += fz_advance_glyph(ctx, font, g, 0);
@@ -689,7 +689,7 @@ write_simple_string(fz_context *ctx, fz_buffer *buf, const char *a, const char *
{
int c;
a += fz_chartorune(&c, a);
- c = pdf_winansi_from_unicode(c);
+ c = fz_windows_1252_from_unicode(c);
if (c < 0) c = REPLACEMENT;
if (c == '(' || c == ')' || c == '\\')
fz_append_byte(ctx, buf, '\\');
@@ -871,7 +871,7 @@ write_comb_string(fz_context *ctx, fz_buffer *buf, const char *a, const char *b,
int c, g;
a += fz_chartorune(&c, a);
- c = pdf_winansi_from_unicode(c);
+ c = fz_windows_1252_from_unicode(c);
if (c < 0) c = REPLACEMENT;
g = fz_encode_character(ctx, font, c);
diff --git a/source/pdf/pdf-encoding.c b/source/pdf/pdf-encoding.c
index f4fe584c..bf0d3df8 100644
--- a/source/pdf/pdf-encoding.c
+++ b/source/pdf/pdf-encoding.c
@@ -7,6 +7,8 @@
#include <string.h>
#include <stdlib.h>
+#define pdf_win_ansi fz_glyph_name_from_windows_1252
+
void
pdf_load_encoding(const char **estrings, const char *encoding)
{
@@ -85,60 +87,3 @@ pdf_lookup_agl_duplicates(int ucs)
}
return empty_dup_list;
}
-
-int pdf_cyrillic_from_unicode(int u)
-{
- int l = 0;
- int r = nelem(koi8u_from_unicode) - 1;
- if (u < 128)
- return u;
- while (l <= r)
- {
- int m = (l + r) >> 1;
- if (u < koi8u_from_unicode[m].u)
- r = m - 1;
- else if (u > koi8u_from_unicode[m].u)
- l = m + 1;
- else
- return koi8u_from_unicode[m].c;
- }
- return -1;
-}
-
-int pdf_greek_from_unicode(int u)
-{
- int l = 0;
- int r = nelem(iso8859_7_from_unicode) - 1;
- if (u < 128)
- return u;
- while (l <= r)
- {
- int m = (l + r) >> 1;
- if (u < iso8859_7_from_unicode[m].u)
- r = m - 1;
- else if (u > iso8859_7_from_unicode[m].u)
- l = m + 1;
- else
- return iso8859_7_from_unicode[m].c;
- }
- return -1;
-}
-
-int pdf_winansi_from_unicode(int u)
-{
- int l = 0;
- int r = nelem(winansi_from_unicode) - 1;
- if (u < 128)
- return u;
- while (l <= r)
- {
- int m = (l + r) >> 1;
- if (u < winansi_from_unicode[m].u)
- r = m - 1;
- else if (u > winansi_from_unicode[m].u)
- l = m + 1;
- else
- return winansi_from_unicode[m].c;
- }
- return -1;
-}
diff --git a/source/pdf/pdf-encodings.h b/source/pdf/pdf-encodings.h
index f9e84c49..efdfe1f7 100644
--- a/source/pdf/pdf-encodings.h
+++ b/source/pdf/pdf-encodings.h
@@ -166,206 +166,3 @@ const char *pdf_mac_expert[256] = {
"periodsuperior", "Dotaccentsmall", "Ringsmall", _notdef, _notdef,
_notdef, _notdef
};
-
-const char *pdf_win_ansi[256] = {
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- "space", "exclam", "quotedbl", "numbersign", "dollar", "percent",
- "ampersand", "quotesingle", "parenleft", "parenright", "asterisk",
- "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two",
- "three", "four", "five", "six", "seven", "eight", "nine", "colon",
- "semicolon", "less", "equal", "greater", "question", "at", "A", "B",
- "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
- "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft",
- "backslash", "bracketright", "asciicircum", "underscore", "grave", "a",
- "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
- "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft",
- "bar", "braceright", "asciitilde", "bullet", "Euro", "bullet",
- "quotesinglbase", "florin", "quotedblbase", "ellipsis", "dagger",
- "daggerdbl", "circumflex", "perthousand", "Scaron", "guilsinglleft",
- "OE", "bullet", "Zcaron", "bullet", "bullet", "quoteleft",
- "quoteright", "quotedblleft", "quotedblright", "bullet", "endash",
- "emdash", "tilde", "trademark", "scaron", "guilsinglright", "oe",
- "bullet", "zcaron", "Ydieresis", "space", "exclamdown", "cent",
- "sterling", "currency", "yen", "brokenbar", "section", "dieresis",
- "copyright", "ordfeminine", "guillemotleft", "logicalnot", "hyphen",
- "registered", "macron", "degree", "plusminus", "twosuperior",
- "threesuperior", "acute", "mu", "paragraph", "periodcentered",
- "cedilla", "onesuperior", "ordmasculine", "guillemotright",
- "onequarter", "onehalf", "threequarters", "questiondown", "Agrave",
- "Aacute", "Acircumflex", "Atilde", "Adieresis", "Aring", "AE",
- "Ccedilla", "Egrave", "Eacute", "Ecircumflex", "Edieresis", "Igrave",
- "Iacute", "Icircumflex", "Idieresis", "Eth", "Ntilde", "Ograve",
- "Oacute", "Ocircumflex", "Otilde", "Odieresis", "multiply", "Oslash",
- "Ugrave", "Uacute", "Ucircumflex", "Udieresis", "Yacute", "Thorn",
- "germandbls", "agrave", "aacute", "acircumflex", "atilde", "adieresis",
- "aring", "ae", "ccedilla", "egrave", "eacute", "ecircumflex",
- "edieresis", "igrave", "iacute", "icircumflex", "idieresis", "eth",
- "ntilde", "ograve", "oacute", "ocircumflex", "otilde", "odieresis",
- "divide", "oslash", "ugrave", "uacute", "ucircumflex", "udieresis",
- "yacute", "thorn", "ydieresis"
-};
-
-const char *pdf_glyph_name_from_koi8u[256] = {
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- "space", "exclam", "quotedbl", "numbersign", "dollar", "percent",
- "ampersand", "quotesingle", "parenleft", "parenright", "asterisk",
- "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two",
- "three", "four", "five", "six", "seven", "eight", "nine", "colon",
- "semicolon", "less", "equal", "greater", "question", "at", "A", "B",
- "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
- "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft",
- "backslash", "bracketright", "asciicircum", "underscore", "grave", "a",
- "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
- "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft",
- "bar", "braceright", "asciitilde", _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, "integraltp", _notdef, "bulletoperator",
- "radical", "approxequal", "lessequal", "greaterequal",
- "nonbreakingspace", "integralbt", "degree", "twosuperior",
- "periodcentered", "divide", _notdef, _notdef, _notdef, "iocyrillic",
- "ecyrillic", _notdef, "icyrillic", "yicyrillic", _notdef, _notdef,
- _notdef, _notdef, _notdef, "gheupturncyrillic", _notdef, _notdef,
- _notdef, _notdef, _notdef, "afii10023", "afii10053", _notdef,
- "afii10055", "afii10056", _notdef, _notdef, _notdef, _notdef, _notdef,
- "afii10050", _notdef, "copyright", "iucyrillic", "afii10065",
- "becyrillic", "tsecyrillic", "decyrillic", "iecyrillic", "efcyrillic",
- "gecyrillic", "khacyrillic", "iicyrillic", "iishortcyrillic",
- "kacyrillic", "elcyrillic", "emcyrillic", "encyrillic", "ocyrillic",
- "pecyrillic", "iacyrillic", "ercyrillic", "escyrillic", "tecyrillic",
- "ucyrillic", "zhecyrillic", "vecyrillic", "softsigncyrillic",
- "yericyrillic", "zecyrillic", "shacyrillic", "ereversedcyrillic",
- "shchacyrillic", "checyrillic", "hardsigncyrillic", "afii10048",
- "afii10017", "afii10018", "afii10040", "afii10021", "afii10022",
- "afii10038", "afii10020", "afii10039", "afii10026", "afii10027",
- "afii10028", "afii10029", "afii10030", "afii10031", "afii10032",
- "afii10033", "afii10049", "afii10034", "afii10035", "afii10036",
- "afii10037", "afii10024", "afii10019", "afii10046", "afii10045",
- "afii10025", "afii10042", "afii10047", "afii10043", "afii10041",
- "afii10044",
-};
-
-static const struct { unsigned short u, c; } koi8u_from_unicode[] = {
- {0x00a0,154}, {0x00a9,191}, {0x00b0,156}, {0x00b2,157}, {0x00b7,158},
- {0x00f7,159}, {0x0401,179}, {0x0404,180}, {0x0406,182}, {0x0407,183},
- {0x0410,225}, {0x0411,226}, {0x0412,247}, {0x0413,231}, {0x0414,228},
- {0x0415,229}, {0x0416,246}, {0x0417,250}, {0x0418,233}, {0x0419,234},
- {0x041a,235}, {0x041b,236}, {0x041c,237}, {0x041d,238}, {0x041e,239},
- {0x041f,240}, {0x0420,242}, {0x0421,243}, {0x0422,244}, {0x0423,245},
- {0x0424,230}, {0x0425,232}, {0x0426,227}, {0x0427,254}, {0x0428,251},
- {0x0429,253}, {0x042a,255}, {0x042b,249}, {0x042c,248}, {0x042d,252},
- {0x042e,224}, {0x042f,241}, {0x0430,193}, {0x0431,194}, {0x0432,215},
- {0x0433,199}, {0x0434,196}, {0x0435,197}, {0x0436,214}, {0x0437,218},
- {0x0438,201}, {0x0439,202}, {0x043a,203}, {0x043b,204}, {0x043c,205},
- {0x043d,206}, {0x043e,207}, {0x043f,208}, {0x0440,210}, {0x0441,211},
- {0x0442,212}, {0x0443,213}, {0x0444,198}, {0x0445,200}, {0x0446,195},
- {0x0447,222}, {0x0448,219}, {0x0449,221}, {0x044a,223}, {0x044b,217},
- {0x044c,216}, {0x044d,220}, {0x044e,192}, {0x044f,209}, {0x0451,163},
- {0x0454,164}, {0x0456,166}, {0x0457,167}, {0x0490,189}, {0x0491,173},
- {0x2219,149}, {0x221a,150}, {0x2248,151}, {0x2264,152}, {0x2265,153},
- {0x2320,147}, {0x2321,155}, {0x2500,128}, {0x2502,129}, {0x250c,130},
- {0x2510,131}, {0x2514,132}, {0x2518,133}, {0x251c,134}, {0x2524,135},
- {0x252c,136}, {0x2534,137}, {0x253c,138}, {0x2550,160}, {0x2551,161},
- {0x2552,162}, {0x2554,165}, {0x2557,168}, {0x2558,169}, {0x2559,170},
- {0x255a,171}, {0x255b,172}, {0x255d,174}, {0x255e,175}, {0x255f,176},
- {0x2560,177}, {0x2561,178}, {0x2563,181}, {0x2566,184}, {0x2567,185},
- {0x2568,186}, {0x2569,187}, {0x256a,188}, {0x256c,190}, {0x2580,139},
- {0x2584,140}, {0x2588,141}, {0x258c,142}, {0x2590,143}, {0x2591,144},
- {0x2592,145}, {0x2593,146}, {0x25a0,148}
-};
-
-const char *pdf_glyph_name_from_iso8859_7[256] = {
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- "space", "exclam", "quotedbl", "numbersign", "dollar", "percent",
- "ampersand", "quotesingle", "parenleft", "parenright", "asterisk",
- "plus", "comma", "hyphen", "period", "slash", "zero", "one", "two",
- "three", "four", "five", "six", "seven", "eight", "nine", "colon",
- "semicolon", "less", "equal", "greater", "question", "at", "A", "B",
- "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
- "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft",
- "backslash", "bracketright", "asciicircum", "underscore", "grave", "a",
- "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o",
- "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft",
- "bar", "braceright", "asciitilde", _notdef,
- /* the block drawing characters have been omitted */
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef, _notdef,
- "nonbreakingspace", "quoteleft", "quoteright", "sterling", "euro",
- _notdef, "brokenbar", "section", "dieresis", "copyright",
- "ypogegrammeni", "guillemotleft", "logicalnot", "softhyphen", _notdef,
- "horizontalbar", "degree", "plusminus", "twosuperior", "threesuperior",
- "tonos", "dieresistonos", "Alphatonos", "periodcentered",
- "Epsilontonos", "Etatonos", "Iotatonos", "guillemotright",
- "Omicrontonos", "onehalf", "Upsilontonos", "Omegatonos",
- "iotadieresistonos", "Alpha", "Beta", "Gamma", "Deltagreek", "Epsilon",
- "Zeta", "Eta", "Theta", "Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi",
- "Omicron", "Pi", "Rho", _notdef, "Sigma", "Tau", "Upsilon", "Phi",
- "Chi", "Psi", "Omegagreek", "Iotadieresis", "Upsilondieresis",
- "alphatonos", "epsilontonos", "etatonos", "iotatonos",
- "upsilondieresistonos", "alpha", "beta", "gamma", "delta", "epsilon",
- "zeta", "eta", "theta", "iota", "kappa", "lambda", "mugreek", "nu",
- "xi", "omicron", "pi", "rho", "sigmafinal", "sigma", "tau", "upsilon",
- "phi", "chi", "psi", "omega", "iotadieresis", "upsilondieresis",
- "omicrontonos", "upsilontonos", "omegatonos", _notdef,
-};
-
-static const struct { unsigned short u, c; } iso8859_7_from_unicode[] = {
- {0x00a0,160}, {0x00a3,163}, {0x00a6,166}, {0x00a7,167}, {0x00a8,168},
- {0x00a9,169}, {0x00ab,171}, {0x00ac,172}, {0x00ad,173}, {0x00b0,176},
- {0x00b1,177}, {0x00b2,178}, {0x00b3,179}, {0x00b7,183}, {0x00bb,187},
- {0x00bd,189}, {0x037a,170}, {0x0384,180}, {0x0385,181}, {0x0386,182},
- {0x0388,184}, {0x0389,185}, {0x038a,186}, {0x038c,188}, {0x038e,190},
- {0x038f,191}, {0x0390,192}, {0x0391,193}, {0x0392,194}, {0x0393,195},
- {0x0394,196}, {0x0395,197}, {0x0396,198}, {0x0397,199}, {0x0398,200},
- {0x0399,201}, {0x039a,202}, {0x039b,203}, {0x039c,204}, {0x039d,205},
- {0x039e,206}, {0x039f,207}, {0x03a0,208}, {0x03a1,209}, {0x03a3,211},
- {0x03a4,212}, {0x03a5,213}, {0x03a6,214}, {0x03a7,215}, {0x03a8,216},
- {0x03a9,217}, {0x03aa,218}, {0x03ab,219}, {0x03ac,220}, {0x03ad,221},
- {0x03ae,222}, {0x03af,223}, {0x03b0,224}, {0x03b1,225}, {0x03b2,226},
- {0x03b3,227}, {0x03b4,228}, {0x03b5,229}, {0x03b6,230}, {0x03b7,231},
- {0x03b8,232}, {0x03b9,233}, {0x03ba,234}, {0x03bb,235}, {0x03bc,236},
- {0x03bd,237}, {0x03be,238}, {0x03bf,239}, {0x03c0,240}, {0x03c1,241},
- {0x03c2,242}, {0x03c3,243}, {0x03c4,244}, {0x03c5,245}, {0x03c6,246},
- {0x03c7,247}, {0x03c8,248}, {0x03c9,249}, {0x03ca,250}, {0x03cb,251},
- {0x03cc,252}, {0x03cd,253}, {0x03ce,254}, {0x2015,175}, {0x2018,161},
- {0x2019,162}, {0x20ac,164},
-};
-
-static const struct { unsigned short u, c; } winansi_from_unicode[] = {
- {0x00a0,160}, {0x00a1,161}, {0x00a2,162}, {0x00a3,163}, {0x00a4,164},
- {0x00a5,165}, {0x00a6,166}, {0x00a7,167}, {0x00a8,168}, {0x00a9,169},
- {0x00aa,170}, {0x00ab,171}, {0x00ac,172}, {0x00ad,173}, {0x00ae,174},
- {0x00af,175}, {0x00b0,176}, {0x00b1,177}, {0x00b2,178}, {0x00b3,179},
- {0x00b4,180}, {0x00b5,181}, {0x00b6,182}, {0x00b7,183}, {0x00b8,184},
- {0x00b9,185}, {0x00ba,186}, {0x00bb,187}, {0x00bc,188}, {0x00bd,189},
- {0x00be,190}, {0x00bf,191}, {0x00c0,192}, {0x00c1,193}, {0x00c2,194},
- {0x00c3,195}, {0x00c4,196}, {0x00c5,197}, {0x00c6,198}, {0x00c7,199},
- {0x00c8,200}, {0x00c9,201}, {0x00ca,202}, {0x00cb,203}, {0x00cc,204},
- {0x00cd,205}, {0x00ce,206}, {0x00cf,207}, {0x00d0,208}, {0x00d1,209},
- {0x00d2,210}, {0x00d3,211}, {0x00d4,212}, {0x00d5,213}, {0x00d6,214},
- {0x00d7,215}, {0x00d8,216}, {0x00d9,217}, {0x00da,218}, {0x00db,219},
- {0x00dc,220}, {0x00dd,221}, {0x00de,222}, {0x00df,223}, {0x00e0,224},
- {0x00e1,225}, {0x00e2,226}, {0x00e3,227}, {0x00e4,228}, {0x00e5,229},
- {0x00e6,230}, {0x00e7,231}, {0x00e8,232}, {0x00e9,233}, {0x00ea,234},
- {0x00eb,235}, {0x00ec,236}, {0x00ed,237}, {0x00ee,238}, {0x00ef,239},
- {0x00f0,240}, {0x00f1,241}, {0x00f2,242}, {0x00f3,243}, {0x00f4,244},
- {0x00f5,245}, {0x00f6,246}, {0x00f7,247}, {0x00f8,248}, {0x00f9,249},
- {0x00fa,250}, {0x00fb,251}, {0x00fc,252}, {0x00fd,253}, {0x00fe,254},
- {0x00ff,255}, {0x0152,140}, {0x0153,156}, {0x0160,138}, {0x0161,154},
- {0x0178,159}, {0x017d,142}, {0x017e,158}, {0x0192,131}, {0x02c6,136},
- {0x02dc,152}, {0x2013,150}, {0x2014,151}, {0x2018,145}, {0x2019,146},
- {0x201a,130}, {0x201c,147}, {0x201d,148}, {0x201e,132}, {0x2020,134},
- {0x2021,135}, {0x2022,149}, {0x2026,133}, {0x2030,137}, {0x2039,139},
- {0x203a,155}, {0x20ac,128}, {0x2122,153},
-};
diff --git a/source/pdf/pdf-font.c b/source/pdf/pdf-font.c
index a0df70fd..391a1024 100644
--- a/source/pdf/pdf-font.c
+++ b/source/pdf/pdf-font.c
@@ -20,6 +20,8 @@
#define FT_SFNT_HEAD ft_sfnt_head
#endif
+#define pdf_win_ansi fz_glyph_name_from_windows_1252
+
static void pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
const char *collection, const char *basefont, int iscidfont);
@@ -2037,10 +2039,10 @@ pdf_add_simple_font_encoding(fz_context *ctx, pdf_document *doc, pdf_obj *fobj,
pdf_dict_put(ctx, fobj, PDF_NAME(Encoding), PDF_NAME(WinAnsiEncoding));
break;
case PDF_SIMPLE_ENCODING_GREEK:
- pdf_add_simple_font_encoding_imp(ctx, doc, fobj, pdf_glyph_name_from_iso8859_7);
+ pdf_add_simple_font_encoding_imp(ctx, doc, fobj, fz_glyph_name_from_iso8859_7);
break;
case PDF_SIMPLE_ENCODING_CYRILLIC:
- pdf_add_simple_font_encoding_imp(ctx, doc, fobj, pdf_glyph_name_from_koi8u);
+ pdf_add_simple_font_encoding_imp(ctx, doc, fobj, fz_glyph_name_from_koi8u);
break;
}
}
@@ -2061,9 +2063,9 @@ pdf_add_simple_font(fz_context *ctx, pdf_document *doc, fz_font *font, int encod
switch (encoding)
{
default: enc = pdf_win_ansi; break;
- case PDF_SIMPLE_ENCODING_LATIN: enc = pdf_win_ansi; break;
- case PDF_SIMPLE_ENCODING_GREEK: enc = pdf_glyph_name_from_iso8859_7; break;
- case PDF_SIMPLE_ENCODING_CYRILLIC: enc = pdf_glyph_name_from_koi8u; break;
+ case PDF_SIMPLE_ENCODING_LATIN: enc = fz_glyph_name_from_windows_1252; break;
+ case PDF_SIMPLE_ENCODING_GREEK: enc = fz_glyph_name_from_iso8859_7; break;
+ case PDF_SIMPLE_ENCODING_CYRILLIC: enc = fz_glyph_name_from_koi8u; break;
}
fobj = pdf_add_new_dict(ctx, doc, 10);