diff options
author | Tor Andersson <tor.andersson@artifex.com> | 2018-03-20 13:03:21 +0100 |
---|---|---|
committer | Tor Andersson <tor.andersson@artifex.com> | 2018-03-22 14:58:01 +0100 |
commit | dd0badd75b48f50e7bd9f15c0ebafcdb48e16e66 (patch) | |
tree | cf749bb9c776323a9bd7c0b71264605661063187 /scripts/makeencoding.py | |
parent | 81c77a6b6464f5af66f851837f85ed6f9724f19e (diff) | |
download | mupdf-dd0badd75b48f50e7bd9f15c0ebafcdb48e16e66.tar.xz |
Check in script used to create encoding tables.
Diffstat (limited to 'scripts/makeencoding.py')
-rw-r--r-- | scripts/makeencoding.py | 49 |
1 files changed, 49 insertions, 0 deletions
# Convert unicode mapping table to C arrays mapping glyph names and unicode values.
#
# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-U.TXT
# ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT
#
# Runs unchanged on Python 2.6+ and Python 3: output goes through
# file.write() rather than the print statement/function.

import sys

# Unicode code point -> glyph name. Filled in by main() from glyphlist.txt
# and read by dump_table().
glyphs = {}


def load_glyphs(fn):
    """Read a 'name;XXXX' glyph list and return a {code point: name} dict.

    Lines starting with '#' and blank lines are skipped. Only entries whose
    code field is exactly four characters long are kept, which drops
    entries that list several code points. When several names share a code
    point, the last one in the file wins.

    Raises ValueError if a data line does not split into exactly two
    ';'-separated fields or if a four-character code is not valid hex.
    """
    names = {}
    with open(fn) as f:
        for line in f:
            line = line.rstrip()
            if not line or line[0] == '#':
                continue
            name, code = line.split(';')
            if len(code) == 4:
                names[int(code, 16)] = name
    return names


def load_table(fn):
    """Read a '0xNN 0xNNNN' mapping file into a 256-entry list.

    The result is indexed by byte value and holds the mapped code point,
    or 0 for bytes the file does not map. Comment lines, blank lines and
    lines whose second column is missing or is itself a comment (for
    example an '#UNDEFINED' placeholder) are skipped.

    Raises ValueError for non-hex fields and IndexError for a byte value
    outside 0..255.
    """
    table = [0] * 256
    with open(fn) as f:
        for line in f:
            fields = line.split()
            if len(fields) < 2:
                continue
            if fields[0].startswith('#') or fields[1].startswith('#'):
                continue
            # int(..., 16) accepts the leading "0x" used by the mapping files.
            table[int(fields[0], 16)] = int(fields[1], 16)
    return table


def dump_table(name, table, out=None):
    """Write the two C arrays for one encoding.

    name  -- encoding identifier spliced into the C array names.
    table -- list of code points indexed by byte value.
    out   -- file-like object to write to; defaults to sys.stdout.

    The first array holds one glyph name per table entry, using the bare
    identifier _notdef for code points missing from the module-level
    `glyphs` dict (presumably declared by the C code that includes the
    output). The second array lists {unicode, byte} pairs for named code
    points >= 128, ordered by code point.
    """
    if out is None:
        out = sys.stdout

    lines = ["const char *pdf_glyph_name_from_%s[%d] = {" % (name, len(table))]
    for u in table:
        if u in glyphs:
            lines.append('"%s",' % glyphs[u])
        else:
            lines.append('_notdef,')
    lines.append("};")
    lines.append("")

    lines.append("static const struct { unsigned short u, c; } %s_from_unicode[] = {" % name)
    # Sort numerically on (code point, byte) so the order never depends on
    # how the numbers happen to be formatted.
    pairs = sorted((u, c) for c, u in enumerate(table) if u in glyphs and u >= 128)
    for u, c in pairs:
        lines.append('{0x%04x,%d},' % (u, c))
    lines.append("};")
    lines.append("")

    out.write("\n".join(lines) + "\n")


def main():
    """Generate the KOI8-U and ISO 8859-7 tables on standard output."""
    glyphs.update(load_glyphs("scripts/glyphlist.txt"))
    dump_table("koi8u", load_table("scripts/KOI8-U.TXT"))
    dump_table("iso8859_7", load_table("scripts/8859-7.TXT"))


if __name__ == "__main__":
    main()