summaryrefslogtreecommitdiff
path: root/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'AppPkg/Applications/Python/Python-2.7.2/Tools/unicode')
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/Makefile84
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/comparecodecs.py53
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencjkcodecs.py68
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py423
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodec.py61
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodecs.bat7
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/listcodecs.py41
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/makeunicodedata.py1135
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/mkstringprep.py425
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/CP1140.TXT291
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/KOI8-U.TXT298
-rw-r--r--AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/TIS-620.TXT284
12 files changed, 3170 insertions, 0 deletions
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/Makefile b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/Makefile
new file mode 100644
index 0000000000..2fc88b9c5e
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/Makefile
@@ -0,0 +1,84 @@
#
# Recreate the Python charmap codecs from the Unicode mapping
# files available at ftp://ftp.unicode.org/
#
# (c) Copyright Marc-Andre Lemburg, 2005.
#     Licensed to PSF under a Contributor Agreement.

# Python binary to use
PYTHON = python

# Remove tool to use
RM = /bin/rm

### Generic targets

all: distclean mappings codecs

codecs: misc windows iso apple ebcdic custom-mappings cjk

### Mappings

# Download the Unicode mapping tables (requires the ncftp client).
mappings:
	ncftpget -R ftp.unicode.org . Public/MAPPINGS

### Codecs

build/:
	mkdir build

# Each group below regenerates one family of codecs into build/, then
# removes mappings that are not shipped as Python codecs (readme files
# and encodings Python does not support via the charmap codec).
misc: build/
	$(PYTHON) gencodec.py MAPPINGS/VENDORS/MISC/ build/
	$(RM) build/atarist.*
	$(RM) build/us_ascii_quotes.*
	$(RM) build/ibmgraph.*
	$(RM) build/sgml.*
	$(RM) -f build/readme.*

custom-mappings: build/
	$(PYTHON) gencodec.py python-mappings/ build/

windows: build/
	$(PYTHON) gencodec.py MAPPINGS/VENDORS/MICSFT/WINDOWS/ build/
	$(RM) build/cp9*
	$(RM) -f build/readme.*

iso: build/
	$(PYTHON) gencodec.py MAPPINGS/ISO8859/ build/ iso
	$(RM) -f build/isoreadme.*

apple: build/
	$(PYTHON) gencodec.py MAPPINGS/VENDORS/APPLE/ build/ mac_
	$(RM) build/mac_dingbats.*
	$(RM) build/mac_japanese.*
	$(RM) build/mac_chin*
	$(RM) build/mac_korean.*
	$(RM) build/mac_symbol.*
	$(RM) build/mac_corpchar.*
	$(RM) build/mac_devanaga.*
	$(RM) build/mac_gaelic.*
	$(RM) build/mac_gurmukhi.*
	$(RM) build/mac_hebrew.*
	$(RM) build/mac_inuit.*
	$(RM) build/mac_thai.*
	$(RM) build/mac_ukraine.*
	$(RM) build/mac_arabic.py
	$(RM) build/mac_celtic.*
	$(RM) build/mac_gujarati.*
	$(RM) build/mac_keyboard.*
	$(RM) -f build/mac_readme.*

ebcdic: build/
	$(PYTHON) gencodec.py MAPPINGS/VENDORS/MICSFT/EBCDIC/ build/
	$(RM) -f build/readme.*

# CJK codecs are generated from a template, not from mapping files.
cjk: build/
	$(PYTHON) gencjkcodecs.py build/

### Cleanup

clean:
	$(RM) -f build/*

distclean: clean
	$(RM) -rf MAPPINGS/
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/comparecodecs.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/comparecodecs.py
new file mode 100644
index 0000000000..a6f3102f1d
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/comparecodecs.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+""" Compare the output of two codecs.
+
+(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
+
+ Licensed to PSF under a Contributor Agreement.
+
+"""
+import sys
+
def compare_codecs(encoding1, encoding2):

    """ Print every encoding and decoding difference between the two
        named codecs.

        Every code point up to sys.maxunicode is encoded with both
        codecs, and every byte value 0-255 is decoded with both; a
        failed conversion is represented by the string '<undefined>',
        so a one-sided failure also counts as a mismatch.

    """
    print 'Comparing encoding/decoding of %r and %r' % (encoding1, encoding2)
    mismatch = 0
    # Check encoding
    # NOTE: sys.maxunicode is 0xFFFF on narrow builds, 0x10FFFF on wide
    # builds, so coverage depends on the interpreter build.
    for i in range(sys.maxunicode):
        u = unichr(i)
        try:
            c1 = u.encode(encoding1)
        except UnicodeError, reason:
            c1 = '<undefined>'
        try:
            c2 = u.encode(encoding2)
        except UnicodeError, reason:
            c2 = '<undefined>'
        if c1 != c2:
            print ' * encoding mismatch for 0x%04X: %-14r != %r' % \
                  (i, c1, c2)
            mismatch += 1
    # Check decoding (all 256 possible byte values)
    for i in range(256):
        c = chr(i)
        try:
            u1 = c.decode(encoding1)
        except UnicodeError:
            u1 = u'<undefined>'
        try:
            u2 = c.decode(encoding2)
        except UnicodeError:
            u2 = u'<undefined>'
        if u1 != u2:
            print ' * decoding mismatch for 0x%04X: %-14r != %r' % \
                  (i, u1, u2)
            mismatch += 1
    if mismatch:
        print
        print 'Found %i mismatches' % mismatch
    else:
        print '-> Codecs are identical.'

if __name__ == '__main__':
    compare_codecs(sys.argv[1], sys.argv[2])
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencjkcodecs.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencjkcodecs.py
new file mode 100644
index 0000000000..5ac97d4d9d
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencjkcodecs.py
@@ -0,0 +1,68 @@
+import os, string
+
# Encodings grouped by the C extension module that implements them:
# the encodings listed under key <owner> are served by _codecs_<owner>.
codecs = {
    'cn': ('gb2312', 'gbk', 'gb18030', 'hz'),
    'tw': ('big5', 'cp950'),
    'hk': ('big5hkscs',),
    'jp': ('cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213',
           'euc_jis_2004', 'shift_jis_2004'),
    'kr': ('cp949', 'euc_kr', 'johab'),
    'iso2022': ('iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
                'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext',
                'iso2022_kr'),
}

# Source template for one generated codec module; $owner, $encoding and
# $ENCODING are substituted by gencodecs() below.
TEMPLATE = string.Template("""\
#
# $encoding.py: Python Unicode Codec for $ENCODING
#
# Written by Hye-Shik Chang <perky@FreeBSD.org>
#

import _codecs_$owner, codecs
import _multibytecodec as mbc

codec = _codecs_$owner.getcodec('$encoding')

class Codec(codecs.Codec):
    encode = codec.encode
    decode = codec.decode

class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
                         codecs.IncrementalEncoder):
    codec = codec

class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
                         codecs.IncrementalDecoder):
    codec = codec

class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
    codec = codec

class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
    codec = codec

def getregentry():
    return codecs.CodecInfo(
        name='$encoding',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
""")
+
def gencodecs(prefix):

    """ Write one <encoding>.py codec module into the directory *prefix*
        for every encoding listed in the codecs table above.

    """
    for loc, encodings in codecs.iteritems():
        for enc in encodings:
            code = TEMPLATE.substitute(ENCODING=enc.upper(),
                                       encoding=enc.lower(),
                                       owner=loc)
            codecpath = os.path.join(prefix, enc + '.py')
            # File object is closed implicitly by CPython refcounting.
            open(codecpath, 'w').write(code)

if __name__ == '__main__':
    import sys
    gencodecs(sys.argv[1])
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py
new file mode 100644
index 0000000000..2626279d1e
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py
@@ -0,0 +1,423 @@
+""" Unicode Mapping Parser and Codec Generator.
+
+This script parses Unicode mapping files as available from the Unicode
+site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
+modules from them. The codecs use the standard character mapping codec
+to actually apply the mapping.
+
+Synopsis: gencodec.py dir codec_prefix
+
+All files in dir are scanned and those producing non-empty mappings
+will be written to <codec_prefix><mapname>.py with <mapname> being the
+first part of the map's filename ('a' in a.b.c.txt) converted to
+lowercase with hyphens replaced by underscores.
+
+The tool also writes marshalled versions of the mapping tables to the
+same location (with .mapping extension).
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright Guido van Rossum, 2000.
+
+Table generation:
+(c) Copyright Marc-Andre Lemburg, 2005.
+ Licensed to PSF under a Contributor Agreement.
+
+"""#"
+
+import re, os, marshal, codecs
+
# Maximum allowed size of charmap tables
MAX_TABLE_SIZE = 8192

# Standard undefined Unicode code point
UNI_UNDEFINED = unichr(0xFFFE)

# Matches one mapping line: group 1 is the encoded value(s), group 2 the
# Unicode code point(s) and/or <meta> codes, group 3 an optional trailing
# '#' comment. Multiple codes are joined with '+'.
mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'
                   '\s+'
                   '((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
                   '\s*'
                   '(#.+)?')
+
def parsecodes(codes, len=len, range=range):

    """ Converts code combinations to either a single code integer
        or a tuple of integers.

        meta-codes (in angular brackets, e.g. <LR> and <RL>) are
        ignored.

        Empty codes or illegal ones are returned as None.

    """
    if not codes:
        return None
    l = codes.split('+')
    if len(l) == 1:
        # Common case: a single hex code
        return int(l[0],16)
    for i in range(len(l)):
        try:
            l[i] = int(l[i],16)
        except ValueError:
            # e.g. a <meta> code; mark for removal below
            l[i] = None
    l = [x for x in l if x is not None]
    if len(l) == 1:
        return l[0]
    else:
        return tuple(l)
+
def readmap(filename):

    """ Parse the Unicode mapping file *filename* and return a dict
        mapping encoded values to (unicode ordinal(s), comment) tuples.

        If at least as many code points map to themselves as remain
        unmapped, the special key 'IDENTITY' is set to 256 and all
        still-unmapped positions below 256 are mapped to None.

    """
    f = open(filename,'r')
    lines = f.readlines()
    f.close()
    enc2uni = {}
    identity = []
    unmapped = range(256)

    # UTC mapping tables per convention don't include the identity
    # mappings for code points 0x00 - 0x1F and 0x7F, unless these are
    # explicitly mapped to different characters or undefined
    for i in range(32) + [127]:
        identity.append(i)
        unmapped.remove(i)
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for line in lines:
        line = line.strip()
        if not line or line[0] == '#':
            continue
        m = mapRE.match(line)
        if not m:
            #print '* not matched: %s' % repr(line)
            continue
        enc,uni,comment = m.groups()
        enc = parsecodes(enc)
        uni = parsecodes(uni)
        if comment is None:
            comment = ''
        else:
            # strip the leading '#' of the comment
            comment = comment[1:].strip()
        if enc < 256:
            if enc in unmapped:
                unmapped.remove(enc)
            if enc == uni:
                identity.append(enc)
            enc2uni[enc] = (uni,comment)
        else:
            enc2uni[enc] = (uni,comment)

    # If there are more identity-mapped entries than unmapped entries,
    # it pays to generate an identity dictionary first, and add explicit
    # mappings to None for the rest
    if len(identity) >= len(unmapped):
        for enc in unmapped:
            enc2uni[enc] = (None, "")
        enc2uni['IDENTITY'] = 256

    return enc2uni
+
def hexrepr(t, precision=4):

    """ Return a hex literal for the integer or tuple of integers *t*,
        zero-padded to *precision* digits; 'None' if t is None.

    """
    if t is None:
        return 'None'
    try:
        len(t)
    except:
        # Not a sequence: a single integer code
        return '0x%0*X' % (precision, t)
    try:
        return '(' + ', '.join(['0x%0*X' % (precision, item)
                                for item in t]) + ')'
    except TypeError, why:
        print '* failed to convert %r: %s' % (t, why)
        raise
+
def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):

    """ Return a list of Python source lines defining *varname* as a
        mapping dictionary built from *map*.

        If *map* contains the 'IDENTITY' marker, the dict is based on
        codecs.make_identity_dict() and only exceptions are spelled out.
        Mapping comments are emitted when *comments* is true.
        *precisions* gives the (key, value) hex zero-padding widths.

        NOTE: the 'IDENTITY' key is removed from *map* as a side effect.

    """
    l = []
    append = l.append
    if "IDENTITY" in map:
        append("%s = codecs.make_identity_dict(range(%d))" %
               (varname, map["IDENTITY"]))
        append("%s.update({" % varname)
        splits = 1
        del map["IDENTITY"]
        identity = 1
    else:
        append("%s = {" % varname)
        splits = 0
        identity = 0

    mappings = sorted(map.items())
    i = 0
    key_precision, value_precision = precisions
    for mapkey, mapvalue in mappings:
        mapcomment = ''
        if isinstance(mapkey, tuple):
            (mapkey, mapcomment) = mapkey
        if isinstance(mapvalue, tuple):
            (mapvalue, mapcomment) = mapvalue
        if mapkey is None:
            continue
        if (identity and
            mapkey == mapvalue and
            mapkey < 256):
            # No need to include identity mappings, since these
            # are already set for the first 256 code points.
            continue
        key = hexrepr(mapkey, key_precision)
        value = hexrepr(mapvalue, value_precision)
        if mapcomment and comments:
            append('    %s: %s,\t# %s' % (key, value, mapcomment))
        else:
            append('    %s: %s,' % (key, value))
        i += 1
        if i == 4096:
            # Split the definition into parts so that the Python
            # parser doesn't dump core
            if splits == 0:
                append('}')
            else:
                append('})')
            append('%s.update({' % varname)
            i = 0
            splits = splits + 1
    if splits == 0:
        append('}')
    else:
        append('})')

    return l
+
def python_tabledef_code(varname, map, comments=1, key_precision=2):

    """ Return a list of Python source lines defining *varname* as a
        decoding table (a tuple of unicode characters indexed by the
        encoded value), or None if *map* cannot be represented as a
        table (key above MAX_TABLE_SIZE, or 1-n mappings present).

        Undefined positions are filled with UNI_UNDEFINED.
        NOTE: the 'IDENTITY' key is removed from *map* as a side effect.

    """
    l = []
    append = l.append
    append('%s = (' % varname)

    # Analyze map and create table dict
    mappings = sorted(map.items())
    table = {}
    maxkey = 0
    if 'IDENTITY' in map:
        for key in range(256):
            table[key] = (key, '')
        maxkey = 255
        del map['IDENTITY']
    for mapkey, mapvalue in mappings:
        mapcomment = ''
        if isinstance(mapkey, tuple):
            (mapkey, mapcomment) = mapkey
        if isinstance(mapvalue, tuple):
            (mapvalue, mapcomment) = mapvalue
        if mapkey is None:
            continue
        table[mapkey] = (mapvalue, mapcomment)
        if mapkey > maxkey:
            maxkey = mapkey
    if maxkey > MAX_TABLE_SIZE:
        # Table too large
        return None

    # Create table code
    for key in range(maxkey + 1):
        if key not in table:
            mapvalue = None
            mapcomment = 'UNDEFINED'
        else:
            mapvalue, mapcomment = table[key]
        if mapvalue is None:
            mapchar = UNI_UNDEFINED
        else:
            if isinstance(mapvalue, tuple):
                # 1-n mappings not supported
                return None
            else:
                mapchar = unichr(mapvalue)
        if mapcomment and comments:
            append('    %r\t# %s -> %s' % (mapchar,
                                           hexrepr(key, key_precision),
                                           mapcomment))
        else:
            append('    %r' % mapchar)

    append(')')
    return l
+
def codegen(name, map, encodingname, comments=1):

    """ Returns Python source for the given map.

        Comments are included in the source, if comments is true (default).

    """
    # Generate code
    decoding_map_code = python_mapdef_code(
        'decoding_map',
        map,
        comments=comments)
    decoding_table_code = python_tabledef_code(
        'decoding_table',
        map,
        comments=comments)
    encoding_map_code = python_mapdef_code(
        'encoding_map',
        codecs.make_encoding_map(map),
        comments=comments,
        precisions=(4, 2))

    # Prefer the (faster) table representation when it is available.
    if decoding_table_code:
        suffix = 'table'
    else:
        suffix = 'map'

    l = [
        '''\
""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.

"""#"

import codecs

### Codec APIs

class Codec(codecs.Codec):

    def encode(self,input,errors='strict'):
        return codecs.charmap_encode(input,errors,encoding_%s)

    def decode(self,input,errors='strict'):
        return codecs.charmap_decode(input,errors,decoding_%s)
''' % (encodingname, name, suffix, suffix)]
    l.append('''\
class IncrementalEncoder(codecs.IncrementalEncoder):
    def encode(self, input, final=False):
        return codecs.charmap_encode(input,self.errors,encoding_%s)[0]

class IncrementalDecoder(codecs.IncrementalDecoder):
    def decode(self, input, final=False):
        return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' %
        (suffix, suffix))

    l.append('''
class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass

### encodings module API

def getregentry():
    return codecs.CodecInfo(
        name=%r,
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
''' % encodingname.replace('_', '-'))

    # Add decoding table or map (with preference to the table)
    if not decoding_table_code:
        l.append('''
### Decoding Map
''')
        l.extend(decoding_map_code)
    else:
        l.append('''
### Decoding Table
''')
        l.extend(decoding_table_code)

    # Add encoding map
    if decoding_table_code:
        l.append('''
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)
''')
    else:
        l.append('''
### Encoding Map
''')
        l.extend(encoding_map_code)

    # Final new-line
    l.append('')

    return '\n'.join(l).expandtabs()
+
def pymap(name,map,pyfile,encodingname,comments=1):

    """ Write the codec module generated from *map* to *pyfile*. """
    code = codegen(name,map,encodingname,comments)
    f = open(pyfile,'w')
    f.write(code)
    f.close()
+
def marshalmap(name,map,marshalfile):

    """ Dump a copy of the mapping dict *map* to *marshalfile* using
        the marshal serialization format. """
    d = {}
    for e,(u,c) in map.items():
        d[e] = (u,c)
    f = open(marshalfile,'wb')
    marshal.dump(d,f)
    f.close()
+
def convertdir(dir, dirprefix='', nameprefix='', comments=1):

    """ Convert all mapping files in *dir* to codec modules
        <dirprefix><nameprefix><mapname>.py plus marshalled .mapping
        files; <mapname> is the lowercased first filename component
        with hyphens replaced by underscores.

    """
    mapnames = os.listdir(dir)
    for mapname in mapnames:
        mappathname = os.path.join(dir, mapname)
        if not os.path.isfile(mappathname):
            continue
        name = os.path.split(mapname)[1]
        name = name.replace('-','_')
        name = name.split('.')[0]
        name = name.lower()
        name = nameprefix + name
        codefile = name + '.py'
        marshalfile = name + '.mapping'
        print 'converting %s to %s and %s' % (mapname,
                                              dirprefix + codefile,
                                              dirprefix + marshalfile)
        try:
            map = readmap(os.path.join(dir,mapname))
            if not map:
                print '* map is empty; skipping'
            else:
                pymap(mappathname, map, dirprefix + codefile,name,comments)
                marshalmap(mappathname, map, dirprefix + marshalfile)
        except ValueError, why:
            print '* conversion failed: %s' % why
            raise
+
def rewritepythondir(dir, dirprefix='', comments=1):

    """ Regenerate codec modules from previously marshalled .mapping
        files in *dir* (see marshalmap()), instead of re-parsing the
        original Unicode mapping files.

    """
    mapnames = os.listdir(dir)
    for mapname in mapnames:
        if not mapname.endswith('.mapping'):
            continue
        name = mapname[:-len('.mapping')]
        codefile = name + '.py'
        print 'converting %s to %s' % (mapname,
                                       dirprefix + codefile)
        try:
            map = marshal.load(open(os.path.join(dir,mapname),
                               'rb'))
            if not map:
                print '* map is empty; skipping'
            else:
                pymap(mapname, map, dirprefix + codefile,name,comments)
        except ValueError, why:
            print '* conversion failed: %s' % why
+
if __name__ == '__main__':

    import sys
    # Edit this constant by hand to switch between converting raw
    # mapping files (the default) and rewriting marshalled .mapping
    # files produced by an earlier run.
    if 1:
        convertdir(*sys.argv[1:])
    else:
        rewritepythondir(*sys.argv[1:])
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodec.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodec.py
new file mode 100644
index 0000000000..32dcadabf2
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodec.py
@@ -0,0 +1,61 @@
+"""This script generates a Python codec module from a Windows Code Page.
+
+It uses the function MultiByteToWideChar to generate a decoding table.
+"""
+
+import ctypes
+from ctypes import wintypes
+from gencodec import codegen
+import unicodedata
+
def genwinmap(codepage):
    """ Decode every byte value 0-255 with the Windows code page
        *codepage* via the Win32 MultiByteToWideChar API and return a
        mapping dict {byte: (unicode ordinal, character name)} suitable
        for gencodec.codegen().

        Asserts that every byte decodes to exactly one UTF-16 unit.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    enc2uni = {}

    # Pre-seed the control characters with identity mappings, matching
    # the convention used by readmap() in gencodec.py.
    for i in range(32) + [127]:
        enc2uni[i] = (i, 'CONTROL CHARACTER')

    for i in range(256):
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(
            codepage, 0,
            chr(i), 1,
            buf, 2)
        assert ret == 1, "invalid code page"
        assert buf[1] == '\x00'
        try:
            name = unicodedata.name(buf[0])
        except ValueError:
            # Unnamed character: keep the pre-seeded name, if any
            try:
                name = enc2uni[i][1]
            except KeyError:
                name = ''

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
+
def genwincodec(codepage):
    """ Print a complete Python codec module for the Windows code page
        *codepage* to stdout, with a docstring recording the Windows
        version and command used to generate it.
    """
    import platform
    map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    code = codegen("", map, encodingname)
    # Replace first lines with our own docstring
    code = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
  python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, ' '.join(platform.win32_ver()), codepage
       ) + code.split('"""#"', 1)[1]

    print code

if __name__ == '__main__':
    import sys
    genwincodec(int(sys.argv[1]))
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodecs.bat b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodecs.bat
new file mode 100644
index 0000000000..6a6a671f81
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodecs.bat
@@ -0,0 +1,7 @@
@rem Recreate some python charmap codecs from the Windows function
@rem MultiByteToWideChar.

@rem Run from this script's directory so relative paths resolve.
@cd /d %~dp0
@mkdir build
@rem Arabic DOS code page
@rem NOTE(review): the interpreter path is hard-coded to c:\python26.
c:\python26\python genwincodec.py 720 > build/cp720.py
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/listcodecs.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/listcodecs.py
new file mode 100644
index 0000000000..5ad4309be0
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/listcodecs.py
@@ -0,0 +1,41 @@
+""" List all available codec modules.
+
+(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
+
+ Licensed to PSF under a Contributor Agreement.
+
+"""
+
+import os, codecs, encodings
+
# Set to 1 to report codecs whose import raised an error.
_debug = 0

def listcodecs(dir):
    """ Return the names of all codec modules found in the package
        directory *dir* (normally encodings.__path__[0]).

        A name is included if codecs.lookup() either succeeds or fails
        with something other than LookupError (an import error still
        indicates a codec module).
    """
    names = []
    for filename in os.listdir(dir):
        if filename[-3:] != '.py':
            continue
        name = filename[:-3]
        # Check whether we've found a true codec
        try:
            codecs.lookup(name)
        except LookupError:
            # Codec not found
            continue
        except Exception, reason:
            # Probably an error from importing the codec; still it's
            # a valid codec name
            if _debug:
                print '* problem importing codec %r: %s' % \
                      (name, reason)
        names.append(name)
    return names
+
+
if __name__ == '__main__':
    # Print the installed codecs as a Python list literal.
    names = listcodecs(encodings.__path__[0])
    names.sort()
    print 'all_codecs = ['
    for name in names:
        print '    %r,' % name
    print ']'
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/makeunicodedata.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/makeunicodedata.py
new file mode 100644
index 0000000000..37b9f6eb21
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/makeunicodedata.py
@@ -0,0 +1,1135 @@
+#
+# (re)generate unicode property and type databases
+#
+# this script converts a unicode 3.2 database file to
+# Modules/unicodedata_db.h, Modules/unicodename_db.h,
+# and Objects/unicodetype_db.h
+#
+# history:
+# 2000-09-24 fl created (based on bits and pieces from unidb)
+# 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
+# 2000-09-25 fl added character type table
+# 2000-09-26 fl added LINEBREAK, DECIMAL, and DIGIT flags/fields (2.0)
+# 2000-11-03 fl expand first/last ranges
+# 2001-01-19 fl added character name tables (2.1)
+# 2001-01-21 fl added decomp compression; dynamic phrasebook threshold
+# 2002-09-11 wd use string methods
+# 2002-10-18 mvl update to Unicode 3.2
+# 2002-10-22 mvl generate NFC tables
+# 2002-11-24 mvl expand all ranges, sort names version-independently
+# 2002-11-25 mvl add UNIDATA_VERSION
+# 2004-05-29 perky add east asian width information
+# 2006-03-10 mvl update to Unicode 4.1; add UCD 3.2 delta
+#
+# written by Fredrik Lundh (fredrik@pythonware.com)
+#
+
+import sys
+
SCRIPT = sys.argv[0]
VERSION = "2.6"

# The Unicode Database
UNIDATA_VERSION = "5.2.0"
# Input file name templates; %s is filled with "" for the current UCD
# or "-<version>" for the old versions listed in old_versions below.
UNICODE_DATA = "UnicodeData%s.txt"
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
UNIHAN = "Unihan%s.txt"
DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt"
LINE_BREAK = "LineBreak%s.txt"

old_versions = ["3.2.0"]

# Order matters: the index of each name is stored in the generated
# database records.
CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
    "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn", "Lm",
    "Lo", "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po", "Sm", "Sc", "Sk",
    "So" ]

BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
    "PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
    "ON" ]

EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ]

MANDATORY_LINE_BREAKS = [ "BK", "CR", "LF", "NL" ]

# note: should match definitions in Objects/unicodectype.c
ALPHA_MASK = 0x01
DECIMAL_MASK = 0x02
DIGIT_MASK = 0x04
LOWER_MASK = 0x08
LINEBREAK_MASK = 0x10
SPACE_MASK = 0x20
TITLE_MASK = 0x40
UPPER_MASK = 0x80
NODELTA_MASK = 0x100
NUMERIC_MASK = 0x200
+
def maketables(trace=0):

    """ Read the current and old-version UCD input files and write the
        generated name, data and type headers (see the makeunicode*
        functions below).
    """
    print "--- Reading", UNICODE_DATA % "", "..."

    version = ""
    unicode = UnicodeData(UNICODE_DATA % version,
                          COMPOSITION_EXCLUSIONS % version,
                          EASTASIAN_WIDTH % version,
                          UNIHAN % version,
                          DERIVEDNORMALIZATION_PROPS % version,
                          LINE_BREAK % version)

    print len(filter(None, unicode.table)), "characters"

    # Merge the deltas for each supported old UCD version into the
    # current database.
    for version in old_versions:
        print "--- Reading", UNICODE_DATA % ("-"+version), "..."
        old_unicode = UnicodeData(UNICODE_DATA % ("-"+version),
                                  COMPOSITION_EXCLUSIONS % ("-"+version),
                                  EASTASIAN_WIDTH % ("-"+version),
                                  UNIHAN % ("-"+version))
        print len(filter(None, old_unicode.table)), "characters"
        merge_old_version(version, unicode, old_unicode)

    makeunicodename(unicode, trace)
    makeunicodedata(unicode, trace)
    makeunicodetype(unicode, trace)
+
+# --------------------------------------------------------------------
+# unicode character properties
+
def makeunicodedata(unicode, trace):

    """ Write Modules/unicodedata_db.h: the deduplicated character
        property records, decomposition data, NFC composition tables
        and the change/normalization delta tables for old UCD versions.
    """
    dummy = (0, 0, 0, 0, 0, 0)
    table = [dummy]
    cache = {0: dummy}
    index = [0] * len(unicode.chars)

    FILE = "Modules/unicodedata_db.h"

    print "--- Preparing", FILE, "..."

    # 1) database properties

    for char in unicode.chars:
        record = unicode.table[char]
        if record:
            # extract database properties
            category = CATEGORY_NAMES.index(record[2])
            combining = int(record[3])
            bidirectional = BIDIRECTIONAL_NAMES.index(record[4])
            mirrored = record[9] == "Y"
            eastasianwidth = EASTASIANWIDTH_NAMES.index(record[15])
            normalizationquickcheck = record[17]
            item = (
                category, combining, bidirectional, mirrored, eastasianwidth,
                normalizationquickcheck
                )
            # add entry to index and item tables (records are shared
            # between all characters with identical properties)
            i = cache.get(item)
            if i is None:
                cache[item] = i = len(table)
                table.append(item)
            index[char] = i

    # 2) decomposition data

    decomp_data = [0]
    decomp_prefix = [""]
    decomp_index = [0] * len(unicode.chars)
    decomp_size = 0

    comp_pairs = []
    comp_first = [None] * len(unicode.chars)
    comp_last = [None] * len(unicode.chars)

    for char in unicode.chars:
        record = unicode.table[char]
        if record:
            if record[5]:
                decomp = record[5].split()
                if len(decomp) > 19:
                    raise Exception, "character %x has a decomposition too large for nfd_nfkd" % char
                # prefix
                if decomp[0][0] == "<":
                    prefix = decomp.pop(0)
                else:
                    prefix = ""
                try:
                    i = decomp_prefix.index(prefix)
                except ValueError:
                    i = len(decomp_prefix)
                    decomp_prefix.append(prefix)
                prefix = i
                assert prefix < 256
                # content: first entry packs length (high byte) and
                # prefix index (low byte)
                decomp = [prefix + (len(decomp)<<8)] + [int(s, 16) for s in decomp]
                # Collect NFC pairs
                if not prefix and len(decomp) == 3 and \
                   char not in unicode.exclusions and \
                   unicode.table[decomp[1]][3] == "0":
                    p, l, r = decomp
                    comp_first[l] = 1
                    comp_last[r] = 1
                    comp_pairs.append((l,r,char))
                try:
                    i = decomp_data.index(decomp)
                except ValueError:
                    i = len(decomp_data)
                    decomp_data.extend(decomp)
                    decomp_size = decomp_size + len(decomp) * 2
            else:
                i = 0
            decomp_index[char] = i

    # Renumber the NFC first/last characters and collect the contiguous
    # ranges for the reindex tables.
    f = l = 0
    comp_first_ranges = []
    comp_last_ranges = []
    prev_f = prev_l = None
    for i in unicode.chars:
        if comp_first[i] is not None:
            comp_first[i] = f
            f += 1
            if prev_f is None:
                prev_f = (i,i)
            elif prev_f[1]+1 == i:
                prev_f = prev_f[0],i
            else:
                comp_first_ranges.append(prev_f)
                prev_f = (i,i)
        if comp_last[i] is not None:
            comp_last[i] = l
            l += 1
            if prev_l is None:
                prev_l = (i,i)
            elif prev_l[1]+1 == i:
                prev_l = prev_l[0],i
            else:
                comp_last_ranges.append(prev_l)
                prev_l = (i,i)
    comp_first_ranges.append(prev_f)
    comp_last_ranges.append(prev_l)
    total_first = f
    total_last = l

    # 2D composition matrix, flattened: comp_data[first*total_last+last]
    comp_data = [0]*(total_first*total_last)
    for f,l,char in comp_pairs:
        f = comp_first[f]
        l = comp_last[l]
        comp_data[f*total_last+l] = char

    print len(table), "unique properties"
    print len(decomp_prefix), "unique decomposition prefixes"
    print len(decomp_data), "unique decomposition entries:",
    print decomp_size, "bytes"
    print total_first, "first characters in NFC"
    print total_last, "last characters in NFC"
    print len(comp_pairs), "NFC pairs"

    print "--- Writing", FILE, "..."

    fp = open(FILE, "w")
    print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
    print >>fp
    print >>fp, '#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION
    print >>fp, "/* a list of unique database records */"
    print >>fp, \
          "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {"
    for item in table:
        print >>fp, "    {%d, %d, %d, %d, %d, %d}," % item
    print >>fp, "};"
    print >>fp

    print >>fp, "/* Reindexing of NFC first characters. */"
    print >>fp, "#define TOTAL_FIRST",total_first
    print >>fp, "#define TOTAL_LAST",total_last
    print >>fp, "struct reindex{int start;short count,index;};"
    print >>fp, "static struct reindex nfc_first[] = {"
    for start,end in comp_first_ranges:
        print >>fp,"  { %d, %d, %d}," % (start,end-start,comp_first[start])
    print >>fp,"  {0,0,0}"
    print >>fp,"};\n"
    print >>fp, "static struct reindex nfc_last[] = {"
    for start,end in comp_last_ranges:
        print >>fp,"  { %d, %d, %d}," % (start,end-start,comp_last[start])
    print >>fp,"  {0,0,0}"
    print >>fp,"};\n"

    # FIXME: <fl> the following tables could be made static, and
    # the support code moved into unicodedatabase.c

    print >>fp, "/* string literals */"
    print >>fp, "const char *_PyUnicode_CategoryNames[] = {"
    for name in CATEGORY_NAMES:
        print >>fp, "    \"%s\"," % name
    print >>fp, "    NULL"
    print >>fp, "};"

    print >>fp, "const char *_PyUnicode_BidirectionalNames[] = {"
    for name in BIDIRECTIONAL_NAMES:
        print >>fp, "    \"%s\"," % name
    print >>fp, "    NULL"
    print >>fp, "};"

    print >>fp, "const char *_PyUnicode_EastAsianWidthNames[] = {"
    for name in EASTASIANWIDTH_NAMES:
        print >>fp, "    \"%s\"," % name
    print >>fp, "    NULL"
    print >>fp, "};"

    print >>fp, "static const char *decomp_prefix[] = {"
    for name in decomp_prefix:
        print >>fp, "    \"%s\"," % name
    print >>fp, "    NULL"
    print >>fp, "};"

    # split record index table
    index1, index2, shift = splitbins(index, trace)

    print >>fp, "/* index tables for the database records */"
    print >>fp, "#define SHIFT", shift
    Array("index1", index1).dump(fp, trace)
    Array("index2", index2).dump(fp, trace)

    # split decomposition index table
    index1, index2, shift = splitbins(decomp_index, trace)

    print >>fp, "/* decomposition data */"
    Array("decomp_data", decomp_data).dump(fp, trace)

    print >>fp, "/* index tables for the decomposition data */"
    print >>fp, "#define DECOMP_SHIFT", shift
    Array("decomp_index1", index1).dump(fp, trace)
    Array("decomp_index2", index2).dump(fp, trace)

    index, index2, shift = splitbins(comp_data, trace)
    print >>fp, "/* NFC pairs */"
    print >>fp, "#define COMP_SHIFT", shift
    Array("comp_index", index).dump(fp, trace)
    Array("comp_data", index2).dump(fp, trace)

    # Generate delta tables for old versions
    for version, table, normalization in unicode.changed:
        cversion = version.replace(".","_")
        records = [table[0]]
        cache = {table[0]:0}
        index = [0] * len(table)
        for i, record in enumerate(table):
            try:
                index[i] = cache[record]
            except KeyError:
                index[i] = cache[record] = len(records)
                records.append(record)
        index1, index2, shift = splitbins(index, trace)
        print >>fp, "static const change_record change_records_%s[] = {" % cversion
        for record in records:
            print >>fp, "\t{ %s }," % ", ".join(map(str,record))
        print >>fp, "};"
        Array("changes_%s_index" % cversion, index1).dump(fp, trace)
        Array("changes_%s_data" % cversion, index2).dump(fp, trace)
        # Emit C accessors that look up the delta record / normalization
        # change for a given code point in the tables just written.
        print >>fp, "static const change_record* get_change_%s(Py_UCS4 n)" % cversion
        print >>fp, "{"
        print >>fp, "\tint index;"
        print >>fp, "\tif (n >= 0x110000) index = 0;"
        print >>fp, "\telse {"
        print >>fp, "\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift)
        print >>fp, "\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
              (cversion, shift, ((1<<shift)-1))
        print >>fp, "\t}"
        print >>fp, "\treturn change_records_%s+index;" % cversion
        print >>fp, "}\n"
        print >>fp, "static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion
        print >>fp, "{"
        print >>fp, "\tswitch(n) {"
        for k, v in normalization:
            print >>fp, "\tcase %s: return 0x%s;" % (hex(k), v)
        print >>fp, "\tdefault: return 0;"
        print >>fp, "\t}\n}\n"

    fp.close()
+
+# --------------------------------------------------------------------
+# unicode character type tables
+
+def makeunicodetype(unicode, trace):
+    """Write Objects/unicodetype_db.h from the loaded UnicodeData table.
+
+    Emits the _PyUnicode_TypeRecords array (upper/lower/title mappings,
+    decimal/digit values and flag bits), a two-level type index built by
+    splitbins(), and generated C functions _PyUnicode_ToNumeric,
+    _PyUnicode_IsWhitespace and _PyUnicode_IsLinebreak.
+    """
+
+    FILE = "Objects/unicodetype_db.h"
+
+    print "--- Preparing", FILE, "..."
+
+    # extract unicode types
+    dummy = (0, 0, 0, 0, 0, 0)
+    table = [dummy]
+    cache = {0: dummy}
+    index = [0] * len(unicode.chars)
+    numeric = {}
+    spaces = []
+    linebreaks = []
+
+    for char in unicode.chars:
+        record = unicode.table[char]
+        if record:
+            # extract database properties
+            category = record[2]
+            bidirectional = record[4]
+            properties = record[16]
+            flags = 0
+            delta = True  # NOTE(review): assigned but never read in this function
+            if category in ["Lm", "Lt", "Lu", "Ll", "Lo"]:
+                flags |= ALPHA_MASK
+            if category == "Ll":
+                flags |= LOWER_MASK
+            if 'Line_Break' in properties or bidirectional == "B":
+                flags |= LINEBREAK_MASK
+                linebreaks.append(char)
+            if category == "Zs" or bidirectional in ("WS", "B", "S"):
+                flags |= SPACE_MASK
+                spaces.append(char)
+            if category == "Lt":
+                flags |= TITLE_MASK
+            if category == "Lu":
+                flags |= UPPER_MASK
+            # use delta predictor for upper/lower/title if it fits
+            if record[12]:
+                upper = int(record[12], 16)
+            else:
+                upper = char
+            if record[13]:
+                lower = int(record[13], 16)
+            else:
+                lower = char
+            if record[14]:
+                title = int(record[14], 16)
+            else:
+                # UCD.html says that a missing title char means that
+                # it defaults to the uppercase character, not to the
+                # character itself. Apparently, in the current UCD (5.x)
+                # this feature is never used
+                title = upper
+            upper_d = upper - char
+            lower_d = lower - char
+            title_d = title - char
+            if -32768 <= upper_d <= 32767 and \
+               -32768 <= lower_d <= 32767 and \
+               -32768 <= title_d <= 32767:
+                # use deltas (stored as 16-bit two's complement via & 0xffff)
+                upper = upper_d & 0xffff
+                lower = lower_d & 0xffff
+                title = title_d & 0xffff
+            else:
+                flags |= NODELTA_MASK
+            # decimal digit, integer digit
+            decimal = 0
+            if record[6]:
+                flags |= DECIMAL_MASK
+                decimal = int(record[6])
+            digit = 0
+            if record[7]:
+                flags |= DIGIT_MASK
+                digit = int(record[7])
+            if record[8]:
+                flags |= NUMERIC_MASK
+                numeric.setdefault(record[8], []).append(char)
+            item = (
+                upper, lower, title, decimal, digit, flags
+                )
+            # add entry to index and item tables
+            i = cache.get(item)
+            if i is None:
+                cache[item] = i = len(table)
+                table.append(item)
+            index[char] = i
+
+    print len(table), "unique character type entries"
+    print sum(map(len, numeric.values())), "numeric code points"
+    print len(spaces), "whitespace code points"
+    print len(linebreaks), "linebreak code points"
+
+    print "--- Writing", FILE, "..."
+
+    fp = open(FILE, "w")
+    print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
+    print >>fp
+    print >>fp, "/* a list of unique character type descriptors */"
+    print >>fp, "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {"
+    for item in table:
+        print >>fp, "    {%d, %d, %d, %d, %d, %d}," % item
+    print >>fp, "};"
+    print >>fp
+
+    # split decomposition index table
+    index1, index2, shift = splitbins(index, trace)
+
+    print >>fp, "/* type indexes */"
+    print >>fp, "#define SHIFT", shift
+    Array("index1", index1).dump(fp, trace)
+    Array("index2", index2).dump(fp, trace)
+
+    # Generate code for _PyUnicode_ToNumeric()
+    numeric_items = sorted(numeric.items())
+    print >>fp, '/* Returns the numeric value as double for Unicode characters'
+    print >>fp, ' * having this property, -1.0 otherwise.'
+    print >>fp, ' */'
+    print >>fp, 'double _PyUnicode_ToNumeric(Py_UNICODE ch)'
+    print >>fp, '{'
+    print >>fp, '    switch (ch) {'
+    for value, codepoints in numeric_items:
+        # Turn text into float literals
+        parts = value.split('/')
+        parts = [repr(float(part)) for part in parts]
+        value = '/'.join(parts)
+
+        # Wrap code points >= 0x10000 in #ifdef Py_UNICODE_WIDE so the
+        # generated C still compiles on narrow (UCS-2) builds.
+        haswide = False
+        hasnonewide = False
+        codepoints.sort()
+        for codepoint in codepoints:
+            if codepoint < 0x10000:
+                hasnonewide = True
+            if codepoint >= 0x10000 and not haswide:
+                print >>fp, '#ifdef Py_UNICODE_WIDE'
+                haswide = True
+            print >>fp, '    case 0x%04X:' % (codepoint,)
+        if haswide and hasnonewide:
+            print >>fp, '#endif'
+        print >>fp, '        return (double) %s;' % (value,)
+        if haswide and not hasnonewide:
+            print >>fp, '#endif'
+    print >>fp,'    }'
+    print >>fp,'    return -1.0;'
+    print >>fp,'}'
+    print >>fp
+
+    # Generate code for _PyUnicode_IsWhitespace()
+    print >>fp, "/* Returns 1 for Unicode characters having the bidirectional"
+    print >>fp, " * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise."
+    print >>fp, " */"
+    print >>fp, 'int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)'
+    print >>fp, '{'
+    print >>fp, '#ifdef WANT_WCTYPE_FUNCTIONS'
+    print >>fp, '    return iswspace(ch);'
+    print >>fp, '#else'
+    print >>fp, '    switch (ch) {'
+
+    # Same narrow/wide-build #ifdef dance as above.
+    haswide = False
+    hasnonewide = False
+    for codepoint in sorted(spaces):
+        if codepoint < 0x10000:
+            hasnonewide = True
+        if codepoint >= 0x10000 and not haswide:
+            print >>fp, '#ifdef Py_UNICODE_WIDE'
+            haswide = True
+        print >>fp, '    case 0x%04X:' % (codepoint,)
+    if haswide and hasnonewide:
+        print >>fp, '#endif'
+    print >>fp, '        return 1;'
+    if haswide and not hasnonewide:
+        print >>fp, '#endif'
+
+    print >>fp,'    }'
+    print >>fp,'    return 0;'
+    print >>fp, '#endif'
+    print >>fp,'}'
+    print >>fp
+
+    # Generate code for _PyUnicode_IsLinebreak()
+    print >>fp, "/* Returns 1 for Unicode characters having the line break"
+    print >>fp, " * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional"
+    print >>fp, " * type 'B', 0 otherwise."
+    print >>fp, " */"
+    print >>fp, 'int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)'
+    print >>fp, '{'
+    print >>fp, '    switch (ch) {'
+    haswide = False
+    hasnonewide = False
+    for codepoint in sorted(linebreaks):
+        if codepoint < 0x10000:
+            hasnonewide = True
+        if codepoint >= 0x10000 and not haswide:
+            print >>fp, '#ifdef Py_UNICODE_WIDE'
+            haswide = True
+        print >>fp, '    case 0x%04X:' % (codepoint,)
+    if haswide and hasnonewide:
+        print >>fp, '#endif'
+    print >>fp, '        return 1;'
+    if haswide and not hasnonewide:
+        print >>fp, '#endif'
+
+    print >>fp,'    }'
+    print >>fp,'    return 0;'
+    print >>fp,'}'
+    print >>fp
+
+    fp.close()
+
+# --------------------------------------------------------------------
+# unicode name database
+
+def makeunicodename(unicode, trace):
+    """Write Modules/unicodename_db.h: compressed code<->name tables.
+
+    Builds a lexicon of name words (the most frequent words get one-byte
+    codes, the rest two-byte escapes), a per-character phrasebook of word
+    indexes, and a static name->code hash table (class Hash).
+    """
+
+    FILE = "Modules/unicodename_db.h"
+
+    print "--- Preparing", FILE, "..."
+
+    # collect names
+    names = [None] * len(unicode.chars)
+
+    for char in unicode.chars:
+        record = unicode.table[char]
+        if record:
+            name = record[1].strip()
+            # names in angle brackets (e.g. "<control>") are placeholders
+            if name and name[0] != "<":
+                names[char] = name + chr(0)
+
+    print len(filter(lambda n: n is not None, names)), "distinct names"
+
+    # collect unique words from names (note that we distinguish between
+    # words inside a sentence and words ending a sentence; the latter
+    # include the trailing null byte).
+
+    words = {}
+    n = b = 0
+    for char in unicode.chars:
+        name = names[char]
+        if name:
+            w = name.split()
+            b = b + len(name)
+            n = n + len(w)
+            for w in w:  # rebinds 'w' from the word list to each word
+                l = words.get(w)
+                if l:
+                    l.append(None)
+                else:
+                    words[w] = [len(words)]
+
+    print n, "words in text;", b, "bytes"
+
+    wordlist = words.items()
+
+    # sort on falling frequency, then by name
+    def word_key(a):
+        aword, alist = a
+        return -len(alist), aword
+    wordlist.sort(key=word_key)
+
+    # figure out how many phrasebook escapes we need
+    escapes = 0
+    while escapes * 256 < len(wordlist):
+        escapes = escapes + 1
+    print escapes, "escapes"
+
+    short = 256 - escapes
+
+    assert short > 0
+
+    print short, "short indexes in lexicon"
+
+    # statistics
+    n = 0
+    for i in range(short):
+        n = n + len(wordlist[i][1])
+    print n, "short indexes in phrasebook"
+
+    # pick the most commonly used words, and sort the rest on falling
+    # length (to maximize overlap)
+
+    wordlist, wordtail = wordlist[:short], wordlist[short:]
+    wordtail.sort(key=lambda a: a[0], reverse=True)
+    wordlist.extend(wordtail)
+
+    # generate lexicon from words
+
+    lexicon_offset = [0]
+    lexicon = ""
+    words = {}
+
+    # build a lexicon string
+    offset = 0
+    for w, x in wordlist:
+        # encoding: bit 7 indicates last character in word (chr(128)
+        # indicates the last character in an entire string)
+        ww = w[:-1] + chr(ord(w[-1])+128)
+        # reuse string tails, when possible
+        o = lexicon.find(ww)
+        if o < 0:
+            o = offset
+            lexicon = lexicon + ww
+            offset = offset + len(w)
+        words[w] = len(lexicon_offset)
+        lexicon_offset.append(o)
+
+    lexicon = map(ord, lexicon)
+
+    # generate phrasebook from names and lexicon
+    phrasebook = [0]
+    phrasebook_offset = [0] * len(unicode.chars)
+    for char in unicode.chars:
+        name = names[char]
+        if name:
+            w = name.split()
+            phrasebook_offset[char] = len(phrasebook)
+            for w in w:  # rebinds 'w' from the word list to each word
+                i = words[w]
+                if i < short:
+                    phrasebook.append(i)
+                else:
+                    # store as two bytes
+                    phrasebook.append((i>>8) + short)
+                    phrasebook.append(i&255)
+
+    # the phrasebook must fit in an unsigned char array
+    assert getsize(phrasebook) == 1
+
+    #
+    # unicode name hash table
+
+    # extract names
+    data = []
+    for char in unicode.chars:
+        record = unicode.table[char]
+        if record:
+            name = record[1].strip()
+            if name and name[0] != "<":
+                data.append((name, char))
+
+    # the magic number 47 was chosen to minimize the number of
+    # collisions on the current data set. if you like, change it
+    # and see what happens...
+
+    codehash = Hash("code", data, 47)
+
+    print "--- Writing", FILE, "..."
+
+    fp = open(FILE, "w")
+    print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
+    print >>fp
+    print >>fp, "#define NAME_MAXLEN", 256
+    print >>fp
+    print >>fp, "/* lexicon */"
+    Array("lexicon", lexicon).dump(fp, trace)
+    Array("lexicon_offset", lexicon_offset).dump(fp, trace)
+
+    # split decomposition index table
+    offset1, offset2, shift = splitbins(phrasebook_offset, trace)
+
+    print >>fp, "/* code->name phrasebook */"
+    print >>fp, "#define phrasebook_shift", shift
+    print >>fp, "#define phrasebook_short", short
+
+    Array("phrasebook", phrasebook).dump(fp, trace)
+    Array("phrasebook_offset1", offset1).dump(fp, trace)
+    Array("phrasebook_offset2", offset2).dump(fp, trace)
+
+    print >>fp, "/* name->code dictionary */"
+    codehash.dump(fp, trace)
+
+    fp.close()
+
+
+def merge_old_version(version, new, old):
+    """Record per-code-point differences between an old UCD and 'new'.
+
+    Appends (version, change-record table, normalization changes) to
+    new.changed so that makeunicodedata can emit delta tables for the
+    old Unicode version.
+    """
+    # Changes to exclusion file not implemented yet
+    if old.exclusions != new.exclusions:
+        raise NotImplementedError, "exclusions differ"
+
+    # In these change records, 0xFF means "no change"
+    bidir_changes = [0xFF]*0x110000
+    category_changes = [0xFF]*0x110000
+    decimal_changes = [0xFF]*0x110000
+    mirrored_changes = [0xFF]*0x110000
+    # In numeric data, 0 means "no change";
+    # -1 means "did not have a numeric value".
+    numeric_changes = [0] * 0x110000
+    # normalization_changes is a list of key-value pairs
+    normalization_changes = []
+    for i in range(0x110000):
+        if new.table[i] is None:
+            # Characters unassigned in the new version ought to
+            # be unassigned in the old one
+            assert old.table[i] is None
+            continue
+        # check characters unassigned in the old version
+        if old.table[i] is None:
+            # category 0 is "unassigned"
+            category_changes[i] = 0
+            continue
+        # check characters that differ
+        if old.table[i] != new.table[i]:
+            for k in range(len(old.table[i])):
+                if old.table[i][k] != new.table[i][k]:
+                    value = old.table[i][k]
+                    if k == 2:
+                        #print "CATEGORY",hex(i), old.table[i][k], new.table[i][k]
+                        category_changes[i] = CATEGORY_NAMES.index(value)
+                    elif k == 4:
+                        #print "BIDIR",hex(i), old.table[i][k], new.table[i][k]
+                        bidir_changes[i] = BIDIRECTIONAL_NAMES.index(value)
+                    elif k == 5:
+                        #print "DECOMP",hex(i), old.table[i][k], new.table[i][k]
+                        # We assume that all normalization changes are in 1:1 mappings
+                        assert " " not in value
+                        normalization_changes.append((i, value))
+                    elif k == 6:
+                        #print "DECIMAL",hex(i), old.table[i][k], new.table[i][k]
+                        # we only support changes where the old value is a single digit
+                        assert value in "0123456789"
+                        decimal_changes[i] = int(value)
+                    elif k == 8:
+                        # print "NUMERIC",hex(i), `old.table[i][k]`, new.table[i][k]
+                        # Since 0 encodes "no change", the old value is better not 0
+                        if not value:
+                            numeric_changes[i] = -1
+                        else:
+                            numeric_changes[i] = float(value)
+                            assert numeric_changes[i] not in (0, -1)
+                    elif k == 9:
+                        if value == 'Y':
+                            mirrored_changes[i] = '1'
+                        else:
+                            mirrored_changes[i] = '0'
+                    elif k == 11:
+                        # change to ISO comment, ignore
+                        pass
+                    elif k == 12:
+                        # change to simple uppercase mapping; ignore
+                        pass
+                    elif k == 13:
+                        # change to simple lowercase mapping; ignore
+                        pass
+                    elif k == 14:
+                        # change to simple titlecase mapping; ignore
+                        pass
+                    elif k == 16:
+                        # change to properties; not yet
+                        pass
+                    else:
+                        # any other field change is unexpected: fail loudly
+                        class Difference(Exception):pass
+                        raise Difference, (hex(i), k, old.table[i], new.table[i])
+    new.changed.append((version, zip(bidir_changes, category_changes,
+                                     decimal_changes, mirrored_changes,
+                                     numeric_changes),
+                        normalization_changes))
+
+
+# --------------------------------------------------------------------
+# the following support code is taken from the unidb utilities
+# Copyright (c) 1999-2000 by Secret Labs AB
+
+# load a unicode-data file from disk
+
+class UnicodeData:
+    """In-memory UCD: one record (list of strings) per code point.
+
+    Loads UnicodeData.txt plus the exclusion, EastAsianWidth, LineBreak,
+    DerivedNormalizationProps and Unihan files, appending the extra
+    fields onto each per-character record.
+    """
+    # Record structure:
+    # [ID, name, category, combining, bidi, decomp,  (6)
+    #  decimal, digit, numeric, bidi-mirrored, Unicode-1-name, (11)
+    #  ISO-comment, uppercase, lowercase, titlecase, ea-width, (16)
+    #  properties] (17)
+
+    def __init__(self, filename, exclusions, eastasianwidth, unihan,
+                 derivednormalizationprops=None, linebreakprops=None,
+                 expand=1):
+        """Parse the UCD files; expand=1 fills in <...First>/<...Last> ranges."""
+        self.changed = []
+        file = open(filename)
+        table = [None] * 0x110000
+        while 1:
+            s = file.readline()
+            if not s:
+                break
+            s = s.strip().split(";")
+            char = int(s[0], 16)
+            table[char] = s
+
+        # expand first-last ranges
+        if expand:
+            field = None
+            for i in range(0, 0x110000):
+                s = table[i]
+                if s:
+                    if s[1][-6:] == "First>":
+                        s[1] = ""
+                        field = s
+                    elif s[1][-5:] == "Last>":
+                        s[1] = ""
+                        field = None
+                elif field:
+                    # interior of a First>/Last> range: clone the First record
+                    f2 = field[:]
+                    f2[0] = "%X" % i
+                    table[i] = f2
+
+        # public attributes
+        self.filename = filename
+        self.table = table
+        self.chars = range(0x110000) # unicode 3.2
+
+        # composition exclusions (one code point per line, '#' comments)
+        file = open(exclusions)
+        self.exclusions = {}
+        for s in file:
+            s = s.strip()
+            if not s:
+                continue
+            if s[0] == '#':
+                continue
+            char = int(s.split()[0],16)
+            self.exclusions[char] = 1
+
+        # east asian width: appended as record[15] ("ea-width")
+        widths = [None] * 0x110000
+        for s in open(eastasianwidth):
+            s = s.strip()
+            if not s:
+                continue
+            if s[0] == '#':
+                continue
+            s = s.split()[0].split(';')
+            if '..' in s[0]:
+                first, last = [int(c, 16) for c in s[0].split('..')]
+                chars = range(first, last+1)
+            else:
+                chars = [int(s[0], 16)]
+            for char in chars:
+                widths[char] = s[1]
+        for i in range(0, 0x110000):
+            if table[i] is not None:
+                table[i].append(widths[i])
+
+        # properties set: appended as record[16]
+        for i in range(0, 0x110000):
+            if table[i] is not None:
+                table[i].append(set())
+        if linebreakprops:
+            for s in open(linebreakprops):
+                s = s.partition('#')[0]
+                s = [i.strip() for i in s.split(';')]
+                if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS:
+                    continue
+                if '..' not in s[0]:
+                    first = last = int(s[0], 16)
+                else:
+                    first, last = [int(c, 16) for c in s[0].split('..')]
+                for char in range(first, last+1):
+                    table[char][-1].add('Line_Break')
+
+        # NFx quick-check bits packed two per property: appended as record[17]
+        if derivednormalizationprops:
+            quickchecks = [0] * 0x110000 # default is Yes
+            qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split()
+            for s in open(derivednormalizationprops):
+                if '#' in s:
+                    s = s[:s.index('#')]
+                s = [i.strip() for i in s.split(';')]
+                if len(s) < 2 or s[1] not in qc_order:
+                    continue
+                quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No
+                quickcheck_shift = qc_order.index(s[1])*2
+                quickcheck <<= quickcheck_shift
+                if '..' not in s[0]:
+                    first = last = int(s[0], 16)
+                else:
+                    first, last = [int(c, 16) for c in s[0].split('..')]
+                for char in range(first, last+1):
+                    assert not (quickchecks[char]>>quickcheck_shift)&3
+                    quickchecks[char] |= quickcheck
+            for i in range(0, 0x110000):
+                if table[i] is not None:
+                    table[i].append(quickchecks[i])
+
+        # Unihan numeric values override the UnicodeData numeric field
+        for line in open(unihan):
+            if not line.startswith('U+'):
+                continue
+            code, tag, value = line.split(None, 3)[:3]
+            if tag not in ('kAccountingNumeric', 'kPrimaryNumeric',
+                           'kOtherNumeric'):
+                continue
+            value = value.strip().replace(',', '')
+            i = int(code[2:], 16)
+            # Patch the numeric field
+            if table[i] is not None:
+                table[i][8] = value
+
+    def uselatin1(self):
+        # restrict character range to ISO Latin 1
+        self.chars = range(256)
+
+# hash table tools
+
+# this is a straight-forward reimplementation of Python's built-in
+# dictionary type, using a static data structure, and a custom string
+# hash algorithm.
+
+def myhash(s, magic):
+    # Case-insensitive multiplicative string hash; the high byte is
+    # folded back into the low bits to keep the value within 24 bits.
+    h = 0
+    for c in map(ord, s.upper()):
+        h = (h * magic) + c
+        ix = h & 0xff000000L
+        if ix:
+            h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff
+    return h
+
+# (table size, polynomial) candidates for the Hash class below: sizes are
+# powers of two; the polynomial perturbs the probe increment on overflow.
+SIZES = [
+    (4,3), (8,3), (16,3), (32,5), (64,3), (128,3), (256,29), (512,17),
+    (1024,9), (2048,5), (4096,83), (8192,27), (16384,43), (32768,3),
+    (65536,45), (131072,9), (262144,39), (524288,39), (1048576,9),
+    (2097152,5), (4194304,3), (8388608,33), (16777216,27)
+]
+
+class Hash:
+    """Static open-addressing hash table, dumped as C #defines + array."""
+
+    def __init__(self, name, data, magic):
+        # turn a (key, value) list into a static hash table structure
+
+        # determine table size: first power of two larger than the data set
+        for size, poly in SIZES:
+            if size > len(data):
+                poly = size + poly
+                break
+        else:
+            raise AssertionError, "ran out of polynominals"
+
+        print size, "slots in hash table"
+
+        table = [None] * size
+
+        mask = size-1
+
+        n = 0
+
+        hash = myhash
+
+        # initialize hash table
+        for key, value in data:
+            h = hash(key, magic)
+            i = (~h) & mask
+            v = table[i]
+            if v is None:
+                table[i] = value
+                continue
+            # collision: probe with a derived increment, doubling it each
+            # step and folding in 'poly' whenever it overflows the mask
+            incr = (h ^ (h >> 3)) & mask;
+            if not incr:
+                incr = mask
+            while 1:
+                n = n + 1
+                i = (i + incr) & mask
+                v = table[i]
+                if v is None:
+                    table[i] = value
+                    break
+                incr = incr << 1
+                if incr > mask:
+                    incr = incr ^ poly
+
+        print n, "collisions"
+        self.collisions = n
+
+        # empty slots become 0 so the table can be a C unsigned array
+        for i in range(len(table)):
+            if table[i] is None:
+                table[i] = 0
+
+        self.data = Array(name + "_hash", table)
+        self.magic = magic
+        self.name = name
+        self.size = size
+        self.poly = poly
+
+    def dump(self, file, trace):
+        # write data to file, as a C array
+        self.data.dump(file, trace)
+        file.write("#define %s_magic %d\n" % (self.name, self.magic))
+        file.write("#define %s_size %d\n" % (self.name, self.size))
+        file.write("#define %s_poly %d\n" % (self.name, self.poly))
+
+# stuff to deal with arrays of unsigned integers
+
+class Array:
+    """A named sequence of unsigned ints, dumpable as a C array definition."""
+
+    def __init__(self, name, data):
+        self.name = name
+        self.data = data
+
+    def dump(self, file, trace=0):
+        # write data to file, as a C array
+        # element type is chosen from the largest value via getsize()
+        size = getsize(self.data)
+        if trace:
+            print >>sys.stderr, self.name+":", size*len(self.data), "bytes"
+        file.write("static ")
+        if size == 1:
+            file.write("unsigned char")
+        elif size == 2:
+            file.write("unsigned short")
+        else:
+            file.write("unsigned int")
+        file.write(" " + self.name + "[] = {\n")
+        if self.data:
+            s = "    "
+            for item in self.data:
+                i = str(item) + ", "
+                # wrap output lines at 78 columns
+                if len(s) + len(i) > 78:
+                    file.write(s + "\n")
+                    s = "    " + i
+                else:
+                    s = s + i
+            if s.strip():
+                file.write(s + "\n")
+        file.write("};\n\n")
+
+def getsize(data):
+    # return smallest possible integer size for the given array
+    # (bytes per element: 1, 2 or 4, chosen from the maximum value)
+    maxdata = max(data)
+    if maxdata < 256:
+        return 1
+    elif maxdata < 65536:
+        return 2
+    else:
+        return 4
+
+def splitbins(t, trace=0):
+    """t, trace=0 -> (t1, t2, shift).  Split a table to save space.
+
+    t is a sequence of ints.  This function can be useful to save space if
+    many of the ints are the same.  t1 and t2 are lists of ints, and shift
+    is an int, chosen to minimize the combined size of t1 and t2 (in C
+    code), and where for each i in range(len(t)),
+        t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
+    where mask is a bitmask isolating the last "shift" bits.
+
+    If optional arg trace is non-zero (default zero), progress info
+    is printed to sys.stderr.  The higher the value, the more info
+    you'll get.
+    """
+
+    if trace:
+        def dump(t1, t2, shift, bytes):
+            print >>sys.stderr, "%d+%d bins at shift %d; %d bytes" % (
+                len(t1), len(t2), shift, bytes)
+        print >>sys.stderr, "Size of original table:", len(t)*getsize(t), \
+                            "bytes"
+    n = len(t)-1    # last valid index
+    maxshift = 0    # the most we can shift n and still have something left
+    if n > 0:
+        while n >> 1:
+            n >>= 1
+            maxshift += 1
+    del n
+    bytes = sys.maxint  # smallest total size so far
+    t = tuple(t)    # so slices can be dict keys
+    for shift in range(maxshift + 1):
+        t1 = []
+        t2 = []
+        size = 2**shift
+        bincache = {}
+        # deduplicate fixed-size bins of t; t1 holds bin indexes, t2 the bins
+        for i in range(0, len(t), size):
+            bin = t[i:i+size]
+            index = bincache.get(bin)
+            if index is None:
+                index = len(t2)
+                bincache[bin] = index
+                t2.extend(bin)
+            t1.append(index >> shift)
+        # determine memory size
+        b = len(t1)*getsize(t1) + len(t2)*getsize(t2)
+        if trace > 1:
+            dump(t1, t2, shift, b)
+        if b < bytes:
+            best = t1, t2, shift
+            bytes = b
+    # 'best' is always bound: the shift==0 pass runs and b < sys.maxint
+    t1, t2, shift = best
+    if trace:
+        print >>sys.stderr, "Best:",
+        dump(t1, t2, shift, bytes)
+    if __debug__:
+        # exhaustively verify that the decomposition is correct
+        mask = ~((~0) << shift) # i.e., low-bit mask of shift bits
+        for i in xrange(len(t)):
+            assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)]
+    return best
+
+if __name__ == "__main__":
+    # maketables is defined earlier in this script; 1 = trace level
+    maketables(1)
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/mkstringprep.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/mkstringprep.py
new file mode 100644
index 0000000000..49d1393386
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/mkstringprep.py
@@ -0,0 +1,425 @@
+import re, unicodedata, sys
+
+# The StringPrep tables span the full UCS-4 range, so a narrow (UCS-2)
+# Python build cannot generate them.
+if sys.maxunicode == 65535:
+    raise RuntimeError("need UCS-4 Python")
+
+def gen_category(cats):
+    """Yield every code point whose Unicode general category is in 'cats'."""
+    for i in range(0, 0x110000):
+        if unicodedata.category(unichr(i)) in cats:
+            yield(i)
+
+def gen_bidirectional(cats):
+    """Yield every code point whose bidirectional class is in 'cats'."""
+    for i in range(0, 0x110000):
+        if unicodedata.bidirectional(unichr(i)) in cats:
+            yield(i)
+
+def compact_set(l):
+    """Return Python source text for a set equal to the ints in 'l'.
+
+    Runs of four or more consecutive values are rendered as range()
+    calls; all other values are listed explicitly.  Callers pass sorted
+    lists of code points.
+    """
+    # NOTE(review): 'tuple' shadows the builtin within this function; an
+    # empty 'l' would emit set([None]) — callers always pass non-empty
+    # tables.
+    single = []
+    tuple = []
+    prev = None
+    span = 0
+    for e in l:
+        if prev is None:
+            prev = e
+            span = 0
+            continue
+        if prev+span+1 != e:
+            # run ended at prev+span: flush it as a range or as singles
+            if span > 2:
+                tuple.append((prev,prev+span+1))
+            else:
+                for i in range(prev, prev+span+1):
+                    single.append(i)
+            prev = e
+            span = 0
+        else:
+            span += 1
+    # flush the final run
+    if span:
+        tuple.append((prev,prev+span+1))
+    else:
+        single.append(prev)
+    tuple = " + ".join(["range(%d,%d)" % t for t in tuple])
+    if not single:
+        return "set(%s)" % tuple
+    if not tuple:
+        return "set(%s)" % repr(single)
+    return "set(%s + %s)" % (repr(single),tuple)
+
+############## Read the tables in the RFC #######################
+
+# Parse rfc3454.txt into 'tables': an ordered list of (name, dict) pairs,
+# one per "----- Start Table X.Y -----" ... "----- End Table -----" span.
+data = open("rfc3454.txt").readlines()
+
+tables = []
+curname = None
+for l in data:
+    l = l.strip()
+    if not l:
+        continue
+    # Skip RFC page breaks
+    if l.startswith("Hoffman & Blanchet") or\
+       l.startswith("RFC 3454"):
+        continue
+    # Find start/end lines
+    # NOTE(review): the '.' before the digit group is unescaped, so it
+    # matches any character; in practice only "A.1"-style names occur.
+    m = re.match("----- (Start|End) Table ([A-Z](.[0-9])+) -----", l)
+    if m:
+        if m.group(1) == "Start":
+            if curname:
+                raise RuntimeError("Double Start", (curname, l))
+            curname = m.group(2)
+            table = {}
+            tables.append((curname, table))
+            continue
+        else:
+            if not curname:
+                raise RuntimeError("End without start", l)
+            curname = None
+            continue
+    if not curname:
+        continue
+    # Now we are in a table
+    fields = l.split(";")
+    if len(fields) > 1:
+        # Drop comment field
+        fields = fields[:-1]
+    if len(fields) == 1:
+        # set-style line: a single code point or a hyphenated range
+        fields = fields[0].split("-")
+        if len(fields) > 1:
+            # range
+            try:
+                start, end = fields
+            except ValueError:
+                raise RuntimeError("Unpacking problem", l)
+        else:
+            start = end = fields[0]
+        start = int(start, 16)
+        end = int(end, 16)
+        for i in range(start, end+1):
+            table[i] = i
+    else:
+        # mapping-style line: "code; value(s)"
+        code, value = fields
+        value = value.strip()
+        if value:
+            value = [int(v, 16) for v in value.split(" ")]
+        else:
+            # table B.1
+            value = None
+        table[int(code, 16)] = value
+
+########### Generate compact Python versions of the tables #############
+
+# Everything below prints the body of Lib/stringprep.py to stdout.
+print """# This file is generated by mkstringprep.py. DO NOT EDIT.
+\"\"\"Library that exposes various tables found in the StringPrep RFC 3454.
+
+There are two kinds of tables: sets, for which a member test is provided,
+and mappings, for which a mapping function is provided.
+\"\"\"
+
+import unicodedata
+"""
+
+# Pin the generating database version into the emitted module.
+print "assert unicodedata.unidata_version == %s" % repr(unicodedata.unidata_version)
+
+# A.1 is the table of unassigned characters
+# XXX Plane 15 PUA is listed as unassigned in Python.
+name, table = tables[0]
+del tables[0]
+assert name == "A.1"
+table = set(table.keys())
+Cn = set(gen_category(["Cn"]))
+
+# FDD0..FDEF are process internal codes
+Cn -= set(range(0xFDD0, 0xFDF0))
+# not a character
+Cn -= set(range(0xFFFE, 0x110000, 0x10000))
+Cn -= set(range(0xFFFF, 0x110000, 0x10000))
+
+# NOTE(review): the cross-check against Cn is disabled (see the XXX above).
+# assert table == Cn
+
+print """
+def in_table_a1(code):
+    if unicodedata.category(code) != 'Cn': return False
+    c = ord(code)
+    if 0xFDD0 <= c < 0xFDF0: return False
+    return (c & 0xFFFF) not in (0xFFFE, 0xFFFF)
+"""
+
+# B.1 cannot easily be derived
+name, table = tables[0]
+del tables[0]
+assert name == "B.1"
+table = sorted(table.keys())
+print """
+b1_set = """ + compact_set(table) + """
+def in_table_b1(code):
+    return ord(code) in b1_set
+"""
+
+# B.2 and B.3 is case folding.
+# It takes CaseFolding.txt into account, which is
+# not available in the Python database. Since
+# B.2 is derived from B.3, we process B.3 first.
+# B.3 supposedly *is* CaseFolding-3.2.0.txt.
+
+name, table_b2 = tables[0]
+del tables[0]
+assert name == "B.2"
+
+name, table_b3 = tables[0]
+del tables[0]
+assert name == "B.3"
+
+# B.3 is mostly Python's .lower, except for a number
+# of special cases, e.g. considering canonical forms.
+
+b3_exceptions = {}
+
+# NOTE(review): the exceptions are derived from table_b2 here although the
+# comments above discuss B.3 — this matches the shipped generator; confirm
+# before changing.
+for k,v in table_b2.items():
+    if map(ord, unichr(k).lower()) != v:
+        b3_exceptions[k] = u"".join(map(unichr,v))
+
+b3 = sorted(b3_exceptions.items())
+
+print """
+b3_exceptions = {"""
+for i,(k,v) in enumerate(b3):
+    print "0x%x:%s," % (k, repr(v)),
+    if i % 4 == 3:
+        print
+print "}"
+
+print """
+def map_table_b3(code):
+    r = b3_exceptions.get(ord(code))
+    if r is not None: return r
+    return code.lower()
+"""
+
+def map_table_b3(code):
+    """Local copy of the emitted B.3 mapping, used below to derive B.2."""
+    r = b3_exceptions.get(ord(code))
+    if r is not None: return r
+    return code.lower()
+
+# B.2 is case folding for NFKC. This is the same as B.3,
+# except where NormalizeWithKC(Fold(a)) !=
+# NormalizeWithKC(Fold(NormalizeWithKC(Fold(a))))
+
+def map_table_b2(a):
+    """Case-fold 'a' for use with NFKC (RFC 3454 table B.2 semantics)."""
+    al = map_table_b3(a)
+    b = unicodedata.normalize("NFKC", al)
+    bl = u"".join([map_table_b3(ch) for ch in b])
+    c = unicodedata.normalize("NFKC", bl)
+    if b != c:
+        return c
+    else:
+        return al
+
+# Verify that B.2 really equals map_table_b2 applied per character; each
+# remaining RFC table is then consumed in order by popping tables[0].
+specials = {}
+for k,v in table_b2.items():
+    if map(ord, map_table_b2(unichr(k))) != v:
+        specials[k] = v
+
+# B.3 should not add any additional special cases
+assert specials == {}
+
+# Emit the same derivation into the generated module.
+print """
+def map_table_b2(a):
+    al = map_table_b3(a)
+    b = unicodedata.normalize("NFKC", al)
+    bl = u"".join([map_table_b3(ch) for ch in b])
+    c = unicodedata.normalize("NFKC", bl)
+    if b != c:
+        return c
+    else:
+        return al
+"""
+
+# C.1.1 is a table with a single character
+name, table = tables[0]
+del tables[0]
+assert name == "C.1.1"
+assert table == {0x20:0x20}
+
+print """
+def in_table_c11(code):
+    return code == u" "
+"""
+
+# C.1.2 is the rest of all space characters
+name, table = tables[0]
+del tables[0]
+assert name == "C.1.2"
+
+# table = set(table.keys())
+# Zs = set(gen_category(["Zs"])) - set([0x20])
+# assert Zs == table
+
+print """
+def in_table_c12(code):
+    return unicodedata.category(code) == "Zs" and code != u" "
+
+def in_table_c11_c12(code):
+    return unicodedata.category(code) == "Zs"
+"""
+
+# C.2.1 ASCII control characters
+name, table_c21 = tables[0]
+del tables[0]
+assert name == "C.2.1"
+
+Cc = set(gen_category(["Cc"]))
+Cc_ascii = Cc & set(range(128))
+table_c21 = set(table_c21.keys())
+assert Cc_ascii == table_c21
+
+print """
+def in_table_c21(code):
+    return ord(code) < 128 and unicodedata.category(code) == "Cc"
+"""
+
+# C.2.2 Non-ASCII control characters. It also includes
+# a number of characters in category Cf.
+name, table_c22 = tables[0]
+del tables[0]
+assert name == "C.2.2"
+
+Cc_nonascii = Cc - Cc_ascii
+table_c22 = set(table_c22.keys())
+assert len(Cc_nonascii - table_c22) == 0
+
+# the extra (non-Cc) members must be listed explicitly
+specials = list(table_c22 - Cc_nonascii)
+specials.sort()
+
+print """c22_specials = """ + compact_set(specials) + """
+def in_table_c22(code):
+    c = ord(code)
+    if c < 128: return False
+    if unicodedata.category(code) == "Cc": return True
+    return c in c22_specials
+
+def in_table_c21_c22(code):
+    return unicodedata.category(code) == "Cc" or \\
+           ord(code) in c22_specials
+"""
+
+# C.3 Private use
+name, table = tables[0]
+del tables[0]
+assert name == "C.3"
+
+Co = set(gen_category(["Co"]))
+assert set(table.keys()) == Co
+
+print """
+def in_table_c3(code):
+    return unicodedata.category(code) == "Co"
+"""
+
+# C.4 Non-character code points, xFFFE, xFFFF
+# plus process internal codes
+name, table = tables[0]
+del tables[0]
+assert name == "C.4"
+
+nonchar = set(range(0xFDD0,0xFDF0) +
+              range(0xFFFE,0x110000,0x10000) +
+              range(0xFFFF,0x110000,0x10000))
+table = set(table.keys())
+assert table == nonchar
+
+print """
+def in_table_c4(code):
+    c = ord(code)
+    if c < 0xFDD0: return False
+    if c < 0xFDF0: return True
+    return (ord(code) & 0xFFFF) in (0xFFFE, 0xFFFF)
+"""
+
+# C.5 Surrogate codes
+name, table = tables[0]
+del tables[0]
+assert name == "C.5"
+
+Cs = set(gen_category(["Cs"]))
+assert set(table.keys()) == Cs
+
+print """
+def in_table_c5(code):
+    return unicodedata.category(code) == "Cs"
+"""
+
+# C.6 Inappropriate for plain text
+name, table = tables[0]
+del tables[0]
+assert name == "C.6"
+
+table = sorted(table.keys())
+
+print """
+c6_set = """ + compact_set(table) + """
+def in_table_c6(code):
+    return ord(code) in c6_set
+"""
+
+# C.7 Inappropriate for canonical representation
+name, table = tables[0]
+del tables[0]
+assert name == "C.7"
+
+table = sorted(table.keys())
+
+print """
+c7_set = """ + compact_set(table) + """
+def in_table_c7(code):
+    return ord(code) in c7_set
+"""
+
+# C.8 Change display properties or are deprecated
+name, table = tables[0]
+del tables[0]
+assert name == "C.8"
+
+table = sorted(table.keys())
+
+print """
+c8_set = """ + compact_set(table) + """
+def in_table_c8(code):
+    return ord(code) in c8_set
+"""
+
+# C.9 Tagging characters
+name, table = tables[0]
+del tables[0]
+assert name == "C.9"
+
+table = sorted(table.keys())
+
+print """
+c9_set = """ + compact_set(table) + """
+def in_table_c9(code):
+    return ord(code) in c9_set
+"""
+
+# D.1 Characters with bidirectional property "R" or "AL"
+name, table = tables[0]
+del tables[0]
+assert name == "D.1"
+
+RandAL = set(gen_bidirectional(["R","AL"]))
+assert set(table.keys()) == RandAL
+
+print """
+def in_table_d1(code):
+    return unicodedata.bidirectional(code) in ("R","AL")
+"""
+
+# D.2 Characters with bidirectional property "L"
+name, table = tables[0]
+del tables[0]
+assert name == "D.2"
+
+L = set(gen_bidirectional(["L"]))
+assert set(table.keys()) == L
+
+print """
+def in_table_d2(code):
+    return unicodedata.bidirectional(code) == "L"
+"""
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/CP1140.TXT b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/CP1140.TXT
new file mode 100644
index 0000000000..68e0fdc3d2
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/CP1140.TXT
@@ -0,0 +1,291 @@
+#
+# Name: CP1140
+# Unicode version: 3.2
+# Table version: 1.0
+# Table format: Format A
+# Date: 2005-10-25
+# Authors: Marc-Andre Lemburg <mal@egenix.com>
+#
+# This encoding is a modified CP037 encoding (with added Euro
+# currency sign).
+#
+# (c) Copyright Marc-Andre Lemburg, 2005.
+# Licensed to PSF under a Contributor Agreement.
+#
+# Based on the file
+# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT
+# which is:
+#
+# Copyright (c) 2002 Unicode, Inc. All Rights reserved.
+#
+# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+# No claims are made as to fitness for any particular purpose. No
+# warranties of any kind are expressed or implied. The recipient
+# agrees to determine applicability of information provided. If this
+# file has been provided on optical media by Unicode, Inc., the sole
+# remedy for any claim will be exchange of defective media within 90
+# days of receipt.
+#
+# Unicode, Inc. hereby grants the right to freely use the information
+# supplied in this file in the creation of products supporting the
+# Unicode Standard, and to make copies of this file in any form for
+# internal or external distribution as long as this notice remains
+# attached.
+#
+0x00 0x0000 #NULL
+0x01 0x0001 #START OF HEADING
+0x02 0x0002 #START OF TEXT
+0x03 0x0003 #END OF TEXT
+0x04 0x009C #CONTROL
+0x05 0x0009 #HORIZONTAL TABULATION
+0x06 0x0086 #CONTROL
+0x07 0x007F #DELETE
+0x08 0x0097 #CONTROL
+0x09 0x008D #CONTROL
+0x0A 0x008E #CONTROL
+0x0B 0x000B #VERTICAL TABULATION
+0x0C 0x000C #FORM FEED
+0x0D 0x000D #CARRIAGE RETURN
+0x0E 0x000E #SHIFT OUT
+0x0F 0x000F #SHIFT IN
+0x10 0x0010 #DATA LINK ESCAPE
+0x11 0x0011 #DEVICE CONTROL ONE
+0x12 0x0012 #DEVICE CONTROL TWO
+0x13 0x0013 #DEVICE CONTROL THREE
+0x14 0x009D #CONTROL
+0x15 0x0085 #CONTROL
+0x16 0x0008 #BACKSPACE
+0x17 0x0087 #CONTROL
+0x18 0x0018 #CANCEL
+0x19 0x0019 #END OF MEDIUM
+0x1A 0x0092 #CONTROL
+0x1B 0x008F #CONTROL
+0x1C 0x001C #FILE SEPARATOR
+0x1D 0x001D #GROUP SEPARATOR
+0x1E 0x001E #RECORD SEPARATOR
+0x1F 0x001F #UNIT SEPARATOR
+0x20 0x0080 #CONTROL
+0x21 0x0081 #CONTROL
+0x22 0x0082 #CONTROL
+0x23 0x0083 #CONTROL
+0x24 0x0084 #CONTROL
+0x25 0x000A #LINE FEED
+0x26 0x0017 #END OF TRANSMISSION BLOCK
+0x27 0x001B #ESCAPE
+0x28 0x0088 #CONTROL
+0x29 0x0089 #CONTROL
+0x2A 0x008A #CONTROL
+0x2B 0x008B #CONTROL
+0x2C 0x008C #CONTROL
+0x2D 0x0005 #ENQUIRY
+0x2E 0x0006 #ACKNOWLEDGE
+0x2F 0x0007 #BELL
+0x30 0x0090 #CONTROL
+0x31 0x0091 #CONTROL
+0x32 0x0016 #SYNCHRONOUS IDLE
+0x33 0x0093 #CONTROL
+0x34 0x0094 #CONTROL
+0x35 0x0095 #CONTROL
+0x36 0x0096 #CONTROL
+0x37 0x0004 #END OF TRANSMISSION
+0x38 0x0098 #CONTROL
+0x39 0x0099 #CONTROL
+0x3A 0x009A #CONTROL
+0x3B 0x009B #CONTROL
+0x3C 0x0014 #DEVICE CONTROL FOUR
+0x3D 0x0015 #NEGATIVE ACKNOWLEDGE
+0x3E 0x009E #CONTROL
+0x3F 0x001A #SUBSTITUTE
+0x40 0x0020 #SPACE
+0x41 0x00A0 #NO-BREAK SPACE
+0x42 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX
+0x43 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS
+0x44 0x00E0 #LATIN SMALL LETTER A WITH GRAVE
+0x45 0x00E1 #LATIN SMALL LETTER A WITH ACUTE
+0x46 0x00E3 #LATIN SMALL LETTER A WITH TILDE
+0x47 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE
+0x48 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA
+0x49 0x00F1 #LATIN SMALL LETTER N WITH TILDE
+0x4A 0x00A2 #CENT SIGN
+0x4B 0x002E #FULL STOP
+0x4C 0x003C #LESS-THAN SIGN
+0x4D 0x0028 #LEFT PARENTHESIS
+0x4E 0x002B #PLUS SIGN
+0x4F 0x007C #VERTICAL LINE
+0x50 0x0026 #AMPERSAND
+0x51 0x00E9 #LATIN SMALL LETTER E WITH ACUTE
+0x52 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX
+0x53 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS
+0x54 0x00E8 #LATIN SMALL LETTER E WITH GRAVE
+0x55 0x00ED #LATIN SMALL LETTER I WITH ACUTE
+0x56 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX
+0x57 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS
+0x58 0x00EC #LATIN SMALL LETTER I WITH GRAVE
+0x59 0x00DF #LATIN SMALL LETTER SHARP S (GERMAN)
+0x5A 0x0021 #EXCLAMATION MARK
+0x5B 0x0024 #DOLLAR SIGN
+0x5C 0x002A #ASTERISK
+0x5D 0x0029 #RIGHT PARENTHESIS
+0x5E 0x003B #SEMICOLON
+0x5F 0x00AC #NOT SIGN
+0x60 0x002D #HYPHEN-MINUS
+0x61 0x002F #SOLIDUS
+0x62 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+0x63 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS
+0x64 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE
+0x65 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE
+0x66 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE
+0x67 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE
+0x68 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA
+0x69 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE
+0x6A 0x00A6 #BROKEN BAR
+0x6B 0x002C #COMMA
+0x6C 0x0025 #PERCENT SIGN
+0x6D 0x005F #LOW LINE
+0x6E 0x003E #GREATER-THAN SIGN
+0x6F 0x003F #QUESTION MARK
+0x70 0x00F8 #LATIN SMALL LETTER O WITH STROKE
+0x71 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE
+0x72 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+0x73 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS
+0x74 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE
+0x75 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE
+0x76 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+0x77 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS
+0x78 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE
+0x79 0x0060 #GRAVE ACCENT
+0x7A 0x003A #COLON
+0x7B 0x0023 #NUMBER SIGN
+0x7C 0x0040 #COMMERCIAL AT
+0x7D 0x0027 #APOSTROPHE
+0x7E 0x003D #EQUALS SIGN
+0x7F 0x0022 #QUOTATION MARK
+0x80 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE
+0x81 0x0061 #LATIN SMALL LETTER A
+0x82 0x0062 #LATIN SMALL LETTER B
+0x83 0x0063 #LATIN SMALL LETTER C
+0x84 0x0064 #LATIN SMALL LETTER D
+0x85 0x0065 #LATIN SMALL LETTER E
+0x86 0x0066 #LATIN SMALL LETTER F
+0x87 0x0067 #LATIN SMALL LETTER G
+0x88 0x0068 #LATIN SMALL LETTER H
+0x89 0x0069 #LATIN SMALL LETTER I
+0x8A 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x8B 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+0x8C 0x00F0 #LATIN SMALL LETTER ETH (ICELANDIC)
+0x8D 0x00FD #LATIN SMALL LETTER Y WITH ACUTE
+0x8E 0x00FE #LATIN SMALL LETTER THORN (ICELANDIC)
+0x8F 0x00B1 #PLUS-MINUS SIGN
+0x90 0x00B0 #DEGREE SIGN
+0x91 0x006A #LATIN SMALL LETTER J
+0x92 0x006B #LATIN SMALL LETTER K
+0x93 0x006C #LATIN SMALL LETTER L
+0x94 0x006D #LATIN SMALL LETTER M
+0x95 0x006E #LATIN SMALL LETTER N
+0x96 0x006F #LATIN SMALL LETTER O
+0x97 0x0070 #LATIN SMALL LETTER P
+0x98 0x0071 #LATIN SMALL LETTER Q
+0x99 0x0072 #LATIN SMALL LETTER R
+0x9A 0x00AA #FEMININE ORDINAL INDICATOR
+0x9B 0x00BA #MASCULINE ORDINAL INDICATOR
+0x9C 0x00E6 #LATIN SMALL LIGATURE AE
+0x9D 0x00B8 #CEDILLA
+0x9E 0x00C6 #LATIN CAPITAL LIGATURE AE
+#0x9F 0x00A4 #CURRENCY SIGN
+0x9F 0x20AC # EURO SIGN
+0xA0 0x00B5 #MICRO SIGN
+0xA1 0x007E #TILDE
+0xA2 0x0073 #LATIN SMALL LETTER S
+0xA3 0x0074 #LATIN SMALL LETTER T
+0xA4 0x0075 #LATIN SMALL LETTER U
+0xA5 0x0076 #LATIN SMALL LETTER V
+0xA6 0x0077 #LATIN SMALL LETTER W
+0xA7 0x0078 #LATIN SMALL LETTER X
+0xA8 0x0079 #LATIN SMALL LETTER Y
+0xA9 0x007A #LATIN SMALL LETTER Z
+0xAA 0x00A1 #INVERTED EXCLAMATION MARK
+0xAB 0x00BF #INVERTED QUESTION MARK
+0xAC 0x00D0 #LATIN CAPITAL LETTER ETH (ICELANDIC)
+0xAD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE
+0xAE 0x00DE #LATIN CAPITAL LETTER THORN (ICELANDIC)
+0xAF 0x00AE #REGISTERED SIGN
+0xB0 0x005E #CIRCUMFLEX ACCENT
+0xB1 0x00A3 #POUND SIGN
+0xB2 0x00A5 #YEN SIGN
+0xB3 0x00B7 #MIDDLE DOT
+0xB4 0x00A9 #COPYRIGHT SIGN
+0xB5 0x00A7 #SECTION SIGN
+0xB6 0x00B6 #PILCROW SIGN
+0xB7 0x00BC #VULGAR FRACTION ONE QUARTER
+0xB8 0x00BD #VULGAR FRACTION ONE HALF
+0xB9 0x00BE #VULGAR FRACTION THREE QUARTERS
+0xBA 0x005B #LEFT SQUARE BRACKET
+0xBB 0x005D #RIGHT SQUARE BRACKET
+0xBC 0x00AF #MACRON
+0xBD 0x00A8 #DIAERESIS
+0xBE 0x00B4 #ACUTE ACCENT
+0xBF 0x00D7 #MULTIPLICATION SIGN
+0xC0 0x007B #LEFT CURLY BRACKET
+0xC1 0x0041 #LATIN CAPITAL LETTER A
+0xC2 0x0042 #LATIN CAPITAL LETTER B
+0xC3 0x0043 #LATIN CAPITAL LETTER C
+0xC4 0x0044 #LATIN CAPITAL LETTER D
+0xC5 0x0045 #LATIN CAPITAL LETTER E
+0xC6 0x0046 #LATIN CAPITAL LETTER F
+0xC7 0x0047 #LATIN CAPITAL LETTER G
+0xC8 0x0048 #LATIN CAPITAL LETTER H
+0xC9 0x0049 #LATIN CAPITAL LETTER I
+0xCA 0x00AD #SOFT HYPHEN
+0xCB 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX
+0xCC 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS
+0xCD 0x00F2 #LATIN SMALL LETTER O WITH GRAVE
+0xCE 0x00F3 #LATIN SMALL LETTER O WITH ACUTE
+0xCF 0x00F5 #LATIN SMALL LETTER O WITH TILDE
+0xD0 0x007D #RIGHT CURLY BRACKET
+0xD1 0x004A #LATIN CAPITAL LETTER J
+0xD2 0x004B #LATIN CAPITAL LETTER K
+0xD3 0x004C #LATIN CAPITAL LETTER L
+0xD4 0x004D #LATIN CAPITAL LETTER M
+0xD5 0x004E #LATIN CAPITAL LETTER N
+0xD6 0x004F #LATIN CAPITAL LETTER O
+0xD7 0x0050 #LATIN CAPITAL LETTER P
+0xD8 0x0051 #LATIN CAPITAL LETTER Q
+0xD9 0x0052 #LATIN CAPITAL LETTER R
+0xDA 0x00B9 #SUPERSCRIPT ONE
+0xDB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX
+0xDC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS
+0xDD 0x00F9 #LATIN SMALL LETTER U WITH GRAVE
+0xDE 0x00FA #LATIN SMALL LETTER U WITH ACUTE
+0xDF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS
+0xE0 0x005C #REVERSE SOLIDUS
+0xE1 0x00F7 #DIVISION SIGN
+0xE2 0x0053 #LATIN CAPITAL LETTER S
+0xE3 0x0054 #LATIN CAPITAL LETTER T
+0xE4 0x0055 #LATIN CAPITAL LETTER U
+0xE5 0x0056 #LATIN CAPITAL LETTER V
+0xE6 0x0057 #LATIN CAPITAL LETTER W
+0xE7 0x0058 #LATIN CAPITAL LETTER X
+0xE8 0x0059 #LATIN CAPITAL LETTER Y
+0xE9 0x005A #LATIN CAPITAL LETTER Z
+0xEA 0x00B2 #SUPERSCRIPT TWO
+0xEB 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+0xEC 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS
+0xED 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE
+0xEE 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE
+0xEF 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE
+0xF0 0x0030 #DIGIT ZERO
+0xF1 0x0031 #DIGIT ONE
+0xF2 0x0032 #DIGIT TWO
+0xF3 0x0033 #DIGIT THREE
+0xF4 0x0034 #DIGIT FOUR
+0xF5 0x0035 #DIGIT FIVE
+0xF6 0x0036 #DIGIT SIX
+0xF7 0x0037 #DIGIT SEVEN
+0xF8 0x0038 #DIGIT EIGHT
+0xF9 0x0039 #DIGIT NINE
+0xFA 0x00B3 #SUPERSCRIPT THREE
+0xFB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+0xFC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS
+0xFD 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE
+0xFE 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE
+0xFF 0x009F #CONTROL
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/KOI8-U.TXT b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/KOI8-U.TXT
new file mode 100644
index 0000000000..77160cf7e6
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/KOI8-U.TXT
@@ -0,0 +1,298 @@
+#
+# Name: KOI8-U (RFC2319) to Unicode
+# Unicode version: 3.2
+# Table version: 1.0
+# Table format: Format A
+# Date: 2005-10-25
+# Authors: Marc-Andre Lemburg <mal@egenix.com>
+#
+# See RFC2319 for details. This encoding is a modified KOI8-R
+# encoding.
+#
+# (c) Copyright Marc-Andre Lemburg, 2005.
+# Licensed to PSF under a Contributor Agreement.
+#
+# Based on the file
+# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT
+# which is:
+#
+# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved.
+#
+# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+# No claims are made as to fitness for any particular purpose. No
+# warranties of any kind are expressed or implied. The recipient
+# agrees to determine applicability of information provided. If this
+# file has been provided on optical media by Unicode, Inc., the sole
+# remedy for any claim will be exchange of defective media within 90
+# days of receipt.
+#
+# Unicode, Inc. hereby grants the right to freely use the information
+# supplied in this file in the creation of products supporting the
+# Unicode Standard, and to make copies of this file in any form for
+# internal or external distribution as long as this notice remains
+# attached.
+#
+0x00 0x0000 # NULL
+0x01 0x0001 # START OF HEADING
+0x02 0x0002 # START OF TEXT
+0x03 0x0003 # END OF TEXT
+0x04 0x0004 # END OF TRANSMISSION
+0x05 0x0005 # ENQUIRY
+0x06 0x0006 # ACKNOWLEDGE
+0x07 0x0007 # BELL
+0x08 0x0008 # BACKSPACE
+0x09 0x0009 # HORIZONTAL TABULATION
+0x0A 0x000A # LINE FEED
+0x0B 0x000B # VERTICAL TABULATION
+0x0C 0x000C # FORM FEED
+0x0D 0x000D # CARRIAGE RETURN
+0x0E 0x000E # SHIFT OUT
+0x0F 0x000F # SHIFT IN
+0x10 0x0010 # DATA LINK ESCAPE
+0x11 0x0011 # DEVICE CONTROL ONE
+0x12 0x0012 # DEVICE CONTROL TWO
+0x13 0x0013 # DEVICE CONTROL THREE
+0x14 0x0014 # DEVICE CONTROL FOUR
+0x15 0x0015 # NEGATIVE ACKNOWLEDGE
+0x16 0x0016 # SYNCHRONOUS IDLE
+0x17 0x0017 # END OF TRANSMISSION BLOCK
+0x18 0x0018 # CANCEL
+0x19 0x0019 # END OF MEDIUM
+0x1A 0x001A # SUBSTITUTE
+0x1B 0x001B # ESCAPE
+0x1C 0x001C # FILE SEPARATOR
+0x1D 0x001D # GROUP SEPARATOR
+0x1E 0x001E # RECORD SEPARATOR
+0x1F 0x001F # UNIT SEPARATOR
+0x20 0x0020 # SPACE
+0x21 0x0021 # EXCLAMATION MARK
+0x22 0x0022 # QUOTATION MARK
+0x23 0x0023 # NUMBER SIGN
+0x24 0x0024 # DOLLAR SIGN
+0x25 0x0025 # PERCENT SIGN
+0x26 0x0026 # AMPERSAND
+0x27 0x0027 # APOSTROPHE
+0x28 0x0028 # LEFT PARENTHESIS
+0x29 0x0029 # RIGHT PARENTHESIS
+0x2A 0x002A # ASTERISK
+0x2B 0x002B # PLUS SIGN
+0x2C 0x002C # COMMA
+0x2D 0x002D # HYPHEN-MINUS
+0x2E 0x002E # FULL STOP
+0x2F 0x002F # SOLIDUS
+0x30 0x0030 # DIGIT ZERO
+0x31 0x0031 # DIGIT ONE
+0x32 0x0032 # DIGIT TWO
+0x33 0x0033 # DIGIT THREE
+0x34 0x0034 # DIGIT FOUR
+0x35 0x0035 # DIGIT FIVE
+0x36 0x0036 # DIGIT SIX
+0x37 0x0037 # DIGIT SEVEN
+0x38 0x0038 # DIGIT EIGHT
+0x39 0x0039 # DIGIT NINE
+0x3A 0x003A # COLON
+0x3B 0x003B # SEMICOLON
+0x3C 0x003C # LESS-THAN SIGN
+0x3D 0x003D # EQUALS SIGN
+0x3E 0x003E # GREATER-THAN SIGN
+0x3F 0x003F # QUESTION MARK
+0x40 0x0040 # COMMERCIAL AT
+0x41 0x0041 # LATIN CAPITAL LETTER A
+0x42 0x0042 # LATIN CAPITAL LETTER B
+0x43 0x0043 # LATIN CAPITAL LETTER C
+0x44 0x0044 # LATIN CAPITAL LETTER D
+0x45 0x0045 # LATIN CAPITAL LETTER E
+0x46 0x0046 # LATIN CAPITAL LETTER F
+0x47 0x0047 # LATIN CAPITAL LETTER G
+0x48 0x0048 # LATIN CAPITAL LETTER H
+0x49 0x0049 # LATIN CAPITAL LETTER I
+0x4A 0x004A # LATIN CAPITAL LETTER J
+0x4B 0x004B # LATIN CAPITAL LETTER K
+0x4C 0x004C # LATIN CAPITAL LETTER L
+0x4D 0x004D # LATIN CAPITAL LETTER M
+0x4E 0x004E # LATIN CAPITAL LETTER N
+0x4F 0x004F # LATIN CAPITAL LETTER O
+0x50 0x0050 # LATIN CAPITAL LETTER P
+0x51 0x0051 # LATIN CAPITAL LETTER Q
+0x52 0x0052 # LATIN CAPITAL LETTER R
+0x53 0x0053 # LATIN CAPITAL LETTER S
+0x54 0x0054 # LATIN CAPITAL LETTER T
+0x55 0x0055 # LATIN CAPITAL LETTER U
+0x56 0x0056 # LATIN CAPITAL LETTER V
+0x57 0x0057 # LATIN CAPITAL LETTER W
+0x58 0x0058 # LATIN CAPITAL LETTER X
+0x59 0x0059 # LATIN CAPITAL LETTER Y
+0x5A 0x005A # LATIN CAPITAL LETTER Z
+0x5B 0x005B # LEFT SQUARE BRACKET
+0x5C 0x005C # REVERSE SOLIDUS
+0x5D 0x005D # RIGHT SQUARE BRACKET
+0x5E 0x005E # CIRCUMFLEX ACCENT
+0x5F 0x005F # LOW LINE
+0x60 0x0060 # GRAVE ACCENT
+0x61 0x0061 # LATIN SMALL LETTER A
+0x62 0x0062 # LATIN SMALL LETTER B
+0x63 0x0063 # LATIN SMALL LETTER C
+0x64 0x0064 # LATIN SMALL LETTER D
+0x65 0x0065 # LATIN SMALL LETTER E
+0x66 0x0066 # LATIN SMALL LETTER F
+0x67 0x0067 # LATIN SMALL LETTER G
+0x68 0x0068 # LATIN SMALL LETTER H
+0x69 0x0069 # LATIN SMALL LETTER I
+0x6A 0x006A # LATIN SMALL LETTER J
+0x6B 0x006B # LATIN SMALL LETTER K
+0x6C 0x006C # LATIN SMALL LETTER L
+0x6D 0x006D # LATIN SMALL LETTER M
+0x6E 0x006E # LATIN SMALL LETTER N
+0x6F 0x006F # LATIN SMALL LETTER O
+0x70 0x0070 # LATIN SMALL LETTER P
+0x71 0x0071 # LATIN SMALL LETTER Q
+0x72 0x0072 # LATIN SMALL LETTER R
+0x73 0x0073 # LATIN SMALL LETTER S
+0x74 0x0074 # LATIN SMALL LETTER T
+0x75 0x0075 # LATIN SMALL LETTER U
+0x76 0x0076 # LATIN SMALL LETTER V
+0x77 0x0077 # LATIN SMALL LETTER W
+0x78 0x0078 # LATIN SMALL LETTER X
+0x79 0x0079 # LATIN SMALL LETTER Y
+0x7A 0x007A # LATIN SMALL LETTER Z
+0x7B 0x007B # LEFT CURLY BRACKET
+0x7C 0x007C # VERTICAL LINE
+0x7D 0x007D # RIGHT CURLY BRACKET
+0x7E 0x007E # TILDE
+0x7F 0x007F # DELETE
+0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL
+0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL
+0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT
+0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT
+0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT
+0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT
+0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT
+0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL
+0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+0x8B 0x2580 # UPPER HALF BLOCK
+0x8C 0x2584 # LOWER HALF BLOCK
+0x8D 0x2588 # FULL BLOCK
+0x8E 0x258C # LEFT HALF BLOCK
+0x8F 0x2590 # RIGHT HALF BLOCK
+0x90 0x2591 # LIGHT SHADE
+0x91 0x2592 # MEDIUM SHADE
+0x92 0x2593 # DARK SHADE
+0x93 0x2320 # TOP HALF INTEGRAL
+0x94 0x25A0 # BLACK SQUARE
+0x95 0x2219 # BULLET OPERATOR
+0x96 0x221A # SQUARE ROOT
+0x97 0x2248 # ALMOST EQUAL TO
+0x98 0x2264 # LESS-THAN OR EQUAL TO
+0x99 0x2265 # GREATER-THAN OR EQUAL TO
+0x9A 0x00A0 # NO-BREAK SPACE
+0x9B 0x2321 # BOTTOM HALF INTEGRAL
+0x9C 0x00B0 # DEGREE SIGN
+0x9D 0x00B2 # SUPERSCRIPT TWO
+0x9E 0x00B7 # MIDDLE DOT
+0x9F 0x00F7 # DIVISION SIGN
+0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL
+0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL
+0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+0xA3 0x0451 # CYRILLIC SMALL LETTER IO
+#0xA4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+0xA4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE
+0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT
+#0xA6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+0xA6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+#0xA7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+0xA7 0x0457 # CYRILLIC SMALL LETTER YI (UKRAINIAN)
+0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT
+0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT
+0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+#0xAD 0x255C # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+0xAD 0x0491 # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
+0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT
+0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO
+#0xB4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+0xB4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+#0xB6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+0xB6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+#0xB7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+0xB7 0x0407 # CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
+0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+#0xBD 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+0xBD 0x0490 # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
+0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+0xBF 0x00A9 # COPYRIGHT SIGN
+0xC0 0x044E # CYRILLIC SMALL LETTER YU
+0xC1 0x0430 # CYRILLIC SMALL LETTER A
+0xC2 0x0431 # CYRILLIC SMALL LETTER BE
+0xC3 0x0446 # CYRILLIC SMALL LETTER TSE
+0xC4 0x0434 # CYRILLIC SMALL LETTER DE
+0xC5 0x0435 # CYRILLIC SMALL LETTER IE
+0xC6 0x0444 # CYRILLIC SMALL LETTER EF
+0xC7 0x0433 # CYRILLIC SMALL LETTER GHE
+0xC8 0x0445 # CYRILLIC SMALL LETTER HA
+0xC9 0x0438 # CYRILLIC SMALL LETTER I
+0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I
+0xCB 0x043A # CYRILLIC SMALL LETTER KA
+0xCC 0x043B # CYRILLIC SMALL LETTER EL
+0xCD 0x043C # CYRILLIC SMALL LETTER EM
+0xCE 0x043D # CYRILLIC SMALL LETTER EN
+0xCF 0x043E # CYRILLIC SMALL LETTER O
+0xD0 0x043F # CYRILLIC SMALL LETTER PE
+0xD1 0x044F # CYRILLIC SMALL LETTER YA
+0xD2 0x0440 # CYRILLIC SMALL LETTER ER
+0xD3 0x0441 # CYRILLIC SMALL LETTER ES
+0xD4 0x0442 # CYRILLIC SMALL LETTER TE
+0xD5 0x0443 # CYRILLIC SMALL LETTER U
+0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE
+0xD7 0x0432 # CYRILLIC SMALL LETTER VE
+0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN
+0xD9 0x044B # CYRILLIC SMALL LETTER YERU
+0xDA 0x0437 # CYRILLIC SMALL LETTER ZE
+0xDB 0x0448 # CYRILLIC SMALL LETTER SHA
+0xDC 0x044D # CYRILLIC SMALL LETTER E
+0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA
+0xDE 0x0447 # CYRILLIC SMALL LETTER CHE
+0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN
+0xE0 0x042E # CYRILLIC CAPITAL LETTER YU
+0xE1 0x0410 # CYRILLIC CAPITAL LETTER A
+0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE
+0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE
+0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE
+0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE
+0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF
+0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE
+0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA
+0xE9 0x0418 # CYRILLIC CAPITAL LETTER I
+0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I
+0xEB 0x041A # CYRILLIC CAPITAL LETTER KA
+0xEC 0x041B # CYRILLIC CAPITAL LETTER EL
+0xED 0x041C # CYRILLIC CAPITAL LETTER EM
+0xEE 0x041D # CYRILLIC CAPITAL LETTER EN
+0xEF 0x041E # CYRILLIC CAPITAL LETTER O
+0xF0 0x041F # CYRILLIC CAPITAL LETTER PE
+0xF1 0x042F # CYRILLIC CAPITAL LETTER YA
+0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER
+0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES
+0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE
+0xF5 0x0423 # CYRILLIC CAPITAL LETTER U
+0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE
+0xF7 0x0412 # CYRILLIC CAPITAL LETTER VE
+0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN
+0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU
+0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE
+0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA
+0xFC 0x042D # CYRILLIC CAPITAL LETTER E
+0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA
+0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE
+0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/TIS-620.TXT b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/TIS-620.TXT
new file mode 100644
index 0000000000..05173e9720
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/TIS-620.TXT
@@ -0,0 +1,284 @@
+#
+# Name: TIS-620
+# Unicode version: 3.2
+# Table version: 1.0
+# Table format: Format A
+# Date: 2005-10-25
+# Authors: Marc-Andre Lemburg <mal@egenix.com>
+#
+# According to
+# ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT the
+# TIS-620 is identical to ISO_8859-11 with the 0xA0
+# (no-break space) mapping removed.
+#
+# (c) Copyright Marc-Andre Lemburg, 2005.
+# Licensed to PSF under a Contributor Agreement.
+#
+# Based on the file
+# ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT
+# which is:
+#
+# Copyright (c) 2002 Unicode, Inc. All Rights reserved.
+#
+# This file is provided as-is by Unicode, Inc. (The Unicode Consortium).
+# No claims are made as to fitness for any particular purpose. No
+# warranties of any kind are expressed or implied. The recipient
+# agrees to determine applicability of information provided. If this
+# file has been provided on optical media by Unicode, Inc., the sole
+# remedy for any claim will be exchange of defective media within 90
+# days of receipt.
+#
+# Unicode, Inc. hereby grants the right to freely use the information
+# supplied in this file in the creation of products supporting the
+# Unicode Standard, and to make copies of this file in any form for
+# internal or external distribution as long as this notice remains
+# attached.
+#
+0x00 0x0000 # NULL
+0x01 0x0001 # START OF HEADING
+0x02 0x0002 # START OF TEXT
+0x03 0x0003 # END OF TEXT
+0x04 0x0004 # END OF TRANSMISSION
+0x05 0x0005 # ENQUIRY
+0x06 0x0006 # ACKNOWLEDGE
+0x07 0x0007 # BELL
+0x08 0x0008 # BACKSPACE
+0x09 0x0009 # HORIZONTAL TABULATION
+0x0A 0x000A # LINE FEED
+0x0B 0x000B # VERTICAL TABULATION
+0x0C 0x000C # FORM FEED
+0x0D 0x000D # CARRIAGE RETURN
+0x0E 0x000E # SHIFT OUT
+0x0F 0x000F # SHIFT IN
+0x10 0x0010 # DATA LINK ESCAPE
+0x11 0x0011 # DEVICE CONTROL ONE
+0x12 0x0012 # DEVICE CONTROL TWO
+0x13 0x0013 # DEVICE CONTROL THREE
+0x14 0x0014 # DEVICE CONTROL FOUR
+0x15 0x0015 # NEGATIVE ACKNOWLEDGE
+0x16 0x0016 # SYNCHRONOUS IDLE
+0x17 0x0017 # END OF TRANSMISSION BLOCK
+0x18 0x0018 # CANCEL
+0x19 0x0019 # END OF MEDIUM
+0x1A 0x001A # SUBSTITUTE
+0x1B 0x001B # ESCAPE
+0x1C 0x001C # FILE SEPARATOR
+0x1D 0x001D # GROUP SEPARATOR
+0x1E 0x001E # RECORD SEPARATOR
+0x1F 0x001F # UNIT SEPARATOR
+0x20 0x0020 # SPACE
+0x21 0x0021 # EXCLAMATION MARK
+0x22 0x0022 # QUOTATION MARK
+0x23 0x0023 # NUMBER SIGN
+0x24 0x0024 # DOLLAR SIGN
+0x25 0x0025 # PERCENT SIGN
+0x26 0x0026 # AMPERSAND
+0x27 0x0027 # APOSTROPHE
+0x28 0x0028 # LEFT PARENTHESIS
+0x29 0x0029 # RIGHT PARENTHESIS
+0x2A 0x002A # ASTERISK
+0x2B 0x002B # PLUS SIGN
+0x2C 0x002C # COMMA
+0x2D 0x002D # HYPHEN-MINUS
+0x2E 0x002E # FULL STOP
+0x2F 0x002F # SOLIDUS
+0x30 0x0030 # DIGIT ZERO
+0x31 0x0031 # DIGIT ONE
+0x32 0x0032 # DIGIT TWO
+0x33 0x0033 # DIGIT THREE
+0x34 0x0034 # DIGIT FOUR
+0x35 0x0035 # DIGIT FIVE
+0x36 0x0036 # DIGIT SIX
+0x37 0x0037 # DIGIT SEVEN
+0x38 0x0038 # DIGIT EIGHT
+0x39 0x0039 # DIGIT NINE
+0x3A 0x003A # COLON
+0x3B 0x003B # SEMICOLON
+0x3C 0x003C # LESS-THAN SIGN
+0x3D 0x003D # EQUALS SIGN
+0x3E 0x003E # GREATER-THAN SIGN
+0x3F 0x003F # QUESTION MARK
+0x40 0x0040 # COMMERCIAL AT
+0x41 0x0041 # LATIN CAPITAL LETTER A
+0x42 0x0042 # LATIN CAPITAL LETTER B
+0x43 0x0043 # LATIN CAPITAL LETTER C
+0x44 0x0044 # LATIN CAPITAL LETTER D
+0x45 0x0045 # LATIN CAPITAL LETTER E
+0x46 0x0046 # LATIN CAPITAL LETTER F
+0x47 0x0047 # LATIN CAPITAL LETTER G
+0x48 0x0048 # LATIN CAPITAL LETTER H
+0x49 0x0049 # LATIN CAPITAL LETTER I
+0x4A 0x004A # LATIN CAPITAL LETTER J
+0x4B 0x004B # LATIN CAPITAL LETTER K
+0x4C 0x004C # LATIN CAPITAL LETTER L
+0x4D 0x004D # LATIN CAPITAL LETTER M
+0x4E 0x004E # LATIN CAPITAL LETTER N
+0x4F 0x004F # LATIN CAPITAL LETTER O
+0x50 0x0050 # LATIN CAPITAL LETTER P
+0x51 0x0051 # LATIN CAPITAL LETTER Q
+0x52 0x0052 # LATIN CAPITAL LETTER R
+0x53 0x0053 # LATIN CAPITAL LETTER S
+0x54 0x0054 # LATIN CAPITAL LETTER T
+0x55 0x0055 # LATIN CAPITAL LETTER U
+0x56 0x0056 # LATIN CAPITAL LETTER V
+0x57 0x0057 # LATIN CAPITAL LETTER W
+0x58 0x0058 # LATIN CAPITAL LETTER X
+0x59 0x0059 # LATIN CAPITAL LETTER Y
+0x5A 0x005A # LATIN CAPITAL LETTER Z
+0x5B 0x005B # LEFT SQUARE BRACKET
+0x5C 0x005C # REVERSE SOLIDUS
+0x5D 0x005D # RIGHT SQUARE BRACKET
+0x5E 0x005E # CIRCUMFLEX ACCENT
+0x5F 0x005F # LOW LINE
+0x60 0x0060 # GRAVE ACCENT
+0x61 0x0061 # LATIN SMALL LETTER A
+0x62 0x0062 # LATIN SMALL LETTER B
+0x63 0x0063 # LATIN SMALL LETTER C
+0x64 0x0064 # LATIN SMALL LETTER D
+0x65 0x0065 # LATIN SMALL LETTER E
+0x66 0x0066 # LATIN SMALL LETTER F
+0x67 0x0067 # LATIN SMALL LETTER G
+0x68 0x0068 # LATIN SMALL LETTER H
+0x69 0x0069 # LATIN SMALL LETTER I
+0x6A 0x006A # LATIN SMALL LETTER J
+0x6B 0x006B # LATIN SMALL LETTER K
+0x6C 0x006C # LATIN SMALL LETTER L
+0x6D 0x006D # LATIN SMALL LETTER M
+0x6E 0x006E # LATIN SMALL LETTER N
+0x6F 0x006F # LATIN SMALL LETTER O
+0x70 0x0070 # LATIN SMALL LETTER P
+0x71 0x0071 # LATIN SMALL LETTER Q
+0x72 0x0072 # LATIN SMALL LETTER R
+0x73 0x0073 # LATIN SMALL LETTER S
+0x74 0x0074 # LATIN SMALL LETTER T
+0x75 0x0075 # LATIN SMALL LETTER U
+0x76 0x0076 # LATIN SMALL LETTER V
+0x77 0x0077 # LATIN SMALL LETTER W
+0x78 0x0078 # LATIN SMALL LETTER X
+0x79 0x0079 # LATIN SMALL LETTER Y
+0x7A 0x007A # LATIN SMALL LETTER Z
+0x7B 0x007B # LEFT CURLY BRACKET
+0x7C 0x007C # VERTICAL LINE
+0x7D 0x007D # RIGHT CURLY BRACKET
+0x7E 0x007E # TILDE
+0x7F 0x007F # DELETE
+0x80 0x0080 # <control>
+0x81 0x0081 # <control>
+0x82 0x0082 # <control>
+0x83 0x0083 # <control>
+0x84 0x0084 # <control>
+0x85 0x0085 # <control>
+0x86 0x0086 # <control>
+0x87 0x0087 # <control>
+0x88 0x0088 # <control>
+0x89 0x0089 # <control>
+0x8A 0x008A # <control>
+0x8B 0x008B # <control>
+0x8C 0x008C # <control>
+0x8D 0x008D # <control>
+0x8E 0x008E # <control>
+0x8F 0x008F # <control>
+0x90 0x0090 # <control>
+0x91 0x0091 # <control>
+0x92 0x0092 # <control>
+0x93 0x0093 # <control>
+0x94 0x0094 # <control>
+0x95 0x0095 # <control>
+0x96 0x0096 # <control>
+0x97 0x0097 # <control>
+0x98 0x0098 # <control>
+0x99 0x0099 # <control>
+0x9A 0x009A # <control>
+0x9B 0x009B # <control>
+0x9C 0x009C # <control>
+0x9D 0x009D # <control>
+0x9E 0x009E # <control>
+0x9F 0x009F # <control>
+#0xA0 0x00A0 # NO-BREAK SPACE
+0xA1 0x0E01 # THAI CHARACTER KO KAI
+0xA2 0x0E02 # THAI CHARACTER KHO KHAI
+0xA3 0x0E03 # THAI CHARACTER KHO KHUAT
+0xA4 0x0E04 # THAI CHARACTER KHO KHWAI
+0xA5 0x0E05 # THAI CHARACTER KHO KHON
+0xA6 0x0E06 # THAI CHARACTER KHO RAKHANG
+0xA7 0x0E07 # THAI CHARACTER NGO NGU
+0xA8 0x0E08 # THAI CHARACTER CHO CHAN
+0xA9 0x0E09 # THAI CHARACTER CHO CHING
+0xAA 0x0E0A # THAI CHARACTER CHO CHANG
+0xAB 0x0E0B # THAI CHARACTER SO SO
+0xAC 0x0E0C # THAI CHARACTER CHO CHOE
+0xAD 0x0E0D # THAI CHARACTER YO YING
+0xAE 0x0E0E # THAI CHARACTER DO CHADA
+0xAF 0x0E0F # THAI CHARACTER TO PATAK
+0xB0 0x0E10 # THAI CHARACTER THO THAN
+0xB1 0x0E11 # THAI CHARACTER THO NANGMONTHO
+0xB2 0x0E12 # THAI CHARACTER THO PHUTHAO
+0xB3 0x0E13 # THAI CHARACTER NO NEN
+0xB4 0x0E14 # THAI CHARACTER DO DEK
+0xB5 0x0E15 # THAI CHARACTER TO TAO
+0xB6 0x0E16 # THAI CHARACTER THO THUNG
+0xB7 0x0E17 # THAI CHARACTER THO THAHAN
+0xB8 0x0E18 # THAI CHARACTER THO THONG
+0xB9 0x0E19 # THAI CHARACTER NO NU
+0xBA 0x0E1A # THAI CHARACTER BO BAIMAI
+0xBB 0x0E1B # THAI CHARACTER PO PLA
+0xBC 0x0E1C # THAI CHARACTER PHO PHUNG
+0xBD 0x0E1D # THAI CHARACTER FO FA
+0xBE 0x0E1E # THAI CHARACTER PHO PHAN
+0xBF 0x0E1F # THAI CHARACTER FO FAN
+0xC0 0x0E20 # THAI CHARACTER PHO SAMPHAO
+0xC1 0x0E21 # THAI CHARACTER MO MA
+0xC2 0x0E22 # THAI CHARACTER YO YAK
+0xC3 0x0E23 # THAI CHARACTER RO RUA
+0xC4 0x0E24 # THAI CHARACTER RU
+0xC5 0x0E25 # THAI CHARACTER LO LING
+0xC6 0x0E26 # THAI CHARACTER LU
+0xC7 0x0E27 # THAI CHARACTER WO WAEN
+0xC8 0x0E28 # THAI CHARACTER SO SALA
+0xC9 0x0E29 # THAI CHARACTER SO RUSI
+0xCA 0x0E2A # THAI CHARACTER SO SUA
+0xCB 0x0E2B # THAI CHARACTER HO HIP
+0xCC 0x0E2C # THAI CHARACTER LO CHULA
+0xCD 0x0E2D # THAI CHARACTER O ANG
+0xCE 0x0E2E # THAI CHARACTER HO NOKHUK
+0xCF 0x0E2F # THAI CHARACTER PAIYANNOI
+0xD0 0x0E30 # THAI CHARACTER SARA A
+0xD1 0x0E31 # THAI CHARACTER MAI HAN-AKAT
+0xD2 0x0E32 # THAI CHARACTER SARA AA
+0xD3 0x0E33 # THAI CHARACTER SARA AM
+0xD4 0x0E34 # THAI CHARACTER SARA I
+0xD5 0x0E35 # THAI CHARACTER SARA II
+0xD6 0x0E36 # THAI CHARACTER SARA UE
+0xD7 0x0E37 # THAI CHARACTER SARA UEE
+0xD8 0x0E38 # THAI CHARACTER SARA U
+0xD9 0x0E39 # THAI CHARACTER SARA UU
+0xDA 0x0E3A # THAI CHARACTER PHINTHU
+0xDF 0x0E3F # THAI CURRENCY SYMBOL BAHT
+0xE0 0x0E40 # THAI CHARACTER SARA E
+0xE1 0x0E41 # THAI CHARACTER SARA AE
+0xE2 0x0E42 # THAI CHARACTER SARA O
+0xE3 0x0E43 # THAI CHARACTER SARA AI MAIMUAN
+0xE4 0x0E44 # THAI CHARACTER SARA AI MAIMALAI
+0xE5 0x0E45 # THAI CHARACTER LAKKHANGYAO
+0xE6 0x0E46 # THAI CHARACTER MAIYAMOK
+0xE7 0x0E47 # THAI CHARACTER MAITAIKHU
+0xE8 0x0E48 # THAI CHARACTER MAI EK
+0xE9 0x0E49 # THAI CHARACTER MAI THO
+0xEA 0x0E4A # THAI CHARACTER MAI TRI
+0xEB 0x0E4B # THAI CHARACTER MAI CHATTAWA
+0xEC 0x0E4C # THAI CHARACTER THANTHAKHAT
+0xED 0x0E4D # THAI CHARACTER NIKHAHIT
+0xEE 0x0E4E # THAI CHARACTER YAMAKKAN
+0xEF 0x0E4F # THAI CHARACTER FONGMAN
+0xF0 0x0E50 # THAI DIGIT ZERO
+0xF1 0x0E51 # THAI DIGIT ONE
+0xF2 0x0E52 # THAI DIGIT TWO
+0xF3 0x0E53 # THAI DIGIT THREE
+0xF4 0x0E54 # THAI DIGIT FOUR
+0xF5 0x0E55 # THAI DIGIT FIVE
+0xF6 0x0E56 # THAI DIGIT SIX
+0xF7 0x0E57 # THAI DIGIT SEVEN
+0xF8 0x0E58 # THAI DIGIT EIGHT
+0xF9 0x0E59 # THAI DIGIT NINE
+0xFA 0x0E5A # THAI CHARACTER ANGKHANKHU
+0xFB 0x0E5B # THAI CHARACTER KHOMUT