From 4710c53dcad1ebf3755f3efb9e80ac24bd72a9b2 Mon Sep 17 00:00:00 2001
From: darylm503
Date: Mon, 16 Apr 2012 22:12:42 +0000
Subject: AppPkg/Applications/Python: Add Python 2.7.2 sources since the
 release of Python 2.7.3 made them unavailable from the python.org web site.

These files are a subset of the python-2.7.2.tgz distribution from
python.org.  Changed files from PyMod-2.7.2 have been copied into the
corresponding directories of this tree, replacing the original files in
the distribution.

Signed-off-by: daryl.mcdaniel@intel.com

git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@13197 6f19259b-4bc3-4df7-8a09-765794883524
---
 .../Python/Python-2.7.2/Tools/unicode/Makefile     |   84 ++
 .../Python-2.7.2/Tools/unicode/comparecodecs.py    |   53 +
 .../Python-2.7.2/Tools/unicode/gencjkcodecs.py     |   68 ++
 .../Python/Python-2.7.2/Tools/unicode/gencodec.py  |  423 ++++++++
 .../Python-2.7.2/Tools/unicode/genwincodec.py      |   61 ++
 .../Python-2.7.2/Tools/unicode/genwincodecs.bat    |    7 +
 .../Python-2.7.2/Tools/unicode/listcodecs.py       |   41 +
 .../Python-2.7.2/Tools/unicode/makeunicodedata.py  | 1135 ++++++++++++++++++++
 .../Python-2.7.2/Tools/unicode/mkstringprep.py     |  425 ++++++++
 .../Tools/unicode/python-mappings/CP1140.TXT       |  291 +++++
 .../Tools/unicode/python-mappings/KOI8-U.TXT       |  298 +++++
 .../Tools/unicode/python-mappings/TIS-620.TXT      |  284 +++++
 12 files changed, 3170 insertions(+)
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/Makefile
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/comparecodecs.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencjkcodecs.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodec.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodecs.bat
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/listcodecs.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/makeunicodedata.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/mkstringprep.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/CP1140.TXT
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/KOI8-U.TXT
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/TIS-620.TXT

diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/Makefile b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/Makefile
new file mode 100644
index 0000000000..2fc88b9c5e
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/Makefile
@@ -0,0 +1,84 @@
+#
+# Recreate the Python charmap codecs from the Unicode mapping
+# files available at ftp://ftp.unicode.org/
+#
+#(c) Copyright Marc-Andre Lemburg, 2005.
+#    Licensed to PSF under a Contributor Agreement.
+
+# Python binary to use
+PYTHON = python
+
+# Remove tool to use
+RM = /bin/rm
+
+### Generic targets
+
+all: distclean mappings codecs
+
+codecs: misc windows iso apple ebcdic custom-mappings cjk
+
+### Mappings
+
+mappings:
+	ncftpget -R ftp.unicode.org . Public/MAPPINGS
+
+### Codecs
+
+build/:
+	mkdir build
+
+misc: build/
+	$(PYTHON) gencodec.py MAPPINGS/VENDORS/MISC/ build/
+	$(RM) build/atarist.*
+	$(RM) build/us_ascii_quotes.*
+	$(RM) build/ibmgraph.*
+	$(RM) build/sgml.*
+	$(RM) -f build/readme.*
+
+custom-mappings: build/
+	$(PYTHON) gencodec.py python-mappings/ build/
+
+windows: build/
+	$(PYTHON) gencodec.py MAPPINGS/VENDORS/MICSFT/WINDOWS/ build/
+	$(RM) build/cp9*
+	$(RM) -f build/readme.*
+
+iso: build/
+	$(PYTHON) gencodec.py MAPPINGS/ISO8859/ build/ iso
+	$(RM) -f build/isoreadme.*
+
+apple: build/
+	$(PYTHON) gencodec.py MAPPINGS/VENDORS/APPLE/ build/ mac_
+	$(RM) build/mac_dingbats.*
+	$(RM) build/mac_japanese.*
+	$(RM) build/mac_chin*
+	$(RM) build/mac_korean.*
+	$(RM) build/mac_symbol.*
+	$(RM) build/mac_corpchar.*
+	$(RM) build/mac_devanaga.*
+	$(RM) build/mac_gaelic.*
+	$(RM) build/mac_gurmukhi.*
+	$(RM) build/mac_hebrew.*
+	$(RM) build/mac_inuit.*
+	$(RM) build/mac_thai.*
+	$(RM) build/mac_ukraine.*
+	$(RM) build/mac_arabic.py
+	$(RM) build/mac_celtic.*
+	$(RM) build/mac_gujarati.*
+	$(RM) build/mac_keyboard.*
+	$(RM) -f build/mac_readme.*
+
+ebcdic: build/
+	$(PYTHON) gencodec.py MAPPINGS/VENDORS/MICSFT/EBCDIC/ build/
+	$(RM) -f build/readme.*
+
+cjk: build/
+	$(PYTHON) gencjkcodecs.py build/
+
+### Cleanup
+
+clean:
+	$(RM) -f build/*
+
+distclean: clean
+	$(RM) -rf MAPPINGS/
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/comparecodecs.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/comparecodecs.py
new file mode 100644
index 0000000000..a6f3102f1d
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/comparecodecs.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+
+""" Compare the output of two codecs.
+
+(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com).
+
+    Licensed to PSF under a Contributor Agreement.
+
+"""
+import sys
+
+def compare_codecs(encoding1, encoding2):
+
+    print 'Comparing encoding/decoding of %r and %r' % (encoding1, encoding2)
+    mismatch = 0
+    # Check encoding
+    for i in range(sys.maxunicode):
+        u = unichr(i)
+        try:
+            c1 = u.encode(encoding1)
+        except UnicodeError, reason:
+            c1 = '<undefined>'
+        try:
+            c2 = u.encode(encoding2)
+        except UnicodeError, reason:
+            c2 = '<undefined>'
+        if c1 != c2:
+            print ' * encoding mismatch for 0x%04X: %-14r != %r' % \
+                  (i, c1, c2)
+            mismatch += 1
+    # Check decoding
+    for i in range(256):
+        c = chr(i)
+        try:
+            u1 = c.decode(encoding1)
+        except UnicodeError:
+            u1 = u'<undefined>'
+        try:
+            u2 = c.decode(encoding2)
+        except UnicodeError:
+            u2 = u'<undefined>'
+        if u1 != u2:
+            print ' * decoding mismatch for 0x%04X: %-14r != %r' % \
+                  (i, u1, u2)
+            mismatch += 1
+    if mismatch:
+        print
+        print 'Found %i mismatches' % mismatch
+    else:
+        print '-> Codecs are identical.'
+
+if __name__ == '__main__':
+    compare_codecs(sys.argv[1], sys.argv[2])
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencjkcodecs.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencjkcodecs.py
new file mode 100644
index 0000000000..5ac97d4d9d
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencjkcodecs.py
@@ -0,0 +1,68 @@
+import os, string
+
+codecs = {
+    'cn': ('gb2312', 'gbk', 'gb18030', 'hz'),
+    'tw': ('big5', 'cp950'),
+    'hk': ('big5hkscs',),
+    'jp': ('cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213',
+           'euc_jis_2004', 'shift_jis_2004'),
+    'kr': ('cp949', 'euc_kr', 'johab'),
+    'iso2022': ('iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
+                'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext',
+                'iso2022_kr'),
+}
+
+TEMPLATE = string.Template("""\
+#
+# $encoding.py: Python Unicode Codec for $ENCODING
+#
+# Written by Hye-Shik Chang <perky@FreeBSD.org>
+#
+
+import _codecs_$owner, codecs
+import _multibytecodec as mbc
+
+codec = _codecs_$owner.getcodec('$encoding')
+
+class Codec(codecs.Codec):
+    encode = codec.encode
+    decode = codec.decode
+
+class IncrementalEncoder(mbc.MultibyteIncrementalEncoder,
+                         codecs.IncrementalEncoder):
+    codec = codec
+
+class IncrementalDecoder(mbc.MultibyteIncrementalDecoder,
+                         codecs.IncrementalDecoder):
+    codec = codec
+
+class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader):
+    codec = codec
+
+class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter):
+    codec = codec
+
+def getregentry():
+    return codecs.CodecInfo(
+        name='$encoding',
+        encode=Codec().encode,
+        decode=Codec().decode,
+        incrementalencoder=IncrementalEncoder,
+        incrementaldecoder=IncrementalDecoder,
+        streamreader=StreamReader,
+        streamwriter=StreamWriter,
+    )
+""")
+
+def gencodecs(prefix):
+    for loc, encodings in codecs.iteritems():
+        for enc in encodings:
+            code = TEMPLATE.substitute(ENCODING=enc.upper(),
+                                       encoding=enc.lower(),
+                                       owner=loc)
+            codecpath = os.path.join(prefix, enc + '.py')
+            open(codecpath, 'w').write(code)
+
+if __name__ == '__main__':
+    import sys
+    gencodecs(sys.argv[1])
diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py
new file mode 100644
index 0000000000..2626279d1e
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/gencodec.py
@@ -0,0 +1,423 @@
+""" Unicode Mapping Parser and Codec Generator.
+
+This script parses Unicode mapping files as available from the Unicode
+site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
+modules from them. The codecs use the standard character mapping codec
+to actually apply the mapping.
+
+Synopsis: gencodec.py dir codec_prefix
+
+All files in dir are scanned and those producing non-empty mappings
+will be written to <mapname>.py with <mapname> being the
+first part of the map's filename ('a' in a.b.c.txt) converted to
+lowercase with hyphens replaced by underscores.
+
+The tool also writes marshalled versions of the mapping tables to the
+same location (with .mapping extension).
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+(c) Copyright Guido van Rossum, 2000.
+
+Table generation:
+(c) Copyright Marc-Andre Lemburg, 2005.
+    Licensed to PSF under a Contributor Agreement.
+
+"""#"
+
+import re, os, marshal, codecs
+
+# Maximum allowed size of charmap tables
+MAX_TABLE_SIZE = 8192
+
+# Standard undefined Unicode code point
+UNI_UNDEFINED = unichr(0xFFFE)
+
+mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'
+                   '\s+'
+                   '((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
+                   '\s*'
+                   '(#.+)?')
+
+def parsecodes(codes, len=len, range=range):
+
+    """ Converts code combinations to either a single code integer
+        or a tuple of integers.
+
+        meta-codes (in angular brackets, e.g. <LR> and <RL>) are
+        ignored.
+
+        Empty codes or illegal ones are returned as None.
+
+    """
+    if not codes:
+        return None
+    l = codes.split('+')
+    if len(l) == 1:
+        return int(l[0],16)
+    for i in range(len(l)):
+        try:
+            l[i] = int(l[i],16)
+        except ValueError:
+            l[i] = None
+    l = [x for x in l if x is not None]
+    if len(l) == 1:
+        return l[0]
+    else:
+        return tuple(l)
+
+def readmap(filename):
+
+    f = open(filename,'r')
+    lines = f.readlines()
+    f.close()
+    enc2uni = {}
+    identity = []
+    unmapped = range(256)
+
+    # UTC mapping tables per convention don't include the identity
+    # mappings for code points 0x00 - 0x1F and 0x7F, unless these are
+    # explicitly mapped to different characters or undefined
+    for i in range(32) + [127]:
+        identity.append(i)
+        unmapped.remove(i)
+        enc2uni[i] = (i, 'CONTROL CHARACTER')
+
+    for line in lines:
+        line = line.strip()
+        if not line or line[0] == '#':
+            continue
+        m = mapRE.match(line)
+        if not m:
+            #print '* not matched: %s' % repr(line)
+            continue
+        enc,uni,comment = m.groups()
+        enc = parsecodes(enc)
+        uni = parsecodes(uni)
+        if comment is None:
+            comment = ''
+        else:
+            comment = comment[1:].strip()
+        if enc < 256:
+            if enc in unmapped:
+                unmapped.remove(enc)
+            if enc == uni:
+                identity.append(enc)
+            enc2uni[enc] = (uni,comment)
+        else:
+            enc2uni[enc] = (uni,comment)
+
+    # If there are more identity-mapped entries than unmapped entries,
+    # it pays to generate an identity dictionary first, and add explicit
+    # mappings to None for the rest
+    if len(identity) >= len(unmapped):
+        for enc in unmapped:
+            enc2uni[enc] = (None, "")
+        enc2uni['IDENTITY'] = 256
+
+    return enc2uni
+
+def hexrepr(t, precision=4):
+
+    if t is None:
+        return 'None'
+    try:
+        len(t)
+    except:
+        return '0x%0*X' % (precision, t)
+    try:
+        return '(' + ', '.join(['0x%0*X' % (precision, item)
+                                for item in t]) + ')'
+    except TypeError, why:
+        print '* failed to convert %r: %s' % (t, why)
+        raise
+
+def python_mapdef_code(varname, map, comments=1, precisions=(2, 4)):
+
+    l = []
+    append = l.append
+    if "IDENTITY" in map:
+        append("%s = codecs.make_identity_dict(range(%d))" %
+               (varname, map["IDENTITY"]))
+        append("%s.update({" % varname)
+        splits = 1
+        del map["IDENTITY"]
+        identity = 1
+    else:
+        append("%s = {" % varname)
+        splits = 0
+        identity = 0
+
+    mappings = sorted(map.items())
+    i = 0
+    key_precision, value_precision = precisions
+    for mapkey, mapvalue in mappings:
+        mapcomment = ''
+        if isinstance(mapkey, tuple):
+            (mapkey, mapcomment) = mapkey
+        if isinstance(mapvalue, tuple):
+            (mapvalue, mapcomment) = mapvalue
+        if mapkey is None:
+            continue
+        if (identity and
+            mapkey == mapvalue and
+            mapkey < 256):
+            # No need to include identity mappings, since these
+            # are already set for the first 256 code points.
+ continue + key = hexrepr(mapkey, key_precision) + value = hexrepr(mapvalue, value_precision) + if mapcomment and comments: + append(' %s: %s,\t# %s' % (key, value, mapcomment)) + else: + append(' %s: %s,' % (key, value)) + i += 1 + if i == 4096: + # Split the definition into parts to that the Python + # parser doesn't dump core + if splits == 0: + append('}') + else: + append('})') + append('%s.update({' % varname) + i = 0 + splits = splits + 1 + if splits == 0: + append('}') + else: + append('})') + + return l + +def python_tabledef_code(varname, map, comments=1, key_precision=2): + + l = [] + append = l.append + append('%s = (' % varname) + + # Analyze map and create table dict + mappings = sorted(map.items()) + table = {} + maxkey = 0 + if 'IDENTITY' in map: + for key in range(256): + table[key] = (key, '') + maxkey = 255 + del map['IDENTITY'] + for mapkey, mapvalue in mappings: + mapcomment = '' + if isinstance(mapkey, tuple): + (mapkey, mapcomment) = mapkey + if isinstance(mapvalue, tuple): + (mapvalue, mapcomment) = mapvalue + if mapkey is None: + continue + table[mapkey] = (mapvalue, mapcomment) + if mapkey > maxkey: + maxkey = mapkey + if maxkey > MAX_TABLE_SIZE: + # Table too large + return None + + # Create table code + for key in range(maxkey + 1): + if key not in table: + mapvalue = None + mapcomment = 'UNDEFINED' + else: + mapvalue, mapcomment = table[key] + if mapvalue is None: + mapchar = UNI_UNDEFINED + else: + if isinstance(mapvalue, tuple): + # 1-n mappings not supported + return None + else: + mapchar = unichr(mapvalue) + if mapcomment and comments: + append(' %r\t# %s -> %s' % (mapchar, + hexrepr(key, key_precision), + mapcomment)) + else: + append(' %r' % mapchar) + + append(')') + return l + +def codegen(name, map, encodingname, comments=1): + + """ Returns Python source for the given map. + + Comments are included in the source, if comments is true (default). + + """ + # Generate code + decoding_map_code = python_mapdef_code( + 'decoding_map', + map, + comments=comments) + decoding_table_code = python_tabledef_code( + 'decoding_table', + map, + comments=comments) + encoding_map_code = python_mapdef_code( + 'encoding_map', + codecs.make_encoding_map(map), + comments=comments, + precisions=(4, 2)) + + if decoding_table_code: + suffix = 'table' + else: + suffix = 'map' + + l = [ + '''\ +""" Python Character Mapping Codec %s generated from '%s' with gencodec.py. 
+ +"""#" + +import codecs + +### Codec APIs + +class Codec(codecs.Codec): + + def encode(self,input,errors='strict'): + return codecs.charmap_encode(input,errors,encoding_%s) + + def decode(self,input,errors='strict'): + return codecs.charmap_decode(input,errors,decoding_%s) +''' % (encodingname, name, suffix, suffix)] + l.append('''\ +class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return codecs.charmap_encode(input,self.errors,encoding_%s)[0] + +class IncrementalDecoder(codecs.IncrementalDecoder): + def decode(self, input, final=False): + return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' % + (suffix, suffix)) + + l.append(''' +class StreamWriter(Codec,codecs.StreamWriter): + pass + +class StreamReader(Codec,codecs.StreamReader): + pass + +### encodings module API + +def getregentry(): + return codecs.CodecInfo( + name=%r, + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) +''' % encodingname.replace('_', '-')) + + # Add decoding table or map (with preference to the table) + if not decoding_table_code: + l.append(''' +### Decoding Map +''') + l.extend(decoding_map_code) + else: + l.append(''' +### Decoding Table +''') + l.extend(decoding_table_code) + + # Add encoding map + if decoding_table_code: + l.append(''' +### Encoding table +encoding_table=codecs.charmap_build(decoding_table) +''') + else: + l.append(''' +### Encoding Map +''') + l.extend(encoding_map_code) + + # Final new-line + l.append('') + + return '\n'.join(l).expandtabs() + +def pymap(name,map,pyfile,encodingname,comments=1): + + code = codegen(name,map,encodingname,comments) + f = open(pyfile,'w') + f.write(code) + f.close() + +def marshalmap(name,map,marshalfile): + + d = {} + for e,(u,c) in map.items(): + d[e] = (u,c) + f = open(marshalfile,'wb') + marshal.dump(d,f) + f.close() + +def convertdir(dir, dirprefix='', nameprefix='', comments=1): + + mapnames = os.listdir(dir) + for mapname in mapnames: + mappathname = os.path.join(dir, mapname) + if not os.path.isfile(mappathname): + continue + name = os.path.split(mapname)[1] + name = name.replace('-','_') + name = name.split('.')[0] + name = name.lower() + name = nameprefix + name + codefile = name + '.py' + marshalfile = name + '.mapping' + print 'converting %s to %s and %s' % (mapname, + dirprefix + codefile, + dirprefix + marshalfile) + try: + map = readmap(os.path.join(dir,mapname)) + if not map: + print '* map is empty; skipping' + else: + pymap(mappathname, map, dirprefix + codefile,name,comments) + marshalmap(mappathname, map, dirprefix + marshalfile) + except ValueError, why: + print '* conversion failed: %s' % why + raise + +def rewritepythondir(dir, dirprefix='', comments=1): + + mapnames = os.listdir(dir) + for mapname in mapnames: + if not mapname.endswith('.mapping'): + continue + name = mapname[:-len('.mapping')] + codefile = name + '.py' + print 'converting %s to %s' % (mapname, + dirprefix + codefile) + try: + map = marshal.load(open(os.path.join(dir,mapname), + 'rb')) + if not map: + print '* map is empty; skipping' + else: + pymap(mapname, map, dirprefix + codefile,name,comments) + except ValueError, why: + print '* conversion failed: %s' % why + +if __name__ == '__main__': + + import sys + if 1: + convertdir(*sys.argv[1:]) + else: + rewritepythondir(*sys.argv[1:]) diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodec.py 
b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodec.py new file mode 100644 index 0000000000..32dcadabf2 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodec.py @@ -0,0 +1,61 @@ +"""This script generates a Python codec module from a Windows Code Page. + +It uses the function MultiByteToWideChar to generate a decoding table. +""" + +import ctypes +from ctypes import wintypes +from gencodec import codegen +import unicodedata + +def genwinmap(codepage): + MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar + MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD, + wintypes.LPCSTR, ctypes.c_int, + wintypes.LPWSTR, ctypes.c_int] + MultiByteToWideChar.restype = ctypes.c_int + + enc2uni = {} + + for i in range(32) + [127]: + enc2uni[i] = (i, 'CONTROL CHARACTER') + + for i in range(256): + buf = ctypes.create_unicode_buffer(2) + ret = MultiByteToWideChar( + codepage, 0, + chr(i), 1, + buf, 2) + assert ret == 1, "invalid code page" + assert buf[1] == '\x00' + try: + name = unicodedata.name(buf[0]) + except ValueError: + try: + name = enc2uni[i][1] + except KeyError: + name = '' + + enc2uni[i] = (ord(buf[0]), name) + + return enc2uni + +def genwincodec(codepage): + import platform + map = genwinmap(codepage) + encodingname = 'cp%d' % codepage + code = codegen("", map, encodingname) + # Replace first lines with our own docstring + code = '''\ +"""Python Character Mapping Codec %s generated on Windows: +%s with the command: + python Tools/unicode/genwincodec.py %s +"""#" +''' % (encodingname, ' '.join(platform.win32_ver()), codepage + ) + code.split('"""#"', 1)[1] + + print code + +if __name__ == '__main__': + import sys + genwincodec(int(sys.argv[1])) diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodecs.bat b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodecs.bat new file mode 100644 index 0000000000..6a6a671f81 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/genwincodecs.bat @@ -0,0 +1,7 @@ +@rem Recreate some python charmap codecs from the Windows function +@rem MultiByteToWideChar. + +@cd /d %~dp0 +@mkdir build +@rem Arabic DOS code page +c:\python26\python genwincodec.py 720 > build/cp720.py diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/listcodecs.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/listcodecs.py new file mode 100644 index 0000000000..5ad4309be0 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/listcodecs.py @@ -0,0 +1,41 @@ +""" List all available codec modules. + +(c) Copyright 2005, Marc-Andre Lemburg (mal@lemburg.com). + + Licensed to PSF under a Contributor Agreement. 
+ +""" + +import os, codecs, encodings + +_debug = 0 + +def listcodecs(dir): + names = [] + for filename in os.listdir(dir): + if filename[-3:] != '.py': + continue + name = filename[:-3] + # Check whether we've found a true codec + try: + codecs.lookup(name) + except LookupError: + # Codec not found + continue + except Exception, reason: + # Probably an error from importing the codec; still it's + # a valid code name + if _debug: + print '* problem importing codec %r: %s' % \ + (name, reason) + names.append(name) + return names + + +if __name__ == '__main__': + names = listcodecs(encodings.__path__[0]) + names.sort() + print 'all_codecs = [' + for name in names: + print ' %r,' % name + print ']' diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/makeunicodedata.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/makeunicodedata.py new file mode 100644 index 0000000000..37b9f6eb21 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/makeunicodedata.py @@ -0,0 +1,1135 @@ +# +# (re)generate unicode property and type databases +# +# this script converts a unicode 3.2 database file to +# Modules/unicodedata_db.h, Modules/unicodename_db.h, +# and Objects/unicodetype_db.h +# +# history: +# 2000-09-24 fl created (based on bits and pieces from unidb) +# 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table +# 2000-09-25 fl added character type table +# 2000-09-26 fl added LINEBREAK, DECIMAL, and DIGIT flags/fields (2.0) +# 2000-11-03 fl expand first/last ranges +# 2001-01-19 fl added character name tables (2.1) +# 2001-01-21 fl added decomp compression; dynamic phrasebook threshold +# 2002-09-11 wd use string methods +# 2002-10-18 mvl update to Unicode 3.2 +# 2002-10-22 mvl generate NFC tables +# 2002-11-24 mvl expand all ranges, sort names version-independently +# 2002-11-25 mvl add UNIDATA_VERSION +# 2004-05-29 perky add east asian width information +# 2006-03-10 mvl update to Unicode 4.1; add UCD 3.2 delta +# +# written by Fredrik Lundh (fredrik@pythonware.com) +# + +import sys + +SCRIPT = sys.argv[0] +VERSION = "2.6" + +# The Unicode Database +UNIDATA_VERSION = "5.2.0" +UNICODE_DATA = "UnicodeData%s.txt" +COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt" +EASTASIAN_WIDTH = "EastAsianWidth%s.txt" +UNIHAN = "Unihan%s.txt" +DERIVEDNORMALIZATION_PROPS = "DerivedNormalizationProps%s.txt" +LINE_BREAK = "LineBreak%s.txt" + +old_versions = ["3.2.0"] + +CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd", + "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn", "Lm", + "Lo", "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po", "Sm", "Sc", "Sk", + "So" ] + +BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO", + "PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS", + "ON" ] + +EASTASIANWIDTH_NAMES = [ "F", "H", "W", "Na", "A", "N" ] + +MANDATORY_LINE_BREAKS = [ "BK", "CR", "LF", "NL" ] + +# note: should match definitions in Objects/unicodectype.c +ALPHA_MASK = 0x01 +DECIMAL_MASK = 0x02 +DIGIT_MASK = 0x04 +LOWER_MASK = 0x08 +LINEBREAK_MASK = 0x10 +SPACE_MASK = 0x20 +TITLE_MASK = 0x40 +UPPER_MASK = 0x80 +NODELTA_MASK = 0x100 +NUMERIC_MASK = 0x200 + +def maketables(trace=0): + + print "--- Reading", UNICODE_DATA % "", "..." 
+ + version = "" + unicode = UnicodeData(UNICODE_DATA % version, + COMPOSITION_EXCLUSIONS % version, + EASTASIAN_WIDTH % version, + UNIHAN % version, + DERIVEDNORMALIZATION_PROPS % version, + LINE_BREAK % version) + + print len(filter(None, unicode.table)), "characters" + + for version in old_versions: + print "--- Reading", UNICODE_DATA % ("-"+version), "..." + old_unicode = UnicodeData(UNICODE_DATA % ("-"+version), + COMPOSITION_EXCLUSIONS % ("-"+version), + EASTASIAN_WIDTH % ("-"+version), + UNIHAN % ("-"+version)) + print len(filter(None, old_unicode.table)), "characters" + merge_old_version(version, unicode, old_unicode) + + makeunicodename(unicode, trace) + makeunicodedata(unicode, trace) + makeunicodetype(unicode, trace) + +# -------------------------------------------------------------------- +# unicode character properties + +def makeunicodedata(unicode, trace): + + dummy = (0, 0, 0, 0, 0, 0) + table = [dummy] + cache = {0: dummy} + index = [0] * len(unicode.chars) + + FILE = "Modules/unicodedata_db.h" + + print "--- Preparing", FILE, "..." + + # 1) database properties + + for char in unicode.chars: + record = unicode.table[char] + if record: + # extract database properties + category = CATEGORY_NAMES.index(record[2]) + combining = int(record[3]) + bidirectional = BIDIRECTIONAL_NAMES.index(record[4]) + mirrored = record[9] == "Y" + eastasianwidth = EASTASIANWIDTH_NAMES.index(record[15]) + normalizationquickcheck = record[17] + item = ( + category, combining, bidirectional, mirrored, eastasianwidth, + normalizationquickcheck + ) + # add entry to index and item tables + i = cache.get(item) + if i is None: + cache[item] = i = len(table) + table.append(item) + index[char] = i + + # 2) decomposition data + + decomp_data = [0] + decomp_prefix = [""] + decomp_index = [0] * len(unicode.chars) + decomp_size = 0 + + comp_pairs = [] + comp_first = [None] * len(unicode.chars) + comp_last = [None] * len(unicode.chars) + + for char in unicode.chars: + record = unicode.table[char] + if record: + if record[5]: + decomp = record[5].split() + if len(decomp) > 19: + raise Exception, "character %x has a decomposition too large for nfd_nfkd" % char + # prefix + if decomp[0][0] == "<": + prefix = decomp.pop(0) + else: + prefix = "" + try: + i = decomp_prefix.index(prefix) + except ValueError: + i = len(decomp_prefix) + decomp_prefix.append(prefix) + prefix = i + assert prefix < 256 + # content + decomp = [prefix + (len(decomp)<<8)] + [int(s, 16) for s in decomp] + # Collect NFC pairs + if not prefix and len(decomp) == 3 and \ + char not in unicode.exclusions and \ + unicode.table[decomp[1]][3] == "0": + p, l, r = decomp + comp_first[l] = 1 + comp_last[r] = 1 + comp_pairs.append((l,r,char)) + try: + i = decomp_data.index(decomp) + except ValueError: + i = len(decomp_data) + decomp_data.extend(decomp) + decomp_size = decomp_size + len(decomp) * 2 + else: + i = 0 + decomp_index[char] = i + + f = l = 0 + comp_first_ranges = [] + comp_last_ranges = [] + prev_f = prev_l = None + for i in unicode.chars: + if comp_first[i] is not None: + comp_first[i] = f + f += 1 + if prev_f is None: + prev_f = (i,i) + elif prev_f[1]+1 == i: + prev_f = prev_f[0],i + else: + comp_first_ranges.append(prev_f) + prev_f = (i,i) + if comp_last[i] is not None: + comp_last[i] = l + l += 1 + if prev_l is None: + prev_l = (i,i) + elif prev_l[1]+1 == i: + prev_l = prev_l[0],i + else: + comp_last_ranges.append(prev_l) + prev_l = (i,i) + comp_first_ranges.append(prev_f) + comp_last_ranges.append(prev_l) + total_first = f + total_last = l 
+ + comp_data = [0]*(total_first*total_last) + for f,l,char in comp_pairs: + f = comp_first[f] + l = comp_last[l] + comp_data[f*total_last+l] = char + + print len(table), "unique properties" + print len(decomp_prefix), "unique decomposition prefixes" + print len(decomp_data), "unique decomposition entries:", + print decomp_size, "bytes" + print total_first, "first characters in NFC" + print total_last, "last characters in NFC" + print len(comp_pairs), "NFC pairs" + + print "--- Writing", FILE, "..." + + fp = open(FILE, "w") + print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION) + print >>fp + print >>fp, '#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION + print >>fp, "/* a list of unique database records */" + print >>fp, \ + "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {" + for item in table: + print >>fp, " {%d, %d, %d, %d, %d, %d}," % item + print >>fp, "};" + print >>fp + + print >>fp, "/* Reindexing of NFC first characters. */" + print >>fp, "#define TOTAL_FIRST",total_first + print >>fp, "#define TOTAL_LAST",total_last + print >>fp, "struct reindex{int start;short count,index;};" + print >>fp, "static struct reindex nfc_first[] = {" + for start,end in comp_first_ranges: + print >>fp," { %d, %d, %d}," % (start,end-start,comp_first[start]) + print >>fp," {0,0,0}" + print >>fp,"};\n" + print >>fp, "static struct reindex nfc_last[] = {" + for start,end in comp_last_ranges: + print >>fp," { %d, %d, %d}," % (start,end-start,comp_last[start]) + print >>fp," {0,0,0}" + print >>fp,"};\n" + + # FIXME: the following tables could be made static, and + # the support code moved into unicodedatabase.c + + print >>fp, "/* string literals */" + print >>fp, "const char *_PyUnicode_CategoryNames[] = {" + for name in CATEGORY_NAMES: + print >>fp, " \"%s\"," % name + print >>fp, " NULL" + print >>fp, "};" + + print >>fp, "const char *_PyUnicode_BidirectionalNames[] = {" + for name in BIDIRECTIONAL_NAMES: + print >>fp, " \"%s\"," % name + print >>fp, " NULL" + print >>fp, "};" + + print >>fp, "const char *_PyUnicode_EastAsianWidthNames[] = {" + for name in EASTASIANWIDTH_NAMES: + print >>fp, " \"%s\"," % name + print >>fp, " NULL" + print >>fp, "};" + + print >>fp, "static const char *decomp_prefix[] = {" + for name in decomp_prefix: + print >>fp, " \"%s\"," % name + print >>fp, " NULL" + print >>fp, "};" + + # split record index table + index1, index2, shift = splitbins(index, trace) + + print >>fp, "/* index tables for the database records */" + print >>fp, "#define SHIFT", shift + Array("index1", index1).dump(fp, trace) + Array("index2", index2).dump(fp, trace) + + # split decomposition index table + index1, index2, shift = splitbins(decomp_index, trace) + + print >>fp, "/* decomposition data */" + Array("decomp_data", decomp_data).dump(fp, trace) + + print >>fp, "/* index tables for the decomposition data */" + print >>fp, "#define DECOMP_SHIFT", shift + Array("decomp_index1", index1).dump(fp, trace) + Array("decomp_index2", index2).dump(fp, trace) + + index, index2, shift = splitbins(comp_data, trace) + print >>fp, "/* NFC pairs */" + print >>fp, "#define COMP_SHIFT", shift + Array("comp_index", index).dump(fp, trace) + Array("comp_data", index2).dump(fp, trace) + + # Generate delta tables for old versions + for version, table, normalization in unicode.changed: + cversion = version.replace(".","_") + records = [table[0]] + cache = {table[0]:0} + index = [0] * len(table) + for i, record in enumerate(table): + try: + index[i] = cache[record] + except KeyError: + 
+                index[i] = cache[record] = len(records)
+                records.append(record)
+        index1, index2, shift = splitbins(index, trace)
+        print >>fp, "static const change_record change_records_%s[] = {" % cversion
+        for record in records:
+            print >>fp, "\t{ %s }," % ", ".join(map(str,record))
+        print >>fp, "};"
+        Array("changes_%s_index" % cversion, index1).dump(fp, trace)
+        Array("changes_%s_data" % cversion, index2).dump(fp, trace)
+        print >>fp, "static const change_record* get_change_%s(Py_UCS4 n)" % cversion
+        print >>fp, "{"
+        print >>fp, "\tint index;"
+        print >>fp, "\tif (n >= 0x110000) index = 0;"
+        print >>fp, "\telse {"
+        print >>fp, "\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift)
+        print >>fp, "\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
+              (cversion, shift, ((1<<shift)-1))
+        print >>fp, "\t}"
+        print >>fp, "\treturn change_records_%s+index;" % cversion
+        print >>fp, "}\n"
+        print >>fp, "static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion
+        print >>fp, "{"
+        print >>fp, "\tswitch(n) {"
+        for k, v in normalization:
+            print >>fp, "\tcase %s: return 0x%s;" % (hex(k), v)
+        print >>fp, "\tdefault: return 0;"
+        print >>fp, "\t}\n}\n"
+
+    fp.close()
+
+# --------------------------------------------------------------------
+# unicode character type tables
+
+def makeunicodetype(unicode, trace):
+
+    FILE = "Objects/unicodetype_db.h"
+
+    print "--- Preparing", FILE, "..."
+
+    # extract unicode types
+    dummy = (0, 0, 0, 0, 0, 0)
+    table = [dummy]
+    cache = {0: dummy}
+    index = [0] * len(unicode.chars)
+    numeric = {}
+    spaces = []
+    linebreaks = []
+
+    for char in unicode.chars:
+        record = unicode.table[char]
+        if record:
+            # extract database properties
+            category = record[2]
+            bidirectional = record[4]
+            properties = record[16]
+            flags = 0
+            delta = True
+            if category in ["Lm", "Lt", "Lu", "Ll", "Lo"]:
+                flags |= ALPHA_MASK
+            if category == "Ll":
+                flags |= LOWER_MASK
+            if 'Line_Break' in properties or bidirectional == "B":
+                flags |= LINEBREAK_MASK
+                linebreaks.append(char)
+            if category == "Zs" or bidirectional in ("WS", "B", "S"):
+                flags |= SPACE_MASK
+                spaces.append(char)
+            if category == "Lt":
+                flags |= TITLE_MASK
+            if category == "Lu":
+                flags |= UPPER_MASK
+            # use delta predictor for upper/lower/title if it fits
+            if record[12]:
+                upper = int(record[12], 16)
+            else:
+                upper = char
+            if record[13]:
+                lower = int(record[13], 16)
+            else:
+                lower = char
+            if record[14]:
+                title = int(record[14], 16)
+            else:
+                # UCD.html says that a missing title char means that
+                # it defaults to the uppercase character, not to the
+                # character itself.
Apparently, in the current UCD (5.x) + # this feature is never used + title = upper + upper_d = upper - char + lower_d = lower - char + title_d = title - char + if -32768 <= upper_d <= 32767 and \ + -32768 <= lower_d <= 32767 and \ + -32768 <= title_d <= 32767: + # use deltas + upper = upper_d & 0xffff + lower = lower_d & 0xffff + title = title_d & 0xffff + else: + flags |= NODELTA_MASK + # decimal digit, integer digit + decimal = 0 + if record[6]: + flags |= DECIMAL_MASK + decimal = int(record[6]) + digit = 0 + if record[7]: + flags |= DIGIT_MASK + digit = int(record[7]) + if record[8]: + flags |= NUMERIC_MASK + numeric.setdefault(record[8], []).append(char) + item = ( + upper, lower, title, decimal, digit, flags + ) + # add entry to index and item tables + i = cache.get(item) + if i is None: + cache[item] = i = len(table) + table.append(item) + index[char] = i + + print len(table), "unique character type entries" + print sum(map(len, numeric.values())), "numeric code points" + print len(spaces), "whitespace code points" + print len(linebreaks), "linebreak code points" + + print "--- Writing", FILE, "..." + + fp = open(FILE, "w") + print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION) + print >>fp + print >>fp, "/* a list of unique character type descriptors */" + print >>fp, "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {" + for item in table: + print >>fp, " {%d, %d, %d, %d, %d, %d}," % item + print >>fp, "};" + print >>fp + + # split decomposition index table + index1, index2, shift = splitbins(index, trace) + + print >>fp, "/* type indexes */" + print >>fp, "#define SHIFT", shift + Array("index1", index1).dump(fp, trace) + Array("index2", index2).dump(fp, trace) + + # Generate code for _PyUnicode_ToNumeric() + numeric_items = sorted(numeric.items()) + print >>fp, '/* Returns the numeric value as double for Unicode characters' + print >>fp, ' * having this property, -1.0 otherwise.' + print >>fp, ' */' + print >>fp, 'double _PyUnicode_ToNumeric(Py_UNICODE ch)' + print >>fp, '{' + print >>fp, ' switch (ch) {' + for value, codepoints in numeric_items: + # Turn text into float literals + parts = value.split('/') + parts = [repr(float(part)) for part in parts] + value = '/'.join(parts) + + haswide = False + hasnonewide = False + codepoints.sort() + for codepoint in codepoints: + if codepoint < 0x10000: + hasnonewide = True + if codepoint >= 0x10000 and not haswide: + print >>fp, '#ifdef Py_UNICODE_WIDE' + haswide = True + print >>fp, ' case 0x%04X:' % (codepoint,) + if haswide and hasnonewide: + print >>fp, '#endif' + print >>fp, ' return (double) %s;' % (value,) + if haswide and not hasnonewide: + print >>fp, '#endif' + print >>fp,' }' + print >>fp,' return -1.0;' + print >>fp,'}' + print >>fp + + # Generate code for _PyUnicode_IsWhitespace() + print >>fp, "/* Returns 1 for Unicode characters having the bidirectional" + print >>fp, " * type 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise." 
+ print >>fp, " */" + print >>fp, 'int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)' + print >>fp, '{' + print >>fp, '#ifdef WANT_WCTYPE_FUNCTIONS' + print >>fp, ' return iswspace(ch);' + print >>fp, '#else' + print >>fp, ' switch (ch) {' + + haswide = False + hasnonewide = False + for codepoint in sorted(spaces): + if codepoint < 0x10000: + hasnonewide = True + if codepoint >= 0x10000 and not haswide: + print >>fp, '#ifdef Py_UNICODE_WIDE' + haswide = True + print >>fp, ' case 0x%04X:' % (codepoint,) + if haswide and hasnonewide: + print >>fp, '#endif' + print >>fp, ' return 1;' + if haswide and not hasnonewide: + print >>fp, '#endif' + + print >>fp,' }' + print >>fp,' return 0;' + print >>fp, '#endif' + print >>fp,'}' + print >>fp + + # Generate code for _PyUnicode_IsLinebreak() + print >>fp, "/* Returns 1 for Unicode characters having the line break" + print >>fp, " * property 'BK', 'CR', 'LF' or 'NL' or having bidirectional" + print >>fp, " * type 'B', 0 otherwise." + print >>fp, " */" + print >>fp, 'int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)' + print >>fp, '{' + print >>fp, ' switch (ch) {' + haswide = False + hasnonewide = False + for codepoint in sorted(linebreaks): + if codepoint < 0x10000: + hasnonewide = True + if codepoint >= 0x10000 and not haswide: + print >>fp, '#ifdef Py_UNICODE_WIDE' + haswide = True + print >>fp, ' case 0x%04X:' % (codepoint,) + if haswide and hasnonewide: + print >>fp, '#endif' + print >>fp, ' return 1;' + if haswide and not hasnonewide: + print >>fp, '#endif' + + print >>fp,' }' + print >>fp,' return 0;' + print >>fp,'}' + print >>fp + + fp.close() + +# -------------------------------------------------------------------- +# unicode name database + +def makeunicodename(unicode, trace): + + FILE = "Modules/unicodename_db.h" + + print "--- Preparing", FILE, "..." + + # collect names + names = [None] * len(unicode.chars) + + for char in unicode.chars: + record = unicode.table[char] + if record: + name = record[1].strip() + if name and name[0] != "<": + names[char] = name + chr(0) + + print len(filter(lambda n: n is not None, names)), "distinct names" + + # collect unique words from names (note that we differ between + # words inside a sentence, and words ending a sentence. the + # latter includes the trailing null byte. 
+ + words = {} + n = b = 0 + for char in unicode.chars: + name = names[char] + if name: + w = name.split() + b = b + len(name) + n = n + len(w) + for w in w: + l = words.get(w) + if l: + l.append(None) + else: + words[w] = [len(words)] + + print n, "words in text;", b, "bytes" + + wordlist = words.items() + + # sort on falling frequency, then by name + def word_key(a): + aword, alist = a + return -len(alist), aword + wordlist.sort(key=word_key) + + # figure out how many phrasebook escapes we need + escapes = 0 + while escapes * 256 < len(wordlist): + escapes = escapes + 1 + print escapes, "escapes" + + short = 256 - escapes + + assert short > 0 + + print short, "short indexes in lexicon" + + # statistics + n = 0 + for i in range(short): + n = n + len(wordlist[i][1]) + print n, "short indexes in phrasebook" + + # pick the most commonly used words, and sort the rest on falling + # length (to maximize overlap) + + wordlist, wordtail = wordlist[:short], wordlist[short:] + wordtail.sort(key=lambda a: a[0], reverse=True) + wordlist.extend(wordtail) + + # generate lexicon from words + + lexicon_offset = [0] + lexicon = "" + words = {} + + # build a lexicon string + offset = 0 + for w, x in wordlist: + # encoding: bit 7 indicates last character in word (chr(128) + # indicates the last character in an entire string) + ww = w[:-1] + chr(ord(w[-1])+128) + # reuse string tails, when possible + o = lexicon.find(ww) + if o < 0: + o = offset + lexicon = lexicon + ww + offset = offset + len(w) + words[w] = len(lexicon_offset) + lexicon_offset.append(o) + + lexicon = map(ord, lexicon) + + # generate phrasebook from names and lexicon + phrasebook = [0] + phrasebook_offset = [0] * len(unicode.chars) + for char in unicode.chars: + name = names[char] + if name: + w = name.split() + phrasebook_offset[char] = len(phrasebook) + for w in w: + i = words[w] + if i < short: + phrasebook.append(i) + else: + # store as two bytes + phrasebook.append((i>>8) + short) + phrasebook.append(i&255) + + assert getsize(phrasebook) == 1 + + # + # unicode name hash table + + # extract names + data = [] + for char in unicode.chars: + record = unicode.table[char] + if record: + name = record[1].strip() + if name and name[0] != "<": + data.append((name, char)) + + # the magic number 47 was chosen to minimize the number of + # collisions on the current data set. if you like, change it + # and see what happens... + + codehash = Hash("code", data, 47) + + print "--- Writing", FILE, "..." 
+ + fp = open(FILE, "w") + print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION) + print >>fp + print >>fp, "#define NAME_MAXLEN", 256 + print >>fp + print >>fp, "/* lexicon */" + Array("lexicon", lexicon).dump(fp, trace) + Array("lexicon_offset", lexicon_offset).dump(fp, trace) + + # split decomposition index table + offset1, offset2, shift = splitbins(phrasebook_offset, trace) + + print >>fp, "/* code->name phrasebook */" + print >>fp, "#define phrasebook_shift", shift + print >>fp, "#define phrasebook_short", short + + Array("phrasebook", phrasebook).dump(fp, trace) + Array("phrasebook_offset1", offset1).dump(fp, trace) + Array("phrasebook_offset2", offset2).dump(fp, trace) + + print >>fp, "/* name->code dictionary */" + codehash.dump(fp, trace) + + fp.close() + + +def merge_old_version(version, new, old): + # Changes to exclusion file not implemented yet + if old.exclusions != new.exclusions: + raise NotImplementedError, "exclusions differ" + + # In these change records, 0xFF means "no change" + bidir_changes = [0xFF]*0x110000 + category_changes = [0xFF]*0x110000 + decimal_changes = [0xFF]*0x110000 + mirrored_changes = [0xFF]*0x110000 + # In numeric data, 0 means "no change", + # -1 means "did not have a numeric value + numeric_changes = [0] * 0x110000 + # normalization_changes is a list of key-value pairs + normalization_changes = [] + for i in range(0x110000): + if new.table[i] is None: + # Characters unassigned in the new version ought to + # be unassigned in the old one + assert old.table[i] is None + continue + # check characters unassigned in the old version + if old.table[i] is None: + # category 0 is "unassigned" + category_changes[i] = 0 + continue + # check characters that differ + if old.table[i] != new.table[i]: + for k in range(len(old.table[i])): + if old.table[i][k] != new.table[i][k]: + value = old.table[i][k] + if k == 2: + #print "CATEGORY",hex(i), old.table[i][k], new.table[i][k] + category_changes[i] = CATEGORY_NAMES.index(value) + elif k == 4: + #print "BIDIR",hex(i), old.table[i][k], new.table[i][k] + bidir_changes[i] = BIDIRECTIONAL_NAMES.index(value) + elif k == 5: + #print "DECOMP",hex(i), old.table[i][k], new.table[i][k] + # We assume that all normalization changes are in 1:1 mappings + assert " " not in value + normalization_changes.append((i, value)) + elif k == 6: + #print "DECIMAL",hex(i), old.table[i][k], new.table[i][k] + # we only support changes where the old value is a single digit + assert value in "0123456789" + decimal_changes[i] = int(value) + elif k == 8: + # print "NUMERIC",hex(i), `old.table[i][k]`, new.table[i][k] + # Since 0 encodes "no change", the old value is better not 0 + if not value: + numeric_changes[i] = -1 + else: + numeric_changes[i] = float(value) + assert numeric_changes[i] not in (0, -1) + elif k == 9: + if value == 'Y': + mirrored_changes[i] = '1' + else: + mirrored_changes[i] = '0' + elif k == 11: + # change to ISO comment, ignore + pass + elif k == 12: + # change to simple uppercase mapping; ignore + pass + elif k == 13: + # change to simple lowercase mapping; ignore + pass + elif k == 14: + # change to simple titlecase mapping; ignore + pass + elif k == 16: + # change to properties; not yet + pass + else: + class Difference(Exception):pass + raise Difference, (hex(i), k, old.table[i], new.table[i]) + new.changed.append((version, zip(bidir_changes, category_changes, + decimal_changes, mirrored_changes, + numeric_changes), + normalization_changes)) + + +# 
-------------------------------------------------------------------- +# the following support code is taken from the unidb utilities +# Copyright (c) 1999-2000 by Secret Labs AB + +# load a unicode-data file from disk + +class UnicodeData: + # Record structure: + # [ID, name, category, combining, bidi, decomp, (6) + # decimal, digit, numeric, bidi-mirrored, Unicode-1-name, (11) + # ISO-comment, uppercase, lowercase, titlecase, ea-width, (16) + # properties] (17) + + def __init__(self, filename, exclusions, eastasianwidth, unihan, + derivednormalizationprops=None, linebreakprops=None, + expand=1): + self.changed = [] + file = open(filename) + table = [None] * 0x110000 + while 1: + s = file.readline() + if not s: + break + s = s.strip().split(";") + char = int(s[0], 16) + table[char] = s + + # expand first-last ranges + if expand: + field = None + for i in range(0, 0x110000): + s = table[i] + if s: + if s[1][-6:] == "First>": + s[1] = "" + field = s + elif s[1][-5:] == "Last>": + s[1] = "" + field = None + elif field: + f2 = field[:] + f2[0] = "%X" % i + table[i] = f2 + + # public attributes + self.filename = filename + self.table = table + self.chars = range(0x110000) # unicode 3.2 + + file = open(exclusions) + self.exclusions = {} + for s in file: + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + char = int(s.split()[0],16) + self.exclusions[char] = 1 + + widths = [None] * 0x110000 + for s in open(eastasianwidth): + s = s.strip() + if not s: + continue + if s[0] == '#': + continue + s = s.split()[0].split(';') + if '..' in s[0]: + first, last = [int(c, 16) for c in s[0].split('..')] + chars = range(first, last+1) + else: + chars = [int(s[0], 16)] + for char in chars: + widths[char] = s[1] + for i in range(0, 0x110000): + if table[i] is not None: + table[i].append(widths[i]) + + for i in range(0, 0x110000): + if table[i] is not None: + table[i].append(set()) + if linebreakprops: + for s in open(linebreakprops): + s = s.partition('#')[0] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in MANDATORY_LINE_BREAKS: + continue + if '..' not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + table[char][-1].add('Line_Break') + + if derivednormalizationprops: + quickchecks = [0] * 0x110000 # default is Yes + qc_order = 'NFD_QC NFKD_QC NFC_QC NFKC_QC'.split() + for s in open(derivednormalizationprops): + if '#' in s: + s = s[:s.index('#')] + s = [i.strip() for i in s.split(';')] + if len(s) < 2 or s[1] not in qc_order: + continue + quickcheck = 'MN'.index(s[2]) + 1 # Maybe or No + quickcheck_shift = qc_order.index(s[1])*2 + quickcheck <<= quickcheck_shift + if '..' 
not in s[0]: + first = last = int(s[0], 16) + else: + first, last = [int(c, 16) for c in s[0].split('..')] + for char in range(first, last+1): + assert not (quickchecks[char]>>quickcheck_shift)&3 + quickchecks[char] |= quickcheck + for i in range(0, 0x110000): + if table[i] is not None: + table[i].append(quickchecks[i]) + + for line in open(unihan): + if not line.startswith('U+'): + continue + code, tag, value = line.split(None, 3)[:3] + if tag not in ('kAccountingNumeric', 'kPrimaryNumeric', + 'kOtherNumeric'): + continue + value = value.strip().replace(',', '') + i = int(code[2:], 16) + # Patch the numeric field + if table[i] is not None: + table[i][8] = value + + def uselatin1(self): + # restrict character range to ISO Latin 1 + self.chars = range(256) + +# hash table tools + +# this is a straight-forward reimplementation of Python's built-in +# dictionary type, using a static data structure, and a custom string +# hash algorithm. + +def myhash(s, magic): + h = 0 + for c in map(ord, s.upper()): + h = (h * magic) + c + ix = h & 0xff000000L + if ix: + h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff + return h + +SIZES = [ + (4,3), (8,3), (16,3), (32,5), (64,3), (128,3), (256,29), (512,17), + (1024,9), (2048,5), (4096,83), (8192,27), (16384,43), (32768,3), + (65536,45), (131072,9), (262144,39), (524288,39), (1048576,9), + (2097152,5), (4194304,3), (8388608,33), (16777216,27) +] + +class Hash: + def __init__(self, name, data, magic): + # turn a (key, value) list into a static hash table structure + + # determine table size + for size, poly in SIZES: + if size > len(data): + poly = size + poly + break + else: + raise AssertionError, "ran out of polynominals" + + print size, "slots in hash table" + + table = [None] * size + + mask = size-1 + + n = 0 + + hash = myhash + + # initialize hash table + for key, value in data: + h = hash(key, magic) + i = (~h) & mask + v = table[i] + if v is None: + table[i] = value + continue + incr = (h ^ (h >> 3)) & mask; + if not incr: + incr = mask + while 1: + n = n + 1 + i = (i + incr) & mask + v = table[i] + if v is None: + table[i] = value + break + incr = incr << 1 + if incr > mask: + incr = incr ^ poly + + print n, "collisions" + self.collisions = n + + for i in range(len(table)): + if table[i] is None: + table[i] = 0 + + self.data = Array(name + "_hash", table) + self.magic = magic + self.name = name + self.size = size + self.poly = poly + + def dump(self, file, trace): + # write data to file, as a C array + self.data.dump(file, trace) + file.write("#define %s_magic %d\n" % (self.name, self.magic)) + file.write("#define %s_size %d\n" % (self.name, self.size)) + file.write("#define %s_poly %d\n" % (self.name, self.poly)) + +# stuff to deal with arrays of unsigned integers + +class Array: + + def __init__(self, name, data): + self.name = name + self.data = data + + def dump(self, file, trace=0): + # write data to file, as a C array + size = getsize(self.data) + if trace: + print >>sys.stderr, self.name+":", size*len(self.data), "bytes" + file.write("static ") + if size == 1: + file.write("unsigned char") + elif size == 2: + file.write("unsigned short") + else: + file.write("unsigned int") + file.write(" " + self.name + "[] = {\n") + if self.data: + s = " " + for item in self.data: + i = str(item) + ", " + if len(s) + len(i) > 78: + file.write(s + "\n") + s = " " + i + else: + s = s + i + if s.strip(): + file.write(s + "\n") + file.write("};\n\n") + +def getsize(data): + # return smallest possible integer size for the given array + maxdata = max(data) + if maxdata 
< 256: + return 1 + elif maxdata < 65536: + return 2 + else: + return 4 + +def splitbins(t, trace=0): + """t, trace=0 -> (t1, t2, shift). Split a table to save space. + + t is a sequence of ints. This function can be useful to save space if + many of the ints are the same. t1 and t2 are lists of ints, and shift + is an int, chosen to minimize the combined size of t1 and t2 (in C + code), and where for each i in range(len(t)), + t[i] == t2[(t1[i >> shift] << shift) + (i & mask)] + where mask is a bitmask isolating the last "shift" bits. + + If optional arg trace is non-zero (default zero), progress info + is printed to sys.stderr. The higher the value, the more info + you'll get. + """ + + if trace: + def dump(t1, t2, shift, bytes): + print >>sys.stderr, "%d+%d bins at shift %d; %d bytes" % ( + len(t1), len(t2), shift, bytes) + print >>sys.stderr, "Size of original table:", len(t)*getsize(t), \ + "bytes" + n = len(t)-1 # last valid index + maxshift = 0 # the most we can shift n and still have something left + if n > 0: + while n >> 1: + n >>= 1 + maxshift += 1 + del n + bytes = sys.maxint # smallest total size so far + t = tuple(t) # so slices can be dict keys + for shift in range(maxshift + 1): + t1 = [] + t2 = [] + size = 2**shift + bincache = {} + for i in range(0, len(t), size): + bin = t[i:i+size] + index = bincache.get(bin) + if index is None: + index = len(t2) + bincache[bin] = index + t2.extend(bin) + t1.append(index >> shift) + # determine memory size + b = len(t1)*getsize(t1) + len(t2)*getsize(t2) + if trace > 1: + dump(t1, t2, shift, b) + if b < bytes: + best = t1, t2, shift + bytes = b + t1, t2, shift = best + if trace: + print >>sys.stderr, "Best:", + dump(t1, t2, shift, bytes) + if __debug__: + # exhaustively verify that the decomposition is correct + mask = ~((~0) << shift) # i.e., low-bit mask of shift bits + for i in xrange(len(t)): + assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)] + return best + +if __name__ == "__main__": + maketables(1) diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/mkstringprep.py b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/mkstringprep.py new file mode 100644 index 0000000000..49d1393386 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/mkstringprep.py @@ -0,0 +1,425 @@ +import re, unicodedata, sys + +if sys.maxunicode == 65535: + raise RuntimeError("need UCS-4 Python") + +def gen_category(cats): + for i in range(0, 0x110000): + if unicodedata.category(unichr(i)) in cats: + yield(i) + +def gen_bidirectional(cats): + for i in range(0, 0x110000): + if unicodedata.bidirectional(unichr(i)) in cats: + yield(i) + +def compact_set(l): + single = [] + tuple = [] + prev = None + span = 0 + for e in l: + if prev is None: + prev = e + span = 0 + continue + if prev+span+1 != e: + if span > 2: + tuple.append((prev,prev+span+1)) + else: + for i in range(prev, prev+span+1): + single.append(i) + prev = e + span = 0 + else: + span += 1 + if span: + tuple.append((prev,prev+span+1)) + else: + single.append(prev) + tuple = " + ".join(["range(%d,%d)" % t for t in tuple]) + if not single: + return "set(%s)" % tuple + if not tuple: + return "set(%s)" % repr(single) + return "set(%s + %s)" % (repr(single),tuple) + +############## Read the tables in the RFC ####################### + +data = open("rfc3454.txt").readlines() + +tables = [] +curname = None +for l in data: + l = l.strip() + if not l: + continue + # Skip RFC page breaks + if l.startswith("Hoffman & Blanchet") or\ + l.startswith("RFC 3454"): + 
continue + # Find start/end lines + m = re.match("----- (Start|End) Table ([A-Z](.[0-9])+) -----", l) + if m: + if m.group(1) == "Start": + if curname: + raise RuntimeError("Double Start", (curname, l)) + curname = m.group(2) + table = {} + tables.append((curname, table)) + continue + else: + if not curname: + raise RuntimeError("End without start", l) + curname = None + continue + if not curname: + continue + # Now we are in a table + fields = l.split(";") + if len(fields) > 1: + # Drop comment field + fields = fields[:-1] + if len(fields) == 1: + fields = fields[0].split("-") + if len(fields) > 1: + # range + try: + start, end = fields + except ValueError: + raise RuntimeError("Unpacking problem", l) + else: + start = end = fields[0] + start = int(start, 16) + end = int(end, 16) + for i in range(start, end+1): + table[i] = i + else: + code, value = fields + value = value.strip() + if value: + value = [int(v, 16) for v in value.split(" ")] + else: + # table B.1 + value = None + table[int(code, 16)] = value + +########### Generate compact Python versions of the tables ############# + +print """# This file is generated by mkstringprep.py. DO NOT EDIT. +\"\"\"Library that exposes various tables found in the StringPrep RFC 3454. + +There are two kinds of tables: sets, for which a member test is provided, +and mappings, for which a mapping function is provided. +\"\"\" + +import unicodedata +""" + +print "assert unicodedata.unidata_version == %s" % repr(unicodedata.unidata_version) + +# A.1 is the table of unassigned characters +# XXX Plane 15 PUA is listed as unassigned in Python. +name, table = tables[0] +del tables[0] +assert name == "A.1" +table = set(table.keys()) +Cn = set(gen_category(["Cn"])) + +# FDD0..FDEF are process internal codes +Cn -= set(range(0xFDD0, 0xFDF0)) +# not a character +Cn -= set(range(0xFFFE, 0x110000, 0x10000)) +Cn -= set(range(0xFFFF, 0x110000, 0x10000)) + +# assert table == Cn + +print """ +def in_table_a1(code): + if unicodedata.category(code) != 'Cn': return False + c = ord(code) + if 0xFDD0 <= c < 0xFDF0: return False + return (c & 0xFFFF) not in (0xFFFE, 0xFFFF) +""" + +# B.1 cannot easily be derived +name, table = tables[0] +del tables[0] +assert name == "B.1" +table = sorted(table.keys()) +print """ +b1_set = """ + compact_set(table) + """ +def in_table_b1(code): + return ord(code) in b1_set +""" + +# B.2 and B.3 is case folding. +# It takes CaseFolding.txt into account, which is +# not available in the Python database. Since +# B.2 is derived from B.3, we process B.3 first. +# B.3 supposedly *is* CaseFolding-3.2.0.txt. + +name, table_b2 = tables[0] +del tables[0] +assert name == "B.2" + +name, table_b3 = tables[0] +del tables[0] +assert name == "B.3" + +# B.3 is mostly Python's .lower, except for a number +# of special cases, e.g. considering canonical forms. + +b3_exceptions = {} + +for k,v in table_b2.items(): + if map(ord, unichr(k).lower()) != v: + b3_exceptions[k] = u"".join(map(unichr,v)) + +b3 = sorted(b3_exceptions.items()) + +print """ +b3_exceptions = {""" +for i,(k,v) in enumerate(b3): + print "0x%x:%s," % (k, repr(v)), + if i % 4 == 3: + print +print "}" + +print """ +def map_table_b3(code): + r = b3_exceptions.get(ord(code)) + if r is not None: return r + return code.lower() +""" + +def map_table_b3(code): + r = b3_exceptions.get(ord(code)) + if r is not None: return r + return code.lower() + +# B.2 is case folding for NFKC. 
This is the same as B.3, +# except where NormalizeWithKC(Fold(a)) != +# NormalizeWithKC(Fold(NormalizeWithKC(Fold(a)))) + +def map_table_b2(a): + al = map_table_b3(a) + b = unicodedata.normalize("NFKC", al) + bl = u"".join([map_table_b3(ch) for ch in b]) + c = unicodedata.normalize("NFKC", bl) + if b != c: + return c + else: + return al + +specials = {} +for k,v in table_b2.items(): + if map(ord, map_table_b2(unichr(k))) != v: + specials[k] = v + +# B.3 should not add any additional special cases +assert specials == {} + +print """ +def map_table_b2(a): + al = map_table_b3(a) + b = unicodedata.normalize("NFKC", al) + bl = u"".join([map_table_b3(ch) for ch in b]) + c = unicodedata.normalize("NFKC", bl) + if b != c: + return c + else: + return al +""" + +# C.1.1 is a table with a single character +name, table = tables[0] +del tables[0] +assert name == "C.1.1" +assert table == {0x20:0x20} + +print """ +def in_table_c11(code): + return code == u" " +""" + +# C.1.2 is the rest of all space characters +name, table = tables[0] +del tables[0] +assert name == "C.1.2" + +# table = set(table.keys()) +# Zs = set(gen_category(["Zs"])) - set([0x20]) +# assert Zs == table + +print """ +def in_table_c12(code): + return unicodedata.category(code) == "Zs" and code != u" " + +def in_table_c11_c12(code): + return unicodedata.category(code) == "Zs" +""" + +# C.2.1 ASCII control characters +name, table_c21 = tables[0] +del tables[0] +assert name == "C.2.1" + +Cc = set(gen_category(["Cc"])) +Cc_ascii = Cc & set(range(128)) +table_c21 = set(table_c21.keys()) +assert Cc_ascii == table_c21 + +print """ +def in_table_c21(code): + return ord(code) < 128 and unicodedata.category(code) == "Cc" +""" + +# C.2.2 Non-ASCII control characters. It also includes +# a number of characters in category Cf. 
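
[Editor's note] Since the output of this script ships in the standard library as the stringprep module, the B.2/B.3 relationship worked out above can be spot-checked interactively; a few illustrative probes:

    import stringprep

    # B.3 is case folding; plain ASCII simply lowercases.
    assert stringprep.map_table_b3(u"A") == u"a"

    # U+00DF (LATIN SMALL LETTER SHARP S) folds to "ss" under both
    # B.3 and the NFKC-aware B.2.
    assert stringprep.map_table_b2(u"\u00df") == u"ss"

    # C.1.2: non-ASCII space characters such as NO-BREAK SPACE.
    assert stringprep.in_table_c12(u"\u00a0")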
+name, table_c22 = tables[0] +del tables[0] +assert name == "C.2.2" + +Cc_nonascii = Cc - Cc_ascii +table_c22 = set(table_c22.keys()) +assert len(Cc_nonascii - table_c22) == 0 + +specials = list(table_c22 - Cc_nonascii) +specials.sort() + +print """c22_specials = """ + compact_set(specials) + """ +def in_table_c22(code): + c = ord(code) + if c < 128: return False + if unicodedata.category(code) == "Cc": return True + return c in c22_specials + +def in_table_c21_c22(code): + return unicodedata.category(code) == "Cc" or \\ + ord(code) in c22_specials +""" + +# C.3 Private use +name, table = tables[0] +del tables[0] +assert name == "C.3" + +Co = set(gen_category(["Co"])) +assert set(table.keys()) == Co + +print """ +def in_table_c3(code): + return unicodedata.category(code) == "Co" +""" + +# C.4 Non-character code points, xFFFE, xFFFF +# plus process internal codes +name, table = tables[0] +del tables[0] +assert name == "C.4" + +nonchar = set(range(0xFDD0,0xFDF0) + + range(0xFFFE,0x110000,0x10000) + + range(0xFFFF,0x110000,0x10000)) +table = set(table.keys()) +assert table == nonchar + +print """ +def in_table_c4(code): + c = ord(code) + if c < 0xFDD0: return False + if c < 0xFDF0: return True + return (ord(code) & 0xFFFF) in (0xFFFE, 0xFFFF) +""" + +# C.5 Surrogate codes +name, table = tables[0] +del tables[0] +assert name == "C.5" + +Cs = set(gen_category(["Cs"])) +assert set(table.keys()) == Cs + +print """ +def in_table_c5(code): + return unicodedata.category(code) == "Cs" +""" + +# C.6 Inappropriate for plain text +name, table = tables[0] +del tables[0] +assert name == "C.6" + +table = sorted(table.keys()) + +print """ +c6_set = """ + compact_set(table) + """ +def in_table_c6(code): + return ord(code) in c6_set +""" + +# C.7 Inappropriate for canonical representation +name, table = tables[0] +del tables[0] +assert name == "C.7" + +table = sorted(table.keys()) + +print """ +c7_set = """ + compact_set(table) + """ +def in_table_c7(code): + return ord(code) in c7_set +""" + +# C.8 Change display properties or are deprecated +name, table = tables[0] +del tables[0] +assert name == "C.8" + +table = sorted(table.keys()) + +print """ +c8_set = """ + compact_set(table) + """ +def in_table_c8(code): + return ord(code) in c8_set +""" + +# C.9 Tagging characters +name, table = tables[0] +del tables[0] +assert name == "C.9" + +table = sorted(table.keys()) + +print """ +c9_set = """ + compact_set(table) + """ +def in_table_c9(code): + return ord(code) in c9_set +""" + +# D.1 Characters with bidirectional property "R" or "AL" +name, table = tables[0] +del tables[0] +assert name == "D.1" + +RandAL = set(gen_bidirectional(["R","AL"])) +assert set(table.keys()) == RandAL + +print """ +def in_table_d1(code): + return unicodedata.bidirectional(code) in ("R","AL") +""" + +# D.2 Characters with bidirectional property "L" +name, table = tables[0] +del tables[0] +assert name == "D.2" + +L = set(gen_bidirectional(["L"])) +assert set(table.keys()) == L + +print """ +def in_table_d2(code): + return unicodedata.bidirectional(code) == "L" +""" diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/CP1140.TXT b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/CP1140.TXT new file mode 100644 index 0000000000..68e0fdc3d2 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/CP1140.TXT @@ -0,0 +1,291 @@ +# +# Name: CP1140 +# Unicode version: 3.2 +# Table version: 1.0 +# Table format: Format A +# Date: 2005-10-25 +# Authors: Marc-Andre 
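
[Editor's note] Tables D.1 and D.2 generated above exist to support the bidi rules of RFC 3454 section 6. A hypothetical checker (the function name and structure are invented here, not part of this script) shows how a profile such as nameprep consumes them:

    import stringprep

    def check_bidi(label):
        # RFC 3454, section 6: RandALCat and LCat must not mix, and a
        # string containing RandALCat must start and end with one.
        has_ral = any(stringprep.in_table_d1(ch) for ch in label)
        has_l = any(stringprep.in_table_d2(ch) for ch in label)
        if has_ral and has_l:
            raise UnicodeError("RandALCat mixed with LCat")
        if has_ral and not (stringprep.in_table_d1(label[0]) and
                            stringprep.in_table_d1(label[-1])):
            raise UnicodeError("must start and end with RandALCat")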
Lemburg +# +# This encoding is a modified CP037 encoding (with added Euro +# currency sign). +# +# (c) Copyright Marc-Andre Lemburg, 2005. +# Licensed to PSF under a Contributor Agreement. +# +# Based on the file +# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT +# which is: +# +# Copyright (c) 2002 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +0x00 0x0000 #NULL +0x01 0x0001 #START OF HEADING +0x02 0x0002 #START OF TEXT +0x03 0x0003 #END OF TEXT +0x04 0x009C #CONTROL +0x05 0x0009 #HORIZONTAL TABULATION +0x06 0x0086 #CONTROL +0x07 0x007F #DELETE +0x08 0x0097 #CONTROL +0x09 0x008D #CONTROL +0x0A 0x008E #CONTROL +0x0B 0x000B #VERTICAL TABULATION +0x0C 0x000C #FORM FEED +0x0D 0x000D #CARRIAGE RETURN +0x0E 0x000E #SHIFT OUT +0x0F 0x000F #SHIFT IN +0x10 0x0010 #DATA LINK ESCAPE +0x11 0x0011 #DEVICE CONTROL ONE +0x12 0x0012 #DEVICE CONTROL TWO +0x13 0x0013 #DEVICE CONTROL THREE +0x14 0x009D #CONTROL +0x15 0x0085 #CONTROL +0x16 0x0008 #BACKSPACE +0x17 0x0087 #CONTROL +0x18 0x0018 #CANCEL +0x19 0x0019 #END OF MEDIUM +0x1A 0x0092 #CONTROL +0x1B 0x008F #CONTROL +0x1C 0x001C #FILE SEPARATOR +0x1D 0x001D #GROUP SEPARATOR +0x1E 0x001E #RECORD SEPARATOR +0x1F 0x001F #UNIT SEPARATOR +0x20 0x0080 #CONTROL +0x21 0x0081 #CONTROL +0x22 0x0082 #CONTROL +0x23 0x0083 #CONTROL +0x24 0x0084 #CONTROL +0x25 0x000A #LINE FEED +0x26 0x0017 #END OF TRANSMISSION BLOCK +0x27 0x001B #ESCAPE +0x28 0x0088 #CONTROL +0x29 0x0089 #CONTROL +0x2A 0x008A #CONTROL +0x2B 0x008B #CONTROL +0x2C 0x008C #CONTROL +0x2D 0x0005 #ENQUIRY +0x2E 0x0006 #ACKNOWLEDGE +0x2F 0x0007 #BELL +0x30 0x0090 #CONTROL +0x31 0x0091 #CONTROL +0x32 0x0016 #SYNCHRONOUS IDLE +0x33 0x0093 #CONTROL +0x34 0x0094 #CONTROL +0x35 0x0095 #CONTROL +0x36 0x0096 #CONTROL +0x37 0x0004 #END OF TRANSMISSION +0x38 0x0098 #CONTROL +0x39 0x0099 #CONTROL +0x3A 0x009A #CONTROL +0x3B 0x009B #CONTROL +0x3C 0x0014 #DEVICE CONTROL FOUR +0x3D 0x0015 #NEGATIVE ACKNOWLEDGE +0x3E 0x009E #CONTROL +0x3F 0x001A #SUBSTITUTE +0x40 0x0020 #SPACE +0x41 0x00A0 #NO-BREAK SPACE +0x42 0x00E2 #LATIN SMALL LETTER A WITH CIRCUMFLEX +0x43 0x00E4 #LATIN SMALL LETTER A WITH DIAERESIS +0x44 0x00E0 #LATIN SMALL LETTER A WITH GRAVE +0x45 0x00E1 #LATIN SMALL LETTER A WITH ACUTE +0x46 0x00E3 #LATIN SMALL LETTER A WITH TILDE +0x47 0x00E5 #LATIN SMALL LETTER A WITH RING ABOVE +0x48 0x00E7 #LATIN SMALL LETTER C WITH CEDILLA +0x49 0x00F1 #LATIN SMALL LETTER N WITH TILDE +0x4A 0x00A2 #CENT SIGN +0x4B 0x002E #FULL STOP +0x4C 0x003C #LESS-THAN SIGN +0x4D 0x0028 #LEFT PARENTHESIS +0x4E 0x002B #PLUS SIGN +0x4F 0x007C #VERTICAL LINE +0x50 0x0026 #AMPERSAND +0x51 0x00E9 #LATIN SMALL LETTER E WITH ACUTE +0x52 0x00EA #LATIN SMALL LETTER E WITH CIRCUMFLEX +0x53 0x00EB #LATIN SMALL LETTER E WITH DIAERESIS +0x54 0x00E8 #LATIN SMALL LETTER E WITH GRAVE +0x55 0x00ED #LATIN SMALL LETTER I 
WITH ACUTE +0x56 0x00EE #LATIN SMALL LETTER I WITH CIRCUMFLEX +0x57 0x00EF #LATIN SMALL LETTER I WITH DIAERESIS +0x58 0x00EC #LATIN SMALL LETTER I WITH GRAVE +0x59 0x00DF #LATIN SMALL LETTER SHARP S (GERMAN) +0x5A 0x0021 #EXCLAMATION MARK +0x5B 0x0024 #DOLLAR SIGN +0x5C 0x002A #ASTERISK +0x5D 0x0029 #RIGHT PARENTHESIS +0x5E 0x003B #SEMICOLON +0x5F 0x00AC #NOT SIGN +0x60 0x002D #HYPHEN-MINUS +0x61 0x002F #SOLIDUS +0x62 0x00C2 #LATIN CAPITAL LETTER A WITH CIRCUMFLEX +0x63 0x00C4 #LATIN CAPITAL LETTER A WITH DIAERESIS +0x64 0x00C0 #LATIN CAPITAL LETTER A WITH GRAVE +0x65 0x00C1 #LATIN CAPITAL LETTER A WITH ACUTE +0x66 0x00C3 #LATIN CAPITAL LETTER A WITH TILDE +0x67 0x00C5 #LATIN CAPITAL LETTER A WITH RING ABOVE +0x68 0x00C7 #LATIN CAPITAL LETTER C WITH CEDILLA +0x69 0x00D1 #LATIN CAPITAL LETTER N WITH TILDE +0x6A 0x00A6 #BROKEN BAR +0x6B 0x002C #COMMA +0x6C 0x0025 #PERCENT SIGN +0x6D 0x005F #LOW LINE +0x6E 0x003E #GREATER-THAN SIGN +0x6F 0x003F #QUESTION MARK +0x70 0x00F8 #LATIN SMALL LETTER O WITH STROKE +0x71 0x00C9 #LATIN CAPITAL LETTER E WITH ACUTE +0x72 0x00CA #LATIN CAPITAL LETTER E WITH CIRCUMFLEX +0x73 0x00CB #LATIN CAPITAL LETTER E WITH DIAERESIS +0x74 0x00C8 #LATIN CAPITAL LETTER E WITH GRAVE +0x75 0x00CD #LATIN CAPITAL LETTER I WITH ACUTE +0x76 0x00CE #LATIN CAPITAL LETTER I WITH CIRCUMFLEX +0x77 0x00CF #LATIN CAPITAL LETTER I WITH DIAERESIS +0x78 0x00CC #LATIN CAPITAL LETTER I WITH GRAVE +0x79 0x0060 #GRAVE ACCENT +0x7A 0x003A #COLON +0x7B 0x0023 #NUMBER SIGN +0x7C 0x0040 #COMMERCIAL AT +0x7D 0x0027 #APOSTROPHE +0x7E 0x003D #EQUALS SIGN +0x7F 0x0022 #QUOTATION MARK +0x80 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE +0x81 0x0061 #LATIN SMALL LETTER A +0x82 0x0062 #LATIN SMALL LETTER B +0x83 0x0063 #LATIN SMALL LETTER C +0x84 0x0064 #LATIN SMALL LETTER D +0x85 0x0065 #LATIN SMALL LETTER E +0x86 0x0066 #LATIN SMALL LETTER F +0x87 0x0067 #LATIN SMALL LETTER G +0x88 0x0068 #LATIN SMALL LETTER H +0x89 0x0069 #LATIN SMALL LETTER I +0x8A 0x00AB #LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +0x8B 0x00BB #RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0x8C 0x00F0 #LATIN SMALL LETTER ETH (ICELANDIC) +0x8D 0x00FD #LATIN SMALL LETTER Y WITH ACUTE +0x8E 0x00FE #LATIN SMALL LETTER THORN (ICELANDIC) +0x8F 0x00B1 #PLUS-MINUS SIGN +0x90 0x00B0 #DEGREE SIGN +0x91 0x006A #LATIN SMALL LETTER J +0x92 0x006B #LATIN SMALL LETTER K +0x93 0x006C #LATIN SMALL LETTER L +0x94 0x006D #LATIN SMALL LETTER M +0x95 0x006E #LATIN SMALL LETTER N +0x96 0x006F #LATIN SMALL LETTER O +0x97 0x0070 #LATIN SMALL LETTER P +0x98 0x0071 #LATIN SMALL LETTER Q +0x99 0x0072 #LATIN SMALL LETTER R +0x9A 0x00AA #FEMININE ORDINAL INDICATOR +0x9B 0x00BA #MASCULINE ORDINAL INDICATOR +0x9C 0x00E6 #LATIN SMALL LIGATURE AE +0x9D 0x00B8 #CEDILLA +0x9E 0x00C6 #LATIN CAPITAL LIGATURE AE +#0x9F 0x00A4 #CURRENCY SIGN +0x9F 0x20AC # EURO SIGN +0xA0 0x00B5 #MICRO SIGN +0xA1 0x007E #TILDE +0xA2 0x0073 #LATIN SMALL LETTER S +0xA3 0x0074 #LATIN SMALL LETTER T +0xA4 0x0075 #LATIN SMALL LETTER U +0xA5 0x0076 #LATIN SMALL LETTER V +0xA6 0x0077 #LATIN SMALL LETTER W +0xA7 0x0078 #LATIN SMALL LETTER X +0xA8 0x0079 #LATIN SMALL LETTER Y +0xA9 0x007A #LATIN SMALL LETTER Z +0xAA 0x00A1 #INVERTED EXCLAMATION MARK +0xAB 0x00BF #INVERTED QUESTION MARK +0xAC 0x00D0 #LATIN CAPITAL LETTER ETH (ICELANDIC) +0xAD 0x00DD #LATIN CAPITAL LETTER Y WITH ACUTE +0xAE 0x00DE #LATIN CAPITAL LETTER THORN (ICELANDIC) +0xAF 0x00AE #REGISTERED SIGN +0xB0 0x005E #CIRCUMFLEX ACCENT +0xB1 0x00A3 #POUND SIGN +0xB2 0x00A5 #YEN SIGN +0xB3 0x00B7 #MIDDLE DOT +0xB4 0x00A9 #COPYRIGHT SIGN 
+0xB5 0x00A7 #SECTION SIGN +0xB6 0x00B6 #PILCROW SIGN +0xB7 0x00BC #VULGAR FRACTION ONE QUARTER +0xB8 0x00BD #VULGAR FRACTION ONE HALF +0xB9 0x00BE #VULGAR FRACTION THREE QUARTERS +0xBA 0x005B #LEFT SQUARE BRACKET +0xBB 0x005D #RIGHT SQUARE BRACKET +0xBC 0x00AF #MACRON +0xBD 0x00A8 #DIAERESIS +0xBE 0x00B4 #ACUTE ACCENT +0xBF 0x00D7 #MULTIPLICATION SIGN +0xC0 0x007B #LEFT CURLY BRACKET +0xC1 0x0041 #LATIN CAPITAL LETTER A +0xC2 0x0042 #LATIN CAPITAL LETTER B +0xC3 0x0043 #LATIN CAPITAL LETTER C +0xC4 0x0044 #LATIN CAPITAL LETTER D +0xC5 0x0045 #LATIN CAPITAL LETTER E +0xC6 0x0046 #LATIN CAPITAL LETTER F +0xC7 0x0047 #LATIN CAPITAL LETTER G +0xC8 0x0048 #LATIN CAPITAL LETTER H +0xC9 0x0049 #LATIN CAPITAL LETTER I +0xCA 0x00AD #SOFT HYPHEN +0xCB 0x00F4 #LATIN SMALL LETTER O WITH CIRCUMFLEX +0xCC 0x00F6 #LATIN SMALL LETTER O WITH DIAERESIS +0xCD 0x00F2 #LATIN SMALL LETTER O WITH GRAVE +0xCE 0x00F3 #LATIN SMALL LETTER O WITH ACUTE +0xCF 0x00F5 #LATIN SMALL LETTER O WITH TILDE +0xD0 0x007D #RIGHT CURLY BRACKET +0xD1 0x004A #LATIN CAPITAL LETTER J +0xD2 0x004B #LATIN CAPITAL LETTER K +0xD3 0x004C #LATIN CAPITAL LETTER L +0xD4 0x004D #LATIN CAPITAL LETTER M +0xD5 0x004E #LATIN CAPITAL LETTER N +0xD6 0x004F #LATIN CAPITAL LETTER O +0xD7 0x0050 #LATIN CAPITAL LETTER P +0xD8 0x0051 #LATIN CAPITAL LETTER Q +0xD9 0x0052 #LATIN CAPITAL LETTER R +0xDA 0x00B9 #SUPERSCRIPT ONE +0xDB 0x00FB #LATIN SMALL LETTER U WITH CIRCUMFLEX +0xDC 0x00FC #LATIN SMALL LETTER U WITH DIAERESIS +0xDD 0x00F9 #LATIN SMALL LETTER U WITH GRAVE +0xDE 0x00FA #LATIN SMALL LETTER U WITH ACUTE +0xDF 0x00FF #LATIN SMALL LETTER Y WITH DIAERESIS +0xE0 0x005C #REVERSE SOLIDUS +0xE1 0x00F7 #DIVISION SIGN +0xE2 0x0053 #LATIN CAPITAL LETTER S +0xE3 0x0054 #LATIN CAPITAL LETTER T +0xE4 0x0055 #LATIN CAPITAL LETTER U +0xE5 0x0056 #LATIN CAPITAL LETTER V +0xE6 0x0057 #LATIN CAPITAL LETTER W +0xE7 0x0058 #LATIN CAPITAL LETTER X +0xE8 0x0059 #LATIN CAPITAL LETTER Y +0xE9 0x005A #LATIN CAPITAL LETTER Z +0xEA 0x00B2 #SUPERSCRIPT TWO +0xEB 0x00D4 #LATIN CAPITAL LETTER O WITH CIRCUMFLEX +0xEC 0x00D6 #LATIN CAPITAL LETTER O WITH DIAERESIS +0xED 0x00D2 #LATIN CAPITAL LETTER O WITH GRAVE +0xEE 0x00D3 #LATIN CAPITAL LETTER O WITH ACUTE +0xEF 0x00D5 #LATIN CAPITAL LETTER O WITH TILDE +0xF0 0x0030 #DIGIT ZERO +0xF1 0x0031 #DIGIT ONE +0xF2 0x0032 #DIGIT TWO +0xF3 0x0033 #DIGIT THREE +0xF4 0x0034 #DIGIT FOUR +0xF5 0x0035 #DIGIT FIVE +0xF6 0x0036 #DIGIT SIX +0xF7 0x0037 #DIGIT SEVEN +0xF8 0x0038 #DIGIT EIGHT +0xF9 0x0039 #DIGIT NINE +0xFA 0x00B3 #SUPERSCRIPT THREE +0xFB 0x00DB #LATIN CAPITAL LETTER U WITH CIRCUMFLEX +0xFC 0x00DC #LATIN CAPITAL LETTER U WITH DIAERESIS +0xFD 0x00D9 #LATIN CAPITAL LETTER U WITH GRAVE +0xFE 0x00DA #LATIN CAPITAL LETTER U WITH ACUTE +0xFF 0x009F #CONTROL diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/KOI8-U.TXT b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/KOI8-U.TXT new file mode 100644 index 0000000000..77160cf7e6 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/KOI8-U.TXT @@ -0,0 +1,298 @@ +# +# Name: KOI8-U (RFC2319) to Unicode +# Unicode version: 3.2 +# Table version: 1.0 +# Table format: Format A +# Date: 2005-10-25 +# Authors: Marc-Andre Lemburg +# +# See RFC2319 for details. This encoding is a modified KOI8-R +# encoding. +# +# (c) Copyright Marc-Andre Lemburg, 2005. +# Licensed to PSF under a Contributor Agreement. 
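
[Editor's note] The single CP1140 deviation from CP037 flagged in the table above (0x9F carries the euro sign rather than the currency sign) is visible through the codecs Python builds from these tables:

    >>> u"\u20ac".encode("cp1140")      # EURO SIGN
    '\x9f'
    >>> u"\xa4".encode("cp037")         # CURRENCY SIGN sits at 0x9F in CP037
    '\x9f'
    >>> u"\u20ac".encode("cp037")       # ... which has no euro at all
    Traceback (most recent call last):
      ...
    UnicodeEncodeError: 'charmap' codec can't encode character u'\u20ac' ...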
+# +# Based on the file +# ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT +# which is: +# +# Copyright (c) 1991-1999 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. +# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL 
LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 # LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x2500 # BOX DRAWINGS LIGHT HORIZONTAL +0x81 0x2502 # BOX DRAWINGS LIGHT VERTICAL +0x82 0x250C # BOX DRAWINGS LIGHT DOWN AND RIGHT +0x83 0x2510 # BOX DRAWINGS LIGHT DOWN AND LEFT +0x84 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT +0x85 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT +0x86 0x251C # BOX DRAWINGS LIGHT VERTICAL AND RIGHT +0x87 0x2524 # BOX DRAWINGS LIGHT VERTICAL AND LEFT +0x88 0x252C # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL +0x89 0x2534 # BOX DRAWINGS LIGHT UP AND HORIZONTAL +0x8A 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL +0x8B 0x2580 # UPPER HALF BLOCK +0x8C 0x2584 # LOWER HALF BLOCK +0x8D 0x2588 # FULL BLOCK +0x8E 0x258C # LEFT HALF BLOCK +0x8F 0x2590 # RIGHT HALF BLOCK +0x90 0x2591 # LIGHT SHADE +0x91 0x2592 # MEDIUM SHADE +0x92 0x2593 # DARK SHADE +0x93 0x2320 # TOP HALF INTEGRAL +0x94 0x25A0 # BLACK SQUARE +0x95 0x2219 # BULLET OPERATOR +0x96 0x221A # SQUARE ROOT +0x97 0x2248 # ALMOST EQUAL TO +0x98 0x2264 # LESS-THAN OR EQUAL TO +0x99 0x2265 # GREATER-THAN OR EQUAL TO +0x9A 0x00A0 # NO-BREAK SPACE +0x9B 0x2321 # BOTTOM HALF INTEGRAL +0x9C 0x00B0 # DEGREE SIGN +0x9D 0x00B2 # SUPERSCRIPT TWO +0x9E 0x00B7 # MIDDLE DOT +0x9F 0x00F7 # DIVISION SIGN +0xA0 0x2550 # BOX DRAWINGS DOUBLE HORIZONTAL +0xA1 0x2551 # BOX DRAWINGS DOUBLE VERTICAL +0xA2 0x2552 # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE +0xA3 0x0451 # CYRILLIC SMALL LETTER IO +#0xA4 0x2553 # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE +0xA4 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE +0xA5 0x2554 # BOX DRAWINGS DOUBLE DOWN AND RIGHT +#0xA6 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE +0xA6 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +#0xA7 0x2556 # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE +0xA7 0x0457 # CYRILLIC SMALL LETTER YI (UKRAINIAN) +0xA8 0x2557 # BOX DRAWINGS DOUBLE DOWN AND LEFT +0xA9 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE +0xAA 0x2559 # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE +0xAB 0x255A # BOX DRAWINGS DOUBLE UP AND RIGHT +0xAC 0x255B # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE +#0xAD 0x255C # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE +0xAD 0x0491 # CYRILLIC SMALL LETTER UKRAINIAN GHE 
WITH UPTURN +0xAE 0x255D # BOX DRAWINGS DOUBLE UP AND LEFT +0xAF 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE +0xB0 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE +0xB1 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT +0xB2 0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE +0xB3 0x0401 # CYRILLIC CAPITAL LETTER IO +#0xB4 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE +0xB4 0x0404 # CYRILLIC CAPITAL LETTER UKRAINIAN IE +0xB5 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT +#0xB6 0x2564 # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE +0xB6 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I +#0xB7 0x2565 # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE +0xB7 0x0407 # CYRILLIC CAPITAL LETTER YI (UKRAINIAN) +0xB8 0x2566 # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL +0xB9 0x2567 # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE +0xBA 0x2568 # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE +0xBB 0x2569 # BOX DRAWINGS DOUBLE UP AND HORIZONTAL +0xBC 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE +#0xBD 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE +0xBD 0x0490 # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN +0xBE 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL +0xBF 0x00A9 # COPYRIGHT SIGN +0xC0 0x044E # CYRILLIC SMALL LETTER YU +0xC1 0x0430 # CYRILLIC SMALL LETTER A +0xC2 0x0431 # CYRILLIC SMALL LETTER BE +0xC3 0x0446 # CYRILLIC SMALL LETTER TSE +0xC4 0x0434 # CYRILLIC SMALL LETTER DE +0xC5 0x0435 # CYRILLIC SMALL LETTER IE +0xC6 0x0444 # CYRILLIC SMALL LETTER EF +0xC7 0x0433 # CYRILLIC SMALL LETTER GHE +0xC8 0x0445 # CYRILLIC SMALL LETTER HA +0xC9 0x0438 # CYRILLIC SMALL LETTER I +0xCA 0x0439 # CYRILLIC SMALL LETTER SHORT I +0xCB 0x043A # CYRILLIC SMALL LETTER KA +0xCC 0x043B # CYRILLIC SMALL LETTER EL +0xCD 0x043C # CYRILLIC SMALL LETTER EM +0xCE 0x043D # CYRILLIC SMALL LETTER EN +0xCF 0x043E # CYRILLIC SMALL LETTER O +0xD0 0x043F # CYRILLIC SMALL LETTER PE +0xD1 0x044F # CYRILLIC SMALL LETTER YA +0xD2 0x0440 # CYRILLIC SMALL LETTER ER +0xD3 0x0441 # CYRILLIC SMALL LETTER ES +0xD4 0x0442 # CYRILLIC SMALL LETTER TE +0xD5 0x0443 # CYRILLIC SMALL LETTER U +0xD6 0x0436 # CYRILLIC SMALL LETTER ZHE +0xD7 0x0432 # CYRILLIC SMALL LETTER VE +0xD8 0x044C # CYRILLIC SMALL LETTER SOFT SIGN +0xD9 0x044B # CYRILLIC SMALL LETTER YERU +0xDA 0x0437 # CYRILLIC SMALL LETTER ZE +0xDB 0x0448 # CYRILLIC SMALL LETTER SHA +0xDC 0x044D # CYRILLIC SMALL LETTER E +0xDD 0x0449 # CYRILLIC SMALL LETTER SHCHA +0xDE 0x0447 # CYRILLIC SMALL LETTER CHE +0xDF 0x044A # CYRILLIC SMALL LETTER HARD SIGN +0xE0 0x042E # CYRILLIC CAPITAL LETTER YU +0xE1 0x0410 # CYRILLIC CAPITAL LETTER A +0xE2 0x0411 # CYRILLIC CAPITAL LETTER BE +0xE3 0x0426 # CYRILLIC CAPITAL LETTER TSE +0xE4 0x0414 # CYRILLIC CAPITAL LETTER DE +0xE5 0x0415 # CYRILLIC CAPITAL LETTER IE +0xE6 0x0424 # CYRILLIC CAPITAL LETTER EF +0xE7 0x0413 # CYRILLIC CAPITAL LETTER GHE +0xE8 0x0425 # CYRILLIC CAPITAL LETTER HA +0xE9 0x0418 # CYRILLIC CAPITAL LETTER I +0xEA 0x0419 # CYRILLIC CAPITAL LETTER SHORT I +0xEB 0x041A # CYRILLIC CAPITAL LETTER KA +0xEC 0x041B # CYRILLIC CAPITAL LETTER EL +0xED 0x041C # CYRILLIC CAPITAL LETTER EM +0xEE 0x041D # CYRILLIC CAPITAL LETTER EN +0xEF 0x041E # CYRILLIC CAPITAL LETTER O +0xF0 0x041F # CYRILLIC CAPITAL LETTER PE +0xF1 0x042F # CYRILLIC CAPITAL LETTER YA +0xF2 0x0420 # CYRILLIC CAPITAL LETTER ER +0xF3 0x0421 # CYRILLIC CAPITAL LETTER ES +0xF4 0x0422 # CYRILLIC CAPITAL LETTER TE +0xF5 0x0423 # CYRILLIC CAPITAL LETTER U +0xF6 0x0416 # CYRILLIC CAPITAL LETTER ZHE +0xF7 0x0412 # 
CYRILLIC CAPITAL LETTER VE +0xF8 0x042C # CYRILLIC CAPITAL LETTER SOFT SIGN +0xF9 0x042B # CYRILLIC CAPITAL LETTER YERU +0xFA 0x0417 # CYRILLIC CAPITAL LETTER ZE +0xFB 0x0428 # CYRILLIC CAPITAL LETTER SHA +0xFC 0x042D # CYRILLIC CAPITAL LETTER E +0xFD 0x0429 # CYRILLIC CAPITAL LETTER SHCHA +0xFE 0x0427 # CYRILLIC CAPITAL LETTER CHE +0xFF 0x042A # CYRILLIC CAPITAL LETTER HARD SIGN diff --git a/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/TIS-620.TXT b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/TIS-620.TXT new file mode 100644 index 0000000000..05173e9720 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Tools/unicode/python-mappings/TIS-620.TXT @@ -0,0 +1,284 @@ +# +# Name: TIS-620 +# Unicode version: 3.2 +# Table version: 1.0 +# Table format: Format A +# Date: 2005-10-25 +# Authors: Marc-Andre Lemburg +# +# According to +# ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT the +# TIS-620 is the identical to ISO_8859-11 with the 0xA0 +# (no-break space) mapping removed. +# +# (c) Copyright Marc-Andre Lemburg, 2005. +# Licensed to PSF under a Contributor Agreement. +# +# Based on the file +# ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT +# which is: +# +# Copyright (c) 2002 Unicode, Inc. All Rights reserved. +# +# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). +# No claims are made as to fitness for any particular purpose. No +# warranties of any kind are expressed or implied. The recipient +# agrees to determine applicability of information provided. If this +# file has been provided on optical media by Unicode, Inc., the sole +# remedy for any claim will be exchange of defective media within 90 +# days of receipt. +# +# Unicode, Inc. hereby grants the right to freely use the information +# supplied in this file in the creation of products supporting the +# Unicode Standard, and to make copies of this file in any form for +# internal or external distribution as long as this notice remains +# attached. 
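
[Editor's note] Like CP1140, the last two tables are small deltas on well-known encodings: KOI8-U swaps eight KOI8-R box-drawing cells for Ukrainian letters (the commented-out entries in the table above), and TIS-620 is ISO-8859-11 minus the 0xA0 slot. Both claims can be checked against the codecs Python generates from these files:

    # KOI8-U differs from KOI8-R in exactly these eight cells.
    changed = [0xA4, 0xA6, 0xA7, 0xAD, 0xB4, 0xB6, 0xB7, 0xBD]
    for b in range(0x80, 0x100):
        c = chr(b)
        same = c.decode("koi8_r") == c.decode("koi8_u")
        assert same != (b in changed)

    # TIS-620 rejects the byte ISO-8859-11 maps to NO-BREAK SPACE.
    assert "\xa0".decode("iso8859_11") == u"\xa0"
    try:
        "\xa0".decode("tis_620")
    except UnicodeDecodeError:
        pass
    else:
        raise AssertionError("0xA0 unexpectedly mapped")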
+# +0x00 0x0000 # NULL +0x01 0x0001 # START OF HEADING +0x02 0x0002 # START OF TEXT +0x03 0x0003 # END OF TEXT +0x04 0x0004 # END OF TRANSMISSION +0x05 0x0005 # ENQUIRY +0x06 0x0006 # ACKNOWLEDGE +0x07 0x0007 # BELL +0x08 0x0008 # BACKSPACE +0x09 0x0009 # HORIZONTAL TABULATION +0x0A 0x000A # LINE FEED +0x0B 0x000B # VERTICAL TABULATION +0x0C 0x000C # FORM FEED +0x0D 0x000D # CARRIAGE RETURN +0x0E 0x000E # SHIFT OUT +0x0F 0x000F # SHIFT IN +0x10 0x0010 # DATA LINK ESCAPE +0x11 0x0011 # DEVICE CONTROL ONE +0x12 0x0012 # DEVICE CONTROL TWO +0x13 0x0013 # DEVICE CONTROL THREE +0x14 0x0014 # DEVICE CONTROL FOUR +0x15 0x0015 # NEGATIVE ACKNOWLEDGE +0x16 0x0016 # SYNCHRONOUS IDLE +0x17 0x0017 # END OF TRANSMISSION BLOCK +0x18 0x0018 # CANCEL +0x19 0x0019 # END OF MEDIUM +0x1A 0x001A # SUBSTITUTE +0x1B 0x001B # ESCAPE +0x1C 0x001C # FILE SEPARATOR +0x1D 0x001D # GROUP SEPARATOR +0x1E 0x001E # RECORD SEPARATOR +0x1F 0x001F # UNIT SEPARATOR +0x20 0x0020 # SPACE +0x21 0x0021 # EXCLAMATION MARK +0x22 0x0022 # QUOTATION MARK +0x23 0x0023 # NUMBER SIGN +0x24 0x0024 # DOLLAR SIGN +0x25 0x0025 # PERCENT SIGN +0x26 0x0026 # AMPERSAND +0x27 0x0027 # APOSTROPHE +0x28 0x0028 # LEFT PARENTHESIS +0x29 0x0029 # RIGHT PARENTHESIS +0x2A 0x002A # ASTERISK +0x2B 0x002B # PLUS SIGN +0x2C 0x002C # COMMA +0x2D 0x002D # HYPHEN-MINUS +0x2E 0x002E # FULL STOP +0x2F 0x002F # SOLIDUS +0x30 0x0030 # DIGIT ZERO +0x31 0x0031 # DIGIT ONE +0x32 0x0032 # DIGIT TWO +0x33 0x0033 # DIGIT THREE +0x34 0x0034 # DIGIT FOUR +0x35 0x0035 # DIGIT FIVE +0x36 0x0036 # DIGIT SIX +0x37 0x0037 # DIGIT SEVEN +0x38 0x0038 # DIGIT EIGHT +0x39 0x0039 # DIGIT NINE +0x3A 0x003A # COLON +0x3B 0x003B # SEMICOLON +0x3C 0x003C # LESS-THAN SIGN +0x3D 0x003D # EQUALS SIGN +0x3E 0x003E # GREATER-THAN SIGN +0x3F 0x003F # QUESTION MARK +0x40 0x0040 # COMMERCIAL AT +0x41 0x0041 # LATIN CAPITAL LETTER A +0x42 0x0042 # LATIN CAPITAL LETTER B +0x43 0x0043 # LATIN CAPITAL LETTER C +0x44 0x0044 # LATIN CAPITAL LETTER D +0x45 0x0045 # LATIN CAPITAL LETTER E +0x46 0x0046 # LATIN CAPITAL LETTER F +0x47 0x0047 # LATIN CAPITAL LETTER G +0x48 0x0048 # LATIN CAPITAL LETTER H +0x49 0x0049 # LATIN CAPITAL LETTER I +0x4A 0x004A # LATIN CAPITAL LETTER J +0x4B 0x004B # LATIN CAPITAL LETTER K +0x4C 0x004C # LATIN CAPITAL LETTER L +0x4D 0x004D # LATIN CAPITAL LETTER M +0x4E 0x004E # LATIN CAPITAL LETTER N +0x4F 0x004F # LATIN CAPITAL LETTER O +0x50 0x0050 # LATIN CAPITAL LETTER P +0x51 0x0051 # LATIN CAPITAL LETTER Q +0x52 0x0052 # LATIN CAPITAL LETTER R +0x53 0x0053 # LATIN CAPITAL LETTER S +0x54 0x0054 # LATIN CAPITAL LETTER T +0x55 0x0055 # LATIN CAPITAL LETTER U +0x56 0x0056 # LATIN CAPITAL LETTER V +0x57 0x0057 # LATIN CAPITAL LETTER W +0x58 0x0058 # LATIN CAPITAL LETTER X +0x59 0x0059 # LATIN CAPITAL LETTER Y +0x5A 0x005A # LATIN CAPITAL LETTER Z +0x5B 0x005B # LEFT SQUARE BRACKET +0x5C 0x005C # REVERSE SOLIDUS +0x5D 0x005D # RIGHT SQUARE BRACKET +0x5E 0x005E # CIRCUMFLEX ACCENT +0x5F 0x005F # LOW LINE +0x60 0x0060 # GRAVE ACCENT +0x61 0x0061 # LATIN SMALL LETTER A +0x62 0x0062 # LATIN SMALL LETTER B +0x63 0x0063 # LATIN SMALL LETTER C +0x64 0x0064 # LATIN SMALL LETTER D +0x65 0x0065 # LATIN SMALL LETTER E +0x66 0x0066 # LATIN SMALL LETTER F +0x67 0x0067 # LATIN SMALL LETTER G +0x68 0x0068 # LATIN SMALL LETTER H +0x69 0x0069 # LATIN SMALL LETTER I +0x6A 0x006A # LATIN SMALL LETTER J +0x6B 0x006B # LATIN SMALL LETTER K +0x6C 0x006C # LATIN SMALL LETTER L +0x6D 0x006D # LATIN SMALL LETTER M +0x6E 0x006E # LATIN SMALL LETTER N +0x6F 0x006F # LATIN SMALL LETTER O +0x70 0x0070 
# LATIN SMALL LETTER P +0x71 0x0071 # LATIN SMALL LETTER Q +0x72 0x0072 # LATIN SMALL LETTER R +0x73 0x0073 # LATIN SMALL LETTER S +0x74 0x0074 # LATIN SMALL LETTER T +0x75 0x0075 # LATIN SMALL LETTER U +0x76 0x0076 # LATIN SMALL LETTER V +0x77 0x0077 # LATIN SMALL LETTER W +0x78 0x0078 # LATIN SMALL LETTER X +0x79 0x0079 # LATIN SMALL LETTER Y +0x7A 0x007A # LATIN SMALL LETTER Z +0x7B 0x007B # LEFT CURLY BRACKET +0x7C 0x007C # VERTICAL LINE +0x7D 0x007D # RIGHT CURLY BRACKET +0x7E 0x007E # TILDE +0x7F 0x007F # DELETE +0x80 0x0080 # +0x81 0x0081 # +0x82 0x0082 # +0x83 0x0083 # +0x84 0x0084 # +0x85 0x0085 # +0x86 0x0086 # +0x87 0x0087 # +0x88 0x0088 # +0x89 0x0089 # +0x8A 0x008A # +0x8B 0x008B # +0x8C 0x008C # +0x8D 0x008D # +0x8E 0x008E # +0x8F 0x008F # +0x90 0x0090 # +0x91 0x0091 # +0x92 0x0092 # +0x93 0x0093 # +0x94 0x0094 # +0x95 0x0095 # +0x96 0x0096 # +0x97 0x0097 # +0x98 0x0098 # +0x99 0x0099 # +0x9A 0x009A # +0x9B 0x009B # +0x9C 0x009C # +0x9D 0x009D # +0x9E 0x009E # +0x9F 0x009F # +#0xA0 0x00A0 # NO-BREAK SPACE +0xA1 0x0E01 # THAI CHARACTER KO KAI +0xA2 0x0E02 # THAI CHARACTER KHO KHAI +0xA3 0x0E03 # THAI CHARACTER KHO KHUAT +0xA4 0x0E04 # THAI CHARACTER KHO KHWAI +0xA5 0x0E05 # THAI CHARACTER KHO KHON +0xA6 0x0E06 # THAI CHARACTER KHO RAKHANG +0xA7 0x0E07 # THAI CHARACTER NGO NGU +0xA8 0x0E08 # THAI CHARACTER CHO CHAN +0xA9 0x0E09 # THAI CHARACTER CHO CHING +0xAA 0x0E0A # THAI CHARACTER CHO CHANG +0xAB 0x0E0B # THAI CHARACTER SO SO +0xAC 0x0E0C # THAI CHARACTER CHO CHOE +0xAD 0x0E0D # THAI CHARACTER YO YING +0xAE 0x0E0E # THAI CHARACTER DO CHADA +0xAF 0x0E0F # THAI CHARACTER TO PATAK +0xB0 0x0E10 # THAI CHARACTER THO THAN +0xB1 0x0E11 # THAI CHARACTER THO NANGMONTHO +0xB2 0x0E12 # THAI CHARACTER THO PHUTHAO +0xB3 0x0E13 # THAI CHARACTER NO NEN +0xB4 0x0E14 # THAI CHARACTER DO DEK +0xB5 0x0E15 # THAI CHARACTER TO TAO +0xB6 0x0E16 # THAI CHARACTER THO THUNG +0xB7 0x0E17 # THAI CHARACTER THO THAHAN +0xB8 0x0E18 # THAI CHARACTER THO THONG +0xB9 0x0E19 # THAI CHARACTER NO NU +0xBA 0x0E1A # THAI CHARACTER BO BAIMAI +0xBB 0x0E1B # THAI CHARACTER PO PLA +0xBC 0x0E1C # THAI CHARACTER PHO PHUNG +0xBD 0x0E1D # THAI CHARACTER FO FA +0xBE 0x0E1E # THAI CHARACTER PHO PHAN +0xBF 0x0E1F # THAI CHARACTER FO FAN +0xC0 0x0E20 # THAI CHARACTER PHO SAMPHAO +0xC1 0x0E21 # THAI CHARACTER MO MA +0xC2 0x0E22 # THAI CHARACTER YO YAK +0xC3 0x0E23 # THAI CHARACTER RO RUA +0xC4 0x0E24 # THAI CHARACTER RU +0xC5 0x0E25 # THAI CHARACTER LO LING +0xC6 0x0E26 # THAI CHARACTER LU +0xC7 0x0E27 # THAI CHARACTER WO WAEN +0xC8 0x0E28 # THAI CHARACTER SO SALA +0xC9 0x0E29 # THAI CHARACTER SO RUSI +0xCA 0x0E2A # THAI CHARACTER SO SUA +0xCB 0x0E2B # THAI CHARACTER HO HIP +0xCC 0x0E2C # THAI CHARACTER LO CHULA +0xCD 0x0E2D # THAI CHARACTER O ANG +0xCE 0x0E2E # THAI CHARACTER HO NOKHUK +0xCF 0x0E2F # THAI CHARACTER PAIYANNOI +0xD0 0x0E30 # THAI CHARACTER SARA A +0xD1 0x0E31 # THAI CHARACTER MAI HAN-AKAT +0xD2 0x0E32 # THAI CHARACTER SARA AA +0xD3 0x0E33 # THAI CHARACTER SARA AM +0xD4 0x0E34 # THAI CHARACTER SARA I +0xD5 0x0E35 # THAI CHARACTER SARA II +0xD6 0x0E36 # THAI CHARACTER SARA UE +0xD7 0x0E37 # THAI CHARACTER SARA UEE +0xD8 0x0E38 # THAI CHARACTER SARA U +0xD9 0x0E39 # THAI CHARACTER SARA UU +0xDA 0x0E3A # THAI CHARACTER PHINTHU +0xDF 0x0E3F # THAI CURRENCY SYMBOL BAHT +0xE0 0x0E40 # THAI CHARACTER SARA E +0xE1 0x0E41 # THAI CHARACTER SARA AE +0xE2 0x0E42 # THAI CHARACTER SARA O +0xE3 0x0E43 # THAI CHARACTER SARA AI MAIMUAN +0xE4 0x0E44 # THAI CHARACTER SARA AI MAIMALAI +0xE5 0x0E45 # THAI CHARACTER LAKKHANGYAO 
+0xE6 0x0E46 # THAI CHARACTER MAIYAMOK +0xE7 0x0E47 # THAI CHARACTER MAITAIKHU +0xE8 0x0E48 # THAI CHARACTER MAI EK +0xE9 0x0E49 # THAI CHARACTER MAI THO +0xEA 0x0E4A # THAI CHARACTER MAI TRI +0xEB 0x0E4B # THAI CHARACTER MAI CHATTAWA +0xEC 0x0E4C # THAI CHARACTER THANTHAKHAT +0xED 0x0E4D # THAI CHARACTER NIKHAHIT +0xEE 0x0E4E # THAI CHARACTER YAMAKKAN +0xEF 0x0E4F # THAI CHARACTER FONGMAN +0xF0 0x0E50 # THAI DIGIT ZERO +0xF1 0x0E51 # THAI DIGIT ONE +0xF2 0x0E52 # THAI DIGIT TWO +0xF3 0x0E53 # THAI DIGIT THREE +0xF4 0x0E54 # THAI DIGIT FOUR +0xF5 0x0E55 # THAI DIGIT FIVE +0xF6 0x0E56 # THAI DIGIT SIX +0xF7 0x0E57 # THAI DIGIT SEVEN +0xF8 0x0E58 # THAI DIGIT EIGHT +0xF9 0x0E59 # THAI DIGIT NINE +0xFA 0x0E5A # THAI CHARACTER ANGKHANKHU +0xFB 0x0E5B # THAI CHARACTER KHOMUT -- cgit v1.2.3
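
[Editor's note] For anyone consuming these mapping files outside of gencodec.py, the "Format A" layout named in each header is simple to parse: two whitespace-separated hex columns, '#' comments, and undefined byte values simply absent. A minimal reader follows; the relative path is an assumption about the caller's working directory, and gencodec.py's own parser handles additional format variants beyond these three files:

    def read_format_a(path):
        """Return a dict mapping byte value -> Unicode code point."""
        table = {}
        for line in open(path):
            line = line.split("#", 1)[0].strip()   # strip comments
            if not line:
                continue
            byte, code = [int(v, 16) for v in line.split()]
            table[byte] = code
        return table

    mapping = read_format_a("python-mappings/TIS-620.TXT")
    assert 0xA0 not in mapping        # the removed NO-BREAK SPACE slot
    assert mapping[0xA1] == 0x0E01    # THAI CHARACTER KO KAI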