From 4710c53dcad1ebf3755f3efb9e80ac24bd72a9b2 Mon Sep 17 00:00:00 2001
From: darylm503
Date: Mon, 16 Apr 2012 22:12:42 +0000
Subject: AppPkg/Applications/Python: Add Python 2.7.2 sources since the
 release of Python 2.7.3 made them unavailable from the python.org web
 site.

These files are a subset of the python-2.7.2.tgz distribution from
python.org.  Changed files from PyMod-2.7.2 have been copied into the
corresponding directories of this tree, replacing the original files
in the distribution.

Signed-off-by: daryl.mcdaniel@intel.com

git-svn-id: https://edk2.svn.sourceforge.net/svnroot/edk2/trunk/edk2@13197 6f19259b-4bc3-4df7-8a09-765794883524
---
 .../Python/Python-2.7.2/Parser/Python.asdl         |  115 ++
 .../Python/Python-2.7.2/Parser/acceler.c           |  125 ++
 .../Python/Python-2.7.2/Parser/asdl.py             |  413 +++++
 .../Python/Python-2.7.2/Parser/asdl_c.py           | 1220 ++++++++++++++
 .../Python/Python-2.7.2/Parser/bitset.c            |   66 +
 .../Python/Python-2.7.2/Parser/firstsets.c         |  113 ++
 .../Python/Python-2.7.2/Parser/grammar.c           |  254 +++
 .../Python/Python-2.7.2/Parser/grammar1.c          |   57 +
 .../Python/Python-2.7.2/Parser/intrcheck.c         |  178 ++
 .../Python/Python-2.7.2/Parser/listnode.c          |   66 +
 .../Python/Python-2.7.2/Parser/metagrammar.c       |  159 ++
 .../Python/Python-2.7.2/Parser/myreadline.c        |  221 +++
 .../Applications/Python/Python-2.7.2/Parser/node.c |  138 ++
 .../Python/Python-2.7.2/Parser/parser.c            |  436 +++++
 .../Python/Python-2.7.2/Parser/parser.h            |   42 +
 .../Python/Python-2.7.2/Parser/parsetok.c          |  283 ++++
 .../Applications/Python/Python-2.7.2/Parser/pgen.c |  708 ++++++++
 .../Python/Python-2.7.2/Parser/pgenmain.c          |  173 ++
 .../Python/Python-2.7.2/Parser/printgrammar.c      |  117 ++
 .../Python/Python-2.7.2/Parser/spark.py            |  839 ++++++++++
 .../Python/Python-2.7.2/Parser/tokenizer.c         | 1726 ++++++++++++++++++++
 .../Python/Python-2.7.2/Parser/tokenizer.h         |   70 +
 .../Python/Python-2.7.2/Parser/tokenizer_pgen.c    |    2 +
 23 files changed, 7521 insertions(+)
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/Python.asdl
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/acceler.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/asdl.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/asdl_c.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/bitset.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/firstsets.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/grammar.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/grammar1.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/intrcheck.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/listnode.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/metagrammar.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/myreadline.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/node.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/parser.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/parser.h
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/parsetok.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/pgen.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/pgenmain.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/printgrammar.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/spark.py
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer.c
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer.h
 create mode 100644 AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer_pgen.c

diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/Python.asdl b/AppPkg/Applications/Python/Python-2.7.2/Parser/Python.asdl
new file mode 100644
index 0000000000..9e7c1f5c2b
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/Python.asdl
@@ -0,0 +1,115 @@
+-- ASDL's five builtin types are identifier, int, string, object, bool
+
+module Python version "$Revision$"
+{
+    mod = Module(stmt* body)
+        | Interactive(stmt* body)
+        | Expression(expr body)
+
+        -- not really an actual node but useful in Jython's typesystem.
+        | Suite(stmt* body)
+
+    stmt = FunctionDef(identifier name, arguments args,
+                       stmt* body, expr* decorator_list)
+         | ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list)
+         | Return(expr? value)
+
+         | Delete(expr* targets)
+         | Assign(expr* targets, expr value)
+         | AugAssign(expr target, operator op, expr value)
+
+         -- not sure if bool is allowed, can always use int
+         | Print(expr? dest, expr* values, bool nl)
+
+         -- use 'orelse' because else is a keyword in target languages
+         | For(expr target, expr iter, stmt* body, stmt* orelse)
+         | While(expr test, stmt* body, stmt* orelse)
+         | If(expr test, stmt* body, stmt* orelse)
+         | With(expr context_expr, expr? optional_vars, stmt* body)
+
+         -- 'type' is a bad name
+         | Raise(expr? type, expr? inst, expr? tback)
+         | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
+         | TryFinally(stmt* body, stmt* finalbody)
+         | Assert(expr test, expr? msg)
+
+         | Import(alias* names)
+         | ImportFrom(identifier? module, alias* names, int? level)
+
+         -- Doesn't capture requirement that locals must be
+         -- defined if globals is
+         -- still supports use as a function!
+         | Exec(expr body, expr? globals, expr? locals)
+
+         | Global(identifier* names)
+         | Expr(expr value)
+         | Pass | Break | Continue
+
+         -- XXX Jython will be different
+         -- col_offset is the byte offset in the utf8 string the parser uses
+         attributes (int lineno, int col_offset)
+
+         -- BoolOp() can use left & right?
+    expr = BoolOp(boolop op, expr* values)
+         | BinOp(expr left, operator op, expr right)
+         | UnaryOp(unaryop op, expr operand)
+         | Lambda(arguments args, expr body)
+         | IfExp(expr test, expr body, expr orelse)
+         | Dict(expr* keys, expr* values)
+         | Set(expr* elts)
+         | ListComp(expr elt, comprehension* generators)
+         | SetComp(expr elt, comprehension* generators)
+         | DictComp(expr key, expr value, comprehension* generators)
+         | GeneratorExp(expr elt, comprehension* generators)
+         -- the grammar constrains where yield expressions can occur
+         | Yield(expr? value)
+         -- need sequences for compare to distinguish between
+         -- x < 4 < 3 and (x < 4) < 3
+         | Compare(expr left, cmpop* ops, expr* comparators)
+         | Call(expr func, expr* args, keyword* keywords,
+                expr? starargs, expr? kwargs)
+         | Repr(expr value)
+         | Num(object n) -- a number as a PyObject.
+         | Str(string s) -- need to specify raw, unicode, etc?
+         -- other literals? bools?
+
+         -- the following expression can appear in assignment context
+         | Attribute(expr value, identifier attr, expr_context ctx)
+         | Subscript(expr value, slice slice, expr_context ctx)
+         | Name(identifier id, expr_context ctx)
+         | List(expr* elts, expr_context ctx)
+         | Tuple(expr* elts, expr_context ctx)
+
+         -- col_offset is the byte offset in the utf8 string the parser uses
+         attributes (int lineno, int col_offset)
+
+    expr_context = Load | Store | Del | AugLoad | AugStore | Param
+
+    slice = Ellipsis | Slice(expr? lower, expr? upper, expr? step)
+          | ExtSlice(slice* dims)
+          | Index(expr value)
+
+    boolop = And | Or
+
+    operator = Add | Sub | Mult | Div | Mod | Pow | LShift
+             | RShift | BitOr | BitXor | BitAnd | FloorDiv
+
+    unaryop = Invert | Not | UAdd | USub
+
+    cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
+
+    comprehension = (expr target, expr iter, expr* ifs)
+
+    -- not sure what to call the first argument for raise and except
+    excepthandler = ExceptHandler(expr? type, expr? name, stmt* body)
+                    attributes (int lineno, int col_offset)
+
+    arguments = (expr* args, identifier? vararg,
+                 identifier? kwarg, expr* defaults)
+
+    -- keyword arguments supplied to call
+    keyword = (identifier arg, expr value)
+
+    -- import name with optional 'as' alias.
+    alias = (identifier name, identifier? asname)
+}

diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/acceler.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/acceler.c
new file mode 100644
index 0000000000..f6036d9739
--- /dev/null
+++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/acceler.c
@@ -0,0 +1,125 @@
+
+/* Parser accelerator module */
+
+/* The parser as originally conceived had disappointing performance.
+   This module does some precomputation that speeds up the selection
+   of a DFA based upon a token, turning a search through an array
+   into a simple indexing operation.  The parser now cannot work
+   without the accelerators installed.  Note that the accelerators
+   are installed dynamically when the parser is initialized, they
+   are not part of the static data structure written on graminit.[ch]
+   by the parser generator.
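+
+   Each accelerator is an array indexed by label number: an entry of
+   -1 means "no transition"; otherwise the low 7 bits hold the target
+   state, and, for arcs that push a nonterminal, bit 7 is set and the
+   nonterminal number (minus NT_OFFSET) is stored from bit 8 up (see
+   fixstate() below).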
*/ + +#include "pgenheaders.h" +#include "grammar.h" +#include "node.h" +#include "token.h" +#include "parser.h" + +/* Forward references */ +static void fixdfa(grammar *, dfa *); +static void fixstate(grammar *, state *); + +void +PyGrammar_AddAccelerators(grammar *g) +{ + dfa *d; + int i; + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) + fixdfa(g, d); + g->g_accel = 1; +} + +void +PyGrammar_RemoveAccelerators(grammar *g) +{ + dfa *d; + int i; + g->g_accel = 0; + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) { + state *s; + int j; + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) { + if (s->s_accel) + PyObject_FREE(s->s_accel); + s->s_accel = NULL; + } + } +} + +static void +fixdfa(grammar *g, dfa *d) +{ + state *s; + int j; + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) + fixstate(g, s); +} + +static void +fixstate(grammar *g, state *s) +{ + arc *a; + int k; + int *accel; + int nl = g->g_ll.ll_nlabels; + s->s_accept = 0; + accel = (int *) PyObject_MALLOC(nl * sizeof(int)); + if (accel == NULL) { + fprintf(stderr, "no mem to build parser accelerators\n"); + exit(1); + } + for (k = 0; k < nl; k++) + accel[k] = -1; + a = s->s_arc; + for (k = s->s_narcs; --k >= 0; a++) { + int lbl = a->a_lbl; + label *l = &g->g_ll.ll_label[lbl]; + int type = l->lb_type; + if (a->a_arrow >= (1 << 7)) { + printf("XXX too many states!\n"); + continue; + } + if (ISNONTERMINAL(type)) { + dfa *d1 = PyGrammar_FindDFA(g, type); + int ibit; + if (type - NT_OFFSET >= (1 << 7)) { + printf("XXX too high nonterminal number!\n"); + continue; + } + for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) { + if (testbit(d1->d_first, ibit)) { + if (accel[ibit] != -1) + printf("XXX ambiguity!\n"); + accel[ibit] = a->a_arrow | (1 << 7) | + ((type - NT_OFFSET) << 8); + } + } + } + else if (lbl == EMPTY) + s->s_accept = 1; + else if (lbl >= 0 && lbl < nl) + accel[lbl] = a->a_arrow; + } + while (nl > 0 && accel[nl-1] == -1) + nl--; + for (k = 0; k < nl && accel[k] == -1;) + k++; + if (k < nl) { + int i; + s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int)); + if (s->s_accel == NULL) { + fprintf(stderr, "no mem to add parser accelerators\n"); + exit(1); + } + s->s_lower = k; + s->s_upper = nl; + for (i = 0; k < nl; i++, k++) + s->s_accel[i] = accel[k]; + } + PyObject_FREE(accel); +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/asdl.py b/AppPkg/Applications/Python/Python-2.7.2/Parser/asdl.py new file mode 100644 index 0000000000..7af84e89c9 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/asdl.py @@ -0,0 +1,413 @@ +"""An implementation of the Zephyr Abstract Syntax Definition Language. + +See http://asdl.sourceforge.net/ and +http://www.cs.princeton.edu/research/techreps/TR-554-97 + +Only supports top level module decl, not view. I'm guessing that view +is intended to support the browser and I'm not interested in the +browser. 
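+
+The scanner and parser are built on the SPARK GenericScanner and
+GenericParser classes defined in spark.py (in this directory).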
+ +Changes for Python: Add support for module versions +""" + +import os +import traceback + +import spark + +class Token(object): + # spark seems to dispatch in the parser based on a token's + # type attribute + def __init__(self, type, lineno): + self.type = type + self.lineno = lineno + + def __str__(self): + return self.type + + def __repr__(self): + return str(self) + +class Id(Token): + def __init__(self, value, lineno): + self.type = 'Id' + self.value = value + self.lineno = lineno + + def __str__(self): + return self.value + +class String(Token): + def __init__(self, value, lineno): + self.type = 'String' + self.value = value + self.lineno = lineno + +class ASDLSyntaxError(Exception): + + def __init__(self, lineno, token=None, msg=None): + self.lineno = lineno + self.token = token + self.msg = msg + + def __str__(self): + if self.msg is None: + return "Error at '%s', line %d" % (self.token, self.lineno) + else: + return "%s, line %d" % (self.msg, self.lineno) + +class ASDLScanner(spark.GenericScanner, object): + + def tokenize(self, input): + self.rv = [] + self.lineno = 1 + super(ASDLScanner, self).tokenize(input) + return self.rv + + def t_id(self, s): + r"[\w\.]+" + # XXX doesn't distinguish upper vs. lower, which is + # significant for ASDL. + self.rv.append(Id(s, self.lineno)) + + def t_string(self, s): + r'"[^"]*"' + self.rv.append(String(s, self.lineno)) + + def t_xxx(self, s): # not sure what this production means + r"<=" + self.rv.append(Token(s, self.lineno)) + + def t_punctuation(self, s): + r"[\{\}\*\=\|\(\)\,\?\:]" + self.rv.append(Token(s, self.lineno)) + + def t_comment(self, s): + r"\-\-[^\n]*" + pass + + def t_newline(self, s): + r"\n" + self.lineno += 1 + + def t_whitespace(self, s): + r"[ \t]+" + pass + + def t_default(self, s): + r" . 
+" + raise ValueError, "unmatched input: %s" % `s` + +class ASDLParser(spark.GenericParser, object): + def __init__(self): + super(ASDLParser, self).__init__("module") + + def typestring(self, tok): + return tok.type + + def error(self, tok): + raise ASDLSyntaxError(tok.lineno, tok) + + def p_module_0(self, (module, name, version, _0, _1)): + " module ::= Id Id version { } " + if module.value != "module": + raise ASDLSyntaxError(module.lineno, + msg="expected 'module', found %s" % module) + return Module(name, None, version) + + def p_module(self, (module, name, version, _0, definitions, _1)): + " module ::= Id Id version { definitions } " + if module.value != "module": + raise ASDLSyntaxError(module.lineno, + msg="expected 'module', found %s" % module) + return Module(name, definitions, version) + + def p_version(self, (version, V)): + "version ::= Id String" + if version.value != "version": + raise ASDLSyntaxError(version.lineno, + msg="expected 'version', found %" % version) + return V + + def p_definition_0(self, (definition,)): + " definitions ::= definition " + return definition + + def p_definition_1(self, (definitions, definition)): + " definitions ::= definition definitions " + return definitions + definition + + def p_definition(self, (id, _, type)): + " definition ::= Id = type " + return [Type(id, type)] + + def p_type_0(self, (product,)): + " type ::= product " + return product + + def p_type_1(self, (sum,)): + " type ::= sum " + return Sum(sum) + + def p_type_2(self, (sum, id, _0, attributes, _1)): + " type ::= sum Id ( fields ) " + if id.value != "attributes": + raise ASDLSyntaxError(id.lineno, + msg="expected attributes, found %s" % id) + if attributes: + attributes.reverse() + return Sum(sum, attributes) + + def p_product(self, (_0, fields, _1)): + " product ::= ( fields ) " + # XXX can't I just construct things in the right order? + fields.reverse() + return Product(fields) + + def p_sum_0(self, (constructor,)): + " sum ::= constructor " + return [constructor] + + def p_sum_1(self, (constructor, _, sum)): + " sum ::= constructor | sum " + return [constructor] + sum + + def p_sum_2(self, (constructor, _, sum)): + " sum ::= constructor | sum " + return [constructor] + sum + + def p_constructor_0(self, (id,)): + " constructor ::= Id " + return Constructor(id) + + def p_constructor_1(self, (id, _0, fields, _1)): + " constructor ::= Id ( fields ) " + # XXX can't I just construct things in the right order? + fields.reverse() + return Constructor(id, fields) + + def p_fields_0(self, (field,)): + " fields ::= field " + return [field] + + def p_fields_1(self, (field, _, fields)): + " fields ::= field , fields " + return fields + [field] + + def p_field_0(self, (type,)): + " field ::= Id " + return Field(type) + + def p_field_1(self, (type, name)): + " field ::= Id Id " + return Field(type, name) + + def p_field_2(self, (type, _, name)): + " field ::= Id * Id " + return Field(type, name, seq=True) + + def p_field_3(self, (type, _, name)): + " field ::= Id ? Id " + return Field(type, name, opt=True) + + def p_field_4(self, (type, _)): + " field ::= Id * " + return Field(type, seq=True) + + def p_field_5(self, (type, _)): + " field ::= Id ? 
" + return Field(type, opt=True) + +builtin_types = ("identifier", "string", "int", "bool", "object") + +# below is a collection of classes to capture the AST of an AST :-) +# not sure if any of the methods are useful yet, but I'm adding them +# piecemeal as they seem helpful + +class AST(object): + pass # a marker class + +class Module(AST): + def __init__(self, name, dfns, version): + self.name = name + self.dfns = dfns + self.version = version + self.types = {} # maps type name to value (from dfns) + for type in dfns: + self.types[type.name.value] = type.value + + def __repr__(self): + return "Module(%s, %s)" % (self.name, self.dfns) + +class Type(AST): + def __init__(self, name, value): + self.name = name + self.value = value + + def __repr__(self): + return "Type(%s, %s)" % (self.name, self.value) + +class Constructor(AST): + def __init__(self, name, fields=None): + self.name = name + self.fields = fields or [] + + def __repr__(self): + return "Constructor(%s, %s)" % (self.name, self.fields) + +class Field(AST): + def __init__(self, type, name=None, seq=False, opt=False): + self.type = type + self.name = name + self.seq = seq + self.opt = opt + + def __repr__(self): + if self.seq: + extra = ", seq=True" + elif self.opt: + extra = ", opt=True" + else: + extra = "" + if self.name is None: + return "Field(%s%s)" % (self.type, extra) + else: + return "Field(%s, %s%s)" % (self.type, self.name, extra) + +class Sum(AST): + def __init__(self, types, attributes=None): + self.types = types + self.attributes = attributes or [] + + def __repr__(self): + if self.attributes is None: + return "Sum(%s)" % self.types + else: + return "Sum(%s, %s)" % (self.types, self.attributes) + +class Product(AST): + def __init__(self, fields): + self.fields = fields + + def __repr__(self): + return "Product(%s)" % self.fields + +class VisitorBase(object): + + def __init__(self, skip=False): + self.cache = {} + self.skip = skip + + def visit(self, object, *args): + meth = self._dispatch(object) + if meth is None: + return + try: + meth(object, *args) + except Exception, err: + print "Error visiting", repr(object) + print err + traceback.print_exc() + # XXX hack + if hasattr(self, 'file'): + self.file.flush() + os._exit(1) + + def _dispatch(self, object): + assert isinstance(object, AST), repr(object) + klass = object.__class__ + meth = self.cache.get(klass) + if meth is None: + methname = "visit" + klass.__name__ + if self.skip: + meth = getattr(self, methname, None) + else: + meth = getattr(self, methname) + self.cache[klass] = meth + return meth + +class Check(VisitorBase): + + def __init__(self): + super(Check, self).__init__(skip=True) + self.cons = {} + self.errors = 0 + self.types = {} + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, str(type.name)) + + def visitSum(self, sum, name): + for t in sum.types: + self.visit(t, name) + + def visitConstructor(self, cons, name): + key = str(cons.name) + conflict = self.cons.get(key) + if conflict is None: + self.cons[key] = name + else: + print "Redefinition of constructor %s" % key + print "Defined in %s and %s" % (conflict, name) + self.errors += 1 + for f in cons.fields: + self.visit(f, key) + + def visitField(self, field, name): + key = str(field.type) + l = self.types.setdefault(key, []) + l.append(name) + + def visitProduct(self, prod, name): + for f in prod.fields: + self.visit(f, name) + +def check(mod): + v = Check() + v.visit(mod) + + for t in v.types: + if t not in mod.types and 
not t in builtin_types: + v.errors += 1 + uses = ", ".join(v.types[t]) + print "Undefined type %s, used in %s" % (t, uses) + + return not v.errors + +def parse(file): + scanner = ASDLScanner() + parser = ASDLParser() + + buf = open(file).read() + tokens = scanner.tokenize(buf) + try: + return parser.parse(tokens) + except ASDLSyntaxError, err: + print err + lines = buf.split("\n") + print lines[err.lineno - 1] # lines starts at 0, files at 1 + +if __name__ == "__main__": + import glob + import sys + + if len(sys.argv) > 1: + files = sys.argv[1:] + else: + testdir = "tests" + files = glob.glob(testdir + "/*.asdl") + + for file in files: + print file + mod = parse(file) + print "module", mod.name + print len(mod.dfns), "definitions" + if not check(mod): + print "Check failed" + else: + for dfn in mod.dfns: + print dfn.type diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/asdl_c.py b/AppPkg/Applications/Python/Python-2.7.2/Parser/asdl_c.py new file mode 100644 index 0000000000..421ee0bb81 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/asdl_c.py @@ -0,0 +1,1220 @@ +#! /usr/bin/env python +"""Generate C code from an ASDL description.""" + +# TO DO +# handle fields that have a type but no name + +import os, sys + +import asdl + +TABSIZE = 8 +MAX_COL = 80 + +def get_c_type(name): + """Return a string for the C name of the type. + + This function special cases the default types provided by asdl: + identifier, string, int, bool. + """ + # XXX ack! need to figure out where Id is useful and where string + if isinstance(name, asdl.Id): + name = name.value + if name in asdl.builtin_types: + return name + else: + return "%s_ty" % name + +def reflow_lines(s, depth): + """Reflow the line s indented depth tabs. + + Return a sequence of lines where no line extends beyond MAX_COL + when properly indented. The first line is properly indented based + exclusively on depth * TABSIZE. All following lines -- these are + the reflowed lines generated by this function -- start at the same + column as the first character beyond the opening { in the first + line. + """ + size = MAX_COL - depth * TABSIZE + if len(s) < size: + return [s] + + lines = [] + cur = s + padding = "" + while len(cur) > size: + i = cur.rfind(' ', 0, size) + # XXX this should be fixed for real + if i == -1 and 'GeneratorExp' in cur: + i = size + 3 + assert i != -1, "Impossible line %d to reflow: %r" % (size, s) + lines.append(padding + cur[:i]) + if len(lines) == 1: + # find new size based on brace + j = cur.find('{', 0, i) + if j >= 0: + j += 2 # account for the brace and the space after it + size -= j + padding = " " * j + else: + j = cur.find('(', 0, i) + if j >= 0: + j += 1 # account for the paren (no space after it) + size -= j + padding = " " * j + cur = cur[i+1:] + else: + lines.append(padding + cur) + return lines + +def is_simple(sum): + """Return True if a sum is a simple. + + A sum is simple if its types have no fields, e.g. + unaryop = Invert | Not | UAdd | USub + """ + for t in sum.types: + if t.fields: + return False + return True + + +class EmitVisitor(asdl.VisitorBase): + """Visit that emits lines""" + + def __init__(self, file): + self.file = file + super(EmitVisitor, self).__init__() + + def emit(self, s, depth, reflow=True): + # XXX reflow long lines? 
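+        # reflow_lines() splits s at spaces so that, once indented by
+        # depth * TABSIZE columns, no emitted line extends beyond MAX_COL.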
+ if reflow: + lines = reflow_lines(s, depth) + else: + lines = [s] + for line in lines: + line = (" " * TABSIZE * depth) + line + "\n" + self.file.write(line) + + +class TypeDefVisitor(EmitVisitor): + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + if is_simple(sum): + self.simple_sum(sum, name, depth) + else: + self.sum_with_constructors(sum, name, depth) + + def simple_sum(self, sum, name, depth): + enum = [] + for i in range(len(sum.types)): + type = sum.types[i] + enum.append("%s=%d" % (type.name, i + 1)) + enums = ", ".join(enum) + ctype = get_c_type(name) + s = "typedef enum _%s { %s } %s;" % (name, enums, ctype) + self.emit(s, depth) + self.emit("", depth) + + def sum_with_constructors(self, sum, name, depth): + ctype = get_c_type(name) + s = "typedef struct _%(name)s *%(ctype)s;" % locals() + self.emit(s, depth) + self.emit("", depth) + + def visitProduct(self, product, name, depth): + ctype = get_c_type(name) + s = "typedef struct _%(name)s *%(ctype)s;" % locals() + self.emit(s, depth) + self.emit("", depth) + + +class StructVisitor(EmitVisitor): + """Visitor to generate typdefs for AST.""" + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type, depth=0): + self.visit(type.value, type.name, depth) + + def visitSum(self, sum, name, depth): + if not is_simple(sum): + self.sum_with_constructors(sum, name, depth) + + def sum_with_constructors(self, sum, name, depth): + def emit(s, depth=depth): + self.emit(s % sys._getframe(1).f_locals, depth) + enum = [] + for i in range(len(sum.types)): + type = sum.types[i] + enum.append("%s_kind=%d" % (type.name, i + 1)) + + emit("enum _%(name)s_kind {" + ", ".join(enum) + "};") + + emit("struct _%(name)s {") + emit("enum _%(name)s_kind kind;", depth + 1) + emit("union {", depth + 1) + for t in sum.types: + self.visit(t, depth + 2) + emit("} v;", depth + 1) + for field in sum.attributes: + # rudimentary attribute handling + type = str(field.type) + assert type in asdl.builtin_types, type + emit("%s %s;" % (type, field.name), depth + 1); + emit("};") + emit("") + + def visitConstructor(self, cons, depth): + if cons.fields: + self.emit("struct {", depth) + for f in cons.fields: + self.visit(f, depth + 1) + self.emit("} %s;" % cons.name, depth) + self.emit("", depth) + else: + # XXX not sure what I want here, nothing is probably fine + pass + + def visitField(self, field, depth): + # XXX need to lookup field.type, because it might be something + # like a builtin... 
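+        # Sequences of the simple sum 'cmpop' are stored as integer
+        # sequences (asdl_int_seq); all other sequences hold node
+        # pointers (asdl_seq).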
+ ctype = get_c_type(field.type) + name = field.name + if field.seq: + if field.type.value in ('cmpop',): + self.emit("asdl_int_seq *%(name)s;" % locals(), depth) + else: + self.emit("asdl_seq *%(name)s;" % locals(), depth) + else: + self.emit("%(ctype)s %(name)s;" % locals(), depth) + + def visitProduct(self, product, name, depth): + self.emit("struct _%(name)s {" % locals(), depth) + for f in product.fields: + self.visit(f, depth + 1) + self.emit("};", depth) + self.emit("", depth) + + +class PrototypeVisitor(EmitVisitor): + """Generate function prototypes for the .h file""" + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, type.name) + + def visitSum(self, sum, name): + if is_simple(sum): + pass # XXX + else: + for t in sum.types: + self.visit(t, name, sum.attributes) + + def get_args(self, fields): + """Return list of C argument into, one for each field. + + Argument info is 3-tuple of a C type, variable name, and flag + that is true if type can be NULL. + """ + args = [] + unnamed = {} + for f in fields: + if f.name is None: + name = f.type + c = unnamed[name] = unnamed.get(name, 0) + 1 + if c > 1: + name = "name%d" % (c - 1) + else: + name = f.name + # XXX should extend get_c_type() to handle this + if f.seq: + if f.type.value in ('cmpop',): + ctype = "asdl_int_seq *" + else: + ctype = "asdl_seq *" + else: + ctype = get_c_type(f.type) + args.append((ctype, name, f.opt or f.seq)) + return args + + def visitConstructor(self, cons, type, attrs): + args = self.get_args(cons.fields) + attrs = self.get_args(attrs) + ctype = get_c_type(type) + self.emit_function(cons.name, ctype, args, attrs) + + def emit_function(self, name, ctype, args, attrs, union=True): + args = args + attrs + if args: + argstr = ", ".join(["%s %s" % (atype, aname) + for atype, aname, opt in args]) + argstr += ", PyArena *arena" + else: + argstr = "PyArena *arena" + margs = "a0" + for i in range(1, len(args)+1): + margs += ", a%d" % i + self.emit("#define %s(%s) _Py_%s(%s)" % (name, margs, name, margs), 0, + reflow=False) + self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), False) + + def visitProduct(self, prod, name): + self.emit_function(name, get_c_type(name), + self.get_args(prod.fields), [], union=False) + + +class FunctionVisitor(PrototypeVisitor): + """Visitor to generate constructor functions for AST.""" + + def emit_function(self, name, ctype, args, attrs, union=True): + def emit(s, depth=0, reflow=True): + self.emit(s, depth, reflow) + argstr = ", ".join(["%s %s" % (atype, aname) + for atype, aname, opt in args + attrs]) + if argstr: + argstr += ", PyArena *arena" + else: + argstr = "PyArena *arena" + self.emit("%s" % ctype, 0) + emit("%s(%s)" % (name, argstr)) + emit("{") + emit("%s p;" % ctype, 1) + for argtype, argname, opt in args: + # XXX hack alert: false is allowed for a bool + if not opt and not (argtype == "bool" or argtype == "int"): + emit("if (!%s) {" % argname, 1) + emit("PyErr_SetString(PyExc_ValueError,", 2) + msg = "field %s is required for %s" % (argname, name) + emit(' "%s");' % msg, + 2, reflow=False) + emit('return NULL;', 2) + emit('}', 1) + + emit("p = (%s)PyArena_Malloc(arena, sizeof(*p));" % ctype, 1); + emit("if (!p)", 1) + emit("return NULL;", 2) + if union: + self.emit_body_union(name, args, attrs) + else: + self.emit_body_struct(name, args, attrs) + emit("return p;", 1) + emit("}") + emit("") + + def emit_body_union(self, name, args, attrs): + def emit(s, depth=0, reflow=True): + self.emit(s, depth, 
reflow) + emit("p->kind = %s_kind;" % name, 1) + for argtype, argname, opt in args: + emit("p->v.%s.%s = %s;" % (name, argname, argname), 1) + for argtype, argname, opt in attrs: + emit("p->%s = %s;" % (argname, argname), 1) + + def emit_body_struct(self, name, args, attrs): + def emit(s, depth=0, reflow=True): + self.emit(s, depth, reflow) + for argtype, argname, opt in args: + emit("p->%s = %s;" % (argname, argname), 1) + assert not attrs + + +class PickleVisitor(EmitVisitor): + + def visitModule(self, mod): + for dfn in mod.dfns: + self.visit(dfn) + + def visitType(self, type): + self.visit(type.value, type.name) + + def visitSum(self, sum, name): + pass + + def visitProduct(self, sum, name): + pass + + def visitConstructor(self, cons, name): + pass + + def visitField(self, sum): + pass + + +class Obj2ModPrototypeVisitor(PickleVisitor): + def visitProduct(self, prod, name): + code = "static int obj2ast_%s(PyObject* obj, %s* out, PyArena* arena);" + self.emit(code % (name, get_c_type(name)), 0) + + visitSum = visitProduct + + +class Obj2ModVisitor(PickleVisitor): + def funcHeader(self, name): + ctype = get_c_type(name) + self.emit("int", 0) + self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0) + self.emit("{", 0) + self.emit("PyObject* tmp = NULL;", 1) + self.emit("int isinstance;", 1) + self.emit("", 0) + + def sumTrailer(self, name): + self.emit("", 0) + self.emit("tmp = PyObject_Repr(obj);", 1) + # there's really nothing more we can do if this fails ... + self.emit("if (tmp == NULL) goto failed;", 1) + error = "expected some sort of %s, but got %%.400s" % name + format = "PyErr_Format(PyExc_TypeError, \"%s\", PyString_AS_STRING(tmp));" + self.emit(format % error, 1, reflow=False) + self.emit("failed:", 0) + self.emit("Py_XDECREF(tmp);", 1) + self.emit("return 1;", 1) + self.emit("}", 0) + self.emit("", 0) + + def simpleSum(self, sum, name): + self.funcHeader(name) + for t in sum.types: + line = ("isinstance = PyObject_IsInstance(obj, " + "(PyObject *)%s_type);") + self.emit(line % (t.name,), 1) + self.emit("if (isinstance == -1) {", 1) + self.emit("return 1;", 2) + self.emit("}", 1) + self.emit("if (isinstance) {", 1) + self.emit("*out = %s;" % t.name, 2) + self.emit("return 0;", 2) + self.emit("}", 1) + self.sumTrailer(name) + + def buildArgs(self, fields): + return ", ".join(fields + ["arena"]) + + def complexSum(self, sum, name): + self.funcHeader(name) + for a in sum.attributes: + self.visitAttributeDeclaration(a, name, sum=sum) + self.emit("", 0) + # XXX: should we only do this for 'expr'? 
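+        # A Python-level None maps to a NULL pointer here, the
+        # representation used for optional fields in the C AST.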
+ self.emit("if (obj == Py_None) {", 1) + self.emit("*out = NULL;", 2) + self.emit("return 0;", 2) + self.emit("}", 1) + for a in sum.attributes: + self.visitField(a, name, sum=sum, depth=1) + for t in sum.types: + line = "isinstance = PyObject_IsInstance(obj, (PyObject*)%s_type);" + self.emit(line % (t.name,), 1) + self.emit("if (isinstance == -1) {", 1) + self.emit("return 1;", 2) + self.emit("}", 1) + self.emit("if (isinstance) {", 1) + for f in t.fields: + self.visitFieldDeclaration(f, t.name, sum=sum, depth=2) + self.emit("", 0) + for f in t.fields: + self.visitField(f, t.name, sum=sum, depth=2) + args = [f.name.value for f in t.fields] + [a.name.value for a in sum.attributes] + self.emit("*out = %s(%s);" % (t.name, self.buildArgs(args)), 2) + self.emit("if (*out == NULL) goto failed;", 2) + self.emit("return 0;", 2) + self.emit("}", 1) + self.sumTrailer(name) + + def visitAttributeDeclaration(self, a, name, sum=sum): + ctype = get_c_type(a.type) + self.emit("%s %s;" % (ctype, a.name), 1) + + def visitSum(self, sum, name): + if is_simple(sum): + self.simpleSum(sum, name) + else: + self.complexSum(sum, name) + + def visitProduct(self, prod, name): + ctype = get_c_type(name) + self.emit("int", 0) + self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0) + self.emit("{", 0) + self.emit("PyObject* tmp = NULL;", 1) + for f in prod.fields: + self.visitFieldDeclaration(f, name, prod=prod, depth=1) + self.emit("", 0) + for f in prod.fields: + self.visitField(f, name, prod=prod, depth=1) + args = [f.name.value for f in prod.fields] + self.emit("*out = %s(%s);" % (name, self.buildArgs(args)), 1) + self.emit("return 0;", 1) + self.emit("failed:", 0) + self.emit("Py_XDECREF(tmp);", 1) + self.emit("return 1;", 1) + self.emit("}", 0) + self.emit("", 0) + + def visitFieldDeclaration(self, field, name, sum=None, prod=None, depth=0): + ctype = get_c_type(field.type) + if field.seq: + if self.isSimpleType(field): + self.emit("asdl_int_seq* %s;" % field.name, depth) + else: + self.emit("asdl_seq* %s;" % field.name, depth) + else: + ctype = get_c_type(field.type) + self.emit("%s %s;" % (ctype, field.name), depth) + + def isSimpleSum(self, field): + # XXX can the members of this list be determined automatically? 
+ return field.type.value in ('expr_context', 'boolop', 'operator', + 'unaryop', 'cmpop') + + def isNumeric(self, field): + return get_c_type(field.type) in ("int", "bool") + + def isSimpleType(self, field): + return self.isSimpleSum(field) or self.isNumeric(field) + + def visitField(self, field, name, sum=None, prod=None, depth=0): + ctype = get_c_type(field.type) + self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth) + self.emit("int res;", depth+1) + if field.seq: + self.emit("Py_ssize_t len;", depth+1) + self.emit("Py_ssize_t i;", depth+1) + self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1) + self.emit("if (tmp == NULL) goto failed;", depth+1) + if field.seq: + self.emit("if (!PyList_Check(tmp)) {", depth+1) + self.emit("PyErr_Format(PyExc_TypeError, \"%s field \\\"%s\\\" must " + "be a list, not a %%.200s\", tmp->ob_type->tp_name);" % + (name, field.name), + depth+2, reflow=False) + self.emit("goto failed;", depth+2) + self.emit("}", depth+1) + self.emit("len = PyList_GET_SIZE(tmp);", depth+1) + if self.isSimpleType(field): + self.emit("%s = asdl_int_seq_new(len, arena);" % field.name, depth+1) + else: + self.emit("%s = asdl_seq_new(len, arena);" % field.name, depth+1) + self.emit("if (%s == NULL) goto failed;" % field.name, depth+1) + self.emit("for (i = 0; i < len; i++) {", depth+1) + self.emit("%s value;" % ctype, depth+2) + self.emit("res = obj2ast_%s(PyList_GET_ITEM(tmp, i), &value, arena);" % + field.type, depth+2, reflow=False) + self.emit("if (res != 0) goto failed;", depth+2) + self.emit("asdl_seq_SET(%s, i, value);" % field.name, depth+2) + self.emit("}", depth+1) + else: + self.emit("res = obj2ast_%s(tmp, &%s, arena);" % + (field.type, field.name), depth+1) + self.emit("if (res != 0) goto failed;", depth+1) + + self.emit("Py_XDECREF(tmp);", depth+1) + self.emit("tmp = NULL;", depth+1) + self.emit("} else {", depth) + if not field.opt: + message = "required field \\\"%s\\\" missing from %s" % (field.name, name) + format = "PyErr_SetString(PyExc_TypeError, \"%s\");" + self.emit(format % message, depth+1, reflow=False) + self.emit("return 1;", depth+1) + else: + if self.isNumeric(field): + self.emit("%s = 0;" % field.name, depth+1) + elif not self.isSimpleType(field): + self.emit("%s = NULL;" % field.name, depth+1) + else: + raise TypeError("could not determine the default value for %s" % field.name) + self.emit("}", depth) + + +class MarshalPrototypeVisitor(PickleVisitor): + + def prototype(self, sum, name): + ctype = get_c_type(name) + self.emit("static int marshal_write_%s(PyObject **, int *, %s);" + % (name, ctype), 0) + + visitProduct = visitSum = prototype + + +class PyTypesDeclareVisitor(PickleVisitor): + + def visitProduct(self, prod, name): + self.emit("static PyTypeObject *%s_type;" % name, 0) + self.emit("static PyObject* ast2obj_%s(void*);" % name, 0) + if prod.fields: + self.emit("static char *%s_fields[]={" % name,0) + for f in prod.fields: + self.emit('"%s",' % f.name, 1) + self.emit("};", 0) + + def visitSum(self, sum, name): + self.emit("static PyTypeObject *%s_type;" % name, 0) + if sum.attributes: + self.emit("static char *%s_attributes[] = {" % name, 0) + for a in sum.attributes: + self.emit('"%s",' % a.name, 1) + self.emit("};", 0) + ptype = "void*" + if is_simple(sum): + ptype = get_c_type(name) + tnames = [] + for t in sum.types: + tnames.append(str(t.name)+"_singleton") + tnames = ", *".join(tnames) + self.emit("static PyObject *%s;" % tnames, 0) + self.emit("static PyObject* ast2obj_%s(%s);" % (name, 
ptype), 0) + for t in sum.types: + self.visitConstructor(t, name) + + def visitConstructor(self, cons, name): + self.emit("static PyTypeObject *%s_type;" % cons.name, 0) + if cons.fields: + self.emit("static char *%s_fields[]={" % cons.name, 0) + for t in cons.fields: + self.emit('"%s",' % t.name, 1) + self.emit("};",0) + +class PyTypesVisitor(PickleVisitor): + + def visitModule(self, mod): + self.emit(""" +static int +ast_type_init(PyObject *self, PyObject *args, PyObject *kw) +{ + Py_ssize_t i, numfields = 0; + int res = -1; + PyObject *key, *value, *fields; + fields = PyObject_GetAttrString((PyObject*)Py_TYPE(self), "_fields"); + if (!fields) + PyErr_Clear(); + if (fields) { + numfields = PySequence_Size(fields); + if (numfields == -1) + goto cleanup; + } + res = 0; /* if no error occurs, this stays 0 to the end */ + if (PyTuple_GET_SIZE(args) > 0) { + if (numfields != PyTuple_GET_SIZE(args)) { + PyErr_Format(PyExc_TypeError, "%.400s constructor takes %s" + "%zd positional argument%s", + Py_TYPE(self)->tp_name, + numfields == 0 ? "" : "either 0 or ", + numfields, numfields == 1 ? "" : "s"); + res = -1; + goto cleanup; + } + for (i = 0; i < PyTuple_GET_SIZE(args); i++) { + /* cannot be reached when fields is NULL */ + PyObject *name = PySequence_GetItem(fields, i); + if (!name) { + res = -1; + goto cleanup; + } + res = PyObject_SetAttr(self, name, PyTuple_GET_ITEM(args, i)); + Py_DECREF(name); + if (res < 0) + goto cleanup; + } + } + if (kw) { + i = 0; /* needed by PyDict_Next */ + while (PyDict_Next(kw, &i, &key, &value)) { + res = PyObject_SetAttr(self, key, value); + if (res < 0) + goto cleanup; + } + } + cleanup: + Py_XDECREF(fields); + return res; +} + +/* Pickling support */ +static PyObject * +ast_type_reduce(PyObject *self, PyObject *unused) +{ + PyObject *res; + PyObject *dict = PyObject_GetAttrString(self, "__dict__"); + if (dict == NULL) { + if (PyErr_ExceptionMatches(PyExc_AttributeError)) + PyErr_Clear(); + else + return NULL; + } + if (dict) { + res = Py_BuildValue("O()O", Py_TYPE(self), dict); + Py_DECREF(dict); + return res; + } + return Py_BuildValue("O()", Py_TYPE(self)); +} + +static PyMethodDef ast_type_methods[] = { + {"__reduce__", ast_type_reduce, METH_NOARGS, NULL}, + {NULL} +}; + +static PyTypeObject AST_type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "_ast.AST", + sizeof(PyObject), + 0, + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + PyObject_GenericSetAttr, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + ast_type_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)ast_type_init, /* tp_init */ + PyType_GenericAlloc, /* tp_alloc */ + PyType_GenericNew, /* tp_new */ + PyObject_Del, /* tp_free */ +}; + + +static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int num_fields) +{ + PyObject *fnames, *result; + int i; + fnames = PyTuple_New(num_fields); + if (!fnames) return NULL; + for (i = 0; i < num_fields; i++) { + PyObject *field = 
PyString_FromString(fields[i]); + if (!field) { + Py_DECREF(fnames); + return NULL; + } + PyTuple_SET_ITEM(fnames, i, field); + } + result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){sOss}", + type, base, "_fields", fnames, "__module__", "_ast"); + Py_DECREF(fnames); + return (PyTypeObject*)result; +} + +static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) +{ + int i, result; + PyObject *s, *l = PyTuple_New(num_fields); + if (!l) + return 0; + for (i = 0; i < num_fields; i++) { + s = PyString_FromString(attrs[i]); + if (!s) { + Py_DECREF(l); + return 0; + } + PyTuple_SET_ITEM(l, i, s); + } + result = PyObject_SetAttrString((PyObject*)type, "_attributes", l) >= 0; + Py_DECREF(l); + return result; +} + +/* Conversion AST -> Python */ + +static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*)) +{ + int i, n = asdl_seq_LEN(seq); + PyObject *result = PyList_New(n); + PyObject *value; + if (!result) + return NULL; + for (i = 0; i < n; i++) { + value = func(asdl_seq_GET(seq, i)); + if (!value) { + Py_DECREF(result); + return NULL; + } + PyList_SET_ITEM(result, i, value); + } + return result; +} + +static PyObject* ast2obj_object(void *o) +{ + if (!o) + o = Py_None; + Py_INCREF((PyObject*)o); + return (PyObject*)o; +} +#define ast2obj_identifier ast2obj_object +#define ast2obj_string ast2obj_object +static PyObject* ast2obj_bool(bool b) +{ + return PyBool_FromLong(b); +} + +static PyObject* ast2obj_int(long b) +{ + return PyInt_FromLong(b); +} + +/* Conversion Python -> AST */ + +static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena) +{ + if (obj == Py_None) + obj = NULL; + if (obj) + PyArena_AddPyObject(arena, obj); + Py_XINCREF(obj); + *out = obj; + return 0; +} + +#define obj2ast_identifier obj2ast_object +#define obj2ast_string obj2ast_object + +static int obj2ast_int(PyObject* obj, int* out, PyArena* arena) +{ + int i; + if (!PyInt_Check(obj) && !PyLong_Check(obj)) { + PyObject *s = PyObject_Repr(obj); + if (s == NULL) return 1; + PyErr_Format(PyExc_ValueError, "invalid integer value: %.400s", + PyString_AS_STRING(s)); + Py_DECREF(s); + return 1; + } + + i = (int)PyLong_AsLong(obj); + if (i == -1 && PyErr_Occurred()) + return 1; + *out = i; + return 0; +} + +static int obj2ast_bool(PyObject* obj, bool* out, PyArena* arena) +{ + if (!PyBool_Check(obj)) { + PyObject *s = PyObject_Repr(obj); + if (s == NULL) return 1; + PyErr_Format(PyExc_ValueError, "invalid boolean value: %.400s", + PyString_AS_STRING(s)); + Py_DECREF(s); + return 1; + } + + *out = (obj == Py_True); + return 0; +} + +static int add_ast_fields(void) +{ + PyObject *empty_tuple, *d; + if (PyType_Ready(&AST_type) < 0) + return -1; + d = AST_type.tp_dict; + empty_tuple = PyTuple_New(0); + if (!empty_tuple || + PyDict_SetItemString(d, "_fields", empty_tuple) < 0 || + PyDict_SetItemString(d, "_attributes", empty_tuple) < 0) { + Py_XDECREF(empty_tuple); + return -1; + } + Py_DECREF(empty_tuple); + return 0; +} + +""", 0, reflow=False) + + self.emit("static int init_types(void)",0) + self.emit("{", 0) + self.emit("static int initialized;", 1) + self.emit("if (initialized) return 1;", 1) + self.emit("if (add_ast_fields() < 0) return 0;", 1) + for dfn in mod.dfns: + self.visit(dfn) + self.emit("initialized = 1;", 1) + self.emit("return 1;", 1); + self.emit("}", 0) + + def visitProduct(self, prod, name): + if prod.fields: + fields = name.value+"_fields" + else: + fields = "NULL" + self.emit('%s_type = make_type("%s", &AST_type, %s, %d);' % + (name, name, fields, 
len(prod.fields)), 1) + self.emit("if (!%s_type) return 0;" % name, 1) + + def visitSum(self, sum, name): + self.emit('%s_type = make_type("%s", &AST_type, NULL, 0);' % + (name, name), 1) + self.emit("if (!%s_type) return 0;" % name, 1) + if sum.attributes: + self.emit("if (!add_attributes(%s_type, %s_attributes, %d)) return 0;" % + (name, name, len(sum.attributes)), 1) + else: + self.emit("if (!add_attributes(%s_type, NULL, 0)) return 0;" % name, 1) + simple = is_simple(sum) + for t in sum.types: + self.visitConstructor(t, name, simple) + + def visitConstructor(self, cons, name, simple): + if cons.fields: + fields = cons.name.value+"_fields" + else: + fields = "NULL" + self.emit('%s_type = make_type("%s", %s_type, %s, %d);' % + (cons.name, cons.name, name, fields, len(cons.fields)), 1) + self.emit("if (!%s_type) return 0;" % cons.name, 1) + if simple: + self.emit("%s_singleton = PyType_GenericNew(%s_type, NULL, NULL);" % + (cons.name, cons.name), 1) + self.emit("if (!%s_singleton) return 0;" % cons.name, 1) + + +def parse_version(mod): + return mod.version.value[12:-3] + +class ASTModuleVisitor(PickleVisitor): + + def visitModule(self, mod): + self.emit("PyMODINIT_FUNC", 0) + self.emit("init_ast(void)", 0) + self.emit("{", 0) + self.emit("PyObject *m, *d;", 1) + self.emit("if (!init_types()) return;", 1) + self.emit('m = Py_InitModule3("_ast", NULL, NULL);', 1) + self.emit("if (!m) return;", 1) + self.emit("d = PyModule_GetDict(m);", 1) + self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return;', 1) + self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1) + self.emit("return;", 2) + # Value of version: "$Revision$" + self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)' + % parse_version(mod), 1) + self.emit("return;", 2) + for dfn in mod.dfns: + self.visit(dfn) + self.emit("}", 0) + + def visitProduct(self, prod, name): + self.addObj(name) + + def visitSum(self, sum, name): + self.addObj(name) + for t in sum.types: + self.visitConstructor(t, name) + + def visitConstructor(self, cons, name): + self.addObj(cons.name) + + def addObj(self, name): + self.emit('if (PyDict_SetItemString(d, "%s", (PyObject*)%s_type) < 0) return;' % (name, name), 1) + + +_SPECIALIZED_SEQUENCES = ('stmt', 'expr') + +def find_sequence(fields, doing_specialization): + """Return True if any field uses a sequence.""" + for f in fields: + if f.seq: + if not doing_specialization: + return True + if str(f.type) not in _SPECIALIZED_SEQUENCES: + return True + return False + +def has_sequence(types, doing_specialization): + for t in types: + if find_sequence(t.fields, doing_specialization): + return True + return False + + +class StaticVisitor(PickleVisitor): + CODE = '''Very simple, always emit this static code. 
Overide CODE''' + + def visit(self, object): + self.emit(self.CODE, 0, reflow=False) + + +class ObjVisitor(PickleVisitor): + + def func_begin(self, name): + ctype = get_c_type(name) + self.emit("PyObject*", 0) + self.emit("ast2obj_%s(void* _o)" % (name), 0) + self.emit("{", 0) + self.emit("%s o = (%s)_o;" % (ctype, ctype), 1) + self.emit("PyObject *result = NULL, *value = NULL;", 1) + self.emit('if (!o) {', 1) + self.emit("Py_INCREF(Py_None);", 2) + self.emit('return Py_None;', 2) + self.emit("}", 1) + self.emit('', 0) + + def func_end(self): + self.emit("return result;", 1) + self.emit("failed:", 0) + self.emit("Py_XDECREF(value);", 1) + self.emit("Py_XDECREF(result);", 1) + self.emit("return NULL;", 1) + self.emit("}", 0) + self.emit("", 0) + + def visitSum(self, sum, name): + if is_simple(sum): + self.simpleSum(sum, name) + return + self.func_begin(name) + self.emit("switch (o->kind) {", 1) + for i in range(len(sum.types)): + t = sum.types[i] + self.visitConstructor(t, i + 1, name) + self.emit("}", 1) + for a in sum.attributes: + self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1) + self.emit("if (!value) goto failed;", 1) + self.emit('if (PyObject_SetAttrString(result, "%s", value) < 0)' % a.name, 1) + self.emit('goto failed;', 2) + self.emit('Py_DECREF(value);', 1) + self.func_end() + + def simpleSum(self, sum, name): + self.emit("PyObject* ast2obj_%s(%s_ty o)" % (name, name), 0) + self.emit("{", 0) + self.emit("switch(o) {", 1) + for t in sum.types: + self.emit("case %s:" % t.name, 2) + self.emit("Py_INCREF(%s_singleton);" % t.name, 3) + self.emit("return %s_singleton;" % t.name, 3) + self.emit("default:" % name, 2) + self.emit('/* should never happen, but just in case ... */', 3) + code = "PyErr_Format(PyExc_SystemError, \"unknown %s found\");" % name + self.emit(code, 3, reflow=False) + self.emit("return NULL;", 3) + self.emit("}", 1) + self.emit("}", 0) + + def visitProduct(self, prod, name): + self.func_begin(name) + self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % name, 1); + self.emit("if (!result) return NULL;", 1) + for field in prod.fields: + self.visitField(field, name, 1, True) + self.func_end() + + def visitConstructor(self, cons, enum, name): + self.emit("case %s_kind:" % cons.name, 1) + self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % cons.name, 2); + self.emit("if (!result) goto failed;", 2) + for f in cons.fields: + self.visitField(f, cons.name, 2, False) + self.emit("break;", 2) + + def visitField(self, field, name, depth, product): + def emit(s, d): + self.emit(s, depth + d) + if product: + value = "o->%s" % field.name + else: + value = "o->v.%s.%s" % (name, field.name) + self.set(field, value, depth) + emit("if (!value) goto failed;", 0) + emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0) + emit("goto failed;", 1) + emit("Py_DECREF(value);", 0) + + def emitSeq(self, field, value, depth, emit): + emit("seq = %s;" % value, 0) + emit("n = asdl_seq_LEN(seq);", 0) + emit("value = PyList_New(n);", 0) + emit("if (!value) goto failed;", 0) + emit("for (i = 0; i < n; i++) {", 0) + self.set("value", field, "asdl_seq_GET(seq, i)", depth + 1) + emit("if (!value1) goto failed;", 1) + emit("PyList_SET_ITEM(value, i, value1);", 1) + emit("value1 = NULL;", 1) + emit("}", 0) + + def set(self, field, value, depth): + if field.seq: + # XXX should really check for is_simple, but that requires a symbol table + if field.type.value == "cmpop": + # While the sequence elements are stored as void*, + # ast2obj_cmpop expects 
an enum + self.emit("{", depth) + self.emit("int i, n = asdl_seq_LEN(%s);" % value, depth+1) + self.emit("value = PyList_New(n);", depth+1) + self.emit("if (!value) goto failed;", depth+1) + self.emit("for(i = 0; i < n; i++)", depth+1) + # This cannot fail, so no need for error handling + self.emit("PyList_SET_ITEM(value, i, ast2obj_cmpop((cmpop_ty)asdl_seq_GET(%s, i)));" % value, + depth+2, reflow=False) + self.emit("}", depth) + else: + self.emit("value = ast2obj_list(%s, ast2obj_%s);" % (value, field.type), depth) + else: + ctype = get_c_type(field.type) + self.emit("value = ast2obj_%s(%s);" % (field.type, value), depth, reflow=False) + + +class PartingShots(StaticVisitor): + + CODE = """ +PyObject* PyAST_mod2obj(mod_ty t) +{ + init_types(); + return ast2obj_mod(t); +} + +/* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */ +mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode) +{ + mod_ty res; + PyObject *req_type[] = {(PyObject*)Module_type, (PyObject*)Expression_type, + (PyObject*)Interactive_type}; + char *req_name[] = {"Module", "Expression", "Interactive"}; + int isinstance; + assert(0 <= mode && mode <= 2); + + init_types(); + + isinstance = PyObject_IsInstance(ast, req_type[mode]); + if (isinstance == -1) + return NULL; + if (!isinstance) { + PyErr_Format(PyExc_TypeError, "expected %s node, got %.400s", + req_name[mode], Py_TYPE(ast)->tp_name); + return NULL; + } + if (obj2ast_mod(ast, &res, arena) != 0) + return NULL; + else + return res; +} + +int PyAST_Check(PyObject* obj) +{ + init_types(); + return PyObject_IsInstance(obj, (PyObject*)&AST_type); +} +""" + +class ChainOfVisitors: + def __init__(self, *visitors): + self.visitors = visitors + + def visit(self, object): + for v in self.visitors: + v.visit(object) + v.emit("", 0) + +common_msg = "/* File automatically generated by %s. */\n\n" + +c_file_msg = """ +/* + __version__ %s. + + This module must be committed separately after each AST grammar change; + The __version__ number is set to the revision number of the commit + containing the grammar change. 
+*/ + +""" + +def main(srcfile): + argv0 = sys.argv[0] + components = argv0.split(os.sep) + argv0 = os.sep.join(components[-2:]) + auto_gen_msg = common_msg % argv0 + mod = asdl.parse(srcfile) + if not asdl.check(mod): + sys.exit(1) + if INC_DIR: + p = "%s/%s-ast.h" % (INC_DIR, mod.name) + f = open(p, "wb") + f.write(auto_gen_msg) + f.write('#include "asdl.h"\n\n') + c = ChainOfVisitors(TypeDefVisitor(f), + StructVisitor(f), + PrototypeVisitor(f), + ) + c.visit(mod) + f.write("PyObject* PyAST_mod2obj(mod_ty t);\n") + f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n") + f.write("int PyAST_Check(PyObject* obj);\n") + f.close() + + if SRC_DIR: + p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c") + f = open(p, "wb") + f.write(auto_gen_msg) + f.write(c_file_msg % parse_version(mod)) + f.write('#include "Python.h"\n') + f.write('#include "%s-ast.h"\n' % mod.name) + f.write('\n') + f.write("static PyTypeObject AST_type;\n") + v = ChainOfVisitors( + PyTypesDeclareVisitor(f), + PyTypesVisitor(f), + Obj2ModPrototypeVisitor(f), + FunctionVisitor(f), + ObjVisitor(f), + Obj2ModVisitor(f), + ASTModuleVisitor(f), + PartingShots(f), + ) + v.visit(mod) + f.close() + +if __name__ == "__main__": + import sys + import getopt + + INC_DIR = '' + SRC_DIR = '' + opts, args = getopt.getopt(sys.argv[1:], "h:c:") + if len(opts) != 1: + print "Must specify exactly one output file" + sys.exit(1) + for o, v in opts: + if o == '-h': + INC_DIR = v + if o == '-c': + SRC_DIR = v + if len(args) != 1: + print "Must specify single input file" + sys.exit(1) + main(args[0]) diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/bitset.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/bitset.c new file mode 100644 index 0000000000..3bf5da1dba --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/bitset.c @@ -0,0 +1,66 @@ + +/* Bitset primitives used by the parser generator */ + +#include "pgenheaders.h" +#include "bitset.h" + +bitset +newbitset(int nbits) +{ + int nbytes = NBYTES(nbits); + bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes); + + if (ss == NULL) + Py_FatalError("no mem for bitset"); + + ss += nbytes; + while (--nbytes >= 0) + *--ss = 0; + return ss; +} + +void +delbitset(bitset ss) +{ + PyObject_FREE(ss); +} + +int +addbit(bitset ss, int ibit) +{ + int ibyte = BIT2BYTE(ibit); + BYTE mask = BIT2MASK(ibit); + + if (ss[ibyte] & mask) + return 0; /* Bit already set */ + ss[ibyte] |= mask; + return 1; +} + +#if 0 /* Now a macro */ +int +testbit(bitset ss, int ibit) +{ + return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0; +} +#endif + +int +samebitset(bitset ss1, bitset ss2, int nbits) +{ + int i; + + for (i = NBYTES(nbits); --i >= 0; ) + if (*ss1++ != *ss2++) + return 0; + return 1; +} + +void +mergebitset(bitset ss1, bitset ss2, int nbits) +{ + int i; + + for (i = NBYTES(nbits); --i >= 0; ) + *ss1++ |= *ss2++; +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/firstsets.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/firstsets.c new file mode 100644 index 0000000000..69faf8f09e --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/firstsets.c @@ -0,0 +1,113 @@ + +/* Computation of FIRST stets */ + +#include "pgenheaders.h" +#include "grammar.h" +#include "token.h" + +extern int Py_DebugFlag; + +/* Forward */ +static void calcfirstset(grammar *, dfa *); + +void +addfirstsets(grammar *g) +{ + int i; + dfa *d; + + if (Py_DebugFlag) + printf("Adding FIRST sets ...\n"); + for (i = 0; i < g->g_ndfas; i++) { + d = &g->g_dfa[i]; + if (d->d_first 
== NULL) + calcfirstset(g, d); + } +} + +static void +calcfirstset(grammar *g, dfa *d) +{ + int i, j; + state *s; + arc *a; + int nsyms; + int *sym; + int nbits; + static bitset dummy; + bitset result; + int type; + dfa *d1; + label *l0; + + if (Py_DebugFlag) + printf("Calculate FIRST set for '%s'\n", d->d_name); + + if (dummy == NULL) + dummy = newbitset(1); + if (d->d_first == dummy) { + fprintf(stderr, "Left-recursion for '%s'\n", d->d_name); + return; + } + if (d->d_first != NULL) { + fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n", + d->d_name); + } + d->d_first = dummy; + + l0 = g->g_ll.ll_label; + nbits = g->g_ll.ll_nlabels; + result = newbitset(nbits); + + sym = (int *)PyObject_MALLOC(sizeof(int)); + if (sym == NULL) + Py_FatalError("no mem for new sym in calcfirstset"); + nsyms = 1; + sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL); + + s = &d->d_state[d->d_initial]; + for (i = 0; i < s->s_narcs; i++) { + a = &s->s_arc[i]; + for (j = 0; j < nsyms; j++) { + if (sym[j] == a->a_lbl) + break; + } + if (j >= nsyms) { /* New label */ + sym = (int *)PyObject_REALLOC(sym, + sizeof(int) * (nsyms + 1)); + if (sym == NULL) + Py_FatalError( + "no mem to resize sym in calcfirstset"); + sym[nsyms++] = a->a_lbl; + type = l0[a->a_lbl].lb_type; + if (ISNONTERMINAL(type)) { + d1 = PyGrammar_FindDFA(g, type); + if (d1->d_first == dummy) { + fprintf(stderr, + "Left-recursion below '%s'\n", + d->d_name); + } + else { + if (d1->d_first == NULL) + calcfirstset(g, d1); + mergebitset(result, + d1->d_first, nbits); + } + } + else if (ISTERMINAL(type)) { + addbit(result, a->a_lbl); + } + } + } + d->d_first = result; + if (Py_DebugFlag) { + printf("FIRST set for '%s': {", d->d_name); + for (i = 0; i < nbits; i++) { + if (testbit(result, i)) + printf(" %s", PyGrammar_LabelRepr(&l0[i])); + } + printf(" }\n"); + } + + PyObject_FREE(sym); +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/grammar.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/grammar.c new file mode 100644 index 0000000000..4cce422d1a --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/grammar.c @@ -0,0 +1,254 @@ + +/* Grammar implementation */ + +#include "Python.h" +#include "pgenheaders.h" + +#include <ctype.h> + +#include "token.h" +#include "grammar.h" + +#ifdef RISCOS +#include <unixlib.h> +#endif + +extern int Py_DebugFlag; + +grammar * +newgrammar(int start) +{ + grammar *g; + + g = (grammar *)PyObject_MALLOC(sizeof(grammar)); + if (g == NULL) + Py_FatalError("no mem for new grammar"); + g->g_ndfas = 0; + g->g_dfa = NULL; + g->g_start = start; + g->g_ll.ll_nlabels = 0; + g->g_ll.ll_label = NULL; + g->g_accel = 0; + return g; +} + +dfa * +adddfa(grammar *g, int type, char *name) +{ + dfa *d; + + g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, + sizeof(dfa) * (g->g_ndfas + 1)); + if (g->g_dfa == NULL) + Py_FatalError("no mem to resize dfa in adddfa"); + d = &g->g_dfa[g->g_ndfas++]; + d->d_type = type; + d->d_name = strdup(name); + d->d_nstates = 0; + d->d_state = NULL; + d->d_initial = -1; + d->d_first = NULL; + return d; /* Only use while fresh!
*/ +} + +int +addstate(dfa *d) +{ + state *s; + + d->d_state = (state *)PyObject_REALLOC(d->d_state, + sizeof(state) * (d->d_nstates + 1)); + if (d->d_state == NULL) + Py_FatalError("no mem to resize state in addstate"); + s = &d->d_state[d->d_nstates++]; + s->s_narcs = 0; + s->s_arc = NULL; + s->s_lower = 0; + s->s_upper = 0; + s->s_accel = NULL; + s->s_accept = 0; + return s - d->d_state; +} + +void +addarc(dfa *d, int from, int to, int lbl) +{ + state *s; + arc *a; + + assert(0 <= from && from < d->d_nstates); + assert(0 <= to && to < d->d_nstates); + + s = &d->d_state[from]; + s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); + if (s->s_arc == NULL) + Py_FatalError("no mem to resize arc list in addarc"); + a = &s->s_arc[s->s_narcs++]; + a->a_lbl = lbl; + a->a_arrow = to; +} + +int +addlabel(labellist *ll, int type, char *str) +{ + int i; + label *lb; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type && + strcmp(ll->ll_label[i].lb_str, str) == 0) + return i; + } + ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, + sizeof(label) * (ll->ll_nlabels + 1)); + if (ll->ll_label == NULL) + Py_FatalError("no mem to resize labellist in addlabel"); + lb = &ll->ll_label[ll->ll_nlabels++]; + lb->lb_type = type; + lb->lb_str = strdup(str); + if (Py_DebugFlag) + printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, + PyGrammar_LabelRepr(lb)); + return lb - ll->ll_label; +} + +/* Same, but rather dies than adds */ + +int +findlabel(labellist *ll, int type, char *str) +{ + int i; + + for (i = 0; i < ll->ll_nlabels; i++) { + if (ll->ll_label[i].lb_type == type /*&& + strcmp(ll->ll_label[i].lb_str, str) == 0*/) + return i; + } + fprintf(stderr, "Label %d/'%s' not found\n", type, str); + Py_FatalError("grammar.c:findlabel()"); + return 0; /* Make gcc -Wall happy */ +} + +/* Forward */ +static void translabel(grammar *, label *); + +void +translatelabels(grammar *g) +{ + int i; + +#ifdef Py_DEBUG + printf("Translating labels ...\n"); +#endif + /* Don't translate EMPTY */ + for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) + translabel(g, &g->g_ll.ll_label[i]); +} + +static void +translabel(grammar *g, label *lb) +{ + int i; + + if (Py_DebugFlag) + printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); + + if (lb->lb_type == NAME) { + for (i = 0; i < g->g_ndfas; i++) { + if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { + if (Py_DebugFlag) + printf( + "Label %s is non-terminal %d.\n", + lb->lb_str, + g->g_dfa[i].d_type); + lb->lb_type = g->g_dfa[i].d_type; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + for (i = 0; i < (int)N_TOKENS; i++) { + if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { + if (Py_DebugFlag) + printf("Label %s is terminal %d.\n", + lb->lb_str, i); + lb->lb_type = i; + free(lb->lb_str); + lb->lb_str = NULL; + return; + } + } + printf("Can't translate NAME label '%s'\n", lb->lb_str); + return; + } + + if (lb->lb_type == STRING) { + if (isalpha(Py_CHARMASK(lb->lb_str[1])) || + lb->lb_str[1] == '_') { + char *p; + char *src; + char *dest; + size_t name_len; + if (Py_DebugFlag) + printf("Label %s is a keyword\n", lb->lb_str); + lb->lb_type = NAME; + src = lb->lb_str + 1; + p = strchr(src, '\''); + if (p) + name_len = p - src; + else + name_len = strlen(src); + dest = (char *)malloc(name_len + 1); + if (!dest) { + printf("Can't alloc dest '%s'\n", src); + return; + } + strncpy(dest, src, name_len); + dest[name_len] = '\0'; + free(lb->lb_str); + lb->lb_str = dest; + } + else if (lb->lb_str[2] == 
lb->lb_str[0]) { + int type = (int) PyToken_OneChar(lb->lb_str[1]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { + int type = (int) PyToken_TwoChars(lb->lb_str[1], + lb->lb_str[2]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { + int type = (int) PyToken_ThreeChars(lb->lb_str[1], + lb->lb_str[2], + lb->lb_str[3]); + if (type != OP) { + lb->lb_type = type; + free(lb->lb_str); + lb->lb_str = NULL; + } + else + printf("Unknown OP label %s\n", + lb->lb_str); + } + else + printf("Can't translate STRING label %s\n", + lb->lb_str); + } + else + printf("Can't translate label '%s'\n", + PyGrammar_LabelRepr(lb)); +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/grammar1.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/grammar1.c new file mode 100644 index 0000000000..27db22f3f9 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/grammar1.c @@ -0,0 +1,57 @@ + +/* Grammar subroutines needed by parser */ + +#include "Python.h" +#include "pgenheaders.h" +#include "grammar.h" +#include "token.h" + +/* Return the DFA for the given type */ + +dfa * +PyGrammar_FindDFA(grammar *g, register int type) +{ + register dfa *d; +#if 1 + /* Massive speed-up */ + d = &g->g_dfa[type - NT_OFFSET]; + assert(d->d_type == type); + return d; +#else + /* Old, slow version */ + register int i; + + for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) { + if (d->d_type == type) + return d; + } + assert(0); + /* NOTREACHED */ +#endif +} + +char * +PyGrammar_LabelRepr(label *lb) +{ + static char buf[100]; + + if (lb->lb_type == ENDMARKER) + return "EMPTY"; + else if (ISNONTERMINAL(lb->lb_type)) { + if (lb->lb_str == NULL) { + PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type); + return buf; + } + else + return lb->lb_str; + } + else { + if (lb->lb_str == NULL) + return _PyParser_TokenNames[lb->lb_type]; + else { + PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)", + _PyParser_TokenNames[lb->lb_type], lb->lb_str); + return buf; + } + } +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/intrcheck.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/intrcheck.c new file mode 100644 index 0000000000..bc2b8e4315 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/intrcheck.c @@ -0,0 +1,178 @@ + +/* Check for interrupts */ + +#include "Python.h" +#include "pythread.h" + +#ifdef QUICKWIN + +#include <io.h> + +void +PyOS_InitInterrupts(void) +{ +} + +void +PyOS_FiniInterrupts(void) +{ +} + +int +PyOS_InterruptOccurred(void) +{ + _wyield(); +} + +#define OK + +#endif /* QUICKWIN */ + +#if defined(_M_IX86) && !defined(__QNX__) +#include <dos.h> +#endif + +#if defined(MSDOS) && !defined(QUICKWIN) + +#ifdef __GNUC__ + +/* This is for DJGPP's GO32 extender. I don't know how to trap + * control-C (There's no API for ctrl-C, and I don't want to mess with + * the interrupt vectors.) However, this DOES catch control-break.
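Every port in this file shares one shape: something records that an interrupt happened, and PyOS_InterruptOccurred() reports and clears that fact. A self-contained sketch of that flag idiom, assuming only standard C signals (our names, not CPython's):

#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t got_interrupt;   /* cf. 'interrupted' below */

static void on_sigint(int sig)
{
    (void)sig;
    got_interrupt = 1;
    signal(SIGINT, on_sigint);   /* re-arm, as the pre-sigaction code must */
}

/* Shape of PyOS_InterruptOccurred(): report each interrupt exactly once. */
static int interrupt_occurred(void)
{
    if (!got_interrupt)
        return 0;
    got_interrupt = 0;
    return 1;
}

int main(void)
{
    signal(SIGINT, on_sigint);
    while (!interrupt_occurred())
        ;                        /* spin until Ctrl-C arrives */
    puts("interrupt seen");
    return 0;
}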
+ * --Amrit + */ + +#include <go32.h> + +void +PyOS_InitInterrupts(void) +{ + _go32_want_ctrl_break(1 /* TRUE */); +} + +void +PyOS_FiniInterrupts(void) +{ +} + +int +PyOS_InterruptOccurred(void) +{ + return _go32_was_ctrl_break_hit(); +} + +#else /* !__GNUC__ */ + +/* This might work for MS-DOS (untested though): */ + +void +PyOS_InitInterrupts(void) +{ +} + +void +PyOS_FiniInterrupts(void) +{ +} + +int +PyOS_InterruptOccurred(void) +{ + int interrupted = 0; + while (kbhit()) { + if (getch() == '\003') + interrupted = 1; + } + return interrupted; +} + +#endif /* __GNUC__ */ + +#define OK + +#endif /* MSDOS && !QUICKWIN */ + + +#ifndef OK + +/* Default version -- for real operating systems and for Standard C */ + +#include <stdio.h> +#include <string.h> +#include <signal.h> + +static int interrupted; + +void +PyErr_SetInterrupt(void) +{ + interrupted = 1; +} + +extern int PyErr_CheckSignals(void); + +static int +checksignals_witharg(void * arg) +{ + return PyErr_CheckSignals(); +} + +static void +intcatcher(int sig) +{ + extern void Py_Exit(int); + static char message[] = +"python: to interrupt a truly hanging Python program, interrupt once more.\n"; + switch (interrupted++) { + case 0: + break; + case 1: +#ifdef RISCOS + fprintf(stderr, message); +#else + write(2, message, strlen(message)); +#endif + break; + case 2: + interrupted = 0; + Py_Exit(1); + break; + } + PyOS_setsig(SIGINT, intcatcher); + Py_AddPendingCall(checksignals_witharg, NULL); +} + +static void (*old_siginthandler)(int) = SIG_DFL; + +void +PyOS_InitInterrupts(void) +{ + if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) + PyOS_setsig(SIGINT, intcatcher); +} + +void +PyOS_FiniInterrupts(void) +{ + PyOS_setsig(SIGINT, old_siginthandler); +} + +int +PyOS_InterruptOccurred(void) +{ + if (!interrupted) + return 0; + interrupted = 0; + return 1; +} + +#endif /* !OK */ + +void +PyOS_AfterFork(void) +{ +#ifdef WITH_THREAD + PyEval_ReInitThreads(); + PyThread_ReInitTLS(); +#endif +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/listnode.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/listnode.c new file mode 100644 index 0000000000..8d59233599 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/listnode.c @@ -0,0 +1,66 @@ + +/* List a node on a file */ + +#include "pgenheaders.h" +#include "token.h" +#include "node.h" + +/* Forward */ +static void list1node(FILE *, node *); +static void listnode(FILE *, node *); + +void +PyNode_ListTree(node *n) +{ + listnode(stdout, n); +} + +static int level, atbol; + +static void
listnode(FILE *fp, node *n) +{ + level = 0; + atbol = 1; + list1node(fp, n); +} + +static void +list1node(FILE *fp, node *n) +{ + if (n == 0) + return; + if (ISNONTERMINAL(TYPE(n))) { + int i; + for (i = 0; i < NCH(n); i++) + list1node(fp, CHILD(n, i)); + } + else if (ISTERMINAL(TYPE(n))) { + switch (TYPE(n)) { + case INDENT: + ++level; + break; + case DEDENT: + --level; + break; + default: + if (atbol) { + int i; + for (i = 0; i < level; ++i) + fprintf(fp, "\t"); + atbol = 0; + } + if (TYPE(n) == NEWLINE) { + if (STR(n) != NULL) + fprintf(fp, "%s", STR(n)); + fprintf(fp, "\n"); + atbol = 1; + } + else + fprintf(fp, "%s ", STR(n)); + break; + } + } + else + fprintf(fp, "?
"); +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/metagrammar.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/metagrammar.c new file mode 100644 index 0000000000..299ccaa079 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/metagrammar.c @@ -0,0 +1,159 @@ + +#include "pgenheaders.h" +#include "metagrammar.h" +#include "grammar.h" +#include "pgen.h" +static arc arcs_0_0[3] = { + {2, 0}, + {3, 0}, + {4, 1}, +}; +static arc arcs_0_1[1] = { + {0, 1}, +}; +static state states_0[2] = { + {3, arcs_0_0}, + {1, arcs_0_1}, +}; +static arc arcs_1_0[1] = { + {5, 1}, +}; +static arc arcs_1_1[1] = { + {6, 2}, +}; +static arc arcs_1_2[1] = { + {7, 3}, +}; +static arc arcs_1_3[1] = { + {3, 4}, +}; +static arc arcs_1_4[1] = { + {0, 4}, +}; +static state states_1[5] = { + {1, arcs_1_0}, + {1, arcs_1_1}, + {1, arcs_1_2}, + {1, arcs_1_3}, + {1, arcs_1_4}, +}; +static arc arcs_2_0[1] = { + {8, 1}, +}; +static arc arcs_2_1[2] = { + {9, 0}, + {0, 1}, +}; +static state states_2[2] = { + {1, arcs_2_0}, + {2, arcs_2_1}, +}; +static arc arcs_3_0[1] = { + {10, 1}, +}; +static arc arcs_3_1[2] = { + {10, 1}, + {0, 1}, +}; +static state states_3[2] = { + {1, arcs_3_0}, + {2, arcs_3_1}, +}; +static arc arcs_4_0[2] = { + {11, 1}, + {13, 2}, +}; +static arc arcs_4_1[1] = { + {7, 3}, +}; +static arc arcs_4_2[3] = { + {14, 4}, + {15, 4}, + {0, 2}, +}; +static arc arcs_4_3[1] = { + {12, 4}, +}; +static arc arcs_4_4[1] = { + {0, 4}, +}; +static state states_4[5] = { + {2, arcs_4_0}, + {1, arcs_4_1}, + {3, arcs_4_2}, + {1, arcs_4_3}, + {1, arcs_4_4}, +}; +static arc arcs_5_0[3] = { + {5, 1}, + {16, 1}, + {17, 2}, +}; +static arc arcs_5_1[1] = { + {0, 1}, +}; +static arc arcs_5_2[1] = { + {7, 3}, +}; +static arc arcs_5_3[1] = { + {18, 1}, +}; +static state states_5[4] = { + {3, arcs_5_0}, + {1, arcs_5_1}, + {1, arcs_5_2}, + {1, arcs_5_3}, +}; +static dfa dfas[6] = { + {256, "MSTART", 0, 2, states_0, + "\070\000\000"}, + {257, "RULE", 0, 5, states_1, + "\040\000\000"}, + {258, "RHS", 0, 2, states_2, + "\040\010\003"}, + {259, "ALT", 0, 2, states_3, + "\040\010\003"}, + {260, "ITEM", 0, 5, states_4, + "\040\010\003"}, + {261, "ATOM", 0, 4, states_5, + "\040\000\003"}, +}; +static label labels[19] = { + {0, "EMPTY"}, + {256, 0}, + {257, 0}, + {4, 0}, + {0, 0}, + {1, 0}, + {11, 0}, + {258, 0}, + {259, 0}, + {18, 0}, + {260, 0}, + {9, 0}, + {10, 0}, + {261, 0}, + {16, 0}, + {14, 0}, + {3, 0}, + {7, 0}, + {8, 0}, +}; +static grammar _PyParser_Grammar = { + 6, + dfas, + {19, labels}, + 256 +}; + +grammar * +meta_grammar(void) +{ + return &_PyParser_Grammar; +} + +grammar * +Py_meta_grammar(void) +{ + return meta_grammar(); +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/myreadline.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/myreadline.c new file mode 100644 index 0000000000..05d74a4e4f --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/myreadline.c @@ -0,0 +1,221 @@ + +/* Readline interface for tokenizer.c and [raw_]input() in bltinmodule.c. + By default, or when stdin is not a tty device, we have a super + simple my_readline function using fgets. + Optionally, we can use the GNU readline library. 
+ my_readline() has a different return value from GNU readline(): + - NULL if an interrupt occurred or if an error occurred + - a malloc'ed empty string if EOF was read + - a malloc'ed string ending in \n normally +*/ + +#include "Python.h" +#ifdef MS_WINDOWS +#define WIN32_LEAN_AND_MEAN +#include "windows.h" +#endif /* MS_WINDOWS */ + +#ifdef __VMS +extern char* vms__StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt); +#endif + + +PyThreadState* _PyOS_ReadlineTState; + +#ifdef WITH_THREAD +#include "pythread.h" +static PyThread_type_lock _PyOS_ReadlineLock = NULL; +#endif + +int (*PyOS_InputHook)(void) = NULL; + +#ifdef RISCOS +int Py_RISCOSWimpFlag; +#endif + +/* This function restarts a fgets() after an EINTR error occurred + except if PyOS_InterruptOccurred() returns true. */ + +static int +my_fgets(char *buf, int len, FILE *fp) +{ + char *p; + while (1) { + if (PyOS_InputHook != NULL) + (void)(PyOS_InputHook)(); + errno = 0; + p = fgets(buf, len, fp); + if (p != NULL) + return 0; /* No error */ +#ifdef MS_WINDOWS + /* In the case of a Ctrl+C or some other external event + interrupting the operation: + Win2k/NT: ERROR_OPERATION_ABORTED is the most recent Win32 + error code (and feof() returns TRUE). + Win9x: Ctrl+C seems to have no effect on fgets() returning + early - the signal handler is called, but the fgets() + only returns "normally" (ie, when Enter hit or feof()) + */ + if (GetLastError()==ERROR_OPERATION_ABORTED) { + /* Signals come asynchronously, so we sleep a brief + moment before checking if the handler has been + triggered (we cant just return 1 before the + signal handler has been called, as the later + signal may be treated as a separate interrupt). + */ + Sleep(1); + if (PyOS_InterruptOccurred()) { + return 1; /* Interrupt */ + } + /* Either the sleep wasn't long enough (need a + short loop retrying?) or not interrupted at all + (in which case we should revisit the whole thing!) + Logging some warning would be nice. assert is not + viable as under the debugger, the various dialogs + mean the condition is not true. 
+ */ + } +#endif /* MS_WINDOWS */ + if (feof(fp)) { + clearerr(fp); + return -1; /* EOF */ + } +#ifdef EINTR + if (errno == EINTR) { + int s; +#ifdef WITH_THREAD + PyEval_RestoreThread(_PyOS_ReadlineTState); +#endif + s = PyErr_CheckSignals(); +#ifdef WITH_THREAD + PyEval_SaveThread(); +#endif + if (s < 0) + return 1; + /* try again */ + continue; + } +#endif + if (PyOS_InterruptOccurred()) { + return 1; /* Interrupt */ + } + return -2; /* Error */ + } + /* NOTREACHED */ +} + + +/* Readline implementation using fgets() */ + +char * +PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) +{ + size_t n; + char *p; + n = 100; + if ((p = (char *)PyMem_MALLOC(n)) == NULL) + return NULL; + fflush(sys_stdout); +#ifndef RISCOS + if (prompt) + fprintf(stderr, "%s", prompt); +#else + if (prompt) { + if(Py_RISCOSWimpFlag) + fprintf(stderr, "\x0cr%s\x0c", prompt); + else + fprintf(stderr, "%s", prompt); + } +#endif + fflush(stderr); + switch (my_fgets(p, (int)n, sys_stdin)) { + case 0: /* Normal case */ + break; + case 1: /* Interrupt */ + PyMem_FREE(p); + return NULL; + case -1: /* EOF */ + case -2: /* Error */ + default: /* Shouldn't happen */ + *p = '\0'; + break; + } + n = strlen(p); + while (n > 0 && p[n-1] != '\n') { + size_t incr = n+2; + p = (char *)PyMem_REALLOC(p, n + incr); + if (p == NULL) + return NULL; + if (incr > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "input line too long"); + } + if (my_fgets(p+n, (int)incr, sys_stdin) != 0) + break; + n += strlen(p+n); + } + return (char *)PyMem_REALLOC(p, n+1); +} + + +/* By initializing this function pointer, systems embedding Python can + override the readline function. + + Note: Python expects in return a buffer allocated with PyMem_Malloc. */ + +char *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *); + + +/* Interface used by tokenizer.c and bltinmodule.c */ + +char * +PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) +{ + char *rv; + + if (_PyOS_ReadlineTState == PyThreadState_GET()) { + PyErr_SetString(PyExc_RuntimeError, + "can't re-enter readline"); + return NULL; + } + + + if (PyOS_ReadlineFunctionPointer == NULL) { +#ifdef __VMS + PyOS_ReadlineFunctionPointer = vms__StdioReadline; +#else + PyOS_ReadlineFunctionPointer = PyOS_StdioReadline; +#endif + } + +#ifdef WITH_THREAD + if (_PyOS_ReadlineLock == NULL) { + _PyOS_ReadlineLock = PyThread_allocate_lock(); + } +#endif + + _PyOS_ReadlineTState = PyThreadState_GET(); + Py_BEGIN_ALLOW_THREADS +#ifdef WITH_THREAD + PyThread_acquire_lock(_PyOS_ReadlineLock, 1); +#endif + + /* This is needed to handle the unlikely case that the + * interpreter is in interactive mode *and* stdin/out are not + * a tty. 
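One detail worth isolating from my_fgets() above is the EINTR path: a signal can interrupt fgets() without meaning EOF or a real error, so the read is simply restarted after handlers get a chance to run. A reduced sketch of that idiom in plain C (the original also dispatches Python-level signal handlers between attempts):

#include <errno.h>
#include <stdio.h>

static char *fgets_eintr(char *buf, int len, FILE *fp)
{
    for (;;) {
        char *p;
        errno = 0;
        p = fgets(buf, len, fp);
        if (p != NULL)
            return p;          /* got a line */
        if (feof(fp) || errno != EINTR)
            return NULL;       /* genuine EOF or error */
        /* interrupted by a signal: run handlers here, then retry */
    }
}

int main(void)
{
    char line[128];
    if (fgets_eintr(line, sizeof line, stdin))
        fputs(line, stdout);
    return 0;
}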
This can happen, for example if python is run like + * this: python -i < test1.py + */ + if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout))) + rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt); + else + rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout, + prompt); + Py_END_ALLOW_THREADS + +#ifdef WITH_THREAD + PyThread_release_lock(_PyOS_ReadlineLock); +#endif + + _PyOS_ReadlineTState = NULL; + + return rv; +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/node.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/node.c new file mode 100644 index 0000000000..4949b7cb64 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/node.c @@ -0,0 +1,138 @@ +/* Parse tree node implementation */ + +#include "Python.h" +#include "node.h" +#include "errcode.h" + +node * +PyNode_New(int type) +{ + node *n = (node *) PyObject_MALLOC(1 * sizeof(node)); + if (n == NULL) + return NULL; + n->n_type = type; + n->n_str = NULL; + n->n_lineno = 0; + n->n_nchildren = 0; + n->n_child = NULL; + return n; +} + +/* See comments at XXXROUNDUP below. Returns -1 on overflow. */ +static int +fancy_roundup(int n) +{ + /* Round up to the closest power of 2 >= n. */ + int result = 256; + assert(n > 128); + while (result < n) { + result <<= 1; + if (result <= 0) + return -1; + } + return result; +} + +/* A gimmick to make massive numbers of reallocs quicker. The result is + * a number >= the input. In PyNode_AddChild, it's used like so, when + * we're about to add child number current_size + 1: + * + * if XXXROUNDUP(current_size) < XXXROUNDUP(current_size + 1): + * allocate space for XXXROUNDUP(current_size + 1) total children + * else: + * we already have enough space + * + * Since a node starts out empty, we must have + * + * XXXROUNDUP(0) < XXXROUNDUP(1) + * + * so that we allocate space for the first child. One-child nodes are very + * common (presumably that would change if we used a more abstract form + * of syntax tree), so to avoid wasting memory it's desirable that + * XXXROUNDUP(1) == 1. That in turn forces XXXROUNDUP(0) == 0. + * + * Else for 2 <= n <= 128, we round up to the closest multiple of 4. Why 4? + * Rounding up to a multiple of an exact power of 2 is very efficient, and + * most nodes with more than one child have <= 4 kids. + * + * Else we call fancy_roundup() to grow proportionately to n. We've got an + * extreme case then (like test_longexp.py), and on many platforms doing + * anything less than proportional growth leads to exorbitant runtime + * (e.g., MacPython), or extreme fragmentation of user address space (e.g., + * Win98). + * + * In a run of compileall across the 2.3a0 Lib directory, Andrew MacIntyre + * reported that, with this scheme, 89% of PyObject_REALLOC calls in + * PyNode_AddChild passed 1 for the size, and 9% passed 4. So this usually + * wastes very little memory, but is very effective at sidestepping + * platform-realloc disasters on vulnerable platforms. + * + * Note that this would be straightforward if a node stored its current + * capacity. The code is tricky to avoid that. + */ +#define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ + (n) <= 128 ? 
(((n) + 3) & ~3) : \ + fancy_roundup(n)) + + +int +PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset) +{ + const int nch = n1->n_nchildren; + int current_capacity; + int required_capacity; + node *n; + + if (nch == INT_MAX || nch < 0) + return E_OVERFLOW; + + current_capacity = XXXROUNDUP(nch); + required_capacity = XXXROUNDUP(nch + 1); + if (current_capacity < 0 || required_capacity < 0) + return E_OVERFLOW; + if (current_capacity < required_capacity) { + if (required_capacity > PY_SIZE_MAX / sizeof(node)) { + return E_NOMEM; + } + n = n1->n_child; + n = (node *) PyObject_REALLOC(n, + required_capacity * sizeof(node)); + if (n == NULL) + return E_NOMEM; + n1->n_child = n; + } + + n = &n1->n_child[n1->n_nchildren++]; + n->n_type = type; + n->n_str = str; + n->n_lineno = lineno; + n->n_col_offset = col_offset; + n->n_nchildren = 0; + n->n_child = NULL; + return 0; +} + +/* Forward */ +static void freechildren(node *); + + +void +PyNode_Free(node *n) +{ + if (n != NULL) { + freechildren(n); + PyObject_FREE(n); + } +} + +static void +freechildren(node *n) +{ + int i; + for (i = NCH(n); --i >= 0; ) + freechildren(CHILD(n, i)); + if (n->n_child != NULL) + PyObject_FREE(n->n_child); + if (STR(n) != NULL) + PyObject_FREE(STR(n)); +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/parser.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/parser.c new file mode 100644 index 0000000000..d98dfaaca5 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/parser.c @@ -0,0 +1,436 @@ + +/* Parser implementation */ + +/* For a description, see the comments at end of this file */ + +/* XXX To do: error recovery */ + +#include "Python.h" +#include "pgenheaders.h" +#include "token.h" +#include "grammar.h" +#include "node.h" +#include "parser.h" +#include "errcode.h" + + +#ifdef Py_DEBUG +extern int Py_DebugFlag; +#define D(x) if (!Py_DebugFlag); else x +#else +#define D(x) +#endif + + +/* STACK DATA TYPE */ + +static void s_reset(stack *); + +static void +s_reset(stack *s) +{ + s->s_top = &s->s_base[MAXSTACK]; +} + +#define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK]) + +static int +s_push(register stack *s, dfa *d, node *parent) +{ + register stackentry *top; + if (s->s_top == s->s_base) { + fprintf(stderr, "s_push: parser stack overflow\n"); + return E_NOMEM; + } + top = --s->s_top; + top->s_dfa = d; + top->s_parent = parent; + top->s_state = 0; + return 0; +} + +#ifdef Py_DEBUG + +static void +s_pop(register stack *s) +{ + if (s_empty(s)) + Py_FatalError("s_pop: parser stack underflow -- FATAL"); + s->s_top++; +} + +#else /* !Py_DEBUG */ + +#define s_pop(s) (s)->s_top++ + +#endif + + +/* PARSER CREATION */ + +parser_state * +PyParser_New(grammar *g, int start) +{ + parser_state *ps; + + if (!g->g_accel) + PyGrammar_AddAccelerators(g); + ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state)); + if (ps == NULL) + return NULL; + ps->p_grammar = g; +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + ps->p_flags = 0; +#endif + ps->p_tree = PyNode_New(start); + if (ps->p_tree == NULL) { + PyMem_FREE(ps); + return NULL; + } + s_reset(&ps->p_stack); + (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree); + return ps; +} + +void +PyParser_Delete(parser_state *ps) +{ + /* NB If you want to save the parse tree, + you must set p_tree to NULL before calling delparser! 
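The XXXROUNDUP() policy above is easy to sanity-check: exact for 0 and 1, next multiple of four up to 128, then powers of two. A small self-contained restatement with a few asserts (the -1 overflow result of fancy_roundup() is omitted for brevity):

#include <assert.h>

static int roundup(int n)   /* mirrors XXXROUNDUP, sans overflow check */
{
    if (n <= 1)
        return n;                    /* 0 -> 0, 1 -> 1 */
    if (n <= 128)
        return (n + 3) & ~3;         /* next multiple of 4 */
    {
        int r = 256;                 /* smallest power of 2 > 128 */
        while (r < n)
            r <<= 1;
        return r;
    }
}

int main(void)
{
    assert(roundup(1) == 1 && roundup(2) == 4);
    assert(roundup(5) == 8 && roundup(128) == 128);
    assert(roundup(129) == 256 && roundup(300) == 512);
    return 0;
}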
*/ + PyNode_Free(ps->p_tree); + PyMem_FREE(ps); +} + + +/* PARSER STACK OPERATIONS */ + +static int +shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset) +{ + int err; + assert(!s_empty(s)); + err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); + if (err) + return err; + s->s_top->s_state = newstate; + return 0; +} + +static int +push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset) +{ + int err; + register node *n; + n = s->s_top->s_parent; + assert(!s_empty(s)); + err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); + if (err) + return err; + s->s_top->s_state = newstate; + return s_push(s, d, CHILD(n, NCH(n)-1)); +} + + +/* PARSER PROPER */ + +static int +classify(parser_state *ps, int type, char *str) +{ + grammar *g = ps->p_grammar; + register int n = g->g_ll.ll_nlabels; + + if (type == NAME) { + register char *s = str; + register label *l = g->g_ll.ll_label; + register int i; + for (i = n; i > 0; i--, l++) { + if (l->lb_type != NAME || l->lb_str == NULL || + l->lb_str[0] != s[0] || + strcmp(l->lb_str, s) != 0) + continue; +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION && + s[0] == 'p' && strcmp(s, "print") == 0) { + break; /* no longer a keyword */ + } +#endif + D(printf("It's a keyword\n")); + return n - i; + } + } + + { + register label *l = g->g_ll.ll_label; + register int i; + for (i = n; i > 0; i--, l++) { + if (l->lb_type == type && l->lb_str == NULL) { + D(printf("It's a token we know\n")); + return n - i; + } + } + } + + D(printf("Illegal token\n")); + return -1; +} + +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD +static void +future_hack(parser_state *ps) +{ + node *n = ps->p_stack.s_top->s_parent; + node *ch, *cch; + int i; + + /* from __future__ import ..., must have at least 4 children */ + n = CHILD(n, 0); + if (NCH(n) < 4) + return; + ch = CHILD(n, 0); + if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0) + return; + ch = CHILD(n, 1); + if (NCH(ch) == 1 && STR(CHILD(ch, 0)) && + strcmp(STR(CHILD(ch, 0)), "__future__") != 0) + return; + ch = CHILD(n, 3); + /* ch can be a star, a parenthesis or import_as_names */ + if (TYPE(ch) == STAR) + return; + if (TYPE(ch) == LPAR) + ch = CHILD(n, 4); + + for (i = 0; i < NCH(ch); i += 2) { + cch = CHILD(ch, i); + if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { + char *str_ch = STR(CHILD(cch, 0)); + if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { + ps->p_flags |= CO_FUTURE_WITH_STATEMENT; + } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { + ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; + } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { + ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; + } + } + } +} +#endif /* future keyword */ + +int +PyParser_AddToken(register parser_state *ps, register int type, char *str, + int lineno, int col_offset, int *expected_ret) +{ + register int ilabel; + int err; + + D(printf("Token %s/'%s' ... 
", _PyParser_TokenNames[type], str)); + + /* Find out which label this token is */ + ilabel = classify(ps, type, str); + if (ilabel < 0) + return E_SYNTAX; + + /* Loop until the token is shifted or an error occurred */ + for (;;) { + /* Fetch the current dfa and state */ + register dfa *d = ps->p_stack.s_top->s_dfa; + register state *s = &d->d_state[ps->p_stack.s_top->s_state]; + + D(printf(" DFA '%s', state %d:", + d->d_name, ps->p_stack.s_top->s_state)); + + /* Check accelerator */ + if (s->s_lower <= ilabel && ilabel < s->s_upper) { + register int x = s->s_accel[ilabel - s->s_lower]; + if (x != -1) { + if (x & (1<<7)) { + /* Push non-terminal */ + int nt = (x >> 8) + NT_OFFSET; + int arrow = x & ((1<<7)-1); + dfa *d1 = PyGrammar_FindDFA( + ps->p_grammar, nt); + if ((err = push(&ps->p_stack, nt, d1, + arrow, lineno, col_offset)) > 0) { + D(printf(" MemError: push\n")); + return err; + } + D(printf(" Push ...\n")); + continue; + } + + /* Shift the token */ + if ((err = shift(&ps->p_stack, type, str, + x, lineno, col_offset)) > 0) { + D(printf(" MemError: shift.\n")); + return err; + } + D(printf(" Shift.\n")); + /* Pop while we are in an accept-only state */ + while (s = &d->d_state + [ps->p_stack.s_top->s_state], + s->s_accept && s->s_narcs == 1) { + D(printf(" DFA '%s', state %d: " + "Direct pop.\n", + d->d_name, + ps->p_stack.s_top->s_state)); +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (d->d_name[0] == 'i' && + strcmp(d->d_name, + "import_stmt") == 0) + future_hack(ps); +#endif + s_pop(&ps->p_stack); + if (s_empty(&ps->p_stack)) { + D(printf(" ACCEPT.\n")); + return E_DONE; + } + d = ps->p_stack.s_top->s_dfa; + } + return E_OK; + } + } + + if (s->s_accept) { +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (d->d_name[0] == 'i' && + strcmp(d->d_name, "import_stmt") == 0) + future_hack(ps); +#endif + /* Pop this dfa and try again */ + s_pop(&ps->p_stack); + D(printf(" Pop ...\n")); + if (s_empty(&ps->p_stack)) { + D(printf(" Error: bottom of stack.\n")); + return E_SYNTAX; + } + continue; + } + + /* Stuck, report syntax error */ + D(printf(" Error.\n")); + if (expected_ret) { + if (s->s_lower == s->s_upper - 1) { + /* Only one possible expected token */ + *expected_ret = ps->p_grammar-> + g_ll.ll_label[s->s_lower].lb_type; + } + else + *expected_ret = -1; + } + return E_SYNTAX; + } +} + + +#ifdef Py_DEBUG + +/* DEBUG OUTPUT */ + +void +dumptree(grammar *g, node *n) +{ + int i; + + if (n == NULL) + printf("NIL"); + else { + label l; + l.lb_type = TYPE(n); + l.lb_str = STR(n); + printf("%s", PyGrammar_LabelRepr(&l)); + if (ISNONTERMINAL(TYPE(n))) { + printf("("); + for (i = 0; i < NCH(n); i++) { + if (i > 0) + printf(","); + dumptree(g, CHILD(n, i)); + } + printf(")"); + } + } +} + +void +showtree(grammar *g, node *n) +{ + int i; + + if (n == NULL) + return; + if (ISNONTERMINAL(TYPE(n))) { + for (i = 0; i < NCH(n); i++) + showtree(g, CHILD(n, i)); + } + else if (ISTERMINAL(TYPE(n))) { + printf("%s", _PyParser_TokenNames[TYPE(n)]); + if (TYPE(n) == NUMBER || TYPE(n) == NAME) + printf("(%s)", STR(n)); + printf(" "); + } + else + printf("? 
"); +} + +void +printtree(parser_state *ps) +{ + if (Py_DebugFlag) { + printf("Parse tree:\n"); + dumptree(ps->p_grammar, ps->p_tree); + printf("\n"); + printf("Tokens:\n"); + showtree(ps->p_grammar, ps->p_tree); + printf("\n"); + } + printf("Listing:\n"); + PyNode_ListTree(ps->p_tree); + printf("\n"); +} + +#endif /* Py_DEBUG */ + +/* + +Description +----------- + +The parser's interface is different than usual: the function addtoken() +must be called for each token in the input. This makes it possible to +turn it into an incremental parsing system later. The parsing system +constructs a parse tree as it goes. + +A parsing rule is represented as a Deterministic Finite-state Automaton +(DFA). A node in a DFA represents a state of the parser; an arc represents +a transition. Transitions are either labeled with terminal symbols or +with non-terminals. When the parser decides to follow an arc labeled +with a non-terminal, it is invoked recursively with the DFA representing +the parsing rule for that as its initial state; when that DFA accepts, +the parser that invoked it continues. The parse tree constructed by the +recursively called parser is inserted as a child in the current parse tree. + +The DFA's can be constructed automatically from a more conventional +language description. An extended LL(1) grammar (ELL(1)) is suitable. +Certain restrictions make the parser's life easier: rules that can produce +the empty string should be outlawed (there are other ways to put loops +or optional parts in the language). To avoid the need to construct +FIRST sets, we can require that all but the last alternative of a rule +(really: arc going out of a DFA's state) must begin with a terminal +symbol. + +As an example, consider this grammar: + +expr: term (OP term)* +term: CONSTANT | '(' expr ')' + +The DFA corresponding to the rule for expr is: + +------->.---term-->.-------> + ^ | + | | + \----OP----/ + +The parse tree generated for the input a+b is: + +(expr: (term: (NAME: a)), (OP: +), (term: (NAME: b))) + +*/ diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/parser.h b/AppPkg/Applications/Python/Python-2.7.2/Parser/parser.h new file mode 100644 index 0000000000..bc09396769 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/parser.h @@ -0,0 +1,42 @@ +#ifndef Py_PARSER_H +#define Py_PARSER_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Parser interface */ + +#define MAXSTACK 1500 + +typedef struct { + int s_state; /* State in current DFA */ + dfa *s_dfa; /* Current DFA */ + struct _node *s_parent; /* Where to add next node */ +} stackentry; + +typedef struct { + stackentry *s_top; /* Top entry */ + stackentry s_base[MAXSTACK];/* Array of stack entries */ + /* NB The stack grows down */ +} stack; + +typedef struct { + stack p_stack; /* Stack of parser states */ + grammar *p_grammar; /* Grammar to use */ + node *p_tree; /* Top of parse tree */ +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + unsigned long p_flags; /* see co_flags in Include/code.h */ +#endif +} parser_state; + +parser_state *PyParser_New(grammar *g, int start); +void PyParser_Delete(parser_state *ps); +int PyParser_AddToken(parser_state *ps, int type, char *str, int lineno, int col_offset, + int *expected_ret); +void PyGrammar_AddAccelerators(grammar *g); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PARSER_H */ diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/parsetok.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/parsetok.c new file mode 100644 index 0000000000..6446b10a22 --- /dev/null +++ 
b/AppPkg/Applications/Python/Python-2.7.2/Parser/parsetok.c @@ -0,0 +1,283 @@ + +/* Parser-tokenizer link implementation */ + +#include "pgenheaders.h" +#include "tokenizer.h" +#include "node.h" +#include "grammar.h" +#include "parser.h" +#include "parsetok.h" +#include "errcode.h" +#include "graminit.h" + +int Py_TabcheckFlag; + + +/* Forward */ +static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *); +static void initerr(perrdetail *err_ret, const char* filename); + +/* Parse input coming from a string. Return error code, print some errors. */ +node * +PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) +{ + return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); +} + +node * +PyParser_ParseStringFlags(const char *s, grammar *g, int start, + perrdetail *err_ret, int flags) +{ + return PyParser_ParseStringFlagsFilename(s, NULL, + g, start, err_ret, flags); +} + +node * +PyParser_ParseStringFlagsFilename(const char *s, const char *filename, + grammar *g, int start, + perrdetail *err_ret, int flags) +{ + int iflags = flags; + return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, + err_ret, &iflags); +} + +node * +PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, + grammar *g, int start, + perrdetail *err_ret, int *flags) +{ + struct tok_state *tok; + + initerr(err_ret, filename); + + if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) { + err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; + return NULL; + } + + tok->filename = filename ? filename : ""; + if (Py_TabcheckFlag || Py_VerboseFlag) { + tok->altwarning = (tok->filename != NULL); + if (Py_TabcheckFlag >= 2) + tok->alterror++; + } + + return parsetok(tok, g, start, err_ret, flags); +} + +/* Parse input coming from a file. Return error code, print some errors. */ + +node * +PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret) +{ + return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2, + err_ret, 0); +} + +node * +PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int flags) +{ + int iflags = flags; + return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags); +} + +node * +PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start, + char *ps1, char *ps2, perrdetail *err_ret, int *flags) +{ + struct tok_state *tok; + + initerr(err_ret, filename); + + if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) { + err_ret->error = E_NOMEM; + return NULL; + } + tok->filename = filename; + if (Py_TabcheckFlag || Py_VerboseFlag) { + tok->altwarning = (filename != NULL); + if (Py_TabcheckFlag >= 2) + tok->alterror++; + } + + return parsetok(tok, g, start, err_ret, flags); +} + +#if 0 +static char with_msg[] = +"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n"; + +static char as_msg[] = +"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n"; + +static void +warn(const char *msg, const char *filename, int lineno) +{ + if (filename == NULL) + filename = ""; + PySys_WriteStderr(msg, filename, lineno); +} +#endif + +/* Parse input coming from the given tokenizer structure. + Return error code. 
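For orientation, this is roughly how an embedder drives the string entry point declared above. A hedged sketch against the 2.7 internal API, with error handling trimmed; file_input comes from the generated graminit.h and _PyParser_Grammar from the tables pgen emits:

#include "Python.h"
#include "node.h"
#include "grammar.h"
#include "parsetok.h"
#include "graminit.h"

extern grammar _PyParser_Grammar;   /* emitted by pgen into graminit.c */

static node *parse_source(const char *src)
{
    perrdetail err;
    node *n = PyParser_ParseString(src, &_PyParser_Grammar,
                                   file_input, &err);
    if (n == NULL)
        fprintf(stderr, "parse failed: error %d, line %d\n",
                err.error, err.lineno);
    return n;   /* caller releases the tree with PyNode_Free() */
}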
*/ + +static node * +parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, + int *flags) +{ + parser_state *ps; + node *n; + int started = 0, handling_import = 0, handling_with = 0; + + if ((ps = PyParser_New(g, start)) == NULL) { + fprintf(stderr, "no mem for new parser\n"); + err_ret->error = E_NOMEM; + PyTokenizer_Free(tok); + return NULL; + } +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + if (*flags & PyPARSE_PRINT_IS_FUNCTION) { + ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; + } + if (*flags & PyPARSE_UNICODE_LITERALS) { + ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; + } + +#endif + + for (;;) { + char *a, *b; + int type; + size_t len; + char *str; + int col_offset; + + type = PyTokenizer_Get(tok, &a, &b); + if (type == ERRORTOKEN) { + err_ret->error = tok->done; + break; + } + if (type == ENDMARKER && started) { + type = NEWLINE; /* Add an extra newline */ + handling_with = handling_import = 0; + started = 0; + /* Add the right number of dedent tokens, + except if a certain flag is given -- + codeop.py uses this. */ + if (tok->indent && + !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) + { + tok->pendin = -tok->indent; + tok->indent = 0; + } + } + else + started = 1; + len = b - a; /* XXX this may compute NULL - NULL */ + str = (char *) PyObject_MALLOC(len + 1); + if (str == NULL) { + fprintf(stderr, "no mem for next token\n"); + err_ret->error = E_NOMEM; + break; + } + if (len > 0) + strncpy(str, a, len); + str[len] = '\0'; + +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD +#endif + if (a >= tok->line_start) + col_offset = a - tok->line_start; + else + col_offset = -1; + + if ((err_ret->error = + PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset, + &(err_ret->expected))) != E_OK) { + if (err_ret->error != E_DONE) { + PyObject_FREE(str); + err_ret->token = type; + } + break; + } + } + + if (err_ret->error == E_DONE) { + n = ps->p_tree; + ps->p_tree = NULL; + } + else + n = NULL; + +#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD + *flags = ps->p_flags; +#endif + PyParser_Delete(ps); + + if (n == NULL) { + if (tok->lineno <= 1 && tok->done == E_EOF) + err_ret->error = E_EOF; + err_ret->lineno = tok->lineno; + if (tok->buf != NULL) { + char *text = NULL; + size_t len; + assert(tok->cur - tok->buf < INT_MAX); + err_ret->offset = (int)(tok->cur - tok->buf); + len = tok->inp - tok->buf; +#ifdef Py_USING_UNICODE + text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset); + +#endif + if (text == NULL) { + text = (char *) PyObject_MALLOC(len + 1); + if (text != NULL) { + if (len > 0) + strncpy(text, tok->buf, len); + text[len] = '\0'; + } + } + err_ret->text = text; + } + } else if (tok->encoding != NULL) { + /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was + * allocated using PyMem_ + */ + node* r = PyNode_New(encoding_decl); + if (r) + r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1); + if (!r || !r->n_str) { + err_ret->error = E_NOMEM; + if (r) + PyObject_FREE(r); + n = NULL; + goto done; + } + strcpy(r->n_str, tok->encoding); + PyMem_FREE(tok->encoding); + tok->encoding = NULL; + r->n_nchildren = 1; + r->n_child = n; + n = r; + } + +done: + PyTokenizer_Free(tok); + + return n; +} + +static void +initerr(perrdetail *err_ret, const char *filename) +{ + err_ret->error = E_OK; + err_ret->filename = filename; + err_ret->lineno = 0; + err_ret->offset = 0; + err_ret->text = NULL; + err_ret->token = -1; + err_ret->expected = -1; +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/pgen.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/pgen.c new file mode 
100644 index 0000000000..4e36faf53a --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/pgen.c @@ -0,0 +1,708 @@ +/* Parser generator */ + +/* For a description, see the comments at end of this file */ + +#include "Python.h" +#include "pgenheaders.h" +#include "token.h" +#include "node.h" +#include "grammar.h" +#include "metagrammar.h" +#include "pgen.h" + +extern int Py_DebugFlag; +extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */ + + +/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */ + +typedef struct _nfaarc { + int ar_label; + int ar_arrow; +} nfaarc; + +typedef struct _nfastate { + int st_narcs; + nfaarc *st_arc; +} nfastate; + +typedef struct _nfa { + int nf_type; + char *nf_name; + int nf_nstates; + nfastate *nf_state; + int nf_start, nf_finish; +} nfa; + +/* Forward */ +static void compile_rhs(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); +static void compile_alt(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); +static void compile_item(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); +static void compile_atom(labellist *ll, + nfa *nf, node *n, int *pa, int *pb); + +static int +addnfastate(nfa *nf) +{ + nfastate *st; + + nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, + sizeof(nfastate) * (nf->nf_nstates + 1)); + if (nf->nf_state == NULL) + Py_FatalError("out of mem"); + st = &nf->nf_state[nf->nf_nstates++]; + st->st_narcs = 0; + st->st_arc = NULL; + return st - nf->nf_state; +} + +static void +addnfaarc(nfa *nf, int from, int to, int lbl) +{ + nfastate *st; + nfaarc *ar; + + st = &nf->nf_state[from]; + st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, + sizeof(nfaarc) * (st->st_narcs + 1)); + if (st->st_arc == NULL) + Py_FatalError("out of mem"); + ar = &st->st_arc[st->st_narcs++]; + ar->ar_label = lbl; + ar->ar_arrow = to; +} + +static nfa * +newnfa(char *name) +{ + nfa *nf; + static int type = NT_OFFSET; /* All types will be disjunct */ + + nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); + if (nf == NULL) + Py_FatalError("no mem for new nfa"); + nf->nf_type = type++; + nf->nf_name = name; /* XXX strdup(name) ??? 
*/ + nf->nf_nstates = 0; + nf->nf_state = NULL; + nf->nf_start = nf->nf_finish = -1; + return nf; +} + +typedef struct _nfagrammar { + int gr_nnfas; + nfa **gr_nfa; + labellist gr_ll; +} nfagrammar; + +/* Forward */ +static void compile_rule(nfagrammar *gr, node *n); + +static nfagrammar * +newnfagrammar(void) +{ + nfagrammar *gr; + + gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); + if (gr == NULL) + Py_FatalError("no mem for new nfa grammar"); + gr->gr_nnfas = 0; + gr->gr_nfa = NULL; + gr->gr_ll.ll_nlabels = 0; + gr->gr_ll.ll_label = NULL; + addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); + return gr; +} + +static nfa * +addnfa(nfagrammar *gr, char *name) +{ + nfa *nf; + + nf = newnfa(name); + gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, + sizeof(nfa*) * (gr->gr_nnfas + 1)); + if (gr->gr_nfa == NULL) + Py_FatalError("out of mem"); + gr->gr_nfa[gr->gr_nnfas++] = nf; + addlabel(&gr->gr_ll, NAME, nf->nf_name); + return nf; +} + +#ifdef Py_DEBUG + +static char REQNFMT[] = "metacompile: less than %d children\n"; + +#define REQN(i, count) \ + if (i < count) { \ + fprintf(stderr, REQNFMT, count); \ + Py_FatalError("REQN"); \ + } else + +#else +#define REQN(i, count) /* empty */ +#endif + +static nfagrammar * +metacompile(node *n) +{ + nfagrammar *gr; + int i; + + if (Py_DebugFlag) + printf("Compiling (meta-) parse tree into NFA grammar\n"); + gr = newnfagrammar(); + REQ(n, MSTART); + i = n->n_nchildren - 1; /* Last child is ENDMARKER */ + n = n->n_child; + for (; --i >= 0; n++) { + if (n->n_type != NEWLINE) + compile_rule(gr, n); + } + return gr; +} + +static void +compile_rule(nfagrammar *gr, node *n) +{ + nfa *nf; + + REQ(n, RULE); + REQN(n->n_nchildren, 4); + n = n->n_child; + REQ(n, NAME); + nf = addnfa(gr, n->n_str); + n++; + REQ(n, COLON); + n++; + REQ(n, RHS); + compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); + n++; + REQ(n, NEWLINE); +} + +static void +compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + int a, b; + + REQ(n, RHS); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + REQ(n, ALT); + compile_alt(ll, nf, n, pa, pb); + if (--i <= 0) + return; + n++; + a = *pa; + b = *pb; + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + for (; --i >= 0; n++) { + REQ(n, VBAR); + REQN(i, 1); + --i; + n++; + REQ(n, ALT); + compile_alt(ll, nf, n, &a, &b); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + } +} + +static void +compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + int a, b; + + REQ(n, ALT); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + REQ(n, ITEM); + compile_item(ll, nf, n, pa, pb); + --i; + n++; + for (; --i >= 0; n++) { + REQ(n, ITEM); + compile_item(ll, nf, n, &a, &b); + addnfaarc(nf, *pb, a, EMPTY); + *pb = b; + } +} + +static void +compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) +{ + int i; + int a, b; + + REQ(n, ITEM); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + if (n->n_type == LSQB) { + REQN(i, 3); + n++; + REQ(n, RHS); + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, *pb, EMPTY); + compile_rhs(ll, nf, n, &a, &b); + addnfaarc(nf, *pa, a, EMPTY); + addnfaarc(nf, b, *pb, EMPTY); + REQN(i, 1); + n++; + REQ(n, RSQB); + } + else { + compile_atom(ll, nf, n, pa, pb); + if (--i <= 0) + return; + n++; + addnfaarc(nf, *pb, *pa, EMPTY); + if (n->n_type == STAR) + *pb = *pa; + else + REQ(n, PLUS); + } +} + +static void +compile_atom(labellist *ll, nfa *nf, node *n, int *pa, 
int *pb) +{ + int i; + + REQ(n, ATOM); + i = n->n_nchildren; + REQN(i, 1); + n = n->n_child; + if (n->n_type == LPAR) { + REQN(i, 3); + n++; + REQ(n, RHS); + compile_rhs(ll, nf, n, pa, pb); + n++; + REQ(n, RPAR); + } + else if (n->n_type == NAME || n->n_type == STRING) { + *pa = addnfastate(nf); + *pb = addnfastate(nf); + addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); + } + else + REQ(n, NAME); +} + +static void +dumpstate(labellist *ll, nfa *nf, int istate) +{ + nfastate *st; + int i; + nfaarc *ar; + + printf("%c%2d%c", + istate == nf->nf_start ? '*' : ' ', + istate, + istate == nf->nf_finish ? '.' : ' '); + st = &nf->nf_state[istate]; + ar = st->st_arc; + for (i = 0; i < st->st_narcs; i++) { + if (i > 0) + printf("\n "); + printf("-> %2d %s", ar->ar_arrow, + PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); + ar++; + } + printf("\n"); +} + +static void +dumpnfa(labellist *ll, nfa *nf) +{ + int i; + + printf("NFA '%s' has %d states; start %d, finish %d\n", + nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); + for (i = 0; i < nf->nf_nstates; i++) + dumpstate(ll, nf, i); +} + + +/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */ + +static void +addclosure(bitset ss, nfa *nf, int istate) +{ + if (addbit(ss, istate)) { + nfastate *st = &nf->nf_state[istate]; + nfaarc *ar = st->st_arc; + int i; + + for (i = st->st_narcs; --i >= 0; ) { + if (ar->ar_label == EMPTY) + addclosure(ss, nf, ar->ar_arrow); + ar++; + } + } +} + +typedef struct _ss_arc { + bitset sa_bitset; + int sa_arrow; + int sa_label; +} ss_arc; + +typedef struct _ss_state { + bitset ss_ss; + int ss_narcs; + struct _ss_arc *ss_arc; + int ss_deleted; + int ss_finish; + int ss_rename; +} ss_state; + +typedef struct _ss_dfa { + int sd_nstates; + ss_state *sd_state; +} ss_dfa; + +/* Forward */ +static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, + labellist *ll, char *msg); +static void simplify(int xx_nstates, ss_state *xx_state); +static void convert(dfa *d, int xx_nstates, ss_state *xx_state); + +static void +makedfa(nfagrammar *gr, nfa *nf, dfa *d) +{ + int nbits = nf->nf_nstates; + bitset ss; + int xx_nstates; + ss_state *xx_state, *yy; + ss_arc *zz; + int istate, jstate, iarc, jarc, ibit; + nfastate *st; + nfaarc *ar; + + ss = newbitset(nbits); + addclosure(ss, nf, nf->nf_start); + xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); + if (xx_state == NULL) + Py_FatalError("no mem for xx_state in makedfa"); + xx_nstates = 1; + yy = &xx_state[0]; + yy->ss_ss = ss; + yy->ss_narcs = 0; + yy->ss_arc = NULL; + yy->ss_deleted = 0; + yy->ss_finish = testbit(ss, nf->nf_finish); + if (yy->ss_finish) + printf("Error: nonterminal '%s' may produce empty.\n", + nf->nf_name); + + /* This algorithm is from a book written before + the invention of structured programming... */ + + /* For each unmarked state... */ + for (istate = 0; istate < xx_nstates; ++istate) { + size_t size; + yy = &xx_state[istate]; + ss = yy->ss_ss; + /* For all its states... */ + for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { + if (!testbit(ss, ibit)) + continue; + st = &nf->nf_state[ibit]; + /* For all non-empty arcs from this state... 
*/ + for (iarc = 0; iarc < st->st_narcs; iarc++) { + ar = &st->st_arc[iarc]; + if (ar->ar_label == EMPTY) + continue; + /* Look up in list of arcs from this state */ + for (jarc = 0; jarc < yy->ss_narcs; ++jarc) { + zz = &yy->ss_arc[jarc]; + if (ar->ar_label == zz->sa_label) + goto found; + } + /* Add new arc for this state */ + size = sizeof(ss_arc) * (yy->ss_narcs + 1); + yy->ss_arc = (ss_arc *)PyObject_REALLOC( + yy->ss_arc, size); + if (yy->ss_arc == NULL) + Py_FatalError("out of mem"); + zz = &yy->ss_arc[yy->ss_narcs++]; + zz->sa_label = ar->ar_label; + zz->sa_bitset = newbitset(nbits); + zz->sa_arrow = -1; + found: ; + /* Add destination */ + addclosure(zz->sa_bitset, nf, ar->ar_arrow); + } + } + /* Now look up all the arrow states */ + for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) { + zz = &xx_state[istate].ss_arc[jarc]; + for (jstate = 0; jstate < xx_nstates; jstate++) { + if (samebitset(zz->sa_bitset, + xx_state[jstate].ss_ss, nbits)) { + zz->sa_arrow = jstate; + goto done; + } + } + size = sizeof(ss_state) * (xx_nstates + 1); + xx_state = (ss_state *)PyObject_REALLOC(xx_state, + size); + if (xx_state == NULL) + Py_FatalError("out of mem"); + zz->sa_arrow = xx_nstates; + yy = &xx_state[xx_nstates++]; + yy->ss_ss = zz->sa_bitset; + yy->ss_narcs = 0; + yy->ss_arc = NULL; + yy->ss_deleted = 0; + yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish); + done: ; + } + } + + if (Py_DebugFlag) + printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, + "before minimizing"); + + simplify(xx_nstates, xx_state); + + if (Py_DebugFlag) + printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, + "after minimizing"); + + convert(d, xx_nstates, xx_state); + + /* XXX cleanup */ + PyObject_FREE(xx_state); +} + +static void +printssdfa(int xx_nstates, ss_state *xx_state, int nbits, + labellist *ll, char *msg) +{ + int i, ibit, iarc; + ss_state *yy; + ss_arc *zz; + + printf("Subset DFA %s\n", msg); + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + printf(" Subset %d", i); + if (yy->ss_finish) + printf(" (finish)"); + printf(" { "); + for (ibit = 0; ibit < nbits; ibit++) { + if (testbit(yy->ss_ss, ibit)) + printf("%d ", ibit); + } + printf("}\n"); + for (iarc = 0; iarc < yy->ss_narcs; iarc++) { + zz = &yy->ss_arc[iarc]; + printf(" Arc to state %d, label %s\n", + zz->sa_arrow, + PyGrammar_LabelRepr( + &ll->ll_label[zz->sa_label])); + } + } +} + + +/* PART THREE -- SIMPLIFY DFA */ + +/* Simplify the DFA by repeatedly eliminating states that are + equivalent to another one. This is NOT Algorithm 3.3 from + [Aho&Ullman 77]. It does not always find the minimal DFA, + but it does usually make a much smaller one... (For an example + of sub-optimal behavior, try S: x a b+ | y a b+.)
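makedfa() above repeatedly takes the epsilon-closure of NFA state sets (addclosure) before merging equivalent subset states. The closure step in isolation, as a sketch over toy types of ours rather than pgen's:

/* Mark 'istate' and everything reachable from it through EMPTY-labelled
   arcs; 'seen' is one byte per state, in the spirit of the bitsets used
   above. */
typedef struct { int label, arrow; } toy_arc;
typedef struct { int narcs; const toy_arc *arc; } toy_state;

static void closure(char *seen, const toy_state *st, int istate)
{
    int k;
    if (seen[istate])
        return;
    seen[istate] = 1;
    for (k = 0; k < st[istate].narcs; k++)
        if (st[istate].arc[k].label == 0)   /* label 0 is EMPTY */
            closure(seen, st, st[istate].arc[k].arrow);
}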
+*/ + +static int +samestate(ss_state *s1, ss_state *s2) +{ + int i; + + if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) + return 0; + for (i = 0; i < s1->ss_narcs; i++) { + if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || + s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) + return 0; + } + return 1; +} + +static void +renamestates(int xx_nstates, ss_state *xx_state, int from, int to) +{ + int i, j; + + if (Py_DebugFlag) + printf("Rename state %d to %d.\n", from, to); + for (i = 0; i < xx_nstates; i++) { + if (xx_state[i].ss_deleted) + continue; + for (j = 0; j < xx_state[i].ss_narcs; j++) { + if (xx_state[i].ss_arc[j].sa_arrow == from) + xx_state[i].ss_arc[j].sa_arrow = to; + } + } +} + +static void +simplify(int xx_nstates, ss_state *xx_state) +{ + int changes; + int i, j; + + do { + changes = 0; + for (i = 1; i < xx_nstates; i++) { + if (xx_state[i].ss_deleted) + continue; + for (j = 0; j < i; j++) { + if (xx_state[j].ss_deleted) + continue; + if (samestate(&xx_state[i], &xx_state[j])) { + xx_state[i].ss_deleted++; + renamestates(xx_nstates, xx_state, + i, j); + changes++; + break; + } + } + } + } while (changes); +} + + +/* PART FOUR -- GENERATE PARSING TABLES */ + +/* Convert the DFA into a grammar that can be used by our parser */ + +static void +convert(dfa *d, int xx_nstates, ss_state *xx_state) +{ + int i, j; + ss_state *yy; + ss_arc *zz; + + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + yy->ss_rename = addstate(d); + } + + for (i = 0; i < xx_nstates; i++) { + yy = &xx_state[i]; + if (yy->ss_deleted) + continue; + for (j = 0; j < yy->ss_narcs; j++) { + zz = &yy->ss_arc[j]; + addarc(d, yy->ss_rename, + xx_state[zz->sa_arrow].ss_rename, + zz->sa_label); + } + if (yy->ss_finish) + addarc(d, yy->ss_rename, yy->ss_rename, 0); + } + + d->d_initial = 0; +} + + +/* PART FIVE -- GLUE IT ALL TOGETHER */ + +static grammar * +maketables(nfagrammar *gr) +{ + int i; + nfa *nf; + dfa *d; + grammar *g; + + if (gr->gr_nnfas == 0) + return NULL; + g = newgrammar(gr->gr_nfa[0]->nf_type); + /* XXX first rule must be start rule */ + g->g_ll = gr->gr_ll; + + for (i = 0; i < gr->gr_nnfas; i++) { + nf = gr->gr_nfa[i]; + if (Py_DebugFlag) { + printf("Dump of NFA for '%s' ...\n", nf->nf_name); + dumpnfa(&gr->gr_ll, nf); + printf("Making DFA for '%s' ...\n", nf->nf_name); + } + d = adddfa(g, nf->nf_type, nf->nf_name); + makedfa(gr, gr->gr_nfa[i], d); + } + + return g; +} + +grammar * +pgen(node *n) +{ + nfagrammar *gr; + grammar *g; + + gr = metacompile(n); + g = maketables(gr); + translatelabels(g); + addfirstsets(g); + PyObject_FREE(gr); + return g; +} + +grammar * +Py_pgen(node *n) +{ + return pgen(n); +} + +/* + +Description +----------- + +Input is a grammar in extended BNF (using * for repetition, + for +at-least-once repetition, [] for optional parts, | for alternatives and +() for grouping). This has already been parsed and turned into a parse +tree. + +Each rule is considered as a regular expression in its own right. +It is turned into a Non-deterministic Finite Automaton (NFA), which +is then turned into a Deterministic Finite Automaton (DFA), which is then +optimized to reduce the number of states. See [Aho&Ullman 77] chapter 3, +or similar compiler books (this technique is more often used for lexical +analyzers). + +The DFA's are used by the parser as parsing tables in a special way +that's probably unique. Before they are usable, the FIRST sets of all +non-terminals are computed. 
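As a concrete illustration, take the example grammar from the parser.c comments: expr: term (OP term)* and term: CONSTANT | '(' expr ')'. FIRST(term) is { CONSTANT, '(' }, and since every expr begins with a term, FIRST(expr) is the same set. addfirstsets() in firstsets.c computes exactly these sets as bitsets over the grammar's labels, and a left-recursive rule would make that computation loop, which is why calcfirstset() detects and reports left-recursion instead.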
+ +Reference +--------- + +[Aho&Ullman 77] + Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 + (first edition) + +*/ diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/pgenmain.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/pgenmain.c new file mode 100644 index 0000000000..0dc5556d9b --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/pgenmain.c @@ -0,0 +1,173 @@ + +/* Parser generator main program */ + +/* This expects the name of a file containing the grammar as argv[1], + followed by the names of the two output files as argv[2] and argv[3]: + - "graminit.h" (argv[2]) gets the grammar's non-terminals as #defines; + - "graminit.c" (argv[3]) gets the grammar as a bunch of initialized data. + Error messages and status info during the generation process are + written to stdout, or sometimes to stderr. */ + +/* XXX TO DO: + - check for duplicate definitions of names (instead of fatal err) +*/ + +#include "Python.h" +#include "pgenheaders.h" +#include "grammar.h" +#include "node.h" +#include "parsetok.h" +#include "pgen.h" + +int Py_DebugFlag; +int Py_VerboseFlag; +int Py_IgnoreEnvironmentFlag; + +/* Forward */ +grammar *getgrammar(char *filename); + +void +Py_Exit(int sts) +{ + exit(sts); +} + +int +main(int argc, char **argv) +{ + grammar *g; + FILE *fp; + char *filename, *graminit_h, *graminit_c; + + if (argc != 4) { + fprintf(stderr, + "usage: %s grammar graminit.h graminit.c\n", argv[0]); + Py_Exit(2); + } + filename = argv[1]; + graminit_h = argv[2]; + graminit_c = argv[3]; + g = getgrammar(filename); + fp = fopen(graminit_c, "w"); + if (fp == NULL) { + perror(graminit_c); + Py_Exit(1); + } + if (Py_DebugFlag) + printf("Writing %s ...\n", graminit_c); + printgrammar(g, fp); + fclose(fp); + fp = fopen(graminit_h, "w"); + if (fp == NULL) { + perror(graminit_h); + Py_Exit(1); + } + if (Py_DebugFlag) + printf("Writing %s ...\n", graminit_h); + printnonterminals(g, fp); + fclose(fp); + Py_Exit(0); + return 0; /* Make gcc -Wall happy */ +} + +grammar * +getgrammar(char *filename) +{ + FILE *fp; + node *n; + grammar *g0, *g; + perrdetail err; + + fp = fopen(filename, "r"); + if (fp == NULL) { + perror(filename); + Py_Exit(1); + } + g0 = meta_grammar(); + n = PyParser_ParseFile(fp, filename, g0, g0->g_start, + (char *)NULL, (char *)NULL, &err); + fclose(fp); + if (n == NULL) { + fprintf(stderr, "Parsing error %d, line %d.\n", + err.error, err.lineno); + if (err.text != NULL) { + size_t i; + fprintf(stderr, "%s", err.text); + i = strlen(err.text); + if (i == 0 || err.text[i-1] != '\n') + fprintf(stderr, "\n"); + for (i = 0; i < err.offset; i++) { + if (err.text[i] == '\t') + putc('\t', stderr); + else + putc(' ', stderr); + } + fprintf(stderr, "^\n"); + PyObject_FREE(err.text); + } + Py_Exit(1); + } + g = pgen(n); + if (g == NULL) { + printf("Bad grammar.\n"); + Py_Exit(1); + } + return g; +} + +/* Can't happen in pgen */ +PyObject* +PyErr_Occurred() +{ + return 0; +} + +void +Py_FatalError(const char *msg) +{ + fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg); + Py_Exit(1); +} + +/* No-nonsense my_readline() for tokenizer.c */ + +char * +PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) +{ + size_t n = 1000; + char *p = (char *)PyMem_MALLOC(n); + char *q; + if (p == NULL) + return NULL; + fprintf(stderr, "%s", prompt); + q = fgets(p, n, sys_stdin); + if (q == NULL) { + *p = '\0'; + return p; + } + n = strlen(p); + if (n > 0 && p[n-1] != '\n') + p[n-1] = '\n'; + return (char *)PyMem_REALLOC(p, n+1); +} + +/* No-nonsense
fgets */ +char * +Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) +{ + return fgets(buf, n, stream); +} + + +#include <stdarg.h> + +void +PySys_WriteStderr(const char *format, ...) +{ + va_list va; + + va_start(va, format); + vfprintf(stderr, format, va); + va_end(va); +} diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/printgrammar.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/printgrammar.c new file mode 100644 index 0000000000..d92ccc37a1 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/printgrammar.c @@ -0,0 +1,117 @@ + +/* Print a bunch of C initializers that represent a grammar */ + +#include "pgenheaders.h" +#include "grammar.h" + +/* Forward */ +static void printarcs(int, dfa *, FILE *); +static void printstates(grammar *, FILE *); +static void printdfas(grammar *, FILE *); +static void printlabels(grammar *, FILE *); + +void +printgrammar(grammar *g, FILE *fp) +{ + fprintf(fp, "/* Generated by Parser/pgen */\n\n"); + fprintf(fp, "#include \"pgenheaders.h\"\n"); + fprintf(fp, "#include \"grammar.h\"\n"); + fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n"); + printdfas(g, fp); + printlabels(g, fp); + fprintf(fp, "grammar _PyParser_Grammar = {\n"); + fprintf(fp, " %d,\n", g->g_ndfas); + fprintf(fp, " dfas,\n"); + fprintf(fp, " {%d, labels},\n", g->g_ll.ll_nlabels); + fprintf(fp, " %d\n", g->g_start); + fprintf(fp, "};\n"); +} + +void +printnonterminals(grammar *g, FILE *fp) +{ + dfa *d; + int i; + + fprintf(fp, "/* Generated by Parser/pgen */\n\n"); + + d = g->g_dfa; + for (i = g->g_ndfas; --i >= 0; d++) + fprintf(fp, "#define %s %d\n", d->d_name, d->d_type); +} + +static void +printarcs(int i, dfa *d, FILE *fp) +{ + arc *a; + state *s; + int j, k; + + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) { + fprintf(fp, "static arc arcs_%d_%d[%d] = {\n", + i, j, s->s_narcs); + a = s->s_arc; + for (k = 0; k < s->s_narcs; k++, a++) + fprintf(fp, " {%d, %d},\n", a->a_lbl, a->a_arrow); + fprintf(fp, "};\n"); + } +} + +static void +printstates(grammar *g, FILE *fp) +{ + state *s; + dfa *d; + int i, j; + + d = g->g_dfa; + for (i = 0; i < g->g_ndfas; i++, d++) { + printarcs(i, d, fp); + fprintf(fp, "static state states_%d[%d] = {\n", + i, d->d_nstates); + s = d->d_state; + for (j = 0; j < d->d_nstates; j++, s++) + fprintf(fp, " {%d, arcs_%d_%d},\n", + s->s_narcs, i, j); + fprintf(fp, "};\n"); + } +} + +static void +printdfas(grammar *g, FILE *fp) +{ + dfa *d; + int i, j; + + printstates(g, fp); + fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas); + d = g->g_dfa; + for (i = 0; i < g->g_ndfas; i++, d++) { + fprintf(fp, " {%d, \"%s\", %d, %d, states_%d,\n", + d->d_type, d->d_name, d->d_initial, d->d_nstates, i); + fprintf(fp, " \""); + for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++) + fprintf(fp, "\\%03o", d->d_first[j] & 0xff); + fprintf(fp, "\"},\n"); + } + fprintf(fp, "};\n"); +} + +static void +printlabels(grammar *g, FILE *fp) +{ + label *l; + int i; + + fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels); + l = g->g_ll.ll_label; + for (i = g->g_ll.ll_nlabels; --i >= 0; l++) { + if (l->lb_str == NULL) + fprintf(fp, " {%d, 0},\n", l->lb_type); + else + fprintf(fp, " {%d, \"%s\"},\n", + l->lb_type, l->lb_str); + } + fprintf(fp, "};\n"); } diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/spark.py b/AppPkg/Applications/Python/Python-2.7.2/Parser/spark.py new file mode 100644 index 0000000000..e9911297d7 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/spark.py @@ -0,0 +1,839 @@ +#
Copyright (c) 1998-2002 John Aycock +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +__version__ = 'SPARK-0.7 (pre-alpha-5)' + +import re +import string + +def _namelist(instance): + namelist, namedict, classlist = [], {}, [instance.__class__] + for c in classlist: + for b in c.__bases__: + classlist.append(b) + for name in c.__dict__.keys(): + if not namedict.has_key(name): + namelist.append(name) + namedict[name] = 1 + return namelist + +class GenericScanner: + def __init__(self, flags=0): + pattern = self.reflect() + self.re = re.compile(pattern, re.VERBOSE|flags) + + self.index2func = {} + for name, number in self.re.groupindex.items(): + self.index2func[number-1] = getattr(self, 't_' + name) + + def makeRE(self, name): + doc = getattr(self, name).__doc__ + rv = '(?P<%s>%s)' % (name[2:], doc) + return rv + + def reflect(self): + rv = [] + for name in _namelist(self): + if name[:2] == 't_' and name != 't_default': + rv.append(self.makeRE(name)) + + rv.append(self.makeRE('t_default')) + return string.join(rv, '|') + + def error(self, s, pos): + print "Lexical error at position %s" % pos + raise SystemExit + + def tokenize(self, s): + pos = 0 + n = len(s) + while pos < n: + m = self.re.match(s, pos) + if m is None: + self.error(s, pos) + + groups = m.groups() + for i in range(len(groups)): + if groups[i] and self.index2func.has_key(i): + self.index2func[i](groups[i]) + pos = m.end() + + def t_default(self, s): + r'( . | \n )+' + print "Specification error: unmatched input" + raise SystemExit + +# +# Extracted from GenericParser and made global so that [un]picking works. +# +class _State: + def __init__(self, stateno, items): + self.T, self.complete, self.items = [], [], items + self.stateno = stateno + +class GenericParser: + # + # An Earley parser, as per J. Earley, "An Efficient Context-Free + # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley, + # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, + # Carnegie-Mellon University, August 1968. New formulation of + # the parser according to J. Aycock, "Practical Earley Parsing + # and the SPARK Toolkit", Ph.D. thesis, University of Victoria, + # 2001, and J. Aycock and R. N. Horspool, "Practical Earley + # Parsing", unpublished paper, 2001. 
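+    #
+    # Grammar rules are collected from p_* methods; the docstring of
+    # each such method holds its productions (see collectRules and
+    # addRule below).  A hypothetical subclass could declare, e.g.:
+    #
+    #     def p_expr(self, args):
+    #         ' expr ::= expr + term '
+    #         return args[0] + args[2]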
+ # + + def __init__(self, start): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + self.augment(start) + self.ruleschanged = 1 + + _NULLABLE = '\e_' + _START = 'START' + _BOF = '|-' + + # + # When pickling, take the time to generate the full state machine; + # some information is then extraneous, too. Unfortunately we + # can't save the rule2func map. + # + def __getstate__(self): + if self.ruleschanged: + # + # XXX - duplicated from parse() + # + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + # + # XXX - should find a better way to do this.. + # + changes = 1 + while changes: + changes = 0 + for k, v in self.edges.items(): + if v is None: + state, sym = k + if self.states.has_key(state): + self.goto(state, sym) + changes = 1 + rv = self.__dict__.copy() + for s in self.states.values(): + del s.items + del rv['rule2func'] + del rv['nullable'] + del rv['cores'] + return rv + + def __setstate__(self, D): + self.rules = {} + self.rule2func = {} + self.rule2name = {} + self.collectRules() + start = D['rules'][self._START][0][1][1] # Blech. + self.augment(start) + D['rule2func'] = self.rule2func + D['makeSet'] = self.makeSet_fast + self.__dict__ = D + + # + # A hook for GenericASTBuilder and GenericASTMatcher. Mess + # thee not with this; nor shall thee toucheth the _preprocess + # argument to addRule. + # + def preprocess(self, rule, func): return rule, func + + def addRule(self, doc, func, _preprocess=1): + fn = func + rules = string.split(doc) + + index = [] + for i in range(len(rules)): + if rules[i] == '::=': + index.append(i-1) + index.append(len(rules)) + + for i in range(len(index)-1): + lhs = rules[index[i]] + rhs = rules[index[i]+2:index[i+1]] + rule = (lhs, tuple(rhs)) + + if _preprocess: + rule, fn = self.preprocess(rule, func) + + if self.rules.has_key(lhs): + self.rules[lhs].append(rule) + else: + self.rules[lhs] = [ rule ] + self.rule2func[rule] = fn + self.rule2name[rule] = func.__name__[2:] + self.ruleschanged = 1 + + def collectRules(self): + for name in _namelist(self): + if name[:2] == 'p_': + func = getattr(self, name) + doc = func.__doc__ + self.addRule(doc, func) + + def augment(self, start): + rule = '%s ::= %s %s' % (self._START, self._BOF, start) + self.addRule(rule, lambda args: args[1], 0) + + def computeNull(self): + self.nullable = {} + tbd = [] + + for rulelist in self.rules.values(): + lhs = rulelist[0][0] + self.nullable[lhs] = 0 + for rule in rulelist: + rhs = rule[1] + if len(rhs) == 0: + self.nullable[lhs] = 1 + continue + # + # We only need to consider rules which + # consist entirely of nonterminal symbols. + # This should be a savings on typical + # grammars. + # + for sym in rhs: + if not self.rules.has_key(sym): + break + else: + tbd.append(rule) + changes = 1 + while changes: + changes = 0 + for lhs, rhs in tbd: + if self.nullable[lhs]: + continue + for sym in rhs: + if not self.nullable[sym]: + break + else: + self.nullable[lhs] = 1 + changes = 1 + + def makeState0(self): + s0 = _State(0, []) + for rule in self.newrules[self._START]: + s0.items.append((rule, 0)) + return s0 + + def finalState(self, tokens): + # + # Yuck. 
+ # + if len(self.newrules[self._START]) == 2 and len(tokens) == 0: + return 1 + start = self.rules[self._START][0][1][1] + return self.goto(1, start) + + def makeNewRules(self): + worklist = [] + for rulelist in self.rules.values(): + for rule in rulelist: + worklist.append((rule, 0, 1, rule)) + + for rule, i, candidate, oldrule in worklist: + lhs, rhs = rule + n = len(rhs) + while i < n: + sym = rhs[i] + if not self.rules.has_key(sym) or \ + not self.nullable[sym]: + candidate = 0 + i = i + 1 + continue + + newrhs = list(rhs) + newrhs[i] = self._NULLABLE+sym + newrule = (lhs, tuple(newrhs)) + worklist.append((newrule, i+1, + candidate, oldrule)) + candidate = 0 + i = i + 1 + else: + if candidate: + lhs = self._NULLABLE+lhs + rule = (lhs, rhs) + if self.newrules.has_key(lhs): + self.newrules[lhs].append(rule) + else: + self.newrules[lhs] = [ rule ] + self.new2old[rule] = oldrule + + def typestring(self, token): + return None + + def error(self, token): + print "Syntax error at or near `%s' token" % token + raise SystemExit + + def parse(self, tokens): + sets = [ [(1,0), (2,0)] ] + self.links = {} + + if self.ruleschanged: + self.computeNull() + self.newrules = {} + self.new2old = {} + self.makeNewRules() + self.ruleschanged = 0 + self.edges, self.cores = {}, {} + self.states = { 0: self.makeState0() } + self.makeState(0, self._BOF) + + for i in xrange(len(tokens)): + sets.append([]) + + if sets[i] == []: + break + self.makeSet(tokens[i], sets, i) + else: + sets.append([]) + self.makeSet(None, sets, len(tokens)) + + #_dump(tokens, sets, self.states) + + finalitem = (self.finalState(tokens), 0) + if finalitem not in sets[-2]: + if len(tokens) > 0: + self.error(tokens[i-1]) + else: + self.error(None) + + return self.buildTree(self._START, finalitem, + tokens, len(sets)-2) + + def isnullable(self, sym): + # + # For symbols in G_e only. If we weren't supporting 1.5, + # could just use sym.startswith(). + # + return self._NULLABLE == sym[0:len(self._NULLABLE)] + + def skip(self, (lhs, rhs), pos=0): + n = len(rhs) + while pos < n: + if not self.isnullable(rhs[pos]): + break + pos = pos + 1 + return pos + + def makeState(self, state, sym): + assert sym is not None + # + # Compute \epsilon-kernel state's core and see if + # it exists already. + # + kitems = [] + for rule, pos in self.states[state].items: + lhs, rhs = rule + if rhs[pos:pos+1] == (sym,): + kitems.append((rule, self.skip(rule, pos+1))) + core = kitems + + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + return self.cores[tcore] + # + # Nope, doesn't exist. Compute it and the associated + # \epsilon-nonkernel state together; we'll need it right away. + # + k = self.cores[tcore] = len(self.states) + K, NK = _State(k, kitems), _State(k+1, []) + self.states[k] = K + predicted = {} + + edges = self.edges + rules = self.newrules + for X in K, NK: + worklist = X.items + for item in worklist: + rule, pos = item + lhs, rhs = rule + if pos == len(rhs): + X.complete.append(rule) + continue + + nextSym = rhs[pos] + key = (X.stateno, nextSym) + if not rules.has_key(nextSym): + if not edges.has_key(key): + edges[key] = None + X.T.append(nextSym) + else: + edges[key] = None + if not predicted.has_key(nextSym): + predicted[nextSym] = 1 + for prule in rules[nextSym]: + ppos = self.skip(prule) + new = (prule, ppos) + NK.items.append(new) + # + # Problem: we know K needs generating, but we + # don't yet know about NK. Can't commit anything + # regarding NK to self.edges until we're sure. 
Should + # we delay committing on both K and NK to avoid this + # hacky code? This creates other problems.. + # + if X is K: + edges = {} + + if NK.items == []: + return k + + # + # Check for \epsilon-nonkernel's core. Unfortunately we + # need to know the entire set of predicted nonterminals + # to do this without accidentally duplicating states. + # + core = predicted.keys() + core.sort() + tcore = tuple(core) + if self.cores.has_key(tcore): + self.edges[(k, None)] = self.cores[tcore] + return k + + nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno + self.edges.update(edges) + self.states[nk] = NK + return k + + def goto(self, state, sym): + key = (state, sym) + if not self.edges.has_key(key): + # + # No transitions from state on sym. + # + return None + + rv = self.edges[key] + if rv is None: + # + # Target state isn't generated yet. Remedy this. + # + rv = self.makeState(state, sym) + self.edges[key] = rv + return rv + + def gotoT(self, state, t): + return [self.goto(state, t)] + + def gotoST(self, state, st): + rv = [] + for t in self.states[state].T: + if st == t: + rv.append(self.goto(state, t)) + return rv + + def add(self, set, item, i=None, predecessor=None, causal=None): + if predecessor is None: + if item not in set: + set.append(item) + else: + key = (item, i) + if item not in set: + self.links[key] = [] + set.append(item) + self.links[key].append((predecessor, causal)) + + def makeSet(self, token, sets, i): + cur, next = sets[i], sets[i+1] + + ttype = token is not None and self.typestring(token) or None + if ttype is not None: + fn, arg = self.gotoT, ttype + else: + fn, arg = self.gotoST, token + + for item in cur: + ptr = (item, i) + state, parent = item + add = fn(state, arg) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + nk = self.goto(k, None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + k = self.goto(pstate, lhs) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + self.add(cur, (k, pparent), + i, pptr, why) + nk = self.goto(k, None) + if nk is not None: + self.add(cur, (nk, i)) + + def makeSet_fast(self, token, sets, i): + # + # Call *only* when the entire state machine has been built! + # It relies on self.edges being filled in completely, and + # then duplicates and inlines code to boost speed at the + # cost of extreme ugliness. 
+ # + cur, next = sets[i], sets[i+1] + ttype = token is not None and self.typestring(token) or None + + for item in cur: + ptr = (item, i) + state, parent = item + if ttype is not None: + k = self.edges.get((state, ttype), None) + if k is not None: + #self.add(next, (k, parent), i+1, ptr) + #INLINED --v + new = (k, parent) + key = (new, i+1) + if new not in next: + self.links[key] = [] + next.append(new) + self.links[key].append((ptr, None)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(next, (nk, i+1)) + #INLINED --v + new = (nk, i+1) + if new not in next: + next.append(new) + #INLINED --^ + else: + add = self.gotoST(state, token) + for k in add: + if k is not None: + self.add(next, (k, parent), i+1, ptr) + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + self.add(next, (nk, i+1)) + + if parent == i: + continue + + for rule in self.states[state].complete: + lhs, rhs = rule + for pitem in sets[parent]: + pstate, pparent = pitem + #k = self.goto(pstate, lhs) + k = self.edges.get((pstate, lhs), None) + if k is not None: + why = (item, i, rule) + pptr = (pitem, parent) + #self.add(cur, (k, pparent), + # i, pptr, why) + #INLINED --v + new = (k, pparent) + key = (new, i) + if new not in cur: + self.links[key] = [] + cur.append(new) + self.links[key].append((pptr, why)) + #INLINED --^ + #nk = self.goto(k, None) + nk = self.edges.get((k, None), None) + if nk is not None: + #self.add(cur, (nk, i)) + #INLINED --v + new = (nk, i) + if new not in cur: + cur.append(new) + #INLINED --^ + + def predecessor(self, key, causal): + for p, c in self.links[key]: + if c == causal: + return p + assert 0 + + def causal(self, key): + links = self.links[key] + if len(links) == 1: + return links[0][1] + choices = [] + rule2cause = {} + for p, c in links: + rule = c[2] + choices.append(rule) + rule2cause[rule] = c + return rule2cause[self.ambiguity(choices)] + + def deriveEpsilon(self, nt): + if len(self.newrules[nt]) > 1: + rule = self.ambiguity(self.newrules[nt]) + else: + rule = self.newrules[nt][0] + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + attr[i] = self.deriveEpsilon(rhs[i]) + return self.rule2func[self.new2old[rule]](attr) + + def buildTree(self, nt, item, tokens, k): + state, parent = item + + choices = [] + for rule in self.states[state].complete: + if rule[0] == nt: + choices.append(rule) + rule = choices[0] + if len(choices) > 1: + rule = self.ambiguity(choices) + #print rule + + rhs = rule[1] + attr = [None] * len(rhs) + + for i in range(len(rhs)-1, -1, -1): + sym = rhs[i] + if not self.newrules.has_key(sym): + if sym != self._BOF: + attr[i] = tokens[k-1] + key = (item, k) + item, k = self.predecessor(key, None) + #elif self.isnullable(sym): + elif self._NULLABLE == sym[0:len(self._NULLABLE)]: + attr[i] = self.deriveEpsilon(sym) + else: + key = (item, k) + why = self.causal(key) + attr[i] = self.buildTree(sym, why[0], + tokens, why[1]) + item, k = self.predecessor(key, why) + return self.rule2func[self.new2old[rule]](attr) + + def ambiguity(self, rules): + # + # XXX - problem here and in collectRules() if the same rule + # appears in >1 method. Also undefined results if rules + # causing the ambiguity appear in the same method. 
+ # + sortlist = [] + name2index = {} + for i in range(len(rules)): + lhs, rhs = rule = rules[i] + name = self.rule2name[self.new2old[rule]] + sortlist.append((len(rhs), name)) + name2index[name] = i + sortlist.sort() + list = map(lambda (a,b): b, sortlist) + return rules[name2index[self.resolve(list)]] + + def resolve(self, list): + # + # Resolve ambiguity in favor of the shortest RHS. + # Since we walk the tree from the top down, this + # should effectively resolve in favor of a "shift". + # + return list[0] + +# +# GenericASTBuilder automagically constructs a concrete/abstract syntax tree +# for a given input. The extra argument is a class (not an instance!) +# which supports the "__setslice__" and "__len__" methods. +# +# XXX - silently overrides any user code in methods. +# + +class GenericASTBuilder(GenericParser): + def __init__(self, AST, start): + GenericParser.__init__(self, start) + self.AST = AST + + def preprocess(self, rule, func): + rebind = lambda lhs, self=self: \ + lambda args, lhs=lhs, self=self: \ + self.buildASTNode(args, lhs) + lhs, rhs = rule + return rule, rebind(lhs) + + def buildASTNode(self, args, lhs): + children = [] + for arg in args: + if isinstance(arg, self.AST): + children.append(arg) + else: + children.append(self.terminal(arg)) + return self.nonterminal(lhs, children) + + def terminal(self, token): return token + + def nonterminal(self, type, args): + rv = self.AST(type) + rv[:len(args)] = args + return rv + +# +# GenericASTTraversal is a Visitor pattern according to Design Patterns. For +# each node it attempts to invoke the method n_<node type>, falling +# back onto the default() method if the n_* can't be found. The preorder +# traversal also looks for an exit hook named n_<node type>_exit (no default +# routine is called if it's not found). To prematurely halt traversal +# of a subtree, call the prune() method -- this only makes sense for a +# preorder traversal. Node type is determined via the typestring() method. +# + +class GenericASTTraversalPruningException: + pass + +class GenericASTTraversal: + def __init__(self, ast): + self.ast = ast + + def typestring(self, node): + return node.type + + def prune(self): + raise GenericASTTraversalPruningException + + def preorder(self, node=None): + if node is None: + node = self.ast + + try: + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + except GenericASTTraversalPruningException: + return + + for kid in node: + self.preorder(kid) + + name = name + '_exit' + if hasattr(self, name): + func = getattr(self, name) + func(node) + + def postorder(self, node=None): + if node is None: + node = self.ast + + for kid in node: + self.postorder(kid) + + name = 'n_' + self.typestring(node) + if hasattr(self, name): + func = getattr(self, name) + func(node) + else: + self.default(node) + + + def default(self, node): + pass + +# +# GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__" +# implemented. +# +# XXX - makes assumptions about how GenericParser walks the parse tree.
+# + +class GenericASTMatcher(GenericParser): + def __init__(self, start, ast): + GenericParser.__init__(self, start) + self.ast = ast + + def preprocess(self, rule, func): + rebind = lambda func, self=self: \ + lambda args, func=func, self=self: \ + self.foundMatch(args, func) + lhs, rhs = rule + rhslist = list(rhs) + rhslist.reverse() + + return (lhs, tuple(rhslist)), rebind(func) + + def foundMatch(self, args, func): + func(args[-1]) + return args[-1] + + def match_r(self, node): + self.input.insert(0, node) + children = 0 + + for child in node: + if children == 0: + self.input.insert(0, '(') + children = children + 1 + self.match_r(child) + + if children > 0: + self.input.insert(0, ')') + + def match(self, ast=None): + if ast is None: + ast = self.ast + self.input = [] + + self.match_r(ast) + self.parse(self.input) + + def resolve(self, list): + # + # Resolve ambiguity in favor of the longest RHS. + # + return list[-1] + +def _dump(tokens, sets, states): + for i in range(len(sets)): + print 'set', i + for item in sets[i]: + print '\t', item + for (lhs, rhs), pos in states[item[0]].items: + print '\t\t', lhs, '::=', + print string.join(rhs[:pos]), + print '.', + print string.join(rhs[pos:]) + if i < len(tokens): + print + print 'token', str(tokens[i]) + print diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer.c new file mode 100644 index 0000000000..3511cbee24 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer.c @@ -0,0 +1,1726 @@ + +/* Tokenizer implementation */ + +#include "Python.h" +#include "pgenheaders.h" + +#include <ctype.h> +#include <string.h> + +#include "tokenizer.h" +#include "errcode.h" + +#ifndef PGEN +#include "unicodeobject.h" +#include "stringobject.h" +#include "fileobject.h" +#include "codecs.h" +#include "abstract.h" +#include "pydebug.h" +#endif /* PGEN */ + +extern char *PyOS_Readline(FILE *, FILE *, char *); +/* Return malloc'ed string including trailing \n; + empty malloc'ed string for EOF; + NULL if interrupted */ + +/* Don't ever change this -- it would break the portability of Python code */ +#define TABSIZE 8 + +/* Forward */ +static struct tok_state *tok_new(void); +static int tok_nextc(struct tok_state *tok); +static void tok_backup(struct tok_state *tok, int c); + +/* Token names */ + +char *_PyParser_TokenNames[] = { + "ENDMARKER", + "NAME", + "NUMBER", + "STRING", + "NEWLINE", + "INDENT", + "DEDENT", + "LPAR", + "RPAR", + "LSQB", + "RSQB", + "COLON", + "COMMA", + "SEMI", + "PLUS", + "MINUS", + "STAR", + "SLASH", + "VBAR", + "AMPER", + "LESS", + "GREATER", + "EQUAL", + "DOT", + "PERCENT", + "BACKQUOTE", + "LBRACE", + "RBRACE", + "EQEQUAL", + "NOTEQUAL", + "LESSEQUAL", + "GREATEREQUAL", + "TILDE", + "CIRCUMFLEX", + "LEFTSHIFT", + "RIGHTSHIFT", + "DOUBLESTAR", + "PLUSEQUAL", + "MINEQUAL", + "STAREQUAL", + "SLASHEQUAL", + "PERCENTEQUAL", + "AMPEREQUAL", + "VBAREQUAL", + "CIRCUMFLEXEQUAL", + "LEFTSHIFTEQUAL", + "RIGHTSHIFTEQUAL", + "DOUBLESTAREQUAL", + "DOUBLESLASH", + "DOUBLESLASHEQUAL", + "AT", + /* This table must match the #defines in token.h!
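+       For example, _PyParser_TokenNames[LPAR] must be "LPAR", matching
+       LPAR's value (7) in token.h.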
*/ + "OP", + "", + "" +}; + +/* Create and initialize a new tok_state structure */ + +static struct tok_state * +tok_new(void) +{ + struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( + sizeof(struct tok_state)); + if (tok == NULL) + return NULL; + tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; + tok->done = E_OK; + tok->fp = NULL; + tok->input = NULL; + tok->tabsize = TABSIZE; + tok->indent = 0; + tok->indstack[0] = 0; + tok->atbol = 1; + tok->pendin = 0; + tok->prompt = tok->nextprompt = NULL; + tok->lineno = 0; + tok->level = 0; + tok->filename = NULL; + tok->altwarning = 0; + tok->alterror = 0; + tok->alttabsize = 1; + tok->altindstack[0] = 0; + tok->decoding_state = 0; + tok->decoding_erred = 0; + tok->read_coding_spec = 0; + tok->encoding = NULL; + tok->cont_line = 0; +#ifndef PGEN + tok->decoding_readline = NULL; + tok->decoding_buffer = NULL; +#endif + return tok; +} + +static char * +new_string(const char *s, Py_ssize_t len) +{ + char* result = (char *)PyMem_MALLOC(len + 1); + if (result != NULL) { + memcpy(result, s, len); + result[len] = '\0'; + } + return result; +} + +#ifdef PGEN + +static char * +decoding_fgets(char *s, int size, struct tok_state *tok) +{ + return fgets(s, size, tok->fp); +} + +static int +decoding_feof(struct tok_state *tok) +{ + return feof(tok->fp); +} + +static char * +decode_str(const char *str, int exec_input, struct tok_state *tok) +{ + return new_string(str, strlen(str)); +} + +#else /* PGEN */ + +static char * +error_ret(struct tok_state *tok) /* XXX */ +{ + tok->decoding_erred = 1; + if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ + PyMem_FREE(tok->buf); + tok->buf = NULL; + return NULL; /* as if it were EOF */ +} + + +static char * +get_normal_name(char *s) /* for utf-8 and latin-1 */ +{ + char buf[13]; + int i; + for (i = 0; i < 12; i++) { + int c = s[i]; + if (c == '\0') + break; + else if (c == '_') + buf[i] = '-'; + else + buf[i] = tolower(c); + } + buf[i] = '\0'; + if (strcmp(buf, "utf-8") == 0 || + strncmp(buf, "utf-8-", 6) == 0) + return "utf-8"; + else if (strcmp(buf, "latin-1") == 0 || + strcmp(buf, "iso-8859-1") == 0 || + strcmp(buf, "iso-latin-1") == 0 || + strncmp(buf, "latin-1-", 8) == 0 || + strncmp(buf, "iso-8859-1-", 11) == 0 || + strncmp(buf, "iso-latin-1-", 12) == 0) + return "iso-8859-1"; + else + return s; +} + +/* Return the coding spec in S, or NULL if none is found. */ + +static char * +get_coding_spec(const char *s, Py_ssize_t size) +{ + Py_ssize_t i; + /* Coding spec must be in a comment, and that comment must be + * the only statement on the source code line. */ + for (i = 0; i < size - 6; i++) { + if (s[i] == '#') + break; + if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') + return NULL; + } + for (; i < size - 6; i++) { /* XXX inefficient search */ + const char* t = s + i; + if (strncmp(t, "coding", 6) == 0) { + const char* begin = NULL; + t += 6; + if (t[0] != ':' && t[0] != '=') + continue; + do { + t++; + } while (t[0] == '\x20' || t[0] == '\t'); + + begin = t; + while (Py_ISALNUM(t[0]) || + t[0] == '-' || t[0] == '_' || t[0] == '.') + t++; + + if (begin < t) { + char* r = new_string(begin, t - begin); + char* q = get_normal_name(r); + if (r != q) { + PyMem_FREE(r); + r = new_string(q, strlen(q)); + } + return r; + } + } + } + return NULL; +} + +/* Check whether the line contains a coding spec. If it does, + invoke the set_readline function for the new encoding. + This function receives the tok_state and the new encoding. + Return 1 on success, 0 on failure. 
*/ + +static int +check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, + int set_readline(struct tok_state *, const char *)) +{ + char * cs; + int r = 1; + + if (tok->cont_line) + /* It's a continuation line, so it can't be a coding spec. */ + return 1; + cs = get_coding_spec(line, size); + if (cs != NULL) { + tok->read_coding_spec = 1; + if (tok->encoding == NULL) { + assert(tok->decoding_state == 1); /* raw */ + if (strcmp(cs, "utf-8") == 0 || + strcmp(cs, "iso-8859-1") == 0) { + tok->encoding = cs; + } else { +#ifdef Py_USING_UNICODE + r = set_readline(tok, cs); + if (r) { + tok->encoding = cs; + tok->decoding_state = -1; + } + else + PyMem_FREE(cs); +#else + /* Without Unicode support, we cannot + process the coding spec. Since there + won't be any Unicode literals, that + won't matter. */ + PyMem_FREE(cs); +#endif + } + } else { /* then, compare cs with BOM */ + r = (strcmp(tok->encoding, cs) == 0); + PyMem_FREE(cs); + } + } + if (!r) { + cs = tok->encoding; + if (!cs) + cs = "with BOM"; + PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); + } + return r; +} + +/* See whether the file starts with a BOM. If it does, + invoke the set_readline function with the new encoding. + Return 1 on success, 0 on failure. */ + +static int +check_bom(int get_char(struct tok_state *), + void unget_char(int, struct tok_state *), + int set_readline(struct tok_state *, const char *), + struct tok_state *tok) +{ + int ch1, ch2, ch3; + ch1 = get_char(tok); + tok->decoding_state = 1; + if (ch1 == EOF) { + return 1; + } else if (ch1 == 0xEF) { + ch2 = get_char(tok); + if (ch2 != 0xBB) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + ch3 = get_char(tok); + if (ch3 != 0xBF) { + unget_char(ch3, tok); + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } +#if 0 + /* Disable support for UTF-16 BOMs until a decision + is made whether this needs to be supported. */ + } else if (ch1 == 0xFE) { + ch2 = get_char(tok); + if (ch2 != 0xFF) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + if (!set_readline(tok, "utf-16-be")) + return 0; + tok->decoding_state = -1; + } else if (ch1 == 0xFF) { + ch2 = get_char(tok); + if (ch2 != 0xFE) { + unget_char(ch2, tok); + unget_char(ch1, tok); + return 1; + } + if (!set_readline(tok, "utf-16-le")) + return 0; + tok->decoding_state = -1; +#endif + } else { + unget_char(ch1, tok); + return 1; + } + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); + tok->encoding = new_string("utf-8", 5); /* resulting string is in utf-8 */ + return 1; +} + +/* Read a line of text from TOK into S, using the stream in TOK. + Return NULL on failure, else S. + + On entry, tok->decoding_buffer will be one of: + 1) NULL: need to call tok->decoding_readline to get a new line + 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and + stored the result in tok->decoding_buffer + 3) PyStringObject *: previous call to fp_readl did not have enough room + (in the s buffer) to copy entire contents of the line read + by tok->decoding_readline. tok->decoding_buffer has the overflow. + In this case, fp_readl is called in a loop (with an expanded buffer) + until the buffer ends with a '\n' (or until the end of the file is + reached): see tok_nextc and its calls to decoding_fgets. +*/ + +static char * +fp_readl(char *s, int size, struct tok_state *tok) +{ +#ifndef Py_USING_UNICODE + /* In a non-Unicode build, this should never be called.
*/ + Py_FatalError("fp_readl should not be called in this build."); + return NULL; /* Keep compiler happy (not reachable) */ +#else + PyObject* utf8 = NULL; + PyObject* buf = tok->decoding_buffer; + char *str; + Py_ssize_t utf8len; + + /* Ask for one less byte so we can terminate it */ + assert(size > 0); + size--; + + if (buf == NULL) { + buf = PyObject_CallObject(tok->decoding_readline, NULL); + if (buf == NULL) + return error_ret(tok); + } else { + tok->decoding_buffer = NULL; + if (PyString_CheckExact(buf)) + utf8 = buf; + } + if (utf8 == NULL) { + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + if (utf8 == NULL) + return error_ret(tok); + } + str = PyString_AsString(utf8); + utf8len = PyString_GET_SIZE(utf8); + if (utf8len > size) { + tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size); + if (tok->decoding_buffer == NULL) { + Py_DECREF(utf8); + return error_ret(tok); + } + utf8len = size; + } + memcpy(s, str, utf8len); + s[utf8len] = '\0'; + Py_DECREF(utf8); + if (utf8len == 0) + return NULL; /* EOF */ + return s; +#endif +} + +/* Set the readline function for TOK to a StreamReader's + readline function. The StreamReader is named ENC. + + This function is called from check_bom and check_coding_spec. + + ENC is usually identical to the future value of tok->encoding, + except for the (currently unsupported) case of UTF-16. + + Return 1 on success, 0 on failure. */ + +static int +fp_setreadl(struct tok_state *tok, const char* enc) +{ + PyObject *reader, *stream, *readline; + + /* XXX: constify filename argument. */ + stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL); + if (stream == NULL) + return 0; + + reader = PyCodec_StreamReader(enc, stream, NULL); + Py_DECREF(stream); + if (reader == NULL) + return 0; + + readline = PyObject_GetAttrString(reader, "readline"); + Py_DECREF(reader); + if (readline == NULL) + return 0; + + tok->decoding_readline = readline; + return 1; +} + +/* Fetch the next byte from TOK. */ + +static int fp_getc(struct tok_state *tok) { + return getc(tok->fp); +} + +/* Unfetch the last byte back into TOK. */ + +static void fp_ungetc(int c, struct tok_state *tok) { + ungetc(c, tok->fp); +} + +/* Read a line of input from TOK. Determine encoding + if necessary. */ + +static char * +decoding_fgets(char *s, int size, struct tok_state *tok) +{ + char *line = NULL; + int badchar = 0; + for (;;) { + if (tok->decoding_state < 0) { + /* We already have a codec associated with + this input. */ + line = fp_readl(s, size, tok); + break; + } else if (tok->decoding_state > 0) { + /* We want a 'raw' read. */ + line = Py_UniversalNewlineFgets(s, size, + tok->fp, NULL); + break; + } else { + /* We have not yet determined the encoding. + If an encoding is found, use the file-pointer + reader functions from now on. */ + if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) + return error_ret(tok); + assert(tok->decoding_state != 0); + } + } + if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) { + if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) { + return error_ret(tok); + } + } +#ifndef PGEN + /* The default encoding is ASCII, so make sure we don't have any + non-ASCII bytes in it. */ + if (line && !tok->encoding) { + unsigned char *c; + for (c = (unsigned char *)line; *c; c++) + if (*c > 127) { + badchar = *c; + break; + } + } + if (badchar) { + char buf[500]; + /* Need to add 1 to the line number, since this line + has not been counted, yet. 
*/ + sprintf(buf, + "Non-ASCII character '\\x%.2x' " + "in file %.200s on line %i, " + "but no encoding declared; " + "see http://www.python.org/peps/pep-0263.html for details", + badchar, tok->filename, tok->lineno + 1); + PyErr_SetString(PyExc_SyntaxError, buf); + return error_ret(tok); + } +#endif + return line; +} + +static int +decoding_feof(struct tok_state *tok) +{ + if (tok->decoding_state >= 0) { + return feof(tok->fp); + } else { + PyObject* buf = tok->decoding_buffer; + if (buf == NULL) { + buf = PyObject_CallObject(tok->decoding_readline, NULL); + if (buf == NULL) { + error_ret(tok); + return 1; + } else { + tok->decoding_buffer = buf; + } + } + return PyObject_Length(buf) == 0; + } +} + +/* Fetch a byte from TOK, using the string buffer. */ + +static int +buf_getc(struct tok_state *tok) { + return Py_CHARMASK(*tok->str++); +} + +/* Unfetch a byte from TOK, using the string buffer. */ + +static void +buf_ungetc(int c, struct tok_state *tok) { + tok->str--; + assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ +} + +/* Set the readline function for TOK to ENC. For the string-based + tokenizer, this means to just record the encoding. */ + +static int +buf_setreadl(struct tok_state *tok, const char* enc) { + tok->enc = enc; + return 1; +} + +/* Return a UTF-8 encoding Python string object from the + C byte string STR, which is encoded with ENC. */ + +#ifdef Py_USING_UNICODE +static PyObject * +translate_into_utf8(const char* str, const char* enc) { + PyObject *utf8; + PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL); + if (buf == NULL) + return NULL; + utf8 = PyUnicode_AsUTF8String(buf); + Py_DECREF(buf); + return utf8; +} +#endif + + +static char * +translate_newlines(const char *s, int exec_input, struct tok_state *tok) { + int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length; + char *buf, *current; + char c = '\0'; + buf = PyMem_MALLOC(needed_length); + if (buf == NULL) { + tok->done = E_NOMEM; + return NULL; + } + for (current = buf; *s; s++, current++) { + c = *s; + if (skip_next_lf) { + skip_next_lf = 0; + if (c == '\n') { + c = *++s; + if (!c) + break; + } + } + if (c == '\r') { + skip_next_lf = 1; + c = '\n'; + } + *current = c; + } + /* If this is exec input, add a newline to the end of the string if + there isn't one already. */ + if (exec_input && c != '\n') { + *current = '\n'; + current++; + } + *current = '\0'; + final_length = current - buf + 1; + if (final_length < needed_length && final_length) + /* should never fail */ + buf = PyMem_REALLOC(buf, final_length); + return buf; +} + +/* Decode a byte string STR for use as the buffer of TOK. + Look for encoding declarations inside STR, and record them + inside TOK. 
*/ + +static const char * +decode_str(const char *input, int single, struct tok_state *tok) +{ + PyObject* utf8 = NULL; + const char *str; + const char *s; + const char *newl[2] = {NULL, NULL}; + int lineno = 0; + tok->input = str = translate_newlines(input, single, tok); + if (str == NULL) + return NULL; + tok->enc = NULL; + tok->str = str; + if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) + return error_ret(tok); + str = tok->str; /* string after BOM if any */ + assert(str); +#ifdef Py_USING_UNICODE + if (tok->enc != NULL) { + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) + return error_ret(tok); + str = PyString_AsString(utf8); + } +#endif + for (s = str;; s++) { + if (*s == '\0') break; + else if (*s == '\n') { + assert(lineno < 2); + newl[lineno] = s; + lineno++; + if (lineno == 2) break; + } + } + tok->enc = NULL; + /* need to check line 1 and 2 separately since check_coding_spec + assumes a single line as input */ + if (newl[0]) { + if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) + return error_ret(tok); + if (tok->enc == NULL && newl[1]) { + if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], + tok, buf_setreadl)) + return error_ret(tok); + } + } +#ifdef Py_USING_UNICODE + if (tok->enc != NULL) { + assert(utf8 == NULL); + utf8 = translate_into_utf8(str, tok->enc); + if (utf8 == NULL) + return error_ret(tok); + str = PyString_AsString(utf8); + } +#endif + assert(tok->decoding_buffer == NULL); + tok->decoding_buffer = utf8; /* CAUTION */ + return str; +} + +#endif /* PGEN */ + +/* Set up tokenizer for string */ + +struct tok_state * +PyTokenizer_FromString(const char *str, int exec_input) +{ + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + str = (char *)decode_str(str, exec_input, tok); + if (str == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + + /* XXX: constify members. 
*/ + tok->buf = tok->cur = tok->end = tok->inp = (char*)str; + return tok; +} + + +/* Set up tokenizer for file */ + +struct tok_state * +PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2) +{ + struct tok_state *tok = tok_new(); + if (tok == NULL) + return NULL; + if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) { + PyTokenizer_Free(tok); + return NULL; + } + tok->cur = tok->inp = tok->buf; + tok->end = tok->buf + BUFSIZ; + tok->fp = fp; + tok->prompt = ps1; + tok->nextprompt = ps2; + return tok; +} + + +/* Free a tok_state structure */ + +void +PyTokenizer_Free(struct tok_state *tok) +{ + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); +#ifndef PGEN + Py_XDECREF(tok->decoding_readline); + Py_XDECREF(tok->decoding_buffer); +#endif + if (tok->fp != NULL && tok->buf != NULL) + PyMem_FREE(tok->buf); + if (tok->input) + PyMem_FREE((char *)tok->input); + PyMem_FREE(tok); +} + +#if !defined(PGEN) && defined(Py_USING_UNICODE) +static int +tok_stdin_decode(struct tok_state *tok, char **inp) +{ + PyObject *enc, *sysstdin, *decoded, *utf8; + const char *encoding; + char *converted; + + if (PySys_GetFile((char *)"stdin", NULL) != stdin) + return 0; + sysstdin = PySys_GetObject("stdin"); + if (sysstdin == NULL || !PyFile_Check(sysstdin)) + return 0; + + enc = ((PyFileObject *)sysstdin)->f_encoding; + if (enc == NULL || !PyString_Check(enc)) + return 0; + Py_INCREF(enc); + + encoding = PyString_AsString(enc); + decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL); + if (decoded == NULL) + goto error_clear; + + utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL); + Py_DECREF(decoded); + if (utf8 == NULL) + goto error_clear; + + assert(PyString_Check(utf8)); + converted = new_string(PyString_AS_STRING(utf8), + PyString_GET_SIZE(utf8)); + Py_DECREF(utf8); + if (converted == NULL) + goto error_nomem; + + PyMem_FREE(*inp); + *inp = converted; + if (tok->encoding != NULL) + PyMem_FREE(tok->encoding); + tok->encoding = new_string(encoding, strlen(encoding)); + if (tok->encoding == NULL) + goto error_nomem; + + Py_DECREF(enc); + return 0; + +error_nomem: + Py_DECREF(enc); + tok->done = E_NOMEM; + return -1; + +error_clear: + Py_DECREF(enc); + if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + tok->done = E_ERROR; + return -1; + } + /* Fallback to iso-8859-1: for backward compatibility */ + PyErr_Clear(); + return 0; +} +#endif + +/* Get next char, updating state; error code goes into tok->done */ + +static int +tok_nextc(register struct tok_state *tok) +{ + for (;;) { + if (tok->cur != tok->inp) { + return Py_CHARMASK(*tok->cur++); /* Fast path */ + } + if (tok->done != E_OK) + return EOF; + if (tok->fp == NULL) { + char *end = strchr(tok->inp, '\n'); + if (end != NULL) + end++; + else { + end = strchr(tok->inp, '\0'); + if (end == tok->inp) { + tok->done = E_EOF; + return EOF; + } + } + if (tok->start == NULL) + tok->buf = tok->cur; + tok->line_start = tok->cur; + tok->lineno++; + tok->inp = end; + return Py_CHARMASK(*tok->cur++); + } + if (tok->prompt != NULL) { + char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); + if (tok->nextprompt != NULL) + tok->prompt = tok->nextprompt; + if (newtok == NULL) + tok->done = E_INTR; + else if (*newtok == '\0') { + PyMem_FREE(newtok); + tok->done = E_EOF; + } +#if !defined(PGEN) && defined(Py_USING_UNICODE) + else if (tok_stdin_decode(tok, &newtok) != 0) + PyMem_FREE(newtok); +#endif + else if (tok->start != NULL) { + size_t start = tok->start - tok->buf; + size_t oldlen = tok->cur - tok->buf; + size_t newlen = oldlen + 
strlen(newtok); + char *buf = tok->buf; + buf = (char *)PyMem_REALLOC(buf, newlen+1); + tok->lineno++; + if (buf == NULL) { + PyMem_FREE(tok->buf); + tok->buf = NULL; + PyMem_FREE(newtok); + tok->done = E_NOMEM; + return EOF; + } + tok->buf = buf; + tok->cur = tok->buf + oldlen; + tok->line_start = tok->cur; + strcpy(tok->buf + oldlen, newtok); + PyMem_FREE(newtok); + tok->inp = tok->buf + newlen; + tok->end = tok->inp + 1; + tok->start = tok->buf + start; + } + else { + tok->lineno++; + if (tok->buf != NULL) + PyMem_FREE(tok->buf); + tok->buf = newtok; + tok->line_start = tok->buf; + tok->cur = tok->buf; + tok->line_start = tok->buf; + tok->inp = strchr(tok->buf, '\0'); + tok->end = tok->inp + 1; + } + } + else { + int done = 0; + Py_ssize_t cur = 0; + char *pt; + if (tok->start == NULL) { + if (tok->buf == NULL) { + tok->buf = (char *) + PyMem_MALLOC(BUFSIZ); + if (tok->buf == NULL) { + tok->done = E_NOMEM; + return EOF; + } + tok->end = tok->buf + BUFSIZ; + } + if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), + tok) == NULL) { + tok->done = E_EOF; + done = 1; + } + else { + tok->done = E_OK; + tok->inp = strchr(tok->buf, '\0'); + done = tok->inp[-1] == '\n'; + } + } + else { + cur = tok->cur - tok->buf; + if (decoding_feof(tok)) { + tok->done = E_EOF; + done = 1; + } + else + tok->done = E_OK; + } + tok->lineno++; + /* Read until '\n' or EOF */ + while (!done) { + Py_ssize_t curstart = tok->start == NULL ? -1 : + tok->start - tok->buf; + Py_ssize_t curvalid = tok->inp - tok->buf; + Py_ssize_t newsize = curvalid + BUFSIZ; + char *newbuf = tok->buf; + newbuf = (char *)PyMem_REALLOC(newbuf, + newsize); + if (newbuf == NULL) { + tok->done = E_NOMEM; + tok->cur = tok->inp; + return EOF; + } + tok->buf = newbuf; + tok->inp = tok->buf + curvalid; + tok->end = tok->buf + newsize; + tok->start = curstart < 0 ? 
NULL : + tok->buf + curstart; + if (decoding_fgets(tok->inp, + (int)(tok->end - tok->inp), + tok) == NULL) { + /* Break out early on decoding + errors, as tok->buf will be NULL + */ + if (tok->decoding_erred) + return EOF; + /* Last line does not end in \n, + fake one */ + strcpy(tok->inp, "\n"); + } + tok->inp = strchr(tok->inp, '\0'); + done = tok->inp[-1] == '\n'; + } + if (tok->buf != NULL) { + tok->cur = tok->buf + cur; + tok->line_start = tok->cur; + /* replace "\r\n" with "\n" */ + /* For Mac leave the \r, giving a syntax error */ + pt = tok->inp - 2; + if (pt >= tok->buf && *pt == '\r') { + *pt++ = '\n'; + *pt = '\0'; + tok->inp = pt; + } + } + } + if (tok->done != E_OK) { + if (tok->prompt != NULL) + PySys_WriteStderr("\n"); + tok->cur = tok->inp; + return EOF; + } + } + /*NOTREACHED*/ +} + + +/* Back-up one character */ + +static void +tok_backup(register struct tok_state *tok, register int c) +{ + if (c != EOF) { + if (--tok->cur < tok->buf) + Py_FatalError("tok_backup: beginning of buffer"); + if (*tok->cur != c) + *tok->cur = c; + } +} + + +/* Return the token corresponding to a single character */ + +int +PyToken_OneChar(int c) +{ + switch (c) { + case '(': return LPAR; + case ')': return RPAR; + case '[': return LSQB; + case ']': return RSQB; + case ':': return COLON; + case ',': return COMMA; + case ';': return SEMI; + case '+': return PLUS; + case '-': return MINUS; + case '*': return STAR; + case '/': return SLASH; + case '|': return VBAR; + case '&': return AMPER; + case '<': return LESS; + case '>': return GREATER; + case '=': return EQUAL; + case '.': return DOT; + case '%': return PERCENT; + case '`': return BACKQUOTE; + case '{': return LBRACE; + case '}': return RBRACE; + case '^': return CIRCUMFLEX; + case '~': return TILDE; + case '@': return AT; + default: return OP; + } +} + + +int +PyToken_TwoChars(int c1, int c2) +{ + switch (c1) { + case '=': + switch (c2) { + case '=': return EQEQUAL; + } + break; + case '!': + switch (c2) { + case '=': return NOTEQUAL; + } + break; + case '<': + switch (c2) { + case '>': return NOTEQUAL; + case '=': return LESSEQUAL; + case '<': return LEFTSHIFT; + } + break; + case '>': + switch (c2) { + case '=': return GREATEREQUAL; + case '>': return RIGHTSHIFT; + } + break; + case '+': + switch (c2) { + case '=': return PLUSEQUAL; + } + break; + case '-': + switch (c2) { + case '=': return MINEQUAL; + } + break; + case '*': + switch (c2) { + case '*': return DOUBLESTAR; + case '=': return STAREQUAL; + } + break; + case '/': + switch (c2) { + case '/': return DOUBLESLASH; + case '=': return SLASHEQUAL; + } + break; + case '|': + switch (c2) { + case '=': return VBAREQUAL; + } + break; + case '%': + switch (c2) { + case '=': return PERCENTEQUAL; + } + break; + case '&': + switch (c2) { + case '=': return AMPEREQUAL; + } + break; + case '^': + switch (c2) { + case '=': return CIRCUMFLEXEQUAL; + } + break; + } + return OP; +} + +int +PyToken_ThreeChars(int c1, int c2, int c3) +{ + switch (c1) { + case '<': + switch (c2) { + case '<': + switch (c3) { + case '=': + return LEFTSHIFTEQUAL; + } + break; + } + break; + case '>': + switch (c2) { + case '>': + switch (c3) { + case '=': + return RIGHTSHIFTEQUAL; + } + break; + } + break; + case '*': + switch (c2) { + case '*': + switch (c3) { + case '=': + return DOUBLESTAREQUAL; + } + break; + } + break; + case '/': + switch (c2) { + case '/': + switch (c3) { + case '=': + return DOUBLESLASHEQUAL; + } + break; + } + break; + } + return OP; +} + +static int +indenterror(struct tok_state *tok) +{ + 
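+    /* Report inconsistent use of tabs and spaces in indentation:
+       fatal (E_TABSPACE) when alterror is set, otherwise a one-time
+       warning on stderr when altwarning is set. */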
if (tok->alterror) { + tok->done = E_TABSPACE; + tok->cur = tok->inp; + return 1; + } + if (tok->altwarning) { + PySys_WriteStderr("%s: inconsistent use of tabs and spaces " + "in indentation\n", tok->filename); + tok->altwarning = 0; + } + return 0; +} + +/* Get next token, after space stripping etc. */ + +static int +tok_get(register struct tok_state *tok, char **p_start, char **p_end) +{ + register int c; + int blankline; + + *p_start = *p_end = NULL; + nextline: + tok->start = NULL; + blankline = 0; + + /* Get indentation level */ + if (tok->atbol) { + register int col = 0; + register int altcol = 0; + tok->atbol = 0; + for (;;) { + c = tok_nextc(tok); + if (c == ' ') + col++, altcol++; + else if (c == '\t') { + col = (col/tok->tabsize + 1) * tok->tabsize; + altcol = (altcol/tok->alttabsize + 1) + * tok->alttabsize; + } + else if (c == '\014') /* Control-L (formfeed) */ + col = altcol = 0; /* For Emacs users */ + else + break; + } + tok_backup(tok, c); + if (c == '#' || c == '\n') { + /* Lines with only whitespace and/or comments + shouldn't affect the indentation and are + not passed to the parser as NEWLINE tokens, + except *totally* empty lines in interactive + mode, which signal the end of a command group. */ + if (col == 0 && c == '\n' && tok->prompt != NULL) + blankline = 0; /* Let it through */ + else + blankline = 1; /* Ignore completely */ + /* We can't jump back right here since we still + may need to skip to the end of a comment */ + } + if (!blankline && tok->level == 0) { + if (col == tok->indstack[tok->indent]) { + /* No change */ + if (altcol != tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + } + else if (col > tok->indstack[tok->indent]) { + /* Indent -- always one */ + if (tok->indent+1 >= MAXINDENT) { + tok->done = E_TOODEEP; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (altcol <= tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + tok->pendin++; + tok->indstack[++tok->indent] = col; + tok->altindstack[tok->indent] = altcol; + } + else /* col < tok->indstack[tok->indent] */ { + /* Dedent -- any number, must be consistent */ + while (tok->indent > 0 && + col < tok->indstack[tok->indent]) { + tok->pendin--; + tok->indent--; + } + if (col != tok->indstack[tok->indent]) { + tok->done = E_DEDENT; + tok->cur = tok->inp; + return ERRORTOKEN; + } + if (altcol != tok->altindstack[tok->indent]) { + if (indenterror(tok)) + return ERRORTOKEN; + } + } + } + } + + tok->start = tok->cur; + + /* Return pending indents/dedents */ + if (tok->pendin != 0) { + if (tok->pendin < 0) { + tok->pendin++; + return DEDENT; + } + else { + tok->pendin--; + return INDENT; + } + } + + again: + tok->start = NULL; + /* Skip spaces */ + do { + c = tok_nextc(tok); + } while (c == ' ' || c == '\t' || c == '\014'); + + /* Set start of current token */ + tok->start = tok->cur - 1; + + /* Skip comment, while looking for tab-setting magic */ + if (c == '#') { + static char *tabforms[] = { + "tab-width:", /* Emacs */ + ":tabstop=", /* vim, full form */ + ":ts=", /* vim, abbreviated form */ + "set tabsize=", /* will vi never die? 
*/ + /* more templates can be added here to support other editors */ + }; + char cbuf[80]; + char *tp, **cp; + tp = cbuf; + do { + *tp++ = c = tok_nextc(tok); + } while (c != EOF && c != '\n' && + (size_t)(tp - cbuf + 1) < sizeof(cbuf)); + *tp = '\0'; + for (cp = tabforms; + cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]); + cp++) { + if ((tp = strstr(cbuf, *cp))) { + int newsize = atoi(tp + strlen(*cp)); + + if (newsize >= 1 && newsize <= 40) { + tok->tabsize = newsize; + if (Py_VerboseFlag) + PySys_WriteStderr( + "Tab size set to %d\n", + newsize); + } + } + } + while (c != EOF && c != '\n') + c = tok_nextc(tok); + } + + /* Check for EOF and errors now */ + if (c == EOF) { + return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; + } + + /* Identifier (most frequent token!) */ + if (Py_ISALPHA(c) || c == '_') { + /* Process r"", u"" and ur"" */ + switch (c) { + case 'b': + case 'B': + c = tok_nextc(tok); + if (c == 'r' || c == 'R') + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; + case 'r': + case 'R': + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; + case 'u': + case 'U': + c = tok_nextc(tok); + if (c == 'r' || c == 'R') + c = tok_nextc(tok); + if (c == '"' || c == '\'') + goto letter_quote; + break; + } + while (c != EOF && (Py_ISALNUM(c) || c == '_')) { + c = tok_nextc(tok); + } + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return NAME; + } + + /* Newline */ + if (c == '\n') { + tok->atbol = 1; + if (blankline || tok->level > 0) + goto nextline; + *p_start = tok->start; + *p_end = tok->cur - 1; /* Leave '\n' out of the string */ + tok->cont_line = 0; + return NEWLINE; + } + + /* Period or number starting with period? */ + if (c == '.') { + c = tok_nextc(tok); + if (isdigit(c)) { + goto fraction; + } + else { + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return DOT; + } + } + + /* Number */ + if (isdigit(c)) { + if (c == '0') { + /* Hex, octal or binary -- maybe. 
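+                   (E.g. 0x1F, 0o17 or legacy 017, 0b101; a trailing
+                   'l' or 'L' marks a long integer.)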
*/ + c = tok_nextc(tok); + if (c == '.') + goto fraction; +#ifndef WITHOUT_COMPLEX + if (c == 'j' || c == 'J') + goto imaginary; +#endif + if (c == 'x' || c == 'X') { + + /* Hex */ + c = tok_nextc(tok); + if (!isxdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (isxdigit(c)); + } + else if (c == 'o' || c == 'O') { + /* Octal */ + c = tok_nextc(tok); + if (c < '0' || c >= '8') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while ('0' <= c && c < '8'); + } + else if (c == 'b' || c == 'B') { + /* Binary */ + c = tok_nextc(tok); + if (c != '0' && c != '1') { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (c == '0' || c == '1'); + } + else { + int found_decimal = 0; + /* Octal; c is first char of it */ + /* There's no 'isoctdigit' macro, sigh */ + while ('0' <= c && c < '8') { + c = tok_nextc(tok); + } + if (isdigit(c)) { + found_decimal = 1; + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } + if (c == '.') + goto fraction; + else if (c == 'e' || c == 'E') + goto exponent; +#ifndef WITHOUT_COMPLEX + else if (c == 'j' || c == 'J') + goto imaginary; +#endif + else if (found_decimal) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + } + if (c == 'l' || c == 'L') + c = tok_nextc(tok); + } + else { + /* Decimal */ + do { + c = tok_nextc(tok); + } while (isdigit(c)); + if (c == 'l' || c == 'L') + c = tok_nextc(tok); + else { + /* Accept floating point numbers. */ + if (c == '.') { + fraction: + /* Fraction */ + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } + if (c == 'e' || c == 'E') { + exponent: + /* Exponent part */ + c = tok_nextc(tok); + if (c == '+' || c == '-') + c = tok_nextc(tok); + if (!isdigit(c)) { + tok->done = E_TOKEN; + tok_backup(tok, c); + return ERRORTOKEN; + } + do { + c = tok_nextc(tok); + } while (isdigit(c)); + } +#ifndef WITHOUT_COMPLEX + if (c == 'j' || c == 'J') + /* Imaginary part */ + imaginary: + c = tok_nextc(tok); +#endif + } + } + tok_backup(tok, c); + *p_start = tok->start; + *p_end = tok->cur; + return NUMBER; + } + + letter_quote: + /* String */ + if (c == '\'' || c == '"') { + Py_ssize_t quote2 = tok->cur - tok->start + 1; + int quote = c; + int triple = 0; + int tripcount = 0; + for (;;) { + c = tok_nextc(tok); + if (c == '\n') { + if (!triple) { + tok->done = E_EOLS; + tok_backup(tok, c); + return ERRORTOKEN; + } + tripcount = 0; + tok->cont_line = 1; /* multiline string. 
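A bare newline in a single-quoted string was rejected just above with E_EOLS; inside a triple-quoted string it merely resets the pending quote count and flags the logical line as continued.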
*/ + } + else if (c == EOF) { + if (triple) + tok->done = E_EOFS; + else + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + else if (c == quote) { + tripcount++; + if (tok->cur - tok->start == quote2) { + c = tok_nextc(tok); + if (c == quote) { + triple = 1; + tripcount = 0; + continue; + } + tok_backup(tok, c); + } + if (!triple || tripcount == 3) + break; + } + else if (c == '\\') { + tripcount = 0; + c = tok_nextc(tok); + if (c == EOF) { + tok->done = E_EOLS; + tok->cur = tok->inp; + return ERRORTOKEN; + } + } + else + tripcount = 0; + } + *p_start = tok->start; + *p_end = tok->cur; + return STRING; + } + + /* Line continuation */ + if (c == '\\') { + c = tok_nextc(tok); + if (c != '\n') { + tok->done = E_LINECONT; + tok->cur = tok->inp; + return ERRORTOKEN; + } + tok->cont_line = 1; + goto again; /* Read next line */ + } + + /* Check for two-character token */ + { + int c2 = tok_nextc(tok); + int token = PyToken_TwoChars(c, c2); +#ifndef PGEN + if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') { + if (PyErr_WarnExplicit(PyExc_DeprecationWarning, + "<> not supported in 3.x; use !=", + tok->filename, tok->lineno, + NULL, NULL)) { + return ERRORTOKEN; + } + } +#endif + if (token != OP) { + int c3 = tok_nextc(tok); + int token3 = PyToken_ThreeChars(c, c2, c3); + if (token3 != OP) { + token = token3; + } else { + tok_backup(tok, c3); + } + *p_start = tok->start; + *p_end = tok->cur; + return token; + } + tok_backup(tok, c2); + } + + /* Keep track of parentheses nesting level */ + switch (c) { + case '(': + case '[': + case '{': + tok->level++; + break; + case ')': + case ']': + case '}': + tok->level--; + break; + } + + /* Punctuation character */ + *p_start = tok->start; + *p_end = tok->cur; + return PyToken_OneChar(c); +} + +int +PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) +{ + int result = tok_get(tok, p_start, p_end); + if (tok->decoding_erred) { + result = ERRORTOKEN; + tok->done = E_DECODE; + } + return result; +} + +/* This function is only called from parsetok. However, it cannot live + there, as it must be empty for PGEN, and we can check for PGEN only + in this file. 
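(The pgen build reaches this code through tokenizer_pgen.c, at the end of this patch, which just defines PGEN and re-includes tokenizer.c, so the preprocessor test below compiles the empty stub instead of the real implementation.)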
*/ + +#if defined(PGEN) || !defined(Py_USING_UNICODE) +char* +PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset) +{ + return NULL; +} +#else +#ifdef Py_USING_UNICODE +static PyObject * +dec_utf8(const char *enc, const char *text, size_t len) { + PyObject *ret = NULL; + PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace"); + if (unicode_text) { + ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace"); + Py_DECREF(unicode_text); + } + if (!ret) { + PyErr_Clear(); + } + return ret; +} +char * +PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset) +{ + char *text = NULL; + if (tok->encoding) { + /* convert source to original encoding */ + PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len); + if (lineobj != NULL) { + int linelen = PyString_Size(lineobj); + const char *line = PyString_AsString(lineobj); + text = PyObject_MALLOC(linelen + 1); + if (text != NULL && line != NULL) { + if (linelen) + strncpy(text, line, linelen); + text[linelen] = '\0'; + } + Py_DECREF(lineobj); + + /* adjust error offset */ + if (*offset > 1) { + PyObject *offsetobj = dec_utf8(tok->encoding, + tok->buf, *offset-1); + if (offsetobj) { + *offset = PyString_Size(offsetobj) + 1; + Py_DECREF(offsetobj); + } + } + + } + } + return text; + +} +#endif /* defined(Py_USING_UNICODE) */ +#endif + + +#ifdef Py_DEBUG + +void +tok_dump(int type, char *start, char *end) +{ + printf("%s", _PyParser_TokenNames[type]); + if (type == NAME || type == NUMBER || type == STRING || type == OP) + printf("(%.*s)", (int)(end - start), start); +} + +#endif diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer.h b/AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer.h new file mode 100644 index 0000000000..3de3280d05 --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer.h @@ -0,0 +1,70 @@ +#ifndef Py_TOKENIZER_H +#define Py_TOKENIZER_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "object.h" + +/* Tokenizer interface */ + +#include "token.h" /* For token types */ + +#define MAXINDENT 100 /* Max indentation level */ + +/* Tokenizer state */ +struct tok_state { + /* Input state; buf <= cur <= inp <= end */ + /* NB an entire line is held in the buffer */ + char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ + char *cur; /* Next character in buffer */ + char *inp; /* End of data in buffer */ + char *end; /* End of input buffer if buf != NULL */ + char *start; /* Start of current token if not NULL */ + int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ + /* NB If done != E_OK, cur must be == inp!!!
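Error paths that give up on the current line keep this invariant by assigning tok->cur = tok->inp immediately before returning ERRORTOKEN.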
*/ + FILE *fp; /* Rest of input; NULL if tokenizing a string */ + int tabsize; /* Tab spacing */ + int indent; /* Current indentation index */ + int indstack[MAXINDENT]; /* Stack of indents */ + int atbol; /* Nonzero if at begin of new line */ + int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ + char *prompt, *nextprompt; /* For interactive prompting */ + int lineno; /* Current line number */ + int level; /* () [] {} Parentheses nesting level */ + /* Used to allow free continuations inside them */ + /* Stuff for checking on different tab sizes */ + const char *filename; /* For error messages */ + int altwarning; /* Issue warning if alternate tabs don't match */ + int alterror; /* Issue error if alternate tabs don't match */ + int alttabsize; /* Alternate tab spacing */ + int altindstack[MAXINDENT]; /* Stack of alternate indents */ + /* Stuff for PEP 0263 */ + int decoding_state; /* -1:decoding, 0:init, 1:raw */ + int decoding_erred; /* whether erred in decoding */ + int read_coding_spec; /* whether 'coding:...' has been read */ + char *encoding; + int cont_line; /* whether we are in a continuation line. */ + const char* line_start; /* pointer to start of current line */ +#ifndef PGEN + PyObject *decoding_readline; /* codecs.open(...).readline */ + PyObject *decoding_buffer; +#endif + const char* enc; + const char* str; + const char* input; /* Tokenizer's newline translated copy of the string. */ +}; + +extern struct tok_state *PyTokenizer_FromString(const char *, int); +extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *); +extern void PyTokenizer_Free(struct tok_state *); +extern int PyTokenizer_Get(struct tok_state *, char **, char **); +#if defined(PGEN) || defined(Py_USING_UNICODE) +extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, + int len, int *offset); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TOKENIZER_H */ diff --git a/AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer_pgen.c b/AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer_pgen.c new file mode 100644 index 0000000000..ecbda9c4ef --- /dev/null +++ b/AppPkg/Applications/Python/Python-2.7.2/Parser/tokenizer_pgen.c @@ -0,0 +1,2 @@ +#define PGEN +#include "tokenizer.c"
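What follows is an editorial sketch, not part of the patch: a minimal driver for the interface declared in tokenizer.h above. It assumes a CPython 2.7.2 build tree (Python.h and token.h on the include path, the Parser objects linked in); the sample source string and the 0 passed as the int argument of PyTokenizer_FromString are illustrative choices, not taken from these files.

    #include "Python.h"
    #include "token.h"         /* token type constants and _PyParser_TokenNames */
    #include "tokenizer.h"     /* struct tok_state and the PyTokenizer_* entry points */

    int
    main(void)
    {
        struct tok_state *tok;
        char *start, *end;
        int type;

        Py_Initialize();       /* defensive: encoding handling can touch Python objects */
        tok = PyTokenizer_FromString("x = 0x1F + y\n", 0);
        if (tok == NULL)
            return 1;
        for (;;) {
            /* Each call yields one token and points start/end at its text. */
            type = PyTokenizer_Get(tok, &start, &end);
            if (type == ENDMARKER || type == ERRORTOKEN)
                break;
            printf("%-10s '%.*s'\n", _PyParser_TokenNames[type],
                   (int)(end - start), start);
        }
        PyTokenizer_Free(tok);
        Py_Finalize();
        return 0;
    }

For that input the loop prints NAME 'x', EQUAL '=', NUMBER '0x1F', PLUS '+', NAME 'y' and a NEWLINE, exercising the identifier, number and one-character-operator paths of tok_get seen above.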