diff options
author | Nathan Binkert <binkertn@umich.edu> | 2007-05-24 21:54:51 -0700 |
---|---|---|
committer | Nathan Binkert <binkertn@umich.edu> | 2007-05-24 21:54:51 -0700 |
commit | 44ebb8d3e27329e9f0b501897585359b4ab696f2 (patch) | |
tree | 536ed9dba1458f0d13d680ccfbb5f7ec3b79109c /ext/ply/example | |
parent | 9f1c104ccd835ce390d9e9fd24e59a6ea626ed17 (diff) | |
download | gem5-44ebb8d3e27329e9f0b501897585359b4ab696f2.tar.xz |
Update to ply 2.3
ext/ply/ply/lex.py:
ext/ply/ply/yacc.py:
ext/ply/CHANGES:
ext/ply/README:
ext/ply/TODO:
ext/ply/doc/ply.html:
ext/ply/example/ansic/clex.py:
ext/ply/example/ansic/cparse.py:
ext/ply/example/calc/calc.py:
ext/ply/example/hedit/hedit.py:
ext/ply/example/optcalc/calc.py:
ext/ply/test/README:
ext/ply/test/calclex.py:
ext/ply/test/lex_doc1.exp:
ext/ply/test/lex_doc1.py:
ext/ply/test/lex_dup1.exp:
ext/ply/test/lex_dup1.py:
ext/ply/test/lex_dup2.exp:
ext/ply/test/lex_dup2.py:
ext/ply/test/lex_dup3.exp:
ext/ply/test/lex_dup3.py:
ext/ply/test/lex_empty.py:
ext/ply/test/lex_error1.py:
ext/ply/test/lex_error2.py:
ext/ply/test/lex_error3.exp:
ext/ply/test/lex_error3.py:
ext/ply/test/lex_error4.exp:
ext/ply/test/lex_error4.py:
ext/ply/test/lex_hedit.exp:
ext/ply/test/lex_hedit.py:
ext/ply/test/lex_ignore.exp:
ext/ply/test/lex_ignore.py:
ext/ply/test/lex_re1.exp:
ext/ply/test/lex_re1.py:
ext/ply/test/lex_rule1.py:
ext/ply/test/lex_token1.py:
ext/ply/test/lex_token2.py:
ext/ply/test/lex_token3.py:
ext/ply/test/lex_token4.py:
ext/ply/test/lex_token5.exp:
ext/ply/test/lex_token5.py:
ext/ply/test/yacc_badargs.exp:
ext/ply/test/yacc_badargs.py:
ext/ply/test/yacc_badprec.exp:
ext/ply/test/yacc_badprec.py:
ext/ply/test/yacc_badprec2.exp:
ext/ply/test/yacc_badprec2.py:
ext/ply/test/yacc_badrule.exp:
ext/ply/test/yacc_badrule.py:
ext/ply/test/yacc_badtok.exp:
ext/ply/test/yacc_badtok.py:
ext/ply/test/yacc_dup.exp:
ext/ply/test/yacc_dup.py:
ext/ply/test/yacc_error1.exp:
ext/ply/test/yacc_error1.py:
ext/ply/test/yacc_error2.exp:
ext/ply/test/yacc_error2.py:
ext/ply/test/yacc_error3.exp:
ext/ply/test/yacc_error3.py:
ext/ply/test/yacc_inf.exp:
ext/ply/test/yacc_inf.py:
ext/ply/test/yacc_missing1.exp:
ext/ply/test/yacc_missing1.py:
ext/ply/test/yacc_nodoc.exp:
ext/ply/test/yacc_nodoc.py:
ext/ply/test/yacc_noerror.exp:
ext/ply/test/yacc_noerror.py:
ext/ply/test/yacc_nop.exp:
ext/ply/test/yacc_nop.py:
ext/ply/test/yacc_notfunc.exp:
ext/ply/test/yacc_notfunc.py:
ext/ply/test/yacc_notok.exp:
ext/ply/test/yacc_notok.py:
ext/ply/test/yacc_rr.exp:
ext/ply/test/yacc_rr.py:
ext/ply/test/yacc_simple.exp:
ext/ply/test/yacc_simple.py:
ext/ply/test/yacc_sr.exp:
ext/ply/test/yacc_sr.py:
ext/ply/test/yacc_term1.exp:
ext/ply/test/yacc_term1.py:
ext/ply/test/yacc_unused.exp:
ext/ply/test/yacc_unused.py:
ext/ply/test/yacc_uprec.exp:
ext/ply/test/yacc_uprec.py:
Import patch ply.diff
src/arch/isa_parser.py:
everything is now within the ply package
--HG--
rename : ext/ply/lex.py => ext/ply/ply/lex.py
rename : ext/ply/yacc.py => ext/ply/ply/yacc.py
extra : convert_revision : fca8deabd5c095bdeabd52a1f236ae1404ef106e
Diffstat (limited to 'ext/ply/example')
34 files changed, 2879 insertions, 62 deletions
diff --git a/ext/ply/example/BASIC/README b/ext/ply/example/BASIC/README new file mode 100644 index 000000000..be24a3005 --- /dev/null +++ b/ext/ply/example/BASIC/README @@ -0,0 +1,79 @@ +Inspired by a September 14, 2006 Salon article "Why Johnny Can't Code" by +David Brin (http://www.salon.com/tech/feature/2006/09/14/basic/index.html), +I thought that a fully working BASIC interpreter might be an interesting, +if not questionable, PLY example. Uh, okay, so maybe it's just a bad idea, +but in any case, here it is. + +In this example, you'll find a rough implementation of 1964 Dartmouth BASIC +as described in the manual at: + + http://www.bitsavers.org/pdf/dartmouth/BASIC_Oct64.pdf + +See also: + + http://en.wikipedia.org/wiki/Dartmouth_BASIC + +This dialect is downright primitive---there are no string variables +and no facilities for interactive input. Moreover, subroutines and functions +are brain-dead even more than they usually are for BASIC. Of course, +the GOTO statement is provided. + +Nevertheless, there are a few interesting aspects of this example: + + - It illustrates a fully working interpreter including lexing, parsing, + and interpretation of instructions. + + - The parser shows how to catch and report various kinds of parsing + errors in a more graceful way. + + - The example both parses files (supplied on command line) and + interactive input entered line by line. + + - It shows how you might represent parsed information. In this case, + each BASIC statement is encoded into a Python tuple containing the + statement type and parameters. These tuples are then stored in + a dictionary indexed by program line numbers. + + - Even though it's just BASIC, the parser contains more than 80 + rules and 150 parsing states. Thus, it's a little more meaty than + the calculator example. + +To use the example, run it as follows: + + % python basic.py hello.bas + HELLO WORLD + % + +or use it interactively: + + % python basic.py + [BASIC] 10 PRINT "HELLO WORLD" + [BASIC] 20 END + [BASIC] RUN + HELLO WORLD + [BASIC] + +The following files are defined: + + basic.py - High level script that controls everything + basiclex.py - BASIC tokenizer + basparse.py - BASIC parser + basinterp.py - BASIC interpreter that runs parsed programs. + +In addition, a number of sample BASIC programs (.bas suffix) are +provided. These were taken out of the Dartmouth manual. + +Disclaimer: I haven't spent a ton of time testing this and it's likely that +I've skimped here and there on a few finer details (e.g., strictly enforcing +variable naming rules). However, the interpreter seems to be able to run +the examples in the BASIC manual. + +Have fun! + +-Dave + + + + + + diff --git a/ext/ply/example/BASIC/basic.py b/ext/ply/example/BASIC/basic.py new file mode 100644 index 000000000..3a07acdbf --- /dev/null +++ b/ext/ply/example/BASIC/basic.py @@ -0,0 +1,68 @@ +# An implementation of Dartmouth BASIC (1964) +# + +import sys +sys.path.insert(0,"../..") + +import basiclex +import basparse +import basinterp + +# If a filename has been specified, we try to run it. +# If a runtime error occurs, we bail out and enter +# interactive mode below +if len(sys.argv) == 2: + data = open(sys.argv[1]).read() + prog = basparse.parse(data) + if not prog: raise SystemExit + b = basinterp.BasicInterpreter(prog) + try: + b.run() + raise SystemExit + except RuntimeError: + pass + +else: + b = basinterp.BasicInterpreter({}) + +# Interactive mode. This incrementally adds/deletes statements +# from the program stored in the BasicInterpreter object. In +# addition, special commands 'NEW','LIST',and 'RUN' are added. +# Specifying a line number with no code deletes that line from +# the program. + +while 1: + try: + line = raw_input("[BASIC] ") + except EOFError: + raise SystemExit + if not line: continue + line += "\n" + prog = basparse.parse(line) + if not prog: continue + + keys = prog.keys() + if keys[0] > 0: + b.add_statements(prog) + else: + stat = prog[keys[0]] + if stat[0] == 'RUN': + try: + b.run() + except RuntimeError: + pass + elif stat[0] == 'LIST': + b.list() + elif stat[0] == 'BLANK': + b.del_line(stat[1]) + elif stat[0] == 'NEW': + b.new() + + + + + + + + + diff --git a/ext/ply/example/BASIC/basiclex.py b/ext/ply/example/BASIC/basiclex.py new file mode 100644 index 000000000..727383f2b --- /dev/null +++ b/ext/ply/example/BASIC/basiclex.py @@ -0,0 +1,74 @@ +# An implementation of Dartmouth BASIC (1964) + +from ply import * + +keywords = ( + 'LET','READ','DATA','PRINT','GOTO','IF','THEN','FOR','NEXT','TO','STEP', + 'END','STOP','DEF','GOSUB','DIM','REM','RETURN','RUN','LIST','NEW', +) + +tokens = keywords + ( + 'EQUALS','PLUS','MINUS','TIMES','DIVIDE','POWER', + 'LPAREN','RPAREN','LT','LE','GT','GE','NE', + 'COMMA','SEMI', 'INTEGER','FLOAT', 'STRING', + 'ID','NEWLINE' +) + +t_ignore = ' \t' + +def t_REM(t): + r'REM .*' + return t + +def t_ID(t): + r'[A-Z][A-Z0-9]*' + if t.value in keywords: + t.type = t.value + return t + +t_EQUALS = r'=' +t_PLUS = r'\+' +t_MINUS = r'-' +t_TIMES = r'\*' +t_POWER = r'\^' +t_DIVIDE = r'/' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_LT = r'<' +t_LE = r'<=' +t_GT = r'>' +t_GE = r'>=' +t_NE = r'<>' +t_COMMA = r'\,' +t_SEMI = r';' +t_INTEGER = r'\d+' +t_FLOAT = r'((\d*\.\d+)(E[\+-]?\d+)?|([1-9]\d*E[\+-]?\d+))' +t_STRING = r'\".*?\"' + +def t_NEWLINE(t): + r'\n' + t.lexer.lineno += 1 + return t + +def t_error(t): + print "Illegal character", t.value[0] + t.lexer.skip(1) + +lex.lex() + + + + + + + + + + + + + + + + + diff --git a/ext/ply/example/BASIC/basinterp.py b/ext/ply/example/BASIC/basinterp.py new file mode 100644 index 000000000..5850457cb --- /dev/null +++ b/ext/ply/example/BASIC/basinterp.py @@ -0,0 +1,440 @@ +# This file provides the runtime support for running a basic program +# Assumes the program has been parsed using basparse.py + +import sys +import math +import random + +class BasicInterpreter: + + # Initialize the interpreter. prog is a dictionary + # containing (line,statement) mappings + def __init__(self,prog): + self.prog = prog + + self.functions = { # Built-in function table + 'SIN' : lambda z: math.sin(self.eval(z)), + 'COS' : lambda z: math.cos(self.eval(z)), + 'TAN' : lambda z: math.tan(self.eval(z)), + 'ATN' : lambda z: math.atan(self.eval(z)), + 'EXP' : lambda z: math.exp(self.eval(z)), + 'ABS' : lambda z: abs(self.eval(z)), + 'LOG' : lambda z: math.log(self.eval(z)), + 'SQR' : lambda z: math.sqrt(self.eval(z)), + 'INT' : lambda z: int(self.eval(z)), + 'RND' : lambda z: random.random() + } + + # Collect all data statements + def collect_data(self): + self.data = [] + for lineno in self.stat: + if self.prog[lineno][0] == 'DATA': + self.data = self.data + self.prog[lineno][1] + self.dc = 0 # Initialize the data counter + + # Check for end statements + def check_end(self): + has_end = 0 + for lineno in self.stat: + if self.prog[lineno][0] == 'END' and not has_end: + has_end = lineno + if not has_end: + print "NO END INSTRUCTION" + self.error = 1 + if has_end != lineno: + print "END IS NOT LAST" + self.error = 1 + + # Check loops + def check_loops(self): + for pc in range(len(self.stat)): + lineno = self.stat[pc] + if self.prog[lineno][0] == 'FOR': + forinst = self.prog[lineno] + loopvar = forinst[1] + for i in range(pc+1,len(self.stat)): + if self.prog[self.stat[i]][0] == 'NEXT': + nextvar = self.prog[self.stat[i]][1] + if nextvar != loopvar: continue + self.loopend[pc] = i + break + else: + print "FOR WITHOUT NEXT AT LINE" % self.stat[pc] + self.error = 1 + + # Evaluate an expression + def eval(self,expr): + etype = expr[0] + if etype == 'NUM': return expr[1] + elif etype == 'GROUP': return self.eval(expr[1]) + elif etype == 'UNARY': + if expr[1] == '-': return -self.eval(expr[2]) + elif etype == 'BINOP': + if expr[1] == '+': return self.eval(expr[2])+self.eval(expr[3]) + elif expr[1] == '-': return self.eval(expr[2])-self.eval(expr[3]) + elif expr[1] == '*': return self.eval(expr[2])*self.eval(expr[3]) + elif expr[1] == '/': return float(self.eval(expr[2]))/self.eval(expr[3]) + elif expr[1] == '^': return abs(self.eval(expr[2]))**self.eval(expr[3]) + elif etype == 'VAR': + var,dim1,dim2 = expr[1] + if not dim1 and not dim2: + if self.vars.has_key(var): + return self.vars[var] + else: + print "UNDEFINED VARIABLE", var, "AT LINE", self.stat[self.pc] + raise RuntimeError + # May be a list lookup or a function evaluation + if dim1 and not dim2: + if self.functions.has_key(var): + # A function + return self.functions[var](dim1) + else: + # A list evaluation + if self.lists.has_key(var): + dim1val = self.eval(dim1) + if dim1val < 1 or dim1val > len(self.lists[var]): + print "LIST INDEX OUT OF BOUNDS AT LINE", self.stat[self.pc] + raise RuntimeError + return self.lists[var][dim1val-1] + if dim1 and dim2: + if self.tables.has_key(var): + dim1val = self.eval(dim1) + dim2val = self.eval(dim2) + if dim1val < 1 or dim1val > len(self.tables[var]) or dim2val < 1 or dim2val > len(self.tables[var][0]): + print "TABLE INDEX OUT OUT BOUNDS AT LINE", self.stat[self.pc] + raise RuntimeError + return self.tables[var][dim1val-1][dim2val-1] + print "UNDEFINED VARIABLE", var, "AT LINE", self.stat[self.pc] + raise RuntimeError + + # Evaluate a relational expression + def releval(self,expr): + etype = expr[1] + lhs = self.eval(expr[2]) + rhs = self.eval(expr[3]) + if etype == '<': + if lhs < rhs: return 1 + else: return 0 + + elif etype == '<=': + if lhs <= rhs: return 1 + else: return 0 + + elif etype == '>': + if lhs > rhs: return 1 + else: return 0 + + elif etype == '>=': + if lhs >= rhs: return 1 + else: return 0 + + elif etype == '=': + if lhs == rhs: return 1 + else: return 0 + + elif etype == '<>': + if lhs != rhs: return 1 + else: return 0 + + # Assignment + def assign(self,target,value): + var, dim1, dim2 = target + if not dim1 and not dim2: + self.vars[var] = self.eval(value) + elif dim1 and not dim2: + # List assignment + dim1val = self.eval(dim1) + if not self.lists.has_key(var): + self.lists[var] = [0]*10 + + if dim1val > len(self.lists[var]): + print "DIMENSION TOO LARGE AT LINE", self.stat[self.pc] + raise RuntimeError + self.lists[var][dim1val-1] = self.eval(value) + elif dim1 and dim2: + dim1val = self.eval(dim1) + dim2val = self.eval(dim2) + if not self.tables.has_key(var): + temp = [0]*10 + v = [] + for i in range(10): v.append(temp[:]) + self.tables[var] = v + # Variable already exists + if dim1val > len(self.tables[var]) or dim2val > len(self.tables[var][0]): + print "DIMENSION TOO LARGE AT LINE", self.stat[self.pc] + raise RuntimeError + self.tables[var][dim1val-1][dim2val-1] = self.eval(value) + + # Change the current line number + def goto(self,linenum): + if not self.prog.has_key(linenum): + print "UNDEFINED LINE NUMBER %d AT LINE %d" % (linenum, self.stat[self.pc]) + raise RuntimeError + self.pc = self.stat.index(linenum) + + # Run it + def run(self): + self.vars = { } # All variables + self.lists = { } # List variables + self.tables = { } # Tables + self.loops = [ ] # Currently active loops + self.loopend= { } # Mapping saying where loops end + self.gosub = None # Gosub return point (if any) + self.error = 0 # Indicates program error + + self.stat = self.prog.keys() # Ordered list of all line numbers + self.stat.sort() + self.pc = 0 # Current program counter + + # Processing prior to running + + self.collect_data() # Collect all of the data statements + self.check_end() + self.check_loops() + + if self.error: raise RuntimeError + + while 1: + line = self.stat[self.pc] + instr = self.prog[line] + + op = instr[0] + + # END and STOP statements + if op == 'END' or op == 'STOP': + break # We're done + + # GOTO statement + elif op == 'GOTO': + newline = instr[1] + self.goto(newline) + continue + + # PRINT statement + elif op == 'PRINT': + plist = instr[1] + out = "" + for label,val in plist: + if out: + out += ' '*(15 - (len(out) % 15)) + out += label + if val: + if label: out += " " + eval = self.eval(val) + out += str(eval) + sys.stdout.write(out) + end = instr[2] + if not (end == ',' or end == ';'): + sys.stdout.write("\n") + if end == ',': sys.stdout.write(" "*(15-(len(out) % 15))) + if end == ';': sys.stdout.write(" "*(3-(len(out) % 3))) + + # LET statement + elif op == 'LET': + target = instr[1] + value = instr[2] + self.assign(target,value) + + # READ statement + elif op == 'READ': + for target in instr[1]: + if self.dc < len(self.data): + value = ('NUM',self.data[self.dc]) + self.assign(target,value) + self.dc += 1 + else: + # No more data. Program ends + return + elif op == 'IF': + relop = instr[1] + newline = instr[2] + if (self.releval(relop)): + self.goto(newline) + continue + + elif op == 'FOR': + loopvar = instr[1] + initval = instr[2] + finval = instr[3] + stepval = instr[4] + + # Check to see if this is a new loop + if not self.loops or self.loops[-1][0] != self.pc: + # Looks like a new loop. Make the initial assignment + newvalue = initval + self.assign((loopvar,None,None),initval) + if not stepval: stepval = ('NUM',1) + stepval = self.eval(stepval) # Evaluate step here + self.loops.append((self.pc,stepval)) + else: + # It's a repeat of the previous loop + # Update the value of the loop variable according to the step + stepval = ('NUM',self.loops[-1][1]) + newvalue = ('BINOP','+',('VAR',(loopvar,None,None)),stepval) + + if self.loops[-1][1] < 0: relop = '>=' + else: relop = '<=' + if not self.releval(('RELOP',relop,newvalue,finval)): + # Loop is done. Jump to the NEXT + self.pc = self.loopend[self.pc] + self.loops.pop() + else: + self.assign((loopvar,None,None),newvalue) + + elif op == 'NEXT': + if not self.loops: + print "NEXT WITHOUT FOR AT LINE",line + return + + nextvar = instr[1] + self.pc = self.loops[-1][0] + loopinst = self.prog[self.stat[self.pc]] + forvar = loopinst[1] + if nextvar != forvar: + print "NEXT DOESN'T MATCH FOR AT LINE", line + return + continue + elif op == 'GOSUB': + newline = instr[1] + if self.gosub: + print "ALREADY IN A SUBROUTINE AT LINE", line + return + self.gosub = self.stat[self.pc] + self.goto(newline) + continue + + elif op == 'RETURN': + if not self.gosub: + print "RETURN WITHOUT A GOSUB AT LINE",line + return + self.goto(self.gosub) + self.gosub = None + + elif op == 'FUNC': + fname = instr[1] + pname = instr[2] + expr = instr[3] + def eval_func(pvalue,name=pname,self=self,expr=expr): + self.assign((pname,None,None),pvalue) + return self.eval(expr) + self.functions[fname] = eval_func + + elif op == 'DIM': + for vname,x,y in instr[1]: + if y == 0: + # Single dimension variable + self.lists[vname] = [0]*x + else: + # Double dimension variable + temp = [0]*y + v = [] + for i in range(x): + v.append(temp[:]) + self.tables[vname] = v + + self.pc += 1 + + # Utility functions for program listing + def expr_str(self,expr): + etype = expr[0] + if etype == 'NUM': return str(expr[1]) + elif etype == 'GROUP': return "(%s)" % self.expr_str(expr[1]) + elif etype == 'UNARY': + if expr[1] == '-': return "-"+str(expr[2]) + elif etype == 'BINOP': + return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) + elif etype == 'VAR': + return self.var_str(expr[1]) + + def relexpr_str(self,expr): + return "%s %s %s" % (self.expr_str(expr[2]),expr[1],self.expr_str(expr[3])) + + def var_str(self,var): + varname,dim1,dim2 = var + if not dim1 and not dim2: return varname + if dim1 and not dim2: return "%s(%s)" % (varname, self.expr_str(dim1)) + return "%s(%s,%s)" % (varname, self.expr_str(dim1),self.expr_str(dim2)) + + # Create a program listing + def list(self): + stat = self.prog.keys() # Ordered list of all line numbers + stat.sort() + for line in stat: + instr = self.prog[line] + op = instr[0] + if op in ['END','STOP','RETURN']: + print line, op + continue + elif op == 'REM': + print line, instr[1] + elif op == 'PRINT': + print line, op, + first = 1 + for p in instr[1]: + if not first: print ",", + if p[0] and p[1]: print '"%s"%s' % (p[0],self.expr_str(p[1])), + elif p[1]: print self.expr_str(p[1]), + else: print '"%s"' % (p[0],), + first = 0 + if instr[2]: print instr[2] + else: print + elif op == 'LET': + print line,"LET",self.var_str(instr[1]),"=",self.expr_str(instr[2]) + elif op == 'READ': + print line,"READ", + first = 1 + for r in instr[1]: + if not first: print ",", + print self.var_str(r), + first = 0 + print "" + elif op == 'IF': + print line,"IF %s THEN %d" % (self.relexpr_str(instr[1]),instr[2]) + elif op == 'GOTO' or op == 'GOSUB': + print line, op, instr[1] + elif op == 'FOR': + print line,"FOR %s = %s TO %s" % (instr[1],self.expr_str(instr[2]),self.expr_str(instr[3])), + if instr[4]: print "STEP %s" % (self.expr_str(instr[4])), + print + elif op == 'NEXT': + print line,"NEXT", instr[1] + elif op == 'FUNC': + print line,"DEF %s(%s) = %s" % (instr[1],instr[2],self.expr_str(instr[3])) + elif op == 'DIM': + print line,"DIM", + first = 1 + for vname,x,y in instr[1]: + if not first: print ",", + first = 0 + if y == 0: + print "%s(%d)" % (vname,x), + else: + print "%s(%d,%d)" % (vname,x,y), + + print + elif op == 'DATA': + print line,"DATA", + first = 1 + for v in instr[1]: + if not first: print ",", + first = 0 + print v, + print + + # Erase the current program + def new(self): + self.prog = {} + + # Insert statements + def add_statements(self,prog): + for line,stat in prog.items(): + self.prog[line] = stat + + # Delete a statement + def del_line(self,lineno): + try: + del self.prog[lineno] + except KeyError: + pass + diff --git a/ext/ply/example/BASIC/basparse.py b/ext/ply/example/BASIC/basparse.py new file mode 100644 index 000000000..930af9a22 --- /dev/null +++ b/ext/ply/example/BASIC/basparse.py @@ -0,0 +1,424 @@ +# An implementation of Dartmouth BASIC (1964) +# + +from ply import * +import basiclex + +tokens = basiclex.tokens + +precedence = ( + ('left', 'PLUS','MINUS'), + ('left', 'TIMES','DIVIDE'), + ('left', 'POWER'), + ('right','UMINUS') +) + +#### A BASIC program is a series of statements. We represent the program as a +#### dictionary of tuples indexed by line number. + +def p_program(p): + '''program : program statement + | statement''' + + if len(p) == 2 and p[1]: + p[0] = { } + line,stat = p[1] + p[0][line] = stat + elif len(p) ==3: + p[0] = p[1] + if not p[0]: p[0] = { } + if p[2]: + line,stat = p[2] + p[0][line] = stat + +#### This catch-all rule is used for any catastrophic errors. In this case, +#### we simply return nothing + +def p_program_error(p): + '''program : error''' + p[0] = None + p.parser.error = 1 + +#### Format of all BASIC statements. + +def p_statement(p): + '''statement : INTEGER command NEWLINE''' + if isinstance(p[2],str): + print p[2],"AT LINE", p[1] + p[0] = None + p.parser.error = 1 + else: + lineno = int(p[1]) + p[0] = (lineno,p[2]) + +#### Interactive statements. + +def p_statement_interactive(p): + '''statement : RUN NEWLINE + | LIST NEWLINE + | NEW NEWLINE''' + p[0] = (0, (p[1],0)) + +#### Blank line number +def p_statement_blank(p): + '''statement : INTEGER NEWLINE''' + p[0] = (0,('BLANK',int(p[1]))) + +#### Error handling for malformed statements + +def p_statement_bad(p): + '''statement : INTEGER error NEWLINE''' + print "MALFORMED STATEMENT AT LINE", p[1] + p[0] = None + p.parser.error = 1 + +#### Blank line + +def p_statement_newline(p): + '''statement : NEWLINE''' + p[0] = None + +#### LET statement + +def p_command_let(p): + '''command : LET variable EQUALS expr''' + p[0] = ('LET',p[2],p[4]) + +def p_command_let_bad(p): + '''command : LET variable EQUALS error''' + p[0] = "BAD EXPRESSION IN LET" + +#### READ statement + +def p_command_read(p): + '''command : READ varlist''' + p[0] = ('READ',p[2]) + +def p_command_read_bad(p): + '''command : READ error''' + p[0] = "MALFORMED VARIABLE LIST IN READ" + +#### DATA statement + +def p_command_data(p): + '''command : DATA numlist''' + p[0] = ('DATA',p[2]) + +def p_command_data_bad(p): + '''command : DATA error''' + p[0] = "MALFORMED NUMBER LIST IN DATA" + +#### PRINT statement + +def p_command_print(p): + '''command : PRINT plist optend''' + p[0] = ('PRINT',p[2],p[3]) + +def p_command_print_bad(p): + '''command : PRINT error''' + p[0] = "MALFORMED PRINT STATEMENT" + +#### Optional ending on PRINT. Either a comma (,) or semicolon (;) + +def p_optend(p): + '''optend : COMMA + | SEMI + |''' + if len(p) == 2: + p[0] = p[1] + else: + p[0] = None + +#### PRINT statement with no arguments + +def p_command_print_empty(p): + '''command : PRINT''' + p[0] = ('PRINT',[],None) + +#### GOTO statement + +def p_command_goto(p): + '''command : GOTO INTEGER''' + p[0] = ('GOTO',int(p[2])) + +def p_command_goto_bad(p): + '''command : GOTO error''' + p[0] = "INVALID LINE NUMBER IN GOTO" + +#### IF-THEN statement + +def p_command_if(p): + '''command : IF relexpr THEN INTEGER''' + p[0] = ('IF',p[2],int(p[4])) + +def p_command_if_bad(p): + '''command : IF error THEN INTEGER''' + p[0] = "BAD RELATIONAL EXPRESSION" + +def p_command_if_bad2(p): + '''command : IF relexpr THEN error''' + p[0] = "INVALID LINE NUMBER IN THEN" + +#### FOR statement + +def p_command_for(p): + '''command : FOR ID EQUALS expr TO expr optstep''' + p[0] = ('FOR',p[2],p[4],p[6],p[7]) + +def p_command_for_bad_initial(p): + '''command : FOR ID EQUALS error TO expr optstep''' + p[0] = "BAD INITIAL VALUE IN FOR STATEMENT" + +def p_command_for_bad_final(p): + '''command : FOR ID EQUALS expr TO error optstep''' + p[0] = "BAD FINAL VALUE IN FOR STATEMENT" + +def p_command_for_bad_step(p): + '''command : FOR ID EQUALS expr TO expr STEP error''' + p[0] = "MALFORMED STEP IN FOR STATEMENT" + +#### Optional STEP qualifier on FOR statement + +def p_optstep(p): + '''optstep : STEP expr + | empty''' + if len(p) == 3: + p[0] = p[2] + else: + p[0] = None + +#### NEXT statement + +def p_command_next(p): + '''command : NEXT ID''' + + p[0] = ('NEXT',p[2]) + +def p_command_next_bad(p): + '''command : NEXT error''' + p[0] = "MALFORMED NEXT" + +#### END statement + +def p_command_end(p): + '''command : END''' + p[0] = ('END',) + +#### REM statement + +def p_command_rem(p): + '''command : REM''' + p[0] = ('REM',p[1]) + +#### STOP statement + +def p_command_stop(p): + '''command : STOP''' + p[0] = ('STOP',) + +#### DEF statement + +def p_command_def(p): + '''command : DEF ID LPAREN ID RPAREN EQUALS expr''' + p[0] = ('FUNC',p[2],p[4],p[7]) + +def p_command_def_bad_rhs(p): + '''command : DEF ID LPAREN ID RPAREN EQUALS error''' + p[0] = "BAD EXPRESSION IN DEF STATEMENT" + +def p_command_def_bad_arg(p): + '''command : DEF ID LPAREN error RPAREN EQUALS expr''' + p[0] = "BAD ARGUMENT IN DEF STATEMENT" + +#### GOSUB statement + +def p_command_gosub(p): + '''command : GOSUB INTEGER''' + p[0] = ('GOSUB',int(p[2])) + +def p_command_gosub_bad(p): + '''command : GOSUB error''' + p[0] = "INVALID LINE NUMBER IN GOSUB" + +#### RETURN statement + +def p_command_return(p): + '''command : RETURN''' + p[0] = ('RETURN',) + +#### DIM statement + +def p_command_dim(p): + '''command : DIM dimlist''' + p[0] = ('DIM',p[2]) + +def p_command_dim_bad(p): + '''command : DIM error''' + p[0] = "MALFORMED VARIABLE LIST IN DIM" + +#### List of variables supplied to DIM statement + +def p_dimlist(p): + '''dimlist : dimlist COMMA dimitem + | dimitem''' + if len(p) == 4: + p[0] = p[1] + p[0].append(p[3]) + else: + p[0] = [p[1]] + +#### DIM items + +def p_dimitem_single(p): + '''dimitem : ID LPAREN INTEGER RPAREN''' + p[0] = (p[1],eval(p[3]),0) + +def p_dimitem_double(p): + '''dimitem : ID LPAREN INTEGER COMMA INTEGER RPAREN''' + p[0] = (p[1],eval(p[3]),eval(p[5])) + +#### Arithmetic expressions + +def p_expr_binary(p): + '''expr : expr PLUS expr + | expr MINUS expr + | expr TIMES expr + | expr DIVIDE expr + | expr POWER expr''' + + p[0] = ('BINOP',p[2],p[1],p[3]) + +def p_expr_number(p): + '''expr : INTEGER + | FLOAT''' + p[0] = ('NUM',eval(p[1])) + +def p_expr_variable(p): + '''expr : variable''' + p[0] = ('VAR',p[1]) + +def p_expr_group(p): + '''expr : LPAREN expr RPAREN''' + p[0] = ('GROUP',p[2]) + +def p_expr_unary(p): + '''expr : MINUS expr %prec UMINUS''' + p[0] = ('UNARY','-',p[2]) + +#### Relational expressions + +def p_relexpr(p): + '''relexpr : expr LT expr + | expr LE expr + | expr GT expr + | expr GE expr + | expr EQUALS expr + | expr NE expr''' + p[0] = ('RELOP',p[2],p[1],p[3]) + +#### Variables + +def p_variable(p): + '''variable : ID + | ID LPAREN expr RPAREN + | ID LPAREN expr COMMA expr RPAREN''' + if len(p) == 2: + p[0] = (p[1],None,None) + elif len(p) == 5: + p[0] = (p[1],p[3],None) + else: + p[0] = (p[1],p[3],p[5]) + +#### Builds a list of variable targets as a Python list + +def p_varlist(p): + '''varlist : varlist COMMA variable + | variable''' + if len(p) > 2: + p[0] = p[1] + p[0].append(p[3]) + else: + p[0] = [p[1]] + + +#### Builds a list of numbers as a Python list + +def p_numlist(p): + '''numlist : numlist COMMA number + | number''' + + if len(p) > 2: + p[0] = p[1] + p[0].append(p[3]) + else: + p[0] = [p[1]] + +#### A number. May be an integer or a float + +def p_number(p): + '''number : INTEGER + | FLOAT''' + p[0] = eval(p[1]) + +#### A signed number. + +def p_number_signed(p): + '''number : MINUS INTEGER + | MINUS FLOAT''' + p[0] = eval("-"+p[2]) + +#### List of targets for a print statement +#### Returns a list of tuples (label,expr) + +def p_plist(p): + '''plist : plist COMMA pitem + | pitem''' + if len(p) > 3: + p[0] = p[1] + p[0].append(p[3]) + else: + p[0] = [p[1]] + +def p_item_string(p): + '''pitem : STRING''' + p[0] = (p[1][1:-1],None) + +def p_item_string_expr(p): + '''pitem : STRING expr''' + p[0] = (p[1][1:-1],p[2]) + +def p_item_expr(p): + '''pitem : expr''' + p[0] = ("",p[1]) + +#### Empty + +def p_empty(p): + '''empty : ''' + +#### Catastrophic error handler +def p_error(p): + if not p: + print "SYNTAX ERROR AT EOF" + +bparser = yacc.yacc() + +def parse(data): + bparser.error = 0 + p = bparser.parse(data) + if bparser.error: return None + return p + + + + + + + + + + + + + + diff --git a/ext/ply/example/BASIC/dim.bas b/ext/ply/example/BASIC/dim.bas new file mode 100644 index 000000000..87bd95b32 --- /dev/null +++ b/ext/ply/example/BASIC/dim.bas @@ -0,0 +1,14 @@ +5 DIM A(50,15) +10 FOR I = 1 TO 50 +20 FOR J = 1 TO 15 +30 LET A(I,J) = I + J +35 REM PRINT I,J, A(I,J) +40 NEXT J +50 NEXT I +100 FOR I = 1 TO 50 +110 FOR J = 1 TO 15 +120 PRINT A(I,J), +130 NEXT J +140 PRINT +150 NEXT I +999 END diff --git a/ext/ply/example/BASIC/func.bas b/ext/ply/example/BASIC/func.bas new file mode 100644 index 000000000..447ee16a9 --- /dev/null +++ b/ext/ply/example/BASIC/func.bas @@ -0,0 +1,5 @@ +10 DEF FDX(X) = 2*X +20 FOR I = 0 TO 100 +30 PRINT FDX(I) +40 NEXT I +50 END diff --git a/ext/ply/example/BASIC/gcd.bas b/ext/ply/example/BASIC/gcd.bas new file mode 100644 index 000000000..d0b774608 --- /dev/null +++ b/ext/ply/example/BASIC/gcd.bas @@ -0,0 +1,22 @@ +10 PRINT "A","B","C","GCD" +20 READ A,B,C +30 LET X = A +40 LET Y = B +50 GOSUB 200 +60 LET X = G +70 LET Y = C +80 GOSUB 200 +90 PRINT A, B, C, G +100 GOTO 20 +110 DATA 60, 90, 120 +120 DATA 38456, 64872, 98765 +130 DATA 32, 384, 72 +200 LET Q = INT(X/Y) +210 LET R = X - Q*Y +220 IF R = 0 THEN 300 +230 LET X = Y +240 LET Y = R +250 GOTO 200 +300 LET G = Y +310 RETURN +999 END diff --git a/ext/ply/example/BASIC/gosub.bas b/ext/ply/example/BASIC/gosub.bas new file mode 100644 index 000000000..99737b16f --- /dev/null +++ b/ext/ply/example/BASIC/gosub.bas @@ -0,0 +1,13 @@ +100 LET X = 3 +110 GOSUB 400 +120 PRINT U, V, W +200 LET X = 5 +210 GOSUB 400 +220 LET Z = U + 2*V + 3*W +230 PRINT Z +240 GOTO 999 +400 LET U = X*X +410 LET V = X*X*X +420 LET W = X*X*X*X + X*X*X + X*X + X +430 RETURN +999 END diff --git a/ext/ply/example/BASIC/hello.bas b/ext/ply/example/BASIC/hello.bas new file mode 100644 index 000000000..cc6f0b0b5 --- /dev/null +++ b/ext/ply/example/BASIC/hello.bas @@ -0,0 +1,4 @@ +5 REM HELLO WORLD PROGAM +10 PRINT "HELLO WORLD" +99 END + diff --git a/ext/ply/example/BASIC/linear.bas b/ext/ply/example/BASIC/linear.bas new file mode 100644 index 000000000..56c08220b --- /dev/null +++ b/ext/ply/example/BASIC/linear.bas @@ -0,0 +1,17 @@ +1 REM ::: SOLVE A SYSTEM OF LINEAR EQUATIONS +2 REM ::: A1*X1 + A2*X2 = B1 +3 REM ::: A3*X1 + A4*X2 = B2 +4 REM -------------------------------------- +10 READ A1, A2, A3, A4 +15 LET D = A1 * A4 - A3 * A2 +20 IF D = 0 THEN 65 +30 READ B1, B2 +37 LET X1 = (B1*A4 - B2*A2) / D +42 LET X2 = (A1*B2 - A3*B1) / D +55 PRINT X1, X2 +60 GOTO 30 +65 PRINT "NO UNIQUE SOLUTION" +70 DATA 1, 2, 4 +80 DATA 2, -7, 5 +85 DATA 1, 3, 4, -7 +90 END diff --git a/ext/ply/example/BASIC/maxsin.bas b/ext/ply/example/BASIC/maxsin.bas new file mode 100644 index 000000000..b96901530 --- /dev/null +++ b/ext/ply/example/BASIC/maxsin.bas @@ -0,0 +1,12 @@ +5 PRINT "X VALUE", "SINE", "RESOLUTION" +10 READ D +20 LET M = -1 +30 FOR X = 0 TO 3 STEP D +40 IF SIN(X) <= M THEN 80 +50 LET X0 = X +60 LET M = SIN(X) +80 NEXT X +85 PRINT X0, M, D +90 GOTO 10 +100 DATA .1, .01, .001 +110 END diff --git a/ext/ply/example/BASIC/powers.bas b/ext/ply/example/BASIC/powers.bas new file mode 100644 index 000000000..a454dc3e2 --- /dev/null +++ b/ext/ply/example/BASIC/powers.bas @@ -0,0 +1,13 @@ +5 PRINT "THIS PROGRAM COMPUTES AND PRINTS THE NTH POWERS" +6 PRINT "OF THE NUMBERS LESS THAN OR EQUAL TO N FOR VARIOUS" +7 PRINT "N FROM 1 THROUGH 7" +8 PRINT +10 FOR N = 1 TO 7 +15 PRINT "N = "N +20 FOR I = 1 TO N +30 PRINT I^N, +40 NEXT I +50 PRINT +60 PRINT +70 NEXT N +80 END diff --git a/ext/ply/example/BASIC/rand.bas b/ext/ply/example/BASIC/rand.bas new file mode 100644 index 000000000..4ff7a1467 --- /dev/null +++ b/ext/ply/example/BASIC/rand.bas @@ -0,0 +1,4 @@ +10 FOR I = 1 TO 20 +20 PRINT INT(10*RND(0)) +30 NEXT I +40 END diff --git a/ext/ply/example/BASIC/sales.bas b/ext/ply/example/BASIC/sales.bas new file mode 100644 index 000000000..a39aefb76 --- /dev/null +++ b/ext/ply/example/BASIC/sales.bas @@ -0,0 +1,20 @@ +10 FOR I = 1 TO 3 +20 READ P(I) +30 NEXT I +40 FOR I = 1 TO 3 +50 FOR J = 1 TO 5 +60 READ S(I,J) +70 NEXT J +80 NEXT I +90 FOR J = 1 TO 5 +100 LET S = 0 +110 FOR I = 1 TO 3 +120 LET S = S + P(I) * S(I,J) +130 NEXT I +140 PRINT "TOTAL SALES FOR SALESMAN"J, "$"S +150 NEXT J +200 DATA 1.25, 4.30, 2.50 +210 DATA 40, 20, 37, 29, 42 +220 DATA 10, 16, 3, 21, 8 +230 DATA 35, 47, 29, 16, 33 +300 END diff --git a/ext/ply/example/BASIC/sears.bas b/ext/ply/example/BASIC/sears.bas new file mode 100644 index 000000000..5ced3974e --- /dev/null +++ b/ext/ply/example/BASIC/sears.bas @@ -0,0 +1,18 @@ +1 REM :: THIS PROGRAM COMPUTES HOW MANY TIMES YOU HAVE TO FOLD +2 REM :: A PIECE OF PAPER SO THAT IT IS TALLER THAN THE +3 REM :: SEARS TOWER. +4 REM :: S = HEIGHT OF TOWER (METERS) +5 REM :: T = THICKNESS OF PAPER (MILLIMETERS) +10 LET S = 442 +20 LET T = 0.1 +30 REM CONVERT T TO METERS +40 LET T = T * .001 +50 LET F = 1 +60 LET H = T +100 IF H > S THEN 200 +120 LET H = 2 * H +125 LET F = F + 1 +130 GOTO 100 +200 PRINT "NUMBER OF FOLDS ="F +220 PRINT "FINAL HEIGHT ="H +999 END diff --git a/ext/ply/example/BASIC/sqrt1.bas b/ext/ply/example/BASIC/sqrt1.bas new file mode 100644 index 000000000..6673a9152 --- /dev/null +++ b/ext/ply/example/BASIC/sqrt1.bas @@ -0,0 +1,5 @@ +10 LET X = 0 +20 LET X = X + 1 +30 PRINT X, SQR(X) +40 IF X < 100 THEN 20 +50 END diff --git a/ext/ply/example/BASIC/sqrt2.bas b/ext/ply/example/BASIC/sqrt2.bas new file mode 100644 index 000000000..862d85ef2 --- /dev/null +++ b/ext/ply/example/BASIC/sqrt2.bas @@ -0,0 +1,4 @@ +10 FOR X = 1 TO 100 +20 PRINT X, SQR(X) +30 NEXT X +40 END diff --git a/ext/ply/example/GardenSnake/GardenSnake.py b/ext/ply/example/GardenSnake/GardenSnake.py new file mode 100644 index 000000000..ffa550fc6 --- /dev/null +++ b/ext/ply/example/GardenSnake/GardenSnake.py @@ -0,0 +1,709 @@ +# GardenSnake - a parser generator demonstration program +# +# This implements a modified version of a subset of Python: +# - only 'def', 'return' and 'if' statements +# - 'if' only has 'then' clause (no elif nor else) +# - single-quoted strings only, content in raw format +# - numbers are decimal.Decimal instances (not integers or floats) +# - no print statment; use the built-in 'print' function +# - only < > == + - / * implemented (and unary + -) +# - assignment and tuple assignment work +# - no generators of any sort +# - no ... well, no quite a lot + +# Why? I'm thinking about a new indentation-based configuration +# language for a project and wanted to figure out how to do it. Once +# I got that working I needed a way to test it out. My original AST +# was dumb so I decided to target Python's AST and compile it into +# Python code. Plus, it's pretty cool that it only took a day or so +# from sitting down with Ply to having working code. + +# This uses David Beazley's Ply from http://www.dabeaz.com/ply/ + +# This work is hereby released into the Public Domain. To view a copy of +# the public domain dedication, visit +# http://creativecommons.org/licenses/publicdomain/ or send a letter to +# Creative Commons, 543 Howard Street, 5th Floor, San Francisco, +# California, 94105, USA. +# +# Portions of this work are derived from Python's Grammar definition +# and may be covered under the Python copyright and license +# +# Andrew Dalke / Dalke Scientific Software, LLC +# 30 August 2006 / Cape Town, South Africa + +# Changelog: +# 30 August - added link to CC license; removed the "swapcase" encoding + +# Modifications for inclusion in PLY distribution +import sys +sys.path.insert(0,"../..") +from ply import * + +##### Lexer ###### +#import lex +import decimal + +tokens = ( + 'DEF', + 'IF', + 'NAME', + 'NUMBER', # Python decimals + 'STRING', # single quoted strings only; syntax of raw strings + 'LPAR', + 'RPAR', + 'COLON', + 'EQ', + 'ASSIGN', + 'LT', + 'GT', + 'PLUS', + 'MINUS', + 'MULT', + 'DIV', + 'RETURN', + 'WS', + 'NEWLINE', + 'COMMA', + 'SEMICOLON', + 'INDENT', + 'DEDENT', + 'ENDMARKER', + ) + +#t_NUMBER = r'\d+' +# taken from decmial.py but without the leading sign +def t_NUMBER(t): + r"""(\d+(\.\d*)?|\.\d+)([eE][-+]? \d+)?""" + t.value = decimal.Decimal(t.value) + return t + +def t_STRING(t): + r"'([^\\']+|\\'|\\\\)*'" # I think this is right ... + t.value=t.value[1:-1].decode("string-escape") # .swapcase() # for fun + return t + +t_COLON = r':' +t_EQ = r'==' +t_ASSIGN = r'=' +t_LT = r'<' +t_GT = r'>' +t_PLUS = r'\+' +t_MINUS = r'-' +t_MULT = r'\*' +t_DIV = r'/' +t_COMMA = r',' +t_SEMICOLON = r';' + +# Ply nicely documented how to do this. + +RESERVED = { + "def": "DEF", + "if": "IF", + "return": "RETURN", + } + +def t_NAME(t): + r'[a-zA-Z_][a-zA-Z0-9_]*' + t.type = RESERVED.get(t.value, "NAME") + return t + +# Putting this before t_WS let it consume lines with only comments in +# them so the latter code never sees the WS part. Not consuming the +# newline. Needed for "if 1: #comment" +def t_comment(t): + r"[ ]*\043[^\n]*" # \043 is '#' + pass + + +# Whitespace +def t_WS(t): + r' [ ]+ ' + if t.lexer.at_line_start and t.lexer.paren_count == 0: + return t + +# Don't generate newline tokens when inside of parenthesis, eg +# a = (1, +# 2, 3) +def t_newline(t): + r'\n+' + t.lexer.lineno += len(t.value) + t.type = "NEWLINE" + if t.lexer.paren_count == 0: + return t + +def t_LPAR(t): + r'\(' + t.lexer.paren_count += 1 + return t + +def t_RPAR(t): + r'\)' + # check for underflow? should be the job of the parser + t.lexer.paren_count -= 1 + return t + + +def t_error(t): + raise SyntaxError("Unknown symbol %r" % (t.value[0],)) + print "Skipping", repr(t.value[0]) + t.lexer.skip(1) + +## I implemented INDENT / DEDENT generation as a post-processing filter + +# The original lex token stream contains WS and NEWLINE characters. +# WS will only occur before any other tokens on a line. + +# I have three filters. One tags tokens by adding two attributes. +# "must_indent" is True if the token must be indented from the +# previous code. The other is "at_line_start" which is True for WS +# and the first non-WS/non-NEWLINE on a line. It flags the check so +# see if the new line has changed indication level. + +# Python's syntax has three INDENT states +# 0) no colon hence no need to indent +# 1) "if 1: go()" - simple statements have a COLON but no need for an indent +# 2) "if 1:\n go()" - complex statements have a COLON NEWLINE and must indent +NO_INDENT = 0 +MAY_INDENT = 1 +MUST_INDENT = 2 + +# only care about whitespace at the start of a line +def track_tokens_filter(lexer, tokens): + lexer.at_line_start = at_line_start = True + indent = NO_INDENT + saw_colon = False + for token in tokens: + token.at_line_start = at_line_start + + if token.type == "COLON": + at_line_start = False + indent = MAY_INDENT + token.must_indent = False + + elif token.type == "NEWLINE": + at_line_start = True + if indent == MAY_INDENT: + indent = MUST_INDENT + token.must_indent = False + + elif token.type == "WS": + assert token.at_line_start == True + at_line_start = True + token.must_indent = False + + else: + # A real token; only indent after COLON NEWLINE + if indent == MUST_INDENT: + token.must_indent = True + else: + token.must_indent = False + at_line_start = False + indent = NO_INDENT + + yield token + lexer.at_line_start = at_line_start + +def _new_token(type, lineno): + tok = lex.LexToken() + tok.type = type + tok.value = None + tok.lineno = lineno + return tok + +# Synthesize a DEDENT tag +def DEDENT(lineno): + return _new_token("DEDENT", lineno) + +# Synthesize an INDENT tag +def INDENT(lineno): + return _new_token("INDENT", lineno) + + +# Track the indentation level and emit the right INDENT / DEDENT events. +def indentation_filter(tokens): + # A stack of indentation levels; will never pop item 0 + levels = [0] + token = None + depth = 0 + prev_was_ws = False + for token in tokens: +## if 1: +## print "Process", token, +## if token.at_line_start: +## print "at_line_start", +## if token.must_indent: +## print "must_indent", +## print + + # WS only occurs at the start of the line + # There may be WS followed by NEWLINE so + # only track the depth here. Don't indent/dedent + # until there's something real. + if token.type == "WS": + assert depth == 0 + depth = len(token.value) + prev_was_ws = True + # WS tokens are never passed to the parser + continue + + if token.type == "NEWLINE": + depth = 0 + if prev_was_ws or token.at_line_start: + # ignore blank lines + continue + # pass the other cases on through + yield token + continue + + # then it must be a real token (not WS, not NEWLINE) + # which can affect the indentation level + + prev_was_ws = False + if token.must_indent: + # The current depth must be larger than the previous level + if not (depth > levels[-1]): + raise IndentationError("expected an indented block") + + levels.append(depth) + yield INDENT(token.lineno) + + elif token.at_line_start: + # Must be on the same level or one of the previous levels + if depth == levels[-1]: + # At the same level + pass + elif depth > levels[-1]: + raise IndentationError("indentation increase but not in new block") + else: + # Back up; but only if it matches a previous level + try: + i = levels.index(depth) + except ValueError: + raise IndentationError("inconsistent indentation") + for _ in range(i+1, len(levels)): + yield DEDENT(token.lineno) + levels.pop() + + yield token + + ### Finished processing ### + + # Must dedent any remaining levels + if len(levels) > 1: + assert token is not None + for _ in range(1, len(levels)): + yield DEDENT(token.lineno) + + +# The top-level filter adds an ENDMARKER, if requested. +# Python's grammar uses it. +def filter(lexer, add_endmarker = True): + token = None + tokens = iter(lexer.token, None) + tokens = track_tokens_filter(lexer, tokens) + for token in indentation_filter(tokens): + yield token + + if add_endmarker: + lineno = 1 + if token is not None: + lineno = token.lineno + yield _new_token("ENDMARKER", lineno) + +# Combine Ply and my filters into a new lexer + +class IndentLexer(object): + def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0): + self.lexer = lex.lex(debug=debug, optimize=optimize, lextab=lextab, reflags=reflags) + self.token_stream = None + def input(self, s, add_endmarker=True): + self.lexer.paren_count = 0 + self.lexer.input(s) + self.token_stream = filter(self.lexer, add_endmarker) + def token(self): + try: + return self.token_stream.next() + except StopIteration: + return None + +########## Parser (tokens -> AST) ###### + +# also part of Ply +#import yacc + +# I use the Python AST +from compiler import ast + +# Helper function +def Assign(left, right): + names = [] + if isinstance(left, ast.Name): + # Single assignment on left + return ast.Assign([ast.AssName(left.name, 'OP_ASSIGN')], right) + elif isinstance(left, ast.Tuple): + # List of things - make sure they are Name nodes + names = [] + for child in left.getChildren(): + if not isinstance(child, ast.Name): + raise SyntaxError("that assignment not supported") + names.append(child.name) + ass_list = [ast.AssName(name, 'OP_ASSIGN') for name in names] + return ast.Assign([ast.AssTuple(ass_list)], right) + else: + raise SyntaxError("Can't do that yet") + + +# The grammar comments come from Python's Grammar/Grammar file + +## NB: compound_stmt in single_input is followed by extra NEWLINE! +# file_input: (NEWLINE | stmt)* ENDMARKER +def p_file_input_end(p): + """file_input_end : file_input ENDMARKER""" + p[0] = ast.Stmt(p[1]) +def p_file_input(p): + """file_input : file_input NEWLINE + | file_input stmt + | NEWLINE + | stmt""" + if isinstance(p[len(p)-1], basestring): + if len(p) == 3: + p[0] = p[1] + else: + p[0] = [] # p == 2 --> only a blank line + else: + if len(p) == 3: + p[0] = p[1] + p[2] + else: + p[0] = p[1] + + +# funcdef: [decorators] 'def' NAME parameters ':' suite +# ignoring decorators +def p_funcdef(p): + "funcdef : DEF NAME parameters COLON suite" + p[0] = ast.Function(None, p[2], tuple(p[3]), (), 0, None, p[5]) + +# parameters: '(' [varargslist] ')' +def p_parameters(p): + """parameters : LPAR RPAR + | LPAR varargslist RPAR""" + if len(p) == 3: + p[0] = [] + else: + p[0] = p[2] + + +# varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | +# highly simplified +def p_varargslist(p): + """varargslist : varargslist COMMA NAME + | NAME""" + if len(p) == 4: + p[0] = p[1] + p[3] + else: + p[0] = [p[1]] + +# stmt: simple_stmt | compound_stmt +def p_stmt_simple(p): + """stmt : simple_stmt""" + # simple_stmt is a list + p[0] = p[1] + +def p_stmt_compound(p): + """stmt : compound_stmt""" + p[0] = [p[1]] + +# simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE +def p_simple_stmt(p): + """simple_stmt : small_stmts NEWLINE + | small_stmts SEMICOLON NEWLINE""" + p[0] = p[1] + +def p_small_stmts(p): + """small_stmts : small_stmts SEMICOLON small_stmt + | small_stmt""" + if len(p) == 4: + p[0] = p[1] + [p[3]] + else: + p[0] = [p[1]] + +# small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | +# import_stmt | global_stmt | exec_stmt | assert_stmt +def p_small_stmt(p): + """small_stmt : flow_stmt + | expr_stmt""" + p[0] = p[1] + +# expr_stmt: testlist (augassign (yield_expr|testlist) | +# ('=' (yield_expr|testlist))*) +# augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | +# '<<=' | '>>=' | '**=' | '//=') +def p_expr_stmt(p): + """expr_stmt : testlist ASSIGN testlist + | testlist """ + if len(p) == 2: + # a list of expressions + p[0] = ast.Discard(p[1]) + else: + p[0] = Assign(p[1], p[3]) + +def p_flow_stmt(p): + "flow_stmt : return_stmt" + p[0] = p[1] + +# return_stmt: 'return' [testlist] +def p_return_stmt(p): + "return_stmt : RETURN testlist" + p[0] = ast.Return(p[2]) + + +def p_compound_stmt(p): + """compound_stmt : if_stmt + | funcdef""" + p[0] = p[1] + +def p_if_stmt(p): + 'if_stmt : IF test COLON suite' + p[0] = ast.If([(p[2], p[4])], None) + +def p_suite(p): + """suite : simple_stmt + | NEWLINE INDENT stmts DEDENT""" + if len(p) == 2: + p[0] = ast.Stmt(p[1]) + else: + p[0] = ast.Stmt(p[3]) + + +def p_stmts(p): + """stmts : stmts stmt + | stmt""" + if len(p) == 3: + p[0] = p[1] + p[2] + else: + p[0] = p[1] + +## No using Python's approach because Ply supports precedence + +# comparison: expr (comp_op expr)* +# arith_expr: term (('+'|'-') term)* +# term: factor (('*'|'/'|'%'|'//') factor)* +# factor: ('+'|'-'|'~') factor | power +# comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not' + +def make_lt_compare((left, right)): + return ast.Compare(left, [('<', right),]) +def make_gt_compare((left, right)): + return ast.Compare(left, [('>', right),]) +def make_eq_compare((left, right)): + return ast.Compare(left, [('==', right),]) + + +binary_ops = { + "+": ast.Add, + "-": ast.Sub, + "*": ast.Mul, + "/": ast.Div, + "<": make_lt_compare, + ">": make_gt_compare, + "==": make_eq_compare, +} +unary_ops = { + "+": ast.UnaryAdd, + "-": ast.UnarySub, + } +precedence = ( + ("left", "EQ", "GT", "LT"), + ("left", "PLUS", "MINUS"), + ("left", "MULT", "DIV"), + ) + +def p_comparison(p): + """comparison : comparison PLUS comparison + | comparison MINUS comparison + | comparison MULT comparison + | comparison DIV comparison + | comparison LT comparison + | comparison EQ comparison + | comparison GT comparison + | PLUS comparison + | MINUS comparison + | power""" + if len(p) == 4: + p[0] = binary_ops[p[2]]((p[1], p[3])) + elif len(p) == 3: + p[0] = unary_ops[p[1]](p[2]) + else: + p[0] = p[1] + +# power: atom trailer* ['**' factor] +# trailers enables function calls. I only allow one level of calls +# so this is 'trailer' +def p_power(p): + """power : atom + | atom trailer""" + if len(p) == 2: + p[0] = p[1] + else: + if p[2][0] == "CALL": + p[0] = ast.CallFunc(p[1], p[2][1], None, None) + else: + raise AssertionError("not implemented") + +def p_atom_name(p): + """atom : NAME""" + p[0] = ast.Name(p[1]) + +def p_atom_number(p): + """atom : NUMBER + | STRING""" + p[0] = ast.Const(p[1]) + +def p_atom_tuple(p): + """atom : LPAR testlist RPAR""" + p[0] = p[2] + +# trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME +def p_trailer(p): + "trailer : LPAR arglist RPAR" + p[0] = ("CALL", p[2]) + +# testlist: test (',' test)* [','] +# Contains shift/reduce error +def p_testlist(p): + """testlist : testlist_multi COMMA + | testlist_multi """ + if len(p) == 2: + p[0] = p[1] + else: + # May need to promote singleton to tuple + if isinstance(p[1], list): + p[0] = p[1] + else: + p[0] = [p[1]] + # Convert into a tuple? + if isinstance(p[0], list): + p[0] = ast.Tuple(p[0]) + +def p_testlist_multi(p): + """testlist_multi : testlist_multi COMMA test + | test""" + if len(p) == 2: + # singleton + p[0] = p[1] + else: + if isinstance(p[1], list): + p[0] = p[1] + [p[3]] + else: + # singleton -> tuple + p[0] = [p[1], p[3]] + + +# test: or_test ['if' or_test 'else' test] | lambdef +# as I don't support 'and', 'or', and 'not' this works down to 'comparison' +def p_test(p): + "test : comparison" + p[0] = p[1] + + + +# arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test) +# XXX INCOMPLETE: this doesn't allow the trailing comma +def p_arglist(p): + """arglist : arglist COMMA argument + | argument""" + if len(p) == 4: + p[0] = p[1] + [p[3]] + else: + p[0] = [p[1]] + +# argument: test [gen_for] | test '=' test # Really [keyword '='] test +def p_argument(p): + "argument : test" + p[0] = p[1] + +def p_error(p): + #print "Error!", repr(p) + raise SyntaxError(p) + + +class GardenSnakeParser(object): + def __init__(self, lexer = None): + if lexer is None: + lexer = IndentLexer() + self.lexer = lexer + self.parser = yacc.yacc(start="file_input_end") + + def parse(self, code): + self.lexer.input(code) + result = self.parser.parse(lexer = self.lexer) + return ast.Module(None, result) + + +###### Code generation ###### + +from compiler import misc, syntax, pycodegen + +class GardenSnakeCompiler(object): + def __init__(self): + self.parser = GardenSnakeParser() + def compile(self, code, filename="<string>"): + tree = self.parser.parse(code) + #print tree + misc.set_filename(filename, tree) + syntax.check(tree) + gen = pycodegen.ModuleCodeGenerator(tree) + code = gen.getCode() + return code + +####### Test code ####### + +compile = GardenSnakeCompiler().compile + +code = r""" + +print('LET\'S TRY THIS \\OUT') + +#Comment here +def x(a): + print('called with',a) + if a == 1: + return 2 + if a*2 > 10: return 999 / 4 + # Another comment here + + return a+2*3 + +ints = (1, 2, + 3, 4, +5) +print('mutiline-expression', ints) + +t = 4+1/3*2+6*(9-5+1) +print('predence test; should be 34+2/3:', t, t==(34+2/3)) + +print('numbers', 1,2,3,4,5) +if 1: + 8 + a=9 + print(x(a)) + +print(x(1)) +print(x(2)) +print(x(8),'3') +print('this is decimal', 1/5) +print('BIG DECIMAL', 1.234567891234567e12345) + +""" + +# Set up the GardenSnake run-time environment +def print_(*args): + print "-->", " ".join(map(str,args)) + +globals()["print"] = print_ + +compiled_code = compile(code) + +exec compiled_code in globals() +print "Done" diff --git a/ext/ply/example/GardenSnake/README b/ext/ply/example/GardenSnake/README new file mode 100644 index 000000000..4d8be2db0 --- /dev/null +++ b/ext/ply/example/GardenSnake/README @@ -0,0 +1,5 @@ +This example is Andrew Dalke's GardenSnake language. It shows how to process an +indentation-like language like Python. Further details can be found here: + +http://dalkescientific.com/writings/diary/archive/2006/08/30/gardensnake_language.html + diff --git a/ext/ply/example/README b/ext/ply/example/README new file mode 100644 index 000000000..63519b557 --- /dev/null +++ b/ext/ply/example/README @@ -0,0 +1,10 @@ +Simple examples: + calc - Simple calculator + classcalc - Simple calculate defined as a class + +Complex examples + ansic - ANSI C grammar from K&R + BASIC - A small BASIC interpreter + GardenSnake - A simple python-like language + yply - Converts Unix yacc files to PLY programs. + diff --git a/ext/ply/example/ansic/clex.py b/ext/ply/example/ansic/clex.py index afd995208..12441a60b 100644 --- a/ext/ply/example/ansic/clex.py +++ b/ext/ply/example/ansic/clex.py @@ -4,7 +4,10 @@ # A lexer for ANSI C. # ---------------------------------------------------------------------- -import lex +import sys +sys.path.insert(0,"../..") + +import ply.lex as lex # Reserved words reserved = ( @@ -53,7 +56,7 @@ t_ignore = ' \t\x0c' # Newlines def t_NEWLINE(t): r'\n+' - t.lineno += t.value.count("\n") + t.lexer.lineno += t.value.count("\n") # Operators t_PLUS = r'\+' @@ -64,7 +67,7 @@ t_MOD = r'%' t_OR = r'\|' t_AND = r'&' t_NOT = r'~' -t_XOR = r'^' +t_XOR = r'\^' t_LSHIFT = r'<<' t_RSHIFT = r'>>' t_LOR = r'\|\|' @@ -149,7 +152,7 @@ def t_preprocessor(t): def t_error(t): print "Illegal character %s" % repr(t.value[0]) - t.skip(1) + t.lexer.skip(1) lexer = lex.lex(optimize=1) if __name__ == "__main__": diff --git a/ext/ply/example/ansic/cparse.py b/ext/ply/example/ansic/cparse.py index ddfd5c72b..d474378c8 100644 --- a/ext/ply/example/ansic/cparse.py +++ b/ext/ply/example/ansic/cparse.py @@ -4,8 +4,9 @@ # Simple parser for ANSI C. Based on the grammar in K&R, 2nd Ed. # ----------------------------------------------------------------------------- -import yacc +import sys import clex +import ply.yacc as yacc # Get the token map tokens = clex.tokens @@ -852,7 +853,10 @@ def p_error(t): import profile # Build the grammar -profile.run("yacc.yacc()") + +yacc.yacc(method='LALR') + +#profile.run("yacc.yacc(method='LALR')") diff --git a/ext/ply/example/calc/calc.py b/ext/ply/example/calc/calc.py index aeb23c246..987ce8019 100644 --- a/ext/ply/example/calc/calc.py +++ b/ext/ply/example/calc/calc.py @@ -5,21 +5,17 @@ # "Lex and Yacc", p. 63. # ----------------------------------------------------------------------------- +import sys +sys.path.insert(0,"../..") + tokens = ( 'NAME','NUMBER', - 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', - 'LPAREN','RPAREN', ) +literals = ['=','+','-','*','/', '(',')'] + # Tokens -t_PLUS = r'\+' -t_MINUS = r'-' -t_TIMES = r'\*' -t_DIVIDE = r'/' -t_EQUALS = r'=' -t_LPAREN = r'\(' -t_RPAREN = r'\)' t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' def t_NUMBER(t): @@ -35,69 +31,69 @@ t_ignore = " \t" def t_newline(t): r'\n+' - t.lineno += t.value.count("\n") + t.lexer.lineno += t.value.count("\n") def t_error(t): print "Illegal character '%s'" % t.value[0] - t.skip(1) + t.lexer.skip(1) # Build the lexer -import lex +import ply.lex as lex lex.lex() # Parsing rules precedence = ( - ('left','PLUS','MINUS'), - ('left','TIMES','DIVIDE'), + ('left','+','-'), + ('left','*','/'), ('right','UMINUS'), ) # dictionary of names names = { } -def p_statement_assign(t): - 'statement : NAME EQUALS expression' - names[t[1]] = t[3] +def p_statement_assign(p): + 'statement : NAME "=" expression' + names[p[1]] = p[3] -def p_statement_expr(t): +def p_statement_expr(p): 'statement : expression' - print t[1] - -def p_expression_binop(t): - '''expression : expression PLUS expression - | expression MINUS expression - | expression TIMES expression - | expression DIVIDE expression''' - if t[2] == '+' : t[0] = t[1] + t[3] - elif t[2] == '-': t[0] = t[1] - t[3] - elif t[2] == '*': t[0] = t[1] * t[3] - elif t[2] == '/': t[0] = t[1] / t[3] - -def p_expression_uminus(t): - 'expression : MINUS expression %prec UMINUS' - t[0] = -t[2] - -def p_expression_group(t): - 'expression : LPAREN expression RPAREN' - t[0] = t[2] - -def p_expression_number(t): - 'expression : NUMBER' - t[0] = t[1] - -def p_expression_name(t): - 'expression : NAME' + print p[1] + +def p_expression_binop(p): + '''expression : expression '+' expression + | expression '-' expression + | expression '*' expression + | expression '/' expression''' + if p[2] == '+' : p[0] = p[1] + p[3] + elif p[2] == '-': p[0] = p[1] - p[3] + elif p[2] == '*': p[0] = p[1] * p[3] + elif p[2] == '/': p[0] = p[1] / p[3] + +def p_expression_uminus(p): + "expression : '-' expression %prec UMINUS" + p[0] = -p[2] + +def p_expression_group(p): + "expression : '(' expression ')'" + p[0] = p[2] + +def p_expression_number(p): + "expression : NUMBER" + p[0] = p[1] + +def p_expression_name(p): + "expression : NAME" try: - t[0] = names[t[1]] + p[0] = names[p[1]] except LookupError: - print "Undefined name '%s'" % t[1] - t[0] = 0 + print "Undefined name '%s'" % p[1] + p[0] = 0 -def p_error(t): - print "Syntax error at '%s'" % t.value +def p_error(p): + print "Syntax error at '%s'" % p.value -import yacc +import ply.yacc as yacc yacc.yacc() while 1: @@ -105,4 +101,5 @@ while 1: s = raw_input('calc > ') except EOFError: break + if not s: continue yacc.parse(s) diff --git a/ext/ply/example/classcalc/calc.py b/ext/ply/example/classcalc/calc.py new file mode 100644 index 000000000..b2f3f70f1 --- /dev/null +++ b/ext/ply/example/classcalc/calc.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python + +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. +# +# Class-based example contributed to PLY by David McNab +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0,"../..") + +import readline +import ply.lex as lex +import ply.yacc as yacc +import os + +class Parser: + """ + Base class for a lexer/parser that has the rules defined as methods + """ + tokens = () + precedence = () + + def __init__(self, **kw): + self.debug = kw.get('debug', 0) + self.names = { } + try: + modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ + except: + modname = "parser"+"_"+self.__class__.__name__ + self.debugfile = modname + ".dbg" + self.tabmodule = modname + "_" + "parsetab" + #print self.debugfile, self.tabmodule + + # Build the lexer and parser + lex.lex(module=self, debug=self.debug) + yacc.yacc(module=self, + debug=self.debug, + debugfile=self.debugfile, + tabmodule=self.tabmodule) + + def run(self): + while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: continue + yacc.parse(s) + + +class Calc(Parser): + + tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + + # Tokens + + t_PLUS = r'\+' + t_MINUS = r'-' + t_EXP = r'\*\*' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(self, t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + #print "parsed number %s" % repr(t.value) + return t + + t_ignore = " \t" + + def t_newline(self, t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + def t_error(self, t): + print "Illegal character '%s'" % t.value[0] + t.lexer.skip(1) + + # Parsing rules + + precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('left', 'EXP'), + ('right','UMINUS'), + ) + + def p_statement_assign(self, p): + 'statement : NAME EQUALS expression' + self.names[p[1]] = p[3] + + def p_statement_expr(self, p): + 'statement : expression' + print p[1] + + def p_expression_binop(self, p): + """ + expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | expression EXP expression + """ + #print [repr(p[i]) for i in range(0,4)] + if p[2] == '+' : p[0] = p[1] + p[3] + elif p[2] == '-': p[0] = p[1] - p[3] + elif p[2] == '*': p[0] = p[1] * p[3] + elif p[2] == '/': p[0] = p[1] / p[3] + elif p[2] == '**': p[0] = p[1] ** p[3] + + def p_expression_uminus(self, p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] + + def p_expression_group(self, p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + + def p_expression_number(self, p): + 'expression : NUMBER' + p[0] = p[1] + + def p_expression_name(self, p): + 'expression : NAME' + try: + p[0] = self.names[p[1]] + except LookupError: + print "Undefined name '%s'" % p[1] + p[0] = 0 + + def p_error(self, p): + print "Syntax error at '%s'" % p.value + +if __name__ == '__main__': + calc = Calc() + calc.run() diff --git a/ext/ply/example/cleanup.sh b/ext/ply/example/cleanup.sh new file mode 100644 index 000000000..3e115f41c --- /dev/null +++ b/ext/ply/example/cleanup.sh @@ -0,0 +1,2 @@ +#!/bin/sh +rm -f */*.pyc */parsetab.py */parser.out */*~ */*.class diff --git a/ext/ply/example/hedit/hedit.py b/ext/ply/example/hedit/hedit.py index f00427bf5..494f4fde5 100644 --- a/ext/ply/example/hedit/hedit.py +++ b/ext/ply/example/hedit/hedit.py @@ -14,6 +14,10 @@ # such tokens # ----------------------------------------------------------------------------- +import sys +sys.path.insert(0,"../..") + + tokens = ( 'H_EDIT_DESCRIPTOR', ) @@ -34,10 +38,10 @@ def t_H_EDIT_DESCRIPTOR(t): def t_error(t): print "Illegal character '%s'" % t.value[0] - t.skip(1) + t.lexer.skip(1) # Build the lexer -import lex +import ply.lex as lex lex.lex() lex.runmain() diff --git a/ext/ply/example/newclasscalc/calc.py b/ext/ply/example/newclasscalc/calc.py new file mode 100644 index 000000000..7f29bc821 --- /dev/null +++ b/ext/ply/example/newclasscalc/calc.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python + +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. +# +# Class-based example contributed to PLY by David McNab. +# +# Modified to use new-style classes. Test case. +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0,"../..") + +import readline +import ply.lex as lex +import ply.yacc as yacc +import os + +class Parser(object): + """ + Base class for a lexer/parser that has the rules defined as methods + """ + tokens = () + precedence = () + + + def __init__(self, **kw): + self.debug = kw.get('debug', 0) + self.names = { } + try: + modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__ + except: + modname = "parser"+"_"+self.__class__.__name__ + self.debugfile = modname + ".dbg" + self.tabmodule = modname + "_" + "parsetab" + #print self.debugfile, self.tabmodule + + # Build the lexer and parser + lex.lex(module=self, debug=self.debug) + yacc.yacc(module=self, + debug=self.debug, + debugfile=self.debugfile, + tabmodule=self.tabmodule) + + def run(self): + while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: continue + yacc.parse(s) + + +class Calc(Parser): + + tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','EXP', 'TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + + # Tokens + + t_PLUS = r'\+' + t_MINUS = r'-' + t_EXP = r'\*\*' + t_TIMES = r'\*' + t_DIVIDE = r'/' + t_EQUALS = r'=' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_NAME = r'[a-zA-Z_][a-zA-Z0-9_]*' + + def t_NUMBER(self, t): + r'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + #print "parsed number %s" % repr(t.value) + return t + + t_ignore = " \t" + + def t_newline(self, t): + r'\n+' + t.lexer.lineno += t.value.count("\n") + + def t_error(self, t): + print "Illegal character '%s'" % t.value[0] + t.lexer.skip(1) + + # Parsing rules + + precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('left', 'EXP'), + ('right','UMINUS'), + ) + + def p_statement_assign(self, p): + 'statement : NAME EQUALS expression' + self.names[p[1]] = p[3] + + def p_statement_expr(self, p): + 'statement : expression' + print p[1] + + def p_expression_binop(self, p): + """ + expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression + | expression EXP expression + """ + #print [repr(p[i]) for i in range(0,4)] + if p[2] == '+' : p[0] = p[1] + p[3] + elif p[2] == '-': p[0] = p[1] - p[3] + elif p[2] == '*': p[0] = p[1] * p[3] + elif p[2] == '/': p[0] = p[1] / p[3] + elif p[2] == '**': p[0] = p[1] ** p[3] + + def p_expression_uminus(self, p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] + + def p_expression_group(self, p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + + def p_expression_number(self, p): + 'expression : NUMBER' + p[0] = p[1] + + def p_expression_name(self, p): + 'expression : NAME' + try: + p[0] = self.names[p[1]] + except LookupError: + print "Undefined name '%s'" % p[1] + p[0] = 0 + + def p_error(self, p): + print "Syntax error at '%s'" % p.value + +if __name__ == '__main__': + calc = Calc() + calc.run() diff --git a/ext/ply/example/optcalc/calc.py b/ext/ply/example/optcalc/calc.py index fa66cda5b..3a0ee6c9b 100644 --- a/ext/ply/example/optcalc/calc.py +++ b/ext/ply/example/optcalc/calc.py @@ -5,6 +5,9 @@ # "Lex and Yacc", p. 63. # ----------------------------------------------------------------------------- +import sys +sys.path.insert(0,"../..") + tokens = ( 'NAME','NUMBER', 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', @@ -35,14 +38,14 @@ t_ignore = " \t" def t_newline(t): r'\n+' - t.lineno += t.value.count("\n") + t.lexer.lineno += t.value.count("\n") def t_error(t): print "Illegal character '%s'" % t.value[0] - t.skip(1) + t.lexer.skip(1) # Build the lexer -import lex +import ply.lex as lex lex.lex(optimize=1) # Parsing rules @@ -98,7 +101,7 @@ def p_expression_name(t): def p_error(t): print "Syntax error at '%s'" % t.value -import yacc +import ply.yacc as yacc yacc.yacc(optimize=1) while 1: diff --git a/ext/ply/example/unicalc/calc.py b/ext/ply/example/unicalc/calc.py new file mode 100644 index 000000000..d1f59f748 --- /dev/null +++ b/ext/ply/example/unicalc/calc.py @@ -0,0 +1,114 @@ +# ----------------------------------------------------------------------------- +# calc.py +# +# A simple calculator with variables. This is from O'Reilly's +# "Lex and Yacc", p. 63. +# +# This example uses unicode strings for tokens, docstrings, and input. +# ----------------------------------------------------------------------------- + +import sys +sys.path.insert(0,"../..") + +tokens = ( + 'NAME','NUMBER', + 'PLUS','MINUS','TIMES','DIVIDE','EQUALS', + 'LPAREN','RPAREN', + ) + +# Tokens + +t_PLUS = ur'\+' +t_MINUS = ur'-' +t_TIMES = ur'\*' +t_DIVIDE = ur'/' +t_EQUALS = ur'=' +t_LPAREN = ur'\(' +t_RPAREN = ur'\)' +t_NAME = ur'[a-zA-Z_][a-zA-Z0-9_]*' + +def t_NUMBER(t): + ur'\d+' + try: + t.value = int(t.value) + except ValueError: + print "Integer value too large", t.value + t.value = 0 + return t + +t_ignore = u" \t" + +def t_newline(t): + ur'\n+' + t.lexer.lineno += t.value.count("\n") + +def t_error(t): + print "Illegal character '%s'" % t.value[0] + t.lexer.skip(1) + +# Build the lexer +import ply.lex as lex +lex.lex() + +# Parsing rules + +precedence = ( + ('left','PLUS','MINUS'), + ('left','TIMES','DIVIDE'), + ('right','UMINUS'), + ) + +# dictionary of names +names = { } + +def p_statement_assign(p): + 'statement : NAME EQUALS expression' + names[p[1]] = p[3] + +def p_statement_expr(p): + 'statement : expression' + print p[1] + +def p_expression_binop(p): + '''expression : expression PLUS expression + | expression MINUS expression + | expression TIMES expression + | expression DIVIDE expression''' + if p[2] == u'+' : p[0] = p[1] + p[3] + elif p[2] == u'-': p[0] = p[1] - p[3] + elif p[2] == u'*': p[0] = p[1] * p[3] + elif p[2] == u'/': p[0] = p[1] / p[3] + +def p_expression_uminus(p): + 'expression : MINUS expression %prec UMINUS' + p[0] = -p[2] + +def p_expression_group(p): + 'expression : LPAREN expression RPAREN' + p[0] = p[2] + +def p_expression_number(p): + 'expression : NUMBER' + p[0] = p[1] + +def p_expression_name(p): + 'expression : NAME' + try: + p[0] = names[p[1]] + except LookupError: + print "Undefined name '%s'" % p[1] + p[0] = 0 + +def p_error(p): + print "Syntax error at '%s'" % p.value + +import ply.yacc as yacc +yacc.yacc() + +while 1: + try: + s = raw_input('calc > ') + except EOFError: + break + if not s: continue + yacc.parse(unicode(s)) diff --git a/ext/ply/example/yply/README b/ext/ply/example/yply/README new file mode 100644 index 000000000..bfadf3643 --- /dev/null +++ b/ext/ply/example/yply/README @@ -0,0 +1,41 @@ +yply.py + +This example implements a program yply.py that converts a UNIX-yacc +specification file into a PLY-compatible program. To use, simply +run it like this: + + % python yply.py [-nocode] inputfile.y >myparser.py + +The output of this program is Python code. In the output, +any C code in the original file is included, but is commented out. +If you use the -nocode option, then all of the C code in the +original file is just discarded. + +To use the resulting grammer with PLY, you'll need to edit the +myparser.py file. Within this file, some stub code is included that +can be used to test the construction of the parsing tables. However, +you'll need to do more editing to make a workable parser. + +Disclaimer: This just an example I threw together in an afternoon. +It might have some bugs. However, it worked when I tried it on +a yacc-specified C++ parser containing 442 rules and 855 parsing +states. + +Comments: + +1. This example does not parse specification files meant for lex/flex. + You'll need to specify the tokenizer on your own. + +2. This example shows a number of interesting PLY features including + + - Parsing of literal text delimited by nested parentheses + - Some interaction between the parser and the lexer. + - Use of literals in the grammar specification + - One pass compilation. The program just emits the result, + there is no intermediate parse tree. + +3. This program could probably be cleaned up and enhanced a lot. + It would be great if someone wanted to work on this (hint). + +-Dave + diff --git a/ext/ply/example/yply/ylex.py b/ext/ply/example/yply/ylex.py new file mode 100644 index 000000000..61bc0c7ef --- /dev/null +++ b/ext/ply/example/yply/ylex.py @@ -0,0 +1,112 @@ +# lexer for yacc-grammars +# +# Author: David Beazley (dave@dabeaz.com) +# Date : October 2, 2006 + +import sys +sys.path.append("../..") + +from ply import * + +tokens = ( + 'LITERAL','SECTION','TOKEN','LEFT','RIGHT','PREC','START','TYPE','NONASSOC','UNION','CODE', + 'ID','QLITERAL','NUMBER', +) + +states = (('code','exclusive'),) + +literals = [ ';', ',', '<', '>', '|',':' ] +t_ignore = ' \t' + +t_TOKEN = r'%token' +t_LEFT = r'%left' +t_RIGHT = r'%right' +t_NONASSOC = r'%nonassoc' +t_PREC = r'%prec' +t_START = r'%start' +t_TYPE = r'%type' +t_UNION = r'%union' +t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*' +t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)''' +t_NUMBER = r'\d+' + +def t_SECTION(t): + r'%%' + if getattr(t.lexer,"lastsection",0): + t.value = t.lexer.lexdata[t.lexpos+2:] + t.lexer.lexpos = len(t.lexer.lexdata) + else: + t.lexer.lastsection = 0 + return t + +# Comments +def t_ccomment(t): + r'/\*(.|\n)*?\*/' + t.lineno += t.value.count('\n') + +t_ignore_cppcomment = r'//.*' + +def t_LITERAL(t): + r'%\{(.|\n)*?%\}' + t.lexer.lineno += t.value.count("\n") + return t + +def t_NEWLINE(t): + r'\n' + t.lexer.lineno += 1 + +def t_code(t): + r'\{' + t.lexer.codestart = t.lexpos + t.lexer.level = 1 + t.lexer.begin('code') + +def t_code_ignore_string(t): + r'\"([^\\\n]|(\\.))*?\"' + +def t_code_ignore_char(t): + r'\'([^\\\n]|(\\.))*?\'' + +def t_code_ignore_comment(t): + r'/\*(.|\n)*?\*/' + +def t_code_ignore_cppcom(t): + r'//.*' + +def t_code_lbrace(t): + r'\{' + t.lexer.level += 1 + +def t_code_rbrace(t): + r'\}' + t.lexer.level -= 1 + if t.lexer.level == 0: + t.type = 'CODE' + t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos+1] + t.lexer.begin('INITIAL') + t.lexer.lineno += t.value.count('\n') + return t + +t_code_ignore_nonspace = r'[^\s\}\'\"\{]+' +t_code_ignore_whitespace = r'\s+' +t_code_ignore = "" + +def t_code_error(t): + raise RuntimeError + +def t_error(t): + print "%d: Illegal character '%s'" % (t.lineno, t.value[0]) + print t.value + t.lexer.skip(1) + +lex.lex() + +if __name__ == '__main__': + lex.runmain() + + + + + + + diff --git a/ext/ply/example/yply/yparse.py b/ext/ply/example/yply/yparse.py new file mode 100644 index 000000000..a4e46bef7 --- /dev/null +++ b/ext/ply/example/yply/yparse.py @@ -0,0 +1,217 @@ +# parser for Unix yacc-based grammars +# +# Author: David Beazley (dave@dabeaz.com) +# Date : October 2, 2006 + +import ylex +tokens = ylex.tokens + +from ply import * + +tokenlist = [] +preclist = [] + +emit_code = 1 + +def p_yacc(p): + '''yacc : defsection rulesection''' + +def p_defsection(p): + '''defsection : definitions SECTION + | SECTION''' + p.lexer.lastsection = 1 + print "tokens = ", repr(tokenlist) + print + print "precedence = ", repr(preclist) + print + print "# -------------- RULES ----------------" + print + +def p_rulesection(p): + '''rulesection : rules SECTION''' + + print "# -------------- RULES END ----------------" + print_code(p[2],0) + +def p_definitions(p): + '''definitions : definitions definition + | definition''' + +def p_definition_literal(p): + '''definition : LITERAL''' + print_code(p[1],0) + +def p_definition_start(p): + '''definition : START ID''' + print "start = '%s'" % p[2] + +def p_definition_token(p): + '''definition : toktype opttype idlist optsemi ''' + for i in p[3]: + if i[0] not in "'\"": + tokenlist.append(i) + if p[1] == '%left': + preclist.append(('left',) + tuple(p[3])) + elif p[1] == '%right': + preclist.append(('right',) + tuple(p[3])) + elif p[1] == '%nonassoc': + preclist.append(('nonassoc',)+ tuple(p[3])) + +def p_toktype(p): + '''toktype : TOKEN + | LEFT + | RIGHT + | NONASSOC''' + p[0] = p[1] + +def p_opttype(p): + '''opttype : '<' ID '>' + | empty''' + +def p_idlist(p): + '''idlist : idlist optcomma tokenid + | tokenid''' + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = p[1] + p[1].append(p[3]) + +def p_tokenid(p): + '''tokenid : ID + | ID NUMBER + | QLITERAL + | QLITERAL NUMBER''' + p[0] = p[1] + +def p_optsemi(p): + '''optsemi : ';' + | empty''' + +def p_optcomma(p): + '''optcomma : ',' + | empty''' + +def p_definition_type(p): + '''definition : TYPE '<' ID '>' namelist optsemi''' + # type declarations are ignored + +def p_namelist(p): + '''namelist : namelist optcomma ID + | ID''' + +def p_definition_union(p): + '''definition : UNION CODE optsemi''' + # Union declarations are ignored + +def p_rules(p): + '''rules : rules rule + | rule''' + if len(p) == 2: + rule = p[1] + else: + rule = p[2] + + # Print out a Python equivalent of this rule + + embedded = [ ] # Embedded actions (a mess) + embed_count = 0 + + rulename = rule[0] + rulecount = 1 + for r in rule[1]: + # r contains one of the rule possibilities + print "def p_%s_%d(p):" % (rulename,rulecount) + prod = [] + prodcode = "" + for i in range(len(r)): + item = r[i] + if item[0] == '{': # A code block + if i == len(r) - 1: + prodcode = item + break + else: + # an embedded action + embed_name = "_embed%d_%s" % (embed_count,rulename) + prod.append(embed_name) + embedded.append((embed_name,item)) + embed_count += 1 + else: + prod.append(item) + print " '''%s : %s'''" % (rulename, " ".join(prod)) + # Emit code + print_code(prodcode,4) + print + rulecount += 1 + + for e,code in embedded: + print "def p_%s(p):" % e + print " '''%s : '''" % e + print_code(code,4) + print + +def p_rule(p): + '''rule : ID ':' rulelist ';' ''' + p[0] = (p[1],[p[3]]) + +def p_rule2(p): + '''rule : ID ':' rulelist morerules ';' ''' + p[4].insert(0,p[3]) + p[0] = (p[1],p[4]) + +def p_rule_empty(p): + '''rule : ID ':' ';' ''' + p[0] = (p[1],[[]]) + +def p_rule_empty2(p): + '''rule : ID ':' morerules ';' ''' + + p[3].insert(0,[]) + p[0] = (p[1],p[3]) + +def p_morerules(p): + '''morerules : morerules '|' rulelist + | '|' rulelist + | '|' ''' + + if len(p) == 2: + p[0] = [[]] + elif len(p) == 3: + p[0] = [p[2]] + else: + p[0] = p[1] + p[0].append(p[3]) + +# print "morerules", len(p), p[0] + +def p_rulelist(p): + '''rulelist : rulelist ruleitem + | ruleitem''' + + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = p[1] + p[1].append(p[2]) + +def p_ruleitem(p): + '''ruleitem : ID + | QLITERAL + | CODE + | PREC''' + p[0] = p[1] + +def p_empty(p): + '''empty : ''' + +def p_error(p): + pass + +yacc.yacc(debug=0) + +def print_code(code,indent): + if not emit_code: return + codelines = code.splitlines() + for c in codelines: + print "%s# %s" % (" "*indent,c) + diff --git a/ext/ply/example/yply/yply.py b/ext/ply/example/yply/yply.py new file mode 100644 index 000000000..a4398171e --- /dev/null +++ b/ext/ply/example/yply/yply.py @@ -0,0 +1,53 @@ +#!/usr/local/bin/python +# yply.py +# +# Author: David Beazley (dave@dabeaz.com) +# Date : October 2, 2006 +# +# Converts a UNIX-yacc specification file into a PLY-compatible +# specification. To use, simply do this: +# +# % python yply.py [-nocode] inputfile.y >myparser.py +# +# The output of this program is Python code. In the output, +# any C code in the original file is included, but is commented. +# If you use the -nocode option, then all of the C code in the +# original file is discarded. +# +# Disclaimer: This just an example I threw together in an afternoon. +# It might have some bugs. However, it worked when I tried it on +# a yacc-specified C++ parser containing 442 rules and 855 parsing +# states. +# + +import sys +sys.path.insert(0,"../..") + +import ylex +import yparse + +from ply import * + +if len(sys.argv) == 1: + print "usage : yply.py [-nocode] inputfile" + raise SystemExit + +if len(sys.argv) == 3: + if sys.argv[1] == '-nocode': + yparse.emit_code = 0 + else: + print "Unknown option '%s'" % sys.argv[1] + raise SystemExit + filename = sys.argv[2] +else: + filename = sys.argv[1] + +yacc.parse(open(filename).read()) + +print """ +if __name__ == '__main__': + from ply import * + yacc.yacc() +""" + + |