summaryrefslogtreecommitdiff
path: root/ext/ply/example/yply
diff options
context:
space:
mode:
Diffstat (limited to 'ext/ply/example/yply')
-rw-r--r--ext/ply/example/yply/README41
-rw-r--r--ext/ply/example/yply/ylex.py112
-rw-r--r--ext/ply/example/yply/yparse.py217
-rw-r--r--ext/ply/example/yply/yply.py53
4 files changed, 423 insertions, 0 deletions
diff --git a/ext/ply/example/yply/README b/ext/ply/example/yply/README
new file mode 100644
index 000000000..bfadf3643
--- /dev/null
+++ b/ext/ply/example/yply/README
@@ -0,0 +1,41 @@
+yply.py
+
+This example implements a program yply.py that converts a UNIX-yacc
+specification file into a PLY-compatible program. To use, simply
+run it like this:
+
+ % python yply.py [-nocode] inputfile.y >myparser.py
+
+The output of this program is Python code. In the output,
+any C code in the original file is included, but is commented out.
+If you use the -nocode option, then all of the C code in the
+original file is just discarded.
+
+To use the resulting grammer with PLY, you'll need to edit the
+myparser.py file. Within this file, some stub code is included that
+can be used to test the construction of the parsing tables. However,
+you'll need to do more editing to make a workable parser.
+
+Disclaimer: This just an example I threw together in an afternoon.
+It might have some bugs. However, it worked when I tried it on
+a yacc-specified C++ parser containing 442 rules and 855 parsing
+states.
+
+Comments:
+
+1. This example does not parse specification files meant for lex/flex.
+ You'll need to specify the tokenizer on your own.
+
+2. This example shows a number of interesting PLY features including
+
+ - Parsing of literal text delimited by nested parentheses
+ - Some interaction between the parser and the lexer.
+ - Use of literals in the grammar specification
+ - One pass compilation. The program just emits the result,
+ there is no intermediate parse tree.
+
+3. This program could probably be cleaned up and enhanced a lot.
+ It would be great if someone wanted to work on this (hint).
+
+-Dave
+
diff --git a/ext/ply/example/yply/ylex.py b/ext/ply/example/yply/ylex.py
new file mode 100644
index 000000000..61bc0c7ef
--- /dev/null
+++ b/ext/ply/example/yply/ylex.py
@@ -0,0 +1,112 @@
+# lexer for yacc-grammars
+#
+# Author: David Beazley (dave@dabeaz.com)
+# Date : October 2, 2006
+
+import sys
+sys.path.append("../..")
+
+from ply import *
+
+tokens = (
+ 'LITERAL','SECTION','TOKEN','LEFT','RIGHT','PREC','START','TYPE','NONASSOC','UNION','CODE',
+ 'ID','QLITERAL','NUMBER',
+)
+
+states = (('code','exclusive'),)
+
+literals = [ ';', ',', '<', '>', '|',':' ]
+t_ignore = ' \t'
+
+t_TOKEN = r'%token'
+t_LEFT = r'%left'
+t_RIGHT = r'%right'
+t_NONASSOC = r'%nonassoc'
+t_PREC = r'%prec'
+t_START = r'%start'
+t_TYPE = r'%type'
+t_UNION = r'%union'
+t_ID = r'[a-zA-Z_][a-zA-Z_0-9]*'
+t_QLITERAL = r'''(?P<quote>['"]).*?(?P=quote)'''
+t_NUMBER = r'\d+'
+
+def t_SECTION(t):
+ r'%%'
+ if getattr(t.lexer,"lastsection",0):
+ t.value = t.lexer.lexdata[t.lexpos+2:]
+ t.lexer.lexpos = len(t.lexer.lexdata)
+ else:
+ t.lexer.lastsection = 0
+ return t
+
+# Comments
+def t_ccomment(t):
+ r'/\*(.|\n)*?\*/'
+ t.lineno += t.value.count('\n')
+
+t_ignore_cppcomment = r'//.*'
+
+def t_LITERAL(t):
+ r'%\{(.|\n)*?%\}'
+ t.lexer.lineno += t.value.count("\n")
+ return t
+
+def t_NEWLINE(t):
+ r'\n'
+ t.lexer.lineno += 1
+
+def t_code(t):
+ r'\{'
+ t.lexer.codestart = t.lexpos
+ t.lexer.level = 1
+ t.lexer.begin('code')
+
+def t_code_ignore_string(t):
+ r'\"([^\\\n]|(\\.))*?\"'
+
+def t_code_ignore_char(t):
+ r'\'([^\\\n]|(\\.))*?\''
+
+def t_code_ignore_comment(t):
+ r'/\*(.|\n)*?\*/'
+
+def t_code_ignore_cppcom(t):
+ r'//.*'
+
+def t_code_lbrace(t):
+ r'\{'
+ t.lexer.level += 1
+
+def t_code_rbrace(t):
+ r'\}'
+ t.lexer.level -= 1
+ if t.lexer.level == 0:
+ t.type = 'CODE'
+ t.value = t.lexer.lexdata[t.lexer.codestart:t.lexpos+1]
+ t.lexer.begin('INITIAL')
+ t.lexer.lineno += t.value.count('\n')
+ return t
+
+t_code_ignore_nonspace = r'[^\s\}\'\"\{]+'
+t_code_ignore_whitespace = r'\s+'
+t_code_ignore = ""
+
+def t_code_error(t):
+ raise RuntimeError
+
+def t_error(t):
+ print "%d: Illegal character '%s'" % (t.lineno, t.value[0])
+ print t.value
+ t.lexer.skip(1)
+
+lex.lex()
+
+if __name__ == '__main__':
+ lex.runmain()
+
+
+
+
+
+
+
diff --git a/ext/ply/example/yply/yparse.py b/ext/ply/example/yply/yparse.py
new file mode 100644
index 000000000..a4e46bef7
--- /dev/null
+++ b/ext/ply/example/yply/yparse.py
@@ -0,0 +1,217 @@
+# parser for Unix yacc-based grammars
+#
+# Author: David Beazley (dave@dabeaz.com)
+# Date : October 2, 2006
+
+import ylex
+tokens = ylex.tokens
+
+from ply import *
+
+tokenlist = []
+preclist = []
+
+emit_code = 1
+
+def p_yacc(p):
+ '''yacc : defsection rulesection'''
+
+def p_defsection(p):
+ '''defsection : definitions SECTION
+ | SECTION'''
+ p.lexer.lastsection = 1
+ print "tokens = ", repr(tokenlist)
+ print
+ print "precedence = ", repr(preclist)
+ print
+ print "# -------------- RULES ----------------"
+ print
+
+def p_rulesection(p):
+ '''rulesection : rules SECTION'''
+
+ print "# -------------- RULES END ----------------"
+ print_code(p[2],0)
+
+def p_definitions(p):
+ '''definitions : definitions definition
+ | definition'''
+
+def p_definition_literal(p):
+ '''definition : LITERAL'''
+ print_code(p[1],0)
+
+def p_definition_start(p):
+ '''definition : START ID'''
+ print "start = '%s'" % p[2]
+
+def p_definition_token(p):
+ '''definition : toktype opttype idlist optsemi '''
+ for i in p[3]:
+ if i[0] not in "'\"":
+ tokenlist.append(i)
+ if p[1] == '%left':
+ preclist.append(('left',) + tuple(p[3]))
+ elif p[1] == '%right':
+ preclist.append(('right',) + tuple(p[3]))
+ elif p[1] == '%nonassoc':
+ preclist.append(('nonassoc',)+ tuple(p[3]))
+
+def p_toktype(p):
+ '''toktype : TOKEN
+ | LEFT
+ | RIGHT
+ | NONASSOC'''
+ p[0] = p[1]
+
+def p_opttype(p):
+ '''opttype : '<' ID '>'
+ | empty'''
+
+def p_idlist(p):
+ '''idlist : idlist optcomma tokenid
+ | tokenid'''
+ if len(p) == 2:
+ p[0] = [p[1]]
+ else:
+ p[0] = p[1]
+ p[1].append(p[3])
+
+def p_tokenid(p):
+ '''tokenid : ID
+ | ID NUMBER
+ | QLITERAL
+ | QLITERAL NUMBER'''
+ p[0] = p[1]
+
+def p_optsemi(p):
+ '''optsemi : ';'
+ | empty'''
+
+def p_optcomma(p):
+ '''optcomma : ','
+ | empty'''
+
+def p_definition_type(p):
+ '''definition : TYPE '<' ID '>' namelist optsemi'''
+ # type declarations are ignored
+
+def p_namelist(p):
+ '''namelist : namelist optcomma ID
+ | ID'''
+
+def p_definition_union(p):
+ '''definition : UNION CODE optsemi'''
+ # Union declarations are ignored
+
+def p_rules(p):
+ '''rules : rules rule
+ | rule'''
+ if len(p) == 2:
+ rule = p[1]
+ else:
+ rule = p[2]
+
+ # Print out a Python equivalent of this rule
+
+ embedded = [ ] # Embedded actions (a mess)
+ embed_count = 0
+
+ rulename = rule[0]
+ rulecount = 1
+ for r in rule[1]:
+ # r contains one of the rule possibilities
+ print "def p_%s_%d(p):" % (rulename,rulecount)
+ prod = []
+ prodcode = ""
+ for i in range(len(r)):
+ item = r[i]
+ if item[0] == '{': # A code block
+ if i == len(r) - 1:
+ prodcode = item
+ break
+ else:
+ # an embedded action
+ embed_name = "_embed%d_%s" % (embed_count,rulename)
+ prod.append(embed_name)
+ embedded.append((embed_name,item))
+ embed_count += 1
+ else:
+ prod.append(item)
+ print " '''%s : %s'''" % (rulename, " ".join(prod))
+ # Emit code
+ print_code(prodcode,4)
+ print
+ rulecount += 1
+
+ for e,code in embedded:
+ print "def p_%s(p):" % e
+ print " '''%s : '''" % e
+ print_code(code,4)
+ print
+
+def p_rule(p):
+ '''rule : ID ':' rulelist ';' '''
+ p[0] = (p[1],[p[3]])
+
+def p_rule2(p):
+ '''rule : ID ':' rulelist morerules ';' '''
+ p[4].insert(0,p[3])
+ p[0] = (p[1],p[4])
+
+def p_rule_empty(p):
+ '''rule : ID ':' ';' '''
+ p[0] = (p[1],[[]])
+
+def p_rule_empty2(p):
+ '''rule : ID ':' morerules ';' '''
+
+ p[3].insert(0,[])
+ p[0] = (p[1],p[3])
+
+def p_morerules(p):
+ '''morerules : morerules '|' rulelist
+ | '|' rulelist
+ | '|' '''
+
+ if len(p) == 2:
+ p[0] = [[]]
+ elif len(p) == 3:
+ p[0] = [p[2]]
+ else:
+ p[0] = p[1]
+ p[0].append(p[3])
+
+# print "morerules", len(p), p[0]
+
+def p_rulelist(p):
+ '''rulelist : rulelist ruleitem
+ | ruleitem'''
+
+ if len(p) == 2:
+ p[0] = [p[1]]
+ else:
+ p[0] = p[1]
+ p[1].append(p[2])
+
+def p_ruleitem(p):
+ '''ruleitem : ID
+ | QLITERAL
+ | CODE
+ | PREC'''
+ p[0] = p[1]
+
+def p_empty(p):
+ '''empty : '''
+
+def p_error(p):
+ pass
+
+yacc.yacc(debug=0)
+
+def print_code(code,indent):
+ if not emit_code: return
+ codelines = code.splitlines()
+ for c in codelines:
+ print "%s# %s" % (" "*indent,c)
+
diff --git a/ext/ply/example/yply/yply.py b/ext/ply/example/yply/yply.py
new file mode 100644
index 000000000..a4398171e
--- /dev/null
+++ b/ext/ply/example/yply/yply.py
@@ -0,0 +1,53 @@
+#!/usr/local/bin/python
+# yply.py
+#
+# Author: David Beazley (dave@dabeaz.com)
+# Date : October 2, 2006
+#
+# Converts a UNIX-yacc specification file into a PLY-compatible
+# specification. To use, simply do this:
+#
+# % python yply.py [-nocode] inputfile.y >myparser.py
+#
+# The output of this program is Python code. In the output,
+# any C code in the original file is included, but is commented.
+# If you use the -nocode option, then all of the C code in the
+# original file is discarded.
+#
+# Disclaimer: This just an example I threw together in an afternoon.
+# It might have some bugs. However, it worked when I tried it on
+# a yacc-specified C++ parser containing 442 rules and 855 parsing
+# states.
+#
+
+import sys
+sys.path.insert(0,"../..")
+
+import ylex
+import yparse
+
+from ply import *
+
+if len(sys.argv) == 1:
+ print "usage : yply.py [-nocode] inputfile"
+ raise SystemExit
+
+if len(sys.argv) == 3:
+ if sys.argv[1] == '-nocode':
+ yparse.emit_code = 0
+ else:
+ print "Unknown option '%s'" % sys.argv[1]
+ raise SystemExit
+ filename = sys.argv[2]
+else:
+ filename = sys.argv[1]
+
+yacc.parse(open(filename).read())
+
+print """
+if __name__ == '__main__':
+ from ply import *
+ yacc.yacc()
+"""
+
+