From 3d252f8e5fa2ec3f55730ab6d5d1a4a1b21b2cdf Mon Sep 17 00:00:00 2001
From: Nathan Binkert
Date: Tue, 5 Jul 2011 18:30:04 -0700
Subject: grammar: better encapsulation of a grammar and parsing

This makes it possible to use the grammar multiple times and use the
multiple instances concurrently.  This makes implementing an include
statement as part of a grammar possible.
---
 src/python/m5/util/grammar.py | 178 +++++++++++++++++++++++-------------------
 1 file changed, 97 insertions(+), 81 deletions(-)

(limited to 'src/python/m5')

diff --git a/src/python/m5/util/grammar.py b/src/python/m5/util/grammar.py
index ab5f35868..07702cfad 100644
--- a/src/python/m5/util/grammar.py
+++ b/src/python/m5/util/grammar.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2006-2009 Nathan Binkert
+# Copyright (c) 2006-2011 Nathan Binkert
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -24,100 +24,116 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from ply import lex, yacc
+import os
 
-class TokenError(lex.LexError):
-    def __init__(self, msg, t):
-        super(TokenError, self).__init__(msg)
-        self.token = t
+import ply.lex
+import ply.yacc
 
-class ParseError(yacc.YaccError):
+class ParseError(Exception):
     def __init__(self, message, token=None):
-        super(ParseError, self).__init__(message)
+        Exception.__init__(self, message)
         self.token = token
 
-class Tokenizer(object):
-    def __init__(self, lexer, data):
-        if isinstance(data, basestring):
-            indata = [ data ]
-        elif isinstance(data, file):
-            indata = data.xreadlines()
-        else:
-            indata = data
-
-        def _input():
-            for i,line in enumerate(indata):
-                lexer.lineno = i + 1
-                lexer.input(line)
-                while True:
-                    tok = lexer.token()
-                    if not tok:
-                        break
-                    yield tok
-        self.input = _input()
-        self.lexer = lexer
-
-    def next(self):
-        return self.input.next()
-
-    def __iter__(self):
-        return self
-
-    def token(self):
-        try:
-            return self.next()
-        except StopIteration:
-            return None
-
-    def __getattr__(self, attr):
-        return getattr(self.lexer, attr)
-
 class Grammar(object):
-    def __init__(self, output=None, debug=False):
-        self.yacc_args = {}
-        self.yacc_args['debug'] = debug
+    def setupLexerFactory(self, **kwargs):
+        if 'module' in kwargs:
+            raise AttributeError, "module is an illegal attribute"
+        self.lex_kwargs = kwargs
 
-        if output:
-            import os
+    def setupParserFactory(self, **kwargs):
+        if 'module' in kwargs:
+            raise AttributeError, "module is an illegal attribute"
+        if 'output' in kwargs:
             dir,tab = os.path.split(output)
             if not tab.endswith('.py'):
-                raise AttributeError, 'The output file must end with .py'
-            self.yacc_args['outputdir'] = dir
-            self.yacc_args['tabmodule'] = tab[:-3]
+                raise AttributeError, \
+                    'The output file must end with .py'
+            kwargs['outputdir'] = dir
+            kwargs['tabmodule'] = tab[:-3]
 
-    def t_error(self, t):
-        raise lex.LexError("Illegal character %s @ %d:%d" % \
-              (`t.value[0]`, t.lineno, t.lexpos), `t.value[0]`)
+        self.yacc_kwargs = kwargs
+
+    def __getattr__(self, attr):
+        if attr == 'lexers':
+            self.lexers = []
+            return self.lexers
+
+        if attr == 'lex_kwargs':
+            self.setupLexerFactory()
+            return self.lex_kwargs
+
+        if attr == 'yacc_kwargs':
+            self.setupParserFactory()
+            return self.yacc_kwargs
+
+        if attr == 'lex':
+            self.lex = ply.lex.lex(module=self, **self.lex_kwargs)
+            return self.lex
+
+        if attr == 'yacc':
+            self.yacc = ply.yacc.yacc(module=self, **self.yacc_kwargs)
+            return self.yacc
+
+        if attr == 'current_lexer':
+            if not self.lexers:
+                return None
+            return self.lexers[-1][0]
+
+        if attr == 'current_source':
+            if not self.lexers:
+                return ''
+            return self.lexers[-1][1]
+
+        if attr == 'current_line':
+            if not self.lexers:
+                return -1
+            return self.current_lexer.lineno
+
+        raise AttributeError, \
+            "'%s' object has no attribute '%s'" % (type(self), attr)
+
+    def parse_string(self, data, source='', debug=None, tracking=0):
+        if not isinstance(data, basestring):
+            raise AttributeError, \
+                "argument must be a string, was '%s'" % type(f)
+
+        import new
+        lexer = self.lex.clone()
+        lexer.input(data)
+        self.lexers.append((lexer, source))
+        dict = {
+            'productions' : self.yacc.productions,
+            'action' : self.yacc.action,
+            'goto' : self.yacc.goto,
+            'errorfunc' : self.yacc.errorfunc,
+            }
+        parser = new.instance(ply.yacc.LRParser, dict)
+        result = parser.parse(lexer=lexer, debug=debug, tracking=tracking)
+        self.lexers.pop()
+        return result
+
+    def parse_file(self, f, **kwargs):
+        if isinstance(f, basestring):
+            source = f
+            f = file(f, 'r')
+        elif isinstance(f, file):
+            source = f.name
+        else:
+            raise AttributeError, \
+                "argument must be either a string or file, was '%s'" % type(f)
+
+        return self.parse_string(f.read(), source, **kwargs)
 
     def p_error(self, t):
         if t:
-            msg = "Syntax error at %d:%d\n>>%s<<" % \
-                  (t.lineno, t.lexpos + 1, t.value)
+            msg = "Syntax error at %s:%d:%d\n>>%s<<" % \
+                (self.current_source, t.lineno, t.lexpos + 1, t.value)
         else:
-            msg = "Syntax error at end of input"
+            msg = "Syntax error at end of %s" % (self.current_source, )
         raise ParseError(msg, t)
 
-    def __getattr__(self, attr):
-        if attr == 'parser':
-            import ply.yacc
-            parser = ply.yacc.yacc(module=self, **self.yacc_args)
-            self.parser = parser
-            return parser
-
-        if attr == 'lexer':
-            import ply.lex
-            lexer = ply.lex.lex(module=self)
-            self.lexer = lexer
-            return lexer
-
-        raise AttributeError, "'%s' object has no attribute '%s'" % \
-            (self.__class__.__name__, attr)
-
-    def parse(self, stmt, **kwargs):
-        self.lexer.lineno = 1
-        result = self.parser.parse(lexer=Tokenizer(self.lexer, stmt), **kwargs)
-        self.parser.restart()
-
-        return result
-
+    def t_error(self, t):
+        msg = "Illegal character %s @ %d:%d" % \
+            (`t.value[0]`, t.lineno, t.lexpos)
+        raise ParseError(msg, t)
--
cgit v1.2.3
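
As a rough illustration of the refactored interface, a Grammar subclass might
be used as in the minimal sketch below.  The grammar itself (CalcGrammar, the
NUMBER/PLUS tokens, and the p_expr_* rules) is hypothetical and not part of
gem5; only the Grammar base class and its parse_string/parse_file,
setupLexerFactory, and setupParserFactory methods come from this change.  The
Grammar object passes itself to ply.lex.lex() and ply.yacc.yacc() as the rule
module, so the subclass supplies ordinary PLY token and production
definitions, and every parse_string() call clones the lexer and pushes it on
the lexers stack, which is what lets multiple parses be in flight at once.

    # Hypothetical example grammar; not part of the gem5 patch above.
    from m5.util.grammar import Grammar

    class CalcGrammar(Grammar):
        # PLY lexer definition; Grammar builds the lexer with module=self.
        tokens = ('NUMBER', 'PLUS')

        t_PLUS = r'\+'
        t_ignore = ' \t'

        def t_NUMBER(self, t):
            r'\d+'
            t.value = int(t.value)
            return t

        # PLY parser rules; the first rule defines the start symbol.
        def p_expr_plus(self, t):
            'expr : expr PLUS NUMBER'
            t[0] = t[1] + t[3]

        def p_expr_number(self, t):
            'expr : NUMBER'
            t[0] = t[1]

    calc = CalcGrammar()
    # Standard PLY yacc() keywords are forwarded to the parser factory.
    calc.setupParserFactory(debug=False, write_tables=False)
    print calc.parse_string("1 + 2 + 3", source="example")   # prints 6

Because parse_string() pushes and pops (lexer, source) pairs on a stack, a
production rule could in principle call self.parse_file() recursively to
implement an include statement, with current_source and current_line always
reporting the innermost file in error messages.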