util/sconfig/parsedesc.g


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195

######################################################################
# The remainder of this file is from parsedesc.{g,py}

def append(lst, x):
    "Imperative append"
    lst.append(x)
    return lst

def add_inline_token(tokens, str):
    tokens.insert( 0, (str, eval(str, {}, {})) )
    return Terminal(str)

def cleanup_choice(lst):
    if len(lst) == 0: return Sequence([])
    if len(lst) == 1: return lst[0]
    return apply(Choice, tuple(lst))

def cleanup_sequence(lst):
    if len(lst) == 1: return lst[0]
    return apply(Sequence, tuple(lst))

def cleanup_rep(node, rep):
    if rep == 'star':   return Star(node)
    elif rep == 'plus': return Plus(node)
    else:               return node

def resolve_name(tokens, id, args):
    if id in map(lambda x: x[0], tokens):
	# It's a token
	if args: 
	    print 'Warning: ignoring parameters on TOKEN %s<<%s>>' % (id, args)
        return Terminal(id)
    else:
        # It's a name, so assume it's a nonterminal
        return NonTerminal(id, args)

%%
parser ParserDescription:
    option:      "context-insensitive-scanner"

    ignore:      "[ \t\r\n]+"
    ignore:      "#.*?\r?\n"
    token END:   "$"
    token ATTR:  "<<.+?>>"
    token STMT:  "{{.+?}}"
    token ID:    '[a-zA-Z_][a-zA-Z_0-9]*'
    token STR:   '[rR]?\'([^\\n\'\\\\]|\\\\.)*\'|[rR]?"([^\\n"\\\\]|\\\\.)*"'
    token LP:    '\\('
    token RP:    '\\)'
    token LB:    '\\['
    token RB:    '\\]'
    token OR:    '[|]'
    token STAR:  '[*]'
    token PLUS:  '[+]'
    token QUEST: '[?]'
    token COLON: ':'

    rule Parser: "parser" ID ":"
                   Options
                   Tokens
                   Rules<<Tokens>> 
                 END 
                 {{ return Generator(ID,Options,Tokens,Rules) }}

    rule Options: {{ opt = {} }}
                  ( "option" ":" Str {{ opt[Str] = 1 }} )*
                  {{ return opt }}

    rule Tokens:  {{ tok = [] }}
                  (
                    "token" ID ":" Str {{ tok.append( (ID,Str) ) }}
                  | "ignore"   ":" Str {{ tok.append( ('#ignore',Str) ) }}
                  )*
                  {{ return tok }}

    rule Rules<<tokens>>:
                  {{ rul = [] }}
                  (
                    "rule" ID OptParam ":" ClauseA<<tokens>>
                    {{ rul.append( (ID,OptParam,ClauseA) ) }}
                  )*
                  {{ return rul }}

    rule ClauseA<<tokens>>:
                  ClauseB<<tokens>>
                  {{ v = [ClauseB] }}
                  ( OR ClauseB<<tokens>> {{ v.append(ClauseB) }} )*
                  {{ return cleanup_choice(v) }}

    rule ClauseB<<tokens>>:
                  {{ v = [] }}
                  ( ClauseC<<tokens>> {{ v.append(ClauseC) }} )*
                  {{ return cleanup_sequence(v) }}

    rule ClauseC<<tokens>>:
                  ClauseD<<tokens>>
                  ( PLUS {{ return Plus(ClauseD) }}
                  | STAR {{ return Star(ClauseD) }}
                  |      {{ return ClauseD }} )

    rule ClauseD<<tokens>>:
                  STR {{ t = (STR, eval(STR,{},{})) }}
                      {{ if t not in tokens: tokens.insert( 0, t ) }} 
                      {{ return Terminal(STR) }}
                | ID OptParam {{ return resolve_name(tokens, ID, OptParam) }}
                | LP ClauseA<<tokens>> RP {{ return ClauseA }}
                | LB ClauseA<<tokens>> RB {{ return Option(ClauseA) }}
                | STMT {{ return Eval(STMT[2:-2]) }}

    rule OptParam: [ ATTR {{ return ATTR[2:-2] }} ] {{ return '' }}
    rule Str:   STR {{ return eval(STR,{},{}) }}
%%

# This replaces the default main routine

yapps_options = [
    ('context-insensitive-scanner', 'context-insensitive-scanner',
     'Scan all tokens (see docs)')
    ]

def generate(inputfilename, outputfilename='', dump=0, **flags):
    """Generate a grammar, given an input filename (X.g)
    and an output filename (defaulting to X.py)."""

    if not outputfilename:
	if inputfilename[-2:]=='.g': outputfilename = inputfilename[:-2]+'.py'
	else: raise "Invalid Filename", outputfilename
        
    print '    SCONFIG   ', join(outputfilename.split('/')[-5:], '/')
    
    DIVIDER = '\n%%\n' # This pattern separates the pre/post parsers
    preparser, postparser = None, None # Code before and after the parser desc

    # Read the entire file
    s = open(inputfilename,'r').read()

    # See if there's a separation between the pre-parser and parser
    f = find(s, DIVIDER)
    if f >= 0: preparser, s = s[:f]+'\n\n', s[f+len(DIVIDER):]

    # See if there's a separation between the parser and post-parser
    f = find(s, DIVIDER)
    if f >= 0: s, postparser = s[:f], '\n\n'+s[f+len(DIVIDER):]

    # Create the parser and scanner
    p = ParserDescription(ParserDescriptionScanner(s))
    if not p: return
    
    # Now parse the file
    t = wrap_error_reporter(p, 'Parser')
    if not t: return # Error
    if preparser is not None: t.preparser = preparser
    if postparser is not None: t.postparser = postparser

    # Check the options
    for f in t.options.keys():
        for opt,_,_ in yapps_options:
            if f == opt: break
        else:
            print 'Warning: unrecognized option', f
    # Add command line options to the set
    for f in flags.keys(): t.options[f] = flags[f]
            
    # Generate the output
    if dump:
        t.dump_information()
    else:
        t.output = open(outputfilename, 'w')
        t.generate_output()

if __name__=='__main__':
    import sys, getopt
    optlist, args = getopt.getopt(sys.argv[1:], 'f:', ['dump'])
    if not args or len(args) > 2:
        print 'Usage:'
        print '  python', sys.argv[0], '[flags] input.g [output.py]'
        print 'Flags:'
        print ('  --dump' + ' '*40)[:35] + 'Dump out grammar information'
        for flag, _, doc in yapps_options:
            print ('  -f' + flag + ' '*40)[:35] + doc
    else:
        # Read in the options and create a list of flags
	flags = {}
	for opt in optlist:
	    for flag, name, _ in yapps_options:
		if opt == ('-f', flag):
		    flags[name] = 1
		    break
	    else:
                if opt == ('--dump', ''):
                    flags['dump'] = 1
		else:
                    print 'Warning - unrecognized option:  ', opt[0], opt[1]

        apply(generate, tuple(args), flags)