OILS / mycpp / examples / parse.py View on Github | oilshell.org

290 lines, 170 significant
1#!/usr/bin/env python2
2"""
3parse.py
4"""
5from __future__ import print_function
6
7import os
8import sys
9
10# PYTHONPATH=$REPO_ROOT/vendor
11from typing import Tuple, Optional, cast
12
13# PYTHONPATH=$REPO_ROOT/mycpp
14from mycpp.mylib import log, tagswitch
15from mycpp import mylib
16from _devbuild.gen.expr_asdl import expr, expr_e, expr_t, tok_e, tok_t, CompoundWord
17
18# PYTHONPATH=$REPO_ROOT
19from asdl import format as fmt
20
21
22class Lexer(object):
23 # "type declaration" of members
24
25 def __init__(self, s):
26 # type: (str) -> None
27 self.s = s
28 self.i = 0
29 self.n = len(s)
30
31 def Read(self):
32 # type: () -> Tuple[tok_t, str]
33 if self.i >= self.n:
34 return tok_e.Eof, '' # sentinel
35
36 tok = self.s[self.i]
37 self.i += 1
38
39 if tok.isdigit():
40 return tok_e.Const, tok
41 if tok.isalpha():
42 return tok_e.Var, tok
43 if tok in '+-':
44 return tok_e.Op1, tok # lower precedence
45 if tok in '*/':
46 return tok_e.Op2, tok # higher precedence
47 if tok in '()':
48 return tok_e.Paren, tok
49
50 return tok_e.Invalid, tok
51
52 def _MethodCallingOtherMethod(self):
53 # type: () -> None
54 # Make sure we don't get a member var called Read!
55 # This is easy because we're only checking assignment statements.
56 self.Read()
57
58
59class ParseError(Exception):
60
61 def __init__(self, msg):
62 # type: (str) -> None
63 self.msg = msg
64
65
66class Parser(object):
67 """
68 Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
69 syntax.
70
71 Expr = Term ('+' Term)*
72 Term = Factor ('*' Factor)*
73 Factor = VAR
74 | CONST
75 | '(' Expr ')'
76 """
77
78 def __init__(self, lexer):
79 # type: (Lexer) -> None
80 self.lexer = lexer
81 self.tok_type = tok_e.Eof # type: tok_t
82 self.tok_val = ''
83
84 def Next(self):
85 # type: () -> None
86 self.tok_type, self.tok_val = self.lexer.Read()
87 #log('-- %r %r', self.tok_type, self.tok_val)
88
89 def Eat(self, tok_val):
90 # type: (str) -> None
91 if self.tok_val != tok_val:
92 #raise ParseError('Expected %r, got %r' % (tok_val, self.tok_val))
93 raise ParseError('Expected ' + tok_val)
94 self.Next()
95
96 def ParseFactor(self):
97 # type: () -> expr_t
98
99 #log('ParseFactor')
100 if self.tok_type == tok_e.Var:
101 n1 = expr.Var(self.tok_val)
102 self.Next()
103 return n1
104
105 if self.tok_type == tok_e.Const:
106 n2 = expr.Const(int(self.tok_val))
107 self.Next()
108 return n2
109
110 if self.tok_type == tok_e.Paren:
111 self.Eat('(')
112 n3 = self.ParseExpr()
113 self.Eat(')')
114 return n3
115
116 #raise ParseError('Unexpected token %s %s' % (self.tok_type, self.tok_val))
117 raise ParseError('Unexpected token ' + self.tok_val)
118
119 def ParseTerm(self):
120 # type: () -> expr_t
121
122 #log('ParseTerm')
123 node = self.ParseFactor()
124
125 # TODO: Iterate and create nodes
126 while self.tok_type == tok_e.Op2:
127 op = self.tok_val
128 self.Next()
129 n2 = self.ParseFactor()
130 node = expr.Binary(op, node, n2)
131 return node
132
133 def ParseExpr(self):
134 # type: () -> expr_t
135
136 #log('ParseExpr')
137 node = self.ParseTerm()
138
139 while self.tok_type == tok_e.Op1:
140 op = self.tok_val
141 self.Next()
142 n2 = self.ParseTerm()
143 node = expr.Binary(op, node, n2)
144 return node
145
146 def Parse(self):
147 # type: () -> expr_t
148 self.Next()
149 return self.ParseExpr()
150
151
152def TestParse():
153 # type: () -> None
154
155 lex = Lexer('abc')
156 while True:
157 tok_type, tok_val = lex.Read()
158 if tok_type == tok_e.Eof:
159 break
160 #print('%s %s' % (tok_type, tok_val))
161 log('tok_val %s', tok_val)
162
163 CASES = [
164 '1+2',
165 '1+2*3',
166 '1*2+3',
167 '(1+2)*3',
168 'a+b+c+d',
169 'a*b*3*4',
170 '1',
171 'a',
172
173 # expect errors here:
174 '(',
175 ')',
176 '(a+b',
177 ' ',
178 ' $$ ',
179 ]
180 for expr_ in CASES:
181 lex = Lexer(expr_)
182 p = Parser(lex)
183 log('')
184 log('--')
185 log('%s =>', expr_)
186
187 node = None # type: Optional[expr_t]
188 try:
189 node = p.Parse()
190 except ParseError as e:
191 log('Parse error: %s', e.msg)
192 continue
193
194 #log('%s', tree)
195
196 htree = node.PrettyTree()
197 ast_f = fmt.AnsiOutput(mylib.Stdout())
198
199 fmt.PrintTree(htree, ast_f)
200 ast_f.write('\n')
201
202 UP_node = node
203 with tagswitch(UP_node) as case:
204 if case(expr_e.Const):
205 node = cast(expr.Const, UP_node)
206 log('Const %d', node.i)
207
208 elif case(expr_e.Var):
209 node = cast(expr.Var, UP_node)
210 log('Var %s', node.name)
211
212 else:
213 log('Other')
214
215
216def TestCreateNull():
217 # type: () -> None
218
219 c = expr.Const.CreateNull(alloc_lists=True)
220 log('c.i %d', c.i)
221
222 v = expr.Var.CreateNull(alloc_lists=True)
223 log('v.name %r', v.name)
224
225 b = expr.Binary.CreateNull(alloc_lists=True)
226 log('b.op %r', b.op)
227 b.op = '+'
228
229 # Must assign these
230 b.left = c
231 b.right = v
232
233 htree = b.PrettyTree()
234 ast_f = fmt.AnsiOutput(mylib.Stdout())
235
236 fmt.PrintTree(htree, ast_f)
237 ast_f.write('\n')
238
239
240def TestSubtype():
241 # type: () -> None
242
243 c = CompoundWord()
244 c.append('foo')
245 c.append('bar')
246
247 log('len(c) = %d', len(c))
248
249 # It behaves like a container
250 s1 = c[1]
251 log('s1 = %r', s1)
252
253 #for s in c:
254 # log("s = %r", s);
255
256
257def run_tests():
258 # type: () -> None
259
260 TestParse()
261 TestCreateNull()
262
263 TestSubtype()
264
265
266def run_benchmarks():
267 # type: () -> None
268 n = 100000
269
270 result = 0
271 i = 0
272 while i < n:
273 lex = Lexer('a*b*3*4')
274 p = Parser(lex)
275 tree = p.Parse()
276
277 i += 1
278
279 mylib.MaybeCollect() # manual GC point
280
281 log('result = %d', result)
282 log('iterations = %d', n)
283
284
285if __name__ == '__main__':
286 if os.getenv('BENCHMARK'):
287 log('Benchmarking...')
288 run_benchmarks()
289 else:
290 run_tests()