OILS / mycpp / examples / parse.py View on Github | oilshell.org

344 lines, 197 significant
1#!/usr/bin/env python2
2"""
3parse.py
4"""
5from __future__ import print_function
6
7import os
8import sys
9
10# PYTHONPATH=$REPO_ROOT/vendor
11from typing import Tuple, List, Optional, cast
12
13# PYTHONPATH=$REPO_ROOT/mycpp
14from mycpp.mylib import log, tagswitch
15from mycpp import mylib
16from _devbuild.gen.expr_asdl import (expr, expr_e, expr_t, tok_e, tok_t,
17 CompoundWord, Measure_v, MeasuredDoc)
18
19# PYTHONPATH=$REPO_ROOT
20from asdl import format as fmt
21
22
23class Lexer(object):
24 # "type declaration" of members
25
26 def __init__(self, s):
27 # type: (str) -> None
28 self.s = s
29 self.i = 0
30 self.n = len(s)
31
32 def Read(self):
33 # type: () -> Tuple[tok_t, str]
34 if self.i >= self.n:
35 return tok_e.Eof, '' # sentinel
36
37 tok = self.s[self.i]
38 self.i += 1
39
40 if tok.isdigit():
41 return tok_e.Const, tok
42 if tok.isalpha():
43 return tok_e.Var, tok
44 if tok in '+-':
45 return tok_e.Op1, tok # lower precedence
46 if tok in '*/':
47 return tok_e.Op2, tok # higher precedence
48 if tok in '()':
49 return tok_e.Paren, tok
50
51 return tok_e.Invalid, tok
52
53 def _MethodCallingOtherMethod(self):
54 # type: () -> None
55 # Make sure we don't get a member var called Read!
56 # This is easy because we're only checking assignment statements.
57 self.Read()
58
59
60class ParseError(Exception):
61
62 def __init__(self, msg):
63 # type: (str) -> None
64 self.msg = msg
65
66
67class Parser(object):
68 """
69 Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
70 syntax.
71
72 Expr = Term ('+' Term)*
73 Term = Factor ('*' Factor)*
74 Factor = VAR
75 | CONST
76 | '(' Expr ')'
77 """
78
79 def __init__(self, lexer):
80 # type: (Lexer) -> None
81 self.lexer = lexer
82 self.tok_type = tok_e.Eof # type: tok_t
83 self.tok_val = ''
84
85 def Next(self):
86 # type: () -> None
87 self.tok_type, self.tok_val = self.lexer.Read()
88 #log('-- %r %r', self.tok_type, self.tok_val)
89
90 def Eat(self, tok_val):
91 # type: (str) -> None
92 if self.tok_val != tok_val:
93 #raise ParseError('Expected %r, got %r' % (tok_val, self.tok_val))
94 raise ParseError('Expected ' + tok_val)
95 self.Next()
96
97 def ParseFactor(self):
98 # type: () -> expr_t
99
100 #log('ParseFactor')
101 if self.tok_type == tok_e.Var:
102 n1 = expr.Var(self.tok_val)
103 self.Next()
104 return n1
105
106 if self.tok_type == tok_e.Const:
107 n2 = expr.Const(int(self.tok_val))
108 self.Next()
109 return n2
110
111 if self.tok_type == tok_e.Paren:
112 self.Eat('(')
113 n3 = self.ParseExpr()
114 self.Eat(')')
115 return n3
116
117 #raise ParseError('Unexpected token %s %s' % (self.tok_type, self.tok_val))
118 raise ParseError('Unexpected token ' + self.tok_val)
119
120 def ParseTerm(self):
121 # type: () -> expr_t
122
123 #log('ParseTerm')
124 node = self.ParseFactor()
125
126 # TODO: Iterate and create nodes
127 while self.tok_type == tok_e.Op2:
128 op = self.tok_val
129 self.Next()
130 n2 = self.ParseFactor()
131 node = expr.Binary(op, node, n2)
132 return node
133
134 def ParseExpr(self):
135 # type: () -> expr_t
136
137 #log('ParseExpr')
138 node = self.ParseTerm()
139
140 while self.tok_type == tok_e.Op1:
141 op = self.tok_val
142 self.Next()
143 n2 = self.ParseTerm()
144 node = expr.Binary(op, node, n2)
145 return node
146
147 def Parse(self):
148 # type: () -> expr_t
149 self.Next()
150 return self.ParseExpr()
151
152
153def TestParse():
154 # type: () -> None
155
156 lex = Lexer('abc')
157 while True:
158 tok_type, tok_val = lex.Read()
159 if tok_type == tok_e.Eof:
160 break
161 #print('%s %s' % (tok_type, tok_val))
162 log('tok_val %s', tok_val)
163
164 CASES = [
165 '1+2',
166 '1+2*3',
167 '1*2+3',
168 '(1+2)*3',
169 'a+b+c+d',
170 'a*b*3*4',
171 '1',
172 'a',
173
174 # expect errors here:
175 '(',
176 ')',
177 '(a+b',
178 ' ',
179 ' $$ ',
180 ]
181 for expr_ in CASES:
182 lex = Lexer(expr_)
183 p = Parser(lex)
184 log('')
185 log('--')
186 log('%s =>', expr_)
187
188 node = None # type: Optional[expr_t]
189 try:
190 node = p.Parse()
191 except ParseError as e:
192 log('Parse error: %s', e.msg)
193 continue
194
195 #log('%s', tree)
196
197 htree = node.PrettyTree(False)
198 f = mylib.Stdout()
199
200 fmt.HNodePrettyPrint(htree, f)
201
202 UP_node = node
203 with tagswitch(UP_node) as case:
204 if case(expr_e.Const):
205 node = cast(expr.Const, UP_node)
206 log('Const %d', node.i)
207
208 elif case(expr_e.Var):
209 node = cast(expr.Var, UP_node)
210 log('Var %s', node.name)
211
212 else:
213 log('Other')
214
215
216def TestCreateNull():
217 # type: () -> None
218
219 c = expr.Const.CreateNull(alloc_lists=True)
220 log('c.i %d', c.i)
221
222 v = expr.Var.CreateNull(alloc_lists=True)
223 log('v.name %r', v.name)
224
225 b = expr.Binary.CreateNull(alloc_lists=True)
226 log('b.op %r', b.op)
227 b.op = '+'
228
229 # Must assign these
230 b.left = c
231 b.right = v
232
233 htree = b.PrettyTree(False)
234 f = mylib.Stdout()
235 fmt.HNodePrettyPrint(htree, f)
236
237
238def TestSubtype():
239 # type: () -> None
240
241 # TODO:
242 c = CompoundWord.New()
243 #c = CompoundWord.Take([])
244 c.append('foo')
245 c.append('bar')
246
247 log('len(c) = %d', len(c))
248
249 # It behaves like a container
250 s1 = c[1]
251 log('s1 = %r', s1)
252
253 c[1] = 'zz'
254 log('c[1] = %r', c[1])
255
256 # Iterate over it like a List
257 for s in c:
258 log("s = %r", s)
259
260 strs = ['a', 'b', 'c', 'd'] # type: List[str]
261
262 # TODO: does this need to work? Or do we use casting?
263 #c2 = CompoundWord(strs)
264
265 c3 = cast(CompoundWord, strs)
266 log('len(c3) = %d', len(c3))
267
268 # Hm this opposite cast crashes in mycpp - should probably work
269 #strs2 = cast(List[str], c)
270 #log('len(strs2) = %d', len(strs2))
271
272 # AList constructor
273
274 c4 = CompoundWord.Take(strs)
275 log('len(c4) = %d', len(c4))
276
277 # The length is zero after taking!
278 log('len(strs) = %d', len(strs))
279
280 if c[0] is None:
281 log('NULL BUG')
282 else:
283 log('not null')
284
285 log('c4[0] = %s', c4[0])
286
287 # this is back to zero, can technically be reused
288 strs.append('e')
289 log('len(strs) = %d', len(strs))
290
291 for s in c4:
292 print("s = %r" % s)
293
294
295def TestLeafValue():
296 # type: () -> None
297
298 f = mylib.Stdout()
299
300 n = 10
301 for i in xrange(n):
302 m = Measure_v(i, i + 1)
303 d = MeasuredDoc('s%d' % i, m)
304
305 tree = d.PrettyTree(False)
306 fmt.HNodePrettyPrint(tree, f)
307
308
309def run_tests():
310 # type: () -> None
311
312 TestParse()
313 TestCreateNull()
314
315 TestSubtype()
316
317 TestLeafValue()
318
319
320def run_benchmarks():
321 # type: () -> None
322 n = 100000
323
324 result = 0
325 i = 0
326 while i < n:
327 lex = Lexer('a*b*3*4')
328 p = Parser(lex)
329 tree = p.Parse()
330
331 i += 1
332
333 mylib.MaybeCollect() # manual GC point
334
335 log('result = %d', result)
336 log('iterations = %d', n)
337
338
339if __name__ == '__main__':
340 if os.getenv('BENCHMARK'):
341 log('Benchmarking...')
342 run_benchmarks()
343 else:
344 run_tests()