1 | #!/usr/bin/env python2
|
2 | """
|
3 | parse.py
|
4 | """
|
5 | from __future__ import print_function
|
6 |
|
7 | import os
|
8 | import sys
|
9 |
|
10 | # PYTHONPATH=$REPO_ROOT/vendor
|
11 | from typing import Tuple, List, Optional, cast
|
12 |
|
13 | # PYTHONPATH=$REPO_ROOT/mycpp
|
14 | from mycpp.mylib import log, tagswitch
|
15 | from mycpp import mylib
|
16 | from _devbuild.gen.expr_asdl import (expr, expr_e, expr_t, tok_e, tok_t,
|
17 | CompoundWord, Measure_v, MeasuredDoc)
|
18 |
|
19 | # PYTHONPATH=$REPO_ROOT
|
20 | from asdl import format as fmt
|
21 |
|
22 |
|
23 | class Lexer(object):
|
24 | # "type declaration" of members
|
25 |
|
26 | def __init__(self, s):
|
27 | # type: (str) -> None
|
28 | self.s = s
|
29 | self.i = 0
|
30 | self.n = len(s)
|
31 |
|
32 | def Read(self):
|
33 | # type: () -> Tuple[tok_t, str]
|
34 | if self.i >= self.n:
|
35 | return tok_e.Eof, '' # sentinel
|
36 |
|
37 | tok = self.s[self.i]
|
38 | self.i += 1
|
39 |
|
40 | if tok.isdigit():
|
41 | return tok_e.Const, tok
|
42 | if tok.isalpha():
|
43 | return tok_e.Var, tok
|
44 | if tok in '+-':
|
45 | return tok_e.Op1, tok # lower precedence
|
46 | if tok in '*/':
|
47 | return tok_e.Op2, tok # higher precedence
|
48 | if tok in '()':
|
49 | return tok_e.Paren, tok
|
50 |
|
51 | return tok_e.Invalid, tok
|
52 |
|
53 | def _MethodCallingOtherMethod(self):
|
54 | # type: () -> None
|
55 | # Make sure we don't get a member var called Read!
|
56 | # This is easy because we're only checking assignment statements.
|
57 | self.Read()
|
58 |
|
59 |
|
60 | class ParseError(Exception):
|
61 |
|
62 | def __init__(self, msg):
|
63 | # type: (str) -> None
|
64 | self.msg = msg
|
65 |
|
66 |
|
67 | class Parser(object):
|
68 | """
|
69 | Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
|
70 | syntax.
|
71 |
|
72 | Expr = Term ('+' Term)*
|
73 | Term = Factor ('*' Factor)*
|
74 | Factor = VAR
|
75 | | CONST
|
76 | | '(' Expr ')'
|
77 | """
|
78 |
|
79 | def __init__(self, lexer):
|
80 | # type: (Lexer) -> None
|
81 | self.lexer = lexer
|
82 | self.tok_type = tok_e.Eof # type: tok_t
|
83 | self.tok_val = ''
|
84 |
|
85 | def Next(self):
|
86 | # type: () -> None
|
87 | self.tok_type, self.tok_val = self.lexer.Read()
|
88 | #log('-- %r %r', self.tok_type, self.tok_val)
|
89 |
|
90 | def Eat(self, tok_val):
|
91 | # type: (str) -> None
|
92 | if self.tok_val != tok_val:
|
93 | #raise ParseError('Expected %r, got %r' % (tok_val, self.tok_val))
|
94 | raise ParseError('Expected ' + tok_val)
|
95 | self.Next()
|
96 |
|
97 | def ParseFactor(self):
|
98 | # type: () -> expr_t
|
99 |
|
100 | #log('ParseFactor')
|
101 | if self.tok_type == tok_e.Var:
|
102 | n1 = expr.Var(self.tok_val)
|
103 | self.Next()
|
104 | return n1
|
105 |
|
106 | if self.tok_type == tok_e.Const:
|
107 | n2 = expr.Const(int(self.tok_val))
|
108 | self.Next()
|
109 | return n2
|
110 |
|
111 | if self.tok_type == tok_e.Paren:
|
112 | self.Eat('(')
|
113 | n3 = self.ParseExpr()
|
114 | self.Eat(')')
|
115 | return n3
|
116 |
|
117 | #raise ParseError('Unexpected token %s %s' % (self.tok_type, self.tok_val))
|
118 | raise ParseError('Unexpected token ' + self.tok_val)
|
119 |
|
120 | def ParseTerm(self):
|
121 | # type: () -> expr_t
|
122 |
|
123 | #log('ParseTerm')
|
124 | node = self.ParseFactor()
|
125 |
|
126 | # TODO: Iterate and create nodes
|
127 | while self.tok_type == tok_e.Op2:
|
128 | op = self.tok_val
|
129 | self.Next()
|
130 | n2 = self.ParseFactor()
|
131 | node = expr.Binary(op, node, n2)
|
132 | return node
|
133 |
|
134 | def ParseExpr(self):
|
135 | # type: () -> expr_t
|
136 |
|
137 | #log('ParseExpr')
|
138 | node = self.ParseTerm()
|
139 |
|
140 | while self.tok_type == tok_e.Op1:
|
141 | op = self.tok_val
|
142 | self.Next()
|
143 | n2 = self.ParseTerm()
|
144 | node = expr.Binary(op, node, n2)
|
145 | return node
|
146 |
|
147 | def Parse(self):
|
148 | # type: () -> expr_t
|
149 | self.Next()
|
150 | return self.ParseExpr()
|
151 |
|
152 |
|
153 | def TestParse():
|
154 | # type: () -> None
|
155 |
|
156 | lex = Lexer('abc')
|
157 | while True:
|
158 | tok_type, tok_val = lex.Read()
|
159 | if tok_type == tok_e.Eof:
|
160 | break
|
161 | #print('%s %s' % (tok_type, tok_val))
|
162 | log('tok_val %s', tok_val)
|
163 |
|
164 | CASES = [
|
165 | '1+2',
|
166 | '1+2*3',
|
167 | '1*2+3',
|
168 | '(1+2)*3',
|
169 | 'a+b+c+d',
|
170 | 'a*b*3*4',
|
171 | '1',
|
172 | 'a',
|
173 |
|
174 | # expect errors here:
|
175 | '(',
|
176 | ')',
|
177 | '(a+b',
|
178 | ' ',
|
179 | ' $$ ',
|
180 | ]
|
181 | for expr_ in CASES:
|
182 | lex = Lexer(expr_)
|
183 | p = Parser(lex)
|
184 | log('')
|
185 | log('--')
|
186 | log('%s =>', expr_)
|
187 |
|
188 | node = None # type: Optional[expr_t]
|
189 | try:
|
190 | node = p.Parse()
|
191 | except ParseError as e:
|
192 | log('Parse error: %s', e.msg)
|
193 | continue
|
194 |
|
195 | #log('%s', tree)
|
196 |
|
197 | htree = node.PrettyTree(False)
|
198 | f = mylib.Stdout()
|
199 |
|
200 | fmt.HNodePrettyPrint(htree, f)
|
201 |
|
202 | UP_node = node
|
203 | with tagswitch(UP_node) as case:
|
204 | if case(expr_e.Const):
|
205 | node = cast(expr.Const, UP_node)
|
206 | log('Const %d', node.i)
|
207 |
|
208 | elif case(expr_e.Var):
|
209 | node = cast(expr.Var, UP_node)
|
210 | log('Var %s', node.name)
|
211 |
|
212 | else:
|
213 | log('Other')
|
214 |
|
215 |
|
216 | def TestCreateNull():
|
217 | # type: () -> None
|
218 |
|
219 | c = expr.Const.CreateNull(alloc_lists=True)
|
220 | log('c.i %d', c.i)
|
221 |
|
222 | v = expr.Var.CreateNull(alloc_lists=True)
|
223 | log('v.name %r', v.name)
|
224 |
|
225 | b = expr.Binary.CreateNull(alloc_lists=True)
|
226 | log('b.op %r', b.op)
|
227 | b.op = '+'
|
228 |
|
229 | # Must assign these
|
230 | b.left = c
|
231 | b.right = v
|
232 |
|
233 | htree = b.PrettyTree(False)
|
234 | f = mylib.Stdout()
|
235 | fmt.HNodePrettyPrint(htree, f)
|
236 |
|
237 |
|
238 | def TestSubtype():
|
239 | # type: () -> None
|
240 |
|
241 | # TODO:
|
242 | c = CompoundWord.New()
|
243 | #c = CompoundWord.Take([])
|
244 | c.append('foo')
|
245 | c.append('bar')
|
246 |
|
247 | log('len(c) = %d', len(c))
|
248 |
|
249 | # It behaves like a container
|
250 | s1 = c[1]
|
251 | log('s1 = %r', s1)
|
252 |
|
253 | c[1] = 'zz'
|
254 | log('c[1] = %r', c[1])
|
255 |
|
256 | # Iterate over it like a List
|
257 | for s in c:
|
258 | log("s = %r", s)
|
259 |
|
260 | strs = ['a', 'b', 'c', 'd'] # type: List[str]
|
261 |
|
262 | # TODO: does this need to work? Or do we use casting?
|
263 | #c2 = CompoundWord(strs)
|
264 |
|
265 | c3 = cast(CompoundWord, strs)
|
266 | log('len(c3) = %d', len(c3))
|
267 |
|
268 | # Hm this opposite cast crashes in mycpp - should probably work
|
269 | #strs2 = cast(List[str], c)
|
270 | #log('len(strs2) = %d', len(strs2))
|
271 |
|
272 | # AList constructor
|
273 |
|
274 | c4 = CompoundWord.Take(strs)
|
275 | log('len(c4) = %d', len(c4))
|
276 |
|
277 | # The length is zero after taking!
|
278 | log('len(strs) = %d', len(strs))
|
279 |
|
280 | if c[0] is None:
|
281 | log('NULL BUG')
|
282 | else:
|
283 | log('not null')
|
284 |
|
285 | log('c4[0] = %s', c4[0])
|
286 |
|
287 | # this is back to zero, can technically be reused
|
288 | strs.append('e')
|
289 | log('len(strs) = %d', len(strs))
|
290 |
|
291 | for s in c4:
|
292 | print("s = %r" % s)
|
293 |
|
294 |
|
295 | def TestLeafValue():
|
296 | # type: () -> None
|
297 |
|
298 | f = mylib.Stdout()
|
299 |
|
300 | n = 10
|
301 | for i in xrange(n):
|
302 | m = Measure_v(i, i + 1)
|
303 | d = MeasuredDoc('s%d' % i, m)
|
304 |
|
305 | tree = d.PrettyTree(False)
|
306 | fmt.HNodePrettyPrint(tree, f)
|
307 |
|
308 |
|
309 | def run_tests():
|
310 | # type: () -> None
|
311 |
|
312 | TestParse()
|
313 | TestCreateNull()
|
314 |
|
315 | TestSubtype()
|
316 |
|
317 | TestLeafValue()
|
318 |
|
319 |
|
320 | def run_benchmarks():
|
321 | # type: () -> None
|
322 | n = 100000
|
323 |
|
324 | result = 0
|
325 | i = 0
|
326 | while i < n:
|
327 | lex = Lexer('a*b*3*4')
|
328 | p = Parser(lex)
|
329 | tree = p.Parse()
|
330 |
|
331 | i += 1
|
332 |
|
333 | mylib.MaybeCollect() # manual GC point
|
334 |
|
335 | log('result = %d', result)
|
336 | log('iterations = %d', n)
|
337 |
|
338 |
|
339 | if __name__ == '__main__':
|
340 | if os.getenv('BENCHMARK'):
|
341 | log('Benchmarking...')
|
342 | run_benchmarks()
|
343 | else:
|
344 | run_tests()
|