1 | #!/usr/bin/env python2
|
2 | """
|
3 | parse.py
|
4 | """
|
5 | from __future__ import print_function
|
6 |
|
7 | import os
|
8 | import sys
|
9 |
|
10 | # PYTHONPATH=$REPO_ROOT/vendor
|
11 | from typing import Tuple, Optional, cast
|
12 |
|
13 | # PYTHONPATH=$REPO_ROOT/mycpp
|
14 | from mycpp.mylib import log, tagswitch
|
15 | from mycpp import mylib
|
16 | from _devbuild.gen.expr_asdl import expr, expr_e, expr_t, tok_e, tok_t, CompoundWord
|
17 |
|
18 | # PYTHONPATH=$REPO_ROOT
|
19 | from asdl import format as fmt
|
20 |
|
21 |
|
22 | class Lexer(object):
|
23 | # "type declaration" of members
|
24 |
|
25 | def __init__(self, s):
|
26 | # type: (str) -> None
|
27 | self.s = s
|
28 | self.i = 0
|
29 | self.n = len(s)
|
30 |
|
31 | def Read(self):
|
32 | # type: () -> Tuple[tok_t, str]
|
33 | if self.i >= self.n:
|
34 | return tok_e.Eof, '' # sentinel
|
35 |
|
36 | tok = self.s[self.i]
|
37 | self.i += 1
|
38 |
|
39 | if tok.isdigit():
|
40 | return tok_e.Const, tok
|
41 | if tok.isalpha():
|
42 | return tok_e.Var, tok
|
43 | if tok in '+-':
|
44 | return tok_e.Op1, tok # lower precedence
|
45 | if tok in '*/':
|
46 | return tok_e.Op2, tok # higher precedence
|
47 | if tok in '()':
|
48 | return tok_e.Paren, tok
|
49 |
|
50 | return tok_e.Invalid, tok
|
51 |
|
52 | def _MethodCallingOtherMethod(self):
|
53 | # type: () -> None
|
54 | # Make sure we don't get a member var called Read!
|
55 | # This is easy because we're only checking assignment statements.
|
56 | self.Read()
|
57 |
|
58 |
|
59 | class ParseError(Exception):
|
60 |
|
61 | def __init__(self, msg):
|
62 | # type: (str) -> None
|
63 | self.msg = msg
|
64 |
|
65 |
|
66 | class Parser(object):
|
67 | """
|
68 | Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
|
69 | syntax.
|
70 |
|
71 | Expr = Term ('+' Term)*
|
72 | Term = Factor ('*' Factor)*
|
73 | Factor = VAR
|
74 | | CONST
|
75 | | '(' Expr ')'
|
76 | """
|
77 |
|
78 | def __init__(self, lexer):
|
79 | # type: (Lexer) -> None
|
80 | self.lexer = lexer
|
81 | self.tok_type = tok_e.Eof # type: tok_t
|
82 | self.tok_val = ''
|
83 |
|
84 | def Next(self):
|
85 | # type: () -> None
|
86 | self.tok_type, self.tok_val = self.lexer.Read()
|
87 | #log('-- %r %r', self.tok_type, self.tok_val)
|
88 |
|
89 | def Eat(self, tok_val):
|
90 | # type: (str) -> None
|
91 | if self.tok_val != tok_val:
|
92 | #raise ParseError('Expected %r, got %r' % (tok_val, self.tok_val))
|
93 | raise ParseError('Expected ' + tok_val)
|
94 | self.Next()
|
95 |
|
96 | def ParseFactor(self):
|
97 | # type: () -> expr_t
|
98 |
|
99 | #log('ParseFactor')
|
100 | if self.tok_type == tok_e.Var:
|
101 | n1 = expr.Var(self.tok_val)
|
102 | self.Next()
|
103 | return n1
|
104 |
|
105 | if self.tok_type == tok_e.Const:
|
106 | n2 = expr.Const(int(self.tok_val))
|
107 | self.Next()
|
108 | return n2
|
109 |
|
110 | if self.tok_type == tok_e.Paren:
|
111 | self.Eat('(')
|
112 | n3 = self.ParseExpr()
|
113 | self.Eat(')')
|
114 | return n3
|
115 |
|
116 | #raise ParseError('Unexpected token %s %s' % (self.tok_type, self.tok_val))
|
117 | raise ParseError('Unexpected token ' + self.tok_val)
|
118 |
|
119 | def ParseTerm(self):
|
120 | # type: () -> expr_t
|
121 |
|
122 | #log('ParseTerm')
|
123 | node = self.ParseFactor()
|
124 |
|
125 | # TODO: Iterate and create nodes
|
126 | while self.tok_type == tok_e.Op2:
|
127 | op = self.tok_val
|
128 | self.Next()
|
129 | n2 = self.ParseFactor()
|
130 | node = expr.Binary(op, node, n2)
|
131 | return node
|
132 |
|
133 | def ParseExpr(self):
|
134 | # type: () -> expr_t
|
135 |
|
136 | #log('ParseExpr')
|
137 | node = self.ParseTerm()
|
138 |
|
139 | while self.tok_type == tok_e.Op1:
|
140 | op = self.tok_val
|
141 | self.Next()
|
142 | n2 = self.ParseTerm()
|
143 | node = expr.Binary(op, node, n2)
|
144 | return node
|
145 |
|
146 | def Parse(self):
|
147 | # type: () -> expr_t
|
148 | self.Next()
|
149 | return self.ParseExpr()
|
150 |
|
151 |
|
152 | def TestParse():
|
153 | # type: () -> None
|
154 |
|
155 | lex = Lexer('abc')
|
156 | while True:
|
157 | tok_type, tok_val = lex.Read()
|
158 | if tok_type == tok_e.Eof:
|
159 | break
|
160 | #print('%s %s' % (tok_type, tok_val))
|
161 | log('tok_val %s', tok_val)
|
162 |
|
163 | CASES = [
|
164 | '1+2',
|
165 | '1+2*3',
|
166 | '1*2+3',
|
167 | '(1+2)*3',
|
168 | 'a+b+c+d',
|
169 | 'a*b*3*4',
|
170 | '1',
|
171 | 'a',
|
172 |
|
173 | # expect errors here:
|
174 | '(',
|
175 | ')',
|
176 | '(a+b',
|
177 | ' ',
|
178 | ' $$ ',
|
179 | ]
|
180 | for expr_ in CASES:
|
181 | lex = Lexer(expr_)
|
182 | p = Parser(lex)
|
183 | log('')
|
184 | log('--')
|
185 | log('%s =>', expr_)
|
186 |
|
187 | node = None # type: Optional[expr_t]
|
188 | try:
|
189 | node = p.Parse()
|
190 | except ParseError as e:
|
191 | log('Parse error: %s', e.msg)
|
192 | continue
|
193 |
|
194 | #log('%s', tree)
|
195 |
|
196 | htree = node.PrettyTree()
|
197 | ast_f = fmt.AnsiOutput(mylib.Stdout())
|
198 |
|
199 | fmt.PrintTree(htree, ast_f)
|
200 | ast_f.write('\n')
|
201 |
|
202 | UP_node = node
|
203 | with tagswitch(UP_node) as case:
|
204 | if case(expr_e.Const):
|
205 | node = cast(expr.Const, UP_node)
|
206 | log('Const %d', node.i)
|
207 |
|
208 | elif case(expr_e.Var):
|
209 | node = cast(expr.Var, UP_node)
|
210 | log('Var %s', node.name)
|
211 |
|
212 | else:
|
213 | log('Other')
|
214 |
|
215 |
|
216 | def TestCreateNull():
|
217 | # type: () -> None
|
218 |
|
219 | c = expr.Const.CreateNull(alloc_lists=True)
|
220 | log('c.i %d', c.i)
|
221 |
|
222 | v = expr.Var.CreateNull(alloc_lists=True)
|
223 | log('v.name %r', v.name)
|
224 |
|
225 | b = expr.Binary.CreateNull(alloc_lists=True)
|
226 | log('b.op %r', b.op)
|
227 | b.op = '+'
|
228 |
|
229 | # Must assign these
|
230 | b.left = c
|
231 | b.right = v
|
232 |
|
233 | htree = b.PrettyTree()
|
234 | ast_f = fmt.AnsiOutput(mylib.Stdout())
|
235 |
|
236 | fmt.PrintTree(htree, ast_f)
|
237 | ast_f.write('\n')
|
238 |
|
239 |
|
240 | def TestSubtype():
|
241 | # type: () -> None
|
242 |
|
243 | c = CompoundWord()
|
244 | c.append('foo')
|
245 | c.append('bar')
|
246 |
|
247 | log('len(c) = %d', len(c))
|
248 |
|
249 | # It behaves like a container
|
250 | s1 = c[1]
|
251 | log('s1 = %r', s1)
|
252 |
|
253 | #for s in c:
|
254 | # log("s = %r", s);
|
255 |
|
256 |
|
257 | def run_tests():
|
258 | # type: () -> None
|
259 |
|
260 | TestParse()
|
261 | TestCreateNull()
|
262 |
|
263 | TestSubtype()
|
264 |
|
265 |
|
266 | def run_benchmarks():
|
267 | # type: () -> None
|
268 | n = 100000
|
269 |
|
270 | result = 0
|
271 | i = 0
|
272 | while i < n:
|
273 | lex = Lexer('a*b*3*4')
|
274 | p = Parser(lex)
|
275 | tree = p.Parse()
|
276 |
|
277 | i += 1
|
278 |
|
279 | mylib.MaybeCollect() # manual GC point
|
280 |
|
281 | log('result = %d', result)
|
282 | log('iterations = %d', n)
|
283 |
|
284 |
|
285 | if __name__ == '__main__':
|
286 | if os.getenv('BENCHMARK'):
|
287 | log('Benchmarking...')
|
288 | run_benchmarks()
|
289 | else:
|
290 | run_tests()
|