1 | """expr_to_ast.py."""
|
2 | from __future__ import print_function
|
3 |
|
4 | from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind
|
5 | from _devbuild.gen.syntax_asdl import (
|
6 | Token,
|
7 | SimpleVarSub,
|
8 | loc,
|
9 | loc_t,
|
10 | DoubleQuoted,
|
11 | SingleQuoted,
|
12 | BracedVarSub,
|
13 | CommandSub,
|
14 | ShArrayLiteral,
|
15 | command,
|
16 | expr,
|
17 | expr_e,
|
18 | expr_t,
|
19 | expr_context_e,
|
20 | re,
|
21 | re_t,
|
22 | re_repeat,
|
23 | re_repeat_t,
|
24 | class_literal_term,
|
25 | class_literal_term_t,
|
26 | PosixClass,
|
27 | PerlClass,
|
28 | NameType,
|
29 | y_lhs_t,
|
30 | Comprehension,
|
31 | Subscript,
|
32 | Attribute,
|
33 | proc_sig,
|
34 | proc_sig_t,
|
35 | Param,
|
36 | RestParam,
|
37 | ParamGroup,
|
38 | NamedArg,
|
39 | ArgList,
|
40 | pat,
|
41 | pat_t,
|
42 | TypeExpr,
|
43 | Func,
|
44 | Eggex,
|
45 | EggexFlag,
|
46 | CharCode,
|
47 | CharRange,
|
48 | )
|
49 | from _devbuild.gen.value_asdl import value, value_t
|
50 | from _devbuild.gen import grammar_nt
|
51 | from core.error import p_die
|
52 | from data_lang import j8
|
53 | from frontend import consts
|
54 | from frontend import lexer
|
55 | from frontend import location
|
56 | from mycpp import mops
|
57 | from mycpp import mylib
|
58 | from mycpp.mylib import log, tagswitch
|
59 | from osh import word_compile
|
60 | from ysh import expr_parse
|
61 | from ysh import regex_translate
|
62 |
|
63 | from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, cast
|
64 | if TYPE_CHECKING:
|
65 | from pgen2.grammar import Grammar
|
66 | from pgen2.pnode import PNode
|
67 |
|
68 | _ = log
|
69 |
|
# Perl-style character class names accepted in eggex, mapped to the
# single-letter class each one denotes ('word' is a long spelling of 'w').
PERL_CLASSES = {'d': 'd', 'w': 'w', 'word': 'w', 's': 's'}

# POSIX bracket-expression class names.
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
POSIX_CLASSES = [
    'alnum', 'cntrl', 'lower', 'space',
    'alpha', 'digit', 'print', 'upper',
    'blank', 'graph', 'punct', 'xdigit',
]
# NOTE: There are also things like \p{Greek} that we could put in the
# "non-sigil" namespace.

# Parse error messages shared by more than one call site.
RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"
POS_ARG_MISPLACED = "Positional arg can't appear in group of named args"

# First nonterminal number.  Copied from pgen2/token.py to avoid dependency.
NT_OFFSET = 256
|
100 |
|
if mylib.PYTHON:

    def MakeGrammarNames(ysh_grammar):
        # type: (Grammar) -> Dict[int, str]
        """Build a single table mapping integer IDs to printable names.

        Token IDs below NT_OFFSET are named after the lexer's ID spec;
        nonterminal numbers are named after the grammar's symbols and are
        asserted to be >= NT_OFFSET.

        Args:
          ysh_grammar: grammar whose number2symbol table supplies the
                       nonterminal names

        Returns:
          Dict of token ID or nonterminal number -> name.
        """
        # TODO: Break this dependency
        from frontend import lexer_def

        # TODO: Some tokens have values greater than NT_OFFSET.  They are
        # skipped here.
        names = dict(
            (tok_id, tok_name)
            for tok_name, tok_id in lexer_def.ID_SPEC.id_str2int.items()
            if tok_id < NT_OFFSET)

        for sym_num, sym_name in ysh_grammar.number2symbol.items():
            assert sym_num >= NT_OFFSET, (sym_num, sym_name)
            names[sym_num] = sym_name

        return names
|
124 |
|
125 |
|
126 | class Transformer(object):
|
127 | """Homogeneous parse tree -> heterogeneous AST ("lossless syntax tree")
|
128 |
|
129 | pgen2 (Python's LL parser generator) doesn't have semantic actions like yacc,
|
130 | so this "transformer" is the equivalent.
|
131 |
|
132 | Files to refer to when modifying this function:
|
133 |
|
134 | ysh/grammar.pgen2 (generates _devbuild/gen/grammar_nt.py)
|
135 | frontend/syntax.asdl (generates _devbuild/gen/syntax_asdl.py)
|
136 |
|
137 | Related examples:
|
138 |
|
139 | opy/compiler2/transformer.py (Python's parse tree -> AST, ~1500 lines)
|
140 | Python-2.7.13/Python/ast.c (the "real" CPython version, ~3600 lines)
|
141 |
|
142 | Other:
|
143 | frontend/parse_lib.py (turn on print_parse_tree)
|
144 |
|
145 | Public methods:
|
146 | Expr, VarDecl
|
147 | atom, trailer, etc. are private, named after productions in grammar.pgen2.
|
148 | """
|
149 |
|
def __init__(self, gr):
    # type: (Grammar) -> None
    """Keep the grammar's symbol table; in Python, also set up a printer.

    Args:
      gr: the pgen2 Grammar that the parse trees to transform come from
    """
    self.number2symbol = gr.number2symbol
    if mylib.PYTHON:
        # For printing raw parse tree nodes while debugging
        self.p_printer = expr_parse.ParseTreePrinter(MakeGrammarNames(gr))
|
157 |
|
def _LeftAssoc(self, p_node):
    # type: (PNode) -> expr_t
    """Fold a flat 'operand (op operand)*' node into nested expr.Binary.

    Examples:
      xor_expr: and_expr ('xor' and_expr)*
      term: factor (('*'|'/'|'%'|'div') factor)*

    The grouping is to the left: 3 - 1 - 2 becomes ((3 - 1) - 2).

    A node with a single child yields that child's expression unchanged.
    """
    # Note: Compare the iterative com_binary() method in
    # opy/compiler2/transformer.py.

    # Children alternate operand, operator, operand, ...
    # - The PNode for '3 - 1' has 3 children
    # - The PNode for '3 - 1 - 2' has 5 children

    #self.p_printer.Print(p_node)

    num_children = p_node.NumChildren()
    result = self.Expr(p_node.GetChild(0))

    # Operators sit at the odd indices; each one consumes the operand
    # that follows it.
    for op_index in xrange(1, num_children, 2):
        op_node = p_node.GetChild(op_index)
        rhs = self.Expr(p_node.GetChild(op_index + 1))

        # Everything built so far becomes the left operand
        result = expr.Binary(op_node.tok, result, rhs)

    return result
|
190 |
|
def _Trailer(self, base, p_trailer):
    # type: (expr_t, PNode) -> expr_t
    """
    trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
             | '.' NAME | '->' NAME | '::' NAME
             )

    Wraps `base` in a FuncCall, Subscript or Attribute node, depending on
    the first token of the trailer.  The attribute operators accepted by
    the code are Id.Expr_Dot, Id.Expr_RArrow and Id.Expr_RDArrow.
    """
    first = p_trailer.GetChild(0)
    tok0 = first.tok
    typ0 = first.typ

    if typ0 == Id.Op_LParen:  # f() or f(x, y)
        rparen = p_trailer.GetChild(-1).tok
        arglist = ArgList(tok0, [], None, [], None, None, rparen)

        # Exactly 2 children means '(' ')' with nothing in between
        if p_trailer.NumChildren() != 2:
            p = p_trailer.GetChild(1)  # the X in ( X )
            assert p.typ == grammar_nt.arglist
            self._ArgList(p, arglist)

        return expr.FuncCall(base, arglist)

    if typ0 == Id.Op_LBracket:
        p_args = p_trailer.GetChild(1)
        assert p_args.typ == grammar_nt.subscriptlist

        n = p_args.NumChildren()
        if n == 1:  # a[1]  a[1:2]  a[:]  etc.
            return Subscript(tok0, base,
                             self._Subscript(p_args.GetChild(0)))

        # a[1, 2]  a[1:2, :]
        slices = []  # type: List[expr_t]
        for i in xrange(0, n, 2):  # skip commas
            slices.append(self._Subscript(p_args.GetChild(i)))

        # expr.Tuple evaluates to List in YSH.
        #
        # Note that syntactically, a[1:2, 3:4] is the only way to get a
        # List[Slice].  [1:2, 3:4] by itself is not allowed.
        comma_tok = p_args.GetChild(1).tok
        return Subscript(
            tok0, base,
            expr.Tuple(comma_tok, slices, expr_context_e.Store))

    if typ0 in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_RDArrow):
        attr = p_trailer.GetChild(1).tok  # will be Id.Expr_Name
        return Attribute(base, tok0, attr, lexer.TokenVal(attr),
                         expr_context_e.Store)

    raise AssertionError(typ0)
|
239 |
|
def _DictPair(self, p_node):
    # type: (PNode) -> Tuple[expr_t, expr_t]
    """
    dict_pair: ( Expr_Name [':' test]
               | '[' testlist ']' ':' test
               | sq_string ':' test
               | dq_string ':' test )

    Returns:
      (key, value) expressions.  For a bare name without ': value', the
      value is expr.Implicit.

    Raises:
      AssertionError: if the first child is none of the alternatives above.
    """
    assert p_node.typ == grammar_nt.dict_pair

    first = p_node.GetChild(0)
    typ = first.typ

    # {'key': val} or {"key": val} -- the key is a string nonterminal
    if typ in (grammar_nt.sq_string, grammar_nt.dq_string):
        key = self.Expr(first)  # type: expr_t
        val = self.Expr(p_node.GetChild(2))
        return key, val

    # Otherwise the first child is a token
    tok0 = first.tok
    id_ = tok0.id

    if id_ == Id.Expr_Name:  # {name: val} or {name}
        key = expr.Const(tok0, value.Str(lexer.TokenVal(tok0)))
        if p_node.NumChildren() >= 3:
            val = self.Expr(p_node.GetChild(2))
        else:
            val = expr.Implicit
        return key, val

    if id_ == Id.Op_LBracket:  # {[x+y]: 'val'}
        # children are: '[' testlist ']' ':' test
        key = self.Expr(p_node.GetChild(1))
        val = self.Expr(p_node.GetChild(4))
        return key, val

    # Fail explicitly rather than returning unbound locals.
    raise AssertionError(Id_str(id_))
|
274 |
|
def _Dict(self, parent, p_node):
    # type: (PNode, PNode) -> expr.Dict
    """
    dict: dict_pair (comma_newline dict_pair)* [comma_newline]

    Args:
      parent: the enclosing node; its token is attached to the result
      p_node: either the 'dict' nonterminal, or the closing brace token
              when the literal is empty
    """
    keys = []  # type: List[expr_t]
    values = []  # type: List[expr_t]

    # {} -- the node after '{' is directly the '}'
    if p_node.typ != Id.Op_RBrace:
        assert p_node.typ == grammar_nt.dict

        # Pairs are at the even indices; separators are in between
        n = p_node.NumChildren()
        i = 0
        while i < n:
            key, val = self._DictPair(p_node.GetChild(i))
            keys.append(key)
            values.append(val)
            i += 2

    return expr.Dict(parent.tok, keys, values)
|
295 |
|
def _Tuple(self, parent):
    # type: (PNode) -> expr_t
    """Transform 'x (, x)*' children into an expr.Tuple.

    A single child is not a tuple; its expression is returned as is.
    Exactly two children (an element plus a trailing comma) is an error.
    """
    n = parent.NumChildren()

    # (x) -- not a tuple
    if n == 1:
        return self.Expr(parent.GetChild(0))

    # x, and (x,) aren't allowed
    if n == 2:
        p_die('Invalid trailing comma', parent.GetChild(1).tok)

    elts = []  # type: List[expr_t]
    i = 0
    while i < n:  # elements are at even indices; commas are skipped
        elts.append(self.Expr(parent.GetChild(i)))
        i += 2

    # The expr_context_e argument is unused
    return expr.Tuple(parent.tok, elts, expr_context_e.Store)
|
316 |
|
def _TestlistComp(self, parent, p_node, id0):
    # type: (PNode, PNode, Id_t) -> expr_t
    """
    testlist_comp:
      (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )

    Args:
      parent: the enclosing node; its token is attached to list results
      p_node: the testlist_comp node
      id0: ID of the opening delimiter, Id.Op_LParen or Id.Op_LBracket
    """
    assert p_node.typ == grammar_nt.testlist_comp

    n = p_node.NumChildren()

    # Comprehension: an element expression followed by one 'for' clause
    if n > 1 and p_node.GetChild(1).typ == grammar_nt.comp_for:
        elt_node = p_node.GetChild(0)
        if elt_node.typ == grammar_nt.splat_expr:
            p_die('Splat not currently supported', elt_node.tok)
        elt = self.Expr(elt_node)

        comp = self._CompFor(p_node.GetChild(1))

        if id0 == Id.Op_LParen:  # (x+1 for x in y)
            return expr.GeneratorExp(elt, [comp])
        if id0 == Id.Op_LBracket:  # [x+1 for x in y]
            return expr.ListComp(parent.tok, elt, [comp])
        raise AssertionError()

    if id0 == Id.Op_LParen:
        # Parenthesized expression like (x+1) or (x)
        if n == 1:
            return self.Expr(p_node.GetChild(0))

        # Tuples (1,) (1, 2) etc. - TODO: should be a list literal?
        if p_node.GetChild(1).typ != Id.Arith_Comma:
            raise AssertionError()
        return self._Tuple(p_node)

    if id0 != Id.Op_LBracket:
        raise AssertionError(Id_str(id0))

    # List [1,2,3]
    elts = []  # type: List[expr_t]
    i = 0
    while i < n:  # elements are at even indices; commas are skipped
        item = p_node.GetChild(i)
        if item.typ == grammar_nt.splat_expr:
            p_die('Splat not currently supported', item.tok)
        elts.append(self.Expr(item))
        i += 2

    # The expr_context_e argument is unused
    return expr.List(parent.tok, elts, expr_context_e.Store)
|
362 |
|
def _Atom(self, parent):
    # type: (PNode) -> expr_t
    """Handle alternatives of 'atom' where there's more than one child.

    Dispatches on the ID of the first child's token.

    Raises:
      AssertionError: if the first token doesn't start a known alternative.
      A parse error (via p_die) for syntax that is recognized but not
      implemented: places in containers, and unit suffixes on numbers.
    """
    tok = parent.GetChild(0).tok
    id_ = tok.id
    n = parent.NumChildren()

    if id_ == Id.Op_LParen:
        # atom: '(' [yield_expr|testlist_comp] ')' | ...
        if n == 2:  # () is a tuple
            assert (
                parent.GetChild(1).typ == Id.Op_RParen), parent.GetChild(1)
            return expr.Tuple(tok, [], expr_context_e.Store)

        return self._TestlistComp(parent, parent.GetChild(1), id_)

    if id_ == Id.Op_LBracket:
        # atom: ... | '[' [testlist_comp] ']' | ...

        if n == 2:  # []
            assert (parent.GetChild(1).typ == Id.Op_RBracket
                    ), parent.GetChild(1)
            return expr.List(tok, [],
                             expr_context_e.Store)  # unused expr_context_e

        return self._TestlistComp(parent, parent.GetChild(1), id_)

    if id_ == Id.Left_CaretBracket:  # ^[42 + x]
        child = self.Expr(parent.GetChild(1))
        return expr.Literal(child)

    if id_ == Id.Op_LBrace:
        # atom: ... | '{' [Op_Newline] [dict] '}'
        i = 1
        if parent.GetChild(i).typ == Id.Op_Newline:
            i += 1  # skip the optional newline after '{'
        return self._Dict(parent, parent.GetChild(i))

    if id_ == Id.Arith_Amp:
        # Only a plain name may follow '&' for now
        if n >= 3:
            p_die("Places in containers not implemented yet",
                  parent.GetChild(2).tok)

        name_tok = parent.GetChild(1).tok
        return expr.Place(name_tok, lexer.TokenVal(name_tok), [])

    if id_ == Id.Expr_Func:
        # STUB.  This should really be a Func, not Lambda.
        return expr.Lambda([], expr.Implicit)

    # 100 M
    # The suffix is parsed, but rejected for now
    if id_ == Id.Expr_DecInt:
        assert n > 1
        p_die("Units suffix not implemented", parent.GetChild(1).tok)
        #return self.Expr(parent.GetChild(0))

    # 100.5 M
    # The suffix is parsed, but rejected for now
    if id_ == Id.Expr_Float:
        assert n > 1
        p_die("Units suffix not implemented", parent.GetChild(1).tok)
        #return self.Expr(parent.GetChild(0))

    raise AssertionError(Id_str(id_))
|
430 |
|
def _NameType(self, p_node):
    # type: (PNode) -> NameType
    """ name_type: Expr_Name [':'] [type_expr]

    The type expression, when present, is the last child: index 1 when
    there are 2 children, index 2 when there are 3.
    """
    name_tok = p_node.GetChild(0).tok
    n = p_node.NumChildren()

    typ = None  # type: Optional[TypeExpr]
    if n == 2 or n == 3:
        typ = self._TypeExpr(p_node.GetChild(n - 1))

    return NameType(name_tok, lexer.TokenVal(name_tok), typ)
|
444 |
|
def _NameTypeList(self, p_node):
    # type: (PNode) -> List[NameType]
    """ name_type_list: name_type (',' name_type)*

    Returns one NameType per name_type child, in source order.
    """
    assert p_node.typ == grammar_nt.name_type_list

    name_types = []  # type: List[NameType]
    n = p_node.NumChildren()
    i = 0
    while i < n:  # name_type nodes are at even indices; commas are skipped
        name_types.append(self._NameType(p_node.GetChild(i)))
        i += 2
    return name_types
|
455 |
|
def _CompFor(self, p_node):
    # type: (PNode) -> Comprehension
    """comp_for: 'for' exprlist 'in' or_test ['if' or_test]

    Child 1 is transformed with _NameTypeList(), child 3 is the iterable,
    and child 5, if present, is the filter condition.
    """
    lhs = self._NameTypeList(p_node.GetChild(1))
    iterable = self.Expr(p_node.GetChild(3))

    cond = None  # type: Optional[expr_t]
    if p_node.NumChildren() >= 6:  # has the 'if' clause
        cond = self.Expr(p_node.GetChild(5))

    return Comprehension(lhs, iterable, cond)
|
468 |
|
def _CompareChain(self, parent):
    # type: (PNode) -> expr_t
    """comparison: expr (comp_op expr)*

    Returns an expr.Compare holding the first operand, plus parallel lists
    of operator tokens and right-hand operands.

    Note: for the two-token operators 'not in' and 'is not', the ID of the
    first token is overwritten in place with Id.Node_NotIn / Id.Node_IsNot.
    """
    cmp_ops = []  # type: List[Token]
    comparators = []  # type: List[expr_t]
    left = self.Expr(parent.GetChild(0))

    n = parent.NumChildren()
    # comp_op nodes are at odd indices, each followed by its operand
    for op_index in xrange(1, n, 2):
        op_node = parent.GetChild(op_index)
        op = op_node.GetChild(0).tok

        if op_node.NumChildren() == 2:
            # Blame the first token, and change its type
            if op.id == Id.Expr_Not:  # not in
                op.id = Id.Node_NotIn
            elif op.id == Id.Expr_Is:  # is not
                op.id = Id.Node_IsNot
            else:
                raise AssertionError()
        # else: single-token operator -- is, <, ==, etc.

        cmp_ops.append(op)
        comparators.append(self.Expr(parent.GetChild(op_index + 1)))

    return expr.Compare(left, cmp_ops, comparators)
|
498 |
|
def _Subscript(self, parent):
    # type: (PNode) -> expr_t
    """subscript: expr | [expr] ':' [expr]

    Returns the plain expression for a[1], and an expr.Slice when there's
    a colon.  A missing lower or upper bound is None.
    """
    first = parent.GetChild(0)
    n = parent.NumChildren()

    lower = None  # type: Optional[expr_t]
    upper = None  # type: Optional[expr_t]

    if first.typ == grammar_nt.expr:
        if n != 2 and n != 3:  # a[1]
            return self.Expr(first)

        # a[1:] or a[1:2]
        lower = self.Expr(first)
        colon_tok = parent.GetChild(1).tok
        if n == 3:
            upper = self.Expr(parent.GetChild(2))
    else:
        # a[:] or a[:3]
        assert first.typ == Id.Arith_Colon
        colon_tok = first.tok
        if n != 1:
            upper = self.Expr(parent.GetChild(1))

    return expr.Slice(lower, colon_tok, upper)
|
529 |
|
def Expr(self, pnode):
    # type: (PNode) -> expr_t
    """Transform expressions (as opposed to statements)

    Dispatches on pnode.typ.  Nonterminals from grammar.pgen2 are handled
    first; whatever is left is treated as a terminal token, which becomes
    either an expr.Var or an expr.Const.

    Raises:
      AssertionError: for a node type that isn't handled.
      A parse error (via p_die) for $name, and for integer constants that
      are too large.
    """
    typ = pnode.typ

    #
    # YSH Entry Points / Additions
    #

    if typ == grammar_nt.ysh_expr:  # for if/while
        # ysh_expr: '(' testlist ')'
        return self.Expr(pnode.GetChild(1))

    if typ in (grammar_nt.command_expr, grammar_nt.ysh_expr_sub):
        # The expression is the first child
        return self.Expr(pnode.GetChild(0))

    #
    # Python-like Expressions / Operators
    #

    if typ == grammar_nt.atom:
        if pnode.NumChildren() == 1:
            return self.Expr(pnode.GetChild(0))
        return self._Atom(pnode)

    if typ == grammar_nt.testlist:
        # testlist: test (',' test)* [',']
        return self._Tuple(pnode)

    if typ == grammar_nt.test:
        # test: or_test ['if' or_test 'else' test] | lambdef
        if pnode.NumChildren() == 1:
            return self.Expr(pnode.GetChild(0))

        # TODO: Handle lambdef

        # body 'if' test 'else' orelse
        test = self.Expr(pnode.GetChild(2))
        body = self.Expr(pnode.GetChild(0))
        orelse = self.Expr(pnode.GetChild(4))
        return expr.IfExp(test, body, orelse)

    if typ == grammar_nt.lambdef:
        # lambdef: '|' [name_type_list] '|' test

        n = pnode.NumChildren()
        if n == 4:
            params = self._NameTypeList(pnode.GetChild(1))
        else:
            params = []

        body = self.Expr(pnode.GetChild(n - 1))
        return expr.Lambda(params, body)

    #
    # Operators with Precedence
    #

    # All of these have the shape: operand (op operand)*
    #
    #   or_test: and_test ('or' and_test)*
    #   and_test: not_test ('and' not_test)*
    #   expr: xor_expr ('|' xor_expr)*
    #   xor_expr: and_expr ('xor' and_expr)*
    #   and_expr: shift_expr ('&' shift_expr)*
    #   shift_expr: arith_expr (('<<'|'>>') arith_expr)*
    #   arith_expr: term (('+'|'-') term)*
    #   term: factor (('*'|'/'|'div'|'mod') factor)*
    if typ in (grammar_nt.or_test, grammar_nt.and_test, grammar_nt.expr,
               grammar_nt.xor_expr, grammar_nt.and_expr,
               grammar_nt.shift_expr, grammar_nt.arith_expr,
               grammar_nt.term):
        return self._LeftAssoc(pnode)

    if typ == grammar_nt.not_test:
        # not_test: 'not' not_test | comparison
        if pnode.NumChildren() == 1:
            return self.Expr(pnode.GetChild(0))

        not_tok = pnode.GetChild(0).tok
        return expr.Unary(not_tok, self.Expr(pnode.GetChild(1)))

    if typ == grammar_nt.comparison:
        if pnode.NumChildren() == 1:
            return self.Expr(pnode.GetChild(0))

        return self._CompareChain(pnode)

    if typ == grammar_nt.range_expr:
        n = pnode.NumChildren()
        if n == 1:
            return self.Expr(pnode.GetChild(0))

        if n == 3:  # lower, operator token, upper
            return expr.Range(self.Expr(pnode.GetChild(0)),
                              pnode.GetChild(1).tok,
                              self.Expr(pnode.GetChild(2)))

        raise AssertionError(n)

    if typ == grammar_nt.factor:
        # factor: ('+'|'-'|'~') factor | power
        # the power would have already been reduced
        if pnode.NumChildren() == 1:
            return self.Expr(pnode.GetChild(0))

        assert pnode.NumChildren() == 2
        op = pnode.GetChild(0)
        operand = pnode.GetChild(1)

        assert isinstance(op.tok, Token)
        return expr.Unary(op.tok, self.Expr(operand))

    if typ == grammar_nt.power:
        # power: atom trailer* ['**' factor]

        node = self.Expr(pnode.GetChild(0))
        if pnode.NumChildren() == 1:  # No trailers
            return node

        # Support a->startswith(b) and mydict.key
        # Each trailer wraps what has been built so far.
        n = pnode.NumChildren()
        i = 1
        while i < n and pnode.GetChild(i).typ == grammar_nt.trailer:
            node = self._Trailer(node, pnode.GetChild(i))
            i += 1

        if i != n:  # ['**' factor]
            pow_tok = pnode.GetChild(i).tok
            assert pow_tok.id == Id.Arith_DStar, pow_tok
            factor = self.Expr(pnode.GetChild(i + 1))
            node = expr.Binary(pow_tok, node, factor)

        return node

    if typ == grammar_nt.eggex:
        return self._Eggex(pnode)

    #
    # YSH Lexer Modes
    #
    # For these nonterminals, the second child's 'tok' field is cast to an
    # already-built syntax node rather than used as a Token.
    #

    if typ in (grammar_nt.sh_array_literal,
               grammar_nt.old_sh_array_literal):
        return cast(ShArrayLiteral, pnode.GetChild(1).tok)

    if typ == grammar_nt.sh_command_sub:
        return cast(CommandSub, pnode.GetChild(1).tok)

    if typ == grammar_nt.braced_var_sub:
        return cast(BracedVarSub, pnode.GetChild(1).tok)

    if typ == grammar_nt.dq_string:
        dq = cast(DoubleQuoted, pnode.GetChild(1).tok)
        # sugar: ^"..." is short for ^["..."]
        if pnode.GetChild(0).typ == Id.Left_CaretDoubleQuote:
            return expr.Literal(dq)
        return dq

    if typ == grammar_nt.sq_string:
        return cast(SingleQuoted, pnode.GetChild(1).tok)

    if typ == grammar_nt.simple_var_sub:
        tok = pnode.GetChild(0).tok

        if tok.id == Id.VSub_DollarName:  # $foo is disallowed
            bare = lexer.TokenSliceLeft(tok, 1)
            p_die(
                'In expressions, remove $ and use `%s`, or sometimes "$%s"'
                % (bare, bare), tok)

        # $? is allowed
        return SimpleVarSub(tok)

    #
    # Terminals
    #

    tok = pnode.tok
    if typ == Id.Expr_Name:
        return expr.Var(tok, lexer.TokenVal(tok))

    # Everything else is an expr.Const
    tok_str = lexer.TokenVal(tok)
    # Remove underscores from 1_000_000.  The lexer is responsible for
    # validation.
    c_under = tok_str.replace('_', '')

    if typ == Id.Expr_DecInt:
        ok, big_int = mops.FromStr2(c_under)
        if not ok:
            p_die('Decimal int constant is too large', tok)
        cval = value.Int(big_int)  # type: value_t

    elif typ == Id.Expr_BinInt:
        assert c_under[:2] in ('0b', '0B'), c_under
        ok, big_int = mops.FromStr2(c_under[2:], 2)
        if not ok:
            p_die('Binary int constant is too large', tok)
        cval = value.Int(big_int)

    elif typ == Id.Expr_OctInt:
        assert c_under[:2] in ('0o', '0O'), c_under
        ok, big_int = mops.FromStr2(c_under[2:], 8)
        if not ok:
            p_die('Octal int constant is too large', tok)
        cval = value.Int(big_int)

    elif typ == Id.Expr_HexInt:
        assert c_under[:2] in ('0x', '0X'), c_under
        ok, big_int = mops.FromStr2(c_under[2:], 16)
        if not ok:
            p_die('Hex int constant is too large', tok)
        cval = value.Int(big_int)

    elif typ == Id.Expr_Float:
        # Note: float() in mycpp/gc_builtins.cc currently uses strtod
        # I think this never raises ValueError, because the lexer
        # should only accept strings that strtod() does?
        cval = value.Float(float(c_under))

    elif typ == Id.Expr_Null:
        cval = value.Null

    elif typ == Id.Expr_True:
        cval = value.Bool(True)

    elif typ == Id.Expr_False:
        cval = value.Bool(False)

    elif typ == Id.Char_OneChar:  # \n
        assert len(tok_str) == 2, tok_str
        s = consts.LookupCharC(lexer.TokenSliceLeft(tok, 1))
        cval = value.Str(s)

    elif typ == Id.Char_YHex:  # \yff
        assert len(tok_str) == 4, tok_str
        hex_str = lexer.TokenSliceLeft(tok, 2)
        s = chr(int(hex_str, 16))
        cval = value.Str(s)

    elif typ == Id.Char_UBraced:  # \u{123}
        hex_str = lexer.TokenSlice(tok, 3, -1)
        code_point = int(hex_str, 16)
        s = j8.Utf8Encode(code_point)
        cval = value.Str(s)

    else:
        raise AssertionError(typ)

    return expr.Const(tok, cval)
|
802 |
|
def _CheckLhs(self, lhs):
    # type: (expr_t) -> None
    """Check that a subscript / attribute chain is rooted at a variable.

    Follows the 'obj' field of Subscript and Attribute nodes until a Var
    is reached.  Any other kind of node is a parse error.
    """
    UP_lhs = lhs
    with tagswitch(lhs) as case:
        if case(expr_e.Var):
            # Reached the root of something like setvar a.b.c[i] = 42
            pass

        elif case(expr_e.Subscript):
            lhs = cast(Subscript, UP_lhs)
            self._CheckLhs(lhs.obj)  # check what's being subscripted

        elif case(expr_e.Attribute):
            lhs = cast(Attribute, UP_lhs)
            self._CheckLhs(lhs.obj)  # check the attribute's object

        else:
            # Illegal - e.g. setglobal {}["key"] = 42
            p_die("Subscript/Attribute not allowed on this LHS expression",
                  location.TokenForExpr(lhs))
|
824 |
|
def _LhsExprList(self, p_node):
    # type: (PNode) -> List[y_lhs_t]
    """lhs_list: expr (',' expr)*

    Returns one assignable location per expression: the 'left' field of a
    Var, or the Subscript / Attribute node itself after validation.  Any
    other kind of expression is a parse error.
    """
    assert p_node.typ == grammar_nt.lhs_list

    places = []  # type: List[y_lhs_t]
    n = p_node.NumChildren()
    for i in xrange(0, n, 2):  # expressions are at even indices
        p = p_node.GetChild(i)
        #self.p_printer.Print(p)

        e = self.Expr(p)
        UP_e = e
        with tagswitch(e) as case:
            if case(expr_e.Var):
                e = cast(expr.Var, UP_e)
                places.append(e.left)

            elif case(expr_e.Subscript):
                e = cast(Subscript, UP_e)
                self._CheckLhs(e)
                places.append(e)

            elif case(expr_e.Attribute):
                e = cast(Attribute, UP_e)
                self._CheckLhs(e)
                if e.op.id != Id.Expr_Dot:
                    # e.g. setvar obj->method is not valid
                    p_die("Can't assign to this attribute expr", e.op)
                places.append(e)

            else:
                pass  # work around mycpp bug

                # TODO: could blame arbitrary expr_t, but this works most
                # of the time
                blame = loc.Missing  # type: loc_t
                if p.tok:
                    blame = p.tok
                p_die("Can't assign to this expression", blame)

    return places
|
868 |
|
def MakeVarDecl(self, p_node):
    # type: (PNode) -> command.VarDecl
    """
    ysh_var_decl: name_type_list ['=' testlist] end_stmt

    Returns a command.VarDecl whose first field is None; the caller should
    fill in the keyword token.  The right-hand side is None when there is
    no '=' part.
    """
    assert p_node.typ == grammar_nt.ysh_var_decl

    lhs = self._NameTypeList(p_node.GetChild(0))  # could be a tuple

    # This syntax is confusing, and different than JavaScript
    #    var x, y = 1, 2
    # But this is useful:
    #    var flag, i = parseArgs(spec, argv)

    rhs = None  # type: Optional[expr_t]
    if p_node.NumChildren() >= 3:
        rhs = self.Expr(p_node.GetChild(2))

    return command.VarDecl(None, lhs, rhs)
|
891 |
|
def MakeMutation(self, p_node):
    # type: (PNode) -> command.Mutation
    """
    ysh_mutation: lhs_list (augassign | '=') testlist end_stmt

    Returns a command.Mutation whose first field is None.  Assigning to
    more than one location is only allowed with plain '='.
    """
    assert p_node.typ == grammar_nt.ysh_mutation

    places = self._LhsExprList(p_node.GetChild(0))  # could be a tuple
    assign_tok = p_node.GetChild(1).tok

    is_multiple = len(places) > 1
    if is_multiple and assign_tok.id != Id.Arith_Equal:
        p_die('Multiple assignment must use =', assign_tok)

    rhs = self.Expr(p_node.GetChild(2))
    return command.Mutation(None, places, assign_tok, rhs)
|
905 |
|
def _EggexFlag(self, p_node):
    # type: (PNode) -> EggexFlag
    """Transform a flag node with 1 or 2 children into an EggexFlag.

    With one child, the first field is False and the child's token is the
    flag.  With two children, the first field is True and the second
    child's token is the flag.
    """
    n = p_node.NumChildren()
    if n != 1 and n != 2:
        raise AssertionError()

    # The flag token is always the last child
    has_prefix = (n == 2)
    return EggexFlag(has_prefix, p_node.GetChild(n - 1).tok)
|
915 |
|
def _Eggex(self, p_node):
    # type: (PNode) -> Eggex
    """
    eggex: '/' regex [';' re_flag* [';' Expr_Name] ] '/'

    Flags are canonicalized only when the translation preference is absent
    or is 'ERE'; otherwise the canonical flags field is None.
    """
    left = p_node.GetChild(0).tok
    regex = self._Regex(p_node.GetChild(1))

    flags = []  # type: List[EggexFlag]
    trans_pref = None  # type: Optional[Token]

    i = 2
    if p_node.GetChild(i).typ == Id.Op_Semi:  # first ';' starts the flags
        i += 1
        current = p_node.GetChild(i)
        while current.typ == grammar_nt.re_flag:
            flags.append(self._EggexFlag(current))
            i += 1
            current = p_node.GetChild(i)

        # second ';' is followed by the translation preference
        if current.typ == Id.Op_Semi:
            trans_pref = p_node.GetChild(i + 1).tok

    # Canonicalize and validate flags for ERE only.  Default is ERE.
    if trans_pref is None or lexer.TokenVal(trans_pref) == 'ERE':
        canonical_flags = regex_translate.CanonicalFlags(flags)
    else:
        canonical_flags = None

    return Eggex(left, regex, flags, trans_pref, canonical_flags)
|
949 |
|
def YshCasePattern(self, pnode):
    # type: (PNode) -> pat_t
    """Transform a ysh_case_pat node into a pat_t.

    A pattern that starts with '(' is either pat_else, or a pat_exprs
    group, which becomes pat.YshExprs.  Otherwise the pattern must be an
    eggex.
    """
    assert pnode.typ == grammar_nt.ysh_case_pat, pnode

    pattern = pnode.GetChild(0)
    typ = pattern.typ
    if typ == Id.Op_LParen:
        # pat_exprs or pat_else follows the paren
        pattern = pnode.GetChild(1)
        typ = pattern.typ

        if typ == grammar_nt.pat_else:
            return pat.Else

        if typ == grammar_nt.pat_exprs:
            exprs = []  # type: List[expr_t]
            num_children = pattern.NumChildren()
            for i in xrange(num_children):
                child = pattern.GetChild(i)
                # Children that aren't 'expr' nodes are skipped
                if child.typ == grammar_nt.expr:
                    exprs.append(self.Expr(child))
            return pat.YshExprs(exprs)

    if typ == grammar_nt.eggex:
        return self._Eggex(pattern)

    raise AssertionError()
|
977 |
|
def _BlockArg(self, p_node):
    # type: (PNode) -> expr_t
    """Transform an argument node that must be a single plain expression.

    Anything with more than one child is a parse error.
    """
    # It can only be an expression, not a=42, or ...expr
    if p_node.NumChildren() != 1:
        p_die('Invalid block expression argument', p_node.tok)

    return self.Expr(p_node.GetChild(0))
|
988 |
|
def _Argument(self, p_node, after_semi, arglist):
    # type: (PNode, bool, ArgList) -> None
    """
    argument: (
      test [comp_for]
    | test '=' test  # named arg
    | '...' test  # var args
    )

    Appends the transformed argument to arglist.pos_args or
    arglist.named_args.

    Args:
      p_node: the 'argument' node
      after_semi: whether the argument is in the group after ';'.
                  Positional args are an error there, and a spread is
                  recorded as a named arg with no name.
      arglist: the ArgList to mutate
    """
    pos_args = arglist.pos_args
    named_args = arglist.named_args

    assert p_node.typ == grammar_nt.argument, p_node
    n = p_node.NumChildren()

    if n == 1:  # plain positional arg
        only = p_node.GetChild(0)
        if after_semi:
            p_die(POS_ARG_MISPLACED, only.tok)
        pos_args.append(self.Expr(only))
        return

    if n == 2:
        # Note: We allow multiple spreads, just like Julia.  They are
        # concatenated as in lists and dicts.
        tok0 = p_node.GetChild(0).tok
        if tok0.id == Id.Expr_Ellipsis:
            spread_expr = expr.Spread(tok0, self.Expr(p_node.GetChild(1)))
            if after_semi:  # f(; ... named)
                named_args.append(NamedArg(None, spread_expr))
            else:  # f(...named)
                pos_args.append(spread_expr)
            return

        # Note: generator expression not implemented
        if p_node.GetChild(1).typ == grammar_nt.comp_for:
            elt_node = p_node.GetChild(0)
            if after_semi:
                p_die(POS_ARG_MISPLACED, elt_node.tok)

            elt = self.Expr(elt_node)
            comp = self._CompFor(p_node.GetChild(1))
            pos_args.append(expr.GeneratorExp(elt, [comp]))
            return

        raise AssertionError()

    if n == 3:  # named args can come before or after the semicolon
        # children are: name '=' value
        named = NamedArg(
            p_node.GetChild(0).tok, self.Expr(p_node.GetChild(2)))
        named_args.append(named)
        return

    raise AssertionError()
|
1044 |
|
def _ArgGroup(self, p_node, after_semi, arglist):
    # type: (PNode, bool, ArgList) -> None
    """Convert every 'argument' child of an arg_group node.

    arg_group: argument (',' argument)* [',']

    Children that are not 'argument' nodes are skipped.
    """
    num_children = p_node.NumChildren()
    i = 0
    while i < num_children:
        node = p_node.GetChild(i)
        if node.typ == grammar_nt.argument:
            self._Argument(node, after_semi, arglist)
        i += 1
|
1054 |
|
def _ArgList(self, p_node, arglist):
    # type: (PNode, ArgList) -> None
    """Fill in arglist from an arglist / arglist3 node (funcs and procs).

    arglist: (
           [arg_group]
      [';' [arg_group]]
    )

    arglist3: ...

    Walks the children left to right.  Each piece is optional, and the walk
    stops as soon as the children run out.
    """
    num_children = p_node.NumChildren()
    if num_children == 0:
        return

    pos = 0  # index of the next child to look at

    # Group before the first semicolon
    node = p_node.GetChild(pos)
    if node.typ == grammar_nt.arg_group:
        self._ArgGroup(node, False, arglist)
        pos += 1
    if pos >= num_children:
        return

    # First semicolon
    node = p_node.GetChild(pos)
    if node.typ == Id.Op_Semi:
        arglist.semi_tok = node.tok
        pos += 1
    if pos >= num_children:
        return

    # Group after the first semicolon
    node = p_node.GetChild(pos)
    if node.typ == grammar_nt.arg_group:
        self._ArgGroup(node, True, arglist)
        pos += 1
    if pos >= num_children:
        return

    # Anything left is the special third group, which may hold a block
    # expression.  It exists only in arglist3, which is used for procs.
    assert p_node.typ == grammar_nt.arglist3, p_node

    # Second semicolon
    node = p_node.GetChild(pos)
    if node.typ == Id.Op_Semi:
        arglist.semi_tok2 = node.tok
        pos += 1
    if pos >= num_children:
        return

    # Block expression
    node = p_node.GetChild(pos)
    if node.typ == grammar_nt.argument:
        arglist.block_expr = self._BlockArg(node)
|
1114 |
|
def ProcCallArgs(self, pnode, arglist):
    # type: (PNode, ArgList) -> None
    """Fill in arglist from the typed args of a proc call.

    ysh_eager_arglist: '(' [arglist3] ')'
    ysh_lazy_arglist: '[' [arglist] ']'
    """
    num_children = pnode.NumChildren()
    if num_children == 3:
        # Opening delimiter, the arg list, closing delimiter
        self._ArgList(pnode.GetChild(1), arglist)
    elif num_children != 2:  # 2 children means empty: f()
        raise AssertionError()
|
1132 |
|
def _TypeExpr(self, pnode):
    # type: (PNode) -> TypeExpr
    """Convert a type_expr node, recursing into its type parameters.

    type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]

    The 'params' field is only assigned when the node has more than the
    name child.
    """
    assert pnode.typ == grammar_nt.type_expr, pnode.typ

    result = TypeExpr.CreateNull()  # don't allocate children

    name_tok = pnode.GetChild(0).tok
    result.tok = name_tok
    result.name = lexer.TokenVal(name_tok)

    num_children = pnode.NumChildren()
    if num_children != 1:
        type_params = []  # type: List[TypeExpr]
        # Child 1 is '['; type_expr nodes sit at 2, 4, ... with a comma or
        # the closing ']' in between
        for i in xrange(2, num_children, 2):
            type_params.append(self._TypeExpr(pnode.GetChild(i)))
        result.params = type_params

    return result
|
1157 |
|
def _Param(self, pnode):
    # type: (PNode) -> Param
    """Convert a param node into a Param.

    param: Expr_Name [type_expr] ['=' expr]

    The number of children determines which optional parts are present:
      1: proc p(a)
      2: proc p(a Int)
      3: proc p(a = 3)
      4: proc p(a Int = 3)
    """
    assert pnode.typ == grammar_nt.param

    name_tok = pnode.GetChild(0).tok
    assert name_tok.id == Id.Expr_Name, name_tok

    num_children = pnode.NumChildren()

    type_ = None  # type: TypeExpr
    default_val = None  # type: expr_t

    # A type, when present, is always the second child
    if num_children == 2 or num_children == 4:
        type_ = self._TypeExpr(pnode.GetChild(1))

    # A default value, when present, is always the last child (after '=')
    if num_children == 3 or num_children == 4:
        default_val = self.Expr(pnode.GetChild(num_children - 1))

    return Param(name_tok, lexer.TokenVal(name_tok), type_, default_val)
|
1191 |
|
def _ParamGroup(self, p_node):
    # type: (PNode) -> ParamGroup
    """Convert a param_group node into a ParamGroup.

    param_group:
      (param ',')*
      [ (param | '...' Expr_Name) [,] ]
    """
    assert p_node.typ == grammar_nt.param_group, p_node

    params = []  # type: List[Param]
    rest_of = None  # type: Optional[RestParam]

    # Only every other child is examined
    for i in xrange(0, p_node.NumChildren(), 2):
        child = p_node.GetChild(i)
        kind = child.typ

        if kind == grammar_nt.param:
            params.append(self._Param(child))

        elif kind == Id.Expr_Ellipsis:
            # The rest param's name is the child right after '...'
            name_tok = p_node.GetChild(i + 1).tok
            rest_of = RestParam(name_tok, lexer.TokenVal(name_tok))

    return ParamGroup(params, rest_of)
|
1218 |
|
def Proc(self, p_node):
    # type: (PNode) -> proc_sig_t
    """Convert a ysh_proc parse node into a proc signature.

    ysh_proc: (
      [ '('
              [ param_group ]    # word params, with defaults
        [ ';' [ param_group ] ]  # positional typed params, with defaults
        [ ';' [ param_group ] ]  # named params, with defaults
        [ ';' Expr_Name ]        # optional block param, with no type or default
        ')'
      ]
      '{'  # opening { for pgen2
    )

    NOTE(review): the grammar comment above describes the block param as a
    bare Expr_Name, but the code below reads it as a param_group and allows
    the type 'Command' - confirm against the grammar file.

    Returns:
      proc_sig.Open when there are no parens at all (proc f {), otherwise a
      proc_sig.Closed with the word, positional and named groups and the
      block param filled in as far as they are present.

    Invalid signatures are reported with p_die(): a word param whose type
    is not Str or Ref, type parameters on a word or block param, more than
    one block param, a rest param in the block group, or a block param
    whose type is not Command.
    """
    assert p_node.typ == grammar_nt.ysh_proc

    n = p_node.NumChildren()
    if n == 1:  # proc f {
        return proc_sig.Open

    # Allocated once.  Groups that are absent keep their CreateNull() value.
    sig = proc_sig.Closed.CreateNull(alloc_lists=True)  # no params

    if n == 3:  # proc f () {
        # Only '(' ')' '{' are present, so there is nothing to fill in
        return sig

    # proc f( three param groups, and block group )

    # Word args
    i = 1
    child = p_node.GetChild(i)
    if child.typ == grammar_nt.param_group:
        sig.word = self._ParamGroup(child)

        # Validate word args
        for word in sig.word.params:
            if word.type:
                if word.type.name not in ('Str', 'Ref'):
                    p_die('Word params may only have type Str or Ref',
                          word.type.tok)
                if word.type.params is not None:
                    p_die('Unexpected type parameters', word.type.tok)

        i += 2  # skip the group and the token that follows it
    else:
        i += 1  # the group is absent; skip the single token

    if i >= n:
        return sig

    # Positional args
    child = p_node.GetChild(i)
    if child.typ == grammar_nt.param_group:
        sig.positional = self._ParamGroup(child)
        i += 2
    else:
        i += 1

    if i >= n:
        return sig

    # Keyword args
    child = p_node.GetChild(i)
    if child.typ == grammar_nt.param_group:
        sig.named = self._ParamGroup(child)
        i += 2
    else:
        i += 1

    if i >= n:
        return sig

    # Block param
    child = p_node.GetChild(i)
    if child.typ == grammar_nt.param_group:
        group = self._ParamGroup(child)
        params = group.params
        if len(params) > 1:
            p_die('Only 1 block param is allowed', params[1].blame_tok)
        if group.rest_of:
            p_die("Rest param isn't allowed for blocks",
                  group.rest_of.blame_tok)

        if len(params) == 1:
            if params[0].type:
                if params[0].type.name != 'Command':
                    p_die('Block param must have type Command',
                          params[0].type.tok)
                if params[0].type.params is not None:
                    p_die('Unexpected type parameters', params[0].type.tok)

            sig.block_param = params[0]

    return sig
|
1313 |
|
def YshFunc(self, p_node, out):
    # type: (PNode, Func) -> None
    """Fill in the name and param groups of 'out' from a ysh_func node.

    ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
    """
    assert p_node.typ == grammar_nt.ysh_func

    out.name = p_node.GetChild(0).tok

    num_children = p_node.NumChildren()

    # Child 1 is '(', so the first candidate group is child 2
    first = p_node.GetChild(2)
    if first.typ == grammar_nt.param_group:
        out.positional = self._ParamGroup(first)
        next_index = 4  # skip the group and the token after it
    else:
        next_index = 3  # no positional group; skip the single token

    if next_index < num_children:
        second = p_node.GetChild(next_index)
        if second.typ == grammar_nt.param_group:
            out.named = self._ParamGroup(second)
|
1341 |
|
1342 | #
|
1343 | # Eggex Language
|
1344 | #
|
1345 |
|
def _RangeCharSingleQuoted(self, p_node):
    # type: (PNode) -> Optional[CharCode]
    """Convert a single-quoted range endpoint, like 'a' in 'a'-'b'.

    Returns:
      A CharCode when the range_char is an sq_string holding exactly one
      character; None when the range_char is not an sq_string.

    An empty or multi-character string is reported with p_die().
    """
    assert p_node.typ == grammar_nt.range_char, p_node

    child0 = p_node.GetChild(0)
    if child0.typ != grammar_nt.sq_string:
        return None  # caller handles the other kinds of endpoint

    sq_part = cast(SingleQuoted, child0.GetChild(1).tok)
    num_chars = len(sq_part.sval)

    if num_chars == 1:
        return CharCode(sq_part.left, ord(sq_part.sval[0]), False)

    if num_chars == 0:
        p_die("Quoted range char can't be empty", loc.WordPart(sq_part))
    else:
        p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
|
1365 |
|
def _OtherRangeToken(self, p_node):
    # type: (PNode) -> Token
    """Return the token for a range endpoint that is not single-quoted.

    range_char: Expr_Name | Expr_DecInt | sq_string | char_literal

    For a char_literal, the token of its first child is returned as is.
    Otherwise the node's own token must be an Expr_Name or Expr_DecInt of
    length 1, as in a-z or 0-9; a longer token is reported with p_die().
    """
    assert p_node.typ == grammar_nt.range_char, p_node

    first = p_node.GetChild(0)
    if first.typ == grammar_nt.char_literal:
        # e.g. the escaped byte at either end of a hex range
        return first.GetChild(0).tok

    endpoint = p_node.tok
    # a in a-z is Expr_Name
    # 0 in 0-9 is Expr_DecInt
    assert endpoint.id in (Id.Expr_Name, Id.Expr_DecInt), endpoint

    if endpoint.length == 1:
        return endpoint

    p_die(RANGE_POINT_TOO_LONG, endpoint)
|
1389 |
|
def _NonRangeChars(self, p_node):
    # type: (PNode) -> class_literal_term_t
    """Convert a range_char that stands alone, i.e. is not part of a range.

    Handles a single-quoted string, a char literal, or a bare name.
    """
    assert p_node.typ == grammar_nt.range_char, p_node

    child0 = p_node.GetChild(0)
    kind = child0.typ

    if kind == grammar_nt.sq_string:
        # The SingleQuoted part is stored on the second child
        return cast(SingleQuoted, child0.GetChild(1).tok)

    if kind == grammar_nt.char_literal:
        return word_compile.EvalCharLiteralForRegex(child0.tok)

    if kind == Id.Expr_Name:
        # Look up PerlClass and PosixClass; there is no negation here
        return self._NameInClass(None, child0.tok)

    raise AssertionError()
|
1411 |
|
def _ClassLiteralTerm(self, p_node):
    # type: (PNode) -> class_literal_term_t
    """Convert one term inside a character class literal.

    class_literal_term:
      range_char ['-' range_char ]
    | '@' Expr_Name  # splice
    | '!' Expr_Name  # negate char class
      ...

    Any other kind of term is reported with p_die().
    """
    assert p_node.typ == grammar_nt.class_literal_term, p_node

    first = p_node.GetChild(0)
    kind = first.typ

    if kind == grammar_nt.range_char:
        num_children = p_node.NumChildren()

        if num_children == 1:
            return self._NonRangeChars(first)

        if num_children != 3:
            raise AssertionError()

        # 'a'-'z' etc.
        assert p_node.GetChild(1).typ == Id.Arith_Minus, p_node
        last = p_node.GetChild(2)

        # Try the single-quoted form first, then fall back to the other
        # kinds of endpoint
        start = self._RangeCharSingleQuoted(first)
        if start is None:
            start_tok = self._OtherRangeToken(first)
            start = word_compile.EvalCharLiteralForRegex(start_tok)

        end = self._RangeCharSingleQuoted(last)
        if end is None:
            end_tok = self._OtherRangeToken(last)
            end = word_compile.EvalCharLiteralForRegex(end_tok)

        return CharRange(start, end)

    if kind == Id.Expr_At:
        name_tok = p_node.GetChild(1).tok
        return class_literal_term.Splice(name_tok, lexer.TokenVal(name_tok))

    if kind == Id.Expr_Bang:
        # The '!' token is passed along as the negation marker
        return self._NameInClass(first.tok, p_node.GetChild(1).tok)

    p_die("This kind of class literal term isn't implemented", first.tok)
|
1462 |
|
def _ClassLiteral(self, p_node):
    # type: (PNode) -> List[class_literal_term_t]
    """Convert the terms of a class literal.

    class_literal: '[' class_literal_term+ ']'
    """
    assert p_node.typ == grammar_nt.class_literal

    terms = []  # type: List[class_literal_term_t]

    closing = p_node.NumChildren() - 1  # index of the ']'
    i = 1  # skip the '['
    while i < closing:
        terms.append(self._ClassLiteralTerm(p_node.GetChild(i)))
        i += 1

    return terms
|
1473 |
|
def _NameInRegex(self, negated_tok, tok):
    # type: (Token, Token) -> re_t
    """Resolve a bare name in an eggex, outside a character class.

    Args:
      negated_tok: the '!' token when the name is negated, otherwise None
      tok: the name token

    Returns:
      re.Primitive for 'dot', PosixClass / PerlClass for names found in
      POSIX_CLASSES / PERL_CLASSES, or re.Splice for a name that starts
      with an uppercase letter.

    Reported with p_die(): negating 'dot', negating a splice, and any name
    that matches none of the above.
    """
    tok_str = lexer.TokenVal(tok)
    if tok_str == 'dot':
        if negated_tok:
            p_die("Can't negate this symbol", tok)
        return re.Primitive(tok, Id.Eggex_Dot)

    if tok_str in POSIX_CLASSES:
        return PosixClass(negated_tok, tok_str)

    perl = PERL_CLASSES.get(tok_str)
    if perl is not None:
        return PerlClass(negated_tok, perl)

    if tok_str[0].isupper():  # e.g. HexDigit
        if negated_tok:
            # negated_tok is not passed to re.Splice, so the negation would
            # be silently dropped.  Reject it like 'dot' above.
            p_die("Can't negate this symbol", tok)
        return re.Splice(tok, tok_str)

    p_die("%r isn't a character class" % tok_str, tok)
|
1493 |
|
def _NameInClass(self, negated_tok, tok):
    # type: (Token, Token) -> class_literal_term_t
    """Resolve a bare name inside a character class literal.

    Unlike _NameInRegex(), a one-character name such as 'd' is a literal
    character here, not a class, and 'dot' has no special meaning.
    """
    name = lexer.TokenVal(tok)

    if len(name) != 1:
        # Multi-character names must be a class: digit, word, but not d, w
        if name in POSIX_CLASSES:
            return PosixClass(negated_tok, name)

        perl = PERL_CLASSES.get(name)
        if perl is None:
            p_die("%r isn't a character class" % name, tok)
        return PerlClass(negated_tok, perl)

    # A bare, unquoted character literal.  In the grammar, this is expressed
    # as range_char without an ending.  d is NOT 'digit', it's a literal 'd'!

    # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
    assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)

    if negated_tok:  # [!d] is not allowed, only [!digit]
        p_die("Can't negate this symbol", tok)
    return word_compile.EvalCharLiteralForRegex(tok)
|
1519 |
|
def _ReAtom(self, p_atom):
    # type: (PNode) -> re_t
    """Convert one re_atom node, dispatching on the type of its first child.

    re_atom: ( char_literal | ...
    """
    assert p_atom.typ == grammar_nt.re_atom, p_atom.typ

    child0 = p_atom.GetChild(0)
    typ0 = child0.typ
    tok0 = child0.tok

    #
    # Non-terminals
    #

    if typ0 == grammar_nt.class_literal:
        return re.CharClassLiteral(False, self._ClassLiteral(child0))

    if typ0 == grammar_nt.sq_string:
        return cast(SingleQuoted, child0.GetChild(1).tok)

    if typ0 == grammar_nt.char_literal:
        # Note: ERE doesn't seem to support escapes like Python
        #    https://docs.python.org/3/library/re.html
        # We might want to do a translation like this;
        #
        # \u{03bc} -> \u03bc
        # \x00 -> \x00
        # \n -> \n

        # Must be Id.Char_{OneChar,Hex,UBraced}
        assert consts.GetKind(tok0.id) == Kind.Char
        chars = word_compile.EvalCStringToken(tok0.id, lexer.TokenVal(tok0))
        return re.LiteralChars(tok0, chars)

    #
    # Special punctuation
    #

    if typ0 == Id.Expr_Dot:  # .
        return re.Primitive(tok0, Id.Eggex_Dot)

    if typ0 == Id.Arith_Caret:  # ^
        return re.Primitive(tok0, Id.Eggex_Start)

    if typ0 == Id.Expr_Dollar:  # $
        return re.Primitive(tok0, Id.Eggex_End)

    if typ0 == Id.Expr_Name:
        # d digit -> PosixClass PerlClass etc.
        return self._NameInRegex(None, tok0)

    if typ0 == Id.Expr_Symbol:
        # Validate symbols here, like we validate PerlClass, etc.
        symbol = lexer.TokenVal(tok0)
        if symbol == '%start':
            return re.Primitive(tok0, Id.Eggex_Start)
        if symbol == '%end':
            return re.Primitive(tok0, Id.Eggex_End)
        p_die("Unexpected token %r in regex" % symbol, tok0)

    if typ0 == Id.Expr_At:
        # | '@' Expr_Name
        name_tok = p_atom.GetChild(1).tok
        return re.Splice(tok0, lexer.TokenVal(name_tok))

    if typ0 == Id.Expr_Bang:
        # | '!' (Expr_Name | class_literal)
        # | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
        if p_atom.NumChildren() != 2:
            # Note: !! conflicts with shell history
            p_die(
                "Backtracking with !! isn't implemented (requires Python/PCRE)",
                p_atom.GetChild(1).tok)

        negated = p_atom.GetChild(1)
        if negated.typ == grammar_nt.class_literal:
            return re.CharClassLiteral(True, self._ClassLiteral(negated))
        return self._NameInRegex(tok0, negated.tok)

    if typ0 == Id.Op_LParen:
        # | '(' regex ')'

        # Note: in ERE (d+) is the same as <d+>.  That is, Group becomes
        # Capture.
        return re.Group(self._Regex(p_atom.GetChild(1)))

    if typ0 == Id.Arith_Less:
        # | '<' 'capture' regex ['as' Expr_Name] [':' Expr_Name] '>'

        num_children = p_atom.NumChildren()
        assert num_children in (4, 6, 8), num_children

        # < capture d+ >
        regex = self._Regex(p_atom.GetChild(2))

        as_name = None  # type: Optional[Token]
        func_name = None  # type: Optional[Token]

        pos = 3  # points at any of   >   as   :

        if p_atom.GetChild(pos).typ == Id.Expr_As:
            as_name = p_atom.GetChild(pos + 1).tok
            pos += 2

        if p_atom.GetChild(pos).typ == Id.Arith_Colon:
            func_name = p_atom.GetChild(pos + 1).tok

        return re.Capture(regex, as_name, func_name)

    raise AssertionError(typ0)
|
1632 |
|
def _RepeatOp(self, p_repeat):
    # type: (PNode) -> re_repeat_t
    """Convert a repeat_op node.

    repeat_op: '+' | '*' | '?'
             | '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'

    Returns the operator token itself for + * ? and for an exact count like
    {3}, and a re_repeat.Range for the comma forms.  A braced form whose
    second child is not a repeat_range is reported with p_die().
    """
    assert p_repeat.typ == grammar_nt.repeat_op, p_repeat

    op_tok = p_repeat.GetChild(0).tok
    op_id = op_tok.id

    if op_id in (Id.Arith_Plus, Id.Arith_Star, Id.Arith_QMark):
        return op_tok  # a+ a* a?

    if op_id != Id.Op_LBrace:
        raise AssertionError(op_id)

    body = p_repeat.GetChild(1)
    if body.typ != grammar_nt.repeat_range:
        # e.g. dot{N *} is .*?
        p_die("Perl-style repetition isn't implemented with libc", body.tok)

    # repeat_range: (
    #     Expr_DecInt [',']
    #   | ',' Expr_DecInt
    #   | Expr_DecInt ',' Expr_DecInt
    # )

    num_children = body.NumChildren()

    if num_children == 1:  # {3}
        # different operator than + * ?
        return body.GetChild(0).tok

    if num_children == 2:
        first = body.GetChild(0)
        if first.typ == Id.Expr_DecInt:  # {1,} - number, then comma
            lower = first.tok
            return re_repeat.Range(lower, lexer.TokenVal(lower), '', None)

        # {,3} - comma, then number
        upper = body.GetChild(1).tok
        return re_repeat.Range(None, '', lexer.TokenVal(upper), upper)

    if num_children == 3:  # {1,3}
        lower = body.GetChild(0).tok
        upper = body.GetChild(2).tok
        return re_repeat.Range(lower, lexer.TokenVal(lower),
                               lexer.TokenVal(upper), upper)

    raise AssertionError(num_children)
|
1684 |
|
def _ReAlt(self, p_node):
    # type: (PNode) -> re_t
    """Convert a sequence of atoms, each optionally followed by a repeat op.

    re_alt: (re_atom [repeat_op])+

    A single item is returned as is; several are wrapped in re.Seq.
    """
    assert p_node.typ == grammar_nt.re_alt

    num_children = p_node.NumChildren()
    parts = []  # type: List[re_t]

    pos = 0
    while pos < num_children:
        item = self._ReAtom(p_node.GetChild(pos))
        pos += 1

        # Attach the repeat op, if the next child is one
        if pos < num_children:
            maybe_op = p_node.GetChild(pos)
            if maybe_op.typ == grammar_nt.repeat_op:
                item = re.Repeat(item, self._RepeatOp(maybe_op))
                pos += 1

        parts.append(item)

    if len(parts) != 1:
        return re.Seq(parts)
    return parts[0]
|
1708 |
|
def _Regex(self, p_node):
    # type: (PNode) -> re_t
    """Convert a regex node: alternatives separated by '|' or 'or'.

    regex: [re_alt] (('|'|'or') re_alt)*

    A single alternative is returned as is; several are wrapped in re.Alt.
    """
    assert p_node.typ == grammar_nt.regex

    num_children = p_node.NumChildren()
    alts = []  # type: List[re_t]

    pos = 0
    while pos < num_children:
        alts.append(self._ReAlt(p_node.GetChild(pos)))
        pos += 2  # step over the separator between alternatives

    if len(alts) != 1:
        return re.Alt(alts)
    return alts[0]
|
1726 |
|
1727 |
|
1728 | # vim: sw=4
|