OILS / ysh / expr_to_ast.py

1"""expr_to_ast.py."""
2from __future__ import print_function
3
4from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind
5from _devbuild.gen.syntax_asdl import (
6 Token,
7 SimpleVarSub,
8 loc,
9 loc_t,
10 DoubleQuoted,
11 SingleQuoted,
12 BracedVarSub,
13 CommandSub,
14 ShArrayLiteral,
15 command,
16 expr,
17 expr_e,
18 expr_t,
19 expr_context_e,
20 re,
21 re_t,
22 re_repeat,
23 re_repeat_t,
24 class_literal_term,
25 class_literal_term_t,
26 PosixClass,
27 PerlClass,
28 NameType,
29 y_lhs_t,
30 Comprehension,
31 Subscript,
32 Attribute,
33 proc_sig,
34 proc_sig_t,
35 Param,
36 RestParam,
37 ParamGroup,
38 NamedArg,
39 ArgList,
40 pat,
41 pat_t,
42 TypeExpr,
43 Func,
44 Eggex,
45 EggexFlag,
46 CharCode,
47 CharRange,
48)
49from _devbuild.gen.value_asdl import value, value_t
50from _devbuild.gen import grammar_nt
51from core.error import p_die
52from data_lang import j8
53from frontend import consts
54from frontend import lexer
55from frontend import location
56from mycpp import mops
57from mycpp import mylib
58from mycpp.mylib import log, tagswitch
59from osh import word_compile
60from ysh import expr_parse
61from ysh import regex_translate
62
63from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, cast
64if TYPE_CHECKING:
65 from pgen2.grammar import Grammar
66 from pgen2.pnode import PNode
67
68_ = log
69
70PERL_CLASSES = {
71 'd': 'd',
72 'w': 'w',
73 'word': 'w',
74 's': 's',
75}
76# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
77POSIX_CLASSES = [
78 'alnum',
79 'cntrl',
80 'lower',
81 'space',
82 'alpha',
83 'digit',
84 'print',
85 'upper',
86 'blank',
87 'graph',
88 'punct',
89 'xdigit',
90]
91# NOTE: There are also things like \p{Greek} that we could put in the
92# "non-sigil" namespace.
93
94RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"
95
96POS_ARG_MISPLACED = "Positional arg can't appear in group of named args"
97
98# Copied from pgen2/token.py to avoid dependency.
99NT_OFFSET = 256
100
101if mylib.PYTHON:
102
103 def MakeGrammarNames(ysh_grammar):
104 # type: (Grammar) -> Dict[int, str]
105
106 # TODO: Break this dependency
107 from frontend import lexer_def
108
109 names = {}
110
111 for id_name, k in lexer_def.ID_SPEC.id_str2int.items():
112 # Hm some are out of range
113 #assert k < 256, (k, id_name)
114
115 # TODO: Some tokens have values greater than NT_OFFSET
116 if k < NT_OFFSET:
117 names[k] = id_name
118
119 for k, v in ysh_grammar.number2symbol.items():
120 assert k >= NT_OFFSET, (k, v)
121 names[k] = v
122
123 return names
124
125
126class Transformer(object):
127 """Homogeneous parse tree -> heterogeneous AST ("lossless syntax tree")
128
129 pgen2 (Python's LL parser generator) doesn't have semantic actions like yacc,
130 so this "transformer" is the equivalent.
131
132 Files to refer to when modifying this class:
133
134 ysh/grammar.pgen2 (generates _devbuild/gen/grammar_nt.py)
135 frontend/syntax.asdl (generates _devbuild/gen/syntax_asdl.py)
136
137 Related examples:
138
139 opy/compiler2/transformer.py (Python's parse tree -> AST, ~1500 lines)
140 Python-2.7.13/Python/ast.c (the "real" CPython version, ~3600 lines)
141
142 Other:
143 frontend/parse_lib.py (turn on print_parse_tree)
144
145 Public methods:
146 Expr, MakeVarDecl, MakeMutation, Proc, YshFunc, YshCasePattern, ProcCallArgs
147 atom, trailer, etc. are private, named after productions in grammar.pgen2.
148 """
149
150 def __init__(self, gr):
151 # type: (Grammar) -> None
152 self.number2symbol = gr.number2symbol
153 if mylib.PYTHON:
154 names = MakeGrammarNames(gr)
155 # print raw nodes
156 self.p_printer = expr_parse.ParseTreePrinter(names)
157
158 def _LeftAssoc(self, p_node):
159 # type: (PNode) -> expr_t
160 """For an associative binary operation.
161
162 Examples:
163 xor_expr: and_expr ('xor' and_expr)*
164 term: factor (('*'|'/'|'%'|'div') factor)*
165
166 3 - 1 - 2 must be grouped as ((3 - 1) - 2).
167 """
168 # Note: Compare the iterative com_binary() method in
169 # opy/compiler2/transformer.py.
170
171 # Examples:
172 # - The PNode for '3 - 1' will have 3 children
173 # - The PNode for '3 - 1 - 2' will have 5 children
174
175 #self.p_printer.Print(p_node)
176
177 i = 1 # index of the operator
178 n = p_node.NumChildren()
179
180 left = self.Expr(p_node.GetChild(0))
181 while i < n:
182 op = p_node.GetChild(i)
183 right = self.Expr(p_node.GetChild(i + 1))
184
185 # create a new left node
186 left = expr.Binary(op.tok, left, right)
187 i += 2
188
189 return left
190
191 def _Trailer(self, base, p_trailer):
192 # type: (expr_t, PNode) -> expr_t
193 """
194 trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
195 | '.' NAME | '->' NAME | '::' NAME
196 )
197 """
198 tok0 = p_trailer.GetChild(0).tok
199 typ0 = p_trailer.GetChild(0).typ
200
201 if typ0 == Id.Op_LParen:
202 lparen = tok0
203 rparen = p_trailer.GetChild(-1).tok
204 arglist = ArgList(lparen, [], None, [], None, None, rparen)
205 if p_trailer.NumChildren() == 2: # ()
206 return expr.FuncCall(base, arglist)
207
208 p = p_trailer.GetChild(1) # the X in ( X )
209 assert p.typ == grammar_nt.arglist # f(x, y)
210 self._ArgList(p, arglist)
211 return expr.FuncCall(base, arglist)
212
213 if typ0 == Id.Op_LBracket:
214 p_args = p_trailer.GetChild(1)
215 assert p_args.typ == grammar_nt.subscriptlist
216
217 n = p_args.NumChildren()
218 if n == 1: # a[1] a[1:2] a[:] etc.
219 subscript = self._Subscript(p_args.GetChild(0))
220 else: # a[1, 2] a[1:2, :]
221 slices = [] # type: List[expr_t]
222 for i in xrange(0, n, 2):
223 slices.append(self._Subscript(p_args.GetChild(i)))
224 # expr.Tuple evaluates to List in YSH.
225 #
226 # Note that syntactically, a[1:2, 3:4] is the only way to
227 # get a List[Slice]. [1:2, 3:4] by itself is not allowed.
228 comma_tok = p_args.GetChild(1).tok
229 subscript = expr.Tuple(comma_tok, slices, expr_context_e.Store)
230
231 return Subscript(tok0, base, subscript)
232
233 if typ0 in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_RDArrow):
234 attr = p_trailer.GetChild(1).tok # will be Id.Expr_Name
235 return Attribute(base, tok0, attr, lexer.TokenVal(attr),
236 expr_context_e.Store)
237
238 raise AssertionError(typ0)
239
240 def _DictPair(self, p_node):
241 # type: (PNode) -> Tuple[expr_t, expr_t]
242 """
243 dict_pair: ( Expr_Name [':' test]
244 | '[' testlist ']' ':' test
245 | sq_string ':' test
246 | dq_string ':' test )
247 """
248 assert p_node.typ == grammar_nt.dict_pair
249
250 typ = p_node.GetChild(0).typ
251
252 if typ in (grammar_nt.sq_string, grammar_nt.dq_string):
253 key = self.Expr(p_node.GetChild(0)) # type: expr_t
254 val = self.Expr(p_node.GetChild(2))
255 return key, val
256
257 tok0 = p_node.GetChild(0).tok
258 id_ = tok0.id
259
260 if id_ == Id.Expr_Name:
261 key_str = value.Str(lexer.TokenVal(tok0))
262 key = expr.Const(tok0, key_str)
263 if p_node.NumChildren() >= 3:
264 val = self.Expr(p_node.GetChild(2))
265 else:
266 val = expr.Implicit
267
268 if id_ == Id.Op_LBracket: # {[x+y]: 'val'}
269 key = self.Expr(p_node.GetChild(1))
270 val = self.Expr(p_node.GetChild(4))
271 return key, val
272
273 return key, val
274
275 def _Dict(self, parent, p_node):
276 # type: (PNode, PNode) -> expr.Dict
277 """
278 dict: dict_pair (comma_newline dict_pair)* [comma_newline]
279 """
280 if p_node.typ == Id.Op_RBrace: # {}
281 return expr.Dict(parent.tok, [], [])
282
283 assert p_node.typ == grammar_nt.dict
284
285 keys = [] # type: List[expr_t]
286 values = [] # type: List[expr_t]
287
288 n = p_node.NumChildren()
289 for i in xrange(0, n, 2):
290 key, val = self._DictPair(p_node.GetChild(i))
291 keys.append(key)
292 values.append(val)
293
294 return expr.Dict(parent.tok, keys, values)
295
296 def _Tuple(self, parent):
297 # type: (PNode) -> expr_t
298
299 n = parent.NumChildren()
300
301 # (x) -- not a tuple
302 if n == 1:
303 return self.Expr(parent.GetChild(0))
304
305 # x, and (x,) aren't allowed
306 if n == 2:
307 p_die('Invalid trailing comma', parent.GetChild(1).tok)
308
309 elts = [] # type: List[expr_t]
310 for i in xrange(0, n, 2): # skip commas
311 p_node = parent.GetChild(i)
312 elts.append(self.Expr(p_node))
313
314 return expr.Tuple(parent.tok, elts,
315 expr_context_e.Store) # unused expr_context_e
316
317 def _TestlistComp(self, parent, p_node, id0):
318 # type: (PNode, PNode, Id_t) -> expr_t
319 """
320 testlist_comp:
321 (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
322 """
323 assert p_node.typ == grammar_nt.testlist_comp
324
325 n = p_node.NumChildren()
326 if n > 1 and p_node.GetChild(1).typ == grammar_nt.comp_for:
327 child0 = p_node.GetChild(0)
328 if child0.typ == grammar_nt.splat_expr:
329 p_die('Splat not currently supported', child0.tok)
330 elt = self.Expr(child0)
331
332 comp = self._CompFor(p_node.GetChild(1))
333 if id0 == Id.Op_LParen: # (x+1 for x in y)
334 return expr.GeneratorExp(elt, [comp])
335 if id0 == Id.Op_LBracket: # [x+1 for x in y]
336 return expr.ListComp(parent.tok, elt, [comp])
337 raise AssertionError()
338
339 if id0 == Id.Op_LParen:
340 # Parenthesized expression like (x+1) or (x)
341 if n == 1:
342 return self.Expr(p_node.GetChild(0))
343
344 # Tuples (1,) (1, 2) etc. - TODO: should be a list literal?
345 if p_node.GetChild(1).typ == Id.Arith_Comma:
346 return self._Tuple(p_node)
347
348 raise AssertionError()
349
350 if id0 == Id.Op_LBracket: # List [1,2,3]
351 elts = [] # type: List[expr_t]
352 for i in xrange(0, n, 2): # skip commas
353 child = p_node.GetChild(i)
354 if child.typ == grammar_nt.splat_expr:
355 p_die('Splat not currently supported', child.tok)
356 elts.append(self.Expr(child))
357
358 return expr.List(parent.tok, elts,
359 expr_context_e.Store) # unused expr_context_e
360
361 raise AssertionError(Id_str(id0))
362
363 def _Atom(self, parent):
364 # type: (PNode) -> expr_t
365 """Handle alternatives of 'atom' where there's more than one child."""
366
367 tok = parent.GetChild(0).tok
368 id_ = tok.id
369 n = parent.NumChildren()
370
371 if id_ == Id.Op_LParen:
372 # atom: '(' [yield_expr|testlist_comp] ')' | ...
373 if n == 2: # () is a tuple
374 assert (
375 parent.GetChild(1).typ == Id.Op_RParen), parent.GetChild(1)
376 return expr.Tuple(tok, [], expr_context_e.Store)
377
378 return self._TestlistComp(parent, parent.GetChild(1), id_)
379
380 if id_ == Id.Op_LBracket:
381 # atom: ... | '[' [testlist_comp] ']' | ...
382
383 if n == 2: # []
384 assert (parent.GetChild(1).typ == Id.Op_RBracket
385 ), parent.GetChild(1)
386 return expr.List(tok, [],
387 expr_context_e.Store) # unused expr_context_e
388
389 return self._TestlistComp(parent, parent.GetChild(1), id_)
390
391 if id_ == Id.Left_CaretBracket: # ^[42 + x]
392 child = self.Expr(parent.GetChild(1))
393 return expr.Literal(child)
394
395 if id_ == Id.Op_LBrace:
396 # atom: ... | '{' [Op_Newline] [dict] '}'
397 i = 1
398 if parent.GetChild(i).typ == Id.Op_Newline:
399 i += 1
400 return self._Dict(parent, parent.GetChild(i))
401
402 if id_ == Id.Arith_Amp:
403 n = parent.NumChildren()
404 if n >= 3:
405 p_die("Places in containers not implemented yet",
406 parent.GetChild(2).tok)
407
408 name_tok = parent.GetChild(1).tok
409 return expr.Place(name_tok, lexer.TokenVal(name_tok), [])
410
411 if id_ == Id.Expr_Func:
412 # STUB. This should really be a Func, not Lambda.
413 return expr.Lambda([], expr.Implicit)
414
415 # 100 M
416 # Ignoring the suffix for now
417 if id_ == Id.Expr_DecInt:
418 assert n > 1
419 p_die("Units suffix not implemented", parent.GetChild(1).tok)
420 #return self.Expr(parent.GetChild(0))
421
422 # 100.5 M
423 # Ignoring the suffix for now
424 if id_ == Id.Expr_Float:
425 assert n > 1
426 p_die("unix suffix implemented", parent.GetChild(1).tok)
427 #return self.Expr(parent.GetChild(0))
428
429 raise AssertionError(Id_str(id_))
430
431 def _NameType(self, p_node):
432 # type: (PNode) -> NameType
433 """ name_type: Expr_Name [':'] [type_expr] """
434 name_tok = p_node.GetChild(0).tok
435 typ = None # type: Optional[TypeExpr]
436
437 n = p_node.NumChildren()
438 if n == 2:
439 typ = self._TypeExpr(p_node.GetChild(1))
440 if n == 3:
441 typ = self._TypeExpr(p_node.GetChild(2))
442
443 return NameType(name_tok, lexer.TokenVal(name_tok), typ)
444
445 def _NameTypeList(self, p_node):
446 # type: (PNode) -> List[NameType]
447 """ name_type_list: name_type (',' name_type)* """
448 assert p_node.typ == grammar_nt.name_type_list
449 results = [] # type: List[NameType]
450
451 n = p_node.NumChildren()
452 for i in xrange(0, n, 2): # was children[::2]
453 results.append(self._NameType(p_node.GetChild(i)))
454 return results
455
456 def _CompFor(self, p_node):
457 # type: (PNode) -> Comprehension
458 """comp_for: 'for' exprlist 'in' or_test ['if' or_test]"""
459 lhs = self._NameTypeList(p_node.GetChild(1))
460 iterable = self.Expr(p_node.GetChild(3))
461
462 if p_node.NumChildren() >= 6:
463 cond = self.Expr(p_node.GetChild(5))
464 else:
465 cond = None
466
467 return Comprehension(lhs, iterable, cond)
468
469 def _CompareChain(self, parent):
470 # type: (PNode) -> expr_t
471 """comparison: expr (comp_op expr)*"""
472 cmp_ops = [] # type: List[Token]
473 comparators = [] # type: List[expr_t]
474 left = self.Expr(parent.GetChild(0))
475
476 i = 1
477 n = parent.NumChildren()
478 while i < n:
479 p = parent.GetChild(i)
480 op = p.GetChild(0).tok
481 if p.NumChildren() == 2:
482 # Blame the first token, and change its type
483 if op.id == Id.Expr_Not: # not in
484 op.id = Id.Node_NotIn
485 elif op.id == Id.Expr_Is: # is not
486 op.id = Id.Node_IsNot
487 else:
488 raise AssertionError()
489 else:
490 # is, <, ==, etc.
491 pass
492
493 cmp_ops.append(op)
494 i += 1
495 comparators.append(self.Expr(parent.GetChild(i)))
496 i += 1
497 return expr.Compare(left, cmp_ops, comparators)
498
499 def _Subscript(self, parent):
500 # type: (PNode) -> expr_t
501 """subscript: expr | [expr] ':' [expr]"""
502 typ0 = parent.GetChild(0).typ
503
504 n = parent.NumChildren()
505
506 if typ0 == grammar_nt.expr:
507 if n == 3: # a[1:2]
508 lower = self.Expr(parent.GetChild(0))
509 op_tok = parent.GetChild(1).tok
510 upper = self.Expr(parent.GetChild(2))
511
512 elif n == 2: # a[1:]
513 lower = self.Expr(parent.GetChild(0))
514 op_tok = parent.GetChild(1).tok
515 upper = None
516 else: # a[1]
517 return self.Expr(parent.GetChild(0))
518 else:
519 assert typ0 == Id.Arith_Colon
520 lower = None
521 if n == 1: # a[:]
522 op_tok = parent.GetChild(0).tok
523 upper = None
524 else: # a[:3]
525 op_tok = parent.GetChild(0).tok
526 upper = self.Expr(parent.GetChild(1))
527
528 return expr.Slice(lower, op_tok, upper)
529
530 def Expr(self, pnode):
531 # type: (PNode) -> expr_t
532 """Transform expressions (as opposed to statements)"""
533 typ = pnode.typ
534
535 #
536 # YSH Entry Points / Additions
537 #
538
539 if typ == grammar_nt.ysh_expr: # for if/while
540 # ysh_expr: '(' testlist ')'
541 return self.Expr(pnode.GetChild(1))
542
543 if typ == grammar_nt.command_expr:
544 # command_expr: testlist end_stmt
545 return self.Expr(pnode.GetChild(0))
546
547 #
548 # Python-like Expressions / Operators
549 #
550
551 if typ == grammar_nt.atom:
552 if pnode.NumChildren() == 1:
553 return self.Expr(pnode.GetChild(0))
554 return self._Atom(pnode)
555
556 if typ == grammar_nt.testlist:
557 # testlist: test (',' test)* [',']
558 return self._Tuple(pnode)
559
560 if typ == grammar_nt.test:
561 # test: or_test ['if' or_test 'else' test] | lambdef
562 if pnode.NumChildren() == 1:
563 return self.Expr(pnode.GetChild(0))
564
565 # TODO: Handle lambdef
566
567 test = self.Expr(pnode.GetChild(2))
568 body = self.Expr(pnode.GetChild(0))
569 orelse = self.Expr(pnode.GetChild(4))
570 return expr.IfExp(test, body, orelse)
571
572 if typ == grammar_nt.lambdef:
573 # lambdef: '|' [name_type_list] '|' test
574
575 n = pnode.NumChildren()
576 if n == 4:
577 params = self._NameTypeList(pnode.GetChild(1))
578 else:
579 params = []
580
581 body = self.Expr(pnode.GetChild(n - 1))
582 return expr.Lambda(params, body)
583
584 #
585 # Operators with Precedence
586 #
587
588 if typ == grammar_nt.or_test:
589 # or_test: and_test ('or' and_test)*
590 return self._LeftAssoc(pnode)
591
592 if typ == grammar_nt.and_test:
593 # and_test: not_test ('and' not_test)*
594 return self._LeftAssoc(pnode)
595
596 if typ == grammar_nt.not_test:
597 # not_test: 'not' not_test | comparison
598 if pnode.NumChildren() == 1:
599 return self.Expr(pnode.GetChild(0))
600
601 op_tok = pnode.GetChild(0).tok # not
602 return expr.Unary(op_tok, self.Expr(pnode.GetChild(1)))
603
604 elif typ == grammar_nt.comparison:
605 if pnode.NumChildren() == 1:
606 return self.Expr(pnode.GetChild(0))
607
608 return self._CompareChain(pnode)
609
610 elif typ == grammar_nt.range_expr:
611 n = pnode.NumChildren()
612 if n == 1:
613 return self.Expr(pnode.GetChild(0))
614
615 if n == 3:
616 return expr.Range(self.Expr(pnode.GetChild(0)),
617 pnode.GetChild(1).tok,
618 self.Expr(pnode.GetChild(2)))
619
620 raise AssertionError(n)
621
622 elif typ == grammar_nt.expr:
623 # expr: xor_expr ('|' xor_expr)*
624 return self._LeftAssoc(pnode)
625
626 if typ == grammar_nt.xor_expr:
627 # xor_expr: and_expr ('xor' and_expr)*
628 return self._LeftAssoc(pnode)
629
630 if typ == grammar_nt.and_expr: # a & b
631 # and_expr: shift_expr ('&' shift_expr)*
632 return self._LeftAssoc(pnode)
633
634 elif typ == grammar_nt.shift_expr:
635 # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
636 return self._LeftAssoc(pnode)
637
638 elif typ == grammar_nt.arith_expr:
639 # arith_expr: term (('+'|'-') term)*
640 return self._LeftAssoc(pnode)
641
642 elif typ == grammar_nt.term:
643 # term: factor (('*'|'/'|'div'|'mod') factor)*
644 return self._LeftAssoc(pnode)
645
646 elif typ == grammar_nt.factor:
647 # factor: ('+'|'-'|'~') factor | power
648 # the power would have already been reduced
649 if pnode.NumChildren() == 1:
650 return self.Expr(pnode.GetChild(0))
651
652 assert pnode.NumChildren() == 2
653 op = pnode.GetChild(0)
654 e = pnode.GetChild(1)
655
656 assert isinstance(op.tok, Token)
657 return expr.Unary(op.tok, self.Expr(e))
658
659 elif typ == grammar_nt.power:
660 # power: atom trailer* ['**' factor]
661
662 node = self.Expr(pnode.GetChild(0))
663 if pnode.NumChildren() == 1: # No trailers
664 return node
665
666 # Support a->startswith(b) and mydict.key
667 n = pnode.NumChildren()
668 i = 1
669 while i < n and pnode.GetChild(i).typ == grammar_nt.trailer:
670 node = self._Trailer(node, pnode.GetChild(i))
671 i += 1
672
673 if i != n: # ['**' factor]
674 op_tok = pnode.GetChild(i).tok
675 assert op_tok.id == Id.Arith_DStar, op_tok
676 factor = self.Expr(pnode.GetChild(i + 1))
677 node = expr.Binary(op_tok, node, factor)
678
679 return node
680
681 elif typ == grammar_nt.eggex:
682 return self._Eggex(pnode)
683
684 elif typ == grammar_nt.ysh_expr_sub:
685 return self.Expr(pnode.GetChild(0))
686
687 #
688 # YSH Lexer Modes
689 #
690
691 elif typ == grammar_nt.sh_array_literal:
692 return cast(ShArrayLiteral, pnode.GetChild(1).tok)
693
694 elif typ == grammar_nt.old_sh_array_literal:
695 return cast(ShArrayLiteral, pnode.GetChild(1).tok)
696
697 elif typ == grammar_nt.sh_command_sub:
698 return cast(CommandSub, pnode.GetChild(1).tok)
699
700 elif typ == grammar_nt.braced_var_sub:
701 return cast(BracedVarSub, pnode.GetChild(1).tok)
702
703 elif typ == grammar_nt.dq_string:
704 dq = cast(DoubleQuoted, pnode.GetChild(1).tok)
705 # sugar: ^"..." is short for ^["..."]
706 if pnode.GetChild(0).typ == Id.Left_CaretDoubleQuote:
707 return expr.Literal(dq)
708 return dq
709
710 elif typ == grammar_nt.sq_string:
711 return cast(SingleQuoted, pnode.GetChild(1).tok)
712
713 elif typ == grammar_nt.simple_var_sub:
714 tok = pnode.GetChild(0).tok
715
716 if tok.id == Id.VSub_DollarName: # $foo is disallowed
717 bare = lexer.TokenSliceLeft(tok, 1)
718 p_die(
719 'In expressions, remove $ and use `%s`, or sometimes "$%s"'
720 % (bare, bare), tok)
721
722 # $? is allowed
723 return SimpleVarSub(tok)
724
725 #
726 # Terminals
727 #
728
729 tok = pnode.tok
730 if typ == Id.Expr_Name:
731 return expr.Var(tok, lexer.TokenVal(tok))
732
733 # Everything else is an expr.Const
734 tok_str = lexer.TokenVal(tok)
735 # Remove underscores from 1_000_000. The lexer is responsible for
736 # validation.
737 c_under = tok_str.replace('_', '')
738
739 if typ == Id.Expr_DecInt:
740 ok, big_int = mops.FromStr2(c_under)
741 if not ok:
742 p_die('Decimal int constant is too large', tok)
743 cval = value.Int(big_int) # type: value_t
744
745 elif typ == Id.Expr_BinInt:
746 assert c_under[:2] in ('0b', '0B'), c_under
747 ok, big_int = mops.FromStr2(c_under[2:], 2)
748 if not ok:
749 p_die('Binary int constant is too large', tok)
750 cval = value.Int(big_int)
751
752 elif typ == Id.Expr_OctInt:
753 assert c_under[:2] in ('0o', '0O'), c_under
754 ok, big_int = mops.FromStr2(c_under[2:], 8)
755 if not ok:
756 p_die('Octal int constant is too large', tok)
757 cval = value.Int(big_int)
758
759 elif typ == Id.Expr_HexInt:
760 assert c_under[:2] in ('0x', '0X'), c_under
761 ok, big_int = mops.FromStr2(c_under[2:], 16)
762 if not ok:
763 p_die('Hex int constant is too large', tok)
764 cval = value.Int(big_int)
765
766 elif typ == Id.Expr_Float:
767 # Note: float() in mycpp/gc_builtins.cc currently uses strtod
768 # I think this never raises ValueError, because the lexer
769 # should only accept strings that strtod() does?
770 cval = value.Float(float(c_under))
771
772 elif typ == Id.Expr_Null:
773 cval = value.Null
774
775 elif typ == Id.Expr_True:
776 cval = value.Bool(True)
777
778 elif typ == Id.Expr_False:
779 cval = value.Bool(False)
780
781 elif typ == Id.Char_OneChar: # \n
782 assert len(tok_str) == 2, tok_str
783 s = consts.LookupCharC(lexer.TokenSliceLeft(tok, 1))
784 cval = value.Str(s)
785
786 elif typ == Id.Char_YHex: # \yff
787 assert len(tok_str) == 4, tok_str
788 hex_str = lexer.TokenSliceLeft(tok, 2)
789 s = chr(int(hex_str, 16))
790 cval = value.Str(s)
791
792 elif typ == Id.Char_UBraced: # \u{123}
793 hex_str = lexer.TokenSlice(tok, 3, -1)
794 code_point = int(hex_str, 16)
795 s = j8.Utf8Encode(code_point)
796 cval = value.Str(s)
797
798 else:
799 raise AssertionError(typ)
800
801 return expr.Const(tok, cval)
802
803 def _CheckLhs(self, lhs):
804 # type: (expr_t) -> None
805
806 UP_lhs = lhs
807 with tagswitch(lhs) as case:
808 if case(expr_e.Var):
809 # OK - e.g. setvar a.b.c[i] = 42
810 pass
811
812 elif case(expr_e.Subscript):
813 lhs = cast(Subscript, UP_lhs)
814 self._CheckLhs(lhs.obj) # recurse on LHS
815
816 elif case(expr_e.Attribute):
817 lhs = cast(Attribute, UP_lhs)
818 self._CheckLhs(lhs.obj) # recurse on LHS
819
820 else:
821 # Illegal - e.g. setglobal {}["key"] = 42
822 p_die("Subscript/Attribute not allowed on this LHS expression",
823 location.TokenForExpr(lhs))
824
825 def _LhsExprList(self, p_node):
826 # type: (PNode) -> List[y_lhs_t]
827 """lhs_list: expr (',' expr)*"""
828 assert p_node.typ == grammar_nt.lhs_list
829
830 lhs_list = [] # type: List[y_lhs_t]
831 n = p_node.NumChildren()
832 for i in xrange(0, n, 2):
833 p = p_node.GetChild(i)
834 #self.p_printer.Print(p)
835
836 e = self.Expr(p)
837 UP_e = e
838 with tagswitch(e) as case:
839 if case(expr_e.Var):
840 e = cast(expr.Var, UP_e)
841 lhs_list.append(e.left)
842
843 elif case(expr_e.Subscript):
844 e = cast(Subscript, UP_e)
845 self._CheckLhs(e)
846 lhs_list.append(e)
847
848 elif case(expr_e.Attribute):
849 e = cast(Attribute, UP_e)
850 self._CheckLhs(e)
851 if e.op.id != Id.Expr_Dot:
852 # e.g. setvar obj->method is not valid
853 p_die("Can't assign to this attribute expr", e.op)
854 lhs_list.append(e)
855
856 else:
857 pass # work around mycpp bug
858
859 # TODO: could blame an arbitrary expr_t, but this works most of
860 # the time
861 if p.tok:
862 blame = p.tok # type: loc_t
863 else:
864 blame = loc.Missing
865 p_die("Can't assign to this expression", blame)
866
867 return lhs_list
868
869 def MakeVarDecl(self, p_node):
870 # type: (PNode) -> command.VarDecl
871 """
872 ysh_var_decl: name_type_list ['=' testlist] end_stmt
873 """
874 assert p_node.typ == grammar_nt.ysh_var_decl
875
876 lhs = self._NameTypeList(p_node.GetChild(0)) # could be a tuple
877
878 # This syntax is confusing, and different than JavaScript
879 # var x, y = 1, 2
880 # But this is useful:
881 # var flag, i = parseArgs(spec, argv)
882
883 n = p_node.NumChildren()
884 if n >= 3:
885 rhs = self.Expr(p_node.GetChild(2))
886 else:
887 rhs = None
888
889 # The caller should fill in the keyword token.
890 return command.VarDecl(None, lhs, rhs)
891
892 def MakeMutation(self, p_node):
893 # type: (PNode) -> command.Mutation
894 """
895 ysh_mutation: lhs_list (augassign | '=') testlist end_stmt
896 """
897 assert p_node.typ == grammar_nt.ysh_mutation
898
899 lhs_list = self._LhsExprList(p_node.GetChild(0)) # could be a tuple
900 op_tok = p_node.GetChild(1).tok
901 if len(lhs_list) > 1 and op_tok.id != Id.Arith_Equal:
902 p_die('Multiple assignment must use =', op_tok)
903 rhs = self.Expr(p_node.GetChild(2))
904 return command.Mutation(None, lhs_list, op_tok, rhs)
905
906 def _EggexFlag(self, p_node):
907 # type: (PNode) -> EggexFlag
908 n = p_node.NumChildren()
909 if n == 1:
910 return EggexFlag(False, p_node.GetChild(0).tok)
911 elif n == 2:
912 return EggexFlag(True, p_node.GetChild(1).tok)
913 else:
914 raise AssertionError()
915
916 def _Eggex(self, p_node):
917 # type: (PNode) -> Eggex
918 """
919 eggex: '/' regex [';' re_flag* [';' Expr_Name] ] '/'
920 """
921 left = p_node.GetChild(0).tok
922 regex = self._Regex(p_node.GetChild(1))
923
924 flags = [] # type: List[EggexFlag]
925 trans_pref = None # type: Optional[Token]
926
927 i = 2
928 current = p_node.GetChild(i)
929 if current.typ == Id.Op_Semi:
930 i += 1
931 while True:
932 current = p_node.GetChild(i)
933 if current.typ != grammar_nt.re_flag:
934 break
935 flags.append(self._EggexFlag(current))
936 i += 1
937
938 if current.typ == Id.Op_Semi:
939 i += 1
940 trans_pref = p_node.GetChild(i).tok
941
942 # Canonicalize and validate flags for ERE only. Default is ERE.
943 if trans_pref is None or lexer.TokenVal(trans_pref) == 'ERE':
944 canonical_flags = regex_translate.CanonicalFlags(flags)
945 else:
946 canonical_flags = None
947
948 return Eggex(left, regex, flags, trans_pref, canonical_flags)
949
950 def YshCasePattern(self, pnode):
951 # type: (PNode) -> pat_t
952 assert pnode.typ == grammar_nt.ysh_case_pat, pnode
953
954 pattern = pnode.GetChild(0)
955 typ = pattern.typ
956 if typ == Id.Op_LParen:
957 # pat_expr or pat_else
958 pattern = pnode.GetChild(1)
959 typ = pattern.typ
960
961 if typ == grammar_nt.pat_else:
962 return pat.Else
963
964 if typ == grammar_nt.pat_exprs:
965 exprs = [] # type: List[expr_t]
966 for i in xrange(pattern.NumChildren()):
967 child = pattern.GetChild(i)
968 if child.typ == grammar_nt.expr:
969 expr = self.Expr(child)
970 exprs.append(expr)
971 return pat.YshExprs(exprs)
972
973 if typ == grammar_nt.eggex:
974 return self._Eggex(pattern)
975
976 raise AssertionError()
977
978 def _BlockArg(self, p_node):
979 # type: (PNode) -> expr_t
980
981 n = p_node.NumChildren()
982 if n == 1:
983 child = p_node.GetChild(0)
984 return self.Expr(child)
985
986 # It can only be an expression, not a=42, or ...expr
987 p_die('Invalid block expression argument', p_node.tok)
988
989 def _Argument(self, p_node, after_semi, arglist):
990 # type: (PNode, bool, ArgList) -> None
991 """
992 argument: (
993 test [comp_for]
994 | test '=' test # named arg
995 | '...' test # var args
996 )
997 """
998 pos_args = arglist.pos_args
999 named_args = arglist.named_args
1000
1001 assert p_node.typ == grammar_nt.argument, p_node
1002 n = p_node.NumChildren()
1003 if n == 1:
1004 child = p_node.GetChild(0)
1005 if after_semi:
1006 p_die(POS_ARG_MISPLACED, child.tok)
1007 arg = self.Expr(child)
1008 pos_args.append(arg)
1009 return
1010
1011 if n == 2:
1012 # Note: We allow multiple spreads, just like Julia. They are
1013 # concatenated as in lists and dicts.
1014 tok0 = p_node.GetChild(0).tok
1015 if tok0.id == Id.Expr_Ellipsis:
1016 spread_expr = expr.Spread(tok0, self.Expr(p_node.GetChild(1)))
1017 if after_semi: # f(; ... named)
1018 named_args.append(NamedArg(None, spread_expr))
1019 else: # f(...named)
1020 pos_args.append(spread_expr)
1021 return
1022
1023 # Note: generator expression not implemented
1024 if p_node.GetChild(1).typ == grammar_nt.comp_for:
1025 child = p_node.GetChild(0)
1026 if after_semi:
1027 p_die(POS_ARG_MISPLACED, child.tok)
1028
1029 elt = self.Expr(child)
1030 comp = self._CompFor(p_node.GetChild(1))
1031 arg = expr.GeneratorExp(elt, [comp])
1032 pos_args.append(arg)
1033 return
1034
1035 raise AssertionError()
1036
1037 if n == 3: # named args can come before or after the semicolon
1038 n1 = NamedArg(
1039 p_node.GetChild(0).tok, self.Expr(p_node.GetChild(2)))
1040 named_args.append(n1)
1041 return
1042
1043 raise AssertionError()
1044
1045 def _ArgGroup(self, p_node, after_semi, arglist):
1046 # type: (PNode, bool, ArgList) -> None
1047 """
1048 arg_group: argument (',' argument)* [',']
1049 """
1050 for i in xrange(p_node.NumChildren()):
1051 p_child = p_node.GetChild(i)
1052 if p_child.typ == grammar_nt.argument:
1053 self._Argument(p_child, after_semi, arglist)
1054
1055 def _ArgList(self, p_node, arglist):
1056 # type: (PNode, ArgList) -> None
1057 """For both funcs and procs
1058
1059 arglist: (
1060 [arg_group]
1061 [';' [arg_group]]
1062 )
1063
1064 arglist3: ...
1065 """
1066 n = p_node.NumChildren()
1067 if n == 0:
1068 return
1069
1070 i = 0
1071
1072 if i >= n:
1073 return
1074 child = p_node.GetChild(i)
1075 if child.typ == grammar_nt.arg_group:
1076 self._ArgGroup(child, False, arglist)
1077 i += 1
1078
1079 if i >= n:
1080 return
1081 child = p_node.GetChild(i)
1082 if child.typ == Id.Op_Semi:
1083 arglist.semi_tok = child.tok
1084 i += 1
1085
1086 # Named args after first semi-colon
1087 if i >= n:
1088 return
1089 child = p_node.GetChild(i)
1090 if child.typ == grammar_nt.arg_group:
1091 self._ArgGroup(child, True, arglist)
1092 i += 1
1093
1094 #
1095 # Special third group may have block expression - only for arglist3,
1096 # used for procs!
1097 #
1098
1099 if i >= n:
1100 return
1101 assert p_node.typ == grammar_nt.arglist3, p_node
1102
1103 child = p_node.GetChild(i)
1104 if child.typ == Id.Op_Semi:
1105 arglist.semi_tok2 = child.tok
1106 i += 1
1107
1108 if i >= n:
1109 return
1110 child = p_node.GetChild(i)
1111 if child.typ == grammar_nt.argument:
1112 arglist.block_expr = self._BlockArg(child)
1113 i += 1
1114
1115 def ProcCallArgs(self, pnode, arglist):
1116 # type: (PNode, ArgList) -> None
1117 """
1118 ysh_eager_arglist: '(' [arglist3] ')'
1119 ysh_lazy_arglist: '[' [arglist] ']'
1120 """
1121 n = pnode.NumChildren()
1122 if n == 2: # f()
1123 return
1124
1125 if n == 3:
1126 child1 = pnode.GetChild(1) # the X in '( X )'
1127
1128 self._ArgList(child1, arglist)
1129 return
1130
1131 raise AssertionError()
1132
1133 def _TypeExpr(self, pnode):
1134 # type: (PNode) -> TypeExpr
1135 """
1136 type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]
1137 """
1138 assert pnode.typ == grammar_nt.type_expr, pnode.typ
1139
1140 ty = TypeExpr.CreateNull() # don't allocate children
1141
1142 ty.tok = pnode.GetChild(0).tok
1143 ty.name = lexer.TokenVal(ty.tok)
1144
1145 n = pnode.NumChildren()
1146 if n == 1:
1147 return ty
1148
1149 ty.params = []
1150 i = 2
1151 while i < n:
1152 p = self._TypeExpr(pnode.GetChild(i))
1153 ty.params.append(p)
1154 i += 2 # skip comma
1155
1156 return ty
1157
1158 def _Param(self, pnode):
1159 # type: (PNode) -> Param
1160 """
1161 param: Expr_Name [type_expr] ['=' expr]
1162 """
1163 assert pnode.typ == grammar_nt.param
1164
1165 name_tok = pnode.GetChild(0).tok
1166 n = pnode.NumChildren()
1167
1168 assert name_tok.id == Id.Expr_Name, name_tok
1169
1170 default_val = None # type: expr_t
1171 type_ = None # type: TypeExpr
1172
1173 if n == 1:
1174 # proc p(a)
1175 pass
1176
1177 elif n == 2:
1178 # proc p(a Int)
1179 type_ = self._TypeExpr(pnode.GetChild(1))
1180
1181 elif n == 3:
1182 # proc p(a = 3)
1183 default_val = self.Expr(pnode.GetChild(2))
1184
1185 elif n == 4:
1186 # proc p(a Int = 3)
1187 type_ = self._TypeExpr(pnode.GetChild(1))
1188 default_val = self.Expr(pnode.GetChild(3))
1189
1190 return Param(name_tok, lexer.TokenVal(name_tok), type_, default_val)
1191
1192 def _ParamGroup(self, p_node):
1193 # type: (PNode) -> ParamGroup
1194 """
1195 param_group:
1196 (param ',')*
1197 [ (param | '...' Expr_Name) [,] ]
1198 """
1199 assert p_node.typ == grammar_nt.param_group, p_node
1200
1201 params = [] # type: List[Param]
1202 rest_of = None # type: Optional[RestParam]
1203
1204 n = p_node.NumChildren()
1205 i = 0
1206 while i < n:
1207 child = p_node.GetChild(i)
1208 if child.typ == grammar_nt.param:
1209 params.append(self._Param(child))
1210
1211 elif child.typ == Id.Expr_Ellipsis:
1212 tok = p_node.GetChild(i + 1).tok
1213 rest_of = RestParam(tok, lexer.TokenVal(tok))
1214
1215 i += 2
1216
1217 return ParamGroup(params, rest_of)
1218
1219 def Proc(self, p_node):
1220 # type: (PNode) -> proc_sig_t
1221 """
1222 ysh_proc: (
1223 [ '('
1224 [ param_group ] # word params, with defaults
1225 [ ';' [ param_group ] ] # positional typed params, with defaults
1226 [ ';' [ param_group ] ] # named params, with defaults
1227 [ ';' Expr_Name ] # optional block param, with no type or default
1228 ')'
1229 ]
1230 '{' # opening { for pgen2
1231 )
1232 """
1233 assert p_node.typ == grammar_nt.ysh_proc
1234
1235 n = p_node.NumChildren()
1236 if n == 1: # proc f {
1237 return proc_sig.Open
1238
1239 if n == 3: # proc f () {
1240 return proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1241
1242 # proc f( three param groups, and block group )
1243 sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1244
1245 # Word args
1246 i = 1
1247 child = p_node.GetChild(i)
1248 if child.typ == grammar_nt.param_group:
1249 sig.word = self._ParamGroup(p_node.GetChild(i))
1250
1251 # Validate word args
1252 for word in sig.word.params:
1253 if word.type:
1254 if word.type.name not in ('Str', 'Ref'):
1255 p_die('Word params may only have type Str or Ref',
1256 word.type.tok)
1257 if word.type.params is not None:
1258 p_die('Unexpected type parameters', word.type.tok)
1259
1260 i += 2
1261 else:
1262 i += 1
1263
1264 #log('i %d n %d', i, n)
1265 if i >= n:
1266 return sig
1267
1268 # Positional args
1269 child = p_node.GetChild(i)
1270 if child.typ == grammar_nt.param_group:
1271 sig.positional = self._ParamGroup(p_node.GetChild(i))
1272 i += 2
1273 else:
1274 i += 1
1275
1276 #log('i %d n %d', i, n)
1277 if i >= n:
1278 return sig
1279
1280 # Keyword args
1281 child = p_node.GetChild(i)
1282 if child.typ == grammar_nt.param_group:
1283 sig.named = self._ParamGroup(p_node.GetChild(i))
1284 i += 2
1285 else:
1286 i += 1
1287
1288 #log('i %d n %d', i, n)
1289 if i >= n:
1290 return sig
1291
1292 child = p_node.GetChild(i)
1293 if child.typ == grammar_nt.param_group:
1294 group = self._ParamGroup(p_node.GetChild(i))
1295 params = group.params
1296 if len(params) > 1:
1297 p_die('Only 1 block param is allowed', params[1].blame_tok)
1298 if group.rest_of:
1299 p_die("Rest param isn't allowed for blocks",
1300 group.rest_of.blame_tok)
1301
1302 if len(params) == 1:
1303 if params[0].type:
1304 if params[0].type.name != 'Command':
1305 p_die('Block param must have type Command',
1306 params[0].type.tok)
1307 if params[0].type.params is not None:
1308 p_die('Unexpected type parameters', params[0].type.tok)
1309
1310 sig.block_param = params[0]
1311
1312 return sig
1313
1314 def YshFunc(self, p_node, out):
1315 # type: (PNode, Func) -> None
1316 """
1317 ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
1318 """
1319 assert p_node.typ == grammar_nt.ysh_func
1320
1321 #self.p_printer.Print(p_node)
1322
1323 out.name = p_node.GetChild(0).tok
1324
1325 n = p_node.NumChildren()
1326 i = 2 # after (
1327
1328 child = p_node.GetChild(i)
1329 if child.typ == grammar_nt.param_group:
1330 out.positional = self._ParamGroup(child)
1331 i += 2 # skip past ;
1332 else:
1333 i += 1
1334
1335 if i >= n:
1336 return
1337
1338 child = p_node.GetChild(i)
1339 if child.typ == grammar_nt.param_group:
1340 out.named = self._ParamGroup(child)
1341
1342 #
1343 # Eggex Language
1344 #
1345
1346 def _RangeCharSingleQuoted(self, p_node):
1347 # type: (PNode) -> Optional[CharCode]
1348
1349 assert p_node.typ == grammar_nt.range_char, p_node
1350
1351 # 'a' in 'a'-'b'
1352
1353 child0 = p_node.GetChild(0)
1354 if child0.typ == grammar_nt.sq_string:
1355 sq_part = cast(SingleQuoted, child0.GetChild(1).tok)
1356 n = len(sq_part.sval)
1357 if n == 0:
1358 p_die("Quoted range char can't be empty",
1359 loc.WordPart(sq_part))
1360 elif n == 1:
1361 return CharCode(sq_part.left, ord(sq_part.sval[0]), False)
1362 else:
1363 p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
1364 return None
1365
1366 def _OtherRangeToken(self, p_node):
1367 # type: (PNode) -> Token
1368 """An endpoint of a range (single char)
1369
1370 range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
1371 a-z 0-9 'a'-'z' \x00-\xff
1372 """
1373 assert p_node.typ == grammar_nt.range_char, p_node
1374
1375 child0 = p_node.GetChild(0)
1376 if child0.typ == grammar_nt.char_literal:
1377 # \x00 in /[\x00 - \x20]/
1378 tok = child0.GetChild(0).tok
1379 return tok
1380
1381 tok = p_node.tok
1382 # a in a-z is Expr_Name
1383 # 0 in 0-9 is Expr_DecInt
1384 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt), tok
1385
1386 if tok.length != 1:
1387 p_die(RANGE_POINT_TOO_LONG, tok)
1388 return tok
1389
1390 def _NonRangeChars(self, p_node):
1391 # type: (PNode) -> class_literal_term_t
1392 """
1393 \" \u1234 '#'
1394 """
1395 assert p_node.typ == grammar_nt.range_char, p_node
1396
1397 child0 = p_node.GetChild(0)
1398 typ0 = p_node.GetChild(0).typ
1399
1400 if typ0 == grammar_nt.sq_string:
1401 return cast(SingleQuoted, child0.GetChild(1).tok)
1402
1403 if typ0 == grammar_nt.char_literal:
1404 return word_compile.EvalCharLiteralForRegex(child0.tok)
1405
1406 if typ0 == Id.Expr_Name:
1407 # Look up PerlClass and PosixClass
1408 return self._NameInClass(None, child0.tok)
1409
1410 raise AssertionError()
1411
1412 def _ClassLiteralTerm(self, p_node):
1413 # type: (PNode) -> class_literal_term_t
1414 """
1415 class_literal_term:
1416 range_char ['-' range_char ]
1417 | '@' Expr_Name # splice
1418 | '!' Expr_Name # negate char class
1419 ...
1420 """
1421 assert p_node.typ == grammar_nt.class_literal_term, p_node
1422
1423 typ0 = p_node.GetChild(0).typ
1424
1425 if typ0 == grammar_nt.range_char:
1426 n = p_node.NumChildren()
1427
1428 if n == 1:
1429 return self._NonRangeChars(p_node.GetChild(0))
1430
1431 # 'a'-'z' etc.
1432 if n == 3:
1433 assert p_node.GetChild(1).typ == Id.Arith_Minus, p_node
1434
1435 left = p_node.GetChild(0)
1436 right = p_node.GetChild(2)
1437
1438 code1 = self._RangeCharSingleQuoted(left)
1439 if code1 is None:
1440 tok1 = self._OtherRangeToken(left)
1441 code1 = word_compile.EvalCharLiteralForRegex(tok1)
1442
1443 code2 = self._RangeCharSingleQuoted(right)
1444 if code2 is None:
1445 tok2 = self._OtherRangeToken(right)
1446 code2 = word_compile.EvalCharLiteralForRegex(tok2)
1447 return CharRange(code1, code2)
1448
1449 raise AssertionError()
1450
1451 if typ0 == Id.Expr_At:
1452 tok1 = p_node.GetChild(1).tok
1453 return class_literal_term.Splice(tok1, lexer.TokenVal(tok1))
1454
1455 if typ0 == Id.Expr_Bang:
1456 return self._NameInClass(
1457 p_node.GetChild(0).tok,
1458 p_node.GetChild(1).tok)
1459
1460 p_die("This kind of class literal term isn't implemented",
1461 p_node.GetChild(0).tok)
1462
1463 def _ClassLiteral(self, p_node):
1464 # type: (PNode) -> List[class_literal_term_t]
1465 """class_literal: '[' class_literal_term+ ']'."""
1466 assert p_node.typ == grammar_nt.class_literal
1467 # skip [ and ]
1468 terms = [] # type: List[class_literal_term_t]
1469 for i in xrange(1, p_node.NumChildren() - 1):
1470 terms.append(self._ClassLiteralTerm(p_node.GetChild(i)))
1471
1472 return terms
1473
1474 def _NameInRegex(self, negated_tok, tok):
1475 # type: (Token, Token) -> re_t
1476 tok_str = lexer.TokenVal(tok)
1477 if tok_str == 'dot':
1478 if negated_tok:
1479 p_die("Can't negate this symbol", tok)
1480 return re.Primitive(tok, Id.Eggex_Dot)
1481
1482 if tok_str in POSIX_CLASSES:
1483 return PosixClass(negated_tok, tok_str)
1484
1485 perl = PERL_CLASSES.get(tok_str)
1486 if perl is not None:
1487 return PerlClass(negated_tok, perl)
1488
1489 if tok_str[0].isupper(): # e.g. HexDigit
1490 return re.Splice(tok, lexer.TokenVal(tok))
1491
1492 p_die("%r isn't a character class" % tok_str, tok)
1493
1494 def _NameInClass(self, negated_tok, tok):
1495 # type: (Token, Token) -> class_literal_term_t
1496 """Like the above, but 'dot' and 'd' don't mean anything within []"""
1497 tok_str = lexer.TokenVal(tok)
1498
1499 # A bare, unquoted character literal. In the grammar, this is expressed as
1500 # range_char without an ending.
1501
1502 # d is NOT 'digit', it's a literal 'd'!
1503 if len(tok_str) == 1:
1504 # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
1505 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt)
1506
1507 if negated_tok: # [~d] is not allowed, only [~digit]
1508 p_die("Can't negate this symbol", tok)
1509 return word_compile.EvalCharLiteralForRegex(tok)
1510
1511 # digit, word, but not d, w, etc.
1512 if tok_str in POSIX_CLASSES:
1513 return PosixClass(negated_tok, tok_str)
1514
1515 perl = PERL_CLASSES.get(tok_str)
1516 if perl is not None:
1517 return PerlClass(negated_tok, perl)
1518 p_die("%r isn't a character class" % tok_str, tok)
1519
1520 def _ReAtom(self, p_atom):
1521 # type: (PNode) -> re_t
1522 """
1523 re_atom: ( char_literal | ...
1524 """
1525 assert p_atom.typ == grammar_nt.re_atom, p_atom.typ
1526
1527 child0 = p_atom.GetChild(0)
1528
1529 typ0 = p_atom.GetChild(0).typ
1530 tok0 = p_atom.GetChild(0).tok
1531
1532 # Non-terminals
1533
1534 if typ0 == grammar_nt.class_literal:
1535 return re.CharClassLiteral(False, self._ClassLiteral(child0))
1536
1537 if typ0 == grammar_nt.sq_string:
1538 return cast(SingleQuoted, child0.GetChild(1).tok)
1539
1540 if typ0 == grammar_nt.char_literal:
1541 # Note: ERE doesn't seem to support escapes like Python
1542 # https://docs.python.org/3/library/re.html
1543 # We might want to do a translation like this:
1544 #
1545 # \u{03bc} -> \u03bc
1546 # \x00 -> \x00
1547 # \n -> \n
1548
1549 # Must be Id.Char_{OneChar,Hex,UBraced}
1550 assert consts.GetKind(tok0.id) == Kind.Char
1551 s = word_compile.EvalCStringToken(tok0.id, lexer.TokenVal(tok0))
1552 return re.LiteralChars(tok0, s)
1553
1554 # Special punctuation
1555 if typ0 == Id.Expr_Dot: # .
1556 return re.Primitive(tok0, Id.Eggex_Dot)
1557
1558 if typ0 == Id.Arith_Caret: # ^
1559 return re.Primitive(tok0, Id.Eggex_Start)
1560
1561 if typ0 == Id.Expr_Dollar: # $
1562 return re.Primitive(tok0, Id.Eggex_End)
1563
1564 if typ0 == Id.Expr_Name:
1565 # d digit -> PosixClass PerlClass etc.
1566 return self._NameInRegex(None, tok0)
1567
1568 if typ0 == Id.Expr_Symbol:
1569 # Validate symbols here, like we validate PerlClass, etc.
1570 tok_str = lexer.TokenVal(tok0)
1571 if tok_str == '%start':
1572 return re.Primitive(tok0, Id.Eggex_Start)
1573 if tok_str == '%end':
1574 return re.Primitive(tok0, Id.Eggex_End)
1575 p_die("Unexpected token %r in regex" % tok_str, tok0)
1576
1577 if typ0 == Id.Expr_At:
1578 # | '@' Expr_Name
1579 tok1 = p_atom.GetChild(1).tok
1580 return re.Splice(tok0, lexer.TokenVal(tok1))
1581
1582 if typ0 == Id.Expr_Bang:
1583 # | '!' (Expr_Name | class_literal)
1584 # | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
1585 n = p_atom.NumChildren()
1586 if n == 2:
1587 child1 = p_atom.GetChild(1)
1588 if child1.typ == grammar_nt.class_literal:
1589 return re.CharClassLiteral(True,
1590 self._ClassLiteral(child1))
1591 else:
1592 return self._NameInRegex(tok0, p_atom.GetChild(1).tok)
1593 else:
1594 # Note: !! conflicts with shell history
1595 p_die(
1596 "Backtracking with !! isn't implemented (requires Python/PCRE)",
1597 p_atom.GetChild(1).tok)
1598
1599 if typ0 == Id.Op_LParen:
1600 # | '(' regex ')'
1601
1602 # Note: in ERE (d+) is the same as <d+>. That is, Group becomes
1603 # Capture.
1604 return re.Group(self._Regex(p_atom.GetChild(1)))
1605
1606 if typ0 == Id.Arith_Less:
1607 # | '<' 'capture' regex ['as' Expr_Name] [':' Expr_Name] '>'
1608
1609 n = p_atom.NumChildren()
1610 assert n == 4 or n == 6 or n == 8, n
1611
1612 # < capture d+ >
1613 regex = self._Regex(p_atom.GetChild(2))
1614
1615 as_name = None # type: Optional[Token]
1616 func_name = None # type: Optional[Token]
1617
1618 i = 3 # points at any of > as :
1619
1620 typ = p_atom.GetChild(i).typ
1621 if typ == Id.Expr_As:
1622 as_name = p_atom.GetChild(i + 1).tok
1623 i += 2
1624
1625 typ = p_atom.GetChild(i).typ
1626 if typ == Id.Arith_Colon:
1627 func_name = p_atom.GetChild(i + 1).tok
1628
1629 return re.Capture(regex, as_name, func_name)
1630
1631 raise AssertionError(typ0)
1632
1633 def _RepeatOp(self, p_repeat):
1634 # type: (PNode) -> re_repeat_t
1635 """
1636 repeat_op: '+' | '*' | '?'
1637 | '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'
1638 """
1639 assert p_repeat.typ == grammar_nt.repeat_op, p_repeat
1640
1641 tok = p_repeat.GetChild(0).tok
1642 id_ = tok.id
1643
1644 if id_ in (Id.Arith_Plus, Id.Arith_Star, Id.Arith_QMark):
1645 return tok # a+ a* a?
1646
1647 if id_ == Id.Op_LBrace:
1648 child1 = p_repeat.GetChild(1)
1649 if child1.typ != grammar_nt.repeat_range:
1650 # e.g. dot{N *} is .*?
1651 p_die("Perl-style repetition isn't implemented with libc",
1652 child1.tok)
1653
1654 # repeat_range: (
1655 # Expr_DecInt [',']
1656 # | ',' Expr_DecInt
1657 # | Expr_DecInt ',' Expr_DecInt
1658 # )
1659
1660 n = child1.NumChildren()
1661 if n == 1: # {3}
1662 tok = child1.GetChild(0).tok
1663 return tok # different operator than + * ?
1664
1665 if n == 2:
1666 if child1.GetChild(0).typ == Id.Expr_DecInt: # {,3}
1667 left = child1.GetChild(0).tok
1668 return re_repeat.Range(left, lexer.TokenVal(left), '',
1669 None)
1670 else: # {1,}
1671 right = child1.GetChild(1).tok
1672 return re_repeat.Range(None, '', lexer.TokenVal(right),
1673 right)
1674
1675 if n == 3: # {1,3}
1676 left = child1.GetChild(0).tok
1677 right = child1.GetChild(2).tok
1678 return re_repeat.Range(left, lexer.TokenVal(left),
1679 lexer.TokenVal(right), right)
1680
1681 raise AssertionError(n)
1682
1683 raise AssertionError(id_)
1684
1685 def _ReAlt(self, p_node):
1686 # type: (PNode) -> re_t
1687 """
1688 re_alt: (re_atom [repeat_op])+
1689 """
1690 assert p_node.typ == grammar_nt.re_alt
1691
1692 i = 0
1693 n = p_node.NumChildren()
1694 seq = [] # type: List[re_t]
1695 while i < n:
1696 r = self._ReAtom(p_node.GetChild(i))
1697 i += 1
1698 if i < n and p_node.GetChild(i).typ == grammar_nt.repeat_op:
1699 repeat_op = self._RepeatOp(p_node.GetChild(i))
1700 r = re.Repeat(r, repeat_op)
1701 i += 1
1702 seq.append(r)
1703
1704 if len(seq) == 1:
1705 return seq[0]
1706 else:
1707 return re.Seq(seq)
1708
1709 def _Regex(self, p_node):
1710 # type: (PNode) -> re_t
1711 """
1712 regex: [re_alt] (('|'|'or') re_alt)*
1713 """
1714 assert p_node.typ == grammar_nt.regex
1715
1716 n = p_node.NumChildren()
1717 alts = [] # type: List[re_t]
1718 for i in xrange(0, n, 2): # was children[::2]
1719 c = p_node.GetChild(i)
1720 alts.append(self._ReAlt(c))
1721
1722 if len(alts) == 1:
1723 return alts[0]
1724 else:
1725 return re.Alt(alts)
1726
1727
1728# vim: sw=4