OILS / ysh / expr_to_ast.py

1740 lines, 1053 significant
1"""expr_to_ast.py."""
2from __future__ import print_function
3
4from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind
5from _devbuild.gen.syntax_asdl import (
6 ExprSub,
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 DoubleQuoted,
12 SingleQuoted,
13 BracedVarSub,
14 CommandSub,
15 YshArrayLiteral,
16 expr,
17 expr_e,
18 expr_t,
19 expr_context_e,
20 re,
21 re_t,
22 re_repeat,
23 re_repeat_t,
24 class_literal_term,
25 class_literal_term_t,
26 PosixClass,
27 PerlClass,
28 NameType,
29 y_lhs_t,
30 Comprehension,
31 Subscript,
32 Attribute,
33 proc_sig,
34 proc_sig_t,
35 Param,
36 RestParam,
37 ParamGroup,
38 NamedArg,
39 ArgList,
40 pat,
41 pat_t,
42 TypeExpr,
43 Func,
44 Eggex,
45 EggexFlag,
46 CharCode,
47 CharRange,
48 VarDecl,
49 Mutation,
50)
51from _devbuild.gen.value_asdl import value, value_t
52from _devbuild.gen import grammar_nt
53from core.error import p_die
54from data_lang import j8
55from frontend import consts
56from frontend import lexer
57from frontend import location
58from mycpp import mops
59from mycpp import mylib
60from mycpp.mylib import log, tagswitch
61from osh import word_compile
62from ysh import expr_parse
63from ysh import regex_translate
64
65from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, cast
66if TYPE_CHECKING:
67 from pgen2.grammar import Grammar
68 from pgen2.pnode import PNode
69
70_ = log
71
72PERL_CLASSES = {
73 'd': 'd',
74 'w': 'w',
75 'word': 'w',
76 's': 's',
77}
78# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
79POSIX_CLASSES = [
80 'alnum',
81 'cntrl',
82 'lower',
83 'space',
84 'alpha',
85 'digit',
86 'print',
87 'upper',
88 'blank',
89 'graph',
90 'punct',
91 'xdigit',
92]
93# NOTE: There are also things like \p{Greek} that we could put in the
94# "non-sigil" namespace.
95
96RANGE_POINT_TOO_LONG = "Range start/end shouldn't have more than one character"
97
98POS_ARG_MISPLACED = "Positional arg can't appear in group of named args"
99
100# Copied from pgen2/token.py to avoid dependency.
101NT_OFFSET = 256
102
103if mylib.PYTHON:
104
105 def MakeGrammarNames(ysh_grammar):
106 # type: (Grammar) -> Dict[int, str]
107
108 # TODO: Break this dependency
109 from frontend import lexer_def
110
111 names = {}
112
113 for id_name, k in lexer_def.ID_SPEC.id_str2int.items():
114 # Hm some are out of range
115 #assert k < 256, (k, id_name)
116
117 # TODO: Some tokens have values greater than NT_OFFSET
118 if k < NT_OFFSET:
119 names[k] = id_name
120
121 for k, v in ysh_grammar.number2symbol.items():
122 assert k >= NT_OFFSET, (k, v)
123 names[k] = v
124
125 return names
126
127
128class Transformer(object):
129 """Homogeneous parse tree -> heterogeneous AST ("lossless syntax tree")
130
131 pgen2 (Python's LL parser generator) doesn't have semantic actions like yacc,
132 so this "transformer" is the equivalent.
133
134 Files to refer to when modifying this function:
135
136 ysh/grammar.pgen2 (generates _devbuild/gen/grammar_nt.py)
137 frontend/syntax.asdl (generates _devbuild/gen/syntax_asdl.py)
138
139 Related examples:
140
141 opy/compiler2/transformer.py (Python's parse tree -> AST, ~1500 lines)
142 Python-2.7.13/Python/ast.c (the "real" CPython version, ~3600 lines)
143
144 Other:
145 frontend/parse_lib.py (turn on print_parse_tree)
146
147 Public methods:
148 Expr, VarDecl
149 atom, trailer, etc. are private, named after productions in grammar.pgen2.
150 """
151
152 def __init__(self, gr):
153 # type: (Grammar) -> None
154 self.number2symbol = gr.number2symbol
155 if mylib.PYTHON:
156 names = MakeGrammarNames(gr)
157 # print raw nodes
158 self.p_printer = expr_parse.ParseTreePrinter(names)
159
160 def _LeftAssoc(self, p_node):
161 # type: (PNode) -> expr_t
162 """For an associative binary operation.
163
164 Examples:
165 xor_expr: and_expr ('xor' and_expr)*
166 term: factor (('*'|'/'|'%'|'div') factor)*
167
168 3 - 1 - 2 must be grouped as ((3 - 1) - 2).
169 """
170 # Note: Compare the iterative com_binary() method in
171 # opy/compiler2/transformer.py.
172
173 # Examples:
174 # - The PNode for '3 - 1' will have 3 children
175 # - The PNode for '3 - 1 - 2' will have 5 children
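# Illustrative sketch of the result for '3 - 1 - 2' (AST shape only):
#
#   expr.Binary('-',
#               expr.Binary('-', Const 3, Const 1),
#               Const 2)
#
# i.e. the earliest operator ends up nested deepest, giving left association.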
176
177 #self.p_printer.Print(p_node)
178
179 i = 1 # index of the operator
180 n = p_node.NumChildren()
181
182 left = self.Expr(p_node.GetChild(0))
183 while i < n:
184 op = p_node.GetChild(i)
185 right = self.Expr(p_node.GetChild(i + 1))
186
187 # create a new left node
188 left = expr.Binary(op.tok, left, right)
189 i += 2
190
191 return left
192
193 def _Trailer(self, base, p_trailer):
194 # type: (expr_t, PNode) -> expr_t
195 """
196 trailer: ( '(' [arglist] ')' | '[' subscriptlist ']'
197 | '.' NAME | '->' NAME | '::' NAME
198 )
199 """
200 tok0 = p_trailer.GetChild(0).tok
201 typ0 = p_trailer.GetChild(0).typ
202
203 if typ0 == Id.Op_LParen:
204 lparen = tok0
205 rparen = p_trailer.GetChild(-1).tok
206 arglist = ArgList(lparen, [], None, [], None, None, rparen)
207 if p_trailer.NumChildren() == 2: # ()
208 return expr.FuncCall(base, arglist)
209
210 p = p_trailer.GetChild(1) # the X in ( X )
211 assert p.typ == grammar_nt.arglist # f(x, y)
212 self._ArgList(p, arglist)
213 return expr.FuncCall(base, arglist)
214
215 if typ0 == Id.Op_LBracket:
216 p_args = p_trailer.GetChild(1)
217 assert p_args.typ == grammar_nt.subscriptlist
218
219 n = p_args.NumChildren()
220 if n == 1: # a[1] a[1:2] a[:] etc.
221 subscript = self._Subscript(p_args.GetChild(0))
222 else: # a[1, 2] a[1:2, :]
223 slices = [] # type: List[expr_t]
224 for i in xrange(0, n, 2):
225 slices.append(self._Subscript(p_args.GetChild(i)))
226 # expr.Tuple evaluates to List in YSH.
227 #
228 # Note that syntactically, a[1:2, 3:4] is the only way to
229 # get a List[Slice]. [1:2, 3:4] by itself is not allowed.
230 comma_tok = p_args.GetChild(1).tok
231 subscript = expr.Tuple(comma_tok, slices, expr_context_e.Store)
232
233 return Subscript(tok0, base, subscript)
234
235 if typ0 in (Id.Expr_Dot, Id.Expr_RArrow, Id.Expr_RDArrow):
236 attr = p_trailer.GetChild(1).tok # will be Id.Expr_Name
237 return Attribute(base, tok0, attr, lexer.TokenVal(attr),
238 expr_context_e.Store)
239
240 raise AssertionError(typ0)
241
242 def _DictPair(self, p_node):
243 # type: (PNode) -> Tuple[expr_t, expr_t]
244 """
245 dict_pair: ( Expr_Name [':' test]
246 | '[' testlist ']' ':' test
247 | sq_string ':' test
248 | dq_string ':' test )
249 """
250 assert p_node.typ == grammar_nt.dict_pair
251
252 typ = p_node.GetChild(0).typ
253
254 if typ in (grammar_nt.sq_string, grammar_nt.dq_string):
255 key = self.Expr(p_node.GetChild(0)) # type: expr_t
256 val = self.Expr(p_node.GetChild(2))
257 return key, val
258
259 tok0 = p_node.GetChild(0).tok
260 id_ = tok0.id
261
262 if id_ == Id.Expr_Name:
263 key_str = value.Str(lexer.TokenVal(tok0))
264 key = expr.Const(tok0, key_str)
265 if p_node.NumChildren() >= 3:
266 val = self.Expr(p_node.GetChild(2))
267 else:
268 val = expr.Implicit
269
270 if id_ == Id.Op_LBracket: # {[x+y]: 'val'}
271 key = self.Expr(p_node.GetChild(1))
272 val = self.Expr(p_node.GetChild(4))
273 return key, val
274
275 return key, val
276
277 def _Dict(self, parent, p_node):
278 # type: (PNode, PNode) -> expr.Dict
279 """
280 dict: dict_pair (comma_newline dict_pair)* [comma_newline]
281 """
282 if p_node.typ == Id.Op_RBrace: # {}
283 return expr.Dict(parent.tok, [], [])
284
285 assert p_node.typ == grammar_nt.dict
286
287 keys = [] # type: List[expr_t]
288 values = [] # type: List[expr_t]
289
290 n = p_node.NumChildren()
291 for i in xrange(0, n, 2):
292 key, val = self._DictPair(p_node.GetChild(i))
293 keys.append(key)
294 values.append(val)
295
296 return expr.Dict(parent.tok, keys, values)
297
298 def _Tuple(self, parent):
299 # type: (PNode) -> expr_t
300
301 n = parent.NumChildren()
302
303 # (x) -- not a tuple
304 if n == 1:
305 return self.Expr(parent.GetChild(0))
306
307 # x, and (x,) aren't allowed
308 if n == 2:
309 p_die('Invalid trailing comma', parent.GetChild(1).tok)
310
311 elts = [] # type: List[expr_t]
312 for i in xrange(0, n, 2): # skip commas
313 p_node = parent.GetChild(i)
314 elts.append(self.Expr(p_node))
315
316 return expr.Tuple(parent.tok, elts,
317 expr_context_e.Store) # unused expr_context_e
318
319 def _TestlistComp(self, parent, p_node, id0):
320 # type: (PNode, PNode, Id_t) -> expr_t
321 """
322 testlist_comp:
323 (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
324 """
325 assert p_node.typ == grammar_nt.testlist_comp
326
327 n = p_node.NumChildren()
328 if n > 1 and p_node.GetChild(1).typ == grammar_nt.comp_for:
329 child0 = p_node.GetChild(0)
330 if child0.typ == grammar_nt.splat_expr:
331 p_die('Splat not currently supported', child0.tok)
332 elt = self.Expr(child0)
333
334 comp = self._CompFor(p_node.GetChild(1))
335 if id0 == Id.Op_LParen: # (x+1 for x in y)
336 return expr.GeneratorExp(elt, [comp])
337 if id0 == Id.Op_LBracket: # [x+1 for x in y]
338 return expr.ListComp(parent.tok, elt, [comp])
339 raise AssertionError()
340
341 if id0 == Id.Op_LParen:
342 # Parenthesized expression like (x+1) or (x)
343 if n == 1:
344 return self.Expr(p_node.GetChild(0))
345
346 # Tuples (1,) (1, 2) etc. - TODO: should be a list literal?
347 if p_node.GetChild(1).typ == Id.Arith_Comma:
348 return self._Tuple(p_node)
349
350 raise AssertionError()
351
352 if id0 == Id.Op_LBracket: # List [1,2,3]
353 elts = [] # type: List[expr_t]
354 for i in xrange(0, n, 2): # skip commas
355 child = p_node.GetChild(i)
356 if child.typ == grammar_nt.splat_expr:
357 p_die('Splat not currently supported', child.tok)
358 elts.append(self.Expr(child))
359
360 return expr.List(parent.tok, elts,
361 expr_context_e.Store) # unused expr_context_e
362
363 raise AssertionError(Id_str(id0))
364
365 def _Atom(self, parent):
366 # type: (PNode) -> expr_t
367 """Handle alternatives of 'atom' where there's more than one child."""
368
369 tok = parent.GetChild(0).tok
370 id_ = tok.id
371 n = parent.NumChildren()
372
373 if id_ == Id.Op_LParen:
374 # atom: '(' [yield_expr|testlist_comp] ')' | ...
375 if n == 2: # () is a tuple
376 assert (
377 parent.GetChild(1).typ == Id.Op_RParen), parent.GetChild(1)
378 return expr.Tuple(tok, [], expr_context_e.Store)
379
380 return self._TestlistComp(parent, parent.GetChild(1), id_)
381
382 if id_ == Id.Op_LBracket:
383 # atom: ... | '[' [testlist_comp] ']' | ...
384
385 if n == 2: # []
386 assert (parent.GetChild(1).typ == Id.Op_RBracket
387 ), parent.GetChild(1)
388 return expr.List(tok, [],
389 expr_context_e.Store) # unused expr_context_e
390
391 return self._TestlistComp(parent, parent.GetChild(1), id_)
392
393 if id_ == Id.Left_CaretBracket: # ^[42 + x]
394 child = self.Expr(parent.GetChild(1))
395 return expr.Literal(child)
396
397 if id_ == Id.Op_LBrace:
398 # atom: ... | '{' [Op_Newline] [dict] '}'
399 i = 1
400 if parent.GetChild(i).typ == Id.Op_Newline:
401 i += 1
402 return self._Dict(parent, parent.GetChild(i))
403
404 if id_ == Id.Arith_Amp:
405 n = parent.NumChildren()
406 if n >= 3:
407 p_die("Places in containers not implemented yet",
408 parent.GetChild(2).tok)
409
410 name_tok = parent.GetChild(1).tok
411 return expr.Place(name_tok, lexer.TokenVal(name_tok), [])
412
413 if id_ == Id.Expr_Func:
414 # STUB. This should really be a Func, not Lambda.
415 return expr.Lambda([], expr.Implicit)
416
417 # 100 M
418 # Ignoring the suffix for now
419 if id_ == Id.Expr_DecInt:
420 assert n > 1
421 p_die("Units suffix not implemented", parent.GetChild(1).tok)
422 #return self.Expr(parent.GetChild(0))
423
424 # 100.5 M
425 # Ignoring the suffix for now
426 if id_ == Id.Expr_Float:
427 assert n > 1
428 p_die("unix suffix implemented", parent.GetChild(1).tok)
429 #return self.Expr(parent.GetChild(0))
430
431 raise AssertionError(Id_str(id_))
432
433 def _NameType(self, p_node):
434 # type: (PNode) -> NameType
435 """ name_type: Expr_Name [':'] [type_expr] """
436 name_tok = p_node.GetChild(0).tok
437 typ = None # type: Optional[TypeExpr]
438
439 n = p_node.NumChildren()
440 if n == 2:
441 typ = self._TypeExpr(p_node.GetChild(1))
442 if n == 3:
443 typ = self._TypeExpr(p_node.GetChild(2))
444
445 return NameType(name_tok, lexer.TokenVal(name_tok), typ)
446
447 def _NameTypeList(self, p_node):
448 # type: (PNode) -> List[NameType]
449 """ name_type_list: name_type (',' name_type)* """
450 assert p_node.typ == grammar_nt.name_type_list
451 results = [] # type: List[NameType]
452
453 n = p_node.NumChildren()
454 for i in xrange(0, n, 2): # was children[::2]
455 results.append(self._NameType(p_node.GetChild(i)))
456 return results
457
458 def _CompFor(self, p_node):
459 # type: (PNode) -> Comprehension
460 """comp_for: 'for' exprlist 'in' or_test ['if' or_test]"""
461 lhs = self._NameTypeList(p_node.GetChild(1))
462 iterable = self.Expr(p_node.GetChild(3))
463
464 if p_node.NumChildren() >= 6:
465 cond = self.Expr(p_node.GetChild(5))
466 else:
467 cond = None
468
469 return Comprehension(lhs, iterable, cond)
470
471 def _CompareChain(self, parent):
472 # type: (PNode) -> expr_t
473 """comparison: expr (comp_op expr)*"""
474 cmp_ops = [] # type: List[Token]
475 comparators = [] # type: List[expr_t]
476 left = self.Expr(parent.GetChild(0))
477
478 i = 1
479 n = parent.NumChildren()
480 while i < n:
481 p = parent.GetChild(i)
482 op = p.GetChild(0).tok
483 if p.NumChildren() == 2:
484 # Blame the first token, and change its type
485 if op.id == Id.Expr_Not: # not in
486 op.id = Id.Node_NotIn
487 elif op.id == Id.Expr_Is: # is not
488 op.id = Id.Node_IsNot
489 else:
490 raise AssertionError()
491 else:
492 # is, <, ==, etc.
493 pass
494
495 cmp_ops.append(op)
496 i += 1
497 comparators.append(self.Expr(parent.GetChild(i)))
498 i += 1
499 return expr.Compare(left, cmp_ops, comparators)
500
501 def _Subscript(self, parent):
502 # type: (PNode) -> expr_t
503 """subscript: expr | [expr] ':' [expr]"""
504 typ0 = parent.GetChild(0).typ
505
506 n = parent.NumChildren()
507
508 if typ0 == grammar_nt.expr:
509 if n == 3: # a[1:2]
510 lower = self.Expr(parent.GetChild(0))
511 op_tok = parent.GetChild(1).tok
512 upper = self.Expr(parent.GetChild(2))
513
514 elif n == 2: # a[1:]
515 lower = self.Expr(parent.GetChild(0))
516 op_tok = parent.GetChild(1).tok
517 upper = None
518 else: # a[1]
519 return self.Expr(parent.GetChild(0))
520 else:
521 assert typ0 == Id.Arith_Colon
522 lower = None
523 if n == 1: # a[:]
524 op_tok = parent.GetChild(0).tok
525 upper = None
526 else: # a[:3]
527 op_tok = parent.GetChild(0).tok
528 upper = self.Expr(parent.GetChild(1))
529
530 return expr.Slice(lower, op_tok, upper)
531
532 def Expr(self, pnode):
533 # type: (PNode) -> expr_t
534 """Transform expressions (as opposed to statements)"""
535 typ = pnode.typ
536
537 #
538 # YSH Entry Points / Additions
539 #
540
541 if typ == grammar_nt.ysh_expr: # for if/while
542 # ysh_expr: '(' testlist ')'
543 return self.Expr(pnode.GetChild(1))
544
545 if typ == grammar_nt.command_expr:
546 # return_expr: testlist end_stmt
547 return self.Expr(pnode.GetChild(0))
548
549 #
550 # Python-like Expressions / Operators
551 #
552
553 if typ == grammar_nt.atom:
554 if pnode.NumChildren() == 1:
555 return self.Expr(pnode.GetChild(0))
556 return self._Atom(pnode)
557
558 if typ == grammar_nt.testlist:
559 # testlist: test (',' test)* [',']
560 return self._Tuple(pnode)
561
562 if typ == grammar_nt.test:
563 # test: or_test ['if' or_test 'else' test] | lambdef
564 if pnode.NumChildren() == 1:
565 return self.Expr(pnode.GetChild(0))
566
567 # TODO: Handle lambdef
568
569 test = self.Expr(pnode.GetChild(2))
570 body = self.Expr(pnode.GetChild(0))
571 orelse = self.Expr(pnode.GetChild(4))
572 return expr.IfExp(test, body, orelse)
573
574 if typ == grammar_nt.lambdef:
575 # lambdef: '|' [name_type_list] '|' test
576
577 n = pnode.NumChildren()
578 if n == 4:
579 params = self._NameTypeList(pnode.GetChild(1))
580 else:
581 params = []
582
583 body = self.Expr(pnode.GetChild(n - 1))
584 return expr.Lambda(params, body)
585
586 #
587 # Operators with Precedence
588 #
589
590 if typ == grammar_nt.or_test:
591 # or_test: and_test ('or' and_test)*
592 return self._LeftAssoc(pnode)
593
594 if typ == grammar_nt.and_test:
595 # and_test: not_test ('and' not_test)*
596 return self._LeftAssoc(pnode)
597
598 if typ == grammar_nt.not_test:
599 # not_test: 'not' not_test | comparison
600 if pnode.NumChildren() == 1:
601 return self.Expr(pnode.GetChild(0))
602
603 op_tok = pnode.GetChild(0).tok # not
604 return expr.Unary(op_tok, self.Expr(pnode.GetChild(1)))
605
606 elif typ == grammar_nt.comparison:
607 if pnode.NumChildren() == 1:
608 return self.Expr(pnode.GetChild(0))
609
610 return self._CompareChain(pnode)
611
612 elif typ == grammar_nt.range_expr:
613 n = pnode.NumChildren()
614 if n == 1:
615 return self.Expr(pnode.GetChild(0))
616
617 if n == 3:
618 return expr.Range(self.Expr(pnode.GetChild(0)),
619 pnode.GetChild(1).tok,
620 self.Expr(pnode.GetChild(2)))
621
622 raise AssertionError(n)
623
624 elif typ == grammar_nt.expr:
625 # expr: xor_expr ('|' xor_expr)*
626 return self._LeftAssoc(pnode)
627
628 if typ == grammar_nt.xor_expr:
629 # xor_expr: and_expr ('xor' and_expr)*
630 return self._LeftAssoc(pnode)
631
632 if typ == grammar_nt.and_expr: # a & b
633 # and_expr: shift_expr ('&' shift_expr)*
634 return self._LeftAssoc(pnode)
635
636 elif typ == grammar_nt.shift_expr:
637 # shift_expr: arith_expr (('<<'|'>>') arith_expr)*
638 return self._LeftAssoc(pnode)
639
640 elif typ == grammar_nt.arith_expr:
641 # arith_expr: term (('+'|'-') term)*
642 return self._LeftAssoc(pnode)
643
644 elif typ == grammar_nt.term:
645 # term: factor (('*'|'/'|'div'|'mod') factor)*
646 return self._LeftAssoc(pnode)
647
648 elif typ == grammar_nt.factor:
649 # factor: ('+'|'-'|'~') factor | power
650 # the power would have already been reduced
651 if pnode.NumChildren() == 1:
652 return self.Expr(pnode.GetChild(0))
653
654 assert pnode.NumChildren() == 2
655 op = pnode.GetChild(0)
656 e = pnode.GetChild(1)
657
658 assert isinstance(op.tok, Token)
659 return expr.Unary(op.tok, self.Expr(e))
660
661 elif typ == grammar_nt.power:
662 # power: atom trailer* ['**' factor]
663
664 node = self.Expr(pnode.GetChild(0))
665 if pnode.NumChildren() == 1: # No trailers
666 return node
667
668 # Support a->startswith(b) and mydict.key
669 n = pnode.NumChildren()
670 i = 1
671 while i < n and pnode.GetChild(i).typ == grammar_nt.trailer:
672 node = self._Trailer(node, pnode.GetChild(i))
673 i += 1
674
675 if i != n: # ['**' factor]
676 op_tok = pnode.GetChild(i).tok
677 assert op_tok.id == Id.Arith_DStar, op_tok
678 factor = self.Expr(pnode.GetChild(i + 1))
679 node = expr.Binary(op_tok, node, factor)
680
681 return node
682
683 elif typ == grammar_nt.eggex:
684 return self._Eggex(pnode)
685
686 elif typ == grammar_nt.ysh_expr_sub: # $[] @[] command mode
687 return self.Expr(pnode.GetChild(0))
688
689 elif typ == grammar_nt.ysh_expr_sub_2: # $[] @[] expression mode
690 left = pnode.GetChild(0).tok
691 e2 = self.Expr(pnode.GetChild(1))
692 right = pnode.GetChild(2).tok
693 return ExprSub(left, e2, right)
694
695 #
696 # YSH Lexer Modes
697 #
698
699 elif typ == grammar_nt.sh_array_literal:
700 return cast(YshArrayLiteral, pnode.GetChild(1).tok)
701
702 elif typ == grammar_nt.old_sh_array_literal:
703 return cast(YshArrayLiteral, pnode.GetChild(1).tok)
704
705 elif typ == grammar_nt.sh_command_sub:
706 return cast(CommandSub, pnode.GetChild(1).tok)
707
708 elif typ == grammar_nt.braced_var_sub:
709 return cast(BracedVarSub, pnode.GetChild(1).tok)
710
711 elif typ == grammar_nt.dq_string:
712 dq = cast(DoubleQuoted, pnode.GetChild(1).tok)
713 # sugar: ^"..." is short for ^["..."]
714 if pnode.GetChild(0).typ == Id.Left_CaretDoubleQuote:
715 return expr.Literal(dq)
716 return dq
717
718 elif typ == grammar_nt.sq_string:
719 return cast(SingleQuoted, pnode.GetChild(1).tok)
720
721 elif typ == grammar_nt.simple_var_sub:
722 tok = pnode.GetChild(0).tok
723
724 if tok.id == Id.VSub_DollarName: # $foo is disallowed
725 bare = lexer.TokenSliceLeft(tok, 1)
726 p_die(
727 'In expressions, remove $ and use `%s`, or sometimes "$%s"'
728 % (bare, bare), tok)
729
730 # $? is allowed
731 return SimpleVarSub(tok)
732
733 #
734 # Terminals
735 #
736
737 tok = pnode.tok
738 if typ == Id.Expr_Name:
739 return expr.Var(tok, lexer.TokenVal(tok))
740
741 # Everything else is an expr.Const
742 tok_str = lexer.TokenVal(tok)
743 # Remove underscores from 1_000_000. The lexer is responsible for
744 # validation.
745 c_under = tok_str.replace('_', '')
746
747 if typ == Id.Expr_DecInt:
748 ok, big_int = mops.FromStr2(c_under)
749 if not ok:
750 p_die('Decimal int constant is too large', tok)
751 cval = value.Int(big_int) # type: value_t
752
753 elif typ == Id.Expr_BinInt:
754 assert c_under[:2] in ('0b', '0B'), c_under
755 ok, big_int = mops.FromStr2(c_under[2:], 2)
756 if not ok:
757 p_die('Binary int constant is too large', tok)
758 cval = value.Int(big_int)
759
760 elif typ == Id.Expr_OctInt:
761 assert c_under[:2] in ('0o', '0O'), c_under
762 ok, big_int = mops.FromStr2(c_under[2:], 8)
763 if not ok:
764 p_die('Octal int constant is too large', tok)
765 cval = value.Int(big_int)
766
767 elif typ == Id.Expr_HexInt:
768 assert c_under[:2] in ('0x', '0X'), c_under
769 ok, big_int = mops.FromStr2(c_under[2:], 16)
770 if not ok:
771 p_die('Hex int constant is too large', tok)
772 cval = value.Int(big_int)
773
774 elif typ == Id.Expr_Float:
775 # Note: float() in mycpp/gc_builtins.cc currently uses strtod
776 # I think this never raises ValueError, because the lexer
777 # should only accept strings that strtod() does?
778 cval = value.Float(float(c_under))
779
780 elif typ == Id.Expr_Null:
781 cval = value.Null
782
783 elif typ == Id.Expr_True:
784 cval = value.Bool(True)
785
786 elif typ == Id.Expr_False:
787 cval = value.Bool(False)
788
789 elif typ == Id.Char_OneChar: # \n
790 assert len(tok_str) == 2, tok_str
791 s = consts.LookupCharC(lexer.TokenSliceLeft(tok, 1))
792 cval = value.Str(s)
793
794 elif typ == Id.Char_YHex: # \yff
795 assert len(tok_str) == 4, tok_str
796 hex_str = lexer.TokenSliceLeft(tok, 2)
797 s = chr(int(hex_str, 16))
798 cval = value.Str(s)
799
800 elif typ == Id.Char_UBraced: # \u{123}
801 hex_str = lexer.TokenSlice(tok, 3, -1)
802 code_point = int(hex_str, 16)
803 s = j8.Utf8Encode(code_point)
804 cval = value.Str(s)
805
806 else:
807 raise AssertionError(typ)
808
809 return expr.Const(tok, cval)
810
811 def _CheckLhs(self, lhs):
812 # type: (expr_t) -> None
813
814 UP_lhs = lhs
815 with tagswitch(lhs) as case:
816 if case(expr_e.Var):
817 # OK - e.g. setvar a.b.c[i] = 42
818 pass
819
820 elif case(expr_e.Subscript):
821 lhs = cast(Subscript, UP_lhs)
822 self._CheckLhs(lhs.obj) # recurse on LHS
823
824 elif case(expr_e.Attribute):
825 lhs = cast(Attribute, UP_lhs)
826 self._CheckLhs(lhs.obj) # recurse on LHS
827
828 else:
829 # Illegal - e.g. setglobal {}["key"] = 42
830 p_die("Subscript/Attribute not allowed on this LHS expression",
831 location.TokenForExpr(lhs))
832
833 def _LhsExprList(self, p_node):
834 # type: (PNode) -> List[y_lhs_t]
835 """lhs_list: expr (',' expr)*"""
836 assert p_node.typ == grammar_nt.lhs_list
837
838 lhs_list = [] # type: List[y_lhs_t]
839 n = p_node.NumChildren()
840 for i in xrange(0, n, 2):
841 p = p_node.GetChild(i)
842 #self.p_printer.Print(p)
843
844 e = self.Expr(p)
845 UP_e = e
846 with tagswitch(e) as case:
847 if case(expr_e.Var):
848 e = cast(expr.Var, UP_e)
849 lhs_list.append(e.left)
850
851 elif case(expr_e.Subscript):
852 e = cast(Subscript, UP_e)
853 self._CheckLhs(e)
854 lhs_list.append(e)
855
856 elif case(expr_e.Attribute):
857 e = cast(Attribute, UP_e)
858 self._CheckLhs(e)
859 if e.op.id != Id.Expr_Dot:
860 # e.g. setvar obj->method is not valid
861 p_die("Can't assign to this attribute expr", e.op)
862 lhs_list.append(e)
863
864 else:
865 pass # work around mycpp bug
866
867 # TODO: could blame arbitrary expr_t, but this works most of
868 # the time
869 if p.tok:
870 blame = p.tok # type: loc_t
871 else:
872 blame = loc.Missing
873 p_die("Can't assign to this expression", blame)
874
875 return lhs_list
876
877 def MakeVarDecl(self, p_node):
878 # type: (PNode) -> VarDecl
879 """
880 ysh_var_decl: name_type_list ['=' testlist] end_stmt
881 """
882 assert p_node.typ == grammar_nt.ysh_var_decl
883
884 lhs = self._NameTypeList(p_node.GetChild(0)) # could be a tuple
885
886 # This syntax is confusing, and different than JavaScript
887 # var x, y = 1, 2
888 # But this is useful:
889 # var flag, i = parseArgs(spec, argv)
890
891 n = p_node.NumChildren()
892 if n >= 3:
893 rhs = self.Expr(p_node.GetChild(2))
894 else:
895 rhs = None
896
897 # The caller should fill in the keyword token.
898 return VarDecl(None, lhs, rhs)
899
900 def MakeMutation(self, p_node):
901 # type: (PNode) -> Mutation
902 """
903 ysh_mutation: lhs_list (augassign | '=') testlist end_stmt
904 """
905 assert p_node.typ == grammar_nt.ysh_mutation
906
907 lhs_list = self._LhsExprList(p_node.GetChild(0)) # could be a tuple
908 op_tok = p_node.GetChild(1).tok
909 if len(lhs_list) > 1 and op_tok.id != Id.Arith_Equal:
910 p_die('Multiple assignment must use =', op_tok)
911 rhs = self.Expr(p_node.GetChild(2))
912 return Mutation(None, lhs_list, op_tok, rhs)
913
914 def _EggexFlag(self, p_node):
915 # type: (PNode) -> EggexFlag
916 n = p_node.NumChildren()
917 if n == 1:
918 return EggexFlag(False, p_node.GetChild(0).tok)
919 elif n == 2:
920 return EggexFlag(True, p_node.GetChild(1).tok)
921 else:
922 raise AssertionError()
923
924 def _Eggex(self, p_node):
925 # type: (PNode) -> Eggex
926 """
927 eggex: '/' regex [';' re_flag* [';' Expr_Name] ] '/'
928 """
929 left = p_node.GetChild(0).tok
930 regex = self._Regex(p_node.GetChild(1))
931
932 flags = [] # type: List[EggexFlag]
933 trans_pref = None # type: Optional[Token]
934
935 i = 2
936 current = p_node.GetChild(i)
937 if current.typ == Id.Op_Semi:
938 i += 1
939 while True:
940 current = p_node.GetChild(i)
941 if current.typ != grammar_nt.re_flag:
942 break
943 flags.append(self._EggexFlag(current))
944 i += 1
945
946 if current.typ == Id.Op_Semi:
947 i += 1
948 trans_pref = p_node.GetChild(i).tok
949
950 # Canonicalize and validate flags for ERE only. Default is ERE.
951 if trans_pref is None or lexer.TokenVal(trans_pref) == 'ERE':
952 canonical_flags = regex_translate.CanonicalFlags(flags)
953 else:
954 canonical_flags = None
955
956 return Eggex(left, regex, flags, trans_pref, canonical_flags)
957
958 def YshCasePattern(self, pnode):
959 # type: (PNode) -> pat_t
960 assert pnode.typ == grammar_nt.ysh_case_pat, pnode
961
962 pattern = pnode.GetChild(0)
963 typ = pattern.typ
964 if typ == Id.Op_LParen:
965 # pat_expr or pat_else
966 pattern = pnode.GetChild(1)
967 typ = pattern.typ
968
969 if typ == grammar_nt.pat_else:
970 return pat.Else
971
972 if typ == grammar_nt.pat_exprs:
973 exprs = [] # type: List[expr_t]
974 for i in xrange(pattern.NumChildren()):
975 child = pattern.GetChild(i)
976 if child.typ == grammar_nt.expr:
977 expr = self.Expr(child)
978 exprs.append(expr)
979 return pat.YshExprs(exprs)
980
981 if typ == grammar_nt.eggex:
982 return self._Eggex(pattern)
983
984 raise AssertionError()
985
986 def _BlockArg(self, p_node):
987 # type: (PNode) -> expr_t
988
989 n = p_node.NumChildren()
990 if n == 1:
991 child = p_node.GetChild(0)
992 return self.Expr(child)
993
994 # It can only be an expression, not a=42, or ...expr
995 p_die('Invalid block expression argument', p_node.tok)
996
997 def _Argument(self, p_node, after_semi, arglist):
998 # type: (PNode, bool, ArgList) -> None
999 """
1000 argument: (
1001 test [comp_for]
1002 | test '=' test # named arg
1003 | '...' test # var args
1004 )
1005 """
1006 pos_args = arglist.pos_args
1007 named_args = arglist.named_args
1008
1009 assert p_node.typ == grammar_nt.argument, p_node
1010 n = p_node.NumChildren()
1011 if n == 1:
1012 child = p_node.GetChild(0)
1013 if after_semi:
1014 p_die(POS_ARG_MISPLACED, child.tok)
1015 arg = self.Expr(child)
1016 pos_args.append(arg)
1017 return
1018
1019 if n == 2:
1020 # Note: We allow multiple spreads, just like Julia. They are
1021 # concatenated as in lists and dicts.
1022 tok0 = p_node.GetChild(0).tok
1023 if tok0.id == Id.Expr_Ellipsis:
1024 spread_expr = expr.Spread(tok0, self.Expr(p_node.GetChild(1)))
1025 if after_semi: # f(; ... named)
1026 named_args.append(NamedArg(None, spread_expr))
1027 else: # f(...named)
1028 pos_args.append(spread_expr)
1029 return
1030
1031 # Note: generator expression not implemented
1032 if p_node.GetChild(1).typ == grammar_nt.comp_for:
1033 child = p_node.GetChild(0)
1034 if after_semi:
1035 p_die(POS_ARG_MISPLACED, child.tok)
1036
1037 elt = self.Expr(child)
1038 comp = self._CompFor(p_node.GetChild(1))
1039 arg = expr.GeneratorExp(elt, [comp])
1040 pos_args.append(arg)
1041 return
1042
1043 raise AssertionError()
1044
1045 if n == 3: # named args can come before or after the semicolon
1046 n1 = NamedArg(
1047 p_node.GetChild(0).tok, self.Expr(p_node.GetChild(2)))
1048 named_args.append(n1)
1049 return
1050
1051 raise AssertionError()
1052
1053 def _ArgGroup(self, p_node, after_semi, arglist):
1054 # type: (PNode, bool, ArgList) -> None
1055 """
1056 arg_group: argument (',' argument)* [',']
1057 """
1058 for i in xrange(p_node.NumChildren()):
1059 p_child = p_node.GetChild(i)
1060 if p_child.typ == grammar_nt.argument:
1061 self._Argument(p_child, after_semi, arglist)
1062
1063 def _ArgList(self, p_node, arglist):
1064 # type: (PNode, ArgList) -> None
1065 """For both funcs and procs
1066
1067 arglist: (
1068 [arg_group]
1069 [';' [arg_group]]
1070 )
1071
1072 arglist3: ...
1073 """
1074 n = p_node.NumChildren()
1075 if n == 0:
1076 return
1077
1078 i = 0
1079
1080 if i >= n:
1081 return
1082 child = p_node.GetChild(i)
1083 if child.typ == grammar_nt.arg_group:
1084 self._ArgGroup(child, False, arglist)
1085 i += 1
1086
1087 if i >= n:
1088 return
1089 child = p_node.GetChild(i)
1090 if child.typ == Id.Op_Semi:
1091 arglist.semi_tok = child.tok
1092 i += 1
1093
1094 # Named args after first semi-colon
1095 if i >= n:
1096 return
1097 child = p_node.GetChild(i)
1098 if child.typ == grammar_nt.arg_group:
1099 self._ArgGroup(child, True, arglist)
1100 i += 1
1101
1102 #
1103 # Special third group may have block expression - only for arglist3,
1104 # used for procs!
1105 #
1106
1107 if i >= n:
1108 return
1109 assert p_node.typ == grammar_nt.arglist3, p_node
1110
1111 child = p_node.GetChild(i)
1112 if child.typ == Id.Op_Semi:
1113 arglist.semi_tok2 = child.tok
1114 i += 1
1115
1116 if i >= n:
1117 return
1118 child = p_node.GetChild(i)
1119 if child.typ == grammar_nt.argument:
1120 arglist.block_expr = self._BlockArg(child)
1121 i += 1
1122
1123 def ProcCallArgs(self, pnode, arglist):
1124 # type: (PNode, ArgList) -> None
1125 """
1126 ysh_eager_arglist: '(' [arglist3] ')'
1127 ysh_lazy_arglist: '[' [arglist] ']'
1128 """
1129 n = pnode.NumChildren()
1130 if n == 2: # f()
1131 return
1132
1133 if n == 3:
1134 child1 = pnode.GetChild(1) # the X in '( X )'
1135
1136 self._ArgList(child1, arglist)
1137 return
1138
1139 raise AssertionError()
1140
1141 def _TypeExpr(self, pnode):
1142 # type: (PNode) -> TypeExpr
1143 """
1144 type_expr: Expr_Name [ '[' type_expr (',' type_expr)* ']' ]
1145 """
1146 assert pnode.typ == grammar_nt.type_expr, pnode.typ
1147
1148 ty = TypeExpr.CreateNull() # don't allocate children
1149
1150 ty.tok = pnode.GetChild(0).tok
1151 ty.name = lexer.TokenVal(ty.tok)
1152
1153 n = pnode.NumChildren()
1154 if n == 1:
1155 return ty
1156
1157 ty.params = []
1158 i = 2
1159 while i < n:
1160 p = self._TypeExpr(pnode.GetChild(i))
1161 ty.params.append(p)
1162 i += 2 # skip comma
1163
1164 return ty
1165
1166 def _Param(self, pnode):
1167 # type: (PNode) -> Param
1168 """
1169 param: Expr_Name [type_expr] ['=' expr]
1170 """
1171 assert pnode.typ == grammar_nt.param
1172
1173 name_tok = pnode.GetChild(0).tok
1174 n = pnode.NumChildren()
1175
1176 assert name_tok.id == Id.Expr_Name, name_tok
1177
1178 default_val = None # type: expr_t
1179 type_ = None # type: TypeExpr
1180
1181 if n == 1:
1182 # proc p(a)
1183 pass
1184
1185 elif n == 2:
1186 # proc p(a Int)
1187 type_ = self._TypeExpr(pnode.GetChild(1))
1188
1189 elif n == 3:
1190 # proc p(a = 3)
1191 default_val = self.Expr(pnode.GetChild(2))
1192
1193 elif n == 4:
1194 # proc p(a Int = 3)
1195 type_ = self._TypeExpr(pnode.GetChild(1))
1196 default_val = self.Expr(pnode.GetChild(3))
1197
1198 return Param(name_tok, lexer.TokenVal(name_tok), type_, default_val)
1199
1200 def _ParamGroup(self, p_node):
1201 # type: (PNode) -> ParamGroup
1202 """
1203 param_group:
1204 (param ',')*
1205 [ (param | '...' Expr_Name) [,] ]
1206 """
1207 assert p_node.typ == grammar_nt.param_group, p_node
1208
1209 params = [] # type: List[Param]
1210 rest_of = None # type: Optional[RestParam]
1211
1212 n = p_node.NumChildren()
1213 i = 0
1214 while i < n:
1215 child = p_node.GetChild(i)
1216 if child.typ == grammar_nt.param:
1217 params.append(self._Param(child))
1218
1219 elif child.typ == Id.Expr_Ellipsis:
1220 tok = p_node.GetChild(i + 1).tok
1221 rest_of = RestParam(tok, lexer.TokenVal(tok))
1222
1223 i += 2
1224
1225 return ParamGroup(params, rest_of)
1226
1227 def Proc(self, p_node):
1228 # type: (PNode) -> proc_sig_t
1229 """
1230 ysh_proc: (
1231 [ '('
1232 [ param_group ] # word params, with defaults
1233 [ ';' [ param_group ] ] # positional typed params, with defaults
1234 [ ';' [ param_group ] ] # named params, with defaults
1235 [ ';' Expr_Name ] # optional block param, with no type or default
1236 ')'
1237 ]
1238 '{' # opening { for pgen2
1239 )
1240 """
1241 assert p_node.typ == grammar_nt.ysh_proc
1242
1243 n = p_node.NumChildren()
1244 if n == 1: # proc f {
1245 return proc_sig.Open
1246
1247 if n == 3: # proc f () {
1248 return proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1249
1250 # proc f( three param groups, and block group )
1251 sig = proc_sig.Closed.CreateNull(alloc_lists=True) # no params
1252
1253 # Word args
1254 i = 1
1255 child = p_node.GetChild(i)
1256 if child.typ == grammar_nt.param_group:
1257 sig.word = self._ParamGroup(p_node.GetChild(i))
1258
1259 # Validate word args
1260 for word in sig.word.params:
1261 if word.type:
1262 if word.type.name not in ('Str', 'Ref'):
1263 p_die('Word params may only have type Str or Ref',
1264 word.type.tok)
1265 if word.type.params is not None:
1266 p_die('Unexpected type parameters', word.type.tok)
1267
1268 i += 2
1269 else:
1270 i += 1
1271
1272 #log('i %d n %d', i, n)
1273 if i >= n:
1274 return sig
1275
1276 # Positional args
1277 child = p_node.GetChild(i)
1278 if child.typ == grammar_nt.param_group:
1279 sig.positional = self._ParamGroup(p_node.GetChild(i))
1280 i += 2
1281 else:
1282 i += 1
1283
1284 #log('i %d n %d', i, n)
1285 if i >= n:
1286 return sig
1287
1288 # Keyword args
1289 child = p_node.GetChild(i)
1290 if child.typ == grammar_nt.param_group:
1291 sig.named = self._ParamGroup(p_node.GetChild(i))
1292 i += 2
1293 else:
1294 i += 1
1295
1296 #log('i %d n %d', i, n)
1297 if i >= n:
1298 return sig
1299
1300 child = p_node.GetChild(i)
1301 if child.typ == grammar_nt.param_group:
1302 group = self._ParamGroup(p_node.GetChild(i))
1303 params = group.params
1304 if len(params) > 1:
1305 p_die('Only 1 block param is allowed', params[1].blame_tok)
1306 if group.rest_of:
1307 p_die("Rest param isn't allowed for blocks",
1308 group.rest_of.blame_tok)
1309
1310 if len(params) == 1:
1311 if params[0].type:
1312 if params[0].type.name != 'Command':
1313 p_die('Block param must have type Command',
1314 params[0].type.tok)
1315 if params[0].type.params is not None:
1316 p_die('Unexpected type parameters', params[0].type.tok)
1317
1318 sig.block_param = params[0]
1319
1320 return sig
1321
1322 def YshFunc(self, p_node, out):
1323 # type: (PNode, Func) -> None
1324 """
1325 ysh_func: Expr_Name '(' [param_group] [';' param_group] ')'
1326 """
1327 assert p_node.typ == grammar_nt.ysh_func
1328
1329 #self.p_printer.Print(p_node)
1330
1331 out.name = p_node.GetChild(0).tok
1332
1333 n = p_node.NumChildren()
1334 i = 2 # after (
1335
1336 child = p_node.GetChild(i)
1337 if child.typ == grammar_nt.param_group:
1338 out.positional = self._ParamGroup(child)
1339 i += 2 # skip past ;
1340 else:
1341 i += 1
1342
1343 if i >= n:
1344 return
1345
1346 child = p_node.GetChild(i)
1347 if child.typ == grammar_nt.param_group:
1348 out.named = self._ParamGroup(child)
1349
1350 #
1351 # Eggex Language
1352 #
1353
1354 def _RangeCharSingleQuoted(self, p_node):
1355 # type: (PNode) -> Optional[CharCode]
1356
1357 assert p_node.typ == grammar_nt.range_char, p_node
1358
1359 # 'a' in 'a'-'b'
1360
1361 child0 = p_node.GetChild(0)
1362 if child0.typ == grammar_nt.sq_string:
1363 sq_part = cast(SingleQuoted, child0.GetChild(1).tok)
1364 n = len(sq_part.sval)
1365 if n == 0:
1366 p_die("Quoted range char can't be empty",
1367 loc.WordPart(sq_part))
1368 elif n == 1:
1369 return CharCode(sq_part.left, ord(sq_part.sval[0]), False)
1370 else:
1371 p_die(RANGE_POINT_TOO_LONG, loc.WordPart(sq_part))
1372 return None
1373
1374 def _OtherRangeToken(self, p_node):
1375 # type: (PNode) -> Token
1376 """An endpoint of a range (single char)
1377
1378 range_char: Expr_Name | Expr_DecInt | sq_string | char_literal
1379 a-z 0-9 'a'-'z' \x00-\xff
1380 """
1381 assert p_node.typ == grammar_nt.range_char, p_node
1382
1383 child0 = p_node.GetChild(0)
1384 if child0.typ == grammar_nt.char_literal:
1385 # \x00 in /[\x00 - \x20]/
1386 tok = child0.GetChild(0).tok
1387 return tok
1388
1389 tok = p_node.tok
1390 # a in a-z is Expr_Name
1391 # 0 in 0-9 is Expr_DecInt
1392 assert tok.id in (Id.Expr_Name, Id.Expr_DecInt), tok
1393
1394 if tok.length != 1:
1395 p_die(RANGE_POINT_TOO_LONG, tok)
1396 return tok
1397
1398 def _NonRangeChars(self, p_node):
1399 # type: (PNode) -> class_literal_term_t
1400 assert p_node.typ == grammar_nt.range_char, p_node
1401
1402 child0 = p_node.GetChild(0)
1403 typ0 = p_node.GetChild(0).typ
1404
1405 if typ0 == grammar_nt.sq_string: # /['foo']/
1406 return cast(SingleQuoted, child0.GetChild(1).tok)
1407
1408 if typ0 == grammar_nt.char_literal: # /[ \n \u{3bc} \yff ]/
1409 return word_compile.EvalCharLiteralForRegex(child0.tok)
1410
1411 if typ0 == Id.Expr_DecInt: # /[5]/
1412 tok = child0.tok
1413 if tok.length != 1:
1414 p_die("Unquoted number in char class must be a single byte", tok)
1415 return word_compile.EvalCharLiteralForRegex(tok)
1416
1417 if typ0 == Id.Expr_Name: # /[ d digit ]/
1418 # Look up PerlClass and PosixClass, or handle single char/digit
1419 return self._NameInClass(None, child0.tok)
1420
1421 raise AssertionError()
1422
1423 def _ClassLiteralTerm(self, p_node):
1424 # type: (PNode) -> class_literal_term_t
1425 """
1426 class_literal_term:
1427 range_char ['-' range_char ]
1428 | '@' Expr_Name # splice
1429 | '!' Expr_Name # negate char class
1430 ...
1431 """
1432 assert p_node.typ == grammar_nt.class_literal_term, p_node
1433
1434 typ0 = p_node.GetChild(0).typ
1435
1436 if typ0 == grammar_nt.range_char:
1437 n = p_node.NumChildren()
1438
1439 if n == 1:
1440 return self._NonRangeChars(p_node.GetChild(0))
1441
1442 # 'a'-'z' etc.
1443 if n == 3:
1444 assert p_node.GetChild(1).typ == Id.Arith_Minus, p_node
1445
1446 left = p_node.GetChild(0)
1447 right = p_node.GetChild(2)
1448
1449 code1 = self._RangeCharSingleQuoted(left)
1450 if code1 is None:
1451 tok1 = self._OtherRangeToken(left)
1452 code1 = word_compile.EvalCharLiteralForRegex(tok1)
1453
1454 code2 = self._RangeCharSingleQuoted(right)
1455 if code2 is None:
1456 tok2 = self._OtherRangeToken(right)
1457 code2 = word_compile.EvalCharLiteralForRegex(tok2)
1458 return CharRange(code1, code2)
1459
1460 raise AssertionError()
1461
1462 if typ0 == Id.Expr_At:
1463 tok1 = p_node.GetChild(1).tok
1464 return class_literal_term.Splice(tok1, lexer.TokenVal(tok1))
1465
1466 if typ0 == Id.Expr_Bang:
1467 return self._NameInClass(
1468 p_node.GetChild(0).tok,
1469 p_node.GetChild(1).tok)
1470
1471 p_die("This kind of class literal term isn't implemented",
1472 p_node.GetChild(0).tok)
1473
1474 def _ClassLiteral(self, p_node):
1475 # type: (PNode) -> List[class_literal_term_t]
1476 """class_literal: '[' class_literal_term+ ']'."""
1477 assert p_node.typ == grammar_nt.class_literal
1478 # skip [ and ]
1479 terms = [] # type: List[class_literal_term_t]
1480 for i in xrange(1, p_node.NumChildren() - 1):
1481 terms.append(self._ClassLiteralTerm(p_node.GetChild(i)))
1482
1483 return terms
1484
1485 def _NameInRegex(self, negated_tok, tok):
1486 # type: (Token, Token) -> re_t
1487 tok_str = lexer.TokenVal(tok)
1488 if tok_str == 'dot':
1489 if negated_tok:
1490 p_die("Can't negate this symbol", tok)
1491 return re.Primitive(tok, Id.Eggex_Dot)
1492
1493 if tok_str in POSIX_CLASSES:
1494 return PosixClass(negated_tok, tok_str)
1495
1496 perl = PERL_CLASSES.get(tok_str)
1497 if perl is not None:
1498 return PerlClass(negated_tok, perl)
1499
1500 if tok_str[0].isupper(): # e.g. HexDigit
1501 return re.Splice(tok, lexer.TokenVal(tok))
1502
1503 p_die("%r isn't a character class" % tok_str, tok)
1504
1505 def _NameInClass(self, negated_tok, tok):
1506 # type: (Token, Token) -> class_literal_term_t
1507 """Like the above, but 'dot' and 'd' don't mean anything within []"""
1508 # A bare, unquoted character literal. In the grammar, this is expressed as
1509 # range_char without an ending.
1510 assert tok.id == Id.Expr_Name, tok
1511
1512 # d is NOT 'digit', it's a literal 'd'!
1513 if tok.length == 1:
1514 # Expr_Name matches VAR_NAME_RE, which starts with [a-zA-Z_]
1515
1516 if negated_tok: # [~d] is not allowed, only [~digit]
1517 p_die("Can't negate this symbol", tok)
1518 return word_compile.EvalCharLiteralForRegex(tok)
1519
1520 tok_str = lexer.TokenVal(tok)
1521
1522 # digit, word, but not d, w, etc.
1523 if tok_str in POSIX_CLASSES:
1524 return PosixClass(negated_tok, tok_str)
1525
1526 perl = PERL_CLASSES.get(tok_str)
1527 if perl is not None:
1528 return PerlClass(negated_tok, perl)
1529
1530 p_die("%r isn't a character class" % tok_str, tok)
1531
1532 def _ReAtom(self, p_atom):
1533 # type: (PNode) -> re_t
1534 """
1535 re_atom: ( char_literal | ...
1536 """
1537 assert p_atom.typ == grammar_nt.re_atom, p_atom.typ
1538
1539 child0 = p_atom.GetChild(0)
1540
1541 typ0 = p_atom.GetChild(0).typ
1542 tok0 = p_atom.GetChild(0).tok
1543
1544 # Non-terminals
1545
1546 if typ0 == grammar_nt.class_literal:
1547 return re.CharClassLiteral(False, self._ClassLiteral(child0))
1548
1549 if typ0 == grammar_nt.sq_string:
1550 return cast(SingleQuoted, child0.GetChild(1).tok)
1551
1552 if typ0 == grammar_nt.char_literal:
1553 # Note: ERE doesn't seem to support escapes like Python
1554 # https://docs.python.org/3/library/re.html
1555 # We might want to do a translation like this:
1556 #
1557 # \u{03bc} -> \u03bc
1558 # \x00 -> \x00
1559 # \n -> \n
1560
1561 # Must be Id.Char_{OneChar,Hex,UBraced}
1562 assert consts.GetKind(tok0.id) == Kind.Char
1563 s = word_compile.EvalCStringToken(tok0.id, lexer.TokenVal(tok0))
1564 return re.LiteralChars(tok0, s)
1565
1566 # Special punctuation
1567 if typ0 == Id.Expr_Dot: # .
1568 return re.Primitive(tok0, Id.Eggex_Dot)
1569
1570 if typ0 == Id.Arith_Caret: # ^
1571 return re.Primitive(tok0, Id.Eggex_Start)
1572
1573 if typ0 == Id.Expr_Dollar: # $
1574 return re.Primitive(tok0, Id.Eggex_End)
1575
1576 if typ0 == Id.Expr_Name:
1577 # d digit -> PosixClass PerlClass etc.
1578 return self._NameInRegex(None, tok0)
1579
1580 if typ0 == Id.Expr_Symbol:
1581 # Validate symbols here, like we validate PerlClass, etc.
1582 tok_str = lexer.TokenVal(tok0)
1583 if tok_str == '%start':
1584 return re.Primitive(tok0, Id.Eggex_Start)
1585 if tok_str == '%end':
1586 return re.Primitive(tok0, Id.Eggex_End)
1587 p_die("Unexpected token %r in regex" % tok_str, tok0)
1588
1589 if typ0 == Id.Expr_At:
1590 # | '@' Expr_Name
1591 tok1 = p_atom.GetChild(1).tok
1592 return re.Splice(tok0, lexer.TokenVal(tok1))
1593
1594 if typ0 == Id.Expr_Bang:
1595 # | '!' (Expr_Name | class_literal)
1596 # | '!' '!' Expr_Name (Expr_Name | Expr_DecInt | '(' regex ')')
1597 n = p_atom.NumChildren()
1598 if n == 2:
1599 child1 = p_atom.GetChild(1)
1600 if child1.typ == grammar_nt.class_literal:
1601 return re.CharClassLiteral(True,
1602 self._ClassLiteral(child1))
1603 else:
1604 return self._NameInRegex(tok0, p_atom.GetChild(1).tok)
1605 else:
1606 # Note: !! conflicts with shell history
1607 p_die(
1608 "Backtracking with !! isn't implemented (requires Python/PCRE)",
1609 p_atom.GetChild(1).tok)
1610
1611 if typ0 == Id.Op_LParen:
1612 # | '(' regex ')'
1613
1614 # Note: in ERE (d+) is the same as <d+>. That is, Group becomes
1615 # Capture.
1616 return re.Group(self._Regex(p_atom.GetChild(1)))
1617
1618 if typ0 == Id.Arith_Less:
1619 # | '<' 'capture' regex ['as' Expr_Name] [':' Expr_Name] '>'
1620
1621 n = p_atom.NumChildren()
1622 assert n == 4 or n == 6 or n == 8, n
1623
1624 # < capture d+ >
1625 regex = self._Regex(p_atom.GetChild(2))
1626
1627 as_name = None # type: Optional[Token]
1628 func_name = None # type: Optional[Token]
1629
1630 i = 3 # points at any of > as :
1631
1632 typ = p_atom.GetChild(i).typ
1633 if typ == Id.Expr_As:
1634 as_name = p_atom.GetChild(i + 1).tok
1635 i += 2
1636
1637 typ = p_atom.GetChild(i).typ
1638 if typ == Id.Arith_Colon:
1639 func_name = p_atom.GetChild(i + 1).tok
1640
1641 return re.Capture(regex, as_name, func_name)
1642
1643 raise AssertionError(typ0)
1644
1645 def _RepeatOp(self, p_repeat):
1646 # type: (PNode) -> re_repeat_t
1647 """
1648 repeat_op: '+' | '*' | '?'
1649 | '{' [Expr_Name] ('+' | '*' | '?' | repeat_range) '}'
1650 """
1651 assert p_repeat.typ == grammar_nt.repeat_op, p_repeat
1652
1653 tok = p_repeat.GetChild(0).tok
1654 id_ = tok.id
1655
1656 if id_ in (Id.Arith_Plus, Id.Arith_Star, Id.Arith_QMark):
1657 return tok # a+ a* a?
1658
1659 if id_ == Id.Op_LBrace:
1660 child1 = p_repeat.GetChild(1)
1661 if child1.typ != grammar_nt.repeat_range:
1662 # e.g. dot{N *} is .*?
1663 p_die("Perl-style repetition isn't implemented with libc",
1664 child1.tok)
1665
1666 # repeat_range: (
1667 # Expr_DecInt [',']
1668 # | ',' Expr_DecInt
1669 # | Expr_DecInt ',' Expr_DecInt
1670 # )
1671
1672 n = child1.NumChildren()
1673 if n == 1: # {3}
1674 tok = child1.GetChild(0).tok
1675 return tok # different operator than + * ?
1676
1677 if n == 2:
1678 if child1.GetChild(0).typ == Id.Expr_DecInt: # {3,}
1679 left = child1.GetChild(0).tok
1680 return re_repeat.Range(left, lexer.TokenVal(left), '',
1681 None)
1682 else: # {,3}
1683 right = child1.GetChild(1).tok
1684 return re_repeat.Range(None, '', lexer.TokenVal(right),
1685 right)
1686
1687 if n == 3: # {1,3}
1688 left = child1.GetChild(0).tok
1689 right = child1.GetChild(2).tok
1690 return re_repeat.Range(left, lexer.TokenVal(left),
1691 lexer.TokenVal(right), right)
1692
1693 raise AssertionError(n)
1694
1695 raise AssertionError(id_)
1696
1697 def _ReAlt(self, p_node):
1698 # type: (PNode) -> re_t
1699 """
1700 re_alt: (re_atom [repeat_op])+
1701 """
1702 assert p_node.typ == grammar_nt.re_alt
1703
1704 i = 0
1705 n = p_node.NumChildren()
1706 seq = [] # type: List[re_t]
1707 while i < n:
1708 r = self._ReAtom(p_node.GetChild(i))
1709 i += 1
1710 if i < n and p_node.GetChild(i).typ == grammar_nt.repeat_op:
1711 repeat_op = self._RepeatOp(p_node.GetChild(i))
1712 r = re.Repeat(r, repeat_op)
1713 i += 1
1714 seq.append(r)
1715
1716 if len(seq) == 1:
1717 return seq[0]
1718 else:
1719 return re.Seq(seq)
1720
1721 def _Regex(self, p_node):
1722 # type: (PNode) -> re_t
1723 """
1724 regex: [re_alt] (('|'|'or') re_alt)*
1725 """
1726 assert p_node.typ == grammar_nt.regex
1727
1728 n = p_node.NumChildren()
1729 alts = [] # type: List[re_t]
1730 for i in xrange(0, n, 2): # was children[::2]
1731 c = p_node.GetChild(i)
1732 alts.append(self._ReAlt(c))
1733
1734 if len(alts) == 1:
1735 return alts[0]
1736 else:
1737 return re.Alt(alts)
1738
1739
1740# vim: sw=4