OILS / frontend / syntax.asdl

# Data types for the Oils AST, aka "Lossless Syntax Tree".
#
# Invariant: the source text can be reconstructed byte-for-byte from this tree.
# The test/lossless.sh suite verifies this.

# We usually try to preserve the physical order of the source in the ASDL
# fields. One exception is the order of redirects:
#
#   echo >out.txt hi
#   # versus
#   echo hi >out.txt

# Unrepresented:
# - let arithmetic (rarely used)
# - coprocesses # one with arg and one without
# - select block

# Possible refactorings:
#
#   # %CompoundWord as first class variant:
#   bool_expr = WordTest %CompoundWord | ...
#
#   # Can DoubleQuoted have a subset of parts compared with CompoundWord?
#   string_part = ... # subset of word_part
#
# - Distinguish word_t with BracedTree vs. those without? seq_word_t?

module syntax
{
  use frontend id_kind {
    id # special case use of uint16_t: normally we can only use pointer types
  }
  use core value {
    value
  }

  # More efficient than the List[bool] pattern we've been using
  BoolParamBox = (bool b)
  IntParamBox = (int i)

  # core/main_loop.py
  parse_result = EmptyLine | Eof | Node(command cmd)

  # 'source' represents the location of a line / token.
  source =
    Interactive
  | Headless
  | Unused(str comment) # completion and history never show parse errors?
  | CFlag
  | Stdin(str comment)

    # MainFile is for main.{osh,ysh}, --eval oshrc/yshrc. They're files loaded
    # directly by the shell.
  | MainFile(str path)
    # A file loaded by 'source' or 'use'.
    # TODO: we probably don't need this location? The debug stack provides a
    # chain of locations back to the sourced script. Maybe we need to point to
    # a debug_frame instead?
    # It could be DiskFileShell and DiskFileUser, or just DiskFile.
  | OtherFile(str path, loc location)

    # Code parsed from a word. (TODO: rename source.Word?)
    # used for 'eval arg', 'trap arg', 'printf arg',
    # parseCommand() - this is a string?
    # dynamic LHS - move this to Reparsed?
    # complete -W
  | Dynamic(str what, loc location)

    # Point to the original variable reference
  | VarRef(Token orig_tok)

    # code parsed from the value of a variable
    # used for $PS1 $PROMPT_COMMAND
  | Variable(str var_name, loc location)

    # alias expansion (location of first word)
  | Alias(str argv0, loc argv0_loc)

    # 2 kinds of reparsing: backticks, and x+1 in a[x+1]=y
  | Reparsed(str what, Token left_token, Token right_token)

    # For --location-str
  | Synthetic(str s)

  SourceLine = (int line_num, str content, source src)

  # Note that ASDL generates:
  #   typedef uint16_t Id_t;
  # So Token is
  #   8 bytes GC header + 2 + 4 + 4 + 8 + 8 = 34 bytes on 64-bit machines
  #
  # We transpose (id, col, length) -> (id, length, col) for C struct packing.
  Token = (id id, int length, int col, SourceLine? line, str? tval)
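  # Field by field, the 34 bytes above break down roughly as (ignoring any
  # compiler padding):
  #   8 (GC header) + 2 (id) + 4 (length) + 4 (col) + 8 (line) + 8 (tval) = 34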

  # I wanted to get rid of Token.tval with this separate WideToken type, but it
  # is more efficient if word_part.Literal %Token literally is the same thing
  # that comes out of the lexer. Otherwise we have extra garbage.

  # WideToken = (id id, int length, int col, SourceLine? line, str? tval)

  # Slight ASDL bug: CompoundWord has to be defined before using it as a shared
  # variant. The _product_counter algorithm should be moved into a separate
  # tag-assigning pass, and shared between gen_python.py and gen_cpp.py.
  CompoundWord = (List[word_part] parts)

  # Source location for errors
  loc =
    Missing # equivalent of runtime.NO_SPID
  | Token %Token
    # Very common case: argv arrays need original location
  | ArgWord %CompoundWord
  | WordPart(word_part p)
  | Word(word w)
  | Arith(arith_expr a)
    # e.g. for errexit blaming
  | Command(command c)

  # debug_frame_t is an EXECUTION stack (proc func source use eval), while
  # source_t (in some cases) is like a PARSING stack (files, strings from vars,
  # etc.)
  debug_frame =
    # OSH: main_filename => BASH_SOURCE
    MainFile(str main_filename)
    # YSH
  | Dummy # -c or stdin, not used by BASH_* vars
    # Note: we could have more "frame 0" cases than MainFile and Dummy:
    # - Completion hooks - dev.Tracer is missing these
    # - PS1
    # - PROMPT_COMMAND

    # OSH: call_tok => BASH_LINENO, source_name => BASH_SOURCE
  | Source(CompoundWord source_loc, str source_name)

    # OSH: call_tok => BASH_LINENO, def_tok => BASH_SOURCE
    # YSH: procs
  | ProcLike(CompoundWord invoke_loc, Token def_tok, str proc_name)

    # for io->eval, myfunc()
  | Token %Token

    # For 'eval', 'use', ...
  | CompoundWord %CompoundWord

    # Special frame added when running 'trap ERR', for more info, and as a sentinel
  | BeforeErrTrap(Token tok)

  #
  # Shell language
  #

  bracket_op =
    WholeArray(id op_id) # * or @
  | ArrayIndex(arith_expr expr)

  suffix_op =
    Nullary %Token # ${x@Q} or ${!prefix@} (which also has prefix_op)
  | Unary(Token op, rhs_word arg_word) # e.g. ${v:-default}
    # TODO: Implement YSH ${x|html} and ${x %.3f}
  | Static(Token tok, str arg)
  | PatSub(CompoundWord pat, rhs_word replace, id replace_mode, Token slash_tok)
    # optional begin is arith_expr.EmptyZero
    # optional length is None, because it's handled in a special way
  | Slice(arith_expr begin, arith_expr? length)
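    # Rough mapping to shell syntax (illustrative):
    #   ${x@Q}        -> Nullary
    #   ${v:-default} -> Unary
    #   ${s/pat/repl} -> PatSub
    #   ${a:1:2}      -> Slice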

  BracedVarSub = (
      Token left,        # in dynamic ParseVarRef, same as name_tok
      Token name_tok,    # location for the name
      str var_name,      # the name - TODO: remove this, use LazyStr() instead
      Token? prefix_op,  # prefix # or ! operators
      bracket_op? bracket_op,
      suffix_op? suffix_op,
      Token right        # in dynamic ParseVarRef, same as name_tok
  )
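  # BracedVarSub example: ${#a[0]} roughly has prefix_op '#', var_name 'a',
  # bracket_op ArrayIndex(0), and no suffix_op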

  # Variants:
  # - Look at left token ID for $'' c'' vs r'' '' e.g. Id.Left_DollarSingleQuote
  # - And """ and ''' e.g. Id.Left_TDoubleQuote
  DoubleQuoted = (Token left, List[word_part] parts, Token right)

  # Consider making str? sval LAZY, like lexer.LazyStr(tok)
  SingleQuoted = (Token left, str sval, Token right)

  # e.g. Id.VSub_QMark, Id.VSub_DollarName $foo with lexer.LazyStr()
  SimpleVarSub = (Token tok)

  CommandSub = (Token left_token, command child, Token right)

  # @[expr] or $[expr] - expression substitution
  # Used in both word_part and expr contexts
  ExprSub = (Token left, expr child, Token right)

  # - can contain word.BracedTree
  # - no 'Token right' for now, doesn't appear to be used
  YshArrayLiteral = (Token left, List[word] words, Token right)

  # Unevaluated, typed arguments for func and proc.
  # Note that ...arg is expr.Spread.
  ArgList = (
      Token left, List[expr] pos_args,
      Token? semi_tok, List[NamedArg] named_args,
      Token? semi_tok2, expr? block_expr,
      Token right
  )
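  # ArgList example: a YSH call like f(x, 42; named=3) roughly puts the
  # arguments before the ';' in pos_args and the ones after it in named_args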

  AssocPair = (CompoundWord key, CompoundWord value, bool has_plus)

  InitializerWord =
    ArrayWord(word w)
  | AssocPair %AssocPair

  word_part =
    YshArrayLiteral %YshArrayLiteral
  | InitializerLiteral(Token left, List[InitializerWord] pairs, Token right)
  | Literal %Token
    # escaped case is separate so the evaluator doesn't have to check token ID
  | EscapedLiteral(Token token, str ch)
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted
    # Could be SimpleVarSub %Token that's VSub_DollarName, but let's not
    # confuse it with the common word_part.Literal
  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
  | ZshVarSub (Token left, CompoundWord ignored, Token right)
    # For command sub and process sub: $(...) <(...) >(...)
  | CommandSub %CommandSub
    # ~ or ~bob
  | TildeSub(Token left, # always the tilde
             Token? name, str? user_name)
  | ArithSub(Token left, arith_expr anode, Token right)
    # {a,b,c}
  | BracedTuple(List[CompoundWord] words)
    # {1..10} or {-5..10..2} or {01..10} (leading zeros matter)
    # {a..f} or {a..f..2} or {a..f..-2}
    # the whole range is one Token,
  | BracedRange(Token blame_tok, id kind, str start, str end, int step)
    # expanded version of {1..10}
  | BracedRangeDigit(str s, Token orig_tok)
    # extended globs are parsed statically, unlike globs
  | ExtGlob(Token op, List[CompoundWord] arms, Token right)
    # a regex group is similar to an extended glob part
  | BashRegexGroup(Token left, CompoundWord? child, Token right)

    # YSH word_part extensions

    # @myarray - Id.Lit_Splice (could be optimized to %Token)
  | Splice(Token blame_tok, str var_name)
    # @[expr] $[expr] - array splice or expr sub
  | ExprSub %ExprSub

  # Use cases for Empty: RHS of 'x=', the argument in "${x:-}".
  # The latter is semantically necessary. (See osh/word_parse.py).
  # At runtime: RHS of 'declare x='.
  rhs_word = Empty | Compound %CompoundWord

  word =
    # Returns from WordParser, but not generally stored in LST
    Operator %Token
    # A Compound word can contain any word_part except the Braced*Part.
    # We could model this with another variant type but it incurs runtime
    # overhead and seems like overkill. Note that DoubleQuoted can't
    # contain a SingleQuoted, etc. either.
  | Compound %CompoundWord
    # For word sequences command.Simple, YshArrayLiteral, for_iter.Words
    # Could be its own type
  | BracedTree(List[word_part] parts)
    # For dynamic parsing of test aka [ - the string is already evaluated.
  | String(id id, str s, CompoundWord? blame_loc)
    # Redirect words like > 3> {myvar}>
  | Redir(Token? left_tok, Token op)

  # Note: the name 'foo' is derived from token value 'foo=' or 'foo+='
  sh_lhs =
    Name(Token left, str name) # Lit_VarLike foo=
    # TODO: Could be Name %Token
  | IndexedName(Token left, str name, arith_expr index)
  | UnparsedIndex(Token left, str name, str index) # for translation

  arith_expr =
    EmptyZero # these are valid: $(( )) (( )) ${a[@]: : }
  | EmptyOne # condition is 1 for infinite loop: for (( ; ; ))
  | VarSub %Token # e.g. $(( x )) Id.Arith_VarLike
  | Word %CompoundWord # e.g. $(( 123'456'$y ))

  | UnaryAssign(id op_id, arith_expr child)
  | BinaryAssign(id op_id, arith_expr left, arith_expr right)

  | Unary(id op_id, arith_expr child)
  | Binary(Token op, arith_expr left, arith_expr right)
  | TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)

  bool_expr =
    WordTest(word w) # e.g. [[ myword ]]
  | Binary(id op_id, word left, word right)
  | Unary(id op_id, word child)
  | LogicalNot(bool_expr child)
  | LogicalAnd(bool_expr left, bool_expr right)
  | LogicalOr(bool_expr left, bool_expr right)

  redir_loc =
    Fd(int fd) | VarName(str name)

  redir_param =
    Word %CompoundWord
  | HereWord(CompoundWord w, bool is_multiline)
  | HereDoc(word here_begin, # e.g. EOF or 'EOF'
            Token? here_end_tok, # Token consisting of the whole line
            # It's always filled in AFTER creation, but only temporarily,
            # so it's optional
            List[word_part] stdin_parts # one for each line
    )
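    # HereDoc example:
    #   cat <<EOF
    #   hello $name
    #   EOF
    # Roughly: here_begin is the word EOF, stdin_parts are the parts of the
    # body lines, and here_end_tok is the terminator line.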

  Redir = (Token op, redir_loc loc, redir_param arg)

  assign_op = Equal | PlusEqual
  AssignPair = (Token left, sh_lhs lhs, assign_op op, rhs_word rhs)
  # TODO: could put Id.Lit_VarLike foo= into LazyStr() with -1 slice
  EnvPair = (Token left, str name, rhs_word val)
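  # EnvPair example: in 'PYTHONPATH=. ./run.sh', the binding PYTHONPATH=.
  # becomes one EnvPair in command.Simple.more_env (roughly)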

  List_of_command < List[command]

  condition =
    Shell %List_of_command # if false; true; then echo hi; fi
  | YshExpr(expr e) # if (x > 0) { echo hi }
    # TODO: add more specific blame location

  # Each arm tests one word against multiple words
  # shell: *.cc|*.h) echo C++ ;;
  # YSH:   *.cc|*.h { echo C++ }
  #
  # Three location tokens:
  # 1. left   - shell has ( or *.cc, ysh has *.cc
  # 2. middle - shell has ), ysh has {
  # 3. right  - shell has optional ;;, ysh has required }
  #
  # For YSH typed case, left can be ( and /
  # And case_pat may contain more details
  CaseArm = (
      Token left, pat pattern, Token middle, List[command] action,
      Token? right
  )

  # The argument to match against in a case command
  # In YSH-style case commands we match against an `expr`, but in sh-style case
  # commands we match against a word.
  case_arg =
    Word(word w)
  | YshExpr(expr e)

  EggexFlag = (bool negated, Token flag)

  # canonical_flags can be compared for equality. This is needed to splice
  # eggexes correctly, e.g. / 'abc' @pat ; i /
  Eggex = (
      Token left, re regex, List[EggexFlag] flags, Token? trans_pref,
      str? canonical_flags)

  pat =
    Else
  | Words(List[word] words)
  | YshExprs(List[expr] exprs)
  | Eggex %Eggex

  # Each if arm starts with either an "if" or "elif" keyword.
  # In YSH, the then keyword is not used (replaced by braces {}).
  IfArm = (
      Token keyword, condition cond, Token? then_kw, List[command] action,
      # then_tok used in ysh-ify
      Token? then_tok)

  for_iter =
    Args # for x; do echo $x; done # implicit "$@"
  | Words(List[word] words) # for x in 'foo' *.py { echo $x }
    # like YshArrayLiteral, but no location for %(
  | YshExpr(expr e, Token blame) # for x in (mylist) { echo $x }
    #| Files(Token left, List[word] words)
    # for x in <> {
    # for x in < @myfiles > {

  BraceGroup = (
      Token left, Token? doc_token, List[command] children, Token right
  )

  Param = (Token blame_tok, str name, TypeExpr? type, expr? default_val)
  RestParam = (Token blame_tok, str name)

  ParamGroup = (List[Param] params, RestParam? rest_of)

  # 'open' is for proc p { }; closed is for proc p () { }
  proc_sig =
    Open
  | Closed(ParamGroup? word, ParamGroup? positional, ParamGroup? named,
           Param? block_param)
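    # Closed signature example: proc p (w; pos; named; block) { ... } -
    # roughly, the ';' separates the word, positional, named, and block
    # parameter groups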

  Proc = (Token keyword, Token name, proc_sig sig, command body)

  Func = (
      Token keyword, Token name,
      ParamGroup? positional, ParamGroup? named,
      command body
  )

  # Represents all these cases: s=1 s+=1 s[x]=1 ...
  ParsedAssignment = (Token? left, Token? close, int part_offset, CompoundWord w)

  # var, const.
  # - Keyword is None for hay blocks. TODO: consider using BareDecl?
  # - 'var x' allowed - RHS is None; idiomatic with value.Place
  VarDecl = (Token? keyword, List[NameType] lhs, expr? rhs)

  # setvar, maybe 'auto' later
  Mutation = (Token keyword, List[y_lhs] lhs, Token op, expr rhs)

  # call f(x) = 42
  ExprCommand = (Token keyword, expr e)

  ShFunction = (
      Token? keyword, Token name_tok, str name, command body,
      str? code_str
  )

  # Retain references to lines
  LiteralBlock = (BraceGroup brace_group, str? code_str)

  cmd_frag =
    LiteralBlock %LiteralBlock # p { echo hi } has backing lines
  | Expr(command c) # var b = ^(echo hi)

  command =
    NoOp

    # can wrap many children, e.g. { }, loops, functions
  | Redirect(command child, List[Redir] redirects)

  | Simple(Token? blame_tok, # TODO: make required (BracedTuple?)
           List[EnvPair] more_env,
           List[word] words,
           ArgList? typed_args, LiteralBlock? block,
           # is_last_cmd is used for fork() optimizations
           bool is_last_cmd,
           # (#2307) Redirects on simple commands are evaluated
           # AFTER the argv word list, unlike all other commands.
           # This field is null if there are no redirects
           List[Redir]? redirects)
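    # Simple example: 'FOO=bar ls -l >out.txt' roughly has more_env [FOO=bar],
    # words [ls, -l], and redirects [>out.txt]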

    # This doesn't technically belong in the LST, but it's convenient for
    # execution
  | ExpandedAlias(command child, List[EnvPair] more_env)
  | Sentence(command child, Token terminator)
    # Represents "bare assignment"
    # Token left is redundant with pairs[0].left
  | ShAssignment(Token left, List[AssignPair] pairs)

  | ControlFlow(Token keyword, CompoundWord? arg_word)

    # ops are | |&
  | Pipeline(Token? negated, List[command] children, List[Token] ops)
    # ops are && ||
  | AndOr(List[command] children, List[Token] ops)

    # Part of for, while, until (but not if, case, ShFunction). No redirects.
  | DoGroup(Token left, List[command] children, Token right)
    # A brace group is a compound command, with redirects.
  | BraceGroup %BraceGroup
    # Contains a single child, like CommandSub
  | Subshell(Token left, command child, Token right, bool is_last_cmd)
  | DParen(Token left, arith_expr child, Token right)
  | DBracket(Token left, bool_expr expr, Token right)

    # up to 3 iteration variables
  | ForEach(Token keyword, List[str] iter_names, for_iter iterable,
            Token? semi_tok, command body)
    # C-style for loop. Any of the 3 expressions can be omitted.
    # Note: body is required, but marked optional here because of
    # initialization order.
  | ForExpr(Token keyword, arith_expr? init, arith_expr? cond,
            arith_expr? update, command? body)
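    # ForExpr example: for (( i = 0; i < 3; ++i )); do echo $i; done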
  | WhileUntil(Token keyword, condition cond, command body)

  | If(Token if_kw, List[IfArm] arms, Token? else_kw, List[command] else_action,
       Token? fi_kw)
  | Case(Token case_kw, case_arg to_match, Token arms_start, List[CaseArm] arms,
         Token arms_end)

    # The keyword is optional in the case of bash-style functions
    # (i.e. "foo() { ... }"), which do not have one.
  | ShFunction %ShFunction

  | TimeBlock(Token keyword, command pipeline)
    # Some nodes optimize it out as List[command], but we use CommandList for
    # 1. the top level
    # 2. ls ; ls & ls (same line)
    # 3. CommandSub # single child that's a CommandList
    # 4. Subshell # single child that's a CommandList

    # TODO: Use List_of_command
  | CommandList(List[command] children)

    # YSH command constructs

  | VarDecl %VarDecl

    # this can behave like 'var', can be desugared
  | BareDecl(Token lhs, expr rhs)

  | Mutation %Mutation
  | Expr %ExprCommand
  | Proc %Proc
  | Func %Func
  | Retval(Token keyword, expr val)

  # bytecode
  b_command =
    VarDecl %VarDecl
  | Mutation %Mutation

  #
  # Glob representation, for converting ${x//} to extended regexes.
  #

  # Example: *.[ch] is:
  #   GlobOp(<Glob_Star '*'>),
  #   GlobLit(Glob_OtherLiteral, '.'),
  #   CharClass(False, ['ch']) # from Glob_CleanLiterals token

  glob_part =
    Literal(id id, str s)
  | Operator(id op_id) # * or ?
  | CharClass(bool negated, List[str] strs)

  # Char classes are opaque for now. If we ever need them:
  # - Collating symbols are [. .]
  # - Equivalence classes are [= =]

  printf_part =
    Literal %Token
    # flags are 0 hyphen space + #
    # type is 's' for %s, etc.
  | Percent(List[Token] flags, Token? width, Token? precision, Token type)
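    # Percent example: the %-10.3f in printf '%-10.3f' roughly has
    # flags ['-'], width 10, precision 3, and type 'f'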

  #
  # YSH Language
  #
  # Copied and modified from Python-3.7/Parser/Python.asdl !

  expr_context = Load | Store | Del | AugLoad | AugStore | Param

  # Type expressions: Int List[Int] Dict[Str, Any]
  # Do we have Func[Int, Int => Int] ? I guess we can parse that into this
  # system.
  TypeExpr = (Token tok, str name, List[TypeExpr] params)

  # LHS bindings in var/const, and eggex
  NameType = (Token left, str name, TypeExpr? typ)

  # TODO: Inline this into GenExp and ListComp? Just use a flag there?
  Comprehension = (List[NameType] lhs, expr iter, expr? cond)

  # Named arguments supplied to call. Token is null for f(; ...named).
  NamedArg = (Token? name, expr value)

  # Subscripts are lists of expressions
  # a[:i, n] (we don't have matrices, but we have data frames)
  Subscript = (Token left, expr obj, expr index)

  # Attributes are obj.attr, d->key, name::scope,
  Attribute = (expr obj, Token op, Token attr, str attr_name, expr_context ctx)

  y_lhs =
    Var %Token # Id.Expr_Name
  | Subscript %Subscript
  | Attribute %Attribute

  place_op =
    # &a[i+1]
    Subscript(Token op, expr index)
    # &d.mykey
  | Attribute(Token op, Token attr)

  expr =
    Var(Token left, str name) # a variable name to evaluate
    # Constants are typically Null, Bool, Int, Float
    # and also Str for key in {key: 42}
    # But string literals are SingleQuoted or DoubleQuoted
    # Python uses Num(object n), which doesn't respect our "LST" invariant.
  | Const(Token c, value val)

    # read(&x) json read (&x[0])
  | Place(Token blame_tok, str var_name, place_op* ops)

    # :| one 'two' "$three" |
  | YshArrayLiteral %YshArrayLiteral

    # / d+ ; ignorecase; %python /
  | Eggex %Eggex

    # $name is not an expr, but $? is, e.g. Id.VSub_QMark
  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
  | CommandSub %CommandSub
  | ExprSub %ExprSub
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted

  | Literal(expr inner)
  | Lambda(List[NameType] params, expr body)

  | Unary(Token op, expr child)
  | Binary(Token op, expr left, expr right)
    # x < 4 < 3 and (x < 4) < 3
  | Compare(expr left, List[Token] ops, List[expr] comparators)
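    # e.g. 'x < 4 < 3' is one Compare node: left is x, ops are [<, <],
    # comparators are [4, 3] (roughly)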
  | FuncCall(expr func, ArgList args)

    # TODO: Need a representation for method call. We don't just want
    # Attribute() and then Call()

  | IfExp(expr test, expr body, expr orelse)
  | Tuple(Token left, List[expr] elts, expr_context ctx)

  | List(Token left, List[expr] elts, expr_context ctx)
  | Dict(Token left, List[expr] keys, List[expr] values)
    # For the values in {n1, n2}
  | Implicit

  | ListComp(Token left, expr elt, List[Comprehension] generators)
    # not implemented
  | DictComp(Token left, expr key, expr value, List[Comprehension] generators)
  | GeneratorExp(expr elt, List[Comprehension] generators)

    # Ranges are written 1:2, with first class expression syntax. There is no
    # step as in Python. Use range(0, 10, step=2) for that.
  | Range(expr lower, Token op, expr upper)

    # Slices occur within [] only. Unlike ranges, the start/end can be
    # implicit. Like ranges, denote a step with slice(0, 10, step=2).
    # a[3:] a[:i]
  | Slice(expr? lower, Token op, expr? upper)

  | Subscript %Subscript
  | Attribute %Attribute

    # Ellipsis is like 'Starred' in Python, which is valid on the LHS for
    # unpacking, and within list literals for splicing.
    # (Starred is NOT used for {k:v, **a}. That uses a blank "keys"
    # attribute.)

    # I think we can use { **pairs } like Python
  | Spread(Token left, expr child)

  #
  # Regex Language (Eggex)
  #

  # e.g. alnum digit
  PosixClass = (Token? negated, str name)
  # e.g. d w s
  PerlClass = (Token? negated, str name)

  # Char Sets and Ranges both use Char Codes
  # with u_braced == true : \u{ff}
  # with u_braced == false: \xff \\ 'a' a '0' 0
  # ERE doesn't make a distinction, but compiling to Python/PCRE can use it
  CharCode = (Token blame_tok, int i, bool u_braced)
  CharRange = (CharCode start, CharCode end)

  # Note: .NET has && in character classes, making it a recursive language

  class_literal_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass
  | CharRange %CharRange
  | CharCode %CharCode

  | SingleQuoted %SingleQuoted
    # @chars
  | Splice(Token name, str var_name) # could be Splice %Token

  # evaluated version of class_literal_term (could be in runtime.asdl)
  char_class_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass

  | CharRange %CharRange
    # For [ \x00 \\ ]
  | CharCode %CharCode

  # NOTE: modifier is unused now, can represent L or P
  re_repeat =
    Op %Token # + * ? or Expr_DecInt for x{3}
  | Range(Token? left, str lower, str upper, Token? right) # dot{1,2}
    # Haven't implemented the modifier, e.g. x{+ P}
    # | Num(Token times, id modifier)
    # | Range(Token? lower, Token? upper, id modifier)

  re =
    Primitive(Token blame_tok, id id) # . ^ $ dot %start %end
  | PosixClass %PosixClass
  | PerlClass %PerlClass
    # syntax [ $x \n ]
  | CharClassLiteral(bool negated, List[class_literal_term] terms)
    # evaluated [ 'abc' \n ]
  | CharClass(bool negated, List[char_class_term] terms)

    # @D
  | Splice(Token name, str var_name) # TODO: Splice %Token ?

  | SingleQuoted %SingleQuoted

    # Compound:
  | Repeat(re child, re_repeat op)
  | Seq(List[re] children)
  | Alt(List[re] children)

  | Group(re child)
    # convert_func is filled in on evaluation
    # TODO: name and func_name can be expanded to strings
  | Capture(re child, Token? name, Token? func_name)
  | Backtracking(bool negated, Token name, re child)

    # \u{ff} is parsed as this, but SingleQuoted also evaluates to it
  | LiteralChars(Token blame_tok, str s)
}