frontend/syntax.asdl

OILS / frontend / syntax.asdl View on Github | oils.pub

711 lines, 317 significant

1	# Data types for the Oils AST, aka "Lossless Syntax Tree".
2	#
3	# Invariant: the source text can be reconstructed byte-for-byte from this tree.
4	# The test/lossless.sh suite verifies this.
5
6	# We usually try to preserve the physical order of the source in the ASDL
7	# fields. One exception is the order of redirects:
8	#
9	# echo >out.txt hi
10	# # versus
11	# echo hi >out.txt
12
13	# Unrepresented:
14	# - let arithmetic (rarely used)
15	# - coprocesses # one with arg and one without
16	# - select block
17
18	# Possible refactorings:
19	#
20	# # %CompoundWord as first class variant:
21	# bool_expr = WordTest %CompoundWord \| ...
22	#
23	# # Can DoubleQuoted have a subset of parts compared with CompoundWord?
24	# string_part = ... # subset of word_part
25	#
26	# - Distinguish word_t with BracedTree vs. those without? seq_word_t?
27
28	module syntax
29	{
30	use core value {
31	value LiteralBlock
32	}
33
34	# More efficient than the List[bool] pattern we've been using
35	BoolParamBox = (bool b)
36	IntParamBox = (int i)
37
38	# core/main_loop.py
39	parse_result = EmptyLine \| Eof \| Node(command cmd)
40
41	# 'source' represents the location of a line / token.
42	source =
43	Interactive
44	\| Headless
45	\| Unused(str comment) # completion and history never show parse errors?
46	\| CFlag
47	\| Stdin(str comment)
48
49	# MainFile is for main.{osh,ysh}, --eval oshrc/yshrc. They're files loaded
50	# directly by the shell.
51	\| MainFile(str path)
52	# A file loaded by 'source' or 'use'.
53	# TODO: we probably don't need this location? The debug stack provides a
54	# chain of locations back to the sourced script. Maybe we need to point to
55	# a debug_frame instead?
56	# It could be DiskFileShell and DiskFileUser, or just DiskFile.
57	\| OtherFile(str path, loc location)
58
59	# Code parsed from a word. (TODO: rename source.Word?)
60	# used for 'eval arg', 'trap arg', 'printf arg',
61	# parseCommand() - this is a string?
62	# dynamic LHS - move this to Reparsed?
63	# complete -W
64	\| Dynamic(str what, loc location)
65
66	# Point to the original variable reference
67	\| VarRef(Token orig_tok)
68
69	# code parsed from the value of a variable
70	# used for $PS1 $PROMPT_COMMAND
71	\| Variable(str var_name, loc location)
72
73	# alias expansion (location of first word)
74	\| Alias(str argv0, loc argv0_loc)
75
76	# 2 kinds of reparsing: backticks, and x+1 in a[x+1]=y
77	\| Reparsed(str what, Token left_token, Token right_token)
78
79	# For --location-str
80	\| Synthetic(str s)
81
82	SourceLine = (int line_num, str content, source src)
83
84	# Note that ASDL generates:
85	# typedef uint16_t Id_t;
86	# So Token is
87	# 8 bytes GC header + 2 + 4 + 4 + 8 + 8 = 34 bytes on 64-bit machines
88	#
89	# We transpose (id, col, length) -> (id, length, col) for C struct packing.
90	Token = (id id, int length, int col, SourceLine? line, str? tval)
91
92	# I wanted to get rid of Token.tval with this separate WideToken type, but it
93	# is more efficient if word_part.Literal %Token literally is the same thing
94	# that comes out of the lexer. Otherwise we have extra garbage.
95
96	# WideToken = (id id, int length, int col, SourceLine? line, str? tval)
97
98	# Slight ASDL bug: CompoundWord has to be defined before using it as a shared
99	# variant. The _product_counter algorithm should be moved into a separate
100	# tag-assigning pass, and shared between gen_python.py and gen_cpp.py.
101	CompoundWord = (List[word_part] parts)
102
103	# Source location for errors
104	loc =
105	Missing # equivalent of runtime.NO_SPID
106	\| Token %Token
107	# Very common case: argv arrays need original location
108	\| ArgWord %CompoundWord
109	\| WordPart(word_part p)
110	\| Word(word w)
111	\| Arith(arith_expr a)
112	# e.g. for errexit blaming
113	\| Command(command c)
114
115	# debug_frame_t is an EXECUTION stack (proc func source use eval), while
116	# source_t (in some cases) is like a PARSING stack (files, strings from vars,
117	# etc.)
118	debug_frame =
119	# OSH: main_filename => BASH_SOURCE
120	MainFile(str main_filename)
121	# YSH
122	\| Dummy # -c or stdin, not used by BASH_* vars
123	# Note: we could have more "frame 0" than MainFile and Dummy -
124	# - Completion hooks - dev.Tracer is missing these
125	# - PS1
126	# - PROMPT_COMMAND
127
128	# OSH: source_loc => BASH_LINENO, source_name => BASH_SOURCE (TODO confirm: comment used to say call_tok)
129	\| Source(CompoundWord source_loc, str source_name)
130
131	# OSH: invoke_loc => BASH_LINENO, def_tok => BASH_SOURCE (TODO confirm: comment used to say call_tok)
132	# YSH: procs
133	\| ProcLike(CompoundWord invoke_loc, Token def_tok, str proc_name)
134
135	# for io->eval, myfunc()
136	\| Token %Token
137
138	# For 'eval', 'use', ...
139	\| CompoundWord %CompoundWord
140
141	# Special frame added when running 'trap ERR', for more info, and as a sentinel
142	\| BeforeErrTrap(Token tok)
143
144	#
145	# Shell language
146	#
147
148	bracket_op =
149	WholeArray(id op_id) # * or @
150	\| ArrayIndex(arith_expr expr)
151
152	suffix_op =
153	Nullary %Token # ${x@Q} or ${!prefix@} (which also has prefix_op)
154	\| Unary(Token op, rhs_word arg_word) # e.g. ${v:-default}
155	# TODO: Implement YSH ${x\|html} and ${x %.3f}
156	\| Static(Token tok, str arg)
157	\| PatSub(CompoundWord pat, rhs_word replace, id replace_mode, Token slash_tok)
158	# optional begin is arith_expr.EmptyZero
159	# optional length is None, because it's handled in a special way
160	\| Slice(arith_expr begin, arith_expr? length)
161
162	BracedVarSub = (
163	Token left, # in dynamic ParseVarRef, same as name_tok
164	Token name_tok, # location for the name
165	str var_name, # the name - TODO: remove this, use LazyStr() instead
166	Token? prefix_op, # prefix # or ! operators
167	bracket_op? bracket_op,
168	suffix_op? suffix_op,
169	Token right # in dynamic ParseVarRef, same as name_tok
170	)
171
172	# Variants:
173	# - Look at left token ID for $'' c'' vs r'' '' e.g. Id.Left_DollarSingleQuote
174	# - And """ and ''' e.g. Id.Left_TDoubleQuote
175	DoubleQuoted = (Token left, List[word_part] parts, Token right)
176
177	# Consider making str? sval LAZY, like lexer.LazyStr(tok)
178	SingleQuoted = (Token left, str sval, Token right)
179
180	# e.g. Id.VSub_QMark, Id.VSub_DollarName $foo with lexer.LazyStr()
181	SimpleVarSub = (Token tok)
182
183	CommandSub = (Token left_token, command child, Token right)
184
185	# @[expr] or $[expr] - expression substitution
186	# Used in both word_part and expr contexts
187	ExprSub = (Token left, expr child, Token right)
188
189	# - can contain word.BracedTree
190	# - NOTE(review): 'Token right' IS present below; an older note here said it was omitted as unused - confirm
191	YshArrayLiteral = (Token left, List[word] words, Token right)
192
193	# Unevaluated, typed arguments for func and proc.
194	# Note that ...arg is expr.Spread.
195	ArgList = (
196	Token left, List[expr] pos_args,
197	Token? semi_tok, List[NamedArg] named_args,
198	Token? semi_tok2, expr? block_expr,
199	Token right
200	)
201
202	AssocPair = (CompoundWord key, CompoundWord value, bool has_plus)
203
204	InitializerWord =
205	ArrayWord(word w)
206	\| AssocPair %AssocPair
207
208	word_part =
209	YshArrayLiteral %YshArrayLiteral
210	\| InitializerLiteral(Token left, List[InitializerWord] pairs, Token right)
211	\| Literal %Token
212	# escaped case is separate so the evaluator doesn't have to check token ID
213	\| EscapedLiteral(Token token, str ch)
214	\| SingleQuoted %SingleQuoted
215	\| DoubleQuoted %DoubleQuoted
216	# Could be SimpleVarSub %Token that's VSub_DollarName, but let's not
217	# confuse it with the common word_part.Literal case, for now
218	\| SimpleVarSub %SimpleVarSub
219	\| BracedVarSub %BracedVarSub
220	\| ZshVarSub (Token left, CompoundWord ignored, Token right)
221	# For command sub and process sub: $(...) <(...) >(...)
222	\| CommandSub %CommandSub
223	# ~ or ~bob
224	\| TildeSub(Token left, # always the tilde
225	Token? name, str? user_name)
226	\| ArithSub(Token left, arith_expr anode, Token right)
227	# {a,b,c}
228	\| BracedTuple(List[CompoundWord] words)
229	# {1..10} or {-5..10..2} or {01..10} (leading zeros matter)
230	# {a..f} or {a..f..2} or {a..f..-2}
231	# the whole range is one Token,
232	\| BracedRange(Token blame_tok, id kind, str start, str end, int step)
233	# expanded version of {1..10}
234	\| BracedRangeDigit(str s, Token orig_tok)
235	# extended globs are parsed statically, unlike globs
236	\| ExtGlob(Token op, List[CompoundWord] arms, Token right)
237	# a regex group is similar to an extended glob part
238	\| BashRegexGroup(Token left, CompoundWord? child, Token right)
239
240	# YSH word_part extensions
241
242	# @myarray - Id.Lit_Splice (could be optimized to %Token)
243	\| Splice(Token blame_tok, str var_name)
244	# @[expr] $[expr] - array splice or expr sub
245	\| ExprSub %ExprSub
246
247	# Use cases for Empty: RHS of 'x=', the argument in "${x:-}".
248	# The latter is semantically necessary. (See osh/word_parse.py).
249	# At runtime: RHS of 'declare x='.
250	rhs_word = Empty \| Compound %CompoundWord
251
252	word =
253	# Returns from WordParser, but not generally stored in LST
254	Operator %Token
255	# A Compound word can contain any word_part except the Braced*Part.
256	# We could model this with another variant type but it incurs runtime
257	# overhead and seems like overkill. Note that DoubleQuoted can't
258	# contain a SingleQuoted, etc. either.
259	\| Compound %CompoundWord
260	# For word sequences command.Simple, YshArrayLiteral, for_iter.Words
261	# Could be its own type
262	\| BracedTree(List[word_part] parts)
263	# For dynamic parsing of test aka [ - the string is already evaluated.
264	\| String(id id, str s, CompoundWord? blame_loc)
265	# Redirect words like > 3> {myvar}>
266	\| Redir(Token? left_tok, Token op)
267
268	# Note: the name 'foo' is derived from token value 'foo=' or 'foo+='
269	sh_lhs =
270	Name(Token left, str name) # Lit_VarLike foo=
271	# TODO: Could be Name %Token
272	\| IndexedName(Token left, str name, arith_expr index)
273	\| UnparsedIndex(Token left, str name, str index) # for translation
274
275	arith_expr =
276	EmptyZero # these are valid: $(( )) (( )) ${a[@]: : }
277	\| EmptyOne # condition is 1 for infinite loop: for (( ; ; ))
278	\| VarSub %Token # e.g. $(( x )) Id.Arith_VarLike
279	\| Word %CompoundWord # e.g. $(( 123'456'$y ))
280
281	\| UnaryAssign(id op_id, arith_expr child)
282	\| BinaryAssign(id op_id, arith_expr left, arith_expr right)
283
284	\| Unary(id op_id, arith_expr child)
285	\| Binary(Token op, arith_expr left, arith_expr right)
286	\| TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)
287
288	bool_expr =
289	WordTest(word w) # e.g. [[ myword ]]
290	\| Binary(id op_id, word left, word right)
291	\| Unary(id op_id, word child)
292	\| LogicalNot(bool_expr child)
293	\| LogicalAnd(bool_expr left, bool_expr right)
294	\| LogicalOr(bool_expr left, bool_expr right)
295
296	redir_loc =
297	Fd(int fd) \| VarName(str name)
298
299	redir_param =
300	Word %CompoundWord
301	\| HereWord(CompoundWord w, bool is_multiline)
302	\| HereDoc(word here_begin, # e.g. EOF or 'EOF'
303	Token? here_end_tok, # Token consisting of the whole line
304	# It's always filled in AFTER creation, but
305	# temporarily so optional
306	List[word_part] stdin_parts # one for each line
307	)
308
309	Redir = (Token op, redir_loc loc, redir_param arg)
310
311	assign_op = Equal \| PlusEqual
312	AssignPair = (Token left, sh_lhs lhs, assign_op op, rhs_word rhs)
313	# TODO: could put Id.Lit_VarLike foo= into LazyStr() with -1 slice
314	EnvPair = (Token left, str name, rhs_word val)
315
316	List_of_command < List[command]
317
318	condition =
319	Shell %List_of_command # if false; true; then echo hi; fi
320	\| YshExpr(expr e) # if (x > 0) { echo hi }
321	# TODO: add more specific blame location
322
323	# Each arm tests one word against multiple words
324	# shell: .cc\|.h) echo C++ ;;
325	# YSH: .cc\|.h { echo C++ }
326	#
327	# Three location tokens:
328	# 1. left - shell has ( or .cc ysh has .cc
329	# 2. middle - shell has ) ysh has {
330	# 3. right - shell has optional ;; ysh has required }
331	#
332	# For YSH typed case, left can be ( and /
333	# And case_pat may contain more details
334	CaseArm = (
335	Token left, pat pattern, Token middle, List[command] action,
336	Token? right
337	)
338
339	# The argument to match against in a case command
340	# In YSH-style case commands we match against an `expr`, but in sh-style case
341	# commands we match against a word.
342	case_arg =
343	Word(word w)
344	\| YshExpr(expr e)
345
346	EggexFlag = (bool negated, Token flag)
347
348	# canonical_flags can be compared for equality. This is needed to splice
349	# eggexes correctly, e.g. / 'abc' @pat ; i /
350	Eggex = (
351	Token left, re regex, List[EggexFlag] flags, Token? trans_pref,
352	str? canonical_flags)
353
354	pat =
355	Else
356	\| Words(List[word] words)
357	\| YshExprs(List[expr] exprs)
358	\| Eggex %Eggex
359
360	# Each if arm starts with either an "if" or "elif" keyword
361	# In YSH, the then keyword is not used (replaced by braces {})
362	IfArm = (
363	Token keyword, condition cond, Token? then_kw, List[command] action,
364	# then_tok used in ysh-ify
365	Token? then_tok)
366
367	for_iter =
368	Args # for x; do echo $x; done # implicit "$@"
369	\| Words(List[word] words) # for x in 'foo' *.py { echo $x }
370	# like YshArrayLiteral, but no location for %(
371	\| YshExpr(expr e, Token blame) # for x in (mylist) { echo $x }
372	#\| Files(Token left, List[word] words)
373	# for x in <> {
374	# for x in < @myfiles > {
375
376	BraceGroup = (
377	Token left, Token? doc_token, List[command] children, Token right
378	)
379
380	Param = (Token blame_tok, str name, TypeExpr? type, expr? default_val)
381	RestParam = (Token blame_tok, str name)
382
383	ParamGroup = (List[Param] params, RestParam? rest_of)
384
385	# 'open' is for proc p { }; closed is for proc p () { }
386	proc_sig =
387	Open
388	\| Closed(ParamGroup? word, ParamGroup? positional, ParamGroup? named,
389	Param? block_param)
390
391	Proc = (Token keyword, Token name, proc_sig sig, command body)
392
393	Func = (
394	Token keyword, Token name,
395	ParamGroup? positional, ParamGroup? named,
396	command body
397	)
398
399	# Represents all of these cases: s=1 s+=1 s[x]=1 ...
400	ParsedAssignment = (Token? left, Token? close, int part_offset, CompoundWord w)
401
402	# var, const.
403	# - Keyword is None for hay blocks. TODO: consider using BareDecl?
404	# - 'var x' allowed - RHS is None; idiomatic with value.Place
405	VarDecl = (Token? keyword, List[NameType] lhs, expr? rhs)
406
407	# setvar, maybe 'auto' later
408	Mutation = (Token keyword, List[y_lhs] lhs, Token op, expr rhs)
409
410	# e.g. 'call f(x)' or '= 42' (two separate examples; 'keyword' is the leading token)
411	ExprCommand = (Token keyword, expr e)
412
413	ShFunction = (
414	Token? keyword, Token name_tok, str name, command body,
415	str? code_str
416	)
417
418	command =
419	NoOp
420
421	# can wrap many children, e.g. { }, loops, functions
422	\| Redirect(command child, List[Redir] redirects)
423
424	\| Simple(Token? blame_tok, # TODO: make required (BracedTuple?)
425	List[EnvPair] more_env,
426	List[word] words,
427	ArgList? typed_args, LiteralBlock? block,
428	# is_last_cmd is used for fork() optimizations
429	bool is_last_cmd,
430	# (#2307) Redirects on simple commands are evaluated
431	# AFTER the argv word list, unlike all other commands.
432	# This field is null if there are no redirects
433	List[Redir]? redirects)
434
435	# This doesn't technically belong in the LST, but it's convenient for
436	# execution
437	\| ExpandedAlias(command child, List[EnvPair] more_env)
438	\| Sentence(command child, Token terminator)
439	# Represents "bare assignment"
440	# Token left is redundant with pairs[0].left
441	\| ShAssignment(Token left, List[AssignPair] pairs)
442
443	\| ControlFlow(Token keyword, CompoundWord? arg_word)
444
445	# ops are \| \|&
446	\| Pipeline(Token? negated, List[command] children, List[Token] ops)
447	# ops are && \|\|
448	\| AndOr(List[command] children, List[Token] ops)
449
450	# Part of for, while, until (but not if, case, ShFunction). No redirects.
451	\| DoGroup(Token left, List[command] children, Token right)
452	# A brace group is a compound command, with redirects.
453	\| BraceGroup %BraceGroup
454	# Contains a single child, like CommandSub
455	\| Subshell(Token left, command child, Token right, bool is_last_cmd)
456	\| DParen(Token left, arith_expr child, Token right)
457	\| DBracket(Token left, bool_expr expr, Token right)
458
459	# up to 3 iteration variables
460	\| ForEach(Token keyword, List[str] iter_names, for_iter iterable,
461	Token? semi_tok, command body)
462	# C-style for loop. Any of the 3 expressions can be omitted.
463	# Note: body is required, but only optional here because of initialization
464	# order.
465	\| ForExpr(Token keyword, arith_expr? init, arith_expr? cond,
466	arith_expr? update, command? body)
467	\| WhileUntil(Token keyword, condition cond, command body)
468
469	\| If(Token if_kw, List[IfArm] arms, Token? else_kw, List[command] else_action,
470	Token? fi_kw)
471	\| Case(Token case_kw, case_arg to_match, Token arms_start, List[CaseArm] arms,
472	Token arms_end)
473
474	# The keyword is optional in the case of bash-style functions
475	# (ie. "foo() { ... }") which do not have one.
476	\| ShFunction %ShFunction
477
478	\| TimeBlock(Token keyword, command pipeline)
479	# Some nodes optimize it out as List[command], but we use CommandList for
480	# 1. the top level
481	# 2. ls ; ls & ls (same line)
482	# 3. CommandSub # single child that's a CommandList
483	# 4. Subshell # single child that's a CommandList
484
485	# TODO: Use List_of_command
486	\| CommandList(List[command] children)
487
488	# YSH command constructs
489
490	\| VarDecl %VarDecl
491
492	# this can behave like 'var', can be desugared
493	\| BareDecl(Token lhs, expr rhs)
494
495	\| Mutation %Mutation
496	\| Expr %ExprCommand
497	\| Proc %Proc
498	\| Func %Func
499	\| Retval(Token keyword, expr val)
500
501	# bytecode
502	b_command =
503	VarDecl %VarDecl
504	\| Mutation %Mutation
505
506	#
507	# Glob representation, for converting ${x//} to extended regexes.
508	#
509
510	# Example: *.[ch] is:
511	# Operator(Glob_Star), # the '*'
512	# Literal(Glob_OtherLiteral, '.'),
513	# CharClass(False, ['ch']) # from Glob_CleanLiterals token
514
515	glob_part =
516	Literal(id id, str s)
517	\| Operator(id op_id) # * or ?
518	\| CharClass(bool negated, List[str] strs)
519
520	# Char classes are opaque for now. If we ever need them:
521	# - Collating symbols are [. .]
522	# - Equivalence classes are [= =]
523
524	printf_part =
525	Literal %Token
526	# flags are 0 hyphen space + #
527	# type is 's' for %s, etc.
528	\| Percent(List[Token] flags, Token? width, Token? precision, Token type)
529
530	#
531	# YSH Language
532	#
533	# Copied and modified from Python-3.7/Parser/Python.asdl !
534
535	expr_context = Load \| Store \| Del \| AugLoad \| AugStore \| Param
536
537	# Type expressions: Int List[Int] Dict[Str, Any]
538	# Do we have Func[Int, Int => Int] ? I guess we can parse that into this
539	# system.
540	TypeExpr = (Token tok, str name, List[TypeExpr] params)
541
542	# LHS bindings in var/const, and eggex
543	NameType = (Token left, str name, TypeExpr? typ)
544
545	# TODO: Inline this into GenExp and ListComp? Just use a flag there?
546	Comprehension = (List[NameType] lhs, expr iter, expr? cond)
547
548	# Named arguments supplied to call. Token is null for f(; ...named).
549	NamedArg = (Token? name, expr value)
550
551	# Subscripts are lists of expressions
552	# a[:i, n] (we don't have matrices, but we have data frames)
553	Subscript = (Token left, expr obj, expr index)
554
555	# Attributes are obj.attr, d->key, name::scope,
556	Attribute = (expr obj, Token op, Token attr, str attr_name, expr_context ctx)
557
558	y_lhs =
559	Var %Token # Id.Expr_Name
560	\| Subscript %Subscript
561	\| Attribute %Attribute
562
563	place_op =
564	# &a[i+1]
565	Subscript(Token op, expr index)
566	# &d.mykey
567	\| Attribute(Token op, Token attr)
568
569	expr =
570	Var(Token left, str name) # a variable name to evaluate
571	# Constants are typically Null, Bool, Int, Float
572	# and also Str for key in {key: 42}
573	# But string literals are SingleQuoted or DoubleQuoted
574	# Python uses Num(object n), which doesn't respect our "LST" invariant.
575	\| Const(Token c, value val)
576
577	# read(&x) json read (&x[0])
578	\| Place(Token blame_tok, str var_name, place_op* ops)
579
580	# :\| one 'two' "$three" \|
581	\| YshArrayLiteral %YshArrayLiteral
582
583	# / d+ ; ignorecase; %python /
584	\| Eggex %Eggex
585
586	# $name is not an expr, but $? is, e.g. Id.VSub_QMark
587	\| SimpleVarSub %SimpleVarSub
588	\| BracedVarSub %BracedVarSub
589	\| CommandSub %CommandSub
590	\| ExprSub %ExprSub
591	\| SingleQuoted %SingleQuoted
592	\| DoubleQuoted %DoubleQuoted
593
594	\| Literal(expr inner)
595	\| Lambda(List[NameType] params, expr body)
596
597	\| Unary(Token op, expr child)
598	\| Binary(Token op, expr left, expr right)
599	# x < 4 < 3 and (x < 4) < 3
600	\| Compare(expr left, List[Token] ops, List[expr] comparators)
601	\| FuncCall(expr func, ArgList args)
602
603	# TODO: Need a representation for method call. We don't just want
604	# Attribute() and then Call()
605
606	\| IfExp(expr test, expr body, expr orelse)
607	\| Tuple(Token left, List[expr] elts, expr_context ctx)
608
609	\| List(Token left, List[expr] elts, expr_context ctx)
610	\| Dict(Token left, List[expr] keys, List[expr] values)
611	# For the values in {n1, n2}
612	\| Implicit
613
614	\| ListComp(Token left, expr elt, List[Comprehension] generators)
615	# not implemented
616	\| DictComp(Token left, expr key, expr value, List[Comprehension] generators)
617	\| GeneratorExp(expr elt, List[Comprehension] generators)
618
619	# Ranges are written 1:2, with first class expression syntax. There is no
620	# step as in Python. Use range(0, 10, step=2) for that.
621	\| Range(expr lower, Token op, expr upper)
622
623	# Slices occur within [] only. Unlike ranges, the start/end can be
624	# implicit. Like ranges, denote a step with slice(0, 10, step=2).
625	# a[3:] a[:i]
626	\| Slice(expr? lower, Token op, expr? upper)
627
628	\| Subscript %Subscript
629	\| Attribute %Attribute
630
631	# Ellipsis is like 'Starred' within Python, which is valid on the LHS in
632	# Python for unpacking, and within list literals for splicing.
633	# (Starred is NOT used for {k:v, **a}. That uses a blank "keys"
634	# attribute.)
635
636	# I think we can use { **pairs } like Python
637	\| Spread(Token left, expr child)
638
639	#
640	# Regex Language (Eggex)
641	#
642
643	# e.g. alnum digit
644	PosixClass = (Token? negated, str name)
645	# e.g. d w s
646	PerlClass = (Token? negated, str name)
647
648	# Char Sets and Ranges both use Char Codes
649	# with u_braced == true : \u{ff}
650	# with u_braced == false: \xff \\ 'a' a '0' 0
651	# ERE doesn't make a distinction, but compiling to Python/PCRE can use it
652	CharCode = (Token blame_tok, int i, bool u_braced)
653	CharRange = (CharCode start, CharCode end)
654
655	# Note: .NET has && in character classes, making it a recursive language
656
657	class_literal_term =
658	PosixClass %PosixClass
659	\| PerlClass %PerlClass
660	\| CharRange %CharRange
661	\| CharCode %CharCode
662
663	\| SingleQuoted %SingleQuoted
664	# @chars
665	\| Splice(Token name, str var_name) # could be Splice %Token
666
667	# evaluated version of class_literal_term (could be in runtime.asdl)
668	char_class_term =
669	PosixClass %PosixClass
670	\| PerlClass %PerlClass
671
672	\| CharRange %CharRange
673	# For [ \x00 \\ ]
674	\| CharCode %CharCode
675
676	# NOTE: modifier is unused now, can represent L or P
677	re_repeat =
678	Op %Token # + * ? or Expr_DecInt for x{3}
679	\| Range(Token? left, str lower, str upper, Token? right) # dot{1,2}
680	# Haven't implemented the modifier, e.g. x{+ P}
681	# \| Num(Token times, id modifier)
682	# \| Range(Token? lower, Token? upper, id modifier)
683
684	re =
685	Primitive(Token blame_tok, id id) # . ^ $ dot %start %end
686	\| PosixClass %PosixClass
687	\| PerlClass %PerlClass
688	# syntax [ $x \n ]
689	\| CharClassLiteral(bool negated, List[class_literal_term] terms)
690	# evaluated [ 'abc' \n ]
691	\| CharClass(bool negated, List[char_class_term] terms)
692
693	# @D
694	\| Splice(Token name, str var_name) # TODO: Splice %Token ?
695
696	\| SingleQuoted %SingleQuoted
697
698	# Compound:
699	\| Repeat(re child, re_repeat op)
700	\| Seq(List[re] children)
701	\| Alt(List[re] children)
702
703	\| Group(re child)
704	# convert_func is filled in on evaluation
705	# TODO: name and func_name can be expanded to strings
706	\| Capture(re child, Token? name, Token? func_name)
707	\| Backtracking(bool negated, Token name, re child)
708
709	# \u{ff} is parsed as this, but SingleQuoted also evaluates to it
710	\| LiteralChars(Token blame_tok, str s)
711	}