| 1 | # Data types for the Oils AST, aka "Lossless Syntax Tree".
|
| 2 | #
|
| 3 | # Invariant: the source text can be reconstructed byte-for-byte from this tree.
|
| 4 | # The test/lossless.sh suite verifies this.
|
| 5 |
|
| 6 | # We usually try to preserve the physical order of the source in the ASDL
|
| 7 | # fields. One exception is the order of redirects:
|
| 8 | #
|
| 9 | # echo >out.txt hi
|
| 10 | # # versus
|
| 11 | # echo hi >out.txt
|
| 12 |
|
| 13 | # Unrepresented:
|
| 14 | # - let arithmetic (rarely used)
|
| 15 | # - coprocesses # one with arg and one without
|
| 16 | # - select block
|
| 17 |
|
| 18 | # Possible refactorings:
|
| 19 | #
|
| 20 | # # %CompoundWord as first class variant:
|
| 21 | # bool_expr = WordTest %CompoundWord | ...
|
| 22 | #
|
| 23 | # # Can DoubleQuoted have a subset of parts compared with CompoundWord?
|
| 24 | # string_part = ... # subset of word_part
|
| 25 | #
|
| 26 | # - Distinguish word_t with BracedTree vs. those without? seq_word_t?
|
| 27 |
|
module syntax
{
# Pull in the value type (and LiteralBlock) from the core value schema.
use core value {
  value LiteralBlock
}

# More efficient than the List[bool] pattern we've been using
BoolParamBox = (bool b)
IntParamBox = (int i)

# core/main_loop.py - result of parsing one unit of input
parse_result = EmptyLine | Eof | Node(command cmd)
|
| 40 |
|
# 'source' represents the location of a line / token.
source =
    Interactive
  | Headless
  | Unused(str comment)       # completion and history never show parse errors?
  | CFlag
  | Stdin(str comment)

    # MainFile is for main.{osh,ysh}, --eval oshrc/yshrc.  They're files loaded
    # directly by the shell.
  | MainFile(str path)
    # A file loaded by 'source' or 'use'.
    # TODO: we probably don't need this location?  The debug stack provides a
    # chain of locations back to the sourced script.  Maybe we need to point to
    # a debug_frame instead?
    # It could be DiskFileShell and DiskFileUser, or just DiskFile.
  | OtherFile(str path, loc location)

    # Code parsed from a word.  (TODO: rename source.Word?)
    # used for 'eval arg', 'trap arg', 'printf arg',
    # parseCommand() - this is a string?
    # dynamic LHS - move this to Reparsed?
    # complete -W
  | Dynamic(str what, loc location)

    # Point to the original variable reference
  | VarRef(Token orig_tok)

    # code parsed from the value of a variable
    # used for $PS1 $PROMPT_COMMAND
  | Variable(str var_name, loc location)

    # alias expansion (location of first word)
  | Alias(str argv0, loc argv0_loc)

    # 2 kinds of reparsing: backticks, and x+1 in a[x+1]=y
  | Reparsed(str what, Token left_token, Token right_token)

    # For --location-str
  | Synthetic(str s)

# One physical line of input, tagged with where it came from.
SourceLine = (int line_num, str content, source src)
|
| 83 |
|
# Note that ASDL generates:
#    typedef uint16_t Id_t;
# So Token is
#    8 bytes GC header + 2 + 4 + 4 + 8 + 8 = 34 bytes on 64-bit machines
#
# We transpose (id, col, length) -> (id, length, col) for C struct packing.
Token = (id id, int length, int col, SourceLine? line, str? tval)

# I wanted to get rid of Token.tval with this separate WideToken type, but it
# is more efficient if word_part.Literal %Token literally is the same thing
# that comes out of the lexer.  Otherwise we have extra garbage.

# WideToken = (id id, int length, int col, SourceLine? line, str? tval)

# Slight ASDL bug: CompoundWord has to be defined before using it as a shared
# variant.  The _product_counter algorithm should be moved into a separate
# tag-assigning pass, and shared between gen_python.py and gen_cpp.py.
CompoundWord = (List[word_part] parts)
|
| 102 |
|
# Source location for errors
loc =
    Missing                  # equivalent of runtime.NO_SPID
  | Token %Token
    # Very common case: argv arrays need original location
  | ArgWord %CompoundWord
  | WordPart(word_part p)
  | Word(word w)
  | Arith(arith_expr a)
    # e.g. for errexit blaming
  | Command(command c)
|
| 114 |
|
# debug_frame_t is an EXECUTION stack (proc func source use eval), while
# source_t (in some cases) is like a PARSING stack (files, strings from vars,
# etc.)
debug_frame =
    # OSH: main_filename => BASH_SOURCE
    MainFile(str main_filename)
    # YSH
  | Dummy                    # -c or stdin, not used by BASH_* vars
    # Note: we could have more "frame 0" than MainFile and Dummy -
    # - Completion hooks - dev.Tracer is missing these
    # - PS1
    # - PROMPT_COMMAND

    # OSH: call_tok => BASH_LINENO, source_name => BASH_SOURCE
  | Source(CompoundWord source_loc, str source_name)

    # OSH: call_tok => BASH_LINENO, def_tok => BASH_SOURCE
    # YSH: procs
  | ProcLike(CompoundWord invoke_loc, Token def_tok, str proc_name)

    # for io->eval, myfunc()
  | Token %Token

    # For 'eval', 'use', ...
  | CompoundWord %CompoundWord

    # Special frame added when running 'trap ERR', for more info, and as a
    # sentinel
  | BeforeErrTrap(Token tok)
|
| 143 |
|
#
# Shell language
#

bracket_op =
    WholeArray(id op_id)           # * or @
  | ArrayIndex(arith_expr expr)

suffix_op =
    Nullary %Token                 # ${x@Q} or ${!prefix@} (which also has prefix_op)
  | Unary(Token op, rhs_word arg_word)   # e.g. ${v:-default}
    # TODO: Implement YSH ${x|html} and ${x %.3f}
  | Static(Token tok, str arg)
  | PatSub(CompoundWord pat, rhs_word replace, id replace_mode, Token slash_tok)
    # optional begin is arith_expr.EmptyZero
    # optional length is None, because it's handled in a special way
  | Slice(arith_expr begin, arith_expr? length)

# ${foo[x]:-default} and friends
BracedVarSub = (
    Token left,              # in dynamic ParseVarRef, same as name_tok
    Token name_tok,          # location for the name
    str var_name,            # the name - TODO: remove this, use LazyStr() instead
    Token? prefix_op,        # prefix # or ! operators
    bracket_op? bracket_op,
    suffix_op? suffix_op,
    Token right              # in dynamic ParseVarRef, same as name_tok
)
|
| 171 |
|
# Variants:
# - Look at left token ID for $'' c'' vs r'' '' e.g. Id.Left_DollarSingleQuote
# - And """ and ''' e.g. Id.Left_TDoubleQuote
DoubleQuoted = (Token left, List[word_part] parts, Token right)

# Consider making str? sval LAZY, like lexer.LazyStr(tok)
SingleQuoted = (Token left, str sval, Token right)

# e.g. Id.VSub_QMark, Id.VSub_DollarName $foo with lexer.LazyStr()
SimpleVarSub = (Token tok)

# $(...) and backtick command substitution
CommandSub = (Token left_token, command child, Token right)

# @[expr] or $[expr] - expression substitution
# Used in both word_part and expr contexts
ExprSub = (Token left, expr child, Token right)

# - can contain word.BracedTree
# - no 'Token right' for now, doesn't appear to be used
YshArrayLiteral = (Token left, List[word] words, Token right)

# Unevaluated, typed arguments for func and proc.
# Note that ...arg is expr.Spread.
ArgList = (
    Token left, List[expr] pos_args,
    Token? semi_tok, List[NamedArg] named_args,
    Token? semi_tok2, expr? block_expr,
    Token right
)

# One [key]=value entry of a bash associative array literal; has_plus is +=
AssocPair = (CompoundWord key, CompoundWord value, bool has_plus)
|
| 203 |
|
# One element of an array initializer list
InitializerWord =
    ArrayWord(word w)
  | AssocPair %AssocPair

word_part =
    YshArrayLiteral %YshArrayLiteral
  | InitializerLiteral(Token left, List[InitializerWord] pairs, Token right)
  | Literal %Token
    # escaped case is separate so the evaluator doesn't have to check token ID
  | EscapedLiteral(Token token, str ch)
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted
    # Could be SimpleVarSub %Token that's VSub_DollarName, but let's not
    # confuse it with word_part.Literal, which is the common case for words
  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
  | ZshVarSub (Token left, CompoundWord ignored, Token right)
    # For command sub and process sub: $(...)  <(...)  >(...)
  | CommandSub %CommandSub
    # ~ or ~bob
  | TildeSub(Token left,           # always the tilde
             Token? name, str? user_name)
  | ArithSub(Token left, arith_expr anode, Token right)
    # {a,b,c}
  | BracedTuple(List[CompoundWord] words)
    # {1..10} or {-5..10..2} or {01..10} (leading zeros matter)
    # {a..f} or {a..f..2} or {a..f..-2}
    # the whole range is one Token
  | BracedRange(Token blame_tok, id kind, str start, str end, int step)
    # expanded version of {1..10}
  | BracedRangeDigit(str s, Token orig_tok)
    # extended globs are parsed statically, unlike globs
  | ExtGlob(Token op, List[CompoundWord] arms, Token right)
    # a regex group is similar to an extended glob part
  | BashRegexGroup(Token left, CompoundWord? child, Token right)

    # YSH word_part extensions

    # @myarray - Id.Lit_Splice (could be optimized to %Token)
  | Splice(Token blame_tok, str var_name)
    # @[expr] $[expr] - array splice or expr sub
  | ExprSub %ExprSub

# Use cases for Empty: RHS of 'x=', the argument in "${x:-}".
# The latter is semantically necessary.  (See osh/word_parse.py).
# At runtime: RHS of 'declare x='.
rhs_word = Empty | Compound %CompoundWord
|
| 251 |
|
word =
    # Returns from WordParser, but not generally stored in LST
    Operator %Token
    # A Compound word can contain any word_part except the Braced*Part.
    # We could model this with another variant type but it incurs runtime
    # overhead and seems like overkill.  Note that DoubleQuoted can't
    # contain a SingleQuoted, etc. either.
  | Compound %CompoundWord
    # For word sequences command.Simple, YshArrayLiteral, for_iter.Words
    # Could be its own type
  | BracedTree(List[word_part] parts)
    # For dynamic parsing of test aka [ - the string is already evaluated.
  | String(id id, str s, CompoundWord? blame_loc)
    # Redirect words like >  3>  {myvar}>
  | Redir(Token? left_tok, Token op)

# Note: the name 'foo' is derived from token value 'foo=' or 'foo+='
sh_lhs =
    Name(Token left, str name)       # Lit_VarLike foo=
    # TODO: Could be Name %Token
  | IndexedName(Token left, str name, arith_expr index)
  | UnparsedIndex(Token left, str name, str index)  # for translation
|
| 274 |
|
arith_expr =
    EmptyZero            # these are valid: $(( ))  (( ))  ${a[@]: : }
  | EmptyOne             # condition is 1 for infinite loop: for (( ; ; ))
  | VarSub %Token        # e.g. $(( x ))  Id.Arith_VarLike
  | Word %CompoundWord   # e.g. $(( 123'456'$y ))

  | UnaryAssign(id op_id, arith_expr child)
  | BinaryAssign(id op_id, arith_expr left, arith_expr right)

  | Unary(id op_id, arith_expr child)
  | Binary(Token op, arith_expr left, arith_expr right)
  | TernaryOp(arith_expr cond, arith_expr true_expr, arith_expr false_expr)

# [[ ]] conditions
bool_expr =
    WordTest(word w)     # e.g. [[ myword ]]
  | Binary(id op_id, word left, word right)
  | Unary(id op_id, word child)
  | LogicalNot(bool_expr child)
  | LogicalAnd(bool_expr left, bool_expr right)
  | LogicalOr(bool_expr left, bool_expr right)

# Where a redirect lands: a file descriptor number, or {myvar}> style name
redir_loc =
    Fd(int fd) | VarName(str name)
|
| 298 |
|
redir_param =
    Word %CompoundWord
  | HereWord(CompoundWord w, bool is_multiline)
  | HereDoc(word here_begin,        # e.g. EOF or 'EOF'
            Token? here_end_tok,    # Token consisting of the whole line;
                                    # it's always filled in AFTER creation,
                                    # but only temporarily, so it's optional
            List[word_part] stdin_parts  # one for each line
  )

# A single redirect: operator, target descriptor/name, and argument
Redir = (Token op, redir_loc loc, redir_param arg)

assign_op = Equal | PlusEqual
AssignPair = (Token left, sh_lhs lhs, assign_op op, rhs_word rhs)
# TODO: could put Id.Lit_VarLike foo= into LazyStr() with -1 slice
EnvPair = (Token left, str name, rhs_word val)
|
| 315 |
|
# Named list type, shared by condition.Shell and (eventually) command lists
List_of_command < List[command]

condition =
    Shell %List_of_command     # if false; true; then echo hi; fi
  | YshExpr(expr e)            # if (x > 0) { echo hi }
    # TODO: add more specific blame location

# Each arm tests one word against multiple words
# shell:  *.cc|*.h) echo C++ ;;
# YSH:    *.cc|*.h { echo C++ }
#
# Three location tokens:
# 1. left   - shell has ( or *.cc   ysh has *.cc
# 2. middle - shell has )           ysh has {
# 3. right  - shell has optional ;; ysh has required }
#
# For YSH typed case, left can be ( and /
# And case_pat may contain more details
CaseArm = (
    Token left, pat pattern, Token middle, List[command] action,
    Token? right
)
|
| 338 |
|
# The argument to match against in a case command
# In YSH-style case commands we match against an `expr`, but in sh-style case
# commands we match against a word.
case_arg =
    Word(word w)
  | YshExpr(expr e)

# One flag after ; in an eggex, possibly negated, e.g. / d+ ; !i /
EggexFlag = (bool negated, Token flag)

# canonical_flags can be compared for equality.  This is needed to splice
# eggexes correctly, e.g. / 'abc' @pat ; i /
Eggex = (
    Token left, re regex, List[EggexFlag] flags, Token? trans_pref,
    str? canonical_flags)

# Pattern of one case arm
pat =
    Else
  | Words(List[word] words)
  | YshExprs(List[expr] exprs)
  | Eggex %Eggex

# Each if arm starts with either an "if" or "elif" keyword
# In YSH, the then keyword is not used (replaced by braces {})
IfArm = (
    Token keyword, condition cond, Token? then_kw, List[command] action,
    # then_tok used in ysh-ify
    Token? then_tok)
|
| 366 |
|
for_iter =
    Args                          # for x; do echo $x; done  # implicit "$@"
  | Words(List[word] words)       # for x in 'foo' *.py { echo $x }
    # like YshArrayLiteral, but no location for %(
  | YshExpr(expr e, Token blame)  # for x in (mylist) { echo $x }
    #| Files(Token left, List[word] words)
    # for x in <> {
    # for x in < @myfiles > {

BraceGroup = (
    Token left, Token? doc_token, List[command] children, Token right
)

# A declared proc/func parameter, with optional type and default value
Param = (Token blame_tok, str name, TypeExpr? type, expr? default_val)
# Rest parameter, e.g. ...args
RestParam = (Token blame_tok, str name)

ParamGroup = (List[Param] params, RestParam? rest_of)
|
| 384 |
|
# 'open' is for proc p { }; closed is for proc p () { }
proc_sig =
    Open
  | Closed(ParamGroup? word, ParamGroup? positional, ParamGroup? named,
           Param? block_param)

Proc = (Token keyword, Token name, proc_sig sig, command body)

Func = (
    Token keyword, Token name,
    ParamGroup? positional, ParamGroup? named,
    command body
)

# Represents all these cases: s=1  s+=1  s[x]=1 ...
ParsedAssignment = (Token? left, Token? close, int part_offset, CompoundWord w)

# var, const.
# - Keyword is None for hay blocks.  TODO: consider using BareDecl?
# - 'var x' allowed - RHS is None; idiomatic with value.Place
VarDecl = (Token? keyword, List[NameType] lhs, expr? rhs)

# setvar, maybe 'auto' later
Mutation = (Token keyword, List[y_lhs] lhs, Token op, expr rhs)

# call f(x) = 42
ExprCommand = (Token keyword, expr e)

ShFunction = (
    Token? keyword, Token name_tok, str name, command body,
    str? code_str
)
|
| 417 |
|
command =
    NoOp

    # can wrap many children, e.g. { }, loops, functions
  | Redirect(command child, List[Redir] redirects)

  | Simple(Token? blame_tok,         # TODO: make required (BracedTuple?)
           List[EnvPair] more_env,
           List[word] words,
           ArgList? typed_args, LiteralBlock? block,
           # is_last_cmd is used for fork() optimizations
           bool is_last_cmd,
           # (#2307) Redirects on simple commands are evaluated
           # AFTER the argv word list, unlike all other commands.
           # This field is null if there are no redirects
           List[Redir]? redirects)

    # This doesn't technically belong in the LST, but it's convenient for
    # execution
  | ExpandedAlias(command child, List[EnvPair] more_env)
  | Sentence(command child, Token terminator)
    # Represents "bare assignment"
    # Token left is redundant with pairs[0].left
  | ShAssignment(Token left, List[AssignPair] pairs)

  | ControlFlow(Token keyword, CompoundWord? arg_word)

    # ops are |  |&
  | Pipeline(Token? negated, List[command] children, List[Token] ops)
    # ops are && ||
  | AndOr(List[command] children, List[Token] ops)

    # Part of for, while, until (but not if, case, ShFunction).  No redirects.
  | DoGroup(Token left, List[command] children, Token right)
    # A brace group is a compound command, with redirects.
  | BraceGroup %BraceGroup
    # Contains a single child, like CommandSub
  | Subshell(Token left, command child, Token right, bool is_last_cmd)
  | DParen(Token left, arith_expr child, Token right)
  | DBracket(Token left, bool_expr expr, Token right)

    # up to 3 iteration variables
  | ForEach(Token keyword, List[str] iter_names, for_iter iterable,
            Token? semi_tok, command body)
    # C-style for loop.  Any of the 3 expressions can be omitted.
    # Note: body is required, but only optional here because of
    # initialization order.
  | ForExpr(Token keyword, arith_expr? init, arith_expr? cond,
            arith_expr? update, command? body)
  | WhileUntil(Token keyword, condition cond, command body)

  | If(Token if_kw, List[IfArm] arms, Token? else_kw,
       List[command] else_action, Token? fi_kw)
  | Case(Token case_kw, case_arg to_match, Token arms_start,
         List[CaseArm] arms, Token arms_end)

    # The keyword is optional in the case of bash-style functions
    # (ie. "foo() { ... }") which do not have one.
  | ShFunction %ShFunction

  | TimeBlock(Token keyword, command pipeline)
    # Some nodes optimize it out as List[command], but we use CommandList for
    #   1. the top level
    #   2. ls ; ls & ls  (same line)
    #   3. CommandSub  # single child that's a CommandList
    #   4. Subshell    # single child that's a CommandList

    # TODO: Use List_of_command
  | CommandList(List[command] children)

    # YSH command constructs

  | VarDecl %VarDecl

    # this can behave like 'var', can be desugared
  | BareDecl(Token lhs, expr rhs)

  | Mutation %Mutation
  | Expr %ExprCommand
  | Proc %Proc
  | Func %Func
  | Retval(Token keyword, expr val)
|
| 500 |
|
# bytecode - the subset of command that the bytecode layer represents
b_command =
    VarDecl %VarDecl
  | Mutation %Mutation

#
# Glob representation, for converting ${x//} to extended regexes.
#

# Example: *.[ch] is:
#   GlobOp(<Glob_Star '*'>),
#   GlobLit(Glob_OtherLiteral, '.'),
#   CharClass(False, ['ch'])  # from Glob_CleanLiterals token

glob_part =
    Literal(id id, str s)
  | Operator(id op_id)       # * or ?
  | CharClass(bool negated, List[str] strs)

# Char classes are opaque for now.  If we ever need them:
# - Collating symbols are [. .]
# - Equivalence classes are [= =]

printf_part =
    Literal %Token
    # flags are 0 hyphen space + #
    # type is 's' for %s, etc.
  | Percent(List[Token] flags, Token? width, Token? precision, Token type)
|
| 529 |
|
#
# YSH Language
#
# Copied and modified from Python-3.7/Parser/Python.asdl !

expr_context = Load | Store | Del | AugLoad | AugStore | Param

# Type expressions: Int  List[Int]  Dict[Str, Any]
# Do we have Func[Int, Int => Int] ?  I guess we can parse that into this
# system.
TypeExpr = (Token tok, str name, List[TypeExpr] params)

# LHS bindings in var/const, and eggex
NameType = (Token left, str name, TypeExpr? typ)

# TODO: Inline this into GenExp and ListComp?  Just use a flag there?
Comprehension = (List[NameType] lhs, expr iter, expr? cond)

# Named arguments supplied to call.  Token is null for f(; ...named).
NamedArg = (Token? name, expr value)

# Subscripts are lists of expressions
#   a[:i, n]  (we don't have matrices, but we have data frames)
Subscript = (Token left, expr obj, expr index)

# Attributes are obj.attr, d->key, name::scope,
Attribute = (expr obj, Token op, Token attr, str attr_name, expr_context ctx)

# Valid left-hand sides of setvar / setglobal
y_lhs =
    Var %Token               # Id.Expr_Name
  | Subscript %Subscript
  | Attribute %Attribute

place_op =
    # &a[i+1]
    Subscript(Token op, expr index)
    # &d.mykey
  | Attribute(Token op, Token attr)
|
| 568 |
|
expr =
    Var(Token left, str name)        # a variable name to evaluate
    # Constants are typically Null, Bool, Int, Float
    # and also Str for key in {key: 42}
    # But string literals are SingleQuoted or DoubleQuoted
    # Python uses Num(object n), which doesn't respect our "LST" invariant.
  | Const(Token c, value val)

    # read(&x)  json read (&x[0])
  | Place(Token blame_tok, str var_name, place_op* ops)

    # :| one 'two' "$three" |
  | YshArrayLiteral %YshArrayLiteral

    # / d+ ; ignorecase; %python /
  | Eggex %Eggex

    # $name is not an expr, but $? is, e.g. Id.VSub_QMark
  | SimpleVarSub %SimpleVarSub
  | BracedVarSub %BracedVarSub
  | CommandSub %CommandSub
  | ExprSub %ExprSub
  | SingleQuoted %SingleQuoted
  | DoubleQuoted %DoubleQuoted

  | Literal(expr inner)
  | Lambda(List[NameType] params, expr body)

  | Unary(Token op, expr child)
  | Binary(Token op, expr left, expr right)
    # x < 4 < 3  and  (x < 4) < 3
  | Compare(expr left, List[Token] ops, List[expr] comparators)
  | FuncCall(expr func, ArgList args)

    # TODO: Need a representation for method call.  We don't just want
    # Attribute() and then Call()

  | IfExp(expr test, expr body, expr orelse)
  | Tuple(Token left, List[expr] elts, expr_context ctx)

  | List(Token left, List[expr] elts, expr_context ctx)
  | Dict(Token left, List[expr] keys, List[expr] values)
    # For the values in {n1, n2}
  | Implicit

  | ListComp(Token left, expr elt, List[Comprehension] generators)
    # not implemented
  | DictComp(Token left, expr key, expr value, List[Comprehension] generators)
  | GeneratorExp(expr elt, List[Comprehension] generators)

    # Ranges are written 1:2, with first class expression syntax.  There is
    # no step as in Python.  Use range(0, 10, step=2) for that.
  | Range(expr lower, Token op, expr upper)

    # Slices occur within [] only.  Unlike ranges, the start/end can be
    # implicit.  Like ranges, denote a step with slice(0, 10, step=2).
    #   a[3:]   a[:i]
  | Slice(expr? lower, Token op, expr? upper)

  | Subscript %Subscript
  | Attribute %Attribute

    # Ellipsis is like 'Starred' within Python, which are valid on the LHS in
    # Python for unpacking, and within list literals for splicing.
    # (Starred is NOT used for {k:v, **a}.  That used a blank "keys"
    # attribute.)

    # I think we can use { **pairs } like Python
  | Spread(Token left, expr child)
|
| 638 |
|
#
# Regex Language (Eggex)
#

# e.g. alnum digit
PosixClass = (Token? negated, str name)
# e.g. d w s
PerlClass = (Token? negated, str name)

# Char Sets and Ranges both use Char Codes
# with u_braced == true : \u{ff}
# with u_braced == false: \xff  \\  'a' a  '0' 0
# ERE doesn't make a distinction, but compiling to Python/PCRE can use it
CharCode = (Token blame_tok, int i, bool u_braced)
CharRange = (CharCode start, CharCode end)
|
| 654 |
|
# Note: .NET has && in character classes, making it a recursive language

class_literal_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass
  | CharRange %CharRange
  | CharCode %CharCode

  | SingleQuoted %SingleQuoted
    # @chars
  | Splice(Token name, str var_name)  # could be Splice %Token

# evaluated version of class_literal_term (could be in runtime.asdl)
char_class_term =
    PosixClass %PosixClass
  | PerlClass %PerlClass

  | CharRange %CharRange
    # For [ \x00 \\ ]
  | CharCode %CharCode
|
| 675 |
|
# NOTE: modifier is unused now, can represent L or P
re_repeat =
    Op %Token                # + * ? or Expr_DecInt for x{3}
  | Range(Token? left, str lower, str upper, Token? right)  # dot{1,2}
    # Haven't implemented the modifier, e.g. x{+ P}
    # | Num(Token times, id modifier)
    # | Range(Token? lower, Token? upper, id modifier)

re =
    Primitive(Token blame_tok, id id)  # . ^ $  dot %start %end
  | PosixClass %PosixClass
  | PerlClass %PerlClass
    # syntax [ $x \n ]
  | CharClassLiteral(bool negated, List[class_literal_term] terms)
    # evaluated [ 'abc' \n ]
  | CharClass(bool negated, List[char_class_term] terms)

    # @D
  | Splice(Token name, str var_name)   # TODO: Splice %Token ?

  | SingleQuoted %SingleQuoted

    # Compound:
  | Repeat(re child, re_repeat op)
  | Seq(List[re] children)
  | Alt(List[re] children)

  | Group(re child)
    # convert_func is filled in on evaluation
    # TODO: name and func_name can be expanded to strings
  | Capture(re child, Token? name, Token? func_name)
  | Backtracking(bool negated, Token name, re child)

    # \u{ff} is parsed as this, but SingleQuoted also evaluates to it
  | LiteralChars(Token blame_tok, str s)
}
|