# OILS / osh / cmd_parse.py  (source: oilshell.org, "View on Github")
# 2825 lines, 1428 significant
1# Copyright 2016 Andy Chu. All rights reserved.
2# Licensed under the Apache License, Version 2.0 (the "License");
3# you may not use this file except in compliance with the License.
4# You may obtain a copy of the License at
5#
6# http://www.apache.org/licenses/LICENSE-2.0
7"""
8cmd_parse.py - Parse high level shell commands.
9"""
10from __future__ import print_function
11
12from _devbuild.gen import grammar_nt
13from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_str
14from _devbuild.gen.types_asdl import lex_mode_e, cmd_mode_e, cmd_mode_t
15from _devbuild.gen.syntax_asdl import (
16 loc,
17 SourceLine,
18 source,
19 parse_result,
20 parse_result_t,
21 command,
22 command_t,
23 condition,
24 condition_t,
25 for_iter,
26 ArgList,
27 BraceGroup,
28 LiteralBlock,
29 CaseArm,
30 case_arg,
31 IfArm,
32 pat,
33 pat_t,
34 Redir,
35 redir_param,
36 redir_loc,
37 redir_loc_t,
38 word_e,
39 word_t,
40 CompoundWord,
41 Token,
42 word_part_e,
43 word_part_t,
44 rhs_word,
45 rhs_word_t,
46 sh_lhs,
47 sh_lhs_t,
48 AssignPair,
49 EnvPair,
50 ParsedAssignment,
51 assign_op_e,
52 NameType,
53 proc_sig,
54 proc_sig_e,
55 Proc,
56 Func,
57)
58from core import alloc
59from core import error
60from core.error import p_die
61from display import ui
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from frontend import match
66from frontend import reader
67from mycpp.mylib import log
68from osh import braces
69from osh import bool_parse
70from osh import word_
71
72from typing import Optional, List, Dict, Any, Tuple, cast, TYPE_CHECKING
73if TYPE_CHECKING:
74 from core.alloc import Arena
75 from core import optview
76 from frontend.lexer import Lexer
77 from frontend.parse_lib import ParseContext, AliasesInFlight
78 from frontend.reader import _Reader
79 from osh.word_parse import WordParser
80
81_ = Kind_str # for debug prints
82
83TAB_CH = 9 # ord('\t')
84SPACE_CH = 32 # ord(' ')
85
86
def _ReadHereLines(
        line_reader,  # type: _Reader
        h,  # type: Redir
        delimiter,  # type: str
):
    # type: (...) -> Tuple[List[Tuple[SourceLine, int]], Tuple[SourceLine, int]]
    """Collect the raw lines of a here doc body, up to the delimiter line.

    Returns:
      (here_lines, last_line), where each entry is (line, start_offset).
      start_offset is non-zero only for <<-, which strips leading tabs.
      last_line is the line containing the terminating delimiter.

    Raises:
      error.Parse (via p_die) if EOF is reached before the delimiter.
    """
    # NOTE: We read all lines at once, instead of parsing line-by-line,
    # because of cases like this:
    # cat <<EOF
    # 1 $(echo 2
    # echo 3) 4
    # EOF
    here_lines = []  # type: List[Tuple[SourceLine, int]]
    last_line = None  # type: Tuple[SourceLine, int]
    strip_leading_tabs = (h.op.id == Id.Redir_DLessDash)

    while True:
        src_line, unused_offset = line_reader.GetLine()

        if src_line is None:  # EOF
            # An unterminated here doc is just a warning in bash.  We make it
            # fatal because we want to be strict, and because it causes problems
            # reporting other errors.
            # Attribute it to the << in <<EOF for now.
            p_die("Couldn't find terminator for here doc that starts here",
                  h.op)

        assert len(src_line.content) != 0  # None should be the empty line

        line = src_line.content

        # If op is <<-, strip off ALL leading tabs -- not spaces, and not just
        # the first tab.
        start_offset = 0
        if strip_leading_tabs:
            n = len(line)
            i = 0  # used after loop exit
            while i < n:
                if line[i] != '\t':
                    break
                i += 1
            start_offset = i

        # The delimiter test ignores trailing whitespace on the line.
        if line[start_offset:].rstrip() == delimiter:
            last_line = (src_line, start_offset)
            break

        here_lines.append((src_line, start_offset))

    return here_lines, last_line
137
138
def _MakeLiteralHereLines(
        here_lines,  # type: List[Tuple[SourceLine, int]]
        arena,  # type: Arena
        do_lossless,  # type: bool
):
    # type: (...) -> List[word_part_t]
    """Create one Lit_Chars Token per here doc line.

    Used for the single quoted rule: <<'EOF' and <<-'EOF'.

    <<- has non-zero start_offset (stripped leading tabs).
    """
    # Declared with the less precise type List[word_part_t] (not List[Token])
    # because List[T] is an invariant type.
    parts = []  # type: List[word_part_t]
    for src_line, start_offset in here_lines:
        # Maintain lossless invariant for STRIPPED tabs: add a Token to the
        # arena, but don't refer to it.
        #
        # Note: We could use Lit_CharsWithoutPrefix for 'single quoted' EOF
        # here docs, but it's more complex with double quoted EOF docs.
        if do_lossless:  # avoid garbage, doesn't affect correctness
            arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0,
                           src_line)

        parts.append(
            arena.NewToken(Id.Lit_Chars, start_offset,
                           len(src_line.content), src_line))
    return parts
169
170
def _ParseHereDocBody(parse_ctx, r, line_reader, arena):
    # type: (ParseContext, Redir, _Reader, Arena) -> None
    """Fill in attributes of a pending here doc node.

    Reads lines up to the delimiter, then either stores them as literal
    tokens (quoted delimiter) or parses them as words (unquoted delimiter).
    Mutates the redir's HereDoc arg: sets stdin_parts and here_end_tok.
    """
    h = cast(redir_param.HereDoc, r.arg)
    # "If any character in word is quoted, the delimiter shall be formed by
    # performing quote removal on word, and the here-document lines shall not
    # be expanded.  Otherwise, the delimiter shall be the word itself."
    # NOTE: \EOF counts, or even E\OF
    ok, delimiter, delim_quoted = word_.StaticEval(h.here_begin)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(h.here_begin))

    here_lines, last_line = _ReadHereLines(line_reader, r, delimiter)

    if delim_quoted:
        # <<'EOF' and <<-'EOF' - Literal for each line.
        h.stdin_parts = _MakeLiteralHereLines(here_lines, arena,
                                              parse_ctx.do_lossless)
    else:
        # <<EOF and <<-EOF - Parse as word
        line_reader = reader.VirtualLineReader(arena, here_lines,
                                               parse_ctx.do_lossless)
        w_parser = parse_ctx.MakeWordParserForHereDoc(line_reader)
        w_parser.ReadHereDocBody(h.stdin_parts)  # fills this in

    end_line, start_offset = last_line

    # Maintain lossless invariant for STRIPPED tabs: add a Token to the
    # arena invariant, but don't refer to it.
    if parse_ctx.do_lossless:  # avoid garbage, doesn't affect correctness
        arena.NewToken(Id.Lit_CharsWithoutPrefix, start_offset, 0, end_line)

    # Create a Token with the end terminator.  Maintains the invariant that
    # the tokens "add up".
    h.here_end_tok = arena.NewToken(Id.Undefined_Tok, start_offset,
                                    len(end_line.content), end_line)
207
208
def _MakeAssignPair(parse_ctx, preparsed, arena):
    # type: (ParseContext, ParsedAssignment, Arena) -> AssignPair
    """Create an AssignPair from a ParsedAssignment (from DetectShAssignment).

    Handles three LHS shapes:
      s=1 or s+=1  -> sh_lhs.Name
      a[i]=x       -> sh_lhs.UnparsedIndex in lossless mode (index kept as a
                      raw string)
      a[i+1]=x     -> sh_lhs.IndexedName, with the index parsed as arithmetic
    """

    left_token = preparsed.left
    close_token = preparsed.close

    lhs = None  # type: sh_lhs_t

    if left_token.id == Id.Lit_VarLike:  # s=1
        if lexer.IsPlusEquals(left_token):
            var_name = lexer.TokenSliceRight(left_token, -2)  # strip '+='
            op = assign_op_e.PlusEqual
        else:
            var_name = lexer.TokenSliceRight(left_token, -1)  # strip '='
            op = assign_op_e.Equal

        lhs = sh_lhs.Name(left_token, var_name)

    elif left_token.id == Id.Lit_ArrayLhsOpen and parse_ctx.do_lossless:
        var_name = lexer.TokenSliceRight(left_token, -1)  # strip '['
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        assert left_token.line == close_token.line, \
            '%s and %s not on same line' % (left_token, close_token)

        # Extract the text between the brackets, without reparsing it.
        left_pos = left_token.col + left_token.length
        index_str = left_token.line.content[left_pos:close_token.col]
        lhs = sh_lhs.UnparsedIndex(left_token, var_name, index_str)

    elif left_token.id == Id.Lit_ArrayLhsOpen:  # a[x++]=1
        var_name = lexer.TokenSliceRight(left_token, -1)
        if lexer.IsPlusEquals(close_token):
            op = assign_op_e.PlusEqual
        else:
            op = assign_op_e.Equal

        # Similar to SnipCodeString / SnipCodeBlock
        if left_token.line == close_token.line:
            # extract what's between brackets
            s = left_token.col + left_token.length
            code_str = left_token.line.content[s:close_token.col]
        else:
            raise NotImplementedError('%s != %s' %
                                      (left_token.line, close_token.line))
        a_parser = parse_ctx.MakeArithParser(code_str)

        # a[i+1]= is a LHS
        src = source.Reparsed('array LHS', left_token, close_token)
        with alloc.ctx_SourceCode(arena, src):
            index_node = a_parser.Parse()  # may raise error.Parse

        lhs = sh_lhs.IndexedName(left_token, var_name, index_node)

    else:
        raise AssertionError()

    # TODO: Should we also create a rhs_expr.ArrayLiteral here?
    parts = preparsed.w.parts
    offset = preparsed.part_offset

    n = len(parts)
    if offset == n:
        # Nothing after '=', e.g. FOO=
        rhs = rhs_word.Empty  # type: rhs_word_t
    else:
        w = CompoundWord(parts[offset:])
        word_.TildeDetectAssign(w)  # FOO=~/bin needs tilde detection
        rhs = w

    return AssignPair(left_token, lhs, op, rhs)
282
283
def _AppendMoreEnv(preparsed_list, more_env):
    # type: (List[ParsedAssignment], List[EnvPair]) -> None
    """Helper to modify a SimpleCommand node.

    Turns prefix assignments into env bindings, e.g. the FOO=bar in
    'FOO=bar ls'.

    Args:
      preparsed_list: a list of ParsedAssignment from DetectShAssignment
      more_env: a list to append EnvPair instances to

    Raises:
      error.Parse (via p_die) for array assignments like a[i]=1, and for +=,
      neither of which is valid in an environment binding.
    """
    for preparsed in preparsed_list:
        left_token = preparsed.left

        if left_token.id != Id.Lit_VarLike:  # can't be a[x]=1
            p_die(
                "Environment binding shouldn't look like an array assignment",
                left_token)

        if lexer.IsPlusEquals(left_token):
            p_die('Expected = in environment binding, got +=', left_token)

        var_name = lexer.TokenSliceRight(left_token, -1)  # strip '='

        parts = preparsed.w.parts
        n = len(parts)
        offset = preparsed.part_offset
        if offset == n:
            # Nothing after '=', e.g. FOO= cmd
            rhs = rhs_word.Empty  # type: rhs_word_t
        else:
            w = CompoundWord(parts[offset:])
            word_.TildeDetectAssign(w)  # FOO=~/bin needs tilde detection
            rhs = w

        more_env.append(EnvPair(left_token, var_name, rhs))
316
317
def _SplitSimpleCommandPrefix(words):
    # type: (List[CompoundWord]) -> Tuple[List[ParsedAssignment], List[CompoundWord]]
    """Second pass of SimpleCommand parsing: look for assignment words.

    Splits the word list into a (possibly empty) run of leading assignments
    like FOO=bar, and the remaining command words.
    """
    assignments = []  # type: List[ParsedAssignment]
    rest = []  # type: List[CompoundWord]

    seen_command = False
    for w in words:
        if not seen_command:
            left_token, close_token, part_offset = word_.DetectShAssignment(w)
            if left_token:
                assignments.append(
                    ParsedAssignment(left_token, close_token, part_offset, w))
                continue
            # First non-assignment word: everything from here on is a
            # command word.
            seen_command = True

        rest.append(w)

    return assignments, rest
339
340
def _MakeSimpleCommand(
        preparsed_list,  # type: List[ParsedAssignment]
        suffix_words,  # type: List[CompoundWord]
        typed_args,  # type: Optional[ArgList]
        block,  # type: Optional[LiteralBlock]
):
    # type: (...) -> command.Simple
    """Create a command.Simple.

    Runs brace and tilde DETECTION on the command words, and converts the
    prefix assignments into env bindings via _AppendMoreEnv().
    """

    # FOO=(1 2 3) ls is not allowed.
    for preparsed in preparsed_list:
        if word_.HasArrayPart(preparsed.w):
            p_die("Environment bindings can't contain array literals",
                  loc.Word(preparsed.w))

    # NOTE: It would be possible to add this check back.  But it already happens
    # at runtime in EvalWordSequence2.
    # echo FOO=(1 2 3) is not allowed (but we should NOT fail on echo FOO[x]=1).
    if 0:
        for w in suffix_words:
            if word_.HasArrayPart(w):
                p_die("Commands can't contain array literals", loc.Word(w))

    assert len(suffix_words) != 0
    # {a,b,c}   # Use { before brace detection
    # ~/bin/ls  # Use ~ before tilde detection
    part0 = suffix_words[0].parts[0]
    blame_tok = location.LeftTokenForWordPart(part0)

    # NOTE: We only do brace DETECTION here, not brace EXPANSION.  Therefore we
    # can't implement bash's behavior of having say {~bob,~jane}/src work,
    # because we only have a BracedTree.
    # This is documented in spec/brace-expansion.
    # NOTE: Technically we could do expansion outside of 'oshc translate', but
    # it doesn't seem worth it.
    words2 = braces.BraceDetectAll(suffix_words)
    words3 = word_.TildeDetectAll(words2)

    more_env = []  # type: List[EnvPair]
    _AppendMoreEnv(preparsed_list, more_env)

    # is_last_cmd is False by default
    return command.Simple(blame_tok, more_env, words3, typed_args, block,
                          False)
385
386
class VarChecker(object):
    """Statically check for proc and variable usage errors."""

    def __init__(self):
        # type: () -> None
        # self.tokens for location info: 'proc' or another token.  Also used
        # to detect whether we're inside a proc/func at all.
        self.tokens = []  # type: List[Token]
        # Scope stack: one dict per proc/func/shell function, mapping each
        # declared name to the keyword that declared it (e.g. Id.KW_Var).
        self.names = []  # type: List[Dict[str, Id_t]]

    def Push(self, blame_tok):
        # type: (Token) -> None
        """Called when we enter a shell function, proc, or func.

        Bash allows this, but it's confusing because it's the same as two
        functions at the top level.

        f() {
          g() {
            echo 'top level function defined in another one'
          }
        }

        YSH disallows nested procs and funcs.
        """
        if len(self.tokens) != 0:
            if blame_tok.id == Id.KW_Proc:
                p_die("procs must be defined at the top level", blame_tok)
            if blame_tok.id == Id.KW_Func:
                p_die("funcs must be defined at the top level", blame_tok)
            if self.tokens[0].id in (Id.KW_Proc, Id.KW_Func):
                p_die("shell functions can't be defined inside proc or func",
                      blame_tok)

        self.tokens.append(blame_tok)
        entry = {}  # type: Dict[str, Id_t]
        self.names.append(entry)

    def Pop(self):
        # type: () -> None
        """Called when we leave a shell function, proc, or func."""
        self.names.pop()
        self.tokens.pop()

    def Check(self, keyword_id, var_name, blame_tok):
        # type: (Id_t, str, Token) -> None
        """Check for declaration / mutation errors in proc and func.

        var x
          x already declared
        setvar x:
          x is not declared
        setglobal x:
          No errors are possible; we would need all these many conditions to
          statically know the names:
          - no 'source'
          - shopt -u copy_env.
          - AND use lib has to be static

        What about bare assignment in Hay?  I think these are dynamic checks --
        there is no static check.  Hay is for building up data imperatively,
        and then LATER, right before main(), it can be type checked.

        Package {
          version = '3.11'
          version = '3.12'
        }
        """
        # No static checks at the global level!  Because of 'source', var and
        # setvar are essentially the same.
        if len(self.names) == 0:
            return

        top = self.names[-1]
        if keyword_id == Id.KW_Var:
            if var_name in top:
                p_die('%r was already declared' % var_name, blame_tok)
            else:
                top[var_name] = keyword_id

        if keyword_id == Id.KW_SetVar:
            if var_name not in top:
                # Note: the solution could be setglobal, etc.
                p_die(
                    "setvar couldn't find matching 'var %s' (OILS-ERR-10)" %
                    var_name, blame_tok)
475
476
class ctx_VarChecker(object):
    """Context manager: one VarChecker scope for a proc, func, or function.

    Pushes a new scope when constructed; pops it when the 'with' block exits.
    """

    def __init__(self, var_checker, blame_tok):
        # type: (VarChecker, Token) -> None
        self.var_checker = var_checker
        var_checker.Push(blame_tok)

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.var_checker.Pop()
491
492
class ctx_CmdMode(object):
    """Context manager: temporarily switch the parser's cmd_mode.

    Saves the current mode, installs new_cmd_mode, and restores the saved
    mode when the 'with' block exits.
    """

    def __init__(self, cmd_parse, new_cmd_mode):
        # type: (CommandParser, cmd_mode_t) -> None
        self.prev_cmd_mode = cmd_parse.cmd_mode
        self.cmd_parse = cmd_parse
        cmd_parse.cmd_mode = new_cmd_mode

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.cmd_parse.cmd_mode = self.prev_cmd_mode
508
509
# Keywords that appear in the middle or at the end of compound commands,
# e.g. 'then'/'fi' for if, 'do'/'done' for loops, 'esac' for case.
# Checked by CommandParser._AtSecondaryKeyword().
SECONDARY_KEYWORDS = [
    Id.KW_Do, Id.KW_Done, Id.KW_Then, Id.KW_Fi, Id.KW_Elif, Id.KW_Else,
    Id.KW_Esac
]
514
515
516class CommandParser(object):
517 """Recursive descent parser derived from POSIX shell grammar.
518
519 This is a BNF grammar:
520 https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_10
521
522 - Augmented with both bash/OSH and YSH constructs.
523
524 - We use regex-like iteration rather than recursive references
525 ? means optional (0 or 1)
526 * means 0 or more
527 + means 1 or more
528
529 - Keywords are spelled in Caps:
530 If Elif Case
531
532 - Operator tokens are quoted:
533 '(' '|'
534
535 or can be spelled directly if it matters:
536
537 Op_LParen Op_Pipe
538
539 - Non-terminals are snake_case:
540 brace_group subshell
541
542 Methods in this class should ROUGHLY CORRESPOND to grammar productions, and
543 the production should be in the method docstrings, e.g.
544
545 def ParseSubshell():
546 "
547 subshell : '(' compound_list ')'
548
549 Looking at Op_LParen # Comment to say how this method is called
550 "
551
552 The grammar may be factored to make parsing easier.
553 """
554
    def __init__(self,
                 parse_ctx,
                 parse_opts,
                 w_parser,
                 lexer,
                 line_reader,
                 eof_id=Id.Eof_Real):
        # type: (ParseContext, optview.Parse, WordParser, Lexer, _Reader, Id_t) -> None
        """
        Args:
          parse_ctx: shared parsing state, including aliases and the arena
          parse_opts: shell options that affect parsing (parse_brace, etc.)
          w_parser: word parser that this command parser drives
          lexer: for pushing hints and lookahead to (
          line_reader: for reading here doc bodies
          eof_id: the Id that terminates parsing (default Eof_Real)
        """
        self.parse_ctx = parse_ctx
        self.aliases = parse_ctx.aliases  # aliases to expand at parse time

        self.parse_opts = parse_opts
        self.w_parser = w_parser  # type: WordParser  # for normal parsing
        self.lexer = lexer  # for pushing hints, lookahead to (
        self.line_reader = line_reader  # for here docs
        self.eof_id = eof_id

        self.arena = line_reader.arena  # for adding here doc and alias spans
        self.aliases_in_flight = []  # type: AliasesInFlight

        # A hacky boolean to remove 'if cd / {' ambiguity.
        self.allow_block = True

        # Stack of booleans for nested Attr and SHELL nodes.
        # Attr nodes allow bare assignment x = 42, but not shell x=42.
        # SHELL nodes are the inverse.  'var x = 42' is preferred in shell
        # nodes, but x=42 is still allowed.
        #
        # Note: this stack could be optimized by turning it into an integer
        # and binary encoding.
        self.hay_attrs_stack = []  # type: List[bool]

        # Note: VarChecker is instantiated with each CommandParser, which means
        # that two 'proc foo' -- inside a command sub and outside -- don't
        # conflict, because they use different CommandParser instances.  I
        # think this is OK but you can imagine different behaviors.
        self.var_checker = VarChecker()

        self.cmd_mode = cmd_mode_e.Shell  # type: cmd_mode_t

        self.Reset()
596
597 # Init_() function for "keyword arg"
    def Init_AliasesInFlight(self, aliases_in_flight):
        # type: (AliasesInFlight) -> None
        """Share the aliases-in-flight list with a parent CommandParser.

        Called by _MaybeExpandAliases() on the recursively-created parser, so
        that alias expansion cycles can be detected across parsers.
        """
        self.aliases_in_flight = aliases_in_flight
601
    def Reset(self):
        # type: () -> None
        """Reset our own internal state.

        Called by the interactive loop.
        """
        # Cursor state set by _GetWord()
        self.next_lex_mode = lex_mode_e.ShCommand
        self.cur_word = None  # type: word_t  # current word
        self.c_kind = Kind.Undefined
        self.c_id = Id.Undefined_Tok

        # Here doc redirects whose bodies are filled in when the next newline
        # is read (see _GetWord).
        self.pending_here_docs = []  # type: List[Redir]
615
    def ResetInputObjects(self):
        # type: () -> None
        """Reset the internal state of our inputs.

        Resets the word parser, lexer, and line reader.  Called by the
        interactive loop.
        """
        self.w_parser.Reset()
        self.lexer.ResetInputObjects()
        self.line_reader.Reset()
625
    def _SetNext(self):
        # type: () -> None
        """Call this when you no longer need the current token.

        This method is lazy.  A subsequent call to _GetWord() will
        actually read the next Token.
        """
        self.next_lex_mode = lex_mode_e.ShCommand
634
    def _SetNextBrack(self):
        # type: () -> None
        """Like _SetNext(), but lex the next word in ShCommandFakeBrack mode.

        Used for words after the first, so [ can start a lazy typed-arg list
        like 'my-cmd [x]' (see _ScanSimpleCommand).
        """
        self.next_lex_mode = lex_mode_e.ShCommandFakeBrack
638
    def _GetWord(self):
        # type: () -> None
        """Call this when you need to make a decision based on Id or Kind.

        If there was an "unfulfilled" call to _SetNext(), it reads a word and
        sets self.c_id and self.c_kind.

        Otherwise it does nothing.
        """
        if self.next_lex_mode != lex_mode_e.Undefined:
            w = self.w_parser.ReadWord(self.next_lex_mode)
            #log("w %s", w)

            # Here docs only happen in command mode, so other kinds of
            # newlines don't count.
            if w.tag() == word_e.Operator:
                tok = cast(Token, w)
                if tok.id == Id.Op_Newline:
                    # A newline means any pending here doc bodies begin on
                    # the next line; parse them now.
                    for h in self.pending_here_docs:
                        _ParseHereDocBody(self.parse_ctx, h, self.line_reader,
                                          self.arena)
                    del self.pending_here_docs[:]  # No .clear() until Python 3.3.

            self.cur_word = w

            self.c_kind = word_.CommandKind(self.cur_word)
            # Has special case for Id.Lit_{LBrace,RBrace,Equals}
            self.c_id = word_.CommandId(self.cur_word)
            # Mark the cursor fulfilled until the next _SetNext().
            self.next_lex_mode = lex_mode_e.Undefined
668
    def _Eat(self, c_id, msg=None):
        # type: (Id_t, Optional[str]) -> word_t
        """Consume a word of a type, maybe showing a custom error message.

        Args:
          c_id: the Id we expected
          msg: improved error message

        Returns:
          The word that was consumed.

        Raises:
          error.Parse (via p_die) if the current word doesn't have Id c_id.
        """
        self._GetWord()
        if self.c_id != c_id:
            if msg is None:
                msg = 'Expected word type %s, got %s' % (
                    ui.PrettyId(c_id), ui.PrettyId(self.c_id))
            p_die(msg, loc.Word(self.cur_word))

        skipped = self.cur_word
        self._SetNext()
        return skipped
687
    def _NewlineOk(self):
        # type: () -> None
        """Check for optional newline and consume it.

        Does nothing if the current word isn't a newline.
        """
        self._GetWord()
        if self.c_id == Id.Op_Newline:
            self._SetNext()
694
695 def _AtSecondaryKeyword(self):
696 # type: () -> bool
697 self._GetWord()
698 if self.c_id in SECONDARY_KEYWORDS:
699 return True
700 return False
701
    def ParseRedirect(self):
        # type: () -> Redir
        """Parse a single redirect: >out  2>&1  {fd}>out  <<EOF  etc.

        Precondition: the cursor is at a Kind.Redir token.

        For here docs (<< and <<-), the returned Redir has an empty body;
        it's filled in when the next newline is read (see _GetWord).
        """
        self._GetWord()
        assert self.c_kind == Kind.Redir, self.cur_word
        op_tok = cast(Token, self.cur_word)  # for MyPy

        # Note: the lexer could distinguish between
        #   >out
        #   3>out
        #   {fd}>out
        #
        # which would make the code below faster.  But small string
        # optimization would also speed it up, since redirects are small.

        # One way to do this is with Kind.Redir and Kind.RedirNamed, and then
        # possibly "unify" the IDs by subtracting a constant like 8 or 16?

        op_val = lexer.TokenVal(op_tok)
        if op_val[0] == '{':
            # {fd}>out - named descriptor
            pos = op_val.find('}')
            assert pos != -1  # lexer ensures this
            where = redir_loc.VarName(op_val[1:pos])  # type: redir_loc_t

        elif op_val[0].isdigit():
            # 3>out - explicit descriptor; the lexer produced 1 or 2 digits
            # before the operator
            pos = 1
            if op_val[1].isdigit():
                pos = 2
            where = redir_loc.Fd(int(op_val[:pos]))

        else:
            # >out - default descriptor for this operator
            where = redir_loc.Fd(consts.RedirDefaultFd(op_tok.id))

        self._SetNext()

        self._GetWord()
        # Other redirect
        if self.c_kind != Kind.Word:
            p_die('Invalid token after redirect operator',
                  loc.Word(self.cur_word))

        # Here doc
        if op_tok.id in (Id.Redir_DLess, Id.Redir_DLessDash):
            arg = redir_param.HereDoc.CreateNull()
            arg.here_begin = self.cur_word
            arg.stdin_parts = []

            r = Redir(op_tok, where, arg)

            self.pending_here_docs.append(r)  # will be filled on next newline.

            self._SetNext()
            return r

        arg_word = self.cur_word
        tilde = word_.TildeDetect(arg_word)
        if tilde:
            arg_word = tilde
        self._SetNext()

        # We should never get Empty, Token, etc.
        assert arg_word.tag() == word_e.Compound, arg_word
        return Redir(op_tok, where, cast(CompoundWord, arg_word))
764
    def _ParseRedirectList(self):
        # type: () -> List[Redir]
        """Try parsing any redirects at the cursor.

        This is used for blocks only, not commands.

        Returns an empty list if the cursor isn't at a redirect operator.
        """
        redirects = []  # type: List[Redir]
        while True:
            # This prediction needs to ONLY accept redirect operators.  Should
            # we make them a separate Kind?
            self._GetWord()
            if self.c_kind != Kind.Redir:
                break

            node = self.ParseRedirect()
            redirects.append(node)
            self._SetNext()

        return redirects
784
    def _MaybeParseRedirectList(self, node):
        # type: (command_t) -> command_t
        """Try parsing redirects at the current position.

        If there are any, then wrap the command_t argument with a
        command.Redirect node.  Otherwise, return the argument unchanged.
        """
        self._GetWord()
        if self.c_kind != Kind.Redir:
            return node  # common case: no redirects

        redirects = [self.ParseRedirect()]

        while True:
            # This prediction needs to ONLY accept redirect operators.  Should
            # we make them a separate Kind?
            self._GetWord()
            if self.c_kind != Kind.Redir:
                break

            redirects.append(self.ParseRedirect())
            self._SetNext()

        return command.Redirect(node, redirects)
809
    def _ScanSimpleCommand(self):
        # type: () -> Tuple[List[Redir], List[CompoundWord], Optional[ArgList], Optional[LiteralBlock]]
        """YSH extends simple commands with typed args and blocks.

        Shell has a recursive grammar, which awkwardly expresses
        non-grammatical rules:

        simple_command   : cmd_prefix cmd_word cmd_suffix
                         | cmd_prefix cmd_word
                         | cmd_prefix
                         | cmd_name cmd_suffix
                         | cmd_name
                         ;
        cmd_name         : WORD                   /* Apply rule 7a */
                         ;
        cmd_word         : WORD                   /* Apply rule 7b */
                         ;
        cmd_prefix       :            io_redirect
                         | cmd_prefix io_redirect
                         |            ASSIGNMENT_WORD
                         | cmd_prefix ASSIGNMENT_WORD
                         ;
        cmd_suffix       :            io_redirect
                         | cmd_suffix io_redirect
                         |            WORD
                         | cmd_suffix WORD

        YSH grammar:

        redirect = redir_op WORD
        item = WORD | redirect

        typed_args =
          '(' arglist ')'
        | '[' arglist ']'

        simple_command =
          cmd_prefix* item+ typed_args? BraceGroup? cmd_suffix*

        Notably, redirects shouldn't appear after typed args, or after
        BraceGroup.

        Examples:

        This is an assignment:
           foo=1 >out

        This is a command.Simple
           >out

        What about
           >out (42)

        Returns:
          (redirects, words, typed_args, block) -- the raw pieces, which the
          caller assembles into a command.Simple or an assignment.
        """
        redirects = []  # type: List[Redir]
        words = []  # type: List[CompoundWord]
        typed_args = None  # type: Optional[ArgList]
        block = None  # type: Optional[LiteralBlock]

        first_word_caps = False  # does first word look like Caps, but not CAPS

        i = 0
        while True:
            self._GetWord()

            # If we got { }, change it to something that's not Kind.Word
            kind2 = self.c_kind
            if (kind2 == Kind.Word and self.parse_opts.parse_brace() and
                    self.c_id in (Id.Lit_LBrace, Id.Lit_RBrace)):
                kind2 = Kind.Op

            if kind2 == Kind.Redir:
                node = self.ParseRedirect()
                redirects.append(node)

            elif kind2 == Kind.Word:
                w = cast(CompoundWord, self.cur_word)  # Kind.Word ensures this

                if i == 0:
                    # Disallow leading =a because it's confusing
                    part0 = w.parts[0]
                    if part0.tag() == word_part_e.Literal:
                        tok = cast(Token, part0)
                        if tok.id == Id.Lit_Equals:
                            p_die(
                                "=word isn't allowed. Hint: add a space after =, or quote it",
                                tok)

                    # Is the first word a Hay Attr word?
                    #
                    # Can we remove this StaticEval() call, and just look
                    # inside Token?  I think once we get rid of SHELL nodes,
                    # this will be simpler.

                    ok, word_str, quoted = word_.StaticEval(w)
                    # Foo { a = 1 } is OK, but not foo { a = 1 } or FOO { a = 1 }
                    if (ok and len(word_str) and word_str[0].isupper() and
                            not word_str.isupper()):
                        first_word_caps = True
                        #log('W %s', word_str)

                words.append(w)

            else:
                break

            self._SetNextBrack()  # Allow bracket for SECOND word on
            i += 1

        # my-cmd (x) or my-cmd [x]
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            # 1. Check that there's a preceding space
            prev_byte = self.lexer.ByteLookBack()
            if prev_byte not in (SPACE_CH, TAB_CH):
                if self.parse_opts.parse_at():
                    p_die('Space required before (', loc.Word(self.cur_word))
                else:
                    # inline func call like @sorted(x) is invalid in OSH, but
                    # the solution isn't a space
                    p_die(
                        'Unexpected left paren (might need a space before it)',
                        loc.Word(self.cur_word))

            # 2. Check that it's not ().  We disallow this because it's a
            # no-op and there could be confusion with shell func defs.
            # For some reason we need to call lexer.LookPastSpace, not
            # w_parser.LookPastSpace.  I think this is because we're at (,
            # which is an operator token.  All the other cases are like 'x=',
            # which is PART of a word, and we don't know if it will end.
            next_id = self.lexer.LookPastSpace(lex_mode_e.ShCommand)
            if next_id == Id.Op_RParen:
                p_die('Empty arg list not allowed', loc.Word(self.cur_word))

            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_eager_arglist)

            self._SetNext()

        elif self.c_id == Id.Op_LBracket:  # only when parse_bracket set
            typed_args = self.w_parser.ParseProcCallArgs(
                grammar_nt.ysh_lazy_arglist)

            self._SetNext()

        self._GetWord()

        # Allow redirects after typed args, e.g.
        #     json write (x) > out.txt
        if self.c_kind == Kind.Redir:
            redirects.extend(self._ParseRedirectList())

        # my-cmd { echo hi }   my-cmd (x) { echo hi }   ...
        if (self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace and
                # Disabled for if/while condition, etc.
                self.allow_block):

            # allow x = 42
            self.hay_attrs_stack.append(first_word_caps)
            brace_group = self.ParseBraceGroup()

            # So we can get the source code back later
            lines = self.arena.SaveLinesAndDiscard(brace_group.left,
                                                   brace_group.right)
            block = LiteralBlock(brace_group, lines)

            self.hay_attrs_stack.pop()

            self._GetWord()

            # Allow redirects after block, e.g.
            #     cd /tmp { echo $PWD } > out.txt
            if self.c_kind == Kind.Redir:
                redirects.extend(self._ParseRedirectList())

        return redirects, words, typed_args, block
985
    def _MaybeExpandAliases(self, words):
        # type: (List[CompoundWord]) -> Optional[command_t]
        """Try to expand aliases.

        Args:
          words: A list of Compound

        Returns:
          A new LST node if any aliases were expanded, or None otherwise
          (the caller parses the words normally).

        Our implementation of alias has two design choices:
        - Where to insert it in parsing.  We do it at the end of
          ParseSimpleCommand.
        - What grammar rule to parse the expanded alias buffer with.  In our
          case it's ParseCommand().

        This doesn't quite match what other shells do, but I can't figure out
        a better place.

        Most test cases pass, except for ones like:

        alias LBRACE='{'
        LBRACE echo one; echo two; }

        alias MULTILINE='echo 1
        echo 2
        echo 3'
        MULTILINE

        NOTE: dash handles aliases in a totally different way.  It has a
        global variable checkkwd in parser.c.  It assigns it all over the
        grammar, like this:

        checkkwd = CHKNL | CHKKWD | CHKALIAS;

        The readtoken() function checks (checkkwd & CHKALIAS) and then calls
        lookupalias().  This seems to provide a consistent behavior among
        shells, but it's less modular and testable.

        Bash also uses a global 'parser_state & PST_ALEXPNEXT'.
        """
        # Start a new list if there aren't any.  This will be passed
        # recursively through CommandParser instances.
        aliases_in_flight = (self.aliases_in_flight
                             if len(self.aliases_in_flight) else [])

        # for error message
        first_word_str = None  # type: Optional[str]
        argv0_loc = loc.Word(words[0])

        expanded = []  # type: List[str]
        i = 0
        n = len(words)

        while i < n:
            w = words[i]

            # Only a statically-known, unquoted word can be an alias name.
            ok, word_str, quoted = word_.StaticEval(w)
            if not ok or quoted:
                break

            alias_exp = self.aliases.get(word_str)
            if alias_exp is None:
                break

            # Prevent infinite loops.  This is subtle: we want to prevent
            # infinite expansion of alias echo='echo x'.  But we don't want to
            # prevent expansion of the second word in 'echo echo', so we add
            # 'i' to "aliases_in_flight".
            if (word_str, i) in aliases_in_flight:
                break

            if i == 0:
                first_word_str = word_str  # for error message

            #log('%r -> %r', word_str, alias_exp)
            aliases_in_flight.append((word_str, i))
            expanded.append(alias_exp)
            i += 1

            if not alias_exp.endswith(' '):
                # alias e='echo [ ' is the same expansion as
                # alias e='echo ['
                # The trailing space indicates whether we should continue to
                # expand aliases; it's not part of it.
                expanded.append(' ')
                break  # No more expansions

        if len(expanded) == 0:  # No expansions; caller does parsing.
            return None

        # We are expanding an alias, so copy the rest of the words and
        # re-parse.
        if i < n:
            left_tok = location.LeftTokenForWord(words[i])
            right_tok = location.RightTokenForWord(words[-1])

            # OLD CONSTRAINT
            #assert left_tok.line_id == right_tok.line_id

            words_str = self.arena.SnipCodeString(left_tok, right_tok)
            expanded.append(words_str)

        code_str = ''.join(expanded)

        # TODO:
        # Aliases break static parsing (like backticks), so use our own Arena.
        # This matters for Hay, which calls SaveLinesAndDiscard().
        # arena = alloc.Arena()
        arena = self.arena

        line_reader = reader.StringLineReader(code_str, arena)
        cp = self.parse_ctx.MakeOshParser(line_reader)
        cp.Init_AliasesInFlight(aliases_in_flight)

        # break circular dep
        from frontend import parse_lib

        # The interaction between COMPLETION and ALIASES requires special
        # care.  See docstring of BeginAliasExpansion() in parse_lib.py.
        src = source.Alias(first_word_str, argv0_loc)
        with alloc.ctx_SourceCode(arena, src):
            with parse_lib.ctx_Alias(self.parse_ctx.trail):
                try:
                    # _ParseCommandTerm() handles multiline commands, compound
                    # commands, etc. as opposed to ParseLogicalLine()
                    node = cp._ParseCommandTerm()
                except error.Parse as e:
                    # Failure to parse alias expansion is a fatal error.
                    # We don't need more handling here.
                    raise

        if 0:
            log('AFTER expansion:')
            node.PrettyPrint()

        return node
1124
    def ParseSimpleCommand(self):
        # type: () -> command_t
        """Fixed transcription of the POSIX grammar

        io_file : '<' filename
                | LESSAND filename
                 ...

        io_here : DLESS here_end
                | DLESSDASH here_end

        redirect : IO_NUMBER (io_redirect | io_here)

        prefix_part : ASSIGNMENT_WORD | redirect
        cmd_part : WORD | redirect

        assign_kw : Declare | Export | Local | Readonly

        # Without any words it is parsed as a command, not an assignment
        assign_listing : assign_kw

        # Now we have something to do (might be changing assignment flags too)
        # NOTE: any prefixes should be a warning, but they are allowed in shell.
        assignment : prefix_part* assign_kw (WORD | ASSIGNMENT_WORD)+

        # an external command, a function call, or a builtin -- a "word_command"
        word_command : prefix_part* cmd_part+

        simple_command : assign_listing
                       | assignment
                       | proc_command

        Simple imperative algorithm:

        1) Read a list of words and redirects.  Append them to separate lists.
        2) Look for the first non-assignment word.  If it's declare, etc., then
        keep parsing words AND assign words.  Otherwise, just parse words.
        3) If there are no non-assignment words, then it's a global assignment.

        { redirects, global assignments } OR
        { redirects, prefix_bindings, words } OR
        { redirects, ERROR_prefix_bindings, keyword, assignments, words }

        THEN CHECK that prefix bindings don't have any array literal parts!
        global assignment and keyword assignments can have the of course.
        well actually EXPORT shouldn't have them either -- WARNING

        3 cases we want to warn: prefix_bindings for assignment, and array literal
        in prefix bindings, or export

        A command can be an assignment word, word, or redirect on its own.

            ls
            >out.txt

            >out.txt FOO=bar   # this touches the file

        Or any sequence:
            ls foo bar
            <in.txt ls foo bar >out.txt
            <in.txt ls >out.txt foo bar

        Or add one or more environment bindings:
            VAR=val env
            >out.txt VAR=val env

        here_end vs filename is a matter of whether we test that it's quoted.  e.g.
        <<EOF vs <<'EOF'.
        """
        redirects, words, typed_args, block = self._ScanSimpleCommand()

        # Token to blame when typed args or a block appear somewhere they're
        # not allowed (redirect-only commands, assignments, control flow).
        typed_loc = None  # type: Optional[Token]
        if block:
            typed_loc = block.brace_group.left
        if typed_args:
            typed_loc = typed_args.left  # preferred over block location

        if len(words) == 0:  # e.g. >out.txt # redirect without words
            assert len(redirects) != 0
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)
            return command.Redirect(command.NoOp, redirects)

        preparsed_list, suffix_words = _SplitSimpleCommandPrefix(words)
        if len(preparsed_list):
            # Disallow X=Y inside proc and func
            #   and inside Hay Attr blocks
            # But allow X=Y at the top level
            #   for interactive use foo=bar
            #   for global constants GLOBAL=~/src
            #     because YSH assignment doesn't have tilde sub
            if len(suffix_words) == 0:
                if (self.cmd_mode != cmd_mode_e.Shell or
                    (len(self.hay_attrs_stack) and self.hay_attrs_stack[-1])):
                    p_die('Use var/setvar to assign in YSH',
                          preparsed_list[0].left)

        # Set a reference to words and redirects for completion.  We want to
        # inspect this state after a failed parse.
        self.parse_ctx.trail.SetLatestWords(suffix_words, redirects)

        if len(suffix_words) == 0:
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # ShAssignment: No suffix words like ONE=1 a[x]=1 TWO=2
            pairs = []  # type: List[AssignPair]
            for preparsed in preparsed_list:
                pairs.append(
                    _MakeAssignPair(self.parse_ctx, preparsed, self.arena))

            left_tok = location.LeftTokenForCompoundWord(words[0])
            assign_node = command.ShAssignment(left_tok, pairs)
            if len(redirects):
                return command.Redirect(assign_node, redirects)
            else:
                return assign_node

        kind, kw_token = word_.IsControlFlow(suffix_words[0])

        if kind == Kind.ControlFlow:
            if not self.parse_opts.parse_ignored() and len(redirects):
                p_die("Control flow shouldn't have redirects", kw_token)
            if len(preparsed_list):  # FOO=bar local spam=eggs not allowed
                p_die("Control flow shouldn't have environment bindings",
                      preparsed_list[0].left)

            if kw_token.id == Id.ControlFlow_Return:
                # return x - inside procs and shell functions
                # return (x) - inside funcs
                if typed_args is None:
                    if self.cmd_mode not in (cmd_mode_e.Shell,
                                             cmd_mode_e.Proc):
                        p_die('Shell-style returns not allowed here', kw_token)
                else:
                    if self.cmd_mode != cmd_mode_e.Func:
                        p_die('Typed return is only allowed inside func',
                              typed_loc)
                    if len(typed_args.pos_args) != 1:
                        p_die("Typed return expects one argument", typed_loc)
                    if len(typed_args.named_args) != 0:
                        p_die("Typed return doesn't take named arguments",
                              typed_loc)
                    return command.Retval(kw_token, typed_args.pos_args[0])

            # Except for return (x), we shouldn't have typed args
            if typed_loc is not None:
                p_die("Unexpected typed args", typed_loc)

            # Attach the token for errors.  (ShAssignment may not need it.)
            if len(suffix_words) == 1:
                arg_word = None  # type: Optional[word_t]
            elif len(suffix_words) == 2:
                arg_word = suffix_words[1]
            else:
                p_die('Unexpected argument to %r' % lexer.TokenVal(kw_token),
                      loc.Word(suffix_words[2]))

            return command.ControlFlow(kw_token, arg_word)

        # Alias expansion only understands words, not typed args ( ) or block { }
        if not typed_args and not block and self.parse_opts.expand_aliases():
            # If any expansions were detected, then parse again.
            expanded_node = self._MaybeExpandAliases(suffix_words)
            if expanded_node:
                # Attach env bindings and redirects to the expanded node.
                more_env = []  # type: List[EnvPair]
                _AppendMoreEnv(preparsed_list, more_env)
                exp = command.ExpandedAlias(expanded_node, more_env)
                if len(redirects):
                    return command.Redirect(exp, redirects)
                else:
                    return exp

        # TODO: check that we don't have env1=x x[1]=y env2=z here.

        # FOO=bar printenv.py FOO
        node = _MakeSimpleCommand(preparsed_list, suffix_words, typed_args,
                                  block)
        if len(redirects):
            return command.Redirect(node, redirects)
        else:
            return node
1308
1309 def ParseBraceGroup(self):
1310 # type: () -> BraceGroup
1311 """
1312 Original:
1313 brace_group : LBrace command_list RBrace ;
1314
1315 YSH:
1316 brace_group : LBrace (Op_Newline IgnoredComment?)? command_list RBrace ;
1317
1318 The doc comment can only occur if there's a newline.
1319 """
1320 ate = self._Eat(Id.Lit_LBrace)
1321 left = word_.BraceToken(ate)
1322
1323 doc_word = None # type: word_t
1324 self._GetWord()
1325 if self.c_id == Id.Op_Newline:
1326 self._SetNext()
1327 # Set a flag so we don't skip over ###
1328 with word_.ctx_EmitDocToken(self.w_parser):
1329 self._GetWord()
1330
1331 if self.c_id == Id.Ignored_Comment:
1332 doc_word = self.cur_word
1333 self._SetNext()
1334
1335 # Id.Ignored_Comment means it's a Token, or None
1336 doc_token = cast(Token, doc_word)
1337
1338 c_list = self._ParseCommandList()
1339
1340 ate = self._Eat(Id.Lit_RBrace)
1341 right = word_.BraceToken(ate)
1342
1343 # Note(andychu): Related ASDL bug #1216. Choosing the Python [] behavior
1344 # would allow us to revert this back to None, which was changed in
1345 # https://github.com/oilshell/oil/pull/1211. Choosing the C++ nullptr
1346 # behavior saves allocations, but is less type safe.
1347 return BraceGroup(left, doc_token, c_list.children, right)
1348
1349 def ParseDoGroup(self):
1350 # type: () -> command.DoGroup
1351 """Used by ForEach, ForExpr, While, Until. Should this be a Do node?
1352
1353 do_group : Do command_list Done ; /* Apply rule 6 */
1354 """
1355 ate = self._Eat(Id.KW_Do)
1356 do_kw = word_.AsKeywordToken(ate)
1357
1358 c_list = self._ParseCommandList() # could be anything
1359
1360 ate = self._Eat(Id.KW_Done)
1361 done_kw = word_.AsKeywordToken(ate)
1362
1363 return command.DoGroup(do_kw, c_list.children, done_kw)
1364
1365 def ParseForWords(self):
1366 # type: () -> Tuple[List[CompoundWord], Optional[Token]]
1367 """
1368 for_words : WORD* for_sep
1369 ;
1370 for_sep : ';' newline_ok
1371 | NEWLINES
1372 ;
1373 """
1374 words = [] # type: List[CompoundWord]
1375 # The token of any semi-colon, so we can remove it.
1376 semi_tok = None # type: Optional[Token]
1377
1378 while True:
1379 self._GetWord()
1380 if self.c_id == Id.Op_Semi:
1381 tok = cast(Token, self.cur_word)
1382 semi_tok = tok
1383 self._SetNext()
1384 self._NewlineOk()
1385 break
1386 elif self.c_id == Id.Op_Newline:
1387 self._SetNext()
1388 break
1389 elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1390 break
1391
1392 if self.cur_word.tag() != word_e.Compound:
1393 # TODO: Can we also show a pointer to the 'for' keyword?
1394 p_die('Invalid word in for loop', loc.Word(self.cur_word))
1395
1396 w2 = cast(CompoundWord, self.cur_word)
1397 words.append(w2)
1398 self._SetNext()
1399 return words, semi_tok
1400
1401 def _ParseForExprLoop(self, for_kw):
1402 # type: (Token) -> command.ForExpr
1403 """
1404 Shell:
1405 for '((' init ';' cond ';' update '))' for_sep? do_group
1406
1407 YSH:
1408 for '((' init ';' cond ';' update '))' for_sep? brace_group
1409 """
1410 node = self.w_parser.ReadForExpression()
1411 node.keyword = for_kw
1412
1413 self._SetNext()
1414
1415 self._GetWord()
1416 if self.c_id == Id.Op_Semi:
1417 self._SetNext()
1418 self._NewlineOk()
1419 elif self.c_id == Id.Op_Newline:
1420 self._SetNext()
1421 elif self.c_id == Id.KW_Do: # missing semicolon/newline allowed
1422 pass
1423 elif self.c_id == Id.Lit_LBrace: # does NOT require parse_brace
1424 pass
1425 else:
1426 p_die('Invalid word after for expression', loc.Word(self.cur_word))
1427
1428 if self.c_id == Id.Lit_LBrace:
1429 node.body = self.ParseBraceGroup()
1430 else:
1431 node.body = self.ParseDoGroup()
1432 return node
1433
    def _ParseForEachLoop(self, for_kw):
        # type: (Token) -> command.ForEach
        """Parse a for-each loop, after the 'for' keyword.

        Handles shell 'for x in a b; do ...', YSH 'for x, y in (expr) { ... }',
        and the implicit form 'for x; do ...' / 'for x do ...' which loops
        over "$@".
        """
        node = command.ForEach.CreateNull(alloc_lists=True)
        node.keyword = for_kw

        # Loop variable names: 1 to 3 of them
        num_iter_names = 0
        while True:
            w = self.cur_word

            # Hack that makes the language more familiar:
            # - 'x, y' is accepted, but not 'x,y' or 'x ,y'
            # - 'x y' is also accepted but not idiomatic.
            UP_w = w
            if w.tag() == word_e.Compound:
                w = cast(CompoundWord, UP_w)
                if word_.LiteralId(w.parts[-1]) == Id.Lit_Comma:
                    w.parts.pop()

            ok, iter_name, quoted = word_.StaticEval(w)
            if not ok or quoted:  # error: for $x
                p_die('Expected loop variable (a constant word)', loc.Word(w))

            if not match.IsValidVarName(iter_name):  # error: for -
                # TODO: consider commas?
                if ',' in iter_name:
                    p_die('Loop variables look like x, y (fix spaces)',
                          loc.Word(w))
                p_die('Invalid loop variable name %r' % iter_name, loc.Word(w))

            node.iter_names.append(iter_name)
            num_iter_names += 1
            self._SetNext()

            self._GetWord()
            # 'in' or 'do' or ';' or Op_Newline marks the end of variable names
            # Subtlety: 'var' is KW_Var and is a valid loop name
            if self.c_id in (Id.KW_In, Id.KW_Do) or self.c_kind == Kind.Op:
                break

            if num_iter_names == 3:
                p_die('Unexpected word after 3 loop variables',
                      loc.Word(self.cur_word))

        self._NewlineOk()

        # Now decide what we're iterating over
        self._GetWord()
        if self.c_id == Id.KW_In:
            expr_blame = word_.AsKeywordToken(self.cur_word)

            self._SetNext()  # skip in

            next_id = self.w_parser.LookPastSpace()
            #log('%s', Id_str(next_id))

            if next_id == Id.Op_LParen:  # for x in (expr) {
                enode = self.w_parser.ParseYshExprForCommand()
                node.iterable = for_iter.YshExpr(enode, expr_blame)

                # We don't accept for x in (obj); do ...
                self._GetWord()
                if self.c_id != Id.Lit_LBrace:
                    p_die('Expected { after iterable expression',
                          loc.Word(self.cur_word))

            elif next_id == Id.Redir_LessGreat:  # for x in <> {
                # <> is Id.Redir_Great - reuse this for simplicity
                w = self._Eat(Id.Redir_LessGreat)
                p_die('Reserved syntax', loc.Word(self.cur_word))

                #left = word_.AsOperatorToken(w)

                #node.iterable = for_iter.Files(left, [])

                ## Must be { not 'do'
                #self._GetWord()
                #if self.c_id != Id.Lit_LBrace:
                #    p_die('Expected { after files', loc.Word(self.cur_word))

            elif next_id == Id.Redir_Less:  # for x in < > {
                w = self._Eat(Id.Redir_Less)
                p_die('Reserved syntax', loc.Word(self.cur_word))

                #left = word_.AsOperatorToken(w)

                # TODO: we could accept
                #
                # for x in < README.md *.py > {
                # for x in < @myfiles > {
                #
                # And set _filename _line_num, similar to awk

                #self._Eat(Id.Redir_Great)

                #node.iterable = for_iter.Files(left, [])

                ## Must be { not 'do'
                #self._GetWord()
                #if self.c_id != Id.Lit_LBrace:
                #    p_die('Expected { after files', loc.Word(self.cur_word))

            else:
                # Plain word list: for x in a b c
                semi_tok = None  # type: Optional[Token]
                iter_words, semi_tok = self.ParseForWords()
                node.semi_tok = semi_tok

                if not self.parse_opts.parse_bare_word() and len(
                        iter_words) == 1:
                    ok, s, quoted = word_.StaticEval(iter_words[0])
                    if ok and match.IsValidVarName(s) and not quoted:
                        p_die(
                            'Surround this word with either parens or quotes (parse_bare_word)',
                            loc.Word(iter_words[0]))

                words2 = braces.BraceDetectAll(iter_words)
                words3 = word_.TildeDetectAll(words2)
                node.iterable = for_iter.Words(words3)

                # Now that we know there are words, do an extra check
                if num_iter_names > 2:
                    p_die('Expected at most 2 loop variables', for_kw)

        elif self.c_id == Id.KW_Do:
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            # do not advance

        elif self.c_id == Id.Op_Semi:  # for x; do
            node.iterable = for_iter.Args  # implicitly loop over "$@"
            self._SetNext()

        else:  # for foo BAD
            p_die('Unexpected word after for loop variable',
                  loc.Word(self.cur_word))

        self._GetWord()
        if self.c_id == Id.Lit_LBrace:  # parse_opts.parse_brace() must be on
            node.body = self.ParseBraceGroup()
        else:
            node.body = self.ParseDoGroup()

        return node
1574
1575 def ParseFor(self):
1576 # type: () -> command_t
1577 """
1578 TODO: Update the grammar
1579
1580 for_clause : For for_name newline_ok (in for_words? for_sep)? do_group ;
1581 | For '((' ... TODO
1582 """
1583 ate = self._Eat(Id.KW_For)
1584 for_kw = word_.AsKeywordToken(ate)
1585
1586 self._GetWord()
1587 if self.c_id == Id.Op_DLeftParen:
1588 if not self.parse_opts.parse_dparen():
1589 p_die("Bash for loops aren't allowed (parse_dparen)",
1590 loc.Word(self.cur_word))
1591
1592 # for (( i = 0; i < 10; i++)
1593 n1 = self._ParseForExprLoop(for_kw)
1594 return self._MaybeParseRedirectList(n1)
1595 else:
1596 # for x in a b; do echo hi; done
1597 n2 = self._ParseForEachLoop(for_kw)
1598 return self._MaybeParseRedirectList(n2)
1599
1600 def _ParseConditionList(self):
1601 # type: () -> condition_t
1602 """
1603 condition_list: command_list
1604
1605 This is a helper to parse a condition list for if commands and while/until
1606 loops. It will throw a parse error if there are no conditions in the list.
1607 """
1608 self.allow_block = False
1609 commands = self._ParseCommandList()
1610 self.allow_block = True
1611
1612 if len(commands.children) == 0:
1613 p_die("Expected a condition", loc.Word(self.cur_word))
1614
1615 return condition.Shell(commands.children)
1616
1617 def ParseWhileUntil(self, keyword):
1618 # type: (Token) -> command.WhileUntil
1619 """
1620 while_clause : While command_list do_group ;
1621 until_clause : Until command_list do_group ;
1622 """
1623 self._SetNext() # skip keyword
1624
1625 if (self.parse_opts.parse_paren() and
1626 self.w_parser.LookPastSpace() == Id.Op_LParen):
1627 enode = self.w_parser.ParseYshExprForCommand()
1628 cond = condition.YshExpr(enode) # type: condition_t
1629 else:
1630 cond = self._ParseConditionList()
1631
1632 # NOTE: The LSTs will be different for OSH and YSH, but the execution
1633 # should be unchanged. To be sure we should desugar.
1634 self._GetWord()
1635 if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
1636 # while test -f foo {
1637 body_node = self.ParseBraceGroup() # type: command_t
1638 else:
1639 body_node = self.ParseDoGroup()
1640
1641 return command.WhileUntil(keyword, cond, body_node)
1642
    def ParseCaseArm(self):
        # type: () -> CaseArm
        """Parse one arm of a POSIX-style case.

        case_item: '('? pattern ('|' pattern)* ')'
                   newline_ok command_term? trailer? ;

        Looking at '(' or pattern
        """
        self.lexer.PushHint(Id.Op_RParen, Id.Right_CasePat)

        left_tok = location.LeftTokenForWord(self.cur_word)  # ( or pat

        if self.c_id == Id.Op_LParen:  # Optional (
            self._SetNext()

        # Collect the '|'-separated pattern words
        pat_words = []  # type: List[word_t]
        while True:
            self._GetWord()
            if self.c_kind != Kind.Word:
                p_die('Expected case pattern', loc.Word(self.cur_word))
            pat_words.append(self.cur_word)
            self._SetNext()

            self._GetWord()
            if self.c_id == Id.Op_Pipe:
                self._SetNext()
            else:
                break

        ate = self._Eat(Id.Right_CasePat)
        middle_tok = word_.AsOperatorToken(ate)

        self._NewlineOk()

        # The action body is optional: it's absent right before ;; ;& ;;&
        # or esac
        self._GetWord()
        if self.c_id not in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp,
                             Id.KW_Esac):
            c_list = self._ParseCommandTerm()
            action_children = c_list.children
        else:
            action_children = []

        # Arm terminator: ;; ;& ;;& -- or none at all before esac
        dsemi_tok = None  # type: Token
        self._GetWord()
        if self.c_id == Id.KW_Esac:  # missing last ;;
            pass
        elif self.c_id in (Id.Op_DSemi, Id.Op_SemiAmp, Id.Op_DSemiAmp):
            dsemi_tok = word_.AsOperatorToken(self.cur_word)
            self._SetNext()
        else:
            # Happens on EOF
            p_die('Expected ;; or esac', loc.Word(self.cur_word))

        self._NewlineOk()

        return CaseArm(left_tok, pat.Words(pat_words), middle_tok,
                       action_children, dsemi_tok)
1700
    def ParseYshCaseArm(self, discriminant):
        # type: (Id_t) -> CaseArm
        """Parse one arm of a YSH case; 'discriminant' is the lookahead Id.

        case_item   : pattern newline_ok brace_group newline_ok
        pattern     : pat_words
                    | pat_exprs
                    | pat_eggex
                    | pat_else
        pat_words   : pat_word (newline_ok '|' newline_ok pat_word)*
        pat_exprs   : pat_expr (newline_ok '|' newline_ok pat_expr)*
        pat_word    : WORD
        pat_eggex   : '/' oil_eggex '/'
        pat_expr    : '(' oil_expr ')'
        pat_else    : '(' Id.KW_Else ')'

        Looking at: 'pattern'

        Note that the trailing `newline_ok` in `case_item` is handled by
        `ParseYshCase`.  We do this because parsing that `newline_ok` returns
        the next "discriminant" for the next token, so it makes more sense to
        handle it there.
        """
        left_tok = None  # type: Token
        pattern = None  # type: pat_t

        if discriminant in (Id.Op_LParen, Id.Arith_Slash):
            # pat_exprs, pat_else or pat_eggex
            pattern, left_tok = self.w_parser.ParseYshCasePattern()
        else:
            # pat_words
            pat_words = []  # type: List[word_t]
            while True:
                self._GetWord()
                if self.c_kind != Kind.Word:
                    p_die('Expected case pattern', loc.Word(self.cur_word))
                pat_words.append(self.cur_word)
                self._SetNext()

                # The first pattern word supplies the arm's left location.
                # NOTE(review): cur_word hasn't been refreshed by _GetWord()
                # here, so this still refers to the word just appended --
                # confirm.
                if not left_tok:
                    left_tok = location.LeftTokenForWord(self.cur_word)

                self._NewlineOk()

                self._GetWord()
                if self.c_id == Id.Op_Pipe:
                    self._SetNext()
                    self._NewlineOk()
                else:
                    break
            pattern = pat.Words(pat_words)

        self._NewlineOk()
        action = self.ParseBraceGroup()

        # The left token of the action is our "middle" token
        return CaseArm(left_tok, pattern, action.left, action.children,
                       action.right)
1758
1759 def ParseYshCase(self, case_kw):
1760 # type: (Token) -> command.Case
1761 """
1762 ysh_case : Case '(' expr ')' LBrace newline_ok ysh_case_arm* RBrace ;
1763
1764 Looking at: token after 'case'
1765 """
1766 enode = self.w_parser.ParseYshExprForCommand()
1767 to_match = case_arg.YshExpr(enode)
1768
1769 ate = self._Eat(Id.Lit_LBrace)
1770 arms_start = word_.BraceToken(ate)
1771
1772 discriminant = self.w_parser.NewlineOkForYshCase()
1773
1774 # Note: for now, zero arms are accepted, just like POSIX case $x in esac
1775 arms = [] # type: List[CaseArm]
1776 while discriminant != Id.Op_RBrace:
1777 arm = self.ParseYshCaseArm(discriminant)
1778 arms.append(arm)
1779
1780 discriminant = self.w_parser.NewlineOkForYshCase()
1781
1782 # NewlineOkForYshCase leaves the lexer in lex_mode_e.Expr. So the '}'
1783 # token is read as an Id.Op_RBrace, but we need to store this as a
1784 # Id.Lit_RBrace.
1785 ate = self._Eat(Id.Op_RBrace)
1786 arms_end = word_.AsOperatorToken(ate)
1787 arms_end.id = Id.Lit_RBrace
1788
1789 return command.Case(case_kw, to_match, arms_start, arms, arms_end)
1790
    def ParseOldCase(self, case_kw):
        # type: (Token) -> command.Case
        """Parse a POSIX-style case, given the 'case' keyword token.

        case_clause : Case WORD newline_ok In newline_ok case_arm* Esac ;

        -> Looking at WORD

        FYI original POSIX case list, which takes pains for DSEMI

        case_list: case_item (DSEMI newline_ok case_item)* DSEMI? newline_ok;
        """
        self._GetWord()
        w = self.cur_word
        # YSH rejects a constant word here -- you probably meant $x
        if not self.parse_opts.parse_bare_word():
            ok, s, quoted = word_.StaticEval(w)
            if ok and not quoted:
                p_die(
                    "This is a constant string. You may want a variable like $x (parse_bare_word)",
                    loc.Word(w))

        if w.tag() != word_e.Compound:
            p_die("Expected a word to match against", loc.Word(w))

        to_match = case_arg.Word(w)
        self._SetNext()  # past WORD

        self._NewlineOk()

        ate = self._Eat(Id.KW_In)
        arms_start = word_.AsKeywordToken(ate)

        self._NewlineOk()

        arms = []  # type: List[CaseArm]
        while True:
            self._GetWord()
            if self.c_id == Id.KW_Esac:
                break
            # case arm should begin with a pattern word or (
            if self.c_kind != Kind.Word and self.c_id != Id.Op_LParen:
                break

            arm = self.ParseCaseArm()
            arms.append(arm)

        ate = self._Eat(Id.KW_Esac)
        arms_end = word_.AsKeywordToken(ate)

        # no redirects yet
        return command.Case(case_kw, to_match, arms_start, arms, arms_end)
1841
1842 def ParseCase(self):
1843 # type: () -> command.Case
1844 """
1845 case_clause : old_case # from POSIX
1846 | ysh_case
1847 ;
1848
1849 Looking at 'Case'
1850 """
1851 case_kw = word_.AsKeywordToken(self.cur_word)
1852 self._SetNext() # past 'case'
1853
1854 if self.w_parser.LookPastSpace() == Id.Op_LParen:
1855 return self.ParseYshCase(case_kw)
1856 else:
1857 return self.ParseOldCase(case_kw)
1858
    def _ParseYshElifElse(self, if_node):
        # type: (command.If) -> None
        """Parse YSH-style elif/else clauses, appending arms to if_node.

        if test -f foo {
          echo foo
        } elif test -f bar; test -f spam {
        # ^ we parsed up to here
          echo bar
        } else {
          echo none
        }
        """
        arms = if_node.arms

        while self.c_id == Id.KW_Elif:
            elif_kw = word_.AsKeywordToken(self.cur_word)
            self._SetNext()  # skip elif
            if (self.parse_opts.parse_paren() and
                    self.w_parser.LookPastSpace() == Id.Op_LParen):
                # YSH expression condition: elif (x > 0) {
                enode = self.w_parser.ParseYshExprForCommand()
                cond = condition.YshExpr(enode)  # type: condition_t
            else:
                # Shell command condition; blocks aren't allowed inside it
                self.allow_block = False
                commands = self._ParseCommandList()
                self.allow_block = True
                cond = condition.Shell(commands.children)

            body = self.ParseBraceGroup()
            # Refresh c_id for the loop condition
            self._GetWord()

            arm = IfArm(elif_kw, cond, None, body.children, None)
            arms.append(arm)

        # NOTE(review): if the loop ran, _GetWord() was already called at the
        # end of its body, making this call look redundant -- confirm it's a
        # no-op before removing.
        self._GetWord()
        if self.c_id == Id.KW_Else:
            self._SetNext()
            body = self.ParseBraceGroup()
            if_node.else_action = body.children
1892
1893 def _ParseYshIf(self, if_kw, cond):
1894 # type: (Token, condition_t) -> command.If
1895 """
1896 if test -f foo {
1897 # ^ we parsed up to here
1898 echo foo
1899 } elif test -f bar; test -f spam {
1900 echo bar
1901 } else {
1902 echo none
1903 }
1904 NOTE: If you do something like if test -n foo{, the parser keeps going, and
1905 the error is confusing because it doesn't point to the right place.
1906
1907 I think we might need strict_brace so that foo{ is disallowed. It has to
1908 be foo\{ or foo{a,b}. Or just turn that on with parse_brace? After you
1909 form ANY CompoundWord, make sure it's balanced for Lit_LBrace and
1910 Lit_RBrace? Maybe this is pre-parsing step in the WordParser?
1911 """
1912 if_node = command.If.CreateNull(alloc_lists=True)
1913 if_node.if_kw = if_kw
1914
1915 body1 = self.ParseBraceGroup()
1916 # Every arm has 1 spid, unlike shell-style
1917 # TODO: We could get the spids from the brace group.
1918 arm = IfArm(if_kw, cond, None, body1.children, None)
1919
1920 if_node.arms.append(arm)
1921
1922 self._GetWord()
1923 if self.c_id in (Id.KW_Elif, Id.KW_Else):
1924 self._ParseYshElifElse(if_node)
1925 # the whole if node has the 'else' spid, unlike shell-style there's no 'fi'
1926 # spid because that's in the BraceGroup.
1927 return if_node
1928
1929 def _ParseElifElse(self, if_node):
1930 # type: (command.If) -> None
1931 """
1932 else_part: (Elif command_list Then command_list)* Else command_list ;
1933 """
1934 arms = if_node.arms
1935
1936 self._GetWord()
1937 while self.c_id == Id.KW_Elif:
1938 elif_kw = word_.AsKeywordToken(self.cur_word)
1939 self._SetNext() # past 'elif'
1940
1941 cond = self._ParseConditionList()
1942
1943 ate = self._Eat(Id.KW_Then)
1944 then_kw = word_.AsKeywordToken(ate)
1945
1946 body = self._ParseCommandList()
1947 arm = IfArm(elif_kw, cond, then_kw, body.children, then_kw)
1948
1949 arms.append(arm)
1950
1951 self._GetWord()
1952 if self.c_id == Id.KW_Else:
1953 else_kw = word_.AsKeywordToken(self.cur_word)
1954 self._SetNext() # past 'else'
1955 body = self._ParseCommandList()
1956 if_node.else_action = body.children
1957 else:
1958 else_kw = None
1959
1960 if_node.else_kw = else_kw
1961
    def ParseIf(self):
        # type: () -> command.If
        """
        if_clause : If command_list Then command_list else_part? Fi ;

        open : '{' | Then
        close : '}' | Fi

        ysh_if : If ( command_list | '(' expr ')' )
                 open command_list else_part? close;

        There are 2 conditionals here: parse_paren, then parse_brace
        """
        if_node = command.If.CreateNull(alloc_lists=True)
        if_kw = word_.AsKeywordToken(self.cur_word)
        if_node.if_kw = if_kw
        self._SetNext()  # past 'if'

        if (self.parse_opts.parse_paren() and
                self.w_parser.LookPastSpace() == Id.Op_LParen):
            # if (x + 1)
            enode = self.w_parser.ParseYshExprForCommand()
            cond = condition.YshExpr(enode)  # type: condition_t
        else:
            # if echo 1; echo 2; then
            # Remove ambiguity with if cd / {
            cond = self._ParseConditionList()

        # '{' here means YSH brace style; delegate the rest of the parse
        self._GetWord()
        if self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
            return self._ParseYshIf(if_kw, cond)

        ate = self._Eat(Id.KW_Then)
        then_kw = word_.AsKeywordToken(ate)

        body = self._ParseCommandList()

        # First arm
        arm = IfArm(if_kw, cond, then_kw, body.children, then_kw)
        if_node.arms.append(arm)

        # 2nd to Nth arm
        if self.c_id in (Id.KW_Elif, Id.KW_Else):
            self._ParseElifElse(if_node)

        ate = self._Eat(Id.KW_Fi)
        if_node.fi_kw = word_.AsKeywordToken(ate)

        return if_node
2011
2012 def ParseTime(self):
2013 # type: () -> command_t
2014 """Time [-p] pipeline.
2015
2016 According to bash help.
2017 """
2018 time_kw = word_.AsKeywordToken(self.cur_word)
2019 self._SetNext() # skip time
2020 pipeline = self.ParsePipeline()
2021 return command.TimeBlock(time_kw, pipeline)
2022
2023 def ParseCompoundCommand(self):
2024 # type: () -> command_t
2025 """
2026 Refactoring: we put io_redirect* here instead of in function_body and
2027 command.
2028
2029 compound_command : brace_group io_redirect*
2030 | subshell io_redirect*
2031 | for_clause io_redirect*
2032 | while_clause io_redirect*
2033 | until_clause io_redirect*
2034 | if_clause io_redirect*
2035 | case_clause io_redirect*
2036
2037 # bash extensions
2038 | time_clause
2039 | [[ BoolExpr ]]
2040 | (( ArithExpr ))
2041 """
2042 self._GetWord()
2043 if self.c_id == Id.Lit_LBrace:
2044 n1 = self.ParseBraceGroup()
2045 return self._MaybeParseRedirectList(n1)
2046 if self.c_id == Id.Op_LParen:
2047 n2 = self.ParseSubshell()
2048 return self._MaybeParseRedirectList(n2)
2049
2050 if self.c_id == Id.KW_For:
2051 # Note: Redirects parsed in this call. POSIX for and bash for ((
2052 # have different nodetypes.
2053 return self.ParseFor()
2054 if self.c_id in (Id.KW_While, Id.KW_Until):
2055 keyword = word_.AsKeywordToken(self.cur_word)
2056 n3 = self.ParseWhileUntil(keyword)
2057 return self._MaybeParseRedirectList(n3)
2058
2059 if self.c_id == Id.KW_If:
2060 n4 = self.ParseIf()
2061 return self._MaybeParseRedirectList(n4)
2062
2063 if self.c_id == Id.KW_Case:
2064 n5 = self.ParseCase()
2065 return self._MaybeParseRedirectList(n5)
2066
2067 if self.c_id == Id.KW_DLeftBracket:
2068 if not self.parse_opts.parse_dbracket():
2069 p_die('Bash [[ not allowed in YSH (parse_dbracket)',
2070 loc.Word(self.cur_word))
2071 n6 = self.ParseDBracket()
2072 return self._MaybeParseRedirectList(n6)
2073 if self.c_id == Id.Op_DLeftParen:
2074 if not self.parse_opts.parse_dparen():
2075 p_die(
2076 'Bash (( not allowed in YSH (parse_dparen, see OILS-ERR-14 for wart)',
2077 loc.Word(self.cur_word))
2078 n7 = self.ParseDParen()
2079 return self._MaybeParseRedirectList(n7)
2080
2081 # bash extensions: no redirects
2082 if self.c_id == Id.KW_Time:
2083 return self.ParseTime()
2084
2085 # Happens in function body, e.g. myfunc() oops
2086 p_die(
2087 'Unexpected word while parsing compound command (%s)' %
2088 Id_str(self.c_id), loc.Word(self.cur_word))
2089 assert False # for MyPy
2090
2091 def ParseFunctionDef(self):
2092 # type: () -> command.ShFunction
2093 """
2094 function_header : fname '(' ')'
2095 function_def : function_header newline_ok function_body ;
2096
2097 Precondition: Looking at the function name.
2098
2099 NOTE: There is an ambiguity with:
2100
2101 function foo ( echo hi ) and
2102 function foo () ( echo hi )
2103
2104 Bash only accepts the latter, though it doesn't really follow a grammar.
2105 """
2106 word0 = cast(CompoundWord, self.cur_word) # caller ensures validity
2107 name = word_.ShFunctionName(word0)
2108 if len(name) == 0: # example: foo$x is invalid
2109 p_die('Invalid function name', loc.Word(word0))
2110
2111 part0 = word0.parts[0]
2112 # If we got a non-empty string from ShFunctionName, this should be true.
2113 assert part0.tag() == word_part_e.Literal
2114 blame_tok = cast(Token, part0) # for ctx_VarChecker
2115
2116 self._SetNext() # move past function name
2117
2118 # Must be true because of lookahead
2119 self._GetWord()
2120 assert self.c_id == Id.Op_LParen, self.cur_word
2121
2122 self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
2123 self._SetNext()
2124
2125 self._GetWord()
2126 if self.c_id == Id.Right_ShFunction:
2127 # 'f ()' implies a function definition, since invoking it with no args
2128 # would just be 'f'
2129 self._SetNext()
2130
2131 self._NewlineOk()
2132
2133 func = command.ShFunction.CreateNull()
2134 func.name = name
2135 with ctx_VarChecker(self.var_checker, blame_tok):
2136 func.body = self.ParseCompoundCommand()
2137
2138 func.name_tok = location.LeftTokenForCompoundWord(word0)
2139 return func
2140 else:
2141 p_die('Expected ) in function definition', loc.Word(self.cur_word))
2142 return None
2143
    def ParseKshFunctionDef(self):
        # type: () -> command.ShFunction
        """Parse a ksh/bash-style function definition, e.g. function foo { ... }

        ksh_function_def : 'function' fname ( '(' ')' )? newline_ok function_body

        Precondition: looking at the 'function' keyword.
        """
        keyword_tok = word_.AsKeywordToken(self.cur_word)

        self._SetNext()  # skip past 'function'
        self._GetWord()

        cur_word = cast(CompoundWord, self.cur_word)  # caller ensures validity
        name = word_.ShFunctionName(cur_word)
        if len(name) == 0:  # example: foo$x is invalid
            p_die('Invalid KSH-style function name', loc.Word(cur_word))

        name_word = self.cur_word
        self._SetNext()  # skip past the function name

        # Unlike the POSIX style, the () after the name is OPTIONAL here
        self._GetWord()
        if self.c_id == Id.Op_LParen:
            self.lexer.PushHint(Id.Op_RParen, Id.Right_ShFunction)
            self._SetNext()
            self._Eat(Id.Right_ShFunction)

        self._NewlineOk()

        func = command.ShFunction.CreateNull()
        func.name = name
        # Blame the 'function' keyword for variable-checker errors in the body
        with ctx_VarChecker(self.var_checker, keyword_tok):
            func.body = self.ParseCompoundCommand()

        func.keyword = keyword_tok
        func.name_tok = location.LeftTokenForWord(name_word)
        return func
2178
    def ParseYshProc(self):
        # type: () -> Proc
        """Parse a YSH 'proc' definition, registering its params as variables.

        Precondition: looking at the 'proc' keyword.
        """
        node = Proc.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        # Blame the 'proc' keyword for variable-checker errors; parse the body
        # in Proc mode (e.g. so 'const' is rejected, see ParseCommand).
        with ctx_VarChecker(self.var_checker, keyword_tok):
            with ctx_CmdMode(self, cmd_mode_e.Proc):
                self.w_parser.ParseProc(node)
                if node.sig.tag() == proc_sig_e.Closed:  # Register params
                    sig = cast(proc_sig.Closed, node.sig)

                    # Treat 3 kinds of params as variables.  Each group has an
                    # optional '...rest' param, registered the same way.
                    wp = sig.word
                    if wp:
                        for param in wp.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if wp.rest_of:
                            r = wp.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)
                        # We COULD register __out here but it would require a different API.
                        #if param.prefix and param.prefix.id == Id.Arith_Colon:
                        #    self.var_checker.Check(Id.KW_Var, '__' + param.name)

                    posit = sig.positional
                    if posit:
                        for param in posit.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if posit.rest_of:
                            r = posit.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    named = sig.named
                    if named:
                        for param in named.params:
                            self.var_checker.Check(Id.KW_Var, param.name,
                                                   param.blame_tok)
                        if named.rest_of:
                            r = named.rest_of
                            self.var_checker.Check(Id.KW_Var, r.name,
                                                   r.blame_tok)

                    # The block param (if any) is also a variable in the body
                    if sig.block_param:
                        b = sig.block_param
                        self.var_checker.Check(Id.KW_Var, b.name, b.blame_tok)

                self._SetNext()
                node.body = self.ParseBraceGroup()
                # No redirects for YSH procs (only at call site)

        return node
2235
    def ParseYshFunc(self):
        # type: () -> Func
        """Parse a YSH 'func' definition, registering its params as variables.

        ysh_func: (
          Expr_Name '(' [func_params] [';' func_params] ')' ['=>' type_expr] '{'
        )
        Precondition: looking at KW_Func
        """
        node = Func.CreateNull(alloc_lists=True)

        keyword_tok = word_.AsKeywordToken(self.cur_word)
        node.keyword = keyword_tok

        # Blame the 'func' keyword for variable-checker errors in the body
        with ctx_VarChecker(self.var_checker, keyword_tok):
            self.w_parser.ParseFunc(node)

            # Register positional params (and the optional ...rest param) as
            # variables in the function scope
            posit = node.positional
            if posit:
                for param in posit.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if posit.rest_of:
                    r = posit.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            # Same for named params
            named = node.named
            if named:
                for param in named.params:
                    self.var_checker.Check(Id.KW_Var, param.name,
                                           param.blame_tok)
                if named.rest_of:
                    r = named.rest_of
                    self.var_checker.Check(Id.KW_Var, r.name, r.blame_tok)

            self._SetNext()
            # Parse the body in Func mode (see ParseCommand for what that
            # rejects, e.g. 'const')
            with ctx_CmdMode(self, cmd_mode_e.Func):
                node.body = self.ParseBraceGroup()

        return node
2275
2276 def ParseCoproc(self):
2277 # type: () -> command_t
2278 """
2279 TODO: command.Coproc?
2280 """
2281 raise NotImplementedError()
2282
2283 def ParseSubshell(self):
2284 # type: () -> command.Subshell
2285 """
2286 subshell : '(' compound_list ')'
2287
2288 Looking at Op_LParen
2289 """
2290 left = word_.AsOperatorToken(self.cur_word)
2291 self._SetNext() # skip past (
2292
2293 # Ensure that something $( (cd / && pwd) ) works. If ) is already on the
2294 # translation stack, we want to delay it.
2295
2296 self.lexer.PushHint(Id.Op_RParen, Id.Right_Subshell)
2297
2298 c_list = self._ParseCommandList()
2299 if len(c_list.children) == 1:
2300 child = c_list.children[0]
2301 else:
2302 child = c_list
2303
2304 ate = self._Eat(Id.Right_Subshell)
2305 right = word_.AsOperatorToken(ate)
2306
2307 return command.Subshell(left, child, right, False)
2308
2309 def ParseDBracket(self):
2310 # type: () -> command.DBracket
2311 """Pass the underlying word parser off to the boolean expression
2312 parser."""
2313 left = word_.AsKeywordToken(self.cur_word)
2314 # TODO: Test interactive. Without closing ]], you should get > prompt
2315 # (PS2)
2316
2317 self._SetNext() # skip [[
2318 b_parser = bool_parse.BoolParser(self.w_parser)
2319 bnode, right = b_parser.Parse() # May raise
2320 return command.DBracket(left, bnode, right)
2321
2322 def ParseDParen(self):
2323 # type: () -> command.DParen
2324 left = word_.AsOperatorToken(self.cur_word)
2325
2326 self._SetNext() # skip ((
2327 anode, right = self.w_parser.ReadDParen()
2328 assert anode is not None
2329
2330 return command.DParen(left, anode, right)
2331
    def ParseCommand(self):
        # type: () -> command_t
        """Dispatch on the current word/keyword to parse a single command.

        command          : simple_command
                         | compound_command   # OSH edit: io_redirect* folded in
                         | function_def
                         | ksh_function_def

                           # YSH extensions
                         | proc NAME ...
                         | typed proc NAME ...
                         | func NAME ...
                         | const ...
                         | var ...
                         | setglobal ...
                         | setref ...
                         | setvar ...
                         | call EXPR
                         | = EXPR
                         ;

        Note: the reason const / var are not part of compound_command is because
        they can't be alone in a shell function body.

        Example:
        This is valid shell   f() if true; then echo hi; fi
        This is invalid       f() var x = 1
        """
        # do/done/then etc. never BEGIN a command
        if self._AtSecondaryKeyword():
            p_die('Unexpected word when parsing command',
                  loc.Word(self.cur_word))

        # YSH Extensions

        if self.c_id == Id.KW_Proc:  # proc p { ... }
            # proc is hidden because of the 'local reasoning' principle.  Code
            # inside procs should be YSH, full stop.  That means ysh:upgrade is
            # on.
            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                # 2024-02: This avoids bad syntax errors if you type YSH code
                # into OSH
                # proc p (x) { echo hi } would actually be parsed as a
                # command.Simple!  Shell compatibility: quote 'proc'
                p_die("proc is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Typed:  # typed proc p () { ... }
            self._SetNext()
            self._GetWord()
            if self.c_id != Id.KW_Proc:
                p_die("Expected 'proc' after 'typed'", loc.Word(self.cur_word))

            if self.parse_opts.parse_proc():
                return self.ParseYshProc()
            else:
                p_die("typed is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        if self.c_id == Id.KW_Func:  # func f(x) { ... }
            if self.parse_opts.parse_func():
                return self.ParseYshFunc()
            else:
                # Same reasoning as above, for 'proc'
                p_die("func is a YSH keyword, but this is OSH.",
                      loc.Word(self.cur_word))

        # const is only allowed at the top level (cmd_mode_e.Shell)
        if self.c_id == Id.KW_Const and self.cmd_mode != cmd_mode_e.Shell:
            p_die("const can't be inside proc or func. Use var instead.",
                  loc.Word(self.cur_word))

        if self.c_id in (Id.KW_Var, Id.KW_Const):  # var x = 1
            keyword_id = self.c_id
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n8 = self.w_parser.ParseVarDecl(kw_token)
            # Statically register each declared name
            for lhs in n8.lhs:
                self.var_checker.Check(keyword_id, lhs.name, lhs.left)
            return n8

        if self.c_id in (Id.KW_SetVar, Id.KW_SetGlobal):
            kw_token = word_.LiteralToken(self.cur_word)
            self._SetNext()
            n9 = self.w_parser.ParseMutation(kw_token, self.var_checker)
            return n9

        if self.c_id in (Id.KW_Call, Id.Lit_Equals):
            # = 42 + a[i]
            # call mylist->append('x')

            keyword = word_.LiteralToken(self.cur_word)
            assert keyword is not None
            self._SetNext()
            enode = self.w_parser.ParseCommandExpr()
            return command.Expr(keyword, enode)

        if self.c_id == Id.KW_Function:
            return self.ParseKshFunctionDef()

        if self.c_id in (Id.KW_DLeftBracket, Id.Op_DLeftParen, Id.Op_LParen,
                         Id.Lit_LBrace, Id.KW_For, Id.KW_While, Id.KW_Until,
                         Id.KW_If, Id.KW_Case, Id.KW_Time):
            return self.ParseCompoundCommand()

        # Syntax error for '}' starting a line, which all shells disallow.
        if self.c_id == Id.Lit_RBrace:
            p_die('Unexpected right brace', loc.Word(self.cur_word))

        if self.c_kind == Kind.Redir:  # Leading redirect
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Word:
            # ensured by Kind.Word
            cur_word = cast(CompoundWord, self.cur_word)

            # NOTE: At the top level, only Token and Compound are possible.
            # Can this be modelled better in the type system, removing asserts?
            #
            # TODO: This can be a proc INVOCATION!  (Doesn't even need parse_paren)
            # Problem: We have to distinguish f( ) { echo ; } and myproc (x, y)
            # That requires 2 tokens of lookahead, which we don't have
            #
            # Or maybe we don't just have ParseSimpleCommand -- we will have
            # ParseYshCommand or something

            # One token of lookahead: NAME followed by '()' is a function def,
            # unless the word looks like an assignment (NAME=...)
            if (self.w_parser.LookAheadFuncParens() and
                    not word_.IsVarLike(cur_word)):
                return self.ParseFunctionDef()  # f() { echo; }  # function

            # Parse x = 1+2*3 when inside HayNode { } blocks
            parts = cur_word.parts
            if self.parse_opts.parse_equals() and len(parts) == 1:
                part0 = parts[0]
                if part0.tag() == word_part_e.Literal:
                    tok = cast(Token, part0)
                    # A single valid-varname token followed by '=' (past
                    # whitespace) is a bare assignment
                    if (tok.id == Id.Lit_Chars and
                            self.w_parser.LookPastSpace() == Id.Lit_Equals and
                            match.IsValidVarName(lexer.LazyStr(tok))):

                        if (len(self.hay_attrs_stack) and
                                self.hay_attrs_stack[-1]):
                            # Note: no static var_checker.Check() for bare assignment
                            enode = self.w_parser.ParseBareDecl()
                            self._SetNext()  # Somehow this is necessary
                            # TODO: Use BareDecl here.  Well, do that when we
                            # treat it as const or lazy.
                            return command.VarDecl(
                                None,
                                [NameType(tok, lexer.TokenVal(tok), None)],
                                enode)
                        else:
                            # Bare assignment outside a Hay attrs block is an
                            # error, not a command
                            self._SetNext()
                            self._GetWord()
                            p_die(
                                'Unexpected = (Hint: use var/setvar, or quote it)',
                                loc.Word(self.cur_word))

            # echo foo
            # f=(a b c)  # array
            # array[1+2]+=1
            return self.ParseSimpleCommand()

        if self.c_kind == Kind.Eof:
            p_die("Unexpected EOF while parsing command",
                  loc.Word(self.cur_word))

        # NOTE: This only happens in batch mode in the second turn of the loop!
        # e.g. )
        p_die("Invalid word while parsing command", loc.Word(self.cur_word))

        assert False  # for MyPy
2504
2505 def ParsePipeline(self):
2506 # type: () -> command_t
2507 """
2508 pipeline : Bang? command ( '|' newline_ok command )* ;
2509 """
2510 negated = None # type: Optional[Token]
2511
2512 self._GetWord()
2513 if self.c_id == Id.KW_Bang:
2514 negated = word_.AsKeywordToken(self.cur_word)
2515 self._SetNext()
2516
2517 child = self.ParseCommand()
2518 assert child is not None
2519
2520 children = [child]
2521
2522 self._GetWord()
2523 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2524 if negated is not None:
2525 node = command.Pipeline(negated, children, [])
2526 return node
2527 else:
2528 return child # no pipeline
2529
2530 # | or |&
2531 ops = [] # type: List[Token]
2532 while True:
2533 op = word_.AsOperatorToken(self.cur_word)
2534 ops.append(op)
2535
2536 self._SetNext() # skip past Id.Op_Pipe or Id.Op_PipeAmp
2537 self._NewlineOk()
2538
2539 child = self.ParseCommand()
2540 children.append(child)
2541
2542 self._GetWord()
2543 if self.c_id not in (Id.Op_Pipe, Id.Op_PipeAmp):
2544 break
2545
2546 return command.Pipeline(negated, children, ops)
2547
2548 def ParseAndOr(self):
2549 # type: () -> command_t
2550 self._GetWord()
2551 if self.c_id == Id.Lit_TDot:
2552 # We got '...', so parse in multiline mode
2553 self._SetNext()
2554 with word_.ctx_Multiline(self.w_parser):
2555 return self._ParseAndOr()
2556
2557 # Parse in normal mode, not multiline
2558 return self._ParseAndOr()
2559
2560 def _ParseAndOr(self):
2561 # type: () -> command_t
2562 """
2563 and_or : and_or ( AND_IF | OR_IF ) newline_ok pipeline
2564 | pipeline
2565
2566 Note that it is left recursive and left associative. We parse it
2567 iteratively with a token of lookahead.
2568 """
2569 child = self.ParsePipeline()
2570 assert child is not None
2571
2572 self._GetWord()
2573 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2574 return child
2575
2576 ops = [] # type: List[Token]
2577 children = [child]
2578
2579 while True:
2580 ops.append(word_.AsOperatorToken(self.cur_word))
2581
2582 self._SetNext() # skip past || &&
2583 self._NewlineOk()
2584
2585 child = self.ParsePipeline()
2586 children.append(child)
2587
2588 self._GetWord()
2589 if self.c_id not in (Id.Op_DPipe, Id.Op_DAmp):
2590 break
2591
2592 return command.AndOr(children, ops)
2593
2594 # NOTE: _ParseCommandLine and _ParseCommandTerm are similar, but different.
2595
2596 # At the top level, we execute after every line, e.g. to
2597 # - process alias (a form of dynamic parsing)
2598 # - process 'exit', because invalid syntax might appear after it
2599
2600 # On the other hand, for a while loop body, we parse the whole thing at once,
2601 # and then execute it. We don't want to parse it over and over again!
2602
2603 # COMPARE
2604 # command_line : and_or (sync_op and_or)* trailer? ; # TOP LEVEL
2605 # command_term : and_or (trailer and_or)* ; # CHILDREN
2606
2607 def _ParseCommandLine(self):
2608 # type: () -> command_t
2609 """
2610 command_line : and_or (sync_op and_or)* trailer? ;
2611 trailer : sync_op newline_ok
2612 | NEWLINES;
2613 sync_op : '&' | ';';
2614
2615 NOTE: This rule causes LL(k > 1) behavior. We would have to peek to see if
2616 there is another command word after the sync op.
2617
2618 But it's easier to express imperatively. Do the following in a loop:
2619 1. ParseAndOr
2620 2. Peek.
2621 a. If there's a newline, then return. (We're only parsing a single
2622 line.)
2623 b. If there's a sync_op, process it. Then look for a newline and
2624 return. Otherwise, parse another AndOr.
2625 """
2626 # This END_LIST is slightly different than END_LIST in _ParseCommandTerm.
2627 # I don't think we should add anything else here; otherwise it will be
2628 # ignored at the end of ParseInteractiveLine(), e.g. leading to bug #301.
2629 END_LIST = [Id.Op_Newline, Id.Eof_Real]
2630
2631 children = [] # type: List[command_t]
2632 done = False
2633 while not done:
2634 child = self.ParseAndOr()
2635
2636 self._GetWord()
2637 if self.c_id in (Id.Op_Semi, Id.Op_Amp):
2638 tok = cast(Token, self.cur_word) # for MyPy
2639 child = command.Sentence(child, tok)
2640 self._SetNext()
2641
2642 self._GetWord()
2643 if self.c_id in END_LIST:
2644 done = True
2645
2646 elif self.c_id in END_LIST:
2647 done = True
2648
2649 else:
2650 # e.g. echo a(b)
2651 p_die(
2652 'Invalid word while parsing command line (%s)' %
2653 Id_str(self.c_id), loc.Word(self.cur_word))
2654
2655 children.append(child)
2656
2657 # Simplify the AST.
2658 if len(children) > 1:
2659 return command.CommandList(children)
2660 else:
2661 return children[0]
2662
    def _ParseCommandTerm(self):
        # type: () -> command.CommandList
        """
        command_term : and_or (trailer and_or)* ;
        trailer      : sync_op newline_ok
                     | NEWLINES;
        sync_op      : '&' | ';';

        This is handled in imperative style, like _ParseCommandLine.
        Called by _ParseCommandList for all blocks, and also for ParseCaseArm,
        which is slightly different.  (HOW?  Is it the DSEMI?)

        Returns:
          syntax_asdl.command
        """
        # Token types that will end the command term.
        END_LIST = [
            self.eof_id, Id.Right_Subshell, Id.Lit_RBrace, Id.Op_DSemi,
            Id.Op_SemiAmp, Id.Op_DSemiAmp
        ]

        # NOTE: This is similar to _ParseCommandLine.
        #
        # - Why aren't we doing END_LIST in _ParseCommandLine?
        #   - Because you will never be inside $() at the top level.
        #   - We also know it will end in a newline.  It can't end in "fi"!
        #     - example: if true; then { echo hi; } fi

        children = []  # type: List[command_t]
        done = False
        while not done:
            # Most keywords are valid "first words".  But do/done/then do not
            # BEGIN commands, so they are not valid.
            if self._AtSecondaryKeyword():
                break

            child = self.ParseAndOr()

            # Check for a terminator after the command: newline, sync op,
            # or one of the END_LIST tokens directly.
            self._GetWord()
            if self.c_id == Id.Op_Newline:
                self._SetNext()

                self._GetWord()
                if self.c_id in END_LIST:  # \n followed by a terminator
                    done = True

            elif self.c_id in (Id.Op_Semi, Id.Op_Amp):
                # Record the ';' or '&' terminator in a Sentence node
                tok = cast(Token, self.cur_word)  # for MyPy
                child = command.Sentence(child, tok)
                self._SetNext()

                self._GetWord()
                if self.c_id == Id.Op_Newline:
                    self._SetNext()  # skip over newline

                    # Test if we should keep going.  There might be another command after
                    # the semi and newline.
                    self._GetWord()
                    if self.c_id in END_LIST:  # ; \n EOF
                        done = True

                elif self.c_id in END_LIST:  # ; EOF
                    done = True

            elif self.c_id in END_LIST:  # terminator with no newline, e.g. EOF
                done = True

            # For if test -f foo; test -f bar {
            elif self.parse_opts.parse_brace() and self.c_id == Id.Lit_LBrace:
                done = True

            elif self.c_kind != Kind.Word:
                # e.g. f() { echo (( x )) ; }
                # but can't fail on 'fi fi', see osh/cmd_parse_test.py

                #log("Invalid %s", self.cur_word)
                p_die("Invalid word while parsing command list",
                      loc.Word(self.cur_word))

            children.append(child)

        return command.CommandList(children)
2745
2746 def _ParseCommandList(self):
2747 # type: () -> command.CommandList
2748 """
2749 command_list : newline_ok command_term trailer? ;
2750
2751 This one is called by all the compound commands. It's basically a command
2752 block.
2753
2754 NOTE: Rather than translating the CFG directly, the code follows a style
2755 more like this: more like this: (and_or trailer)+. It makes capture
2756 easier.
2757 """
2758 self._NewlineOk()
2759 return self._ParseCommandTerm()
2760
2761 def ParseLogicalLine(self):
2762 # type: () -> command_t
2763 """Parse a single line for main_loop.
2764
2765 A wrapper around _ParseCommandLine(). Similar but not identical to
2766 _ParseCommandList() and ParseCommandSub().
2767
2768 Raises:
2769 ParseError
2770 """
2771 self._NewlineOk()
2772 self._GetWord()
2773 if self.c_id == Id.Eof_Real:
2774 return None # main loop checks for here docs
2775 node = self._ParseCommandLine()
2776 return node
2777
2778 def ParseInteractiveLine(self):
2779 # type: () -> parse_result_t
2780 """Parse a single line for Interactive main_loop.
2781
2782 Different from ParseLogicalLine because newlines are handled differently.
2783
2784 Raises:
2785 ParseError
2786 """
2787 self._GetWord()
2788 if self.c_id == Id.Op_Newline:
2789 return parse_result.EmptyLine
2790 if self.c_id == Id.Eof_Real:
2791 return parse_result.Eof
2792
2793 node = self._ParseCommandLine()
2794 return parse_result.Node(node)
2795
2796 def ParseCommandSub(self):
2797 # type: () -> command_t
2798 """Parse $(echo hi) and `echo hi` for word_parse.py.
2799
2800 They can have multiple lines, like this: echo $( echo one echo
2801 two )
2802 """
2803 self._NewlineOk()
2804
2805 self._GetWord()
2806 if self.c_kind == Kind.Eof: # e.g. $()
2807 return command.NoOp
2808
2809 c_list = self._ParseCommandTerm()
2810 if len(c_list.children) == 1:
2811 return c_list.children[0]
2812 else:
2813 return c_list
2814
2815 def CheckForPendingHereDocs(self):
2816 # type: () -> None
2817 # NOTE: This happens when there is no newline at the end of a file, like
2818 # osh -c 'cat <<EOF'
2819 if len(self.pending_here_docs):
2820 node = self.pending_here_docs[0] # Just show the first one?
2821 h = cast(redir_param.HereDoc, node.arg)
2822 p_die('Unterminated here doc began here', loc.Word(h.here_begin))
2823
2824
2825# vim: sw=4