1"""
2word.py - Utility functions for words, e.g. treating them as "tokens".
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
6from _devbuild.gen.runtime_asdl import Piece
7from _devbuild.gen.syntax_asdl import (
8 Token,
9 CompoundWord,
10 DoubleQuoted,
11 SingleQuoted,
12 word,
13 word_e,
14 word_t,
15 word_str,
16 word_part,
17 word_part_t,
18 word_part_e,
19 AssocPair,
20)
21from frontend import consts
22from frontend import lexer
23from mycpp import mylib
24from mycpp.mylib import tagswitch, log
25
26from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
27if TYPE_CHECKING:
28 from osh.word_parse import WordParser
29
30_ = log
31
32
def MakePiece(s, quoted):
    # type: (str, bool) -> Piece
    """
    For $x versus "$x", etc.
    """
    return Piece(s, quoted, not quoted)


def PieceQuoted(s):
    # type: (str) -> Piece
    """
    For 'hi' "$x"
    and $[myexpr] in YSH
    """
    # quoted=True, do_split=False
    return Piece(s, True, False)


def PieceOperator(s):
    # type: (str) -> Piece
    """
    For extended glob @(--verbose|help)
    and BashRegexGroup [[ foo =~ x(a b)y ]]

    We don't want ( to become \(, so quoted=False
    """
    # quoted=False, do_split=False
    return Piece(s, False, False)


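# A usage sketch, not part of the original module.  Judging from the three
# constructors above, the Piece fields are (s, quoted, do_split):
#
#   MakePiece('x', False)  # Piece('x', quoted=False, do_split=True)   e.g. $x
#   MakePiece('x', True)   # Piece('x', quoted=True,  do_split=False)  e.g. "$x"
#   PieceQuoted('hi')      # Piece('hi', quoted=True, do_split=False)  e.g. 'hi'
#   PieceOperator('(')     # Piece('(', quoted=False, do_split=False)  e.g. @(...)

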
def LiteralId(part):
    # type: (word_part_t) -> Id_t
    """If the WordPart consists of a single literal token, return its Id.

    Used for Id.KW_For, or Id.RBrace, etc.
    """
    if part.tag() != word_part_e.Literal:
        return Id.Undefined_Tok  # unequal to any other Id

    return cast(Token, part).id


def CheckLiteralId(part, tok_id):
    # type: (word_part_t, Id_t) -> Optional[Token]
    """If the WordPart is a Token of a given Id, return the Token."""
    if part.tag() != word_part_e.Literal:
        return None

    tok = cast(Token, part)
    if tok.id == tok_id:
        return tok

    return None


def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    if part0.tag() != word_part_e.Literal:
        return None

    return cast(Token, part0)


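# A sketch (hypothetical words): for a word with a single Literal part, such
# as a bare identifier, LiteralToken returns that Token.  For "var" (one
# DoubleQuoted part) or var$x (two parts), it returns None.

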
def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. Compiling constant regex words at parse time
    5. a special case for ${a////c} to see if we got a leading slash in the
       pattern.

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
            evaluated, then we return false.
        value: a string (not Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            # Weird performance issue: if we change this to lexer.LazyStr(),
            # the parser slows down, e.g. on configure-coreutils from 805 B
            # irefs to ~830 B.  The real issue is that we should avoid calling
            # this from CommandParser - for the Hay node.
            return True, lexer.TokenVal(tok), False
            #return True, lexer.LazyStr(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            return True, part.sval, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.YshArrayLiteral, word_part_e.InitializerLiteral,
                  word_part_e.ZshVarSub, word_part_e.CommandSub,
                  word_part_e.SimpleVarSub, word_part_e.BracedVarSub,
                  word_part_e.TildeSub, word_part_e.ArithSub,
                  word_part_e.ExtGlob, word_part_e.Splice,
                  word_part_e.ExprSub):
            return False, '', False

        else:
            raise AssertionError(part.tag())


def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """
    Detects common cases:

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) single quoted word like 'foo'

    Other patterns we could detect are:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but
        not YSH)
        - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens in this case
                # e.g. + is Lit_Other, and it's a Token in 'expr'
                # Right now it's Lit_Chars (e.g. ls -l) and [ and ] because I
                # know those are common
                # { } are not as common
                return lexer.LazyStr(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have a lazy (str? sval) field
            # This would only affect multi-line strings though?
            return part0.sval

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such
            return None


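# A sketch of the fast path (hypothetical words):
#
#   -l       one Lit_Chars token    -> lexer.LazyStr(...) (e.g. from ls -l)
#   'foo'    one SingleQuoted part  -> part0.sval
#   *        Lit_Star               -> None, must be glob expanded
#   "$@"     DoubleQuoted           -> None, needs full evaluation

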
def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a Compound word at PARSE TIME."""
    quoted = False

    # e.g. the ( in 'for (' (as opposed to 'for ((') is a token word, not
    # a Compound word
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    strs = []  # type: List[str]
    for part in w.parts:
        ok, s, q = _EvalWordPart(part)
        if not ok:
            return False, '', quoted
        if q:
            quoted = True  # at least one part was quoted
        strs.append(s)
    #log('StaticEval parts %s', w.parts)
    return True, ''.join(strs), quoted


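# A sketch (hypothetical here docs): StaticEval is how the parser decides
# whether a here doc body should be expanded.
#
#   cat <<EOF      delimiter evals to (True, 'EOF', quoted=False) - body expands
#   cat <<'EOF'    (True, 'EOF', quoted=True)  - body is literal
#   cat <<E${x}F   (False, '', ...)  - delimiter can't be statically evaluated

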
# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if there
# is no slash...  If none of the characters in the tilde-prefix are quoted,
# the characters in the tilde-prefix following the tilde shall be treated as a
# possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode?  You respect quotes
# and substitutions.
#
# We only detect ~Lit_Chars and split.  So we might as well just write a regex.


def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    It might begin with a Literal that needs to be turned into a TildeSub.
    (It depends on whether the second token begins with a slash.)

    If so, it returns a new word.  Otherwise it returns None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    # BracedTree can't be tilde expanded
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    return TildeDetect2(w)


def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we have
    a List[CompoundWord].

    Tilde detection:

    YES:
      ~        ~/
      ~bob     ~bob/

    NO:
      ~bob#    ~bob#/
      ~bob$x
      ~$x

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    tok0 = CheckLiteralId(w.parts[0], Id.Lit_Tilde)
    if tok0 is None:
        return None

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)


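# A sketch of the transformation (hypothetical word): for ~bob/src, the parts
#
#   [Lit_Tilde, Lit_Chars 'bob', Lit_Slash, Lit_Chars 'src']
#
# become, per the constructor calls above,
#
#   [TildeSub(tilde_tok, name_tok, 'bob'), Lit_Slash, Lit_Chars 'src']
#
# and word evaluation later substitutes the user's home directory.

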
def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde subs, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there is no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Avoid IndexError, since we have to look ahead up to 2 tokens
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip tilde in middle of word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # Look ahead up to 2 parts; they may be the None sentinels
            # appended above
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    parts.pop()
    parts.pop()

    # Mutate argument
    w.parts = new_parts


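# A sketch (hypothetical assignment RHS): for a=~/src:~bob, the parts
#
#   [Lit_Tilde, Lit_Slash, Lit_Chars 'src', Lit_Colon, Lit_Tilde, Lit_Chars 'bob']
#
# are rewritten in place to
#
#   [TildeSub, Lit_Slash, Lit_Chars 'src', Lit_Colon, TildeSub 'bob']

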
def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    out = []  # type: List[word_t]
    for w in words:
        t = TildeDetect(w)
        if t:
            out.append(t)
        else:
            out.append(w)
    return out


def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Used in cmd_parse."""
    for part in w.parts:
        if part.tag() == word_part_e.InitializerLiteral:
            return True
    return False


def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

    FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, s, quoted = StaticEval(w)
    # Function names should not have quotes
    if not ok or quoted:
        return ''
    return s


def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

    func() { echo hi; }
    func=(1 2 3)
    """
    if len(w.parts) == 0:
        return False

    return LiteralId(w.parts[0]) == Id.Lit_VarLike


def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will
    be confused between array assignments foo=(1 2) and function calls
    foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    if len(w.parts) != 1:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_ArithVarLike)


def CheckLeadingEquals(w):
    # type: (CompoundWord) -> Optional[Token]
    """Tests whether a word looks like =word.

    For shopt --set strict_parse_equals
    """
    if len(w.parts) == 0:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_Equals)


def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token   # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's not an
                   # assignment
      close_token  # Lit_ArrayLhsClose if it was detected, or None
      part_offset  # where to start the value word, 0 if not an assignment

    Cases:

    s=1
    s+=1
    s[x]=1
    s[x]+=1

    a=()
    a+=()
    a[x]=()
    a[x]+=()  # We parse this (as bash does), but it's never valid because
              # arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    part0 = w.parts[0]
    if part0.tag() != word_part_e.Literal:
        return no_token, no_token, 0

    tok0 = cast(Token, part0)

    if tok0.id == Id.Lit_VarLike:
        return tok0, no_token, 1  # everything after the first token is the value

    if tok0.id == Id.Lit_ArrayLhsOpen:
        # NOTE that a[]=x should be an error.  We don't want to silently decay.
        if n < 2:
            return no_token, no_token, 0
        for i in xrange(1, n):
            part = w.parts[i]
            tok_close = CheckLiteralId(part, Id.Lit_ArrayLhsClose)
            if tok_close:
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar', or 'a[x+1+2' without the closing ].
    return no_token, no_token, 0


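# A sketch (hypothetical words):
#
#   FOO=bar   -> (Lit_VarLike token, None, 1)  # value is parts[1:]
#   a[i+1]=x  -> (Lit_ArrayLhsOpen token, Lit_ArrayLhsClose token,
#                 offset just past the ]= part)
#   foobar    -> (None, None, 0)               # not an assignment

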
def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out the word
    parts.  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k] syntax
    is only used for associative array literals, as opposed to indexed array
    literals.
    """
    parts = w.parts
    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b

            has_plus = lexer.IsPlusEquals(cast(Token, parts[i]))

            # Type-annotated value for mycpp translation
            return AssocPair(key, value, has_plus)

    return None


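# A sketch (hypothetical literal): inside A=([$x$y]=$a$b), the inner word
# produces AssocPair with key = the CompoundWord for $x$y and value = the
# CompoundWord for $a$b; with [$x$y]+=$a$b, has_plus would be True.

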
def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token


def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Id.Lit_RBrace, return a Token.

    This is a special case for osh/cmd_parse.py.

    The WordParser changes Id.Op_LBrace from ExprParser into Id.Lit_LBrace,
    so we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            return LiteralToken(w)

        else:
            raise AssertionError()


def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """
    Given a word that IS a CompoundWord containing just a keyword, return
    the single token at the start.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok
    return tok


def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)


#
# Polymorphic between Token and Compound
#


def ArithId(w):
    # type: (word_t) -> Id_t
    """Used by shell arithmetic parsing."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    assert isinstance(w, CompoundWord)
    return Id.Word_Compound


def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())


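# A sketch (hypothetical [[ ]] words; the exact Ids are assumptions):
#
#   -f        -> Id.BoolUnary_f    (Kind.BoolUnary operator)
#   !         -> Id.KW_Bang        (special boolean "token")
#   myfile    -> Id.Word_Compound  (a regular word operand)

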
def CommandId(w):
    # type: (word_t) -> Id_t
    """Used by CommandParser."""
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Fine-grained categorization of SINGLE literal parts
            if len(w.parts) != 1:
                return Id.Word_Compound  # generic word

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # Not Kind.Lit, generic word

            if token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                              Id.Lit_TDot):
                # - { } are for YSH braces
                # - = is for the = keyword
                # - ... is to start multiline mode
                #
                # TODO: Should we use Op_{LBrace,RBrace} and Kind.Op when
                # parse_brace?  Lit_Equals could be KW_Equals?
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type  # Id.KW_Var, etc.

            return Id.Word_Compound  # generic word

        elif case(word_e.Redir):
            w = cast(word.Redir, UP_w)
            return w.op.id

        else:
            raise AssertionError(w.tag())


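# A sketch (hypothetical words in command position):
#
#   {       -> Id.Lit_LBrace     (YSH brace group)
#   var     -> Id.KW_Var         (keyword)
#   ls      -> Id.Word_Compound  (generic word)

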
def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser.

    NOTE: This is inconsistent with CommandId(), because we never return
    Kind.KW or Kind.Lit.  But the CommandParser is easier to write this way.

    For example, these are valid redirects to a Kind.Word, and the parser
    checks:

    echo hi > =
    echo hi > {

    Invalid:
    echo hi > (
    echo hi > ;
    """
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        # CommandParser uses Kind.Op, Kind.Eof, etc.
        return consts.GetKind(tok.id)
    if w.tag() == word_e.Redir:
        return Kind.Redir

    return Kind.Word


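# A sketch, following the docstring above: the Operator word ';' has Kind.Op,
# so 'echo hi > ;' is rejected, while the Compound word '=' is Kind.Word, so
# 'echo hi > =' passes the parser's redirect check.

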
# Stubs for converting RHS of assignment to expression mode.
# For ysh_ify.py
def IsVarSub(w):
    # type: (word_t) -> bool
    """Return whether it's any var sub, or a double quoted one."""
    return False


# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])


def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    UP_w = w
    if w.tag() == word_e.String:
        w = cast(word.String, UP_w)
        if w.id == Id.Eof_Real:
            return 'EOF'
        else:
            return repr(w.s)
    else:
        return word_str(w.tag())  # tag name


class ctx_EmitDocToken(object):
    """For doc comments."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.EmitDocToken(False)


class ctx_Multiline(object):
    """For multiline commands."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.Multiline(False)