| 1 | """
|
| 2 | word.py - Utility functions for words, e.g. treating them as "tokens".
|
| 3 | """
|
| 4 |
|
| 5 | from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
|
| 6 | from _devbuild.gen.runtime_asdl import Piece
|
| 7 | from _devbuild.gen.syntax_asdl import (
|
| 8 | Token,
|
| 9 | CompoundWord,
|
| 10 | DoubleQuoted,
|
| 11 | SingleQuoted,
|
| 12 | word,
|
| 13 | word_e,
|
| 14 | word_t,
|
| 15 | word_str,
|
| 16 | word_part,
|
| 17 | word_part_t,
|
| 18 | word_part_e,
|
| 19 | AssocPair,
|
| 20 | )
|
| 21 | from frontend import consts
|
| 22 | from frontend import lexer
|
| 23 | from mycpp import mylib
|
| 24 | from mycpp.mylib import tagswitch, log
|
| 25 |
|
| 26 | from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
|
| 27 | if TYPE_CHECKING:
|
| 28 | from osh.word_parse import WordParser
|
| 29 |
|
| 30 | _ = log
|
| 31 |
|
| 32 |
|
| 33 | def MakePiece(s, quoted):
|
| 34 | # type: (str, bool) -> Piece
|
| 35 | """
|
| 36 | For $x versus "$x", etc.
|
| 37 | """
|
| 38 | return Piece(s, quoted, not quoted)
|
| 39 |
|
| 40 |
|
| 41 | def PieceQuoted(s):
|
| 42 | # type: (str) -> Piece
|
| 43 | """
|
| 44 | For 'hi' "$x"
|
| 45 | and $[myexpr] in YSH
|
| 46 | """
|
| 47 | # quoted=True, do_split=False
|
| 48 | return Piece(s, True, False)
|
| 49 |
|
| 50 |
|
| 51 | def PieceOperator(s):
|
| 52 | # type: (str) -> Piece
|
| 53 | """
|
| 54 | For Extended glob @(--verbose|help)
|
| 55 | And BashRegexGroup [[ foo =~ x(a b)y ]
|
| 56 |
|
| 57 | We don't want ( to become \(, so quoted=False
|
| 58 | """
|
| 59 | # quoted=False, do_split=False
|
| 60 | return Piece(s, False, False)
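

# A quick reference for the (quoted, do_split) combinations produced above
# (illustrative; field order assumed from the comments in this file):
#
#   MakePiece('x', False)  -> Piece('x', quoted=False, do_split=True)   # $x
#   MakePiece('x', True)   -> Piece('x', quoted=True,  do_split=False)  # "$x"
#   PieceQuoted('hi')      -> Piece('hi', quoted=True,  do_split=False)
#   PieceOperator('(')     -> Piece('(',  quoted=False, do_split=False)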
|

def LiteralId(part):
    # type: (word_part_t) -> Id_t
    """If the WordPart consists of a single literal token, return its Id.

    Used for Id.KW_For, Id.RBrace, etc.
    """
    if part.tag() != word_part_e.Literal:
        return Id.Undefined_Tok  # unequal to any other Id

    return cast(Token, part).id


def CheckLiteralId(part, tok_id):
    # type: (word_part_t, Id_t) -> Optional[Token]
    """If the WordPart is a Token of a given Id, return the Token."""
    if part.tag() != word_part_e.Literal:
        return None

    tok = cast(Token, part)
    if tok.id == tok_id:
        return tok

    return None


def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a single literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    if part0.tag() != word_part_e.Literal:
        return None

    return cast(Token, part0)
|

def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. Compiling constant regex words at parse time
    5. a special case for ${a////c}, to see if we got a leading slash in the
       pattern.

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
          evaluated, then we return False.
        value: a string (not Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            # Weird performance issue: if we change this to lexer.LazyStr(),
            # the parser slows down, e.g. on configure-coreutils from 805 B
            # irefs to ~830 B.  The real issue is that we should avoid
            # calling this from CommandParser - for the Hay node.
            return True, lexer.TokenVal(tok), False
            #return True, lexer.LazyStr(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            return True, part.sval, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.YshArrayLiteral, word_part_e.InitializerLiteral,
                  word_part_e.ZshVarSub, word_part_e.CommandSub,
                  word_part_e.SimpleVarSub, word_part_e.BracedVarSub,
                  word_part_e.TildeSub, word_part_e.ArithSub,
                  word_part_e.ExtGlob, word_part_e.Splice,
                  word_part_e.ExprSub):
            return False, '', False

        else:
            raise AssertionError(part.tag())
|

def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """Detects common cases:

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) A single-quoted word like 'foo'

    Other patterns we could detect:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but
        not YSH)
        - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens in this case, e.g. + is Lit_Other,
                # and it's a Token in 'expr'.  Right now it's Lit_Chars
                # (e.g. ls -l) and [ and ], because I know those are common.
                # { } are not as common.
                return lexer.LazyStr(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have a lazy (str? sval) field.
            # This would only affect multi-line strings though?
            return part0.sval

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such
            return None
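

# Illustrative behavior of FastStrEval(), based on the cases above:
#
#   -l       a single Id.Lit_Chars token  -> '-l'  (fast path)
#   'foo'    a single SingleQuoted part   -> 'foo' (part0.sval)
#   *        Id.Lit_Star must be globbed  -> None  (fall back to the evaluator)
#   "$@"     DoubleQuoted                 -> None  (fall back to the evaluator)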
|

def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a Compound at PARSE TIME."""
    quoted = False

    # e.g. for ( instead of for (( is a token word
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    strs = []  # type: List[str]
    for part in w.parts:
        ok, s, q = _EvalWordPart(part)
        if not ok:
            return False, '', quoted
        if q:
            quoted = True  # at least one part was quoted
        strs.append(s)
    #log('StaticEval parts %s', w.parts)
    return True, ''.join(strs), quoted
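

# Examples of parse-time evaluation (illustrative):
#
#   foo'bar'  -> (True, 'foobar', True)   # quoted, because of the '' part
#   foo       -> (True, 'foo', False)     # no part was quoted
#   foo$x     -> (False, '', False)       # $x isn't known at parse time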
|

# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if
# there is no slash... If none of the characters in the tilde-prefix are
# quoted, the characters in the tilde-prefix following the tilde shall be
# treated as a possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode?  You respect quotes
# and substitutions.
#
# We only detect ~Lit_Chars and split.  So we might as well just write a
# regex.
|

def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    It might begin with a Literal that needs to be turned into a TildeSub.
    (It depends on whether the second token begins with a slash.)

    If so, it returns a new word.  Otherwise it returns None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    # BracedTree can't be tilde expanded
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    return TildeDetect2(w)
|

def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we
    have a List[CompoundWord].

    Tilde detection:

    YES:
      ~        ~/
      ~bob     ~bob/

    NO:
      ~bob#    ~bob#/
      ~bob$x
      ~$x

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    tok0 = CheckLiteralId(w.parts[0], Id.Lit_Tilde)
    if tok0 is None:
        return None

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)
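

# Worked example (illustrative): a word like ~bob/src has parts roughly like
#
#   [Lit_Tilde '~', Lit_Chars 'bob', Lit_Slash '/', ...]
#
# so TildeDetect2() returns a new CompoundWord
#
#   [TildeSub(tok0, tok1, 'bob'), Lit_Slash '/', ...]
#
# while ~bob$x returns None, because the login name isn't statically known.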
|

def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde subs, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there is no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Avoid IndexError, since we have to look ahead up to 2 parts
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip a tilde in the middle of a word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # Look ahead up to 2 parts; the None padding means ~ is at the
            # end of the word
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    parts.pop()
    parts.pop()

    # Mutate the argument
    w.parts = new_parts
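

# Worked example (illustrative): for a=~:~/src, the RHS parts
#
#   [Lit_Tilde, Lit_Colon, Lit_Tilde, Lit_Slash, Lit_Chars 'src']
#
# are rewritten in place to
#
#   [TildeSub, Lit_Colon, TildeSub, Lit_Slash, Lit_Chars 'src']
#
# Each tilde qualifies because it starts the word or follows a colon.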
|

def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    out = []  # type: List[word_t]
    for w in words:
        t = TildeDetect(w)
        if t:
            out.append(t)
        else:
            out.append(w)
    return out


def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Used in cmd_parse."""
    for part in w.parts:
        if part.tag() == word_part_e.InitializerLiteral:
            return True
    return False


def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

      FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, s, quoted = StaticEval(w)
    # Function names should not have quotes
    if not ok or quoted:
        return ''
    return s


def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

      func() { echo hi; }
      func=(1 2 3)
    """
    if len(w.parts) == 0:
        return False

    return LiteralId(w.parts[0]) == Id.Lit_VarLike


def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will
    be confused between array assignments foo=(1 2) and function calls
    foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    if len(w.parts) != 1:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_ArithVarLike)


def CheckLeadingEquals(w):
    # type: (CompoundWord) -> Optional[Token]
    """Tests whether a word looks like =word.

    For shopt --set strict_parse_equals.
    """
    if len(w.parts) == 0:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_Equals)
|

def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token,   # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's not an
                    # assignment
      close_token,  # Lit_ArrayLhsClose if it was detected, or None
      part_offset   # where to start the value word; 0 if not an assignment

    Cases:

      s=1
      s+=1
      s[x]=1
      s[x]+=1

      a=()
      a+=()
      a[x]=()
      a[x]+=()  # We parse this (as bash does), but it's never valid because
                # arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    part0 = w.parts[0]
    if part0.tag() != word_part_e.Literal:
        return no_token, no_token, 0

    tok0 = cast(Token, part0)

    if tok0.id == Id.Lit_VarLike:
        return tok0, no_token, 1  # everything after the first token is the value

    if tok0.id == Id.Lit_ArrayLhsOpen:
        # NOTE that a[]=x should be an error.  We don't want to silently
        # decay.
        if n < 2:
            return no_token, no_token, 0
        for i in xrange(1, n):
            part = w.parts[i]
            tok_close = CheckLiteralId(part, Id.Lit_ArrayLhsClose)
            if tok_close:
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar' or 'a[x+1+2/' without the closing ].
    return no_token, no_token, 0
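

# Illustrative return values, based on the cases above:
#
#   s=1     -> (Lit_VarLike token, None, 1)
#   a[x]=1  -> (Lit_ArrayLhsOpen token, Lit_ArrayLhsClose token, i + 1)
#              where i is the index of the ']=' part
#   echo    -> (None, None, 0)  # not an assignment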
|

def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out the
    word_parts.  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k]
    syntax is only used for associative array literals, as opposed to
    indexed array literals.
    """
    parts = w.parts
    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b

            has_plus = lexer.IsPlusEquals(cast(Token, parts[i]))

            # Type-annotated intermediate value for mycpp translation
            return AssocPair(key, value, has_plus)

    return None
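

# Illustrative: for an element like ['k']=v, parts[0] is Lit_LBracket and
# parts[i] is the ']=' token (Lit_ArrayLhsClose), so we slice out
#
#   key   = CompoundWord(parts[1:i])      # 'k'
#   value = CompoundWord(parts[i + 1:])   # v
#
# and has_plus distinguishes ['k']+=v from ['k']=v.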
|

def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token


def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Id.Lit_RBrace, return the Token.

    This is a special case for osh/cmd_parse.py.

    The WordParser changes Id.Op_LBrace from ExprParser into Id.Lit_LBrace,
    so we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            return LiteralToken(w)

        else:
            raise AssertionError()
|

def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """Given a word that IS a CompoundWord containing just a keyword, return
    the single token at the start.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok
    return tok


def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)
|

#
# Polymorphic between Token and Compound
#


def ArithId(w):
    # type: (word_t) -> Id_t
    """Used by shell arithmetic parsing."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    assert isinstance(w, CompoundWord)
    return Id.Word_Compound


def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())
|

def CommandId(w):
    # type: (word_t) -> Id_t
    """Used by the CommandParser."""
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Fine-grained categorization of SINGLE literal parts
            if len(w.parts) != 1:
                return Id.Word_Compound  # generic word

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # Not Kind.Lit, generic word

            if token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                              Id.Lit_TDot):
                # - { } are for YSH braces
                # - = is for the = keyword
                # - ... is to start multiline mode
                #
                # TODO: Should we use Op_{LBrace,RBrace} and Kind.Op when
                # parse_brace?  Lit_Equals could be KW_Equals?
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type  # Id.KW_Var, etc.

            return Id.Word_Compound  # generic word

        elif case(word_e.Redir):
            w = cast(word.Redir, UP_w)
            return w.op.id

        else:
            raise AssertionError(w.tag())


def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser.

    NOTE: This is inconsistent with CommandId(), because we never return
    Kind.KW or Kind.Lit.  But the CommandParser is easier to write this way.

    For example, these are valid redirects to a Kind.Word, and the parser
    checks:

      echo hi > =
      echo hi > {

    Invalid:

      echo hi > (
      echo hi > ;
    """
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        # CommandParser uses Kind.Op, Kind.Eof, etc.
        return consts.GetKind(tok.id)
    if w.tag() == word_e.Redir:
        return Kind.Redir

    return Kind.Word
|

# Stubs for converting the RHS of assignment to expression mode.
# For ysh_ify.py.
def IsVarSub(w):
    # type: (word_t) -> bool
    """Return whether it's any var sub, or a double quoted one."""
    return False


# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])


def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    UP_w = w
    if w.tag() == word_e.String:
        w = cast(word.String, UP_w)
        if w.id == Id.Eof_Real:
            return 'EOF'
        else:
            return repr(w.s)
    else:
        return word_str(w.tag())  # tag name


class ctx_EmitDocToken(object):
    """For doc comments."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.EmitDocToken(False)


class ctx_Multiline(object):
    """For multiline commands."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.Multiline(False)
|