osh/word_eval.py

OILS / osh / word_eval.py View on Github | oilshell.org

2540 lines, 1549 significant

1	"""
2	word_eval.py - Evaluator for the word language.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6	from _devbuild.gen.syntax_asdl import (
7	Token,
8	SimpleVarSub,
9	loc,
10	loc_t,
11	BracedVarSub,
12	CommandSub,
13	bracket_op,
14	bracket_op_e,
15	suffix_op,
16	suffix_op_e,
17	ShArrayLiteral,
18	SingleQuoted,
19	DoubleQuoted,
20	word_e,
21	word_t,
22	CompoundWord,
23	rhs_word,
24	rhs_word_e,
25	rhs_word_t,
26	word_part,
27	word_part_e,
28	)
29	from _devbuild.gen.runtime_asdl import (
30	part_value,
31	part_value_e,
32	part_value_t,
33	cmd_value,
34	cmd_value_e,
35	cmd_value_t,
36	error_code_e,
37	AssignArg,
38	a_index,
39	a_index_e,
40	VTestPlace,
41	VarSubState,
42	Piece,
43	)
44	from _devbuild.gen.option_asdl import option_i, builtin_i
45	from _devbuild.gen.value_asdl import (
46	value,
47	value_e,
48	value_t,
49	sh_lvalue,
50	sh_lvalue_t,
51	)
52	from core import bash_impl
53	from core import error
54	from core import pyos
55	from core import pyutil
56	from core import state
57	from display import ui
58	from core import util
59	from data_lang import j8
60	from data_lang import j8_lite
61	from core.error import e_die
62	from frontend import consts
63	from frontend import lexer
64	from frontend import location
65	from mycpp import mops
66	from mycpp.mylib import log, tagswitch, NewDict
67	from osh import braces
68	from osh import glob_
69	from osh import string_ops
70	from osh import word_
71	from ysh import expr_eval
72	from ysh import val_ops
73
74	from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76	if TYPE_CHECKING:
77	from _devbuild.gen.syntax_asdl import word_part_t
78	from _devbuild.gen.option_asdl import builtin_t
79	from core import optview
80	from core.state import Mem
81	from core.vm import _Executor
82	from osh.split import SplitContext
83	from osh import prompt
84	from osh import sh_expr_eval
85
# Bit flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Each flag is a distinct bit, so they can be combined with | into one int
# (see how _ApplyTestOp builds its eval_flags).
QUOTED = 1 << 0  # OR'd in by _ApplyTestOp when its 'quoted' argument is true
IS_SUBST = 1 << 1  # set by _ApplyTestOp when evaluating a test op's arg word

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} may be treated as ${a[0]}.

    Decay is always allowed when strict_array is off.  With strict_array on,
    it is only allowed for a plain variable substitution of one of the names
    in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True

    if not is_plain_var_sub:
        return False

    return var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    For a BashArray the element at index 0 is used; for a BashAssoc the
    element with key '0'.  Any other value type is a programming error.

    Returns:
      value.Str for the element, or value.Undef when there is no such element.
    """
    if val.tag() == value_e.BashArray:
        arr = cast(value.BashArray, val)
        elem, status = bash_impl.BashArray_GetElement(arr, 0)

        # Note: index 0 should never cause the out-of-bound index error.
        assert status == error_code_e.OK

    elif val.tag() == value_e.BashAssoc:
        assoc = cast(value.BashAssoc, val)
        elem = bash_impl.BashAssoc_GetElement(assoc, '0')

    else:
        raise AssertionError(val.tag())

    if elem is not None:
        return value.Str(elem)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    We need to detect all of these cases:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_oils.Builtin() or meta_oils.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    if builtin_id == builtin_i.builtin:
        return True
    return builtin_id == builtin_i.command
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an UNQUOTED string naming 'builtin' or 'command'.

    Array part values and quoted strings never count.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False

    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse an argument to declare, export, etc.

    This is a fallback to the static parsing done below.

    Args:
      arg: the evaluated argument, e.g. NAME, NAME=value or NAME+=value
      blame_word: word to blame in error messages; also stored in the result

    Dies (e_die) when arg doesn't match consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    match = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if match is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Group 2 is used for grouping only; ERE doesn't have non-capturing groups
    op = match[3]
    assert op is not None, op

    # Defaults describe 'declare NAME', i.e. no operator at all
    rhs = None  # type: Optional[value_t]
    is_append = False
    if len(op) != 0:  # declare NAME=   or   declare NAME+=
        rhs = value.Str(match[4])
        is_append = op[0] == '+'

    return AssignArg(match[1], rhs, is_append, blame_word)
186
187
188	# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189	def _BackslashEscape(s):
190	# type: (str) -> str
191	"""Double up backslashes.
192
193	Useful for strings about to be globbed and strings about to be IFS
194	escaped.
195	"""
196	return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Turn the result of a VarSub into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the evaluated value
      quoted: whether the substitution is in a quoted context; the third
        Piece field is always its negation
      part_loc: word part to blame in error messages

    Raises:
      error.TypeErr if the value can't be substituted into a word.
    """
    UP_val = val
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            return Piece(val.s, quoted, unquoted)

        elif case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _ProcessUndef, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            array_strs = bash_impl.BashArray_GetValues(val)
            return part_value.Array(array_strs)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            assoc_strs = bash_impl.BashAssoc_GetValues(val)
            return part_value.Array(assoc_strs)

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            blame_loc = loc.WordPart(part_loc)
            stringified = val_ops.Stringify(val, blame_loc, 'Word eval ')
            return Piece(stringified, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group a word's part values into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args.  It can never be joined.
    This idea exists because of arrays like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this is a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    # 'tail' is always the last frame in 'result'; strings are appended to it.
    tail = []  # type: List[Piece]
    result = [tail]

    for part_val in part_vals:
        UP_part_val = part_val

        with tagswitch(part_val) as case:
            if case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)

                seen_elem = False
                for elem in part_val.strs:
                    if elem is not None:  # undefined array entries are ignored
                        # Arrays parts are always quoted; otherwise they would
                        # have decayed to a string.
                        elem_piece = Piece(elem, True, False)
                        if seen_elem:
                            # Each later element starts its own frame
                            tail = [elem_piece]
                            result.append(tail)
                        else:
                            # The first element joins the frame in progress
                            tail.append(elem_piece)
                            seen_elem = True

            elif case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                tail.append(part_val)

            else:
                raise AssertionError()

    return result
307
308
309	# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into a single string, e.g. decay ${a=x"$@"x}.

    String parts contribute their text.  Array parts contribute their defined
    (non-None) entries joined by join_char.  The pieces are then concatenated
    with no separator.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)
                # TODO: Eliminate double join for speed?
                defined = [s for s in part_val.strs if s is not None]
                chunks.append(join_char.join(defined))
            elif case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                chunks.append(part_val.s)
            else:
                raise AssertionError()
    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Slice a string or an array by offset and optional length.

    Called by AbstractWordEvaluator._Slice.

    Args:
      val: value to slice.  Str is sliced by UTF-8 character; BashArray and
        SparseArray are sliced by entry.
      offset: start position; a negative value counts from the end
      length: only meaningful when has_length is true.  For strings a negative
        length is an end POSITION counted from the end of the string; for
        arrays a negative length is a fatal error.
      has_length: whether a length was given at all
      part: the substitution, used as the location for errors
      arg0_val: None for ordinary variables; for $@ and $* the caller passes
        $0, which then counts as element 0 of the array

    Returns:
      value.Str for a Str input, value.BashArray for an array input.

    Raises:
      Fatal error via e_die for a negative array length or a BashAssoc;
      error.TypeErr for any other value type.
      NOTE(review): the caller also catches error.Strict around this call --
      presumably raised by the string_ops UTF-8 helpers; confirm.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            begin = mops.BigTruncate(offset)
            if begin < 0:  # Compute offset with unicode
                # Walk backwards from the end, one character per step
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    # Advance 'length' characters starting at byte_begin
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                # Convert the negative offset to one relative to the start
                offset = mops.Add(offset, array_length)

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # Still negative after adjustment: nothing to select
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        # Shift by one because $0 isn't stored in the array
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # offset == 0: $0 itself is part of the result and
                        # uses up one unit of length
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    orig = bash_impl.BashArray_GetValues(val)
                    n = len(orig)

                    strs = []
                    i = mops.BigTruncate(offset)
                    count = 0
                    while i < n:
                        if has_length and count == length:  # length could be 0
                            break
                        s = orig[i]
                        if s is not None:  # Unset elements don't count towards the length
                            strs.append(s)
                            count += 1
                        i += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort so
                    # that we only extract the first LENGTH elements of the
                    # indices greater or equal to OFFSET.
                    # i = number of keys that are less than offset
                    i = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        i = i + 1

                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[i:i +
                                                                    length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[i:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    new_list = [arg0_val.s]
                    new_list.extend(strs)
                    strs = new_list

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    It declares only EvalWordToString(); this base implementation raises
    NotImplementedError, so subclasses must override it.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate word w to a single string value.  Abstract here.

        Args:
          w: the word to evaluate
          eval_flags: int bit mask, 0 by default.
            NOTE(review): presumably QUOTE_FNMATCH / QUOTE_ERE, per the
            "For EvalWordToString" constants in this module -- confirm.
        """
        raise NotImplementedError()
480
481
482	def _GetDollarHyphen(exec_opts):
483	# type: (optview.Exec) -> str
484	chars = [] # type: List[str]
485	if exec_opts.interactive():
486	chars.append('i')
487
488	if exec_opts.errexit():
489	chars.append('e')
490	if exec_opts.noglob():
491	chars.append('f')
492	if exec_opts.noexec():
493	chars.append('n')
494	if exec_opts.nounset():
495	chars.append('u')
496	# NO letter for pipefail?
497	if exec_opts.xtrace():
498	chars.append('x')
499	if exec_opts.noclobber():
500	chars.append('C')
501
502	# bash has:
503	# - c for sh -c, i for sh -i (mksh also has this)
504	# - h for hashing (mksh also has this)
505	# - B for brace expansion
506	return ''.join(chars)
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user to home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home dir: $HOME first, then a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            home = pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the lookup fails, this is a fatal error under strict_tilde;
        otherwise the literal ~ or ~user is returned.
        """
        user = part.user_name
        if user is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
550
551
552	class AbstractWordEvaluator(StringWordEvaluator):
553	"""Abstract base class for word evaluators.
554
555	Public entry points:
556	EvalWordToString EvalForPlugin EvalRhsWord
557	EvalWordSequence EvalWordSequence2
558	"""
559
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store collaborators and create the Globber.

    arith_ev, expr_ev, prompt_ev and unsafe_arith are left as None here, so
    they have to be assigned after construction.
    """
    # Collaborators passed in by the caller
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.tilde_ev = tilde_ev
    self.splitter = splitter
    self.errfmt = errfmt

    # Created here
    self.globber = glob_.Globber(exec_opts)

    # Not available yet; filled in later
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith
585
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract hook: this base implementation always raises.

    NOTE(review): presumably subclasses check that the evaluators left as
    None by __init__ (arith_ev, expr_ev, ...) were assigned -- confirm against
    the subclasses.
    """
    raise NotImplementedError()
589
def _EvalCommandSub(self, cs_part, quoted):
    # type: (CommandSub, bool) -> part_value_t
    """Evaluate a command substitution.  Abstract since it has a side effect.

    Args:
      cs_part: the CommandSub word part
      quoted: whether the substitution appears in a quoted context

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError()
594
def _EvalProcessSub(self, cs_part):
    # type: (CommandSub) -> part_value_t
    """Evaluate a process substitution.  Abstract since it has a side effect.

    Args:
      cs_part: the CommandSub word part describing the substitution

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError()
599
def _EvalVarNum(self, var_num):
    # type: (int) -> value_t
    """Look up a numbered argument by delegating to self.mem.GetArgNum().

    Args:
      var_num: the argument number; must not be negative (asserted)
    """
    assert var_num >= 0
    return self.mem.GetArgNum(var_num)
604
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate $?, $@, $*, $- and so forth.

    Args:
      op_id: Id of the special variable
      quoted: whether the substitution is in a double quoted context
      vsub_state: OUT param; join_array is set for $@ and $*

    $@ is special -- it needs to know whether it is in a double quoted
    context.

    - If it's $@ in a double quoted context, return an ARRAY.
    - If it's $@ in a normal context, return a STRING, which then will be
      subject to splitting.
    """
    if op_id == Id.VSub_At:
        # "$@" evaluates to an array, $@ should be decayed
        vsub_state.join_array = not quoted
        return value.BashArray(self.mem.GetArgv())

    if op_id == Id.VSub_Star:
        # $* "$*" are both decayed
        vsub_state.join_array = True
        return value.BashArray(self.mem.GetArgv())

    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    return self.mem.GetSpecialVar(op_id)
634
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
):
    # type: (...) -> bool
    """Apply a test operator: ${a:-x} ${a:+x} ${a:=x} ${a:?x} and the
    variants without the colon.

    Args:
      val: current value of the variable
      op: the operator token (op.op) and its argument word (op.arg_word)
      quoted: whether the substitution is in a quoted context
      part_vals: OUT param; the evaluated argument word is appended here
      vtest_place: name and optional index of the variable, used for ${a:=x}
        assignment and for the ${a:?x} error message
      blame_token: location for fatal errors

    Returns:
      Whether part_vals was mutated

    A value is "falsey" when it is Undef, or an empty BashArray / BashAssoc.
    A Str is falsey only when it's empty AND the operator has a colon.  All
    other value types are never falsey.

    ${a:-} returns part_value[]
    ${a:+} returns part_value[]
    ${a:?error} returns error word?
    ${a:=} returns part_value[] but also needs self.mem for side effects.

    So I guess it should return part_value[], and then a flag for raising an
    error, and then a flag for assigning it?
    The original BracedVarSub will have the name.

    Example of needing multiple part_value[]

      echo X-${a:-'def'"ault"}-X

    We return two part values from the BracedVarSub.  Also consider:

      echo ${a:-x"$@"x}
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            # Only the colon variants treat the empty string like unset
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # TODO: allow undefined
            is_falsey = len(val.strs) == 0

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            is_falsey = len(val.d) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Blame the substitution, like the ${a:?} error below does
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array' could
                # avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
806
def _Count(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    Str is measured in UTF-8 characters; BashArray, BashAssoc and SparseArray
    are measured by their bash_impl *_Count() functions.

    When a string can't be counted (error.Strict), the error is re-raised
    under strict_word_eval; otherwise a warning is printed and -1 is returned.

    Raises:
      error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?

            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                num = string_ops.CountUtf8Chars(val.s)
            except error.Strict as e:
                # Add this here so we don't have to add it so far down the
                # stack.
                # TODO: It's better to show BOTH this CODE and the actual DATA
                # somehow.
                e.location = token

                if not self.exec_opts.strict_word_eval():
                    # NOTE: Doesn't make the command exit with 1; it just
                    # returns a length of -1.
                    self.errfmt.PrettyPrintError(e, prefix='warning: ')
                    return -1
                raise

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            num = bash_impl.BashArray_Count(val)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            num = bash_impl.BashAssoc_Count(val)

        elif case(value_e.SparseArray):
            val = cast(value.SparseArray, UP_val)
            num = bash_impl.SparseArray_Count(val)

        else:
            raise error.TypeErr(
                val, "Length op expected Str, BashArray, BashAssoc", token)

    return num
853
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    Args:
      val: a BashArray or a BashAssoc
      token: location to blame in the type error

    Returns:
      value.BashArray of the indices (as strings) or of the assoc keys.

    Raises:
      error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            keys = bash_impl.BashAssoc_GetKeys(val)
            return value.BashArray(keys)

        else:
            # Name the types that are actually accepted above; a Str is NOT
            # one of them.
            raise error.TypeErr(val, 'Keys op expected BashArray, BashAssoc',
                                token)
875
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    The value of the variable is taken as the text of another variable
    reference, which is parsed and then evaluated.

    Args:
      val: value holding the reference text
      blame_tok: 'foo' for ${!foo}
      quoted, vsub_state, vtest_place: passed through to _VarRefValue

    Raises:
      error.TypeErr if val isn't Undef, Str, BashArray or BashAssoc.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            ref_text = val.s

        elif case(value_e.Undef):
            return value.Undef  # ${!undef} is just weird bash behavior

        elif case(value_e.BashArray):  # caught earlier but OK
            val = cast(value.BashArray, UP_val)
            array_strs = bash_impl.BashArray_GetValues(val)
            ref_text = ' '.join(array_strs)

        elif case(value_e.BashAssoc):  # caught earlier but OK
            val = cast(value.BashAssoc, UP_val)
            assoc_strs = bash_impl.BashAssoc_GetValues(val)
            ref_text = ' '.join(assoc_strs)

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)

    parsed_ref = self.unsafe_arith.ParseVarRef(ref_text, blame_tok)
    return self._VarRefValue(parsed_ref, quoted, vsub_state, vtest_place)
905
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix operator of Kind.VOp1, e.g. ${x#prefix}.

    Args:
      val: the value to transform; must not be Undef (asserted)
      op: the operator token (op.op) and its pattern word (op.arg_word)

    Returns:
      value.Str for a Str input.  For BashArray / BashAssoc, a value.BashArray
      with the operator applied to each defined element.

    Raises:
      error.TypeErr for any other value type.
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)

    if op_kind == Kind.VOp1:
        # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
        # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
        # shortcut for constant strings.
        arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
        assert arg_val.tag() == value_e.Str

        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Str):
                val = cast(value.Str, UP_val)
                s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
                                               has_extglob)
                #log('%r %r -> %r', val.s, arg_val.s, s)
                new_val = value.Str(s)  # type: value_t

            elif case(value_e.BashArray, value_e.BashAssoc):
                # get values
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = bash_impl.BashArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this too.
                strs = []  # type: List[str]
                for s in values:
                    # BashArray values may contain None for unset entries;
                    # the string op only accepts real strings, so skip them.
                    if s is None:
                        continue
                    strs.append(
                        string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
                                                   has_extglob))
                new_val = value.BashArray(strs)

            else:
                raise error.TypeErr(
                    val, 'Unary op expected Str, BashArray, BashAssoc',
                    op.op)

    else:
        raise AssertionError(Kind_str(op_kind))

    return new_val
955
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply pattern substitution, as in ${x//GLOB/replacement}.

    Args:
      val: Str, BashArray or BashAssoc to operate on
      op: carries the pattern word (op.pat), the optional replacement word
        (op.replace) and the slash token used for locations

    Returns:
      value.Str for a Str input.  For BashArray / BashAssoc, a value.BashArray
      with the replacement applied to each defined element.

    Raises:
      Fatal error via e_die if the pattern contains an extended glob;
      error.TypeErr for any other value type.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from !(*.py) to
    # ERE!  You would need an engine that supports negation?  (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s
    else:
        replace_str = ''

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    with tagswitch(val) as case2:
        if case2(value_e.Str):
            str_val = cast(value.Str, val)
            s = replacer.Replace(str_val.s, op)
            val = value.Str(s)

        elif case2(value_e.BashArray, value_e.BashAssoc):
            if val.tag() == value_e.BashArray:
                array_val = cast(value.BashArray, val)
                values = bash_impl.BashArray_GetValues(array_val)
            elif val.tag() == value_e.BashAssoc:
                assoc_val = cast(value.BashAssoc, val)
                values = bash_impl.BashAssoc_GetValues(assoc_val)
            else:
                raise AssertionError()

            strs = []  # type: List[str]
            for s in values:
                # BashArray values may contain None for unset entries; the
                # replacer only accepts real strings, so skip them.
                if s is None:
                    continue
                strs.append(replacer.Replace(s, op))
            val = value.BashArray(strs)

        else:
            raise error.TypeErr(
                val, 'Pat Sub op expected Str, BashArray, BashAssoc',
                op.slash_tok)

    return val
1009
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate the offset and length of a slice op and apply it to val.

    Args:
      val: the value to slice
      op: holds the arithmetic expressions op.begin and (optionally) op.length
      var_name: None for $* or $@, in which case $0 takes part in the slice
      part: the substitution, passed to _PerformSlice for error locations

    An error.Strict from slicing is re-raised under strict_word_eval.
    Otherwise a warning is printed and the result is an empty string or an
    empty array.
    """
    offset = self.arith_ev.EvalToBigInt(op.begin)

    # length stays at -1, and is ignored, when no length was given
    length = -1
    has_length = False
    if op.length:
        length = self.arith_ev.EvalToInt(op.length)
        has_length = True

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        val = _PerformSlice(val, offset, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        # val is still the input here, because the assignment above didn't
        # complete
        with tagswitch(val) as case2:
            if case2(value_e.Str):
                val = value.Str('')
            elif case2(value_e.BashArray):
                val = value.BashArray([])
            else:
                raise NotImplementedError()
    return val
1041
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value.Str, bool]
    """Apply an operator that takes no argument: @P, @Q or @a.

    Args:
      val: the value the operator is applied to
      op: the operator token; op.id selects the behavior
      var_name: variable name, or None (e.g. ${?@a} is allowed)
      vsub_token, vsub_state: passed to _ProcessUndef for undefined values

    Returns:
      (result, quoted2).  quoted2 is True only when @Q is applied to a Str.

    Dies (e_die) for an unsupported value type or an unimplemented operator.
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # @P: evaluate the string as a prompt
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # @Q: shell-encode the value
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.BashAssoc):
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    # Unset array entries (None) are dropped
                    values = [s for s in bash_impl.BashArray_GetValues(val) if s is not None]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                # Each element is encoded separately, then joined by spaces
                result = value.Str(' '.join(tmp))
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # @a: report attribute letters
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # We're simulating -a and -A from the value type, plus r, x and n
        # from the variable's cell flags below.  See
        # spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(val) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1127
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the whole-array bracket ops ${a[@]} and ${a[*]}.

    The value is returned unmodified.  The work is done through side effects
    on vsub_state: join_array is always set, and array_ref is set when the
    value is undefined.
    """
    bracket = cast(bracket_op.WholeArray, part.bracket_op)
    op_id = bracket.op_id

    if op_id == Id.Arith_Star:
        op_str = '*'
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
    elif op_id == Id.Lit_At:
        op_str = '@'
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
    else:
        raise AssertionError(op_id)  # unknown

    tag = val.tag()
    if tag == value_e.Undef:
        # Remember the token of the array reference for a later error
        # message.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str and self.exec_opts.strict_array():
        e_die("Can't index string with %s" % op_str, loc.WordPart(part))

    # BashArray, SparseArray and BashAssoc need no work here.  Other YSH
    # types (List, Dict, Float, ...) are unsupported, but they are returned
    # as-is because the error is printed later.
    return val
1160
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process an element lookup like ${a[i+1]} or ${assoc[key]}.

    Args:
      val: the value being indexed
      part: the ${} node; its bracket_op holds the index expression
      vtest_place: out param; its index is set to the evaluated index/key

    Returns:
      value.Str for the element, value.Undef when the element lookup yields
      None, or val itself when val is already Undef.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    tag = val.tag()

    if tag == value_e.Undef:
        return val  # it will be checked later

    if tag == value_e.Str:
        # Bash treats any string as an array, so we can't add our own
        # behavior here without making valid OSH invalid bash.
        e_die("Can't index string %r with integer" % part.var_name,
              part.name_tok)

    elif tag == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        index = self.arith_ev.EvalToInt(index_expr)
        vtest_place.index = a_index.Int(index)

        s, error_code = bash_impl.BashArray_GetElement(array_val, index)
        if error_code == error_code_e.IndexOutOfRange:
            # Note: Bash outputs a warning but does not make it a real
            # error.  We follow the Bash behavior here.
            length = bash_impl.BashArray_Length(array_val)
            self.errfmt.Print_(
                "Index %d out of bounds for array of length %d" %
                (index, length),
                blame_loc=part.name_tok)

    elif tag == value_e.SparseArray:
        sparse_val = cast(value.SparseArray, UP_val)
        big_index = self.arith_ev.EvalToBigInt(index_expr)
        vtest_place.index = a_index.Int(mops.BigTruncate(big_index))

        s, error_code = bash_impl.SparseArray_GetElement(
            sparse_val, big_index)
        if error_code == error_code_e.IndexOutOfRange:
            # Same as above: warn, but don't fail.
            big_length = bash_impl.SparseArray_Length(sparse_val)
            self.errfmt.Print_(
                "Index %s out of bounds for array of length %s" %
                (mops.ToStr(big_index), mops.ToStr(big_length)),
                blame_loc=part.name_tok)

    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, UP_val)
        # Location could also be attached to bracket_op?  But
        # arith_expr.VarSub works OK too
        key = self.arith_ev.EvalWordToString(
            index_expr, blame_loc=location.TokenForArith(index_expr))

        vtest_place.index = a_index.Str(key)  # out param
        s = bash_impl.BashAssoc_GetElement(assoc_val, key)

    else:
        raise error.TypeErr(val, 'Index op expected BashArray, BashAssoc',
                            loc.WordPart(part))

    # Shared tail for the three container types
    if s is None:
        return value.Undef
    return value.Str(s)
1239
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part.

    Args:
      parts: the word parts between the double quotes
      part_vals: output param to append to.

    Arrays can come out of this:

      $ a=(1 2); b=(3); $ c=(4 5)
      $ argv "${a[@]}${b[@]}${c[@]}"
      ['1', '234', '5']

    and so can multiple parts:

      $ argv "${a[@]}${undef[@]:-${c[@]}}"
      ['1', '24', '5']
    """
    if len(parts):
        for child in parts:
            self._EvalWordPart(child, part_vals, QUOTED)
    else:
        # Special case for "".  The parser outputs (DoubleQuoted []) rather
        # than (DoubleQuoted [Literal '']), so the empty quoted piece has to
        # be produced here.
        part_vals.append(Piece('', True, False))
1266
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors are attributed to dq_part.left.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    result = self._ConcatPartVals(evaluated, dq_part.left)
    return result
1276
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Join a BashArray into one string, e.g. to decay $*.

    Elements that are None are skipped; the separator comes from
    self.splitter.GetJoinChar().
    """
    assert val.tag() == value_e.BashArray, val
    joiner = self.splitter.GetJoinChar()
    present = [s for s in bash_impl.BashArray_GetValues(val) if s is not None]
    joined = joiner.join(present)
    return value.Str(joined)
1284
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty value, or fail under nounset.

    Args:
      val: any value; anything other than Undef is returned unchanged
      name_tok: token of the substitution, used for the error message
      vsub_state: if array_ref is set, the undefined thing is an array

    Returns:
      val itself, an empty value.BashArray (array reference), or an empty
      value.Str.

    With nounset on, an undefined value is reported with e_die() instead.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    array_tok = vsub_state.array_ref
    if array_tok is not None:
        if self.exec_opts.nounset():
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
        return value.BashArray([])

    if self.exec_opts.nounset():
        name = lexer.TokenVal(name_tok)
        if name.startswith('$'):
            name = name[1:]  # report $foo as 'foo'
        e_die('Undefined variable %r' % name, name_tok)
    return value.Str('')
1306
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a ${} substitution, if it has one.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().
    Without one, a named BashArray / BashAssoc either decays to a scalar
    (when ShouldArrayDecay() allows it) or is rejected with e_die(), unless
    vsub_state.is_type_query is set.
    """
    bracket = part.bracket_op
    if bracket:
        tag = bracket.tag()
        if tag == bracket_op_e.WholeArray:
            return self._WholeArray(val, part, quoted, vsub_state)
        if tag == bracket_op_e.ArrayIndex:
            return self._ArrayIndex(val, part, vtest_place)
        raise AssertionError(tag)

    # no bracket op
    var_name = vtest_place.name
    if (var_name is None or
            val.tag() not in (value_e.BashArray, value_e.BashAssoc) or
            vsub_state.is_type_query):
        return val

    no_other_ops = not (part.prefix_op or part.suffix_op)
    if not ShouldArrayDecay(var_name, self.exec_opts, no_other_ops):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)" %
            var_name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1336
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value a ${} node refers to and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t.
    """
    # 1. Evaluate from (var_name, var_num, token Id) -> value
    name_id = part.name_tok.id
    if name_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)
    elif name_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))
    else:
        # $* decays
        val = self._EvalSpecialVar(name_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # Evaluate the bracket op with _allow_command_sub turned off
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1366
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the ${} node (name token, optional prefix / bracket / suffix ops)
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context

    There are three early returns, each taken after the function has
    appended what it needs (or after the callee has): the ${!prefix@} name
    query, the ${#x} length op, and a test op for which _ApplyTestOp()
    returns true.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other form
                # is joined into a single piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    # Look at the suffix op BEFORE the bracket op, because
    # vsub_state.is_type_query is read by _EvalBracketOp() below.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_State.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op at all: only the undef check remains.
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().  quoted2 is the extra "quoted" bit returned by _Nullary().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1554
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate String and Array part values into one str.

    Args:
      part_vals: part values; only String and Array are accepted
      location: where to blame the strict_array error

    Array parts are joined with a space, skipping None entries.  With
    strict_array on, an Array part is reported with e_die() instead.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_pv)
            text = piece.s

        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, UP_pv)
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"
                e_die("Illegal array word part (strict_array)", location)

            # It appears to not respect IFS
            # TODO: eliminate double join()?
            present = [s for s in arr.strs if s is not None]
            text = ' '.join(present)

        else:
            raise AssertionError()

        chunks.append(text)

    return ''.join(chunks)
1584
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution, unquoted, to a single str.

    The part values produced by _EvalBracedVarSub() are concatenated with
    _ConcatPartVals(); errors there are blamed on the ${ location.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    result = self._ConcatPartVals(evaluated, part.left)
    return result
1595
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate an unbraced substitution and append it to part_vals.

    Args:
      part: the substitution node; part.tok identifies the variable
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context
    """
    tok = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    tok_id = tok.id
    if tok_id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    val = self._ProcessUndef(val, tok, vsub_state)

    # An array is joined only when the special-var evaluation asked for it.
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1636
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate an unbraced substitution, unquoted, to a single str.

    Used for substitutions in YSH expressions, e.g. the $foo in

        var x = "$foo-${foo}"

    Errors from _ConcatPartVals() are blamed on node.tok.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    result = self._ConcatPartVals(evaluated, node.tok)
    return result
1646
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Args:
      part: the extended glob node, with an operator token and a list of arms
      part_vals: output param to append to.

    The opening operator, the '|' between arms and the closing ')' are
    appended as unquoted pieces that are not split.  Each arm is evaluated
    with the EXTGLOB_NESTED flag, so nested extended globs are flattened
    into the same list.
    """
    op = part.op
    if op.id == Id.ExtGlob_Comma:
        op_str = '@('
    else:
        op_str = lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    for i, w in enumerate(part.arms):
        if i != 0:
            # The separator must be a bare '|'.  A backslash in front of it
            # would turn the alternation operator into a literal pipe in the
            # resulting pattern.
            part_vals.append(Piece('|', False, False))  # separator
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(w, part_vals, EXTGLOB_NESTED)
    part_vals.append(Piece(')', False, False))  # closing )
1664
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values
      w: the word, used as the error location
      glob_parts: output param; each ExtGlob node contributes a single '*'
      fnmatch_parts: output param; receives the full pattern text
    """
    for pv in part_vals:
        UP_pv = pv
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_pv)
            text = piece.s
            # Quoted text is escaped unless noglob is on.  Pieces appended
            # by _EvalExtGlob() are not quoted, so they are kept as-is.
            if piece.quoted and not self.exec_opts.noglob():
                text = glob_.GlobEscape(text)
            glob_parts.append(text)
            fnmatch_parts.append(text)  # from _EvalExtGlob()

        elif tag == part_value_e.Array:
            # Disallow array
            e_die("Extended globs and arrays can't appear in the same word",
                  w)

        elif tag == part_value_e.ExtGlob:
            ext = cast(part_value.ExtGlob, UP_pv)
            # Keep appending to fnmatch_parts, but the nested glob parts are
            # thrown away and replaced with '*'.
            self._TranslateExtGlob(ext.part_vals, w, [], fnmatch_parts)
            glob_parts.append('*')

        else:
            raise AssertionError()
1700
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate one word part, appending its part value(s) to part_vals.

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to.
      flags: bit flags; QUOTED and IS_SUBST are the ones consulted here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    tag = part.tag()

    if tag == word_part_e.ShArrayLiteral:
        array_lit = cast(ShArrayLiteral, UP_part)
        e_die("Unexpected array literal", loc.WordPart(array_lit))

    elif tag == word_part_e.BashAssocLiteral:
        assoc_lit = cast(word_part.BashAssocLiteral, UP_part)
        e_die("Unexpected associative array literal",
              loc.WordPart(assoc_lit))

    elif tag == word_part_e.Literal:
        tok = cast(Token, UP_part)
        # Split if it's in a substitution.
        # That is: echo is not split, but ${foo:-echo} is split
        part_vals.append(Piece(lexer.LazyStr(tok), quoted, is_subst))

    elif tag == word_part_e.EscapedLiteral:
        escaped = cast(word_part.EscapedLiteral, UP_part)
        part_vals.append(Piece(escaped.ch, True, False))

    elif tag == word_part_e.SingleQuoted:
        sq = cast(SingleQuoted, UP_part)
        part_vals.append(Piece(sq.sval, True, False))

    elif tag == word_part_e.DoubleQuoted:
        dq = cast(DoubleQuoted, UP_part)
        self._EvalDoubleQuoted(dq.parts, part_vals)

    elif tag == word_part_e.CommandSub:
        cs = cast(CommandSub, UP_part)
        left_id = cs.left_token.id
        if left_id in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
            sv = self._EvalCommandSub(cs, quoted)  # type: part_value_t

        elif left_id in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
            sv = self._EvalProcessSub(cs)

        else:
            raise AssertionError(left_id)

        part_vals.append(sv)

    elif tag == word_part_e.SimpleVarSub:
        simple = cast(SimpleVarSub, UP_part)
        self._EvalSimpleVarSub(simple, part_vals, quoted)

    elif tag == word_part_e.BracedVarSub:
        braced = cast(BracedVarSub, UP_part)
        self._EvalBracedVarSub(braced, part_vals, quoted)

    elif tag == word_part_e.TildeSub:
        tilde = cast(word_part.TildeSub, UP_part)
        # We never parse a quoted string into a TildeSub.
        assert not quoted
        home = self.tilde_ev.Eval(tilde)
        # NOT split even when unquoted!
        part_vals.append(Piece(home, True, False))

    elif tag == word_part_e.ArithSub:
        arith = cast(word_part.ArithSub, UP_part)
        num = self.arith_ev.EvalToBigInt(arith.anode)
        part_vals.append(Piece(mops.ToStr(num), quoted, not quoted))

    elif tag == word_part_e.ExtGlob:
        ext = cast(word_part.ExtGlob, UP_part)
        #if not self.exec_opts.extglob():
        #  die()  # disallow at runtime?  Don't just decay

        # Create a node to hold the flattened tree.  The caller decides
        # whether to pass it to fnmatch() or replace it with '*' and pass it
        # to glob().
        flattened = []  # type: List[part_value_t]
        self._EvalExtGlob(ext, flattened)  # flattens tree
        part_vals.append(part_value.ExtGlob(flattened))

    elif tag == word_part_e.BashRegexGroup:
        group = cast(word_part.BashRegexGroup, UP_part)

        part_vals.append(Piece('(', False, False))  # not quoted
        if group.child:
            self._EvalWordToParts(group.child, part_vals, 0)
        part_vals.append(Piece(')', False, False))

    elif tag == word_part_e.Splice:
        splice = cast(word_part.Splice, UP_part)
        spliced_val = self.mem.GetValue(splice.var_name)

        items = self.expr_ev.SpliceValue(spliced_val, splice)
        part_vals.append(part_value.Array(items))

    elif tag == word_part_e.ExprSub:
        expr_sub = cast(word_part.ExprSub, UP_part)
        part_vals.append(self.expr_ev.EvalExprSub(expr_sub))

    elif tag == word_part_e.ZshVarSub:
        zsh = cast(word_part.ZshVarSub, UP_part)
        e_die("ZSH var subs are parsed, but can't be evaluated", zsh.left)

    else:
        raise AssertionError(tag)
1817
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending its part values to part_vals.

    An Empty word contributes one empty piece whose quoting follows the
    QUOTED flag; a Compound word is handed to _EvalWordToParts().
    """
    UP_w = w
    tag = w.tag()

    if tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, UP_w)
        self._EvalWordToParts(compound, part_vals, eval_flags)

    elif tag == rhs_word_e.Empty:
        is_quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', is_quoted, not is_quoted))

    else:
        raise AssertionError()
1833
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Appends to part_vals.  Note that this is a TREE.

    Words containing an extended glob are a special case, because of the
    way we use glob() and then fnmatch(..., FNM_EXTMATCH) to implement
    them.  It's hard to carry that extra information all the way past the
    word splitting stage.

    OSH semantic limitations: if a word has an extended glob part, then
    1. It can't have an array
    2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for child in w.parts:
        if child.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(child, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # From here on, we parsed at least one word_part.ExtGlob()
    if bool(eval_flags & EXTGLOB_FILES):
        # Caller REQUESTED extglob evaluation.  Treat the WHOLE word as a
        # pattern.  We need TWO VARIANTS of the word because of the way we
        # use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        for_glob = []  # type: List[str]
        for_fnmatch = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, for_glob, for_fnmatch)

        glob_pat = ''.join(for_glob)
        fnmatch_pat = ''.join(for_fnmatch)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of a nested extended glob, so the
        # inner parts are passed up unchanged.
        part_vals.extend(evaluated)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1890
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: part values to flatten; ExtGlob nodes are handled by a
        recursive call
      w: the word, used only as the error location (could just be a span ID)
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE picks the escaping applied to
        quoted pieces
      strs: output param to append to.
    """
    for pv in part_vals:
        UP_pv = pv
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_pv)
            text = piece.s
            if piece.quoted:
                # Quoted text inside a pattern is escaped with the escaper
                # that matches the requested pattern language.
                if eval_flags & QUOTE_FNMATCH:
                    text = glob_.GlobEscape(text)
                elif eval_flags & QUOTE_ERE:
                    text = glob_.ExtendedRegexEscape(text)
            strs.append(text)

        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, UP_pv)
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"

                # TODO: This attributes too coarsely, to the word rather
                # than the parts.  Problem: the word is a TREE of parts, but
                # we only have a flat list of part_vals.  The only case
                # where we really get arrays is "$@", "${a[@]}",
                # "${a[@]//pat/replace}", etc.
                e_die(
                    "This word should yield a string, but it contains an array",
                    w)

                # TODO: Maybe add detail like this.
                #e_die('RHS of assignment should only have strings.  '
                #      'To assign arrays, use b=( "${a[@]}" )')
            else:
                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = [s for s in arr.strs if s is not None]
                strs.append(' '.join(present))

        elif tag == part_value_e.ExtGlob:
            ext = cast(part_value.ExtGlob, UP_pv)

            # Extended globs are only allowed where we expect them!
            if not bool(eval_flags & QUOTE_FNMATCH):
                e_die('extended glob not allowed in this word', w)

            # recursive call
            self._PartValsToString(ext.part_vals, w, eval_flags, strs)

        else:
            raise AssertionError()
1946
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: must be a CompoundWord
      eval_flags: may contain a quoting algorithm.  It is applied when the
        part values are turned into strings, not when the parts are
        evaluated.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Fast path.  QUOTE_FNMATCH etc. breaks this optimization, so it is
    # only tried without flags.
    if eval_flags == 0:
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            return value.Str(fast_str)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark
    evaluated = []  # type: List[part_value_t]
    for child in w.parts:
        # the parts are always evaluated with flags 0, which is slightly
        # confusing
        self._EvalWordPart(child, evaluated, 0)

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, out)
    joined = ''.join(out)
    return value.Str(joined)
1974
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but also reports whether we got an ExtGlob.

    Returns:
      (pattern, has_extglob).  An Empty word gives ('', False).  Otherwise
      the pattern is built with QUOTE_FNMATCH, and has_extglob says whether
      any top-level part of the word is an ExtGlob.
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for child in w.parts:
        # the parts themselves are evaluated with flags 0
        self._EvalWordPart(child, evaluated, 0)
        if child.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, out)
    pattern = value.Str(''.join(out))
    return pattern, saw_extglob
1995
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42) are
    handled here: a failure is turned into a placeholder string of the form
    <...> rather than propagated.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers "sandboxes" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as e:
            msg = '<Runtime error: %s>' % e.UserErrorString()
            result = value.Str(msg)

        except (IOError, OSError) as e:
            msg = '<I/O error: %s>' % pyutil.strerror(e)
            result = value.Str(msg)

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2018
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Returns:
      value.Str('') for an Empty word, value.BashArray for a=(1 2),
      value.BashAssoc for an associative array literal, and otherwise the
      result of EvalWordToString().
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so its tag is compared against rhs_word_e, as
    # EvalWordToPattern() does.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # A later pair with the same key overwrites an earlier one.
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2055
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Split and glob one frame of pieces, appending the results to argv.

    Args:
      frame: the pieces that make up one frame of a word
      argv: output list; zero or more strings are appended

    Raises:
      error.FailGlob: when the globber reports a negative count for a
        pattern
    """
    has_text = False  # is any piece non-empty?
    has_quoted = False
    has_unquoted = False

    #log('--- frame %s', frame)

    for piece in frame:
        if len(piece.s) != 0:
            has_text = True

        if piece.quoted:
            has_quoted = True
        else:
            has_unquoted = True

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if not has_text and not has_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if not has_unquoted:
        quoted_strs = [piece.s for piece in frame]
        argv.append(''.join(quoted_strs))
        return

    glob_enabled = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # First layer: escaping for the globber
        if glob_enabled and piece.quoted:
            escaped = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            escaped = _BackslashEscape(piece.s)

        # Second layer: escaping for the splitter
        if piece.do_split:
            escaped = _BackslashEscape(escaped)
        else:
            escaped = self.splitter.Escape(escaped)

        frags.append(escaped)

    if 0:
        log('---')
        log('FRAGS')
        for i, frag in enumerate(frags):
            log('(%d) %s', i, frag)
        log('')

    flat = ''.join(frags)
    #log('flat: %r', flat)

    split_args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(split_args) == 0 and has_quoted:
        argv.append('')
        return

    #log('split args: %r', split_args)
    for arg in split_args:
        if not glob_.LooksLikeGlob(arg):
            argv.append(glob_.GlobUnescape(arg))
            continue

        num_matched = self.globber.Expand(arg, argv)
        if num_matched < 0:
            # TODO: location info, with span IDs carried through the frame
            raise error.FailGlob('Pattern %r matched no files' % arg,
                                 loc.Missing)
2140
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of strings, without splitting or globbing.

    Helper for _EvalAssignBuiltin; splitting and globbing are disabled for
    assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalWordToParts(w, evaluated, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(evaluated):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        # Join the pieces as-is: no split or glob
        frame_strs = [piece.s for piece in frame]
        result.append(''.join(frame_strs))

    #log('argv: %s', result)
    return result
2160
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: id of the assignment builtin
      arg0: evaluated name of the builtin; it becomes the first flag
      words: all the words of the command
      meta_offset: evaluation starts at words[meta_offset + 1]

    Returns:
      cmd_value.Assign built from the flags, their locations (kept parallel
      to the flags), and the assignment pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # e.g. the names in 'declare -f name1 name2'.
                        # Record the location too, so that flag_locs stays
                        # parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2247
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate the words as an assignment builtin, if arg0 names one.

    Returns:
      The cmd_value.Assign from _EvalAssignBuiltin, or None when
      consts.LookupAssignBuiltin() doesn't recognize arg0.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2255
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from an evaluated part.

    Only an unquoted part_value String is looked up; anything else gives
    None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2264
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Words are not split.  Only words for which glob_.LooksLikeStaticGlob()
    is true are globbed.

    Returns:
      The result of _DetectAssignBuiltinStr if the first word names an
      assignment builtin and allow_assign is set; otherwise a
      cmd_value.Argv whose strings and locations are parallel.

    Raises:
      error.FailGlob: when the globber reports a negative count
    """
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if i == meta_offset and allow_assign:
            first_strs = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first_strs) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    first_strs[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            strs.extend(first_strs)
            for _ in xrange(len(first_strs)):
                locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            num_matched = self.globber.Expand(pat.s, strs)
            if num_matched < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            for _ in xrange(num_matched):
                locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        if 0:
            log('')
            log('Static: part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        # Still need to process
        frames = _MakeWordFrames(part_vals)

        if 0:
            log('')
            log('Static: frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # We will still allow x"${a[@]}"x, though it's deprecated by @a,
        # which disallows such expressions at parse time.
        for frame in frames:
            if len(frame) == 0:  # empty array gives empty frame!
                continue
            frame_strs = [piece.s for piece in frame]
            strs.append(''.join(frame_strs))  # no split or glob
            locs.append(w)

    assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2327
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      A cmd_value.Assign when an assignment builtin is detected (only with
      allow_assign); otherwise a cmd_value.Argv whose strings and locations
      are parallel.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Stages of evaluation
    #
    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?
    #    Definitely off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # Number of leading meta builtins:
    #   0 for declare x
    #   1 for builtin declare x
    #   2 for command builtin declare x
    #   etc.
    meta_offset = 0

    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if i == meta_offset and allow_assign:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            assert len(strs) == len(locs), strs
            continue

        # strs and locs have equal length at this point, so remember how
        # many args there were before this word adds its own.
        num_before = len(strs)

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin,
        # and change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if i == meta_offset and allow_assign:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more
        # args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.
        num_added = len(strs) - num_before
        for _ in xrange(num_added):
            locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2435
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a list of strings, for arrays and for loops.

    Assignment builtins aren't allowed here, so the result of
    EvalWordSequence2 is always a cmd_value.Argv.
    """
    # The is_last_cmd argument is irrelevant here, so pass False.
    UP_cmd_val = self.EvalWordSequence2(words, False)
    assert UP_cmd_val.tag() == cmd_value_e.Argv

    argv_val = cast(cmd_value.Argv, UP_cmd_val)
    return argv_val.argv
2446
2447
class NormalWordEvaluator(AbstractWordEvaluator):
    """A word evaluator that runs command and process subs via shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Not set here; CheckCircularDeps() asserts that it was set later.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators and executor have been set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and turn its stdout into a part value.

        Raises:
          error.Structured: with status 4 when the output of a command sub
            starting with Id.Left_AtParen fails to decode as J8 lines
        """
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # An unquoted result is subject to splitting
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path it gives as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2494
2495
# Placeholder that CompletionWordEvaluator returns in place of command sub
# output, since it has no access to an executor to run the command.
_DUMMY = '__NO_COMMAND_SUB__'
2497
2498
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to placeholder strings instead of
    being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators this class depends on have been set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder part value; the command is not run."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder Piece; the process sub is not run."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2538
2539
2540	# vim: sw=4