osh/word_eval.py

OILS / osh / word_eval.py View on Github | oils.pub

2542 lines, 1548 significant

1	"""
2	word_eval.py - Evaluator for the word language.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6	from _devbuild.gen.syntax_asdl import (
7	Token,
8	SimpleVarSub,
9	loc,
10	loc_t,
11	BracedVarSub,
12	CommandSub,
13	bracket_op,
14	bracket_op_e,
15	suffix_op,
16	suffix_op_e,
17	ShArrayLiteral,
18	SingleQuoted,
19	DoubleQuoted,
20	word_e,
21	word_t,
22	CompoundWord,
23	rhs_word,
24	rhs_word_e,
25	rhs_word_t,
26	word_part,
27	word_part_e,
28	)
29	from _devbuild.gen.runtime_asdl import (
30	part_value,
31	part_value_e,
32	part_value_t,
33	cmd_value,
34	cmd_value_e,
35	cmd_value_t,
36	error_code_e,
37	AssignArg,
38	a_index,
39	a_index_e,
40	VTestPlace,
41	VarSubState,
42	Piece,
43	)
44	from _devbuild.gen.option_asdl import option_i, builtin_i
45	from _devbuild.gen.value_asdl import (
46	value,
47	value_e,
48	value_t,
49	sh_lvalue,
50	sh_lvalue_t,
51	)
52	from core import bash_impl
53	from core import error
54	from core import pyos
55	from core import pyutil
56	from core import state
57	from display import ui
58	from core import util
59	from data_lang import j8
60	from data_lang import j8_lite
61	from core.error import e_die
62	from frontend import consts
63	from frontend import lexer
64	from frontend import location
65	from mycpp import mops
66	from mycpp.mylib import log, tagswitch, NewDict
67	from osh import braces
68	from osh import glob_
69	from osh import string_ops
70	from osh import word_
71	from ysh import expr_eval
72	from ysh import val_ops
73
74	from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76	if TYPE_CHECKING:
77	from _devbuild.gen.syntax_asdl import word_part_t
78	from _devbuild.gen.option_asdl import builtin_t
79	from core import optview
80	from core.state import Mem
81	from core.vm import _Executor
82	from osh.split import SplitContext
83	from osh import prompt
84	from osh import sh_expr_eval
85
# Bit flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Each flag is a distinct bit, so they can be combined with | and tested with &.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} is allowed to mean ${a[0]}.

    Without strict_array, decay is always allowed.  With strict_array, only a
    plain var sub of one of the names in _STRING_AND_ARRAY still decays.
    """
    if not exec_opts.strict_array():
        return True
    return is_plain_var_sub and var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    For a BashArray this is element 0; for a BashAssoc it is the entry with
    key '0'.  A missing element yields value.Undef.  Any other value type is a
    programming error (AssertionError).
    """
    tag = val.tag()
    if tag == value_e.BashArray:
        indexed = cast(value.BashArray, val)
        first, status = bash_impl.BashArray_GetElement(indexed, 0)

        # Index 0 should never produce the out-of-bound index error.
        assert status == error_code_e.OK

    elif tag == value_e.BashAssoc:
        keyed = cast(value.BashAssoc, val)
        first = bash_impl.BashAssoc_GetElement(keyed, '0')

    else:
        raise AssertionError(val.tag())

    if first is not None:
        return value.Str(first)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' meta-builtin.

    All of these cases have to be detected:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Assignment builtins have different WORD EVALUATION RULES for a=$x (no word
    splitting), so it seems hard to do this in meta_oils.Builtin() or
    meta_oils.Command().
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an unquoted string naming a meta-builtin."""
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse an argument to declare, export, etc. at runtime.

    This is the dynamic fallback for the static parsing done below.  Dies with
    blame_word as the location when arg doesn't match consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    match = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if match is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Group 1 is the name, group 3 the operator, group 4 the value.  Group 2
    # only exists for grouping; ERE doesn't have non-capturing groups.
    name = match[1]
    operator = match[3]
    assert operator is not None, operator

    if len(operator) == 0:  # declare NAME
        rhs = None  # type: Optional[value_t]
        is_append = False
    else:  # declare NAME=
        rhs = value.Str(match[4])
        is_append = operator[0] == '+'

    return AssignArg(name, rhs, is_append, blame_word)
186
187
188	# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189	def _BackslashEscape(s):
190	# type: (str) -> str
191	"""Double up backslashes.
192
193	Useful for strings about to be globbed and strings about to be IFS
194	escaped.
195	"""
196	return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Helper for VarSub evaluation.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: The value being substituted into the word.
      quoted: Whether the substitution is quoted.  String results are built
        as Piece(s, quoted, not quoted).
      part_loc: The word part, used as the location for error messages.

    Returns:
      A Piece for Undef, Str and the stringifiable YSH types, or a
      part_value.Array for BashArray and BashAssoc.

    Raises:
      error.TypeErr: If the value type can't be substituted into a word.
    """
    UP_val = val

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped _ProcessUndef,
            # but we have to append to the empty string.
            return Piece('', quoted, not quoted)

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            return Piece(val.s, quoted, not quoted)

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            return part_value.Array(bash_impl.BashArray_GetValues(val))

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(bash_impl.BashAssoc_GetValues(val))

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            s = val_ops.Stringify(val, loc.WordPart(part_loc), 'Word eval ')
            return Piece(s, quoted, not quoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    # Every branch above returns or raises; this only satisfies the C++ compiler.
    raise AssertionError('for -Wreturn-type in C++')
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the part values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args.  It can never be joined.
    This idea exists because of arrays like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)

    Returns:
      A list of frames.  It always contains at least one frame, which may be
      empty.
    """
    # 'current' always aliases the last frame in 'frames', so appending to it
    # extends that frame in place.
    current = []  # type: List[Piece]
    frames = [current]

    for p in part_vals:
        UP_p = p

        with tagswitch(p) as case:
            if case(part_value_e.String):
                p = cast(Piece, UP_p)
                current.append(p)

            elif case(part_value_e.Array):
                p = cast(part_value.Array, UP_p)

                # The first defined element joins the frame in progress; each
                # later element starts a frame of its own.
                is_first = True
                for s in p.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would have decayed to
                    # a string.
                    piece = Piece(s, True, False)
                    if is_first:
                        current.append(piece)
                        is_first = False
                    else:
                        current = [piece]
                        frames.append(current)  # singleton frame

            else:
                raise AssertionError()

    return frames
307
308
309	# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into a single string, e.g. to decay ${a=x"$@"x}.

    String parts contribute their text as-is.  Array parts contribute their
    defined elements joined by join_char.
    """
    chunks = []  # type: List[str]
    for part in part_vals:
        UP_part = part
        with tagswitch(part) as case:
            if case(part_value_e.String):
                part = cast(Piece, UP_part)
                chunks.append(part.s)
            elif case(part_value_e.Array):
                part = cast(part_value.Array, UP_part)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for item in part.strs:
                    if item is not None:
                        defined.append(item)
                chunks.append(join_char.join(defined))
            else:
                raise AssertionError()
    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Apply a slice with the given offset and optional length to val.

    Args:
      val: The value to slice: Str, BashArray or SparseArray.
      offset: Start of the slice.  A negative offset counts from the end.
      length: Only meaningful when has_length is true.  For strings a negative
        length is an end POSITION counted from the end; for arrays it is a
        fatal error.
      has_length: Whether a length was given.
      part: The var sub, used as the location for errors.
      arg0_val: Not None when the array is the positional parameters ($@ or
        $*), whose offsets count $0.

    Returns:
      A value.Str for strings (sliced by UTF-8 characters), or a
      value.BashArray for arrays.

    Dies via e_die() on a negative array length or an associative array, and
    raises error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            begin = mops.BigTruncate(offset)
            if begin < 0:  # Compute offset with unicode
                # Walk backward from the end, one character at a time
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                offset = mops.Add(offset, array_length)

            # Still negative after adding the length: the slice is empty
            if bash_impl.BigInt_Less(offset, mops.ZERO):
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # Offset 0: $0 is the first element of the result, and
                        # it uses up one unit of the length.
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    orig = bash_impl.BashArray_GetValues(val)
                    n = len(orig)

                    strs = []
                    i = mops.BigTruncate(offset)
                    count = 0
                    while i < n:
                        if has_length and count == length:  # length could be 0
                            break
                        s = orig[i]
                        if s is not None:  # Unset elements don't count towards the length
                            strs.append(s)
                            count += 1
                        i += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort so
                    # that we only extract the first LENGTH elements of the
                    # indices greater or equal to OFFSET.

                    # i becomes the position of the first key >= offset
                    i = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        i = i + 1

                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[i:i +
                                                                    length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[i:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    new_list = [arg0_val.s]
                    new_list.extend(strs)
                    strs = new_list

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    Only declares EvalWordToString(); subclasses provide the implementation.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate word w to a single string.  Must be overridden."""
        raise NotImplementedError()
480
481
482	def _GetDollarHyphen(exec_opts):
483	# type: (optview.Exec) -> str
484	chars = [] # type: List[str]
485	if exec_opts.interactive():
486	chars.append('i')
487
488	if exec_opts.errexit():
489	chars.append('e')
490	if exec_opts.noglob():
491	chars.append('f')
492	if exec_opts.noexec():
493	chars.append('n')
494	if exec_opts.nounset():
495	chars.append('u')
496	# NO letter for pipefail?
497	if exec_opts.xtrace():
498	chars.append('x')
499	if exec_opts.noclobber():
500	chars.append('C')
501
502	# bash has:
503	# - c for sh -c, i for sh -i (mksh also has this)
504	# - h for hashing (mksh also has this)
505	# - B for brace expansion
506	return ''.join(chars)
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user to home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home dir: $HOME first, then a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See issue
        #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            return pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the lookup fails, either die (strict_tilde) or return the tilde
        word literally.
        """
        user = part.user_name
        if user is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
550
551
552	class AbstractWordEvaluator(StringWordEvaluator):
553	"""Abstract base class for word evaluators.
554
555	Public entry points:
556	EvalWordToString EvalForPlugin EvalRhsWord
557	EvalWordSequence EvalWordSequence2
558	"""
559
560	def __init__(
561	self,
562	mem, # type: state.Mem
563	exec_opts, # type: optview.Exec
564	mutable_opts, # type: state.MutableOpts
565	tilde_ev, # type: TildeEvaluator
566	splitter, # type: SplitContext
567	errfmt, # type: ui.ErrorFormatter
568	):
569	# type: (...) -> None
570	self.arith_ev = None # type: sh_expr_eval.ArithEvaluator
571	self.expr_ev = None # type: expr_eval.ExprEvaluator
572	self.prompt_ev = None # type: prompt.Evaluator
573
574	self.unsafe_arith = None # type: sh_expr_eval.UnsafeArith
575
576	self.tilde_ev = tilde_ev
577
578	self.mem = mem # for $HOME, $1, etc.
579	self.exec_opts = exec_opts # for nounset
580	self.mutable_opts = mutable_opts # for _allow_command_sub
581	self.splitter = splitter
582	self.errfmt = errfmt
583
584	self.globber = glob_.Globber(exec_opts)
585
586	def CheckCircularDeps(self):
587	# type: () -> None
588	raise NotImplementedError()
589
590	def _EvalCommandSub(self, cs_part, quoted):
591	# type: (CommandSub, bool) -> part_value_t
592	"""Abstract since it has a side effect."""
593	raise NotImplementedError()
594
595	def _EvalProcessSub(self, cs_part):
596	# type: (CommandSub) -> part_value_t
597	"""Abstract since it has a side effect."""
598	raise NotImplementedError()
599
600	def _EvalVarNum(self, var_num):
601	# type: (int) -> value_t
602	assert var_num >= 0
603	return self.mem.GetArgNum(var_num)
604
605	def _EvalSpecialVar(self, op_id, quoted, vsub_state):
606	# type: (int, bool, VarSubState) -> value_t
607	"""Evaluate $?
608
609	and so forth
610	"""
611	# $@ is special -- it need to know whether it is in a double quoted
612	# context.
613	#
614	# - If it's $@ in a double quoted context, return an ARRAY.
615	# - If it's $@ in a normal context, return a STRING, which then will be
616	# subject to splitting.
617
618	if op_id in (Id.VSub_At, Id.VSub_Star):
619	argv = self.mem.GetArgv()
620	val = value.BashArray(argv) # type: value_t
621	if op_id == Id.VSub_At:
622	# "$@" evaluates to an array, $@ should be decayed
623	vsub_state.join_array = not quoted
624	else: # $* "$*" are both decayed
625	vsub_state.join_array = True
626
627	elif op_id == Id.VSub_Hyphen:
628	val = value.Str(_GetDollarHyphen(self.exec_opts))
629
630	else:
631	val = self.mem.GetSpecialVar(op_id)
632
633	return val
634
635	def _ApplyTestOp(
636	self,
637	val, # type: value_t
638	op, # type: suffix_op.Unary
639	quoted, # type: bool
640	part_vals, # type: Optional[List[part_value_t]]
641	vtest_place, # type: VTestPlace
642	blame_token, # type: Token
643	):
644	# type: (...) -> bool
645	"""
646	Returns:
647	Whether part_vals was mutated
648
649	${a:-} returns part_value[]
650	${a:+} returns part_value[]
651	${a:?error} returns error word?
652	${a:=} returns part_value[] but also needs self.mem for side effects.
653
654	So I guess it should return part_value[], and then a flag for raising an
655	error, and then a flag for assigning it?
656	The original BracedVarSub will have the name.
657
658	Example of needing multiple part_value[]
659
660	echo X-${a:-'def'"ault"}-X
661
662	We return two part values from the BracedVarSub. Also consider:
663
664	echo ${a:-x"$@"x}
665	"""
666	eval_flags = IS_SUBST
667	if quoted:
668	eval_flags \|= QUOTED
669
670	tok = op.op
671	# NOTE: Splicing part_values is necessary because of code like
672	# ${undef:-'a b' c 'd # e'}. Each part_value can have a different
673	# do_glob/do_elide setting.
674	UP_val = val
675	with tagswitch(val) as case:
676	if case(value_e.Undef):
677	is_falsey = True
678
679	elif case(value_e.Str):
680	val = cast(value.Str, UP_val)
681	if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
682	Id.VTest_ColonQMark, Id.VTest_ColonPlus):
683	is_falsey = len(val.s) == 0
684	else:
685	is_falsey = False
686
687	elif case(value_e.BashArray):
688	val = cast(value.BashArray, UP_val)
689	# TODO: allow undefined
690	is_falsey = len(val.strs) == 0
691
692	elif case(value_e.BashAssoc):
693	val = cast(value.BashAssoc, UP_val)
694	is_falsey = len(val.d) == 0
695
696	else:
697	# value.Eggex, etc. are all false
698	is_falsey = False
699
700	if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
701	if is_falsey:
702	self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
703	return True
704	else:
705	return False
706
707	# Inverse of the above.
708	elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
709	if is_falsey:
710	return False
711	else:
712	self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
713	return True
714
715	# Splice and assign
716	elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
717	if is_falsey:
718	# Collect new part vals.
719	assign_part_vals = [] # type: List[part_value_t]
720	self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
721	eval_flags)
722	# Append them to out param AND return them.
723	part_vals.extend(assign_part_vals)
724
725	if vtest_place.name is None:
726	# TODO: error context
727	e_die("Can't assign to special variable")
728	else:
729	# NOTE: This decays arrays too! 'shopt -s strict_array' could
730	# avoid it.
731	rhs_str = _DecayPartValuesToString(
732	assign_part_vals, self.splitter.GetJoinChar())
733	if vtest_place.index is None: # using None when no index
734	lval = location.LName(
735	vtest_place.name) # type: sh_lvalue_t
736	else:
737	var_name = vtest_place.name
738	var_index = vtest_place.index
739	UP_var_index = var_index
740
741	with tagswitch(var_index) as case:
742	if case(a_index_e.Int):
743	var_index = cast(a_index.Int, UP_var_index)
744	lval = sh_lvalue.Indexed(
745	var_name, var_index.i, loc.Missing)
746	elif case(a_index_e.Str):
747	var_index = cast(a_index.Str, UP_var_index)
748	lval = sh_lvalue.Keyed(var_name, var_index.s,
749	loc.Missing)
750	else:
751	raise AssertionError()
752
753	state.OshLanguageSetValue(self.mem, lval,
754	value.Str(rhs_str))
755	return True
756
757	else:
758	return False
759
760	elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
761	if is_falsey:
762	# The arg is the error message
763	error_part_vals = [] # type: List[part_value_t]
764	self._EvalRhsWordToParts(op.arg_word, error_part_vals,
765	eval_flags)
766	error_str = _DecayPartValuesToString(
767	error_part_vals, self.splitter.GetJoinChar())
768
769	#
770	# Display fancy/helpful error
771	#
772	if vtest_place.name is None:
773	var_name = '???'
774	else:
775	var_name = vtest_place.name
776
777	if 0:
778	# This hint is nice, but looks too noisy for now
779	op_str = lexer.LazyStr(tok)
780	if tok.id == Id.VTest_ColonQMark:
781	why = 'empty or unset'
782	else:
783	why = 'unset'
784
785	self.errfmt.Print_(
786	"Hint: operator %s means a variable can't be %s" %
787	(op_str, why), tok)
788
789	if val.tag() == value_e.Undef:
790	actual = 'unset'
791	else:
792	actual = 'empty'
793
794	if len(error_str):
795	suffix = ': %r' % error_str
796	else:
797	suffix = ''
798	e_die("Var %s is %s%s" % (var_name, actual, suffix),
799	blame_token)
800
801	else:
802	return False
803
804	else:
805	raise AssertionError(tok.id)
806
    def _Count(self, val, token):
        # type: (value_t, Token) -> int
        """Returns the length of the value, for ${#var}

        Args:
          val: Str, BashArray, BashAssoc or SparseArray.
          token: Location for error messages.

        Returns:
          The number of UTF-8 characters for a Str, or the element count for
          the array types.  For a Str that fails to decode, -1 is returned
          after printing a warning, unless strict_word_eval is on.

        Raises:
          error.Strict: On a decoding failure under strict_word_eval.
          error.TypeErr: For any other value type.
        """
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Str):
                val = cast(value.Str, UP_val)
                # NOTE: Whether bash counts bytes or chars is affected by LANG
                # environment variables.
                # Should we respect that, or another way to select?  set -o
                # count-bytes?

                # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
                try:
                    count = string_ops.CountUtf8Chars(val.s)
                except error.Strict as e:
                    # Add this here so we don't have to add it so far down the stack.
                    # TODO: It's better to show BOTH this CODE and the actual DATA
                    # somehow.
                    e.location = token

                    if self.exec_opts.strict_word_eval():
                        raise
                    else:
                        # NOTE: Doesn't make the command exit with 1; it just returns a
                        # length of -1.
                        self.errfmt.PrettyPrintError(e, prefix='warning: ')
                        return -1

            elif case(value_e.BashArray):
                val = cast(value.BashArray, UP_val)
                count = bash_impl.BashArray_Count(val)

            elif case(value_e.BashAssoc):
                val = cast(value.BashAssoc, UP_val)
                count = bash_impl.BashAssoc_Count(val)

            elif case(value_e.SparseArray):
                val = cast(value.SparseArray, UP_val)
                count = bash_impl.SparseArray_Count(val)

            else:
                raise error.TypeErr(
                    val, "Length op expected Str, BashArray, BashAssoc", token)

        return count
853
854	def _Keys(self, val, token):
855	# type: (value_t, Token) -> value_t
856	"""Return keys of a container, for ${!array[@]}"""
857
858	UP_val = val
859	with tagswitch(val) as case:
860	if case(value_e.BashArray):
861	val = cast(value.BashArray, UP_val)
862	indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
863	return value.BashArray(indices)
864
865	elif case(value_e.BashAssoc):
866	val = cast(value.BashAssoc, UP_val)
867	assert val.d is not None # for MyPy, so it's not Optional[]
868
869	# BUG: Keys aren't ordered according to insertion!
870	keys = bash_impl.BashAssoc_GetKeys(val)
871	return value.BashArray(keys)
872
873	else:
874	raise error.TypeErr(val, 'Keys op expected Str', token)
875
876	def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
877	# type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
878	"""Handles indirect expansion like ${!var} and ${!a[0]}.
879
880	Args:
881	blame_tok: 'foo' for ${!foo}
882	"""
883	UP_val = val
884	with tagswitch(val) as case:
885	if case(value_e.Undef):
886	# bash-4.4 returned value.Undef here. bash-5.0 started to treat
887	# the variable name to be empty so that the indirection fails.
888	var_ref_str = ''
889
890	elif case(value_e.Str):
891	val = cast(value.Str, UP_val)
892	var_ref_str = val.s
893
894	elif case(value_e.BashArray): # caught earlier but OK
895	val = cast(value.BashArray, UP_val)
896	# When there are more than one element in the array, this
897	# produces a wrong variable name containing spaces.
898	var_ref_str = ' '.join(bash_impl.BashArray_GetValues(val))
899
900	elif case(value_e.BashAssoc): # caught earlier but OK
901	val = cast(value.BashAssoc, UP_val)
902	var_ref_str = ' '.join(bash_impl.BashAssoc_GetValues(val))
903
904	else:
905	raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
906
907	try:
908	bvs_part = self.unsafe_arith.ParseVarRef(var_ref_str, blame_tok)
909	except error.FatalRuntime as e:
910	raise error.VarSubFailure(e.msg, e.location)
911
912	return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
913
914	def _ApplyUnarySuffixOp(self, val, op):
915	# type: (value_t, suffix_op.Unary) -> value_t
916	assert val.tag() != value_e.Undef
917
918	op_kind = consts.GetKind(op.op.id)
919
920	if op_kind == Kind.VOp1:
921	# NOTE: glob syntax is supported in ^ ^^ , ,, ! As well as % %% # ##.
922	# Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
923	# shortcut for constant strings.
924	arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
925	assert arg_val.tag() == value_e.Str
926
927	UP_val = val
928	with tagswitch(val) as case:
929	if case(value_e.Str):
930	val = cast(value.Str, UP_val)
931	s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
932	has_extglob)
933	#log('%r %r -> %r', val.s, arg_val.s, s)
934	new_val = value.Str(s) # type: value_t
935
936	elif case(value_e.BashArray, value_e.BashAssoc):
937	# get values
938	if val.tag() == value_e.BashArray:
939	val = cast(value.BashArray, UP_val)
940	values = bash_impl.BashArray_GetValues(val)
941	elif val.tag() == value_e.BashAssoc:
942	val = cast(value.BashAssoc, UP_val)
943	values = bash_impl.BashAssoc_GetValues(val)
944	else:
945	raise AssertionError()
946
947	# ${a[@]#prefix} is VECTORIZED on arrays. YSH should have this too.
948	strs = [
949	string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
950	has_extglob) for s in values
951	]
952	new_val = value.BashArray(strs)
953
954	else:
955	raise error.TypeErr(
956	val, 'Unary op expected Str, BashArray, BashAssoc',
957	op.op)
958
959	else:
960	raise AssertionError(Kind_str(op_kind))
961
962	return new_val
963
964	def _PatSub(self, val, op):
965	# type: (value_t, suffix_op.PatSub) -> value_t
966
967	pat_val, has_extglob = self.EvalWordToPattern(op.pat)
968	# Extended globs aren't supported because we only translate * ? etc. to
969	# ERE. I don't think there's a straightforward translation from !(*.py) to
970	# ERE! You would need an engine that supports negation? (Derivatives?)
971	if has_extglob:
972	e_die('extended globs not supported in ${x//GLOB/}', op.pat)
973
974	if op.replace:
975	replace_val = self.EvalRhsWord(op.replace)
976	# Can't have an array, so must be a string
977	assert replace_val.tag() == value_e.Str, replace_val
978	replace_str = cast(value.Str, replace_val).s
979	else:
980	replace_str = ''
981
982	# note: doesn't support self.exec_opts.extglob()!
983	regex, warnings = glob_.GlobToERE(pat_val.s)
984	if len(warnings):
985	# TODO:
986	# - Add 'shopt -s strict_glob' mode and expose warnings.
987	# "Glob is not in CANONICAL FORM".
988	# - Propagate location info back to the 'op.pat' word.
989	pass
990	#log('regex %r', regex)
991	replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)
992
993	with tagswitch(val) as case2:
994	if case2(value_e.Str):
995	str_val = cast(value.Str, val)
996	s = replacer.Replace(str_val.s, op)
997	val = value.Str(s)
998
999	elif case2(value_e.BashArray, value_e.BashAssoc):
1000	if val.tag() == value_e.BashArray:
1001	array_val = cast(value.BashArray, val)
1002	values = bash_impl.BashArray_GetValues(array_val)
1003	elif val.tag() == value_e.BashAssoc:
1004	assoc_val = cast(value.BashAssoc, val)
1005	values = bash_impl.BashAssoc_GetValues(assoc_val)
1006	else:
1007	raise AssertionError()
1008	strs = [replacer.Replace(s, op) for s in values]
1009	val = value.BashArray(strs)
1010
1011	else:
1012	raise error.TypeErr(
1013	val, 'Pat Sub op expected Str, BashArray, BashAssoc',
1014	op.slash_tok)
1015
1016	return val
1017
    def _Slice(self, val, op, var_name, part):
        # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
        """Evaluate the slice expressions and apply them with _PerformSlice().

        Args:
          val: The value to slice.
          op: Holds the begin expression and the optional length expression.
          var_name: None for $* or $@, in which case $0 takes part in the
            slice.
          part: The var sub, used as the location for errors.

        Returns:
          The sliced value.  If slicing raises error.Strict and
          strict_word_eval is off, a warning is printed and an empty Str or
          BashArray is returned instead.
        """

        begin = self.arith_ev.EvalToBigInt(op.begin)

        # Note: -1 is only a placeholder for "no length"; has_length tells
        # _PerformSlice() whether to look at it.  Negative lengths given by the
        # user are handled there: an end position for strings, a fatal error
        # for arrays.
        has_length = False
        length = -1
        if op.length:
            has_length = True
            length = self.arith_ev.EvalToInt(op.length)

        try:
            arg0_val = None  # type: value.Str
            if var_name is None:  # $* or $@
                arg0_val = self.mem.GetArg0()
            val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
        except error.Strict as e:
            if self.exec_opts.strict_word_eval():
                raise
            else:
                self.errfmt.PrettyPrintError(e, prefix='warning: ')
                # val wasn't reassigned, because the slice raised.  Replace it
                # with an empty value of the same kind.
                with tagswitch(val) as case2:
                    if case2(value_e.Str):
                        val = value.Str('')
                    elif case2(value_e.BashArray):
                        val = value.BashArray([])
                    else:
                        raise NotImplementedError()
        return val
1049
    def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
        # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value.Str, bool]
        """Apply an operator that takes no argument: @P, @Q or @a.

        Args:
          val: The value of the variable.
          op: The operator token.
          var_name: Name of the variable, or None (e.g. for ${?@a}).
          vsub_token: Passed to _ProcessUndef.
          vsub_state: Passed to _ProcessUndef; its h_value is read for @a.

        Returns:
          (result, quoted2).  quoted2 is only set to True for @Q applied to a
          Str.

        Dies via e_die() for an unsupported value type, or for any other
        operator.
        """

        quoted2 = False
        op_id = op.id
        if op_id == Id.VOp0_P:
            val = self._ProcessUndef(val, vsub_token, vsub_state)
            UP_val = val
            with tagswitch(val) as case:
                if case(value_e.Undef):
                    result = value.Str('')
                elif case(value_e.Str):
                    str_val = cast(value.Str, UP_val)
                    prompt = self.prompt_ev.EvalPrompt(str_val)
                    # readline gets rid of these, so we should too.
                    p = prompt.replace('\x01', '').replace('\x02', '')
                    result = value.Str(p)
                else:
                    e_die("Can't use @P on %s" % ui.ValType(val), op)

        elif op_id == Id.VOp0_Q:
            UP_val = val
            with tagswitch(val) as case:
                if case(value_e.Undef):
                    # We need to issue an error when "-o nounset" is enabled.
                    # Although we do not need to check val for value_e.Undef,
                    # we call _ProcessUndef for consistency in the error
                    # message.
                    self._ProcessUndef(val, vsub_token, vsub_state)

                    # For unset variables, we do not generate any quoted words.
                    result = value.Str('')

                elif case(value_e.Str):
                    str_val = cast(value.Str, UP_val)
                    result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                    # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                    # bash
                    quoted2 = True
                elif case(value_e.BashArray, value_e.BashAssoc):
                    if val.tag() == value_e.BashArray:
                        val = cast(value.BashArray, UP_val)
                        # Unset elements are left out
                        values = [
                            s for s in bash_impl.BashArray_GetValues(val)
                            if s is not None
                        ]
                    elif val.tag() == value_e.BashAssoc:
                        val = cast(value.BashAssoc, UP_val)
                        values = bash_impl.BashAssoc_GetValues(val)
                    else:
                        raise AssertionError()

                    tmp = [
                        # TODO: should use fastfunc.ShellEncode
                        j8_lite.MaybeShellEncode(s) for s in values
                    ]
                    result = value.Str(' '.join(tmp))
                else:
                    e_die("Can't use @Q on %s" % ui.ValType(val), op)

        elif op_id == Id.VOp0_a:
            val = self._ProcessUndef(val, vsub_token, vsub_state)
            UP_val = val
            # We simulate the attribute letters a and A (from the type of
            # vsub_state.h_value) and r, x, n (from the cell flags).  See
            # spec/ble-idioms.test.sh.
            chars = []  # type: List[str]
            with tagswitch(vsub_state.h_value) as case:
                if case(value_e.BashArray):
                    chars.append('a')
                elif case(value_e.BashAssoc):
                    chars.append('A')

            if var_name is not None:  # e.g. ${?@a} is allowed
                cell = self.mem.GetCell(var_name)
                if cell:
                    if cell.readonly:
                        chars.append('r')
                    if cell.exported:
                        chars.append('x')
                    if cell.nameref:
                        chars.append('n')

            result = value.Str(''.join(chars))

        else:
            e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

        return result, quoted2
1135
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the whole-array bracket ops ${a[@]} and ${a[*]}.

    Records on vsub_state whether the array should be joined later, and
    remembers the array token for an undefined value.  The value itself is
    returned unchanged.
    """
    which = cast(bracket_op.WholeArray, part.bracket_op).op_id

    if which == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        op_str = '@'
        vsub_state.join_array = not quoted
    elif which == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        op_str = '*'
        vsub_state.join_array = True
    else:
        raise AssertionError(which)  # unknown

    tag = val.tag()
    if tag == value_e.Undef:
        # Keep the token of the array reference so a later error message
        # about the undefined array can point at it.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str:
        if self.exec_opts.strict_array():
            e_die("Can't index string with %s" % op_str,
                  loc.WordPart(part))
    # BashArray, SparseArray and BashAssoc need no work here.  Other YSH
    # types (List, Dict, Float, ...) are unsupported, but they are returned
    # unmodified because the error is reported later.

    return val
1168
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Evaluate an indexing bracket op such as ${a[i+1]} or ${assoc[key]}.

    Args:
      val: the value being indexed
      part: the substitution; its bracket_op holds the index expression
      vtest_place: out param; .index is set to the evaluated index/key

    Returns:
      value.Str for an element that exists, value.Undef for a missing one,
      or val itself when it is already Undef.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # checked later by the caller

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.name_tok)

        elif case2(value_e.BashArray):
            dense = cast(value.BashArray, UP_val)
            index = self.arith_ev.EvalToInt(index_expr)
            vtest_place.index = a_index.Int(index)

            s, error_code = bash_impl.BashArray_GetElement(dense, index)
            if error_code == error_code_e.IndexOutOfRange:
                # Bash only warns in this situation; it is not a real
                # error, and we do the same.
                self.errfmt.Print_(
                    "Index %d out of bounds for array of length %d" %
                    (index, bash_impl.BashArray_Length(dense)),
                    blame_loc=part.name_tok)

            if s is not None:
                val = value.Str(s)
            else:
                val = value.Undef

        elif case2(value_e.SparseArray):
            sparse = cast(value.SparseArray, UP_val)
            big_index = self.arith_ev.EvalToBigInt(index_expr)
            vtest_place.index = a_index.Int(mops.BigTruncate(big_index))

            s, error_code = bash_impl.SparseArray_GetElement(
                sparse, big_index)
            if error_code == error_code_e.IndexOutOfRange:
                # Same as for BashArray: warn like bash does, don't fail.
                big_length = bash_impl.SparseArray_Length(sparse)
                self.errfmt.Print_(
                    "Index %s out of bounds for array of length %s" %
                    (mops.ToStr(big_index), mops.ToStr(big_length)),
                    blame_loc=part.name_tok)

            if s is not None:
                val = value.Str(s)
            else:
                val = value.Undef

        elif case2(value_e.BashAssoc):
            assoc = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                index_expr, blame_loc=location.TokenForArith(index_expr))

            vtest_place.index = a_index.Str(key)  # out param
            s = bash_impl.BashAssoc_GetElement(assoc, key)

            if s is not None:
                val = value.Str(s)
            else:
                val = value.Undef

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1247
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Args:
      parts: the word parts between the double quotes
      part_vals: output param to append to.

    A double quoted part can produce an array:

      $ a=(1 2); b=(3); c=(4 5)
      $ argv "${a[@]}${b[@]}${c[@]}"
      ['1', '234', '5']

    and that also works through nested parts:

      $ argv "${a[@]}${undef[@]:-${c[@]}}"
      ['1', '24', '5']
    """
    if len(parts) != 0:
        for inner in parts:
            self._EvalWordPart(inner, part_vals, QUOTED)
    else:
        # The parser emits (DoubleQuoted []) for "" rather than
        # (DoubleQuoted [Literal '']), so the empty quoted piece has to be
        # produced here.
        part_vals.append(Piece('', True, False))
1274
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors from concatenation are blamed on the left quote (dq_part.left).
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    joined = self._ConcatPartVals(evaluated, dq_part.left)
    return joined
1284
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Join a BashArray into one string, as happens for $*.

    The elements that are not None are joined with the splitter's join
    character.
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for item in bash_impl.BashArray_GetValues(val):
        if item is not None:
            present.append(item)

    return value.Str(join_char.join(present))
1292
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value by an empty value, or fail under nounset.

    Args:
      val: the value to check; anything but Undef is returned as is
      name_tok: token of the variable, used in the error message
      vsub_state: if its array_ref is set, the undefined thing is an array

    Returns:
      val when it is defined; otherwise an empty BashArray (array
      reference) or an empty Str.

    With nounset enabled, an undefined value is a fatal error instead.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    nounset = self.exec_opts.nounset()
    array_tok = vsub_state.array_ref

    if array_tok is not None:
        if nounset:
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
        return value.BashArray([])

    if nounset:
        raw = lexer.TokenVal(name_tok)
        # Report "x" rather than "$x"
        if raw.startswith('$'):
            name = raw[1:]
        else:
            name = raw
        e_die('Undefined variable %r' % name, name_tok)

    return value.Str('')
1314
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a braced var sub, if there is one.

    With a bracket op, the work is delegated to _WholeArray or
    _ArrayIndex.  Without one, a named BashArray/BashAssoc either decays
    (when ShouldArrayDecay allows it, e.g. for ${BASH_SOURCE}) or is a
    fatal error.
    """
    if part.bracket_op:
        with tagswitch(part.bracket_op) as case:
            if case(bracket_op_e.WholeArray):
                val = self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                val = self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(part.bracket_op.tag())

        return val

    # No bracket op from here on.
    var_name = vtest_place.name
    if var_name is None:
        return val

    tag = val.tag()
    if tag == value_e.BashArray or tag == value_e.BashAssoc:
        has_no_ops = not part.prefix_op and not part.suffix_op
        if ShouldArrayDecay(var_name, self.exec_opts, has_no_ops):
            # for ${BASH_SOURCE}, etc.
            val = DecayArray(val)
        else:
            e_die(
                "Array %r can't be referred to as a scalar (without @ or *)"
                % var_name, loc.WordPart(part))

    return val
1343
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value a BracedVarSub refers to and apply its bracket op.

    This repeats part of the logic of _EvalBracedVarSub, but the result is
    a value_t rather than part values.
    """
    # 1. (var_name, var_num, token Id) -> value
    name_id = part.name_tok.id
    if name_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif name_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(name_id, quoted, vsub_state)

    # Remember the h-value, i.e. the holder of the current value.
    vsub_state.h_value = val

    # We don't need var_index because it's only for L-Values of test ops?
    if self.exec_opts.eval_unsafe_arith():
        return self._EvalBracketOp(val, part, quoted, vsub_state,
                                   vtest_place)

    # Otherwise the bracket op is evaluated with command subs disallowed.
    with state.ctx_Option(self.mutable_opts,
                          [option_i._allow_command_sub], False):
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)
    return val
1376
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the braced var sub to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context

    The function returns early (after appending) for ${!prefix@},
    for the length op ${#x}, and when a test op like ${x:-default} has
    already produced the part values.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Quoted "${!x@}" yields one word per name; every other
                # form joins the names into a single piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    suffix_op_ = part.suffix_op
    if suffix_op_:
        # Untyped alias, used below as the source of the cast() calls
        UP_op = suffix_op_
    # Record the holder of the value before the bracket op changes it
    vsub_state.h_value = val

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_State.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef handling remains
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1556
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate part values into one string.

    Args:
      part_vals: String and Array part values; anything else is an
                 AssertionError
      location: blamed when an array is found under strict_array

    Returns:
      The concatenation.  Array parts are joined with a single space
      (this does not appear to respect IFS), skipping None entries.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                chunk = piece.s

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)
                else:
                    # TODO: eliminate double join()?
                    present = []  # type: List[str]
                    for elem in arr.strs:
                        if elem is not None:
                            present.append(elem)
                    chunk = ' '.join(present)

            else:
                raise AssertionError()

        chunks.append(chunk)

    return ''.join(chunks)
1586
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution (unquoted) to a single str.

    Used from YSH expressions, e.g. the ${foo} in

        var x = "$foo-${foo}"

    Errors from concatenation are blamed on the ${ token (part.left).
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    joined = self._ConcatPartVals(evaluated, part.left)
    return joined
1597
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a substitution without braces and append to part_vals.

    Handles named variables ($x), positional params ($1) and the special
    variables.  A named BashArray/BashAssoc either decays (when
    ShouldArrayDecay allows it, e.g. $BASH_SOURCE) or is a fatal error.
    """
    tok = part.tok
    tok_id = tok.id
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok_id == Id.VSub_DollarName:
        var_name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(var_name)
        tag = val.tag()
        if tag == value_e.BashArray or tag == value_e.BashAssoc:
            if not ShouldArrayDecay(var_name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    val = self._ProcessUndef(val, tok, vsub_state)

    # An array is joined only when _EvalSpecialVar asked for it.
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1638
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple substitution like $foo (unquoted) to a single str.

    Used from YSH expressions, e.g. the $foo in

        var x = "$foo-${foo}"

    Errors from concatenation are blamed on the substitution's token.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    joined = self._ConcatPartVals(evaluated, node.tok)
    return joined
1648
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Args:
      part: the extended glob; part.op is the opening token and part.arms
            are the alternatives
      part_vals: output param.  Receives the opener, the evaluated arms
                 separated by '|' pieces, and the closing ')'.
    """
    op = part.op
    if op.id == Id.ExtGlob_Comma:
        # This operator is always emitted as '@('
        op_str = '@('
    else:
        op_str = lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    for i, w in enumerate(part.arms):
        if i != 0:
            # The separator must be a bare '|'.  A backslash-escaped pipe
            # would be a literal '|' character to the pattern matcher
            # instead of an alternation.
            part_vals.append(Piece('|', False, False))
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(w, part_vals, EXTGLOB_NESTED)
    part_vals.append(Piece(')', False, False))  # closing )
1666
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values of the word
      w: the word, used to blame errors
      glob_parts: output param; every ExtGlob part becomes '*' here
      fnmatch_parts: output param; receives the full extglob text
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(part_val.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    s = part_val.s
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # Keep appending to fnmatch_parts, but replace the nested
                # glob parts with '*': they go into a throwaway list.
                self._TranslateExtGlob(part_val.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1702
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate one word part and append the result to part_vals.

    Args:
      part: the word part to evaluate
      part_vals: output param
      flags: bit flags; QUOTED and IS_SUBST are looked at here

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.ShArrayLiteral):
            array_lit = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(array_lit))

        elif case(word_part_e.BashAssocLiteral):
            assoc_lit = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(assoc_lit))

        elif case(word_part_e.Literal):
            lit_tok = cast(Token, UP_part)
            # A literal is split only inside a substitution: echo is not
            # split, but ${foo:-echo} is.
            part_vals.append(Piece(lexer.LazyStr(lit_tok), quoted,
                                   is_subst))

        elif case(word_part_e.EscapedLiteral):
            escaped = cast(word_part.EscapedLiteral, UP_part)
            part_vals.append(Piece(escaped.ch, True, False))

        elif case(word_part_e.SingleQuoted):
            sq = cast(SingleQuoted, UP_part)
            part_vals.append(Piece(sq.sval, True, False))

        elif case(word_part_e.DoubleQuoted):
            dq = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(dq.parts, part_vals)

        elif case(word_part_e.CommandSub):
            cs = cast(CommandSub, UP_part)
            left_id = cs.left_token.id
            if left_id in (Id.Left_DollarParen, Id.Left_AtParen,
                           Id.Left_Backtick):
                sv = self._EvalCommandSub(cs,
                                          quoted)  # type: part_value_t

            elif left_id in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(cs)

            else:
                raise AssertionError(left_id)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            simple_sub = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(simple_sub, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            braced_sub = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(braced_sub, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            tilde = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            expanded = self.tilde_ev.Eval(tilde)
            # NOT split even when unquoted!
            part_vals.append(Piece(expanded, True, False))

        elif case(word_part_e.ArithSub):
            arith = cast(word_part.ArithSub, UP_part)
            big = self.arith_ev.EvalToBigInt(arith.anode)
            part_vals.append(Piece(mops.ToStr(big), quoted, not quoted))

        elif case(word_part_e.ExtGlob):
            ext = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # The flattened tree goes into its own node.  The caller
            # decides whether to pass it to fnmatch(), or to replace it
            # with '*' and pass that to glob().
            flattened = []  # type: List[part_value_t]
            self._EvalExtGlob(ext, flattened)
            part_vals.append(part_value.ExtGlob(flattened))

        elif case(word_part_e.BashRegexGroup):
            group = cast(word_part.BashRegexGroup, UP_part)

            part_vals.append(Piece('(', False, False))  # not quoted
            if group.child:
                self._EvalWordToParts(group.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            splice = cast(word_part.Splice, UP_part)
            spliced_val = self.mem.GetValue(splice.var_name)

            items = self.expr_ev.SpliceValue(spliced_val, splice)
            part_vals.append(part_value.Array(items))

        elif case(word_part_e.ExprSub):
            expr_sub = cast(word_part.ExprSub, UP_part)
            part_vals.append(self.expr_ev.EvalExprSub(expr_sub))

        elif case(word_part_e.ZshVarSub):
            zsh_sub = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  zsh_sub.left)

        else:
            raise AssertionError(part.tag())
1819
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending the result to part_vals.

    An Empty word contributes one empty piece whose quoting follows the
    QUOTED flag; a Compound word is passed on to _EvalWordToParts.
    """
    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            is_quoted = bool(eval_flags & QUOTED)
            part_vals.append(Piece('', is_quoted, not is_quoted))

        elif case(rhs_word_e.Compound):
            compound = cast(CompoundWord, UP_w)
            self._EvalWordToParts(compound, part_vals,
                                  eval_flags=eval_flags)

        else:
            raise AssertionError()
1835
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Args:
      w: the word to evaluate
      part_vals: output param.  Note that the result is a TREE.
      eval_flags: EXTGLOB_FILES and EXTGLOB_NESTED decide what happens to
                  a word with an extended glob part; all flags are passed
                  on to _EvalWordPart.

    Words with an extended glob are a special case, because extended globs
    are implemented with glob() followed by fnmatch(..., FNM_EXTMATCH),
    and it's hard to carry that information past word splitting.

    OSH semantic limitations: if a word has an extended glob part, then
      1. It can't have an array
      2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    if bool(eval_flags & EXTGLOB_FILES):
        # The caller REQUESTED extglob evaluation, AND the word has an
        # ExtGlob part: treat the WHOLE word as a pattern.  Two variants
        # are needed because of the way libc is used:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)

        matches = []  # type: List[str]
        status = self.globber.ExpandExtended(glob_pat, fnmatch_pat,
                                             matches)
        if status < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1892
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the part values to convert
      w: the word, only used to blame errors (could just be a span ID)
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE select how quoted pieces are
                  escaped
      strs: output param; one string is appended per String or Array
            part, and ExtGlob parts are expanded recursively
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                text = piece.s
                if piece.quoted:
                    if eval_flags & QUOTE_FNMATCH:
                        # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py}
                        # or ${x//*.py/}
                        text = glob_.GlobEscape(text)
                    elif eval_flags & QUOTE_ERE:
                        text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word
                    # rather than the parts.  Problem: the word is a TREE
                    # of parts, but we only have a flat list of part_vals.
                    # The only case where we really get arrays is "$@",
                    # "${a[@]}", "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')
                else:
                    # It appears to not respect IFS
                    # TODO: eliminate double join()?
                    present = []  # type: List[str]
                    for elem in arr.strs:
                        if elem is not None:
                            present.append(elem)
                    strs.append(' '.join(present))

            elif case(part_value_e.ExtGlob):
                ext = cast(part_value.ExtGlob, UP_pv)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(ext.part_vals, w, eval_flags, strs)

            else:
                raise AssertionError()
1948
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Evaluate a compound word to a single string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: may select a quoting algorithm (e.g. QUOTE_FNMATCH),
                  which is applied when the part values are joined

    Returns:
      The string, wrapped in value.Str
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # The fast path is only valid without flags: QUOTE_FNMATCH etc. breaks
    # this optimization.
    if eval_flags == 0:
        shortcut = word_.FastStrEval(w)
        if shortcut is not None:
            return value.Str(shortcut)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        # eval_flags is deliberately not passed here, which is slightly
        # confusing; it only matters in _PartValsToString below.
        self._EvalWordPart(p, evaluated, 0)

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, out)
    return value.Str(''.join(out))
1976
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but also reports whether we got an ExtGlob.

    Returns:
      (pattern, has_extglob).  The pattern is built with QUOTE_FNMATCH
      escaping; an Empty word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        # No flags are passed here, which is slightly confusing; the
        # quoting is applied in _PartValsToString below.
        self._EvalWordPart(p, evaluated, 0)

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, out)
    return value.Str(''.join(out)), saw_extglob
1997
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Evaluate a word to a string without letting errors escape.

    This wraps EvalWordToString.  Runtime errors like $(( 1 / 0 )), I/O
    errors and Ctrl-C are turned into a placeholder string, and mutations
    of $? like $(exit 42) are contained by ctx_Registers.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers "sandboxes" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)
        except error.FatalRuntime as e:
            msg = '<Runtime error: %s>' % e.UserErrorString()
            result = value.Str(msg)

        except (IOError, OSError) as e:
            msg = '<I/O error: %s>' % pyutil.strerror(e)
            result = value.Str(msg)

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2020
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Evaluate the right-hand side of an assignment.

    There is no splitting.

    Returns:
      value.BashArray for a=(1 2), value.BashAssoc for an associative
      array literal, and value.Str for everything else (an Empty word
      gives '').
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Array literals only show up as the single part of a word.
    if len(w.parts) == 1:
        only_part = w.parts[0]
        UP_only_part = only_part
        part_tag = only_part.tag()

        # Special case for a=(1 2).  ShArrayLiteral won't appear in words
        # that don't look like assignments.
        if part_tag == word_part_e.ShArrayLiteral:
            array_lit = cast(ShArrayLiteral, UP_only_part)
            expanded = braces.BraceExpandWords(array_lit.words)
            return value.BashArray(self.EvalWordSequence(expanded))

        if part_tag == word_part_e.BashAssocLiteral:
            assoc_lit = cast(word_part.BashAssocLiteral, UP_only_part)
            d = NewDict()  # type: Dict[str, str]
            for pair in assoc_lit.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2057
2058	def _EvalWordFrame(self, frame, argv):
2059	# type: (List[Piece], List[str]) -> None
2060	all_empty = True
2061	all_quoted = True
2062	any_quoted = False
2063
2064	#log('--- frame %s', frame)
2065
2066	for piece in frame:
2067	if len(piece.s):
2068	all_empty = False
2069
2070	if piece.quoted:
2071	any_quoted = True
2072	else:
2073	all_quoted = False
2074
2075	# Elision of ${empty}${empty} but not $empty"$empty" or $empty""
2076	if all_empty and not any_quoted:
2077	return
2078
2079	# If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
2080	# don't do word splitting or globbing.
2081	if all_quoted:
2082	tmp = [piece.s for piece in frame]
2083	a = ''.join(tmp)
2084	argv.append(a)
2085	return
2086
2087	will_glob = not self.exec_opts.noglob()
2088
2089	if 0:
2090	log('---')
2091	log('FRAME')
2092	for i, piece in enumerate(frame):
2093	log('(%d) %s', i, piece)
2094	log('')
2095
2096	# Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
2097	frags = [] # type: List[str]
2098	for piece in frame:
2099	if will_glob and piece.quoted:
2100	frag = glob_.GlobEscape(piece.s)
2101	else:
2102	# If we have a literal \, then we turn it into \\\\.
2103	# Splitting takes \\\\ -> \\
2104	# Globbing takes \\ to \ if it doesn't match
2105	frag = _BackslashEscape(piece.s)
2106
2107	if piece.do_split:
2108	frag = _BackslashEscape(frag)
2109	else:
2110	frag = self.splitter.Escape(frag)
2111
2112	frags.append(frag)
2113
2114	if 0:
2115	log('---')
2116	log('FRAGS')
2117	for i, frag in enumerate(frags):
2118	log('(%d) %s', i, frag)
2119	log('')
2120
2121	flat = ''.join(frags)
2122	#log('flat: %r', flat)
2123
2124	args = self.splitter.SplitForWordEval(flat)
2125
2126	# space=' '; argv $space"". We have a quoted part, but we CANNOT elide.
2127	# Add it back and don't bother globbing.
2128	if len(args) == 0 and any_quoted:
2129	argv.append('')
2130	return
2131
2132	#log('split args: %r', args)
2133	for a in args:
2134	if glob_.LooksLikeGlob(a):
2135	n = self.globber.Expand(a, argv)
2136	if n < 0:
2137	# TODO: location info, with span IDs carried through the frame
2138	raise error.FailGlob('Pattern %r matched no files' % a,
2139	loc.Missing)
2140	else:
2141	argv.append(glob_.GlobUnescape(a))
2142
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of args, without splitting or globbing.

    Helper for _EvalAssignBuiltin; splitting and globbing are disabled for
    assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, vals, 0)  # not double quoted
    frames = _MakeWordFrames(vals)

    result = []  # type: List[str]
    for frame in frames:
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        tmp = [piece.s for piece in frame]
        joined = ''.join(tmp)  # no split or glob
        result.append(joined)
    #log('argv: %s', result)
    return result
2162
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Evaluate the words of an assignment builtin to a cmd_value.Assign.

    Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: passed through to cmd_value.Assign
      arg0: string that becomes the first entry of the flags list
      words: all the words of the command
      meta_offset: words after this index are evaluated as flags and pairs

    Returns:
      A cmd_value.Assign.  Its flags and flag_locs lists are kept parallel:
      every string appended to flags has a word appended to flag_locs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name after -f or -F.  Record its location too, so
                        # that flag_locs stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2249
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate words as an assignment builtin, if arg0 names one.

    Returns:
      The result of _EvalAssignBuiltin when consts.LookupAssignBuiltin
      finds arg0, and None otherwise.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None
    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2257
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from a part_value.

    Only an unquoted part_value_e.String is considered.  Anything else
    gives None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2266
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Returns:
      A cmd_value.Argv whose strings and locations are parallel lists, or
      the result of _DetectAssignBuiltinStr when allow_assign is set and
      the first word evaluates to a single string naming an assignment
      builtin.
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if allow_assign and i == meta_offset:
            first_strs = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first_strs) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    first_strs[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            for s in first_strs:
                argv.append(s)
                arg_locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            num_matched = self.globber.Expand(pat.s, argv)
            if num_matched < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            for _ in xrange(num_matched):
                arg_locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        if 0:
            log('')
            log('Static: part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        # The part values still have to be grouped into frames
        frames = _MakeWordFrames(part_vals)

        if 0:
            log('')
            log('Static: frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # x"${a[@]}"x is still allowed, though it's deprecated by @a, which
        # disallows such expressions at parse time.
        for frame in frames:
            if len(frame) == 0:  # empty array gives empty frame!
                continue
            tmp = [piece.s for piece in frame]
            argv.append(''.join(tmp))  # no split or glob
            arg_locs.append(w)

    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2329
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Evaluate a list of words to a list of strings, with locations.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: the words to evaluate
      is_last_cmd: passed through to cmd_value.Argv
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Stages of evaluation
    #
    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            argv.append(fast_str)
            arg_locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            assert len(argv) == len(arg_locs), argv
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            first_val = part_vals[0]
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(first_val, words,
                                                    meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(first_val):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more
        # args, so remember how many there were before this word.
        num_before = len(argv)
        for frame in frames:
            self._EvalWordFrame(frame, argv)

        # Fill in locations parallel to the strings this word produced.
        for _ in xrange(len(argv) - num_before):
            arg_locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2437
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a list of strings, for arrays and for loops.

    Assignment builtins aren't allowed here, so the result of
    EvalWordSequence2 is always a cmd_value.Argv.
    """
    # The is_last_cmd argument is irrelevant here, so pass False
    result = self.EvalWordSequence2(words, False)
    assert result.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2448
2449
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command subs and process subs via shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Starts out as None; CheckCircularDeps() asserts that it was set
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators and executor were filled in."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub, and turn its stdout into a part_value."""
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # An unquoted result is subject to splitting
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub, and return the resulting path as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2496
2497
# Placeholder string that CompletionWordEvaluator returns in place of the
# output of a command sub.
_DUMMY = '__NO_COMMAND_SUB__'
2499
2500
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end of
    the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators were filled in."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return the placeholder instead of running the command sub."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2540
2541
2542	# vim: sw=4