osh/word_eval.py

OILS / osh / word_eval.py View on Github | oils.pub

2642 lines, 1638 significant

1	"""
2	word_eval.py - Evaluator for the word language.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6	from _devbuild.gen.syntax_asdl import (
7	Token,
8	SimpleVarSub,
9	loc,
10	loc_t,
11	BracedVarSub,
12	CommandSub,
13	bracket_op,
14	bracket_op_e,
15	suffix_op,
16	suffix_op_e,
17	ShArrayLiteral,
18	SingleQuoted,
19	DoubleQuoted,
20	word_e,
21	word_t,
22	CompoundWord,
23	rhs_word,
24	rhs_word_e,
25	rhs_word_t,
26	word_part,
27	word_part_e,
28	)
29	from _devbuild.gen.runtime_asdl import (
30	part_value,
31	part_value_e,
32	part_value_t,
33	cmd_value,
34	cmd_value_e,
35	cmd_value_t,
36	error_code_e,
37	AssignArg,
38	a_index,
39	a_index_e,
40	VTestPlace,
41	VarSubState,
42	Piece,
43	)
44	from _devbuild.gen.option_asdl import option_i, builtin_i
45	from _devbuild.gen.value_asdl import (
46	value,
47	value_e,
48	value_t,
49	sh_lvalue,
50	sh_lvalue_t,
51	)
52	from core import bash_impl
53	from core import error
54	from core import pyos
55	from core import pyutil
56	from core import state
57	from display import ui
58	from core import util
59	from data_lang import j8
60	from data_lang import j8_lite
61	from core.error import e_die
62	from frontend import consts
63	from frontend import lexer
64	from frontend import location
65	from mycpp import mops
66	from mycpp.mylib import log, tagswitch, NewDict
67	from osh import braces
68	from osh import glob_
69	from osh import string_ops
70	from osh import word_
71	from ysh import expr_eval
72	from ysh import val_ops
73
74	from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76	if TYPE_CHECKING:
77	from _devbuild.gen.syntax_asdl import word_part_t
78	from _devbuild.gen.option_asdl import builtin_t
79	from core import optview
80	from core.state import Mem
81	from core.vm import _Executor
82	from osh.split import SplitContext
83	from osh import prompt
84	from osh import sh_expr_eval
85
# Bit flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Every flag occupies its own bit, so several can be OR-ed into one int.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} may be treated as ${a[0]}.

    Decay is always allowed while strict_array is off.  With strict_array on,
    it is only allowed for a plain variable substitution whose name is listed
    in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    return is_plain_var_sub and var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    BashArray and SparseArray are looked up at index 0, BashAssoc under the
    key '0'.  Returns value.Undef when that element does not exist, otherwise
    a value.Str.  Any other value type is a programming error.
    """
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        elem, status = bash_impl.BashArray_GetElement(array_val, 0)
        # Note: index 0 should never cause the out-of-bound index error.
        assert status == error_code_e.OK

    elif val.tag() == value_e.SparseArray:
        sparse_val = cast(value.SparseArray, val)
        elem, status = bash_impl.SparseArray_GetElement(
            sparse_val, mops.ZERO)
        # Note: index 0 should never cause the out-of-bound index error.
        assert status == error_code_e.OK

    elif val.tag() == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        elem = bash_impl.BashAssoc_GetElement(assoc_val, '0')

    else:
        raise AssertionError(val.tag())

    if elem is None:
        return value.Undef
    return value.Str(elem)
137
138
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    All of these cases have to be detected:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_oils.Builtin() or meta_oils.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
155
156
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether the first part value is an unquoted meta builtin name.

    Only an unquoted String piece can qualify; the name check itself is
    delegated to _DetectMetaBuiltinStr().
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
165
166
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse one argument of declare, export, etc. at runtime.

    This is a fallback to the static parsing done below.

    Dies (blaming blame_word) when arg does not match consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    m = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if m is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    name = m[1]
    # m[2] is used for grouping; ERE doesn't have non-capturing groups
    op = m[3]
    assert op is not None, op

    if len(op) == 0:  # declare NAME  (no operator, so no value)
        return AssignArg(name, None, False, blame_word)

    # declare NAME=   (a leading '+' in the operator means append)
    rhs = value.Str(m[4])  # type: Optional[value_t]
    return AssignArg(name, rhs, op[0] == '+', blame_word)
193
194
195	# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
196	def _BackslashEscape(s):
197	# type: (str) -> str
198	"""Double up backslashes.
199
200	Useful for strings about to be globbed and strings about to be IFS
201	escaped.
202	"""
203	return s.replace('\\', '\\\\')
204
205
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Strings (and stringifiable YSH values) become a Piece; the three bash
    container types become a part_value.Array of their values.  Any other
    type raises error.TypeErr located at part_loc.
    """
    unquoted = not quoted
    tag = val.tag()

    if tag == value_e.Undef:
        # This happens in the case of ${undef+foo}.  We skipped _ProcessUndef,
        # but we have to append to the empty string.
        return Piece('', quoted, unquoted)

    if tag == value_e.Str:
        str_val = cast(value.Str, val)
        return Piece(str_val.s, quoted, unquoted)

    if tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        return part_value.Array(bash_impl.BashArray_GetValues(array_val))

    if tag == value_e.SparseArray:
        sparse_val = cast(value.SparseArray, val)
        return part_value.Array(bash_impl.SparseArray_GetValues(sparse_val))

    if tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        # bash behavior: splice values!
        return part_value.Array(bash_impl.BashAssoc_GetValues(assoc_val))

    # Cases added for YSH
    # value_e.List is also here - we use val_ops.Stringify()s err message
    if tag in (value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
               value_e.Eggex, value_e.List):
        text = val_ops.Stringify(val, loc.WordPart(part_loc), 'Word eval ')
        return Piece(text, quoted, unquoted)

    raise error.TypeErr(val, "Can't substitute into word",
                        loc.WordPart(part_loc))
249
250
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args, and frames are never
    joined with each other.  The idea exists because of arrays like "$@" and
    "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    frames = [frame]

    for pv in part_vals:
        tag = pv.tag()

        if tag == part_value_e.String:
            frame.append(cast(Piece, pv))

        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, pv)

            # Arrays parts are always quoted; otherwise they would have
            # decayed to a string.  Undefined (None) entries are ignored.
            pieces = [
                Piece(s, True, False) for s in arr.strs if s is not None
            ]
            if len(pieces) != 0:
                # The first entry continues the frame that is already open;
                # every later entry opens a new frame of its own.
                frame.append(pieces[0])
                for extra in pieces[1:]:
                    frame = [extra]
                    frames.append(frame)

        else:
            raise AssertionError()

    return frames
318
319
320	# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into a single string, e.g. for ${a=x"$@"x}.

    String pieces contribute their text unchanged.  Array parts contribute
    their defined (non-None) entries joined by join_char.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        tag = pv.tag()
        if tag == part_value_e.String:
            piece = cast(Piece, pv)
            chunks.append(piece.s)
        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, pv)
            # TODO: Eliminate double join for speed?
            defined = []  # type: List[str]
            for s in arr.strs:
                if s is not None:
                    defined.append(s)
            chunks.append(join_char.join(defined))
        else:
            raise AssertionError()
    return ''.join(chunks)
339
340
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Slice a Str, BashArray or SparseArray by offset and optional length.

    Args:
      val: The value to slice.
      offset: Start position.  A negative offset counts back from the end.
      length: Only consulted when has_length is true.  For strings a negative
        length is an END POSITION counted back from the end of the string;
        for arrays a negative length is a fatal error.
      has_length: Whether a length was given at all.
      part: Used as the error location.
      arg0_val: None, or the value of $0 when slicing $@ / $*.  When given,
        $0 takes part in the slice as if it were element 0.

    Returns:
      value.Str for a string, value.BashArray for either array type.

    Raises:
      Dies via e_die() on a negative array length or on a BashAssoc, and
      raises error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            begin = mops.BigTruncate(offset)
            if begin < 0:  # Compute offset with unicode
                # Walk back -begin characters from the end of the string.
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    # Advance 'length' characters starting at byte_begin.
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                # Turn the negative offset into one relative to the start.
                offset = mops.Add(offset, array_length)

            # Still negative after the adjustment: the slice is empty.
            if bash_impl.BigInt_Less(offset, mops.ZERO):
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        # Skip over $0: shift to an index into the array.
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # Offset 0 selects $0 itself, which uses up one unit
                        # of the requested length.
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    orig = bash_impl.BashArray_GetValues(val)
                    n = len(orig)

                    strs = []
                    i = mops.BigTruncate(offset)
                    count = 0
                    while i < n:
                        if has_length and count == length:  # length could be 0
                            break
                        s = orig[i]
                        if s is not None:  # Unset elements don't count towards the length
                            strs.append(s)
                            count += 1
                        i += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort so
                    # that we only extract the first LENGTH elements of the
                    # indices greater or equal to OFFSET.
                    # i = number of keys that come before the first key
                    # >= offset.
                    i = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        i = i + 1

                    # NOTE(review): indexing the values with a position
                    # derived from the keys assumes both come back in the
                    # same order - confirm in bash_impl.
                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[i:i +
                                                                    length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[i:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    new_list = [arg0_val.s]
                    new_list.extend(strs)
                    strs = new_list

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
478
479
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator

    It only declares EvalWordToString(); the method here always raises
    NotImplementedError, so subclasses have to provide it.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate word w to a string value.

        eval_flags is an int of bit flags; the module defines QUOTE_FNMATCH
        and QUOTE_ERE "for EvalWordToString".
        """
        raise NotImplementedError()
491
492
493	def _GetDollarHyphen(exec_opts):
494	# type: (optview.Exec) -> str
495	chars = [] # type: List[str]
496	if exec_opts.interactive():
497	chars.append('i')
498
499	if exec_opts.errexit():
500	chars.append('e')
501	if exec_opts.noglob():
502	chars.append('f')
503	if exec_opts.noexec():
504	chars.append('n')
505	if exec_opts.nounset():
506	chars.append('u')
507	# NO letter for pipefail?
508	if exec_opts.xtrace():
509	chars.append('x')
510	if exec_opts.noclobber():
511	chars.append('C')
512
513	# bash has:
514	# - c for sh -c, i for sh -i (mksh also has this)
515	# - h for hashing (mksh also has this)
516	# - B for brace expansion
517	return ''.join(chars)
518
519
class TildeEvaluator(object):
    """Expands ~ and ~user into a home directory."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.exec_opts = exec_opts
        self.mem = mem

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home dir: $HOME first, then a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See issue
        #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            home = pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the lookup fails, this dies under strict_tilde and otherwise
        returns the tilde expression literally.
        """
        user = part.user_name
        if user is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
561
562
563	class AbstractWordEvaluator(StringWordEvaluator):
564	"""Abstract base class for word evaluators.
565
566	Public entry points:
567	EvalWordToString EvalForPlugin EvalRhsWord
568	EvalWordSequence EvalWordSequence2
569	"""
570
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators that word evaluation relies on.

    arith_ev, expr_ev, prompt_ev and unsafe_arith are only declared here and
    start out as None.
    NOTE(review): presumably they are assigned after construction because of
    circular dependencies (see CheckCircularDeps) - confirm with the caller.
    """
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator

    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith

    self.tilde_ev = tilde_ev

    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.splitter = splitter
    self.errfmt = errfmt

    # The globber is built here from the same exec_opts.
    self.globber = glob_.Globber(exec_opts)
596
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract: always raises NotImplementedError in this base class.

    NOTE(review): presumably subclasses verify here that the evaluators left
    as None by __init__ have been filled in - confirm against a subclass.
    """
    raise NotImplementedError()
600
def _EvalCommandSub(self, cs_part, quoted):
    # type: (CommandSub, bool) -> part_value_t
    """Abstract since it has a side effect.

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError()
605
def _EvalProcessSub(self, cs_part):
    # type: (CommandSub) -> part_value_t
    """Abstract since it has a side effect.

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError()
610
def _EvalVarNum(self, var_num):
    # type: (int) -> value_t
    """Return the numbered argument var_num, as given by self.mem.GetArgNum().

    var_num must not be negative (asserted).
    """
    assert var_num >= 0
    return self.mem.GetArgNum(var_num)
615
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate a special variable such as $?, $@, $* or $-.

    $@ is special -- it need to know whether it is in a double quoted
    context.

    - If it's $@ in a double quoted context, return an ARRAY.
    - If it's $@ in a normal context, return a STRING, which then will be
      subject to splitting.

    For $@ and $* this also records in vsub_state.join_array whether the
    array should be decayed.
    """
    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    if op_id != Id.VSub_At and op_id != Id.VSub_Star:
        return self.mem.GetSpecialVar(op_id)

    argv = self.mem.GetArgv()
    # $* "$*" are both decayed.  "$@" evaluates to an array, while unquoted
    # $@ should be decayed.
    vsub_state.join_array = (op_id == Id.VSub_Star) or (not quoted)
    return value.BashArray(argv)
645
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
        vsub_state,  # type: VarSubState
):
    # type: (...) -> bool
    """Apply a test operator: ${a:-x} ${a-x} ${a:+x} ${a+x} ${a:=x} ${a:?x} ...

    The value is first classified as "falsey" or not.  The colon variants
    treat an empty string (or an array whose joined form is empty) as falsey;
    the variants without a colon only treat Undef / a zero-length array as
    falsey.

    Args:
      val: The value being tested.
      op: The operator token (op.op) and its argument word (op.arg_word).
      quoted: Whether the substitution is in a double quoted context.
      part_vals: Out param that the evaluated argument word is appended to.
      vtest_place: Name and optional index to assign to, for = and :=
      blame_token: Location for error messages.
      vsub_state: Its join_array flag selects the separator used when
        deciding whether a joined array is empty.

    Returns:
      Whether part_vals was mutated

    Raises:
      Dies via e_die() for ${a:?msg} on a falsey value, and for = / := when
      there is no variable name to assign to.

    ${a:-} returns part_value[]
    ${a:+} returns part_value[]
    ${a:?error} returns error word?
    ${a:=} returns part_value[] but also needs self.mem for side effects.

    Example of needing multiple part_value[]

      echo X-${a:-'def'"ault"}-X

    We return two part values from the BracedVarSub.  Also consider:

      echo ${a:-x"$@"x}
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray, value_e.SparseArray,
                  value_e.BashAssoc):
            if val.tag() == value_e.BashArray:
                val = cast(value.BashArray, UP_val)
                strs = bash_impl.BashArray_GetValues(val)
            elif val.tag() == value_e.SparseArray:
                val = cast(value.SparseArray, UP_val)
                strs = bash_impl.SparseArray_GetValues(val)
            elif val.tag() == value_e.BashAssoc:
                val = cast(value.BashAssoc, UP_val)
                strs = bash_impl.BashAssoc_GetValues(val)
            else:
                raise AssertionError()

            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                # Unset entries are None.  They are not part of the joined
                # string, so they must neither be measured with len() nor
                # counted as elements.
                defined = [s for s in strs if s is not None]

                # "$*" - the separator is the first character of IFS
                # $* $@ "$@" - the separator is a space
                if quoted and vsub_state.join_array:
                    sep_width = len(self.splitter.GetJoinChar())
                else:
                    sep_width = 1

                # We test whether the joined string will be empty.  When
                # the separator is empty, all the elements need to be
                # empty.  When the separator is non-empty, one element is
                # allowed at most and needs to be an empty string if any.
                if sep_width == 0:
                    is_falsey = True
                    for s in defined:
                        if len(s) != 0:
                            is_falsey = False
                            break
                else:
                    is_falsey = len(defined) == 0 or (
                        len(defined) == 1 and len(defined[0]) == 0)
            else:
                # TODO: allow undefined
                is_falsey = len(strs) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # No variable name, e.g. a special variable: blame the
                # substitution so the user sees where it happened.
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array' could
                # avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
849
def _Count(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    Strings are measured with string_ops.CountUtf8Chars().  If that raises
    error.Strict, the error is re-raised under strict_word_eval; otherwise a
    warning is printed and -1 is returned.  The three bash container types
    report their element count.  Any other type raises error.TypeErr.
    """
    tag = val.tag()

    if tag == value_e.Str:
        str_val = cast(value.Str, val)
        # NOTE: Whether bash counts bytes or chars is affected by LANG
        # environment variables.
        # Should we respect that, or another way to select?  set -o
        # count-bytes?

        # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
        try:
            n = string_ops.CountUtf8Chars(str_val.s)
        except error.Strict as e:
            # Add this here so we don't have to add it so far down the stack.
            # TODO: It's better to show BOTH this CODE an the actual DATA
            # somehow.
            e.location = token

            if not self.exec_opts.strict_word_eval():
                # NOTE: Doesn't make the command exit with 1; it just returns a
                # length of -1.
                self.errfmt.PrettyPrintError(e, prefix='warning: ')
                return -1
            raise
        return n

    if tag == value_e.BashArray:
        return bash_impl.BashArray_Count(cast(value.BashArray, val))

    if tag == value_e.BashAssoc:
        return bash_impl.BashAssoc_Count(cast(value.BashAssoc, val))

    if tag == value_e.SparseArray:
        return bash_impl.SparseArray_Count(cast(value.SparseArray, val))

    raise error.TypeErr(
        val, "Length op expected Str, BashArray, BashAssoc", token)
896
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    Args:
      val: A BashArray or BashAssoc.
      token: Location for the type error.

    Returns:
      A value.BashArray holding the indices (as strings) of a BashArray, or
      the keys of a BashAssoc.

    Raises:
      error.TypeErr: If val is neither a BashArray nor a BashAssoc.
    """

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            keys = bash_impl.BashAssoc_GetKeys(val)
            return value.BashArray(keys)

        else:
            # Name the types that are really accepted; a Str is exactly
            # what ends up in this branch.
            raise error.TypeErr(val,
                                'Keys op expected BashArray, BashAssoc',
                                token)
918
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    The value is first turned into the text of a variable reference, which
    is then parsed and evaluated.  A parse failure is reported as
    error.VarSubFailure.

    Args:
      blame_tok: 'foo' for ${!foo}
    """
    tag = val.tag()

    if tag == value_e.Undef:
        # bash-4.4 returned value.Undef here.  bash-5.0 started to treat
        # the variable name to be empty so that the indirection fails.
        ref_text = ''

    elif tag == value_e.Str:
        ref_text = cast(value.Str, val).s

    elif tag == value_e.BashArray:  # caught earlier but OK
        array_val = cast(value.BashArray, val)
        # When there are more than one element in the array, this
        # produces a wrong variable name containing spaces.
        ref_text = ' '.join(bash_impl.BashArray_GetValues(array_val))

    elif tag == value_e.BashAssoc:  # caught earlier but OK
        assoc_val = cast(value.BashAssoc, val)
        ref_text = ' '.join(bash_impl.BashAssoc_GetValues(assoc_val))

    else:
        raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)

    try:
        bvs_part = self.unsafe_arith.ParseVarRef(ref_text, blame_tok)
    except error.FatalRuntime as e:
        raise error.VarSubFailure(e.msg, e.location)

    return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
956
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a Kind.VOp1 operator with a pattern argument to val.

    A Str yields a Str.  The bash container types are processed element by
    element and yield a BashArray.  Other types raise error.TypeErr.  val
    must not be Undef.
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)
    if op_kind != Kind.VOp1:
        raise AssertionError(Kind_str(op_kind))

    # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
    # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
    # shortcut for constant strings.
    arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
    assert arg_val.tag() == value_e.Str
    pattern = arg_val.s

    tag = val.tag()
    if tag == value_e.Str:
        str_val = cast(value.Str, val)
        #log('%r %r -> %r', str_val.s, pattern, ...)
        return value.Str(
            string_ops.DoUnarySuffixOp(str_val.s, op.op, pattern,
                                       has_extglob))

    # get values
    if tag == value_e.BashArray:
        values = bash_impl.BashArray_GetValues(cast(value.BashArray, val))
    elif tag == value_e.SparseArray:
        values = bash_impl.SparseArray_GetValues(
            cast(value.SparseArray, val))
    elif tag == value_e.BashAssoc:
        values = bash_impl.BashAssoc_GetValues(cast(value.BashAssoc, val))
    else:
        raise error.TypeErr(
            val, 'Unary op expected Str, BashArray, BashAssoc', op.op)

    # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this too.
    strs = []  # type: List[str]
    for s in values:
        strs.append(
            string_ops.DoUnarySuffixOp(s, op.op, pattern, has_extglob))
    return value.BashArray(strs)
1010
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply a ${x//GLOB/replacement} style substitution to val.

    The glob is translated to an ERE first.  A Str yields a Str; the bash
    container types are processed element by element and yield a BashArray.
    Dies when the pattern contains an extended glob, and raises
    error.TypeErr for other value types.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from !(*.py) to
    # ERE!  You would need an engine that supports negation?  (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    replace_str = ''
    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    tag = val.tag()
    if tag == value_e.Str:
        str_val = cast(value.Str, val)
        return value.Str(replacer.Replace(str_val.s, op))

    if tag == value_e.BashArray:
        values = bash_impl.BashArray_GetValues(cast(value.BashArray, val))
    elif tag == value_e.SparseArray:
        values = bash_impl.SparseArray_GetValues(
            cast(value.SparseArray, val))
    elif tag == value_e.BashAssoc:
        values = bash_impl.BashAssoc_GetValues(cast(value.BashAssoc, val))
    else:
        raise error.TypeErr(
            val, 'Pat Sub op expected Str, BashArray, BashAssoc',
            op.slash_tok)

    strs = []  # type: List[str]
    for s in values:
        strs.append(replacer.Replace(s, op))
    return value.BashArray(strs)
1068
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate the slice bounds of op and apply them to val.

    var_name is None for $* and $@; in that case $0 is handed to
    _PerformSlice() as well.  If slicing raises error.Strict and
    strict_word_eval is off, a warning is printed and an empty Str or
    BashArray is returned instead.
    """
    begin = self.arith_ev.EvalToBigInt(op.begin)

    # Note: bash allows lengths to be negative (with odd semantics), but
    # we don't allow that right now.
    if op.length:
        has_length = True
        length = self.arith_ev.EvalToInt(op.length)
    else:
        has_length = False
        length = -1

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        return _PerformSlice(val, begin, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        # Fall back to an empty value of the same kind as the input.
        if val.tag() == value_e.Str:
            return value.Str('')
        if val.tag() == value_e.BashArray:
            return value.BashArray([])
        raise NotImplementedError()
1100
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value_t, bool]
    """Apply a nullary suffix op (@P, @Q or @a) to a value.

    Args:
      val: the value the operator is applied to
      op: the operator token; op.id selects the behavior
      var_name: variable name, or None (e.g. for ${?@a}); only read by @a
      vsub_token: token handed to _ProcessUndef for undefined values
      vsub_state: state of this substitution; array_ref and h_value are
        read here

    Returns:
      (result, quoted2).  quoted2 is True only when @Q is applied to a
      Str.

    Any other operator id is a fatal error reported with e_die().
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # @P: evaluate each string as a prompt
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')  # type: value_t
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val.s)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            elif case(value_e.BashArray, value_e.SparseArray,
                      value_e.BashAssoc):
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    # None entries are dropped
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    values = bash_impl.SparseArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                # Same treatment as the Str case, applied to each element.
                tmp = [
                    self.prompt_ev.EvalPrompt(s).replace(
                        '\x01', '').replace('\x02', '') for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # @Q: shell-quote each string
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                if vsub_state.array_ref is not None:
                    result = value.BashArray([])
                else:
                    result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.SparseArray,
                      value_e.BashAssoc):
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    # None entries are dropped
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    values = bash_impl.SparseArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # @a: report attribute flag characters
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # We're ONLY simulating -a and -A, not -r -x -n for now.  See
        # spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        # The array flag comes from h_value (the holder of the value), not
        # from 'val' itself.
        with tagswitch(vsub_state.h_value) as case:
            if case(value_e.BashArray, value_e.SparseArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        # The flag string is repeated once per element: 1 for non-array
        # values, 0 for Undef.
        count = 1
        with tagswitch(val) as case:
            if case(value_e.Undef):
                count = 0
            elif case(value_e.BashArray):
                val = cast(value.BashArray, UP_val)
                count = bash_impl.BashArray_Count(val)
            elif case(value_e.SparseArray):
                val = cast(value.SparseArray, UP_val)
                count = bash_impl.SparseArray_Count(val)
            elif case(value_e.BashAssoc):
                val = cast(value.BashAssoc, UP_val)
                count = bash_impl.BashAssoc_Count(val)

        result = value.BashArray([''.join(chars)] * count)

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1232
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the [@] and [*] bracket ops, e.g. ${a[@]} or "${a[*]}".

    Records on vsub_state whether the array should be joined later, and
    remembers the name token when the value is undefined.  The value itself
    is returned unmodified.
    """
    op_id = cast(bracket_op.WholeArray, part.bracket_op).op_id

    if op_id == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
        op_str = '*'
    elif op_id == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
        op_str = '@'
    else:
        raise AssertionError(op_id)  # unknown

    tag = val.tag()
    if tag == value_e.Undef:
        # For an undefined array, keep the token of the array reference
        # for the later error message.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str:
        if self.exec_opts.strict_array():
            e_die("Can't index string with %s" % op_str,
                  loc.WordPart(part))
    # Everything else is a no-op here: BashArray, SparseArray and
    # BashAssoc are fine as they are, and the other YSH types such as
    # List, Dict, and Float are not supported.  Error messages for those
    # are printed later, so the object is returned without modification.

    return val
1265
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Evaluate a subscript such as ${a[i+1]} and return the element.

    The index (or key, for BashAssoc) is also stored in vtest_place.index
    as an out param.  A missing element yields value.Undef.  An Undef
    input is passed through, a Str input is a fatal error, and any other
    unsupported type raises error.TypeErr.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.name_tok)

        elif case2(value_e.BashArray):
            arr = cast(value.BashArray, UP_val)
            i = self.arith_ev.EvalToInt(index_expr)
            vtest_place.index = a_index.Int(i)

            elem, status = bash_impl.BashArray_GetElement(arr, i)
            if status == error_code_e.IndexOutOfRange:
                # Bash prints a warning here rather than failing; that
                # behavior is followed.
                self.errfmt.Print_(
                    "Index %d out of bounds for array of length %d" %
                    (i, bash_impl.BashArray_Length(arr)),
                    blame_loc=part.name_tok)

            if elem is not None:
                val = value.Str(elem)
            else:
                val = value.Undef

        elif case2(value_e.SparseArray):
            sparse = cast(value.SparseArray, UP_val)
            big_i = self.arith_ev.EvalToBigInt(index_expr)
            vtest_place.index = a_index.Int(mops.BigTruncate(big_i))

            sparse_elem, sparse_status = bash_impl.SparseArray_GetElement(
                sparse, big_i)
            if sparse_status == error_code_e.IndexOutOfRange:
                # Bash prints a warning here rather than failing; that
                # behavior is followed.
                big_len = bash_impl.SparseArray_Length(sparse)
                self.errfmt.Print_(
                    "Index %s out of bounds for array of length %s" %
                    (mops.ToStr(big_i), mops.ToStr(big_len)),
                    blame_loc=part.name_tok)

            if sparse_elem is not None:
                val = value.Str(sparse_elem)
            else:
                val = value.Undef

        elif case2(value_e.BashAssoc):
            assoc = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            blame = location.TokenForArith(index_expr)
            key = self.arith_ev.EvalWordToString(index_expr,
                                                 blame_loc=blame)

            vtest_place.index = a_index.Str(key)  # out param
            assoc_elem = bash_impl.BashAssoc_GetElement(assoc, key)

            if assoc_elem is not None:
                val = value.Str(assoc_elem)
            else:
                val = value.Undef

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1344
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part.

    Args:
      parts: the word parts between the double quotes
      part_vals: output param to append to.

    Every part is evaluated with the QUOTED flag.  The result can contain
    arrays:

      $ a=(1 2); b=(3); c=(4 5)
      $ argv "${a[@]}${b[@]}${c[@]}"
      ['1', '234', '5']

    Example with multiple parts:

      $ argv "${a[@]}${undef[@]:-${c[@]}}"
      ['1', '24', '5']
    """
    if len(parts):
        for child in parts:
            self._EvalWordPart(child, part_vals, QUOTED)
    else:
        # Special case for "".  The parser outputs (DoubleQuoted []),
        # instead of (DoubleQuoted [Literal '']).  This is better, but it
        # has to be checked for here.
        part_vals.append(Piece('', True, False))
1371
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"

    The parts are evaluated with _EvalDoubleQuoted and then concatenated,
    with dq_part.left as the location passed to _ConcatPartVals.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    joined = self._ConcatPartVals(evaluated, dq_part.left)
    return joined
1381
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array like $* to a string.

    The elements that are not None are joined with the splitter's join
    character.
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()
    present = []  # type: List[str]
    for item in bash_impl.BashArray_GetValues(val):
        if item is not None:
            present.append(item)
    return value.Str(join_char.join(present))
1389
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty value, or fail under nounset.

    Any value that is not Undef is returned as is.  For Undef:

    - if vsub_state.array_ref is set, the result is an empty BashArray
    - otherwise the result is an empty Str

    With 'nounset' on, both cases are fatal errors instead.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    array_tok = vsub_state.array_ref
    if array_tok is None:
        if not self.exec_opts.nounset():
            return value.Str('')
        else:
            tok_str = lexer.TokenVal(name_tok)
            # Don't show the leading $ of a token like $foo
            if tok_str.startswith('$'):
                name = tok_str[1:]
            else:
                name = tok_str
            e_die('Undefined variable %r' % name, name_tok)
    else:
        if not self.exec_opts.nounset():
            return value.BashArray([])
        else:
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
1411
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a BracedVarSub, if there is one.

    With a bracket op, this dispatches to _WholeArray or _ArrayIndex.
    Without one, a named array value either decays (when ShouldArrayDecay
    allows it) or is a fatal error, because it's referred to as a scalar.
    """
    if not part.bracket_op:
        var_name = vtest_place.name
        if var_name is None:
            return val

        is_array = val.tag() in (value_e.BashArray, value_e.SparseArray,
                                 value_e.BashAssoc)
        if is_array:
            has_no_ops = not part.prefix_op and not part.suffix_op
            if not ShouldArrayDecay(var_name, self.exec_opts, has_no_ops):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, loc.WordPart(part))
            else:
                # for ${BASH_SOURCE}, etc.
                val = DecayArray(val)
        return val

    with tagswitch(part.bracket_op) as case:
        if case(bracket_op_e.WholeArray):
            val = self._WholeArray(val, part, quoted, vsub_state)

        elif case(bracket_op_e.ArrayIndex):
            val = self._ArrayIndex(val, part, vtest_place)

        else:
            raise AssertionError(part.bracket_op.tag())

    return val
1441
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value of a var ref and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t.
    """
    name_tok = part.name_tok

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if name_tok.id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif name_tok.id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(name_tok.id, quoted, vsub_state)

    # update h-value (i.e., the holder of the current value)
    vsub_state.h_value = val

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # The bracket op is evaluated with _allow_command_sub turned off
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1474
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the BracedVarSub to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context

    There are several early returns: the ${!prefix@} name query, the
    ${#var} length, and a test op that already appended what's needed.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other form
                # is joined into one string.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
    # Record the holder of the value before the bracket op changes 'val'
    vsub_state.h_value = val

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_State.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok,
                                         vsub_state):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1655
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate evaluated parts into a single string.

    Args:
      part_vals: String and Array part values; any other tag is an
        AssertionError
      location: blamed when an array appears under strict_array

    Arrays are joined with a space, skipping None entries, unless
    strict_array is on, in which case they're a fatal error.
    """
    pieces = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                pv = cast(Piece, UP_pv)
                text = pv.s

            elif case(part_value_e.Array):
                pv = cast(part_value.Array, UP_pv)
                if not self.exec_opts.strict_array():
                    # It appears to not respect IFS
                    # TODO: eliminate double join()?
                    present = [
                        item for item in pv.strs if item is not None
                    ]
                    text = ' '.join(present)
                else:
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)

            else:
                raise AssertionError()

        pieces.append(text)

    return ''.join(pieces)
1685
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution to a single str.

    Used for double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"

    The substitution is evaluated as unquoted, and errors from the
    concatenation blame the ${ location (part.left).
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    joined = self._ConcatPartVals(evaluated, part.left)
    return joined
1696
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a substitution like $foo, $1 or $@ and append the result.

    Args:
      part: the SimpleVarSub to evaluate
      part_vals: output param to append to
      quoted: whether the substitution appears in a quoted context

    A named array is a fatal error unless ShouldArrayDecay allows it to
    decay.
    """
    tok = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok.id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        is_array = val.tag() in (value_e.BashArray, value_e.SparseArray,
                                 value_e.BashAssoc)
        if is_array:
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            else:
                # for $BASH_SOURCE, etc.
                val = DecayArray(val)

    elif tok.id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok.id, quoted, vsub_state)

    val = self._ProcessUndef(val, tok, vsub_state)

    # An array is joined into a string when join_array was requested
    UP_val = val
    if val.tag() == value_e.BashArray:
        arr = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(arr)
        else:
            val = arr

    part_vals.append(_ValueToPartValue(val, quoted, part))
1738
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple substitution like $foo to a single str.

    Used for double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"

    The substitution is evaluated as unquoted, and errors from the
    concatenation blame node.tok.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    joined = self._ConcatPartVals(evaluated, node.tok)
    return joined
1748
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Args:
      part: the extended glob; part.op is the opening token and part.arms
        are the alternatives
      part_vals: output param to append to

    The opener, the '|' separators and the closing ')' are appended as
    Pieces that are neither quoted nor split.  The arms are evaluated in
    between with the EXTGLOB_NESTED flag.
    """
    op = part.op
    if op.id == Id.ExtGlob_Comma:
        op_str = '@('
    else:
        op_str = lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    for i, w in enumerate(part.arms):
        if i != 0:
            # The separator is a bare pipe.  A backslash in front of it
            # would turn the alternation into a literal '|' in the pattern.
            part_vals.append(Piece('|', False, False))  # separator
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(w, part_vals, EXTGLOB_NESTED)
    part_vals.append(Piece(')', False, False))  # closing )
1766
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values to translate
      w: the word, only used as the location of error messages
      glob_parts: output param; gets '*' in place of each extglob
      fnmatch_parts: output param; gets the full extglob text

    An Array part value is a fatal error.
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(part_val.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    s = part_val.s
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # keep appending fnmatch_parts, but replace glob_parts with
                # '*'.  The nested glob parts go to a throwaway list.
                self._TranslateExtGlob(part_val.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1802
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to
      flags: bit flags; QUOTED and IS_SUBST are read here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals aren't valid as word parts here
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            # The left token tells command subs and process subs apart
            id_ = part.left_token.id
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are emitted around the evaluated child, if any
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1919
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending to part_vals.

    An Empty word becomes a single empty Piece, which is quoted according
    to the QUOTED flag and split otherwise.  A Compound word is handed to
    _EvalWordToParts with the same flags.
    """
    is_quoted = bool(eval_flags & QUOTED)

    UP_w = w
    tag = w.tag()
    if tag == rhs_word_e.Empty:
        part_vals.append(Piece('', is_quoted, not is_quoted))
    elif tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, UP_w)
        self._EvalWordToParts(compound, part_vals, eval_flags=eval_flags)
    else:
        raise AssertionError()
1935
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Returns:
      Appends to part_vals.  Note that this is a TREE.

    A word with an extended glob part is a special case, because of the
    way glob() and then fnmatch(..., FNM_EXTMATCH) are used to implement
    extended globs.  It's hard to carry that extra information all the way
    past the word splitting stage.

    OSH semantic limitations: If a word has an extended glob part, then
    1. It can't have an array
    2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # From here on, we parsed a word_part.ExtGlob().  What happens depends
    # on what the caller REQUESTED.
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  TWO VARIANTS of the word are
        # needed because of the way libc is used:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1992
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the part values to stringify
      w: the word, only used as the location of error messages.  It could
        just be a span ID.
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE selects how quoted strings
        are escaped
      strs: output param to append to
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                pv = cast(Piece, UP_pv)
                text = pv.s
                if pv.quoted:
                    if eval_flags & QUOTE_FNMATCH:
                        # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                        text = glob_.GlobEscape(text)
                    elif eval_flags & QUOTE_ERE:
                        text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                pv = cast(part_value.Array, UP_pv)
                if not self.exec_opts.strict_array():
                    # It appears to not respect IFS
                    # TODO: eliminate double join()?
                    present = [
                        item for item in pv.strs if item is not None
                    ]
                    strs.append(' '.join(present))
                else:
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather than the
                    # parts.  Problem: the word is a TREE of parts, but we only have a
                    # flat list of part_vals.  The only case where we really get arrays
                    # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')

            elif case(part_value_e.ExtGlob):
                pv = cast(part_value.ExtGlob, UP_pv)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call on the nested part values
                self._PartValsToString(pv.part_vals, w, eval_flags, strs)

            else:
                raise AssertionError()
2048
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Evaluate a compound word to a single string value.

    Args:
      UP_w: Must be a CompoundWord (asserted).
      eval_flags: Can contain a quoting algorithm, e.g. QUOTE_FNMATCH.  It
                  is applied when the parts are flattened to a string, not
                  while the parts are evaluated.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # The fast path is only valid with no flags, because QUOTE_FNMATCH etc.
    # breaks the optimization.
    if eval_flags == 0:
        literal = word_.FastStrEval(w)
        if literal is not None:
            return value.Str(literal)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark
    evaluated = []  # type: List[part_value_t]
    for part in w.parts:
        # Parts are always evaluated with flags 0 -- eval_flags isn't used
        # here, which is slightly confusing.
        self._EvalWordPart(part, evaluated, 0)

    chunks = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, chunks)
    return value.Str(''.join(chunks))
2076
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but also reports whether we got an ExtGlob.

    Returns:
      (pattern, has_extglob).  The pattern is built with QUOTE_FNMATCH
      escaping.  has_extglob is True if any top-level part of the word is
      an ExtGlob.  An empty RHS gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for part in w.parts:
        if part.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        # Parts are evaluated with flags 0; the quoting flag is applied
        # below, which is slightly confusing.
        self._EvalWordPart(part, evaluated, 0)

    chunks = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, chunks)
    return value.Str(''.join(chunks)), saw_extglob
2097
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Evaluate a word to a string without letting errors escape.

    Wrapper around EvalWordToString.  Runtime errors like $(( 1 / 0 )),
    I/O errors, and Ctrl-C are turned into placeholder strings, and
    mutations of $? like $(exit 42) are contained.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers "sandboxes" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as runtime_err:
            detail = runtime_err.UserErrorString()
            result = value.Str('<Runtime error: %s>' % detail)

        except (IOError, OSError) as io_err:
            detail = pyutil.strerror(io_err)
            result = value.Str('<I/O error: %s>' % detail)

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2120
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Evaluate the right-hand side of an assignment.

    There is no splitting.

    Args:
      UP_w: Either rhs_word.Empty or a CompoundWord.

    Returns:
      value.Str('') for an empty RHS; value.BashArray for a=(1 2);
      value.BashAssoc for an associative array literal; otherwise the
      value.Str produced by EvalWordToString().
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so compare against the rhs_word_e tag, as
    # EvalWordToPattern() does.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            # Elements are brace-expanded, then evaluated as a word sequence
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # Each key and each value is evaluated to a single string
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2157
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args, appended to argv.

    Steps:
      1. A frame whose pieces are all empty and all unquoted is elided.
      2. A frame whose pieces are all quoted is joined into a single arg,
         with no splitting or globbing.
      3. Otherwise each piece is escaped, the result is joined and passed
         to self.splitter.SplitForWordEval(), and each resulting arg that
         looks like a glob is expanded with self.globber.Expand().

    Args:
      frame: The pieces making up this frame.
      argv: Output list, mutated in place.

    Raises:
      error.FailGlob: if self.globber.Expand() returns a negative count.
    """
    all_empty = True
    all_quoted = True
    any_quoted = False

    #log('--- frame %s', frame)

    # One pass over the frame to compute the three summary flags
    for piece in frame:
        if len(piece.s):
            all_empty = False

        if piece.quoted:
            any_quoted = True
        else:
            all_quoted = False

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if all_empty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if all_quoted:
        tmp = [piece.s for piece in frame]
        a = ''.join(tmp)
        argv.append(a)
        return

    will_glob = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # First layer: escaping for the globbing step
        if will_glob and piece.quoted:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        # Second layer: escaping for the splitting step.  Pieces that
        # aren't subject to splitting go through the splitter's own
        # Escape().
        if piece.do_split:
            frag = _BackslashEscape(frag)
        else:
            frag = self.splitter.Escape(frag)

        frags.append(frag)

    if 0:
        log('---')
        log('FRAGS')
        for i, frag in enumerate(frags):
            log('(%d) %s', i, frag)
        log('')

    flat = ''.join(frags)
    #log('flat: %r', flat)

    args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(args) == 0 and any_quoted:
        argv.append('')
        return

    #log('split args: %r', args)
    for a in args:
        if glob_.LooksLikeGlob(a):
            # Expand() appends matches to argv; a negative return value is
            # reported as a failed glob.
            n = self.globber.Expand(a, argv)
            if n < 0:
                # TODO: location info, with span IDs carried through the frame
                raise error.FailGlob('Pattern %r matched no files' % a,
                                     loc.Missing)
        else:
            # Not a glob: undo the glob escaping applied above
            argv.append(glob_.GlobUnescape(a))
2242
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of args, without splitting or globbing.

    Helper for _EvalAssignBuiltin, since splitting and globbing are
    disabled for assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalWordToParts(w, evaluated, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(evaluated):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        # Concatenate the pieces verbatim: no split or glob
        result.append(''.join([piece.s for piece in frame]))

    #log('argv: %s', result)
    return result
2262
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

        x='foo=bar'
        local a=(1 2) $x

    Grammar:

        ('builtin' | 'command')* keyword flag* pair*
        flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin, passed through to the
                  result.
      arg0: The evaluated name of the builtin; it becomes the first flag.
      words: All words of the command, including any leading meta
             builtins.
      meta_offset: Index in 'words' of the assignment keyword.  Words
                   after it are evaluated here.

    Returns:
      cmd_value.Assign with flags, their locations (one per flag), and
      the evaluated assignment pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # flags and flag_locs are parallel lists: every append to one must be
    # matched by an append to the other.
    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Slice the operator off the right of the token: two chars
                # for the += form, one otherwise.
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the operator, e.g. x=
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning "function" for
                    # all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # Function name after -f / -F.  Record its location
                        # too, so flag_locs stays the same length as flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2349
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate the command as an assignment builtin if arg0 names one.

    Returns:
      The result of _EvalAssignBuiltin(), or None when arg0 is not found
      by consts.LookupAssignBuiltin().
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2357
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from a part value.

    Only an UNQUOTED string part can name an assignment builtin; anything
    else gives None.
    """
    UP_val0 = val0
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, UP_val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2366
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    There is no word splitting.  Globbing only happens for words that
    look like static globs.

    Returns:
      cmd_value.Argv with parallel lists of strings and locations, or the
      result of an assignment builtin when allow_assign is set and the
      first word names one.
    """
    out_strs = []  # type: List[str]
    out_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        if allow_assign and i == meta_offset:
            # No globbing in the first arg for command.Simple.
            first_args = self._EvalWordToArgv(w)

            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first_args) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    first_args[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            for arg in first_args:
                out_strs.append(arg)
                out_locs.append(w)

        elif glob_.LooksLikeStaticGlob(w):
            pattern = self.EvalWordToString(w)  # respects strict-array
            num_matches = self.globber.Expand(pattern.s, out_strs)
            if num_matches < 0:
                raise error.FailGlob('Pattern %r matched no files' % pattern.s,
                                     w)
            # One location per match, to stay parallel to out_strs
            for _ in xrange(num_matches):
                out_locs.append(w)

        else:
            evaluated = []  # type: List[part_value_t]
            self._EvalWordToParts(w, evaluated, 0)  # not quoted

            if 0:
                log('')
                log('Static: part_vals after _EvalWordToParts:')
                for entry in evaluated:
                    log(' %s', entry)

            # Still need to process
            frames = _MakeWordFrames(evaluated)

            if 0:
                log('')
                log('Static: frames after _MakeWordFrames:')
                for entry in frames:
                    log(' %s', entry)

            # We will still allow x"${a[@]}"x, though it's deprecated by @a,
            # which disallows such expressions at parse time.
            for frame in frames:
                if len(frame) == 0:  # empty array gives empty frame!
                    continue
                # no split or glob
                out_strs.append(''.join([piece.s for piece in frame]))
                out_locs.append(w)

    assert len(out_strs) == len(out_locs), '%s vs. %d' % (out_strs,
                                                          len(out_locs))
    return cmd_value.Argv(out_strs, out_locs, is_last_cmd, None, None)
2429
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    When the simple_word_eval option is on, this delegates to
    SimpleEvalWordSequence2().

    Args:
      words: The words to evaluate.
      is_last_cmd: Passed through to the resulting cmd_value.Argv.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Argv with parallel lists of strings and locations, or the
      result of an assignment builtin when one is detected at position
      meta_offset and allow_assign is set.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    # first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    # "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    # off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    # Length of strs at the end of the previous iteration.  It's used to
    # count how many args the current word produced.
    n = 0
    for i, w in enumerate(words):
        # Fast path: a non-None result is used directly as a single arg
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            # A meta builtin shifts the position where an assignment
            # builtin may appear.
            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            # Bug fix: n must be updated on every loop iteration
            n = len(strs)
            assert len(strs) == len(locs), strs
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log(' %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log(' %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.
        n_next = len(strs)
        for _ in xrange(n_next - n):
            locs.append(w)
        n = n_next

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2537
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a list of strings, for arrays and for loops.

    They don't allow assignment builtins, so the result is always an Argv.
    """
    # is_last_cmd is irrelevant here, so pass False
    UP_result = self.EvalWordSequence2(words, False)
    assert UP_result.tag() == cmd_value_e.Argv

    argv_val = cast(cmd_value.Argv, UP_result)
    return argv_val.argv
2548
2549
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs with an executor.

    The shell_ex attribute starts out as None and is checked by
    CheckCircularDeps().
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and wrap its stdout as a part value."""
        captured = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # A single piece; the last argument is 'not quoted'
            return Piece(captured, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(captured)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return its device path as a piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2596
2597
# Placeholder returned by CompletionWordEvaluator._EvalCommandSub() in place
# of real command sub output.
_DUMMY = '__NO_COMMAND_SUB__'
2599
2600
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings
    instead of being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder instead of running the command sub."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        # The Left_AtParen form yields an array, here with one dummy element
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2640
2641
2642	# vim: sw=4