osh/word_eval.py

OILS / osh / word_eval.py View on Github | oilshell.org

2542 lines, 1549 significant

1	"""
2	word_eval.py - Evaluator for the word language.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6	from _devbuild.gen.syntax_asdl import (
7	Token,
8	SimpleVarSub,
9	loc,
10	loc_t,
11	BracedVarSub,
12	CommandSub,
13	bracket_op,
14	bracket_op_e,
15	suffix_op,
16	suffix_op_e,
17	ShArrayLiteral,
18	SingleQuoted,
19	DoubleQuoted,
20	word_e,
21	word_t,
22	CompoundWord,
23	rhs_word,
24	rhs_word_e,
25	rhs_word_t,
26	word_part,
27	word_part_e,
28	)
29	from _devbuild.gen.runtime_asdl import (
30	part_value,
31	part_value_e,
32	part_value_t,
33	cmd_value,
34	cmd_value_e,
35	cmd_value_t,
36	error_code_e,
37	AssignArg,
38	a_index,
39	a_index_e,
40	VTestPlace,
41	VarSubState,
42	Piece,
43	)
44	from _devbuild.gen.option_asdl import option_i, builtin_i
45	from _devbuild.gen.value_asdl import (
46	value,
47	value_e,
48	value_t,
49	sh_lvalue,
50	sh_lvalue_t,
51	)
52	from core import bash_impl
53	from core import error
54	from core import pyos
55	from core import pyutil
56	from core import state
57	from display import ui
58	from core import util
59	from data_lang import j8
60	from data_lang import j8_lite
61	from core.error import e_die
62	from frontend import consts
63	from frontend import lexer
64	from frontend import location
65	from mycpp import mops
66	from mycpp.mylib import log, tagswitch, NewDict
67	from osh import braces
68	from osh import glob_
69	from osh import string_ops
70	from osh import word_
71	from ysh import expr_eval
72	from ysh import val_ops
73
74	from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76	if TYPE_CHECKING:
77	from _devbuild.gen.syntax_asdl import word_part_t
78	from _devbuild.gen.option_asdl import builtin_t
79	from core import optview
80	from core.state import Mem
81	from core.vm import _Executor
82	from osh.split import SplitContext
83	from osh import prompt
84	from osh import sh_expr_eval
85
# Bit flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Each flag occupies its own bit, so they are combined with | (for example,
# _ApplyTestOp starts from IS_SUBST and ORs in QUOTED when the substitution is
# quoted).
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} may be read as shorthand for ${a[0]}.

    Args:
      var_name: name of the variable being substituted
      exec_opts: option view; only strict_array() is consulted
      is_plain_var_sub: whether this is a plain variable substitution

    Returns:
      True when strict_array is off.  With strict_array on, True only for a
      plain substitution of one of the names in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    if not is_plain_var_sub:
        return False
    return var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Turn an array value into its element 0, the way ${array} does.

    A BashArray is looked up at index 0 and a BashAssoc at key '0'.  Any other
    value is a programming error (AssertionError).

    Returns:
      value.Str holding the element, or value.Undef when there is none.
    """
    tag = val.tag()
    if tag == value_e.BashArray:
        first, error_code = bash_impl.BashArray_GetElement(
            cast(value.BashArray, val), 0)

        # Index 0 should never produce the out-of-bounds error.
        assert error_code == error_code_e.OK

    elif tag == value_e.BashAssoc:
        first = bash_impl.BashAssoc_GetElement(cast(value.BashAssoc, val),
                                               '0')
    else:
        raise AssertionError(val.tag())

    if first is None:
        return value.Undef
    return value.Str(first)
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' meta builtin.

    Callers use this to see through prefixes such as:

        builtin local
        command local
        builtin builtin local
        builtin command local

    Assignment builtins have different WORD EVALUATION RULES for a=$x (no
    word splitting), so the detection has to happen during word evaluation;
    it seems hard to do in meta_oils.Builtin() or meta_oils.Command().
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether the first part value is an unquoted meta builtin name.

    Only an unquoted string part can match; anything else yields False.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse one argument to declare, export, etc. at runtime.

    This is the dynamic fallback for arguments that were not parsed
    statically.  A fatal error blaming blame_word is raised when arg does not
    match consts.ASSIGN_ARG_RE.

    Returns:
      AssignArg(name, value or None, append flag, blame_word)
    """
    # Note: caching regcomp() would be nicer, but there is no API for it and
    # this is probably not a bottleneck.
    match = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if match is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    name = match[1]
    # match[2] only exists for grouping; ERE has no non-capturing groups.
    operator = match[3]
    assert operator is not None, operator

    if len(operator) == 0:  # declare NAME -- no operator, so no value
        return AssignArg(name, None, False, blame_word)

    # declare NAME=value or NAME+=value
    rhs = value.Str(match[4])  # type: Optional[value_t]
    is_append = operator[0] == '+'
    return AssignArg(name, rhs, is_append, blame_word)
186
187
188	# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189	def _BackslashEscape(s):
190	# type: (str) -> str
191	"""Double up backslashes.
192
193	Useful for strings about to be globbed and strings about to be IFS
194	escaped.
195	"""
196	return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the substituted value
      quoted: whether the substitution is quoted
      part_loc: word part used as the location for errors

    Raises:
      error.TypeErr: if the value's type can't be substituted into a word
    """
    unquoted = not quoted
    tag = val.tag()

    if tag == value_e.Undef:
        # Happens for ${undef+foo}: _ProcessUndef was skipped, but we still
        # have to append to the empty string.
        return Piece('', quoted, unquoted)

    if tag == value_e.Str:
        str_val = cast(value.Str, val)
        return Piece(str_val.s, quoted, unquoted)

    if tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        return part_value.Array(bash_impl.BashArray_GetValues(array_val))

    if tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        # bash behavior: splice values!
        return part_value.Array(bash_impl.BashAssoc_GetValues(assoc_val))

    # Cases added for YSH.  value_e.List is listed so that we get the error
    # message from val_ops.Stringify().
    if tag in (value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
               value_e.Eggex, value_e.List):
        s = val_ops.Stringify(val, loc.WordPart(part_loc), 'Word eval ')
        return Piece(s, quoted, unquoted)

    raise error.TypeErr(val, "Can't substitute into word",
                        loc.WordPart(part_loc))
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the part values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a run of pieces that yields zero or more args and is never joined with
    a neighboring frame.  Frames exist because of arrays like "$@" and
    "${a[@]}": every array element after the first starts a new frame.

    Example:

        a=(1 '2 3' 4)
        x=x
        y=y

        # This word
        $x"${a[@]}"$y

        # results in three frames:
        [ ('x', False, True), ('1', True, False) ]
        [ ('2 3', True, False) ]
        [ ('4', True, False), ('y', False, True) ]

    TODO: a flatter representation may be more efficient for common cases
    like "$x" or "${x}":

        Frames = (List[Piece] pieces, List[int] break_indices)

    where break_indices mark the end of each frame, together with
    _EvalWordFrame(pieces: List[Piece], start: int, end: int).
    """
    frame = []  # type: List[Piece]
    all_frames = [frame]

    for part_val in part_vals:
        tag = part_val.tag()

        if tag == part_value_e.String:
            frame.append(cast(Piece, part_val))

        elif tag == part_value_e.Array:
            array_part = cast(part_value.Array, part_val)

            seen_element = False
            for s in array_part.strs:
                if s is None:
                    continue  # undefined array entries are ignored

                # Array parts are always quoted; otherwise they would have
                # decayed to a string.
                piece = Piece(s, True, False)
                if seen_element:
                    # Later elements each open a fresh singleton frame.
                    frame = [piece]
                    all_frames.append(frame)
                else:
                    # The first element joins whatever precedes the array.
                    frame.append(piece)
                    seen_element = True

        else:
            raise AssertionError()

    return all_frames
307
308
309	# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into one string, e.g. to decay ${a=x"$@"x}.

    String parts contribute their text.  Array parts contribute their defined
    elements joined by join_char.  The contributions are then concatenated
    with no separator.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        tag = part_val.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, part_val)
            chunks.append(piece.s)

        elif tag == part_value_e.Array:
            array_part = cast(part_value.Array, part_val)
            # TODO: Eliminate double join for speed?
            defined = []  # type: List[str]
            for s in array_part.strs:
                if s is not None:
                    defined.append(s)
            chunks.append(join_char.join(defined))

        else:
            raise AssertionError()

    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Compute the result of a slice ${x:offset:length} on an evaluated value.

    Args:
      val: value to slice (Str, BashArray or SparseArray)
      offset: start position; negative values count from the end
      length: only meaningful when has_length is True
      has_length: whether a length was given
      part: the substitution, used as the location for errors
      arg0_val: $0 when slicing positional arguments, which makes offsets
        count $0; None otherwise

    Returns:
      value.Str for a string, value.BashArray for either array type.

    Raises:
      Fatal error (e_die) for a negative array length or an associative
      array; error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            begin = mops.BigTruncate(offset)
            if begin < 0:  # Compute offset with unicode
                # Walk backwards from the end, one character per step.
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    # (counted back from the end of the string, not a count
                    # of characters to take)
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                offset = mops.Add(offset, array_length)

            # Still negative after adding the length: the result is empty.
            if bash_impl.BigInt_Less(offset, mops.ZERO):
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # Offset 0 selects $0 itself, which uses up one unit
                        # of the length.
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    orig = bash_impl.BashArray_GetValues(val)
                    n = len(orig)

                    strs = []
                    i = mops.BigTruncate(offset)
                    count = 0
                    while i < n:
                        if has_length and count == length:  # length could be 0
                            break
                        s = orig[i]
                        if s is not None:  # Unset elements don't count towards the length
                            strs.append(s)
                            count += 1
                        i += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort so
                    # that we only extract the first LENGTH elements of the
                    # indices greater or equal to OFFSET.
                    # i becomes the position of the first key >= offset.
                    i = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        i = i + 1

                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[i:i +
                                                                    length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[i:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    new_list = [arg0_val.s]
                    new_list.extend(strs)
                    strs = new_list

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """Word-to-string interface used by ArithEvaluator / BoolEvaluator.

    Subclasses must override EvalWordToString().
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate word w to a string value; must be overridden."""
        raise NotImplementedError()
480
481
482	def _GetDollarHyphen(exec_opts):
483	# type: (optview.Exec) -> str
484	chars = [] # type: List[str]
485	if exec_opts.interactive():
486	chars.append('i')
487
488	if exec_opts.errexit():
489	chars.append('e')
490	if exec_opts.noglob():
491	chars.append('f')
492	if exec_opts.noexec():
493	chars.append('n')
494	if exec_opts.nounset():
495	chars.append('u')
496	# NO letter for pipefail?
497	if exec_opts.xtrace():
498	chars.append('x')
499	if exec_opts.noclobber():
500	chars.append('C')
501
502	# bash has:
503	# - c for sh -c, i for sh -i (mksh also has this)
504	# - h for hashing (mksh also has this)
505	# - B for brace expansion
506	return ''.join(chars)
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user to home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home dir: $HOME first, then a libc call.

        Important: the libc call can FAIL, which is why $HOME is preferred.
        See issue #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            home = pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluate ~ and ~user, given a Lit_TildeLike token.

        When the lookup fails, this is a fatal error under strict_tilde;
        otherwise the tilde expression is returned literally.
        """
        user = part.user_name
        if user is None:
            home_dir = self.GetMyHomeDir()
        else:
            home_dir = pyos.GetHomeDir(user)

        if home_dir is not None:
            return home_dir

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
550
551
552	class AbstractWordEvaluator(StringWordEvaluator):
553	"""Abstract base class for word evaluators.
554
555	Public entry points:
556	EvalWordToString EvalForPlugin EvalRhsWord
557	EvalWordSequence EvalWordSequence2
558	"""
559
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators and create the globber.

    The arith_ev, expr_ev, prompt_ev and unsafe_arith attributes start as
    None here -- presumably they are assigned by the caller afterwards (see
    CheckCircularDeps); confirm against the construction site.
    """
    # Collaborators handed in by the caller.
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.tilde_ev = tilde_ev
    self.splitter = splitter
    self.errfmt = errfmt

    self.globber = glob_.Globber(exec_opts)

    # Evaluators that are not available yet at construction time.
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith
585
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract hook; concrete evaluators must override it."""
    raise NotImplementedError()
589
590	def _EvalCommandSub(self, cs_part, quoted):
591	# type: (CommandSub, bool) -> part_value_t
592	"""Abstract since it has a side effect."""
593	raise NotImplementedError()
594
595	def _EvalProcessSub(self, cs_part):
596	# type: (CommandSub) -> part_value_t
597	"""Abstract since it has a side effect."""
598	raise NotImplementedError()
599
600	def _EvalVarNum(self, var_num):
601	# type: (int) -> value_t
602	assert var_num >= 0
603	return self.mem.GetArgNum(var_num)
604
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate a special variable such as $?, $@, $* or $-.

    $@ and $* both produce the argv array and record in
    vsub_state.join_array whether it should later be joined into a string:

    - "$@" (quoted) stays an ARRAY.
    - $@ (unquoted) is joined, and the resulting STRING is then subject to
      splitting.
    - $* and "$*" are always joined.
    """
    if op_id == Id.VSub_At:
        argv_val = value.BashArray(self.mem.GetArgv())  # type: value_t
        # "$@" evaluates to an array, $@ should be decayed
        vsub_state.join_array = not quoted
        return argv_val

    if op_id == Id.VSub_Star:
        star_val = value.BashArray(self.mem.GetArgv())  # type: value_t
        # $* "$*" are both decayed
        vsub_state.join_array = True
        return star_val

    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    return self.mem.GetSpecialVar(op_id)
634
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
):
    # type: (...) -> bool
    """Apply a test operator: ${a-x} ${a:-x} ${a+x} ${a:+x} ${a=x} ${a:=x}
    ${a?x} ${a:?x}.

    The value counts as "falsey" when it is Undef, an empty BashArray or an
    empty BashAssoc.  A Str is falsey only when it is empty AND the operator
    is one of the colon variants.  Every other value type is not falsey.

    Args:
      val: the value being tested
      op: the operator token plus its argument word
      quoted: whether the substitution is quoted
      part_vals: out param; the evaluated argument word is appended here
      vtest_place: name (and optional index) to assign to, for = and :=
      blame_token: location used for fatal errors

    Returns:
      Whether part_vals was mutated.

    Raises:
      Fatal error (e_die) when = or := would assign to a special variable,
      and when ? or :? finds the value falsey.

    Example of needing multiple part values:

        echo X-${a:-'def'"ault"}-X

    Two part values come back from the BracedVarSub.  Also consider:

        echo ${a:-x"$@"x}
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            # Only the colon variants treat an empty string like unset.
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # TODO: allow undefined
            is_falsey = len(val.strs) == 0

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            is_falsey = len(val.d) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Blame the substitution so the user can find the bad
                # assignment, like the ? / :? branch below does.
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array' could
                # avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
806
def _Count(self, val, token):
    # type: (value_t, Token) -> int
    """Return the length of the value, for ${#var}.

    Strings are counted in UTF-8 characters.  If counting fails with
    error.Strict, the error is re-raised under strict_word_eval; otherwise a
    warning is printed and -1 is returned.

    Raises:
      error.TypeErr: for values that are not Str, BashArray, BashAssoc or
        SparseArray
    """
    tag = val.tag()

    if tag == value_e.BashArray:
        return bash_impl.BashArray_Count(cast(value.BashArray, val))

    if tag == value_e.BashAssoc:
        return bash_impl.BashAssoc_Count(cast(value.BashAssoc, val))

    if tag == value_e.SparseArray:
        return bash_impl.SparseArray_Count(cast(value.SparseArray, val))

    if tag != value_e.Str:
        raise error.TypeErr(
            val, "Length op expected Str, BashArray, BashAssoc", token)

    str_val = cast(value.Str, val)
    # NOTE: Whether bash counts bytes or chars is affected by LANG
    # environment variables.
    # Should we respect that, or another way to select?  set -o
    # count-bytes?

    # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
    try:
        num_chars = string_ops.CountUtf8Chars(str_val.s)
    except error.Strict as e:
        # Attach the location here so it doesn't have to be threaded far
        # down the stack.
        # TODO: It's better to show BOTH this CODE and the actual DATA
        # somehow.
        e.location = token

        if self.exec_opts.strict_word_eval():
            raise

        # NOTE: Doesn't make the command exit with 1; it just returns a
        # length of -1.
        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        return -1

    return num_chars
853
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return the keys of a container as a BashArray, for ${!array[@]}.

    A BashArray yields its indices converted to strings; a BashAssoc yields
    its keys.

    Raises:
      error.TypeErr: if val is neither a BashArray nor a BashAssoc
    """

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            keys = bash_impl.BashAssoc_GetKeys(val)
            return value.BashArray(keys)

        else:
            # The message names the types this op really accepts (it used to
            # say "Str", which is never accepted here).
            raise error.TypeErr(val, 'Keys op expected BashArray, BashAssoc',
                                token)
875
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handle indirect expansion like ${!var} and ${!a[0]}.

    The value is turned into a string, parsed as a variable reference, and
    that reference is then evaluated.

    Args:
      val: value whose contents name the variable to expand
      blame_tok: 'foo' for ${!foo}
      quoted, vsub_state, vtest_place: passed through to _VarRefValue

    Raises:
      error.TypeErr: if val is not Undef, Str, BashArray or BashAssoc
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            # bash-4.4 returned value.Undef here.  bash-5.0 started to treat
            # the variable name to be empty so that the indirection fails.
            var_ref_str = ''

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            var_ref_str = val.s

        elif case(value_e.BashArray):  # caught earlier but OK
            val = cast(value.BashArray, UP_val)
            # Skip unset (None) entries before joining, as the other join
            # sites in this file do; join() can't handle None.
            defined = [
                s for s in bash_impl.BashArray_GetValues(val)
                if s is not None
            ]
            var_ref_str = ' '.join(defined)

        elif case(value_e.BashAssoc):  # caught earlier but OK
            val = cast(value.BashAssoc, UP_val)
            var_ref_str = ' '.join(bash_impl.BashAssoc_GetValues(val))

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)

    bvs_part = self.unsafe_arith.ParseVarRef(var_ref_str, blame_tok)
    return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
907
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix operator (e.g. # ## % %% ^ ^^ , ,,) to a value.

    A Str gives a Str.  A BashArray or BashAssoc gives a BashArray with the
    operator applied to every value.

    Raises:
      error.TypeErr: for any other value type
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)
    if op_kind != Kind.VOp1:
        raise AssertionError(Kind_str(op_kind))

    # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
    # has_extglob is detected so that DoUnarySuffixOp doesn't take the fast
    # shortcut for constant strings.
    pattern, has_extglob = self.EvalWordToPattern(op.arg_word)
    assert pattern.tag() == value_e.Str

    tag = val.tag()
    if tag == value_e.Str:
        str_val = cast(value.Str, val)
        changed = string_ops.DoUnarySuffixOp(str_val.s, op.op, pattern.s,
                                             has_extglob)
        return value.Str(changed)

    if tag == value_e.BashArray:
        values = bash_impl.BashArray_GetValues(cast(value.BashArray, val))
    elif tag == value_e.BashAssoc:
        values = bash_impl.BashAssoc_GetValues(cast(value.BashAssoc, val))
    else:
        raise error.TypeErr(
            val, 'Unary op expected Str, BashArray, BashAssoc',
            op.op)

    # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this too.
    results = []  # type: List[str]
    for s in values:
        results.append(
            string_ops.DoUnarySuffixOp(s, op.op, pattern.s, has_extglob))
    return value.BashArray(results)
957
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply pattern substitution, e.g. ${x/pat/replace} and ${x//pat/replace}.

    A Str gives a Str.  A BashArray or BashAssoc gives a BashArray with the
    replacement applied to every value.

    Raises:
      Fatal error (e_die) when the pattern contains an extended glob;
      error.TypeErr for unsupported value types.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because only * ? etc. are translated to
    # ERE.  There is no straightforward translation from !(*.py) to ERE; that
    # would need an engine that supports negation (derivatives?).
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    replace_str = ''
    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s

    # note: doesn't support self.exec_opts.extglob()!
    # The warnings are currently dropped.  TODO:
    # - Add 'shopt -s strict_glob' mode and expose warnings.
    #   "Glob is not in CANONICAL FORM".
    # - Propagate location info back to the 'op.pat' word.
    regex, warnings = glob_.GlobToERE(pat_val.s)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    tag = val.tag()
    if tag == value_e.Str:
        str_val = cast(value.Str, val)
        return value.Str(replacer.Replace(str_val.s, op))

    if tag == value_e.BashArray:
        values = bash_impl.BashArray_GetValues(cast(value.BashArray, val))
    elif tag == value_e.BashAssoc:
        values = bash_impl.BashAssoc_GetValues(cast(value.BashAssoc, val))
    else:
        raise error.TypeErr(
            val, 'Pat Sub op expected Str, BashArray, BashAssoc',
            op.slash_tok)

    replaced = []  # type: List[str]
    for s in values:
        replaced.append(replacer.Replace(s, op))
    return value.BashArray(replaced)
1011
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate the slice operator ${x:begin:length} on val.

    The begin and length expressions are evaluated arithmetically and the
    work is delegated to _PerformSlice.  When var_name is None ($* or $@),
    $0 is passed along so that offsets count it.

    If slicing raises error.Strict, it is re-raised under strict_word_eval.
    Otherwise a warning is printed and an empty Str or BashArray is returned.
    """
    begin = self.arith_ev.EvalToBigInt(op.begin)

    if op.length:
        has_length = True
        length = self.arith_ev.EvalToInt(op.length)
    else:
        has_length = False
        length = -1  # placeholder; ignored when has_length is False

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        return _PerformSlice(val, begin, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        tag = val.tag()
        if tag == value_e.Str:
            return value.Str('')
        if tag == value_e.BashArray:
            return value.BashArray([])
        raise NotImplementedError()
1043
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value.Str, bool]
    """Apply a nullary operator: ${x@P}, ${x@Q} or ${x@a}.

    Args:
      val: the value the operator is applied to
      op: the operator token; also the location for errors raised here
      var_name: variable name, or None for e.g. ${?@a}
      vsub_token, vsub_state: passed through to _ProcessUndef

    Returns:
      (result string, quoted2), where quoted2 is True only for @Q applied to
      a Str.

    Raises:
      Fatal error (e_die) for unsupported value types and for operators that
      aren't implemented.
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.BashAssoc):
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    # Unset (None) entries are dropped before encoding.
                    values = [s for s in bash_impl.BashArray_GetValues(val) if s is not None]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                result = value.Str(' '.join(tmp))
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # Simulates the attribute letters: 'a' / 'A' from the value type,
        # and 'r' 'x' 'n' from the variable's cell when there is a name.
        # See spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(val) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1129
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the whole-array bracket ops [@] and [*].

    The value itself is returned unchanged; the effect is on vsub_state
    (join_array, and array_ref when the value is Undef).
    """
    which = cast(bracket_op.WholeArray, part.bracket_op).op_id

    if which == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
        symbol = '@'
    elif which == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
        symbol = '*'
    else:
        raise AssertionError(which)  # unknown

    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            # Remember the token of the array reference so that a later
            # error message can point at it.
            vsub_state.array_ref = part.name_tok
        elif case2(value_e.Str):
            if self.exec_opts.strict_array():
                e_die("Can't index string with %s" % symbol,
                      loc.WordPart(part))
        # Everything else is passed through untouched: BashArray,
        # SparseArray and BashAssoc need no work here, and the other YSH
        # types (List, Dict, Float, ...) are unsupported but are reported
        # later, so they are returned without modification.

    return val
1162
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Evaluate an index bracket op such as ${a[i+1]} or ${assoc[key]}.

    Args:
      val: the value being indexed
      part: the var sub; its bracket_op holds the index expression
      vtest_place: out param; .index is set to the evaluated index/key

    Returns:
      value.Str for an element that exists, value.Undef for a missing one.
      An Undef input is returned as is.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.name_tok)

        elif case2(value_e.BashArray):
            dense = cast(value.BashArray, UP_val)
            i = self.arith_ev.EvalToInt(index_expr)
            vtest_place.index = a_index.Int(i)

            elem, status = bash_impl.BashArray_GetElement(dense, i)
            if status == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                self.errfmt.Print_(
                    "Index %d out of bounds for array of length %d" %
                    (i, bash_impl.BashArray_Length(dense)),
                    blame_loc=part.name_tok)

            val = value.Undef
            if elem is not None:
                val = value.Str(elem)

        elif case2(value_e.SparseArray):
            sparse = cast(value.SparseArray, UP_val)
            big_i = self.arith_ev.EvalToBigInt(index_expr)
            vtest_place.index = a_index.Int(mops.BigTruncate(big_i))

            elem, status = bash_impl.SparseArray_GetElement(sparse, big_i)
            if status == error_code_e.IndexOutOfRange:
                # Same warning-only behavior as for BashArray above.
                big_len = bash_impl.SparseArray_Length(sparse)
                self.errfmt.Print_(
                    "Index %s out of bounds for array of length %s" %
                    (mops.ToStr(big_i), mops.ToStr(big_len)),
                    blame_loc=part.name_tok)

            val = value.Undef
            if elem is not None:
                val = value.Str(elem)

        elif case2(value_e.BashAssoc):
            assoc = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                index_expr, blame_loc=location.TokenForArith(index_expr))

            vtest_place.index = a_index.Str(key)  # out param
            elem = bash_impl.BashAssoc_GetElement(assoc, key)

            val = value.Undef
            if elem is not None:
                val = value.Str(elem)

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1241
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part.

    Args:
      parts: the word parts found between the double quotes
      part_vals: output param to append to.

    The result may contain arrays.  For example:

      $ a=(1 2); b=(3); c=(4 5)
      $ argv "${a[@]}${b[@]}${c[@]}"
      ['1', '234', '5']

    And with nested parts:

      $ argv "${a[@]}${undef[@]:-${c[@]}}"
      ['1', '24', '5']
    """
    if len(parts) != 0:
        for inner in parts:
            self._EvalWordPart(inner, part_vals, QUOTED)
        return

    # Special case for "".  The parser outputs (DoubleQuoted []) instead of
    # (DoubleQuoted [Literal '']), so the empty quoted piece has to be
    # produced here.
    part_vals.append(Piece('', True, False))
1268
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors are blamed on the left token of dq_part.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    joined = self._ConcatPartVals(evaluated, dq_part.left)
    return joined
1278
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array such as $* to one string.

    The non-None elements are joined with the splitter's join char.
    """
    assert val.tag() == value_e.BashArray, val
    present = [
        item for item in bash_impl.BashArray_GetValues(val)
        if item is not None
    ]
    join_char = self.splitter.GetJoinChar()
    return value.Str(join_char.join(present))
1286
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty value, or fail under nounset.

    Any value that isn't Undef is returned unchanged.  Otherwise:

    - with nounset on, this is a fatal error that blames the array
      reference saved in vsub_state.array_ref, or else name_tok
    - with nounset off, the result is an empty BashArray when an array
      reference was saved, and an empty Str otherwise
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    fail_on_unset = self.exec_opts.nounset()
    array_tok = vsub_state.array_ref

    if array_tok is not None:
        if fail_on_unset:
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
        return value.BashArray([])

    if fail_on_unset:
        tok_str = lexer.TokenVal(name_tok)
        # Report the name without a leading '$'.
        if tok_str.startswith('$'):
            name = tok_str[1:]
        else:
            name = tok_str
        e_die('Undefined variable %r' % name, name_tok)

    return value.Str('')
1308
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a braced var sub, if it has one.

    With a bracket op, this dispatches to _WholeArray or _ArrayIndex.

    Without one, a named BashArray / BashAssoc that isn't the subject of a
    type query either decays (when ShouldArrayDecay() allows it) or is a
    fatal error.  Everything else is returned unchanged.
    """
    bracket = part.bracket_op
    if bracket:
        with tagswitch(bracket) as case:
            if case(bracket_op_e.WholeArray):
                val = self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                val = self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(bracket.tag())

        return val

    # No bracket op from here on.
    name = vtest_place.name
    if name is None:
        return val
    if val.tag() not in (value_e.BashArray, value_e.BashAssoc):
        return val
    if vsub_state.is_type_query:
        return val

    no_other_ops = not part.prefix_op and not part.suffix_op
    if not ShouldArrayDecay(name, self.exec_opts, no_other_ops):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)"
            % name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1338
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the variable of a braced var sub and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t.
    """
    # 1. Evaluate from (var_name, var_num, token Id) -> value
    tok_id = part.name_tok.id
    if tok_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # Evaluate the bracket op with _allow_command_sub set to False.
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
        return val

    return self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)
1368
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a braced var sub like ${x}, ${#x}, ${!x}, ${x[@]}, ${x:-y}.

    Args:
      part: the BracedVarSub to evaluate
      part_vals: output param to append to.
      quoted: whether the var sub is being evaluated in a quoted context

    There are three early returns that append to part_vals themselves:
    the ${!prefix@} name query, the ${#x} length, and a test op for which
    _ApplyTestOp() returned True.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other form
                # yields the names joined into one piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    # Look at the suffix op BEFORE the bracket op, because the type query
    # flag changes how _EvalBracketOp treats arrays.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef check remains.
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1556
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate part values into a single string.

    Args:
      part_vals: String and Array part values; any other kind is an
        AssertionError
      location: blamed when an array shows up under strict_array

    Arrays are joined with a single space after dropping None entries.
    """
    chunks = []  # type: List[str]
    for entry in part_vals:
        UP_entry = entry
        with tagswitch(entry) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_entry)
                text = piece.s

            elif case(part_value_e.Array):
                array_pv = cast(part_value.Array, UP_entry)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)
                else:
                    # It appears to not respect IFS
                    # TODO: eliminate double join()?
                    present = [
                        item for item in array_pv.strs if item is not None
                    ]
                    text = ' '.join(present)

            else:
                raise AssertionError()

        chunks.append(text)

    return ''.join(chunks)
1586
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a braced var sub, unquoted, to a single str.

    Used for double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    # Errors are blamed on the ${ location.
    joined = self._ConcatPartVals(evaluated, part.left)
    return joined
1597
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a simple var sub like $name, $1 or a special variable.

    Args:
      part: the SimpleVarSub to evaluate
      part_vals: output param; exactly one part value is appended
      quoted: whether the var sub is being evaluated in a quoted context
    """
    vsub_state = VarSubState.CreateNull()
    tok = part.tok

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok.id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok.id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok.id, quoted, vsub_state)

    val = self._ProcessUndef(val, tok, vsub_state)

    # Join the array into a string if join_array was set on vsub_state.
    UP_val = val
    if val.tag() == value_e.BashArray:
        whole = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(whole)
        else:
            val = whole

    part_vals.append(_ValueToPartValue(val, quoted, part))
1638
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple var sub, unquoted, to a single str.

    Used for double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors are blamed on the token of the var sub.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    joined = self._ConcatPartVals(evaluated, node.tok)
    return joined
1648
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Args:
      part: the extended glob word part
      part_vals: output param.  Receives the opening operator, the
        evaluated arms separated by '|', and the closing ')'.

    The operator, separators and closing paren are appended as unquoted
    pieces that are never split.
    """
    op = part.op
    if op.id == Id.ExtGlob_Comma:
        op_str = '@('
    else:
        op_str = lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    for i, w in enumerate(part.arms):
        if i != 0:
            # The separator must be a bare '|'.  Writing it as '\|' is an
            # invalid escape sequence that produces backslash + pipe, which
            # a pattern matcher reads as a literal pipe, not alternation.
            part_vals.append(Piece('|', False, False))  # separator
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(w, part_vals, EXTGLOB_NESTED)
    part_vals.append(Piece(')', False, False))  # closing )
1666
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values
      w: the word, blamed if an array is found
      glob_parts: output param; nested extglobs contribute a single '*'
      fnmatch_parts: output param; nested extglobs are expanded in place
    """
    for entry in part_vals:
        UP_entry = entry
        with tagswitch(entry) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_entry)
                # Quoted pieces are glob-escaped unless noglob is on.
                # Unquoted ones, e.g. the @( and | from _EvalExtGlob(),
                # are used verbatim.
                text = piece.s
                if piece.quoted and not self.exec_opts.noglob():
                    text = glob_.GlobEscape(text)
                glob_parts.append(text)
                fnmatch_parts.append(text)

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                nested = cast(part_value.ExtGlob, UP_entry)
                # keep appending fnmatch_parts, but replace glob_parts with
                # '*'
                self._TranslateExtGlob(nested.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1702
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate one word part, appending the result to part_vals.

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to
      flags: bit flags; only QUOTED and IS_SUBST are read here
    """
    in_quotes = bool(flags & QUOTED)
    in_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))

        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            part_vals.append(Piece(lexer.LazyStr(part), in_quotes, in_subst))

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            part_vals.append(Piece(part.ch, True, False))

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            part_vals.append(Piece(part.sval, True, False))

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            left_id = part.left_token.id
            if left_id in (Id.Left_DollarParen, Id.Left_AtParen,
                           Id.Left_Backtick):
                sub_val = self._EvalCommandSub(
                    part, in_quotes)  # type: part_value_t

            elif left_id in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sub_val = self._EvalProcessSub(part)

            else:
                raise AssertionError(left_id)

            part_vals.append(sub_val)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, in_quotes)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, in_quotes)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not in_quotes
            expanded = self.tilde_ev.Eval(part)
            # NOT split even when unquoted!
            part_vals.append(Piece(expanded, True, False))

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            big = self.arith_ev.EvalToBigInt(part.anode)
            part_vals.append(Piece(mops.ToStr(big), in_quotes, not in_quotes))

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides
            # whether to pass it to fnmatch() or replace it with '*' and
            # pass it to glob().
            flattened = []  # type: List[part_value_t]
            self._EvalExtGlob(part, flattened)  # flattens tree
            part_vals.append(part_value.ExtGlob(flattened))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens themselves are not quoted.
            part_vals.append(Piece('(', False, False))
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            spliced_val = self.mem.GetValue(part.var_name)

            items = self.expr_ev.SpliceValue(spliced_val, part)
            part_vals.append(part_value.Array(items))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_vals.append(self.expr_ev.EvalExprSub(part))

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1819
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending to part_vals.

    An Empty word yields one empty piece, which is quoted exactly when
    QUOTED is set in eval_flags.  A Compound word is handed to
    _EvalWordToParts with the same flags.
    """
    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            in_quotes = bool(eval_flags & QUOTED)
            part_vals.append(Piece('', in_quotes, not in_quotes))

        elif case(rhs_word_e.Compound):
            compound = cast(CompoundWord, UP_w)
            self._EvalWordToParts(compound, part_vals,
                                  eval_flags=eval_flags)

        else:
            raise AssertionError()
1835
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Returns:
      Appends to part_vals.  Note that this is a TREE.

    Words with an extended glob part are a special case, because of the
    way glob() and then fnmatch(..., FNM_EXTMATCH) are used to implement
    extended globs.  It's hard to carry that extra information all the way
    past the word splitting stage.

    OSH semantic limitations: if a word has an extended glob part, then
      1. It can't have an array
      2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # From here on, we parsed a word_part.ExtGlob.  What happens depends on
    # what the caller requested.
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        for_glob = []  # type: List[str]
        for_fnmatch = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, for_glob, for_fnmatch)

        glob_pat = ''.join(for_glob)
        fnmatch_pat = ''.join(for_fnmatch)

        matches = []  # type: List[str]
        count = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if count < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1892
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the part values to flatten
      w: the word blamed on errors.  Note: this could just be a span ID
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE selects how quoted pieces are
        escaped; QUOTE_FNMATCH is also required for extended globs
      strs: output param; string fragments are appended to it
    """
    for entry in part_vals:
        UP_entry = entry
        with tagswitch(entry) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_entry)
                text = piece.s
                if piece.quoted:
                    # Quoted text must match literally, so escape it for
                    # whichever pattern language the caller asked for.
                    if eval_flags & QUOTE_FNMATCH:
                        text = glob_.GlobEscape(text)
                    elif eval_flags & QUOTE_ERE:
                        text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                array_pv = cast(part_value.Array, UP_entry)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather
                    # than the parts.  Problem: the word is a TREE of parts,
                    # but we only have a flat list of part_vals.  The only
                    # case where we really get arrays is "$@", "${a[@]}",
                    # "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')
                else:
                    # It appears to not respect IFS
                    # TODO: eliminate double join()?
                    present = [
                        item for item in array_pv.strs if item is not None
                    ]
                    strs.append(' '.join(present))

            elif case(part_value_e.ExtGlob):
                extglob_pv = cast(part_value.ExtGlob, UP_entry)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(extglob_pv.part_vals, w, eval_flags,
                                       strs)

            else:
                raise AssertionError()
1948
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: can contain a quoting algorithm, which is applied when
        the evaluated parts are turned into a string
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # The fast path is only tried with no flags, since QUOTE_FNMATCH etc.
    # breaks the optimization.
    if eval_flags == 0:
        shortcut = word_.FastStrEval(w)
        if shortcut is not None:
            return value.Str(shortcut)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark
    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        # The parts are evaluated with flags 0, not eval_flags, which is
        # slightly confusing.
        self._EvalWordPart(p, evaluated, 0)

    fragments = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, fragments)
    return value.Str(''.join(fragments))
1976
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    Returns:
      (pattern, has_extglob).  An Empty word gives ('', False).  Quoted
      pieces are escaped with the QUOTE_FNMATCH algorithm.
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        # The parts are evaluated with flags 0, which is slightly confusing
        # given that QUOTE_FNMATCH is applied below.
        self._EvalWordPart(p, evaluated, 0)
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    fragments = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, fragments)
    pattern = value.Str(''.join(fragments))
    return pattern, saw_extglob
1997
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42) are
    handled here: a failure is turned into a placeholder string instead of
    propagating.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers "sandboxes" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as err:
            msg = '<Runtime error: %s>' % err.UserErrorString()
            result = value.Str(msg)

        except (IOError, OSError) as err:
            msg = '<I/O error: %s>' % pyutil.strerror(err)
            result = value.Str(msg)

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2020
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Evaluate the RHS of an assignment.  There is no splitting.

    Returns:
      value.Str('') for an Empty word, a BashArray for a=(1 2), a
      BashAssoc for an associative array literal, and otherwise the word
      evaluated to a Str.
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Only a word made of exactly one part can be an array literal.
    if len(w.parts) != 1:
        return self.EvalWordToString(w)

    only = w.parts[0]
    UP_only = only
    kind = only.tag()

    # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
    # don't look like assignments.
    if kind == word_part_e.ShArrayLiteral:
        array_lit = cast(ShArrayLiteral, UP_only)
        expanded = braces.BraceExpandWords(array_lit.words)
        items = self.EvalWordSequence(expanded)
        return value.BashArray(items)

    if kind == word_part_e.BashAssocLiteral:
        assoc_lit = cast(word_part.BashAssocLiteral, UP_only)
        d = NewDict()  # type: Dict[str, str]
        for pair in assoc_lit.pairs:
            key = self.EvalWordToString(pair.key)
            item = self.EvalWordToString(pair.value)
            d[key.s] = item.s
        return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2057
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args appended to argv.

    A frame that is entirely empty and unquoted is elided.  A frame whose
    pieces are all quoted is joined as-is.  Anything else is escaped, split
    with self.splitter, and then expanded with self.globber.

    Raises:
      error.FailGlob: if self.globber.Expand() returns a negative count.
    """
    has_nonempty = False
    has_unquoted = False
    any_quoted = False

    #log('--- frame %s', frame)

    for piece in frame:
        if len(piece.s) != 0:
            has_nonempty = True

        if piece.quoted:
            any_quoted = True
        else:
            has_unquoted = True

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if not has_nonempty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if not has_unquoted:
        quoted_strs = [piece.s for piece in frame]
        argv.append(''.join(quoted_strs))
        return

    glob_enabled = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    escaped = []  # type: List[str]
    for piece in frame:
        # First layer: protect the piece from the globber.
        if piece.quoted and glob_enabled:
            s = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            s = _BackslashEscape(piece.s)

        # Second layer: protect the piece from the splitter.
        if not piece.do_split:
            s = self.splitter.Escape(s)
        else:
            s = _BackslashEscape(s)

        escaped.append(s)

    if 0:
        log('---')
        log('FRAGS')
        for i, s in enumerate(escaped):
            log('(%d) %s', i, s)
        log('')

    flat = ''.join(escaped)
    #log('flat: %r', flat)

    split_args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if any_quoted and len(split_args) == 0:
        argv.append('')
        return

    #log('split args: %r', split_args)
    for arg in split_args:
        if not glob_.LooksLikeGlob(arg):
            argv.append(glob_.GlobUnescape(arg))
            continue

        # Expand() appends its matches to argv itself.
        num_matched = self.globber.Expand(arg, argv)
        if num_matched < 0:
            # TODO: location info, with span IDs carried through the frame
            raise error.FailGlob('Pattern %r matched no files' % arg,
                                 loc.Missing)
2142
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of args, without splitting or globbing.

    Helper for _EvalAssignBuiltin.  Splitting and globbing are disabled for
    assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, vals, 0)  # not double quoted

    result = []  # type: List[str]
    frames = _MakeWordFrames(vals)
    for frame in frames:
        if len(frame) == 0:  # empty array gives empty frame!
            continue

        piece_strs = [piece.s for piece in frame]
        result.append(''.join(piece_strs))  # no split or glob

    #log('argv: %s', result)
    return result
2162
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin, passed through to the result.
      arg0: name of the assignment builtin; it becomes flags[0].
      words: all the words of the command.
      meta_offset: index in 'words' of the assignment builtin itself; the
        words after it are evaluated here.

    Returns:
      cmd_value.Assign with the flags, a location for each flag, and the
      evaluated assignment args.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    # NOTE(review): arg0 appears to come from words[meta_offset], but the
    # location recorded for it is words[0] -- confirm that this is intended
    # for 'builtin declare ...'.
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name.
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # Names after -f / -F are passed through as flags.
                        # Record a location too, so flag_locs stays parallel
                        # to flags (it used to be skipped here).
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2249
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate 'words' as an assignment builtin if arg0 names one.

    Returns:
      The result of _EvalAssignBuiltin, or None when
      consts.LookupAssignBuiltin(arg0) gives consts.NO_INDEX.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2257
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but for an evaluated part value.

    Only an unquoted part_value_e.String is considered; any other part
    value gives None.
    """
    UP_val0 = val0
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, UP_val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2266
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Words are not split.  Only words for which glob_.LooksLikeStaticGlob()
    is true are passed to the globber.

    Args:
      words: the words to evaluate.
      is_last_cmd: passed through to cmd_value.Argv.
      allow_assign: if True, the first word may select an assignment
        builtin, in which case a cmd_value.Assign is returned instead.

    Raises:
      error.FailGlob: if self.globber.Expand() returns a negative count.
    """
    out_strs = []  # type: List[str]
    out_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if allow_assign and i == meta_offset:
            first_strs = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first_strs) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    first_strs[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            out_strs.extend(first_strs)
            for _ in first_strs:
                out_locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            num_matched = self.globber.Expand(pat.s, out_strs)
            if num_matched < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            # One location per string that Expand() appended.
            for _ in xrange(num_matched):
                out_locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        if 0:
            log('')
            log('Static: part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        # Still need to process
        frames = _MakeWordFrames(part_vals)

        if 0:
            log('')
            log('Static: frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # We will still allow x"${a[@]}"x, though it's deprecated by @a, which
        # disallows such expressions at parse time.
        for frame in frames:
            if len(frame) == 0:  # empty array gives empty frame!
                continue

            frame_strs = [piece.s for piece in frame]
            out_strs.append(''.join(frame_strs))  # no split or glob
            out_locs.append(w)

    assert len(out_strs) == len(out_locs), '%s vs. %d' % (out_strs,
                                                          len(out_locs))
    return cmd_value.Argv(out_strs, out_locs, is_last_cmd, None, None)
2329
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    When the simple_word_eval option is on, the work is delegated to
    SimpleEvalWordSequence2.

    Args:
      words: the words to evaluate.
      is_last_cmd: passed through to cmd_value.Argv.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Argv with parallel lists of strings and word locations, or
      the cmd_value.Assign of an assignment builtin that was detected when
      allow_assign is set.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    out_strs = []  # type: List[str]
    out_locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    # How many entries of out_strs already have a location in out_locs
    num_located = 0
    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            out_strs.append(fast_str)
            out_locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            # Bug fix: the count must be updated on every loop iteration
            num_located = len(out_strs)
            assert len(out_strs) == len(out_locs), out_strs
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            only_val = part_vals[0]

            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(only_val, words,
                                                    meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(only_val):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more
        # args.
        for frame in frames:
            self._EvalWordFrame(frame, out_strs)

        # Fill in locations parallel to out_strs.
        num_now = len(out_strs)
        for _ in xrange(num_now - num_located):
            out_locs.append(w)
        num_located = num_now

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(out_strs) == len(out_locs), '%s vs. %d' % (out_strs,
                                                          len(out_locs))
    return cmd_value.Argv(out_strs, out_locs, is_last_cmd, None, None)
2437
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a list of strings, for arrays and for loops.

    Assignment builtins aren't allowed here, so EvalWordSequence2 is called
    with its default allow_assign and the result is asserted to be an Argv.
    """
    # The value of is_last_cmd is irrelevant
    result = self.EvalWordSequence2(words, False)
    assert result.tag() == cmd_value_e.Argv

    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2448
2449
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs with shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Starts out unset; CheckCircularDeps() asserts that it was assigned.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs are set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and wrap its stdout as a part value.

        A sub whose left token is Id.Left_AtParen gives a part_value.Array of
        the J8 lines in the output; anything else gives a Piece that is split
        only when it's not quoted.

        Raises:
          error.Structured: with status 4 if the output can't be decoded as
            J8 lines.
        """
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the resulting path as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2496
2497
# Placeholder that CompletionWordEvaluator returns instead of running a
# command sub.
_DUMMY = '__NO_COMMAND_SUB__'


class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings
    rather than being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end of
    the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs are set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return the _DUMMY placeholder in place of the command's output."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder in place of a process sub path."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2540
2541
2542	# vim: sw=4