osh/word_eval.py

OILS / osh / word_eval.py View on Github | oilshell.org

2437 lines, 1474 significant

1	"""
2	word_eval.py - Evaluator for the word language.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6	from _devbuild.gen.syntax_asdl import (
7	Token,
8	SimpleVarSub,
9	loc,
10	loc_t,
11	BracedVarSub,
12	CommandSub,
13	bracket_op,
14	bracket_op_e,
15	suffix_op,
16	suffix_op_e,
17	ShArrayLiteral,
18	SingleQuoted,
19	DoubleQuoted,
20	word_e,
21	word_t,
22	CompoundWord,
23	rhs_word,
24	rhs_word_e,
25	rhs_word_t,
26	word_part,
27	word_part_e,
28	)
29	from _devbuild.gen.runtime_asdl import (
30	part_value,
31	part_value_e,
32	part_value_t,
33	cmd_value,
34	cmd_value_e,
35	cmd_value_t,
36	AssignArg,
37	a_index,
38	a_index_e,
39	VTestPlace,
40	VarSubState,
41	Piece,
42	)
43	from _devbuild.gen.option_asdl import option_i, builtin_i
44	from _devbuild.gen.value_asdl import (
45	value,
46	value_e,
47	value_t,
48	sh_lvalue,
49	sh_lvalue_t,
50	)
51	from core import error
52	from core import pyos
53	from core import pyutil
54	from core import state
55	from display import ui
56	from core import util
57	from data_lang import j8
58	from data_lang import j8_lite
59	from core.error import e_die
60	from frontend import consts
61	from frontend import lexer
62	from frontend import location
63	from mycpp import mops
64	from mycpp.mylib import log, tagswitch, NewDict
65	from osh import braces
66	from osh import glob_
67	from osh import string_ops
68	from osh import word_
69	from ysh import expr_eval
70	from ysh import val_ops
71
72	from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
73
74	if TYPE_CHECKING:
75	from _devbuild.gen.syntax_asdl import word_part_t
76	from _devbuild.gen.option_asdl import builtin_t
77	from core import optview
78	from core.state import Mem
79	from core.vm import _Executor
80	from osh.split import SplitContext
81	from osh import prompt
82	from osh import sh_expr_eval
83
# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Each flag occupies its own bit, so several can be OR-ed into one int.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
99
100
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} is allowed to mean ${a[0]}.

    Decay is always allowed when strict_array is off.  With strict_array on,
    it is only allowed for a plain var sub whose name is listed in
    _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    if not is_plain_var_sub:
        return False
    return var_name in _STRING_AND_ARRAY
106
107
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    For a BashArray the first slot is used; for a BashAssoc the entry with
    key '0' is used.  Returns value.Undef when that element is missing or
    unset.  Any other value type is a programming error.
    """
    first = None  # type: Optional[str]

    tag = val.tag()
    if tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        if len(array_val.strs):
            first = array_val.strs[0]
    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        if '0' in assoc_val.d:
            first = assoc_val.d['0']
    else:
        raise AssertionError(tag)

    if first is not None:
        return value.Str(first)
    return value.Undef
124
125
def GetArrayItem(strs, index):
    # type: (List[str], int) -> Optional[str]
    """Return strs[index], where a negative index counts from the end.

    Returns None when the index is out of range.  The result can also be None
    for an in-range index, because the array representation is sparse.
    """
    num_items = len(strs)
    pos = index + num_items if index < 0 else index

    if pos < 0 or pos >= num_items:
        return None

    # TODO: strs->index() has a redundant check for (i < 0)
    return strs[pos]
140
141
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    We need to detect all of these cases:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_oils.Builtin() or meta_oils.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
158
159
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an unquoted string naming 'builtin' or 'command'.

    Array parts and quoted strings never count.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
168
169
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse an argument to declare, export, etc.

    This is a fallback to the static parsing done below.

    Args:
      arg: the evaluated argument string, matched against
        consts.ASSIGN_ARG_RE.
      blame_word: word to blame on error; also stored in the result.

    Returns:
      AssignArg whose value is None when the argument has no operator
      (declare NAME), and whose append flag is set when the operator starts
      with '+'.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    match = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if match is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # match[2] is used for grouping; ERE doesn't have non-capturing groups
    name = match[1]
    op_str = match[3]
    assert op_str is not None, op_str

    if len(op_str) == 0:  # declare NAME -- no operator
        return AssignArg(name, None, False, blame_word)

    # declare NAME=
    rhs = value.Str(match[4])  # type: Optional[value_t]
    return AssignArg(name, rhs, op_str[0] == '+', blame_word)
196
197
198	# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
199	def _BackslashEscape(s):
200	# type: (str) -> str
201	"""Double up backslashes.
202
203	Useful for strings about to be globbed and strings about to be IFS
204	escaped.
205	"""
206	return s.replace('\\', '\\\\')
207
208
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Helper for VarSub evaluation: turn a value into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the value to substitute into the word.
      quoted: whether the substitution is in a quoted context.  Scalar
        results become Piece(s, quoted, not quoted).
      part_loc: the word part being evaluated; used to blame errors.

    Returns:
      A Piece for Undef, Str and the stringifiable YSH types, or a
      part_value.Array for BashArray and BashAssoc.

    Raises:
      error.TypeErr: if the value can't be substituted into a word.
    """
    UP_val = val

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _EmptyStrOrError, but we have to append to the empty string.
            return Piece('', quoted, not quoted)

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            return Piece(val.s, quoted, not quoted)

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            return part_value.Array(val.strs)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(val.d.values())

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            # Blame the word part, like the TypeErr below, so that errors
            # raised by Stringify() point at the substitution.
            s = val_ops.Stringify(val, loc.WordPart(part_loc), 'Word eval ')
            return Piece(s, quoted, not quoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
248
249
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group a flat list of part_value (String or Array) into frames.

    A frame is a portion of a word that results in zero or more args.  It can
    never be joined with another frame.  This idea exists because of arrays
    like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    frames = [frame]  # there is always at least one frame, possibly empty

    for part_val in part_vals:
        UP_part_val = part_val

        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                frame.append(part_val)

            elif case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)

                # The first defined element joins the frame in progress; every
                # later one starts a frame of its own.
                starts_new_frame = False
                for s in part_val.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would have
                    # decayed to a string.
                    piece = Piece(s, True, False)
                    if starts_new_frame:
                        frame = [piece]
                        frames.append(frame)  # singleton frame
                    else:
                        frame.append(piece)
                        starts_new_frame = True

            else:
                raise AssertionError()

    return frames
317
318
319	# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Decay part values like ${a=x"$@"x} to a single string.

    String parts are used as is.  The defined entries of each Array part are
    joined with join_char.  The results are concatenated with no separator.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                chunks.append(part_val.s)
            elif case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for s in part_val.strs:
                    if s is not None:
                        defined.append(s)
                chunks.append(join_char.join(defined))
            else:
                raise AssertionError()
    return ''.join(chunks)
338
339
def _PerformSlice(
        val,  # type: value_t
        begin,  # type: int
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Implement ${x:begin:length} for strings and indexed arrays.

    Args:
      val: the value to slice (Str or BashArray).
      begin: start offset; a negative value counts from the end.
      length: length of the slice; only meaningful if has_length is True.
        For strings a negative length is an end POSITION counted from the
        end.  For arrays a negative length is a fatal error.
      has_length: whether a length was given at all.
      part: the var sub being evaluated, used to blame errors.
      arg0_val: if not None, its string is prepended to the array before
        slicing, because "begin" for $@ and $* counts $0.

    Returns:
      value.Str or value.BashArray with the selected portion.

    Raises:
      error.TypeErr: for values that are neither Str nor BashArray.
      Fatal errors via e_die() for negative array lengths and for
      associative arrays.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            if begin < 0:  # Compute offset with unicode
                # Walk backward from the end, one character at a time
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray):  # Slice array entries.
            val = cast(value.BashArray, UP_val)
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            # Quirk: "begin" for positional arguments ($@ and $*) counts $0.
            if arg0_val is not None:
                orig = [arg0_val.s]
                orig.extend(val.strs)
            else:
                orig = val.strs

            n = len(orig)
            if begin < 0:
                i = n + begin  # ${@:-3} starts counts from the end
                if i < 0:
                    # The offset points before the first element.  A negative
                    # i would wrap around when indexing orig in Python (and be
                    # out of bounds in C++), so skip the loop and return an
                    # empty slice instead.
                    i = n
            else:
                i = begin

            strs = []  # type: List[str]
            count = 0
            while i < n:
                if has_length and count == length:  # length could be 0
                    break
                s = orig[i]
                if s is not None:  # Unset elements don't count towards the length
                    strs.append(s)
                    count += 1
                i += 1

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
421
422
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    It only declares EvalWordToString(); AbstractWordEvaluator derives from
    this class.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate word w to a string value; not implemented in this class.

        eval_flags defaults to 0.  NOTE(review): presumably a combination of
        the module-level QUOTE_* flags -- confirm against the override.
        """
        raise NotImplementedError()
434
435
436	def _GetDollarHyphen(exec_opts):
437	# type: (optview.Exec) -> str
438	chars = [] # type: List[str]
439	if exec_opts.interactive():
440	chars.append('i')
441
442	if exec_opts.errexit():
443	chars.append('e')
444	if exec_opts.noglob():
445	chars.append('f')
446	if exec_opts.noexec():
447	chars.append('n')
448	if exec_opts.nounset():
449	chars.append('u')
450	# NO letter for pipefail?
451	if exec_opts.xtrace():
452	chars.append('x')
453	if exec_opts.noclobber():
454	chars.append('C')
455
456	# bash has:
457	# - c for sh -c, i for sh -i (mksh also has this)
458	# - h for hashing (mksh also has this)
459	# - B for brace expansion
460	return ''.join(chars)
461
462
class TildeEvaluator(object):
    """Expands ~ and ~user into home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # TODO: Also ENV.HOME

        # First look up the HOME var, then ask the OS.  This is what bash does.
        home_val = self.mem.GetValue('HOME')
        if home_val.tag() != value_e.Str:
            return pyos.GetMyHomeDir()
        return cast(value.Str, home_val).s

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        If the home dir can't be determined, it's a fatal error under
        strict_tilde; otherwise the ~ or ~user text is returned literally.
        """
        user = part.user_name
        if user is None:
            home_dir = self.GetMyHomeDir()
        else:
            home_dir = pyos.GetHomeDir(user)

        if home_dir is not None:
            return home_dir

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
506
507
508	class AbstractWordEvaluator(StringWordEvaluator):
509	"""Abstract base class for word evaluators.
510
511	Public entry points:
512	EvalWordToString EvalForPlugin EvalRhsWord
513	EvalWordSequence EvalWordSequence2
514	"""
515
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators and create the globber.

    arith_ev, expr_ev, prompt_ev and unsafe_arith are left as None here.
    NOTE(review): presumably the caller assigns them after construction (see
    CheckCircularDeps) -- confirm.
    """
    # Collaborators passed in by the caller
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.tilde_ev = tilde_ev
    self.splitter = splitter
    self.errfmt = errfmt

    self.globber = glob_.Globber(exec_opts)

    # Not available at construction time
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith
541
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract: not implemented in this base class.

    NOTE(review): presumably subclasses verify here that the evaluators left
    as None by __init__ have been assigned -- confirm.
    """
    raise NotImplementedError()
545
def _EvalCommandSub(self, cs_part, quoted):
    # type: (CommandSub, bool) -> part_value_t
    """Abstract since it has a side effect.

    Evaluates the command sub cs_part to a part_value; 'quoted' says whether
    it appears in a quoted context.  Not implemented in this base class.
    """
    raise NotImplementedError()
550
def _EvalProcessSub(self, cs_part):
    # type: (CommandSub) -> part_value_t
    """Abstract since it has a side effect.

    Evaluates the process sub cs_part to a part_value.  Not implemented in
    this base class.
    """
    raise NotImplementedError()
555
def _EvalVarNum(self, var_num):
    # type: (int) -> value_t
    """Return the value of numbered argument var_num via mem.GetArgNum().

    var_num must be non-negative.
    """
    assert var_num >= 0
    return self.mem.GetArgNum(var_num)
560
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate $? and so forth.

    $@ is special -- it needs to know whether it is in a double quoted
    context.

    - If it's $@ in a double quoted context, return an ARRAY.
    - If it's $@ in a normal context, return a STRING, which then will be
      subject to splitting.

    Both $@ and $* return the argv array here; the decision to join it is
    recorded in the out param vsub_state.join_array.
    """
    if op_id == Id.VSub_At:
        # "$@" evaluates to an array, $@ should be decayed
        vsub_state.join_array = not quoted
        return value.BashArray(self.mem.GetArgv())

    if op_id == Id.VSub_Star:
        # $* "$*" are both decayed
        vsub_state.join_array = True
        return value.BashArray(self.mem.GetArgv())

    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    return self.mem.GetSpecialVar(op_id)
590
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
):
    # type: (...) -> bool
    """Apply a test operator like ${a:-x} ${a:+x} ${a:=x} ${a:?x}.

    Args:
      val: the value of the variable being tested.
      op: the operator token and its argument word.
      quoted: whether the var sub is in a quoted context.
      part_vals: out param; the evaluated argument word is appended here.
      vtest_place: name and optional index to assign to, for ${a:=x}.
      blame_token: location for error messages.

    Returns:
      Whether part_vals was mutated

    ${a:-} returns part_value[]
    ${a:+} returns part_value[]
    ${a:?error} returns error word?
    ${a:=} returns part_value[] but also needs self.mem for side effects.

    So I guess it should return part_value[], and then a flag for raising an
    error, and then a flag for assigning it?
    The original BracedVarSub will have the name.

    Example of needing multiple part_value[]

      echo X-${a:-'def'"ault"}-X

    We return two part values from the BracedVarSub.  Also consider:

      echo ${a:-x"$@"x}
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            # Only the colon forms treat an empty string like an unset var
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # TODO: allow undefined
            is_falsey = len(val.strs) == 0

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            is_falsey = len(val.d) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Blame the var sub so the user can see which one failed
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array' could
                # avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
762
def _Length(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    Str: number of UTF-8 characters.  If the string can't be decoded, this
      raises under strict_word_eval; otherwise a warning is printed and -1
      is returned.
    BashArray: number of entries that are set.
    BashAssoc: number of entries.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?

            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                length = string_ops.CountUtf8Chars(val.s)
            except error.Strict as e:
                # Add this here so we don't have to add it so far down the
                # stack.
                # TODO: It's better to show BOTH this CODE and the actual DATA
                # somehow.
                e.location = token

                if not self.exec_opts.strict_word_eval():
                    # NOTE: Doesn't make the command exit with 1; it just
                    # returns a length of -1.
                    self.errfmt.PrettyPrintError(e, prefix='warning: ')
                    return -1
                raise

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # There can be empty placeholder values in the array; they don't
            # count.
            length = len(val.strs)
            for s in val.strs:
                if s is None:
                    length -= 1

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            length = len(val.d)

        else:
            raise error.TypeErr(
                val, "Length op expected Str, BashArray, BashAssoc", token)

    return length
809
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    BashArray: the indices of the entries that are set, as strings.
    BashAssoc: the keys of the dict.

    Raises:
      error.TypeErr: if val is neither a BashArray nor a BashAssoc.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # translation issue: tuple indices not supported in list comprehensions
            #indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
            indices = []  # type: List[str]
            for i, s in enumerate(val.strs):
                if s is not None:
                    indices.append(str(i))
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            return value.BashArray(val.d.keys())

        else:
            # The message names the types that are actually accepted above
            raise error.TypeErr(val,
                                'Keys op expected BashArray, BashAssoc',
                                token)
835
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    Args:
      val: the value of the variable that holds the reference.
      blame_tok: 'foo' for ${!foo}
      quoted, vsub_state, vtest_place: passed through to _VarRefValue().

    Returns:
      value.Undef if val is Undef; otherwise the value of the var ref that
      is parsed from the string.

    Raises:
      error.TypeErr: if val is not a Str (fatal errors for arrays).
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            return value.Undef  # ${!undef} is just weird bash behavior

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            bvs_part = self.unsafe_arith.ParseVarRef(val.s, blame_tok)
            return self._VarRefValue(bvs_part, quoted, vsub_state,
                                     vtest_place)

        # The array errors blame the variable, like the TypeErr below.

        elif case(value_e.BashArray):  # caught earlier but OK
            e_die('Indirect expansion of array', blame_tok)

        elif case(value_e.BashAssoc):  # caught earlier but OK
            e_die('Indirect expansion of assoc array', blame_tok)

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
862
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix op of kind VOp1 to a string or to each element.

    For Str the result is a Str.  For BashArray and BashAssoc the op is
    applied to each (set) element, and the result is a BashArray.
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)
    if op_kind != Kind.VOp1:
        raise AssertionError(Kind_str(op_kind))

    # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
    # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
    # shortcut for constant strings.
    arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
    assert arg_val.tag() == value_e.Str
    pattern = arg_val.s

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            changed = string_ops.DoUnarySuffixOp(val.s, op.op, pattern,
                                                 has_extglob)
            #log('%r %r -> %r', val.s, arg_val.s, changed)
            new_val = value.Str(changed)  # type: value_t

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this
            # too.
            results = []  # type: List[str]
            for item in val.strs:
                if item is None:
                    continue  # unset entries are dropped
                results.append(
                    string_ops.DoUnarySuffixOp(item, op.op, pattern,
                                               has_extglob))
            new_val = value.BashArray(results)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            results = []
            for item in val.d.values():
                results.append(
                    string_ops.DoUnarySuffixOp(item, op.op, pattern,
                                               has_extglob))
            new_val = value.BashArray(results)

        else:
            raise error.TypeErr(
                val, 'Unary op expected Str, BashArray, BashAssoc',
                op.op)

    return new_val
914
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply a pattern substitution like ${x//GLOB/replace}.

    The glob is translated to an ERE, and the replacement is applied to a
    Str, or to each (set) element of a BashArray or BashAssoc.  For the
    latter two the result is a BashArray.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from !(*.py)
    # to ERE!  You would need an engine that supports negation?
    # (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    replace_str = ''
    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    with tagswitch(val) as case2:
        if case2(value_e.Str):
            str_val = cast(value.Str, val)
            val = value.Str(replacer.Replace(str_val.s, op))

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, val)
            replaced = []  # type: List[str]
            for item in array_val.strs:
                if item is None:
                    continue  # unset entries are dropped
                replaced.append(replacer.Replace(item, op))
            val = value.BashArray(replaced)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, val)
            replaced = []
            for item in assoc_val.d.values():
                replaced.append(replacer.Replace(item, op))
            val = value.BashArray(replaced)

        else:
            raise error.TypeErr(
                val, 'Pat Sub op expected Str, BashArray, BashAssoc',
                op.slash_tok)

    return val
971
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate the operands of ${x:begin:length} and perform the slice.

    var_name is None for $* and $@, in which case $0 takes part in the slice.

    If slicing raises error.Strict, it's re-raised under strict_word_eval;
    otherwise a warning is printed and an empty Str or BashArray is returned.
    """
    begin = self.arith_ev.EvalToInt(op.begin)

    # Note: bash allows lengths to be negative (with odd semantics), but
    # we don't allow that right now.
    has_length = False
    length = -1
    if op.length:
        length = self.arith_ev.EvalToInt(op.length)
        has_length = True

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        # val is still the original value here; replace it with an empty
        # value of the same type.
        with tagswitch(val) as case2:
            if case2(value_e.Str):
                val = value.Str('')
            elif case2(value_e.BashArray):
                val = value.BashArray([])
            else:
                raise NotImplementedError()
    return val
1003
def _Nullary(self, val, op, var_name):
    # type: (value_t, Token, Optional[str]) -> Tuple[value.Str, bool]
    """Apply a nullary op: ${x@P} ${x@Q} ${x@a}.

    Args:
      val: the value to transform.
      op: the operator token.
      var_name: name of the variable, or None, e.g. for ${?@a}.

    Returns:
      (result, quoted2), where quoted2 is True if the result should be
      treated as quoted.  It's only set for @Q on a string.

    Fatal errors are raised for unsupported value types, and for ops that
    aren't implemented.
    """
    UP_val = val
    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray):
                array_val = cast(value.BashArray, UP_val)

                # The representation is sparse, so skip unset (None)
                # entries, which can't be encoded.
                # TODO: should use fastfunc.ShellEncode
                tmp = [
                    j8_lite.MaybeShellEncode(s) for s in array_val.strs
                    if s is not None
                ]
                result = value.Str(' '.join(tmp))
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # We're ONLY simulating -a and -A, not -r -x -n for now.  See
        # spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(val) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1064
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle ${a[@]} and ${a[*]}.

    Records in the out param vsub_state.join_array whether the array should
    be joined.  An Undef value becomes the result of _EmptyBashArrayOrError()
    unless there's a test op.  Indexing a Str is a fatal error under
    strict_array.  Other values are returned unchanged.
    """
    op_id = cast(bracket_op.WholeArray, part.bracket_op).op_id

    if op_id == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        join_array = not quoted
        str_err = "Can't index string with @"
    elif op_id == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        join_array = True
        str_err = "Can't index string with *"
    else:
        raise AssertionError(op_id)  # unknown

    vsub_state.join_array = join_array

    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            if not vsub_state.has_test_op:
                val = self._EmptyBashArrayOrError(part.token)
        elif case2(value_e.Str):
            if self.exec_opts.strict_array():
                e_die(str_err, loc.WordPart(part))
        # value_e.BashArray and everything else: no-op

    return val
1099
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process an array index like ${a[i+1]} or ${assoc[key]}.

    The evaluated index or key is stored in the out param vtest_place.index.
    Returns value.Undef if the element doesn't exist.  An Undef input is
    returned as is.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    item = None  # type: Optional[str]

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            return val  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.token)

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            index = self.arith_ev.EvalToInt(index_expr)
            vtest_place.index = a_index.Int(index)  # out param

            item = GetArrayItem(array_val.strs, index)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                index_expr, blame_loc=location.TokenForArith(index_expr))

            vtest_place.index = a_index.Str(key)  # out param
            item = assoc_val.d.get(key)

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    if item is None:
        return value.Undef
    return value.Str(item)
1149
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Args:
      parts: the word parts found between the double quotes
      part_vals: output param to append to.
    """
    # A double quoted word can still produce several arguments:
    #   $ a=(1 2); b=(3); c=(4 5)
    #   $ argv "${a[@]}${b[@]}${c[@]}"
    #   ['1', '234', '5']
    #
    # And with nested parts:
    #   $ argv "${a[@]}${undef[@]:-${c[@]}}"
    #   ['1', '24', '5']

    if len(parts):
        for inner in parts:
            self._EvalWordPart(inner, part_vals, QUOTED)
    else:
        # Special case for "".  The parser outputs (DoubleQuoted []) rather
        # than (DoubleQuoted [Literal '']), so the empty quoted piece has to
        # be produced here.
        part_vals.append(Piece('', True, False))
1176
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a DoubleQuoted part to a single string.

    For double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors raised while concatenating are blamed on dq_part.left.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    result = self._ConcatPartVals(evaluated, dq_part.left)
    return result
1186
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array like $* to a string.

    The non-None entries of val.strs are joined with the character returned
    by self.splitter.GetJoinChar().
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for item in val.strs:
        if item is not None:  # None entries are skipped
            present.append(item)

    return value.Str(join_char.join(present))
1194
def _EmptyStrOrError(self, val, token):
    # type: (value_t, Token) -> value_t
    """Replace an Undef value by '', or fail when nounset is on.

    Args:
      val: the value to check; anything other than Undef is returned as is
      token: the variable token, used for the error message and location
    """
    if val.tag() == value_e.Undef:
        if self.exec_opts.nounset():
            tok_str = lexer.TokenVal(token)
            # Report the name without a leading $
            if tok_str.startswith('$'):
                name = tok_str[1:]
            else:
                name = tok_str
            e_die('Undefined variable %r' % name, token)
        else:
            return value.Str('')

    return val
1206
def _EmptyBashArrayOrError(self, token):
    # type: (Token) -> value_t
    """Return an empty BashArray, or fail when nounset is on.

    Args:
      token: the variable token, used for the error message and location
    """
    assert token is not None

    if not self.exec_opts.nounset():
        return value.BashArray([])

    e_die('Undefined array %r' % lexer.TokenVal(token), token)
1214
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a BracedVarSub, if it has one.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().

    Without one, a named array or assoc array that isn't the subject of a
    type query either decays to a string, when ShouldArrayDecay() allows it,
    or is a fatal error.

    Returns:
      The new value.
    """
    if part.bracket_op:
        with tagswitch(part.bracket_op) as case:
            if case(bracket_op_e.WholeArray):
                val = self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                val = self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(part.bracket_op.tag())

        return val

    # No bracket op
    name = vtest_place.name
    if name is not None and not vsub_state.is_type_query:
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            no_other_ops = not (part.prefix_op or part.suffix_op)
            if ShouldArrayDecay(name, self.exec_opts, no_other_ops):
                # for ${BASH_SOURCE}, etc.
                val = DecayArray(val)
            else:
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, loc.WordPart(part))

    return val
1244
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Evaluate the variable and bracket op of a BracedVarSub to a value_t.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t
    instead of appending part values.
    """
    tok_id = part.token.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if tok_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # Evaluate the bracket op with _allow_command_sub turned off
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1274
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the BracedVarSub to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution is double quoted
    """
    # The operators interact in a non-obvious order:
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # So the evaluation goes through several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array
    #
    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]}
    # AFTER suffix ops are applied.  If we take the length with a prefix op,
    # the distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    tok_id = part.token.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if tok_id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # Look at the suffix op BEFORE the bracket op, because the flags set
    # here change how the bracket op treats arrays and undefined values.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                nullary_tok = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is
                # different than ${array[@]@Q}
                if nullary_tok.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

            elif case(suffix_op_e.Unary):
                unary_op = cast(suffix_op.Unary, UP_op)

                # Do the _EmptyStrOrError/_EmptyBashArrayOrError up front,
                # EXCEPT in the case of Kind.VTest
                if consts.GetKind(unary_op.op.id) == Kind.VTest:
                    vsub_state.has_test_op = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    prefix_op = part.prefix_op
    if prefix_op:
        if prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            if not vsub_state.has_test_op:  # undef -> '' BEFORE length
                val = self._EmptyStrOrError(val, part.token)

            n = self._Length(val, part.token)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray):
                if vsub_state.has_test_op:
                    # ${!a[@]-'default'} is a non-fatal runtime error in
                    # bash.  Here it's fatal.
                    op_tok = cast(suffix_op.Unary, UP_op).op
                    e_die('Test operation not allowed with ${!array[@]}',
                          op_tok)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.token)
                # vsub_state.join_array was already set ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect
                # expansion unlike ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.token, quoted, vsub_state,
                                       vtest_place)

                if not vsub_state.has_test_op:  # undef -> '' AFTER indirection
                    val = self._EmptyStrOrError(val, part.token)

        else:
            raise AssertionError(part.prefix_op)

    else:
        if not vsub_state.has_test_op:  # undef -> '' if no prefix op
            val = self._EmptyStrOrError(val, part.token)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op_nullary = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op_nullary, var_name)

            elif case(suffix_op_e.Unary):
                op_unary = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op_unary.op.id) == Kind.VTest:
                    if self._ApplyTestOp(val, op_unary, quoted, part_vals,
                                         vtest_place, part.token):
                        # e.g. to evaluate ${undef:-'default'}, we already
                        # appended what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ApplyUnarySuffixOp(val, op_unary)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op_patsub = cast(suffix_op.PatSub, UP_op)
                val = self._PatSub(val, op_patsub)

            elif case(suffix_op_e.Slice):
                op_slice = cast(suffix_op.Slice, UP_op)
                val = self._Slice(val, op_slice, var_name, part)

            elif case(suffix_op_e.Static):
                op_static = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op_static.tok)

            else:
                raise AssertionError()

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        if vsub_state.join_array:
            val = self._DecayArray(cast(value.BashArray, UP_val))

    # For example, ${a} evaluates to value.Str(), but we want a Piece().
    part_vals.append(_ValueToPartValue(val, quoted or quoted2, part))
1470
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate String and Array part values into one string.

    Args:
      part_vals: the part values to join; only String and Array are handled
      location: where to blame the strict_array error

    Returns:
      The concatenation.  Array parts are joined with a single space.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_part_val)
                chunks.append(piece.s)

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, UP_part_val)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)
                else:
                    # It appears to not respect IFS
                    # TODO: eliminate double join()?
                    present = [s for s in array_part.strs if s is not None]
                    chunks.append(' '.join(present))

            else:
                raise AssertionError()

    return ''.join(chunks)
1500
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution, unquoted, to a single string.

    For double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    # blame ${ location
    result = self._ConcatPartVals(evaluated, part.left)
    return result
1511
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a substitution without braces, like $foo, $1 or $@.

    Args:
      part: the SimpleVarSub to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution is double quoted
    """
    tok = part.tok
    tok_id = tok.id
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok_id == Id.VSub_DollarName:
        var_name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(var_name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if ShouldArrayDecay(var_name, self.exec_opts):
                # for $BASH_SOURCE, etc.
                val = DecayArray(val)
            else:
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, tok)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._EmptyStrOrError(val, tok)

    # An array is joined into a string only if join_array was set
    UP_val = val
    if val.tag() == value_e.BashArray:
        if vsub_state.join_array:
            val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1552
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a substitution like $foo, unquoted, to a single string.

    For double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors raised while concatenating are blamed on node.tok.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    result = self._ConcatPartVals(evaluated, node.tok)
    return result
1562
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    The opening operator, the '|' separators between arms, and the closing
    ')' are appended as unquoted pieces that are not split.  Each arm is
    evaluated with EXTGLOB_NESTED, so its part values land in the same flat
    list.

    Args:
      part: the extended glob word part
      part_vals: output param to append to.
    """
    op = part.op
    if op.id == Id.ExtGlob_Comma:
        # This opener is always emitted as '@('
        op_str = '@('
    else:
        op_str = lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    for i, w in enumerate(part.arms):
        if i != 0:
            # The separator must be a bare '|'.  A backslash in front of it
            # would turn the alternation into a literal pipe character in
            # the resulting pattern.
            part_vals.append(Piece('|', False, False))
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(w, part_vals, EXTGLOB_NESTED)
    part_vals.append(Piece(')', False, False))  # closing )
1580
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values of the word
      w: the word, used only as the error location
      glob_parts: output param; each extglob is represented by '*'
      fnmatch_parts: output param; receives the full extglob text
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_part_val)
                if piece.quoted and not self.exec_opts.noglob():
                    pat = glob_.GlobEscape(piece.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    pat = piece.s
                glob_parts.append(pat)
                fnmatch_parts.append(pat)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                ext = cast(part_value.ExtGlob, UP_part_val)
                # Keep appending to fnmatch_parts, but discard the nested
                # glob parts and replace them with a single '*'
                self._TranslateExtGlob(ext.part_vals, w, [], fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1616
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to.
      flags: bit flags; QUOTED and IS_SUBST are read here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))

        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            part_vals.append(Piece(lexer.LazyStr(part), quoted, is_subst))

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            part_vals.append(Piece(part.ch, True, False))

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            part_vals.append(Piece(part.sval, True, False))

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            left_id = part.left_token.id
            if left_id in (Id.Left_DollarParen, Id.Left_AtParen,
                           Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif left_id in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(left_id)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            expanded = self.tilde_ev.Eval(part)
            # NOT split even when unquoted!
            part_vals.append(Piece(expanded, True, False))

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # Split only when unquoted
            part_vals.append(Piece(mops.ToStr(num), quoted, not quoted))

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides
            # whether to pass it to fnmatch() or replace it with '*' and
            # pass it to glob().
            flattened = []  # type: List[part_value_t]
            self._EvalExtGlob(part, flattened)  # flattens tree
            part_vals.append(part_value.ExtGlob(flattened))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            spliced = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(spliced, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_vals.append(self.expr_ev.EvalExprSub(part))

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1733
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, which may be Empty, to part values.

    Args:
      w: the word; Empty yields a single empty piece
      part_vals: output param to append to.
      eval_flags: passed on to _EvalWordToParts; QUOTED is also read here
    """
    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            is_quoted = bool(eval_flags & QUOTED)
            part_vals.append(Piece('', is_quoted, not is_quoted))

        elif case(rhs_word_e.Compound):
            compound = cast(CompoundWord, UP_w)
            self._EvalWordToParts(compound,
                                  part_vals,
                                  eval_flags=eval_flags)

        else:
            raise AssertionError()
1749
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Args:
      w: the word to evaluate
      part_vals: output param to append to.
      eval_flags: passed to _EvalWordPart; EXTGLOB_FILES and EXTGLOB_NESTED
        select what happens when the word has an extended glob

    Returns:
      Appends to part_vals.  Note that this is a TREE.
    """
    # Does the word have an extended glob?  This is a special case because
    # of the way we use glob() and then fnmatch(..., FNM_EXTMATCH) to
    # implement extended globs.  It's hard to carry that extra information
    # all the way past the word splitting stage.

    # OSH semantic limitations: If a word has an extended glob part, then
    # 1. It can't have an array
    # 2. Word splitting of unquoted words isn't respected

    evaluated = []  # type: List[part_value_t]
    has_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            has_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not has_extglob:  # the common case
        part_vals.extend(evaluated)
        return

    # Caller REQUESTED extglob evaluation, AND we parsed word_part.ExtGlob()
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts

        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        #log('word_part_vals %s', evaluated)
        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)
        #log("glob %s fnmatch %s", glob_pat, fnmatch_pat)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1806
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the part values to convert
      w: the word, used only as the error location.  It could just be a
        span ID.
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE selects how quoted pieces are
        escaped
      strs: output param to append to.
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_part_val)
                text = piece.s
                if piece.quoted:
                    # Quoted pieces must match literally, so escape them
                    # for the pattern language the caller asked for.
                    if eval_flags & QUOTE_FNMATCH:
                        # Patterns in [[ x == pat ]], case, ${x%pat} or
                        # ${x//pat/}
                        text = glob_.GlobEscape(text)
                    elif eval_flags & QUOTE_ERE:
                        text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, UP_part_val)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather
                    # than the parts.  Problem: the word is a TREE of parts,
                    # but we only have a flat list of part_vals.  The only
                    # case where we really get arrays is "$@", "${a[@]}",
                    # "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')
                else:
                    # It appears to not respect IFS
                    # TODO: eliminate double join()?
                    present = [s for s in array_part.strs if s is not None]
                    strs.append(' '.join(present))

            elif case(part_value_e.ExtGlob):
                ext = cast(part_value.ExtGlob, UP_part_val)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(ext.part_vals, w, eval_flags, strs)

            else:
                raise AssertionError()
1862
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: can contain a quoting algorithm, which is applied when
        the part values are converted to strings
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Fast path.  QUOTE_FNMATCH etc. breaks this optimization.
    if eval_flags == 0:
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            return value.Str(fast_str)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for word_part_ in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(word_part_, evaluated, 0)

    chunks = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, chunks)
    return value.Str(''.join(chunks))
1890
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    Quoted pieces are always escaped with QUOTE_FNMATCH.

    Returns:
      (pattern string, whether any part of the word is an ExtGlob).  An
      Empty word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for word_part_ in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(word_part_, evaluated, 0)
        if word_part_.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    chunks = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, chunks)
    return value.Str(''.join(chunks)), saw_extglob
1911
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42)
    are handled here.  A failure is turned into a placeholder string such
    as '<Runtime error: ...>' rather than propagating to the caller.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers is used to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as err:
            result = value.Str('<Runtime error: %s>' %
                               err.UserErrorString())

        except (IOError, OSError) as err:
            result = value.Str('<I/O error: %s>' % pyutil.strerror(err))

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
1934
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Returns:
      value.Str('') for an Empty word
      value.BashArray when the word is a single array literal, a=(1 2)
      value.BashAssoc when the word is a single assoc array literal
      Otherwise the result of EvalWordToString()
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so compare against the rhs_word_e tag, like
    # EvalWordToPattern() does.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words
        # that don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
1971
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more arguments.

    A frame that is entirely quoted is joined and appended as is.
    Otherwise the pieces are escaped, joined, split with the splitter, and
    each resulting arg that looks like a glob is expanded.

    Args:
      frame: the pieces that make up one word frame
      argv: output param to append to.
    """
    all_empty = True
    all_quoted = True
    any_quoted = False

    #log('--- frame %s', frame)

    for piece in frame:
        if len(piece.s):
            all_empty = False

        if piece.quoted:
            any_quoted = True
        else:
            all_quoted = False

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if all_empty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if all_quoted:
        quoted_strs = [piece.s for piece in frame]
        argv.append(''.join(quoted_strs))
        return

    will_glob = not self.exec_opts.noglob()

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # First layer of escaping, for globbing
        if will_glob and piece.quoted:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        # Second layer of escaping, for splitting
        if piece.do_split:
            frag = _BackslashEscape(frag)
        else:
            frag = self.splitter.Escape(frag)

        frags.append(frag)

    flat = ''.join(frags)
    #log('flat: %r', flat)

    args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(args) == 0 and any_quoted:
        argv.append('')
        return

    #log('split args: %r', args)
    for arg in args:
        if glob_.LooksLikeGlob(arg):
            n = self.globber.Expand(arg, argv)
            if n < 0:
                # TODO: location info, with span IDs carried through the
                # frame
                raise error.FailGlob('Pattern %r matched no files' % arg,
                                     loc.Missing)
        else:
            argv.append(glob_.GlobUnescape(arg))
2042
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Helper for _EvalAssignBuiltin.

    Splitting and globbing are disabled for assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]

    Returns:
      One string per non-empty word frame.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalWordToParts(w, evaluated, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(evaluated):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        frame_strs = [piece.s for piece in frame]
        result.append(''.join(frame_strs))  # no split or glob

    #log('argv: %s', result)
    return result
2062
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Evaluate the words of an assignment builtin into a cmd_value.Assign.

    Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin; passed through to the result.
      arg0: The builtin's name as a string; becomes the first entry of flags.
      words: All the words of the command.
      meta_offset: Index in words of the assignment keyword (0 for
        'declare x', 1 for 'builtin declare x', ...).  Only the words after
        it are evaluated here.

    Returns:
      A cmd_value.Assign holding the flag strings, a parallel list of their
      source words, and the evaluated assignment pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # flags and flag_locs are parallel: flag_locs[k] is the word that
    # produced flags[k].  Every append to one needs an append to the other.
    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    # NOTE(review): this is words[0] even when meta_offset > 0, so with a
    # 'builtin' / 'command' prefix the location is the prefix word rather
    # than the keyword -- confirm that is intended.
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the left token, so the RHS is empty.
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name after -f / -F is kept as a plain argument.
                        # Record its location too, so that flag_locs stays
                        # parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2149
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate the command as an assignment builtin if arg0 names one.

    Args:
      arg0: Candidate builtin name, already evaluated to a string.
      words: All the words of the command.
      meta_offset: Forwarded unchanged to _EvalAssignBuiltin().

    Returns:
      The result of _EvalAssignBuiltin() when consts.LookupAssignBuiltin()
      recognizes arg0, otherwise None.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None
    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2157
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr(), but for an evaluated part value.

    Only an unquoted string part can name an assignment builtin; any other
    part value yields None.

    Args:
      val0: The single part value that the first word evaluated to.
      words: All the words of the command.
      meta_offset: Forwarded unchanged to _DetectAssignBuiltinStr().

    Returns:
      A cmd_value.Assign if an assignment builtin was detected, else None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2166
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Words are not split.  A word is globbed only when
    glob_.LooksLikeStaticGlob() accepts it, and never when it is the first
    word of a command that allows assignment builtins.

    Args:
      words: The words to evaluate.
      is_last_cmd: Passed through to the resulting cmd_value.Argv.
      allow_assign: Whether the first word may select an assignment builtin.

    Returns:
      A cmd_value.Assign if the first word evaluates to exactly one string
      that names an assignment builtin (and allow_assign is set), otherwise
      a cmd_value.Argv whose locations are parallel to its strings.

    Raises:
      error.FailGlob: If the globber reports a negative count for a pattern.
    """
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        is_first = (i == meta_offset and allow_assign)

        if not is_first and glob_.LooksLikeStaticGlob(w):
            val = self.EvalWordToString(w)  # respects strict-array
            num_appended = self.globber.Expand(val.s, strs)
            if num_appended < 0:
                raise error.FailGlob('Pattern %r matched no files' % val.s,
                                     w)
            for _ in xrange(num_appended):
                locs.append(w)
            continue

        # We will still allow x"${a[@]}"x, though it's deprecated by @a,
        # which disallows such expressions at parse time.  Each non-empty
        # frame of the word becomes one string, with no split or glob.
        word_strs = self._EvalWordToArgv(w)

        # TODO: Remove this because YSH will disallow assignment
        # builtins?  (including export?)
        if is_first and len(word_strs) == 1:
            cmd_val = self._DetectAssignBuiltinStr(word_strs[0], words,
                                                   meta_offset)
            if cmd_val is not None:
                return cmd_val

        strs.extend(word_strs)
        for _ in word_strs:
            locs.append(w)

    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2228
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: The words to evaluate.
      is_last_cmd: Passed through to the resulting cmd_value.Argv.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      A cmd_value.Assign if allow_assign is set and an assignment builtin is
      detected, otherwise a cmd_value.Argv whose locations are parallel to
      its strings.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            # Fast path: the word yields exactly one string, with one loc.
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        frames = _MakeWordFrames(part_vals)

        # Do splitting and globbing.  Each frame will append zero or more
        # args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.  Every string this word just
        # produced is exactly the surplus of strs over locs, because the
        # two lists have equal length at the start of each iteration.  (A
        # separate running counter would miss the strings appended by the
        # fast path above and record too many locations.)
        for _ in xrange(len(strs) - len(locs)):
            locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2332
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a flat list of strings, for arrays and for loops.

    These contexts don't allow assignment builtins, so the result of
    EvalWordSequence2() is asserted to be a cmd_value.Argv.

    Args:
      words: The words to evaluate.

    Returns:
      The argv strings of the evaluated words.
    """
    # The second argument is is_last_cmd, which is irrelevant here.
    UP_cmd_val = self.EvalWordSequence2(words, False)
    assert UP_cmd_val.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, UP_cmd_val)
    return argv_val.argv
2343
2344
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs via self.shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Starts out unset; CheckCircularDeps() asserts it has been assigned.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs are set.

        expr_ev is deliberately not checked: it is disabled for pure OSH.
        """
        assert self.arith_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and turn its stdout into a part value.

        Returns:
          A part_value.Array of J8 lines for the Id.Left_AtParen form,
          otherwise a Piece that is split only when it is not quoted.

        Raises:
          error.Structured: With status 4, if the output of the
            Id.Left_AtParen form fails to decode as J8 lines.
        """
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path it produced as a Piece."""
        # pretend it's quoted; no split or glob
        return Piece(self.shell_ex.RunProcessSub(cs_part), True, False)
2391
2392
# Placeholder that CompletionWordEvaluator returns in place of command sub
# output.
_DUMMY = '__NO_COMMAND_SUB__'


class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings
    instead of being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs are set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder part value without running the command sub.

        The Id.Left_AtParen form yields a one-element array; any other form
        yields a Piece that is split only when it is not quoted.
        """
        if cs_part.left_token.id == Id.Left_AtParen:
            return part_value.Array([_DUMMY])
        return Piece(_DUMMY, quoted, not quoted)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder Piece without running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2435
2436
2437	# vim: sw=4