osh/word_eval.py

OILS / osh / word_eval.py View on Github | oilshell.org

2435 lines, 1474 significant

1	"""
2	word_eval.py - Evaluator for the word language.
3	"""
4
5	from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6	from _devbuild.gen.syntax_asdl import (
7	Token,
8	SimpleVarSub,
9	loc,
10	loc_t,
11	BracedVarSub,
12	CommandSub,
13	bracket_op,
14	bracket_op_e,
15	suffix_op,
16	suffix_op_e,
17	ShArrayLiteral,
18	SingleQuoted,
19	DoubleQuoted,
20	word_e,
21	word_t,
22	CompoundWord,
23	rhs_word,
24	rhs_word_e,
25	rhs_word_t,
26	word_part,
27	word_part_e,
28	)
29	from _devbuild.gen.runtime_asdl import (
30	part_value,
31	part_value_e,
32	part_value_t,
33	cmd_value,
34	cmd_value_e,
35	cmd_value_t,
36	AssignArg,
37	a_index,
38	a_index_e,
39	VTestPlace,
40	VarSubState,
41	Piece,
42	)
43	from _devbuild.gen.option_asdl import option_i, builtin_i
44	from _devbuild.gen.value_asdl import (
45	value,
46	value_e,
47	value_t,
48	sh_lvalue,
49	sh_lvalue_t,
50	)
51	from core import error
52	from core import pyos
53	from core import pyutil
54	from core import state
55	from display import ui
56	from core import util
57	from data_lang import j8
58	from data_lang import j8_lite
59	from core.error import e_die
60	from frontend import consts
61	from frontend import lexer
62	from frontend import location
63	from mycpp import mops
64	from mycpp.mylib import log, tagswitch, NewDict
65	from osh import braces
66	from osh import glob_
67	from osh import string_ops
68	from osh import word_
69	from ysh import expr_eval
70	from ysh import val_ops
71
72	from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
73
74	if TYPE_CHECKING:
75	from _devbuild.gen.syntax_asdl import word_part_t
76	from _devbuild.gen.option_asdl import builtin_t
77	from core import optview
78	from core.state import Mem
79	from core.vm import _Executor
80	from osh.split import SplitContext
81	from osh import prompt
82	from osh import sh_expr_eval
83
# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Each flag is a distinct bit, so flags are combined with |.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
99
100
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} is allowed to mean ${a[0]}.

    Decay is always allowed when strict_array is off.  With strict_array on,
    it is only allowed for a plain var sub of one of the names listed in
    _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    if not is_plain_var_sub:
        return False
    return var_name in _STRING_AND_ARRAY
106
107
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    A BashArray decays to its first slot, and a BashAssoc to the entry stored
    under the key '0'.  When that entry is missing (or the first slot holds
    None), the result is value.Undef.  Any other kind of value is a
    programming error and raises AssertionError.
    """
    first = None  # type: Optional[str]

    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        if len(array_val.strs):
            first = array_val.strs[0]
    elif val.tag() == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        if '0' in assoc_val.d:
            first = assoc_val.d['0']
    else:
        raise AssertionError(val.tag())

    if first is not None:
        return value.Str(first)
    return value.Undef
124
125
def GetArrayItem(strs, index):
    # type: (List[str], int) -> Optional[str]
    """Return strs[index], or None when the index is out of range.

    A negative index counts from the end of the list.  None can also be
    returned for an in-range index, because the representation is sparse.
    """
    n = len(strs)
    pos = index + n if index < 0 else index

    if pos < 0 or pos >= n:
        return None

    # TODO: strs->index() has a redundant check for (i < 0)
    return strs[pos]
140
141
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    All of these forms need to be detected:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Assignment builtins have different WORD EVALUATION RULES for a=$x (no
    word splitting), so it seems hard to do this in meta_oils.Builtin() or
    meta_oils.Command().
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
158
159
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an unquoted string naming 'builtin' or 'command'.

    Arrays and quoted strings never count.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
168
169
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse an argument to declare, export, etc. at runtime.

    This is the dynamic fallback for arguments that were not parsed
    statically.  The argument must match consts.ASSIGN_ARG_RE; otherwise a
    fatal error is reported at blame_word.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    m = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if m is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Groups: 1 is the name, 2 is only used for grouping (ERE doesn't have
    # non-capturing groups), 3 is the operator and 4 is the value.
    name = m[1]
    op = m[3]
    assert op is not None, op

    if len(op) == 0:  # declare NAME  (no operator, so no value)
        return AssignArg(name, None, False, blame_word)

    # declare NAME=value, or NAME+=value when the operator starts with '+'
    rhs = value.Str(m[4])  # type: Optional[value_t]
    return AssignArg(name, rhs, op[0] == '+', blame_word)
196
197
198	# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
199	def _BackslashEscape(s):
200	# type: (str) -> str
201	"""Double up backslashes.
202
203	Useful for strings about to be globbed and strings about to be IFS
204	escaped.
205	"""
206	return s.replace('\\', '\\\\')
207
208
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the result of a var sub into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the value to substitute
      quoted: whether the substitution appeared in a quoted context
      part_loc: the word part, used as the location of a type error

    Raises:
      error.TypeErr if the value can't be substituted into a word.
    """
    UP_val = val
    # Every Piece built here gets the opposite of 'quoted' as its third field.
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _EmptyStrOrError, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            return Piece(val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            return part_value.Array(val.strs)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(val.d.values())

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            text = val_ops.Stringify(val, loc.Missing, 'Word eval ')
            return Piece(text, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
248
249
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part values of a word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args.  It can never be joined.
    This idea exists because of arrays like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    all_frames = [frame]

    for pv in part_vals:
        UP_pv = pv

        with tagswitch(pv) as case:
            if case(part_value_e.String):
                pv = cast(Piece, UP_pv)
                frame.append(pv)

            elif case(part_value_e.Array):
                pv = cast(part_value.Array, UP_pv)

                # The first defined entry joins the frame in progress; every
                # later one starts a frame of its own.
                seen_entry = False
                for s in pv.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would
                    # have decayed to a string.
                    piece = Piece(s, True, False)
                    if seen_entry:
                        frame = [piece]
                        all_frames.append(frame)  # singleton frame
                    else:
                        frame.append(piece)
                        seen_entry = True

            else:
                raise AssertionError()

    return all_frames
317
318
# TODO: This could be _MakeWordFrames and then sep.join().  It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into a single string, e.g. for ${a=x"$@"x}.

    String parts are used as-is.  The defined entries of each array part are
    joined with join_char.  The resulting chunks are then concatenated with
    no separator.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                pv = cast(Piece, UP_pv)
                chunks.append(pv.s)

            elif case(part_value_e.Array):
                pv = cast(part_value.Array, UP_pv)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for s in pv.strs:
                    if s is not None:
                        defined.append(s)
                chunks.append(join_char.join(defined))

            else:
                raise AssertionError()

    return ''.join(chunks)
338
339
def _PerformSlice(
        val,  # type: value_t
        begin,  # type: int
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: Optional[value.Str]
):
    # type: (...) -> value_t
    """Apply the slice ${x:begin:length} to a string or an indexed array.

    Args:
      val: the value to slice; a Str or a BashArray
      begin: start offset.  A negative value counts from the end.
      length: only meaningful when has_length is true.  For strings, a
        negative length is an end POSITION counted from the end of the
        string.  For arrays, a negative length is a fatal error.
      has_length: whether a length was given at all
      part: the var sub being evaluated, used as the error location
      arg0_val: when not None, its string is put in front of the array
        entries before slicing (the $@ / $* quirk noted below)

    Returns:
      A new value.Str or value.BashArray.

    Raises:
      error.TypeErr for values that are neither Str, BashArray nor BashAssoc.
      Fatal errors (via e_die) for associative arrays and for a negative
      array length.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            if begin < 0:  # Compute offset with unicode
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray):  # Slice array entries.
            val = cast(value.BashArray, UP_val)
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            # Quirk: "begin" for positional arguments ($@ and $*) counts $0.
            if arg0_val is not None:
                orig = [arg0_val.s]
                orig.extend(val.strs)
            else:
                orig = val.strs

            n = len(orig)
            if begin < 0:
                i = n + begin  # ${@:-3} starts counts from the end
                if i < 0:
                    # The offset lies before the first entry.  bash expands
                    # this to nothing.  Without this clamp, the negative i
                    # would index from the END of the list below and yield
                    # repeated entries.
                    i = n
            else:
                i = begin
            strs = []  # type: List[str]
            count = 0
            while i < n:
                if has_length and count == length:  # length could be 0
                    break
                s = orig[i]
                if s is not None:  # Unset elements don't count towards the length
                    strs.append(s)
                    count += 1
                i += 1

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
421
422
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    Subclasses must override EvalWordToString().
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate a word to a string value.  Not implemented here."""
        raise NotImplementedError()
434
435
436	def _GetDollarHyphen(exec_opts):
437	# type: (optview.Exec) -> str
438	chars = [] # type: List[str]
439	if exec_opts.interactive():
440	chars.append('i')
441
442	if exec_opts.errexit():
443	chars.append('e')
444	if exec_opts.noglob():
445	chars.append('f')
446	if exec_opts.noexec():
447	chars.append('n')
448	if exec_opts.nounset():
449	chars.append('u')
450	# NO letter for pipefail?
451	if exec_opts.xtrace():
452	chars.append('x')
453	if exec_opts.noclobber():
454	chars.append('C')
455
456	# bash has:
457	# - c for sh -c, i for sh -i (mksh also has this)
458	# - h for hashing (mksh also has this)
459	# - B for brace expansion
460	return ''.join(chars)
461
462
class TildeEvaluator(object):
    """Expands ~ and ~user to home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, then ask the OS.  This is what bash does.
        home_val = self.mem.GetValue('HOME')
        if home_val.tag() != value_e.Str:
            return pyos.GetMyHomeDir()
        return cast(value.Str, home_val).s

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When no home directory is found, this is a fatal error under
        strict_tilde.  Otherwise the tilde expression is returned literally.
        """
        user = part.user_name
        if user is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
504
505
506	class AbstractWordEvaluator(StringWordEvaluator):
507	"""Abstract base class for word evaluators.
508
509	Public entry points:
510	EvalWordToString EvalForPlugin EvalRhsWord
511	EvalWordSequence EvalWordSequence2
512	"""
513
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators passed in and create the globber.

    The evaluators arith_ev, expr_ev, prompt_ev and unsafe_arith start out
    as None here; they are expected to be assigned after construction.
    """
    # Collaborators passed in by the caller
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.tilde_ev = tilde_ev
    self.splitter = splitter
    self.errfmt = errfmt

    # Not available yet at construction time
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith

    self.globber = glob_.Globber(exec_opts)
539
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract: this base class provides no implementation."""
    raise NotImplementedError()
543
544	def _EvalCommandSub(self, cs_part, quoted):
545	# type: (CommandSub, bool) -> part_value_t
546	"""Abstract since it has a side effect."""
547	raise NotImplementedError()
548
549	def _EvalProcessSub(self, cs_part):
550	# type: (CommandSub) -> part_value_t
551	"""Abstract since it has a side effect."""
552	raise NotImplementedError()
553
554	def _EvalVarNum(self, var_num):
555	# type: (int) -> value_t
556	assert var_num >= 0
557	return self.mem.GetArgNum(var_num)
558
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate $?, $@, $*, $- and so forth.

    $@ is special -- it needs to know whether it is in a double quoted
    context.

    - If it's $@ in a double quoted context, return an ARRAY.
    - If it's $@ in a normal context, return a STRING, which then will be
      subject to splitting.

    For $@ and $* the decision is recorded in vsub_state.join_array, and the
    argv array itself is returned.
    """
    is_at = op_id == Id.VSub_At
    if is_at or op_id == Id.VSub_Star:
        args_val = value.BashArray(self.mem.GetArgv())
        # "$@" evaluates to an array.  $@, $* and "$*" are all decayed.
        vsub_state.join_array = not (is_at and quoted)
        return args_val

    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    return self.mem.GetSpecialVar(op_id)
588
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
):
    # type: (...) -> bool
    """Apply a test operator like ${a:-default}, ${a+alt}, ${a=x}, ${a?msg}.

    The value is first classified as "falsey" or not:

    - Undef is always falsey
    - a Str is falsey only for the colon operators, and only when empty
    - a BashArray / BashAssoc is falsey when it has no entries
    - anything else is never falsey

    Then the operator decides what happens:

      ${a:-x} ${a-x}   append the evaluated arg word if falsey
      ${a:+x} ${a+x}   append the evaluated arg word if NOT falsey
      ${a:=x} ${a=x}   if falsey, append the arg word AND assign it (decayed
                       to a string) to the place given by vtest_place
      ${a:?x} ${a?x}   if falsey, fail with an error that includes the arg
                       word

    The arg word is spliced in as multiple part values, because of code like

      echo X-${a:-'def'"ault"}-X
      echo ${a:-x"$@"x}

    Args:
      val: the value being tested
      op: the operator and its arg word
      quoted: whether the var sub is in a quoted context
      part_vals: out param that the evaluated arg word is appended to
      vtest_place: variable name and optional index, for the = operators and
        for the error message of the ? operators
      blame_token: location for errors

    Returns:
      Whether part_vals was mutated
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # TODO: allow undefined
            is_falsey = len(val.strs) == 0

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            is_falsey = len(val.d) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Blame the same token as the ? operators below, so that the
                # error is reported with a source location.
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array' could
                # avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
760
def _Length(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    - Str: the count from string_ops.CountUtf8Chars().  If that raises
      error.Strict, the error is re-raised under strict_word_eval.
      Otherwise a warning is printed and -1 is returned.
    - BashArray: the number of entries that are set
    - BashAssoc: the number of entries

    Raises:
      error.TypeErr for any other kind of value.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?

            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                return string_ops.CountUtf8Chars(str_val.s)
            except error.Strict as e:
                # Add this here so we don't have to add it so far down the
                # stack.
                # TODO: It's better to show BOTH this CODE an the actual DATA
                # somehow.
                e.location = token

                if self.exec_opts.strict_word_eval():
                    raise

                # NOTE: Doesn't make the command exit with 1; it just returns
                # a length of -1.
                self.errfmt.PrettyPrintError(e, prefix='warning: ')
                return -1

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            # There can be empty placeholder values in the array.
            num_set = 0
            for s in array_val.strs:
                if s is not None:
                    num_set += 1
            return num_set

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            return len(assoc_val.d)

        else:
            raise error.TypeErr(
                val, "Length op expected Str, BashArray, BashAssoc", token)
807
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    For a BashArray, the keys are the indices of the entries that are set, as
    strings.  For a BashAssoc, they are the keys of the dict.

    Raises:
      error.TypeErr for any other kind of value.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # translation issue: tuple indices not supported in list comprehensions
            #indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
            indices = []  # type: List[str]
            for i, s in enumerate(val.strs):
                if s is not None:
                    indices.append(str(i))
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            return value.BashArray(val.d.keys())

        else:
            # The message names the types that are actually accepted above.
            # A Str is NOT one of them.
            raise error.TypeErr(val, 'Keys op expected BashArray, BashAssoc',
                                token)
833
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    A Str value is parsed as a variable reference, and the variable it refers
    to is evaluated.  Undef stays Undef.

    Args:
      val: the value of the variable that holds the reference
      blame_tok: 'foo' for ${!foo}
      quoted, vsub_state, vtest_place: passed through to _VarRefValue()

    Raises:
      error.TypeErr for values that aren't strings; fatal errors (via e_die)
      for arrays.  All of them are reported at blame_tok.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            return value.Undef  # ${!undef} is just weird bash behavior

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            bvs_part = self.unsafe_arith.ParseVarRef(val.s, blame_tok)
            return self._VarRefValue(bvs_part, quoted, vsub_state,
                                     vtest_place)

        elif case(value_e.BashArray):  # caught earlier but OK
            e_die('Indirect expansion of array', blame_tok)

        elif case(value_e.BashAssoc):  # caught earlier but OK
            e_die('Indirect expansion of assoc array', blame_tok)

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
860
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix operator of kind VOp1 to a defined value.

    A string is transformed directly.  For a BashArray or BashAssoc the
    operator is applied to each entry (array entries that are unset are
    dropped), and the result is a BashArray.

    Raises:
      error.TypeErr for other kinds of values.
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)
    if op_kind != Kind.VOp1:
        raise AssertionError(Kind_str(op_kind))

    # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
    # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
    # shortcut for constant strings.
    arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
    assert arg_val.tag() == value_e.Str
    pat = arg_val.s

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            return value.Str(
                string_ops.DoUnarySuffixOp(str_val.s, op.op, pat,
                                           has_extglob))

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this
            # too.
            out = []  # type: List[str]
            for item in array_val.strs:
                if item is not None:
                    out.append(
                        string_ops.DoUnarySuffixOp(item, op.op, pat,
                                                   has_extglob))
            return value.BashArray(out)

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            out = []
            for item in assoc_val.d.values():
                out.append(
                    string_ops.DoUnarySuffixOp(item, op.op, pat,
                                               has_extglob))
            return value.BashArray(out)

        else:
            raise error.TypeErr(
                val, 'Unary op expected Str, BashArray, BashAssoc',
                op.op)
912
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply a pattern substitution like ${x/pat/replace}.

    The glob pattern is translated to an ERE.  A string is replaced directly.
    For a BashArray or BashAssoc each entry is replaced (array entries that
    are unset are dropped), and the result is a BashArray.

    Raises:
      error.TypeErr for other kinds of values; a fatal error (via e_die) when
      the pattern contains an extended glob.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from !(*.py)
    # to ERE!  You would need an engine that supports negation? (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    replace_str = ''
    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    with tagswitch(val) as case2:
        if case2(value_e.Str):
            str_val = cast(value.Str, val)
            return value.Str(replacer.Replace(str_val.s, op))

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, val)
            replaced = []  # type: List[str]
            for item in array_val.strs:
                if item is not None:
                    replaced.append(replacer.Replace(item, op))
            return value.BashArray(replaced)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, val)
            replaced = []
            for item in assoc_val.d.values():
                replaced.append(replacer.Replace(item, op))
            return value.BashArray(replaced)

        else:
            raise error.TypeErr(
                val, 'Pat Sub op expected Str, BashArray, BashAssoc',
                op.slash_tok)
969
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate the operands of ${x:begin:length} and perform the slice.

    When var_name is None (that is, for $* or $@), $0 is passed along so that
    it is counted by the slice.

    If slicing raises error.Strict, the error is re-raised under
    strict_word_eval.  Otherwise a warning is printed, and the result is an
    empty string or an empty array.
    """
    begin = self.arith_ev.EvalToInt(op.begin)

    # Without a length operand, length is a placeholder that is never
    # consulted, because has_length is False.
    if op.length:
        has_length = True
        length = self.arith_ev.EvalToInt(op.length)
    else:
        has_length = False
        length = -1

    sliced = val
    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        sliced = _PerformSlice(val, begin, length, has_length, part,
                               arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        # Fall back to an empty value of the same kind as the input.
        with tagswitch(val) as case2:
            if case2(value_e.Str):
                sliced = value.Str('')
            elif case2(value_e.BashArray):
                sliced = value.BashArray([])
            else:
                raise NotImplementedError()
    return sliced
1001
def _Nullary(self, val, op, var_name):
    # type: (value_t, Token, Optional[str]) -> Tuple[value.Str, bool]
    """Apply an operator that takes no argument: ${x@P}, ${x@Q} or ${x@a}.

    - @P evaluates a string as a prompt, and removes \\x01 and \\x02
    - @Q shell-encodes a string, or every set entry of a BashArray (joined
      by spaces)
    - @a gives attribute letters: 'a' or 'A' for arrays, then r / x / n from
      the variable's cell

    Args:
      val: the value the operator applies to
      op: the operator token, also used as the error location
      var_name: the variable name, or None when there isn't one, e.g. ${?@a}

    Returns:
      (result string, whether the result should be treated as quoted)
    """
    UP_val = val
    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                # Named prompt_str so that it doesn't shadow the 'prompt'
                # module that's imported for type checking.
                prompt_str = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                p = prompt_str.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        with tagswitch(val) as case:
            if case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray):
                array_val = cast(value.BashArray, UP_val)

                # Arrays are sparse: unset entries are None, and must be
                # skipped rather than passed to the encoder.
                # TODO: should use fastfunc.ShellEncode
                tmp = [
                    j8_lite.MaybeShellEncode(s) for s in array_val.strs
                    if s is not None
                ]
                result = value.Str(' '.join(tmp))
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # We're ONLY simluating -a and -A, not -r -x -n for now.  See
        # spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(val) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1062
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the subscripts [@] and [*], as in ${a[@]} and ${a[*]}.

    The subscript decides whether the array will be joined, which is recorded
    in vsub_state.join_array.  The value itself is returned unchanged, except
    that Undef is replaced by the result of _EmptyBashArrayOrError() when no
    test operator is present.

    Indexing a string this way is a fatal error under strict_array.
    """
    op_id = cast(bracket_op.WholeArray, part.bracket_op).op_id

    # The two subscripts differ only in the join rule and the error message.
    if op_id == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
        str_err = "Can't index string with @"
    elif op_id == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
        str_err = "Can't index string with *"
    else:
        raise AssertionError(op_id)  # unknown

    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            if not vsub_state.has_test_op:
                val = self._EmptyBashArrayOrError(part.token)
        elif case2(value_e.Str):
            if self.exec_opts.strict_array():
                e_die(str_err, loc.WordPart(part))
        elif case2(value_e.BashArray):
            pass  # no-op

    return val
1097
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process a subscript like ${a[i+1]} or ${assoc[key]}.

    For a BashArray the subscript is evaluated to an integer, and for a
    BashAssoc to a string.  The evaluated subscript is also stored in
    vtest_place.index (an out param).  A missing entry yields value.Undef.
    Undef is passed through unchanged.

    Raises:
      error.TypeErr for other kinds of values; a fatal error (via e_die) when
      the value is a string.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.token)

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            int_index = self.arith_ev.EvalToInt(index_expr)
            vtest_place.index = a_index.Int(int_index)

            item = GetArrayItem(array_val.strs, int_index)
            if item is not None:
                val = value.Str(item)
            else:
                val = value.Undef

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                index_expr, blame_loc=location.TokenForArith(index_expr))

            vtest_place.index = a_index.Str(key)  # out param
            item = assoc_val.d.get(key)
            if item is not None:
                val = value.Str(item)
            else:
                val = value.Undef

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1147
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part.

    Args:
      parts: the word parts found between the double quotes
      part_vals: output param to append to.
    """
    # The result may contain arrays:
    #   $ a=(1 2); b=(3); c=(4 5)
    #   $ argv "${a[@]}${b[@]}${c[@]}"
    #   ['1', '234', '5']
    #
    # And with multiple parts:
    #   $ argv "${a[@]}${undef[@]:-${c[@]}}"
    #   ['1', '24', '5']

    if len(parts):
        for p in parts:
            self._EvalWordPart(p, part_vals, QUOTED)
    else:
        # Special case for "".  The parser outputs (DoubleQuoted []) rather
        # than (DoubleQuoted [Literal '']), so an empty piece is added here.
        part_vals.append(Piece('', True, False))
1174
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a DoubleQuoted part and flatten the result to one string.

    For double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    # The left quote is the location passed on to _ConcatPartVals
    flat = self._ConcatPartVals(evaluated, dq_part.left)
    return flat
1184
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array to a single string, the way $* does.

    Elements that are None are skipped; the rest are joined with the
    character returned by self.splitter.GetJoinChar().
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for s in val.strs:
        if s is not None:
            present.append(s)

    return value.Str(join_char.join(present))
1192
def _EmptyStrOrError(self, val, token):
    # type: (value_t, Token) -> value_t
    """Replace an Undef value with '', or fail when nounset is on.

    Args:
      val: the value to check; anything but Undef is returned unchanged
      token: names the variable in the error message and locates the error
    """
    if val.tag() == value_e.Undef:
        if self.exec_opts.nounset():
            tok_str = lexer.TokenVal(token)
            # Report the name without a leading $
            if tok_str.startswith('$'):
                name = tok_str[1:]
            else:
                name = tok_str
            e_die('Undefined variable %r' % name, token)

        return value.Str('')

    return val
1204
def _EmptyBashArrayOrError(self, token):
    # type: (Token) -> value_t
    """Return an empty BashArray for an undefined array, or fail under nounset.

    Args:
      token: used in the error message and as the error location
    """
    assert token is not None
    if not self.exec_opts.nounset():
        return value.BashArray([])

    e_die('Undefined array %r' % lexer.TokenVal(token), token)
1212
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a BracedVarSub, if there is one.

    With [@] / [*] or [index], the work is done by _WholeArray / _ArrayIndex.
    With no bracket op, a named array or assoc array either decays to a
    string or is rejected, unless this is a type query.
    """
    if part.bracket_op:
        tag = part.bracket_op.tag()
        if tag == bracket_op_e.WholeArray:
            return self._WholeArray(val, part, quoted, vsub_state)

        if tag == bracket_op_e.ArrayIndex:
            return self._ArrayIndex(val, part, vtest_place)

        raise AssertionError(tag)

    # no bracket op
    var_name = vtest_place.name
    if var_name is None:
        return val

    if (val.tag() in (value_e.BashArray, value_e.BashAssoc) and
            not vsub_state.is_type_query):
        if ShouldArrayDecay(var_name, self.exec_opts,
                            not (part.prefix_op or part.suffix_op)):
            # for ${BASH_SOURCE}, etc.
            return DecayArray(val)

        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)" %
            var_name, loc.WordPart(part))

    return val
1242
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Evaluate a BracedVarSub to a value_t: base value, then bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t
    instead of appending part values.
    """
    tok_id = part.token.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if tok_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if self.exec_opts.eval_unsafe_arith():
        return self._EvalBracketOp(val, part, quoted, vsub_state,
                                   vtest_place)

    # Otherwise evaluate the bracket op with _allow_command_sub set to False.
    with state.ctx_Option(self.mutable_opts, [option_i._allow_command_sub],
                          False):
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)
    return val
1272
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the BracedVarSub to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution is inside double quotes
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.token.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other form
                # is joined into a single piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.token.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.token.id, quoted, vsub_state)

    # Look at the suffix op BEFORE the bracket op, because it decides how
    # undefined values and arrays are treated below (is_type_query,
    # has_test_op).
    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

            elif case(suffix_op_e.Unary):
                suffix_op_ = cast(suffix_op.Unary, UP_op)

                # Do the _EmptyStrOrError/_EmptyBashArrayOrError up front, EXCEPT in
                # the case of Kind.VTest
                if consts.GetKind(suffix_op_.op.id) == Kind.VTest:
                    vsub_state.has_test_op = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            if not vsub_state.has_test_op:  # undef -> '' BEFORE length
                val = self._EmptyStrOrError(val, part.token)

            n = self._Length(val, part.token)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray):
                if vsub_state.has_test_op:
                    # ${!a[@]-'default'} is a non-fatal runtime error in bash.  Here
                    # it's fatal.
                    # has_test_op is only set for a Unary suffix op, so UP_op
                    # is bound here.
                    op_tok = cast(suffix_op.Unary, UP_op).op
                    e_die('Test operation not allowed with ${!array[@]}',
                          op_tok)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.token)
                # already set vsub_State.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.token, quoted, vsub_state,
                                       vtest_place)

                if not vsub_state.has_test_op:  # undef -> '' AFTER indirection
                    val = self._EmptyStrOrError(val, part.token)

        else:
            raise AssertionError(part.prefix_op)

    else:
        if not vsub_state.has_test_op:  # undef -> '' if no prefix op
            val = self._EmptyStrOrError(val, part.token)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.token):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1468
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Flatten a list of part values into one string.

    String pieces are used as is.  An array part is a fatal error under
    strict_array; otherwise its non-None elements are joined with a space.

    Args:
      part_vals: the evaluated parts
      location: where to report the strict_array error
    """
    pieces = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                pieces.append(part_val.s)

            elif case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)

                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = []  # type: List[str]
                for elem in part_val.strs:
                    if elem is not None:
                        present.append(elem)
                pieces.append(' '.join(present))

            else:
                raise AssertionError()

    return ''.join(pieces)
1498
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate an unquoted ${...} substitution and flatten it to a string.

    For double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    # blame ${ location
    flat = self._ConcatPartVals(evaluated, part.left)
    return flat
1509
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate $name, $1, or a special variable; append to part_vals.

    Args:
      part: the SimpleVarSub to evaluate
      part_vals: output param to append to
      quoted: whether the substitution is inside double quotes
    """
    tok = part.tok
    tok_id = tok.id

    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok_id == Id.VSub_DollarName:
        var_name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(var_name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(var_name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # Undef becomes '' or a nounset error
    val = self._EmptyStrOrError(val, tok)

    # An array is joined only when join_array was set on vsub_state
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1550
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate an unquoted $name substitution and flatten it to a string.

    For double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    # The token is the location passed on to _ConcatPartVals
    flat = self._ConcatPartVals(evaluated, node.tok)
    return flat
1560
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Appends, in order: the opening operator, each evaluated arm with a '|'
    piece between arms, and the closing ')'.  The operator, separator and
    closing pieces are unquoted and not split.

    Args:
      part: the extended glob word part
      part_vals: output param to append to
    """
    op = part.op
    if op.id == Id.ExtGlob_Comma:
        op_str = '@('
    else:
        op_str = lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    for i, w in enumerate(part.arms):
        if i != 0:
            # The separator must be a bare '|'.  A backslash in front of it
            # would escape the pipe in the flattened pattern, so the arms
            # would no longer be alternatives.
            part_vals.append(Piece('|', False, False))  # separator
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(w, part_vals, EXTGLOB_NESTED)
    part_vals.append(Piece(')', False, False))  # closing )
1578
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values of the word
      w: the word, used as the error location
      glob_parts: output param; each extglob part becomes '*' here
      fnmatch_parts: output param; receives the full extglob text
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                # e.g. the @( and | in @(foo|bar) aren't quoted, so they
                # are used verbatim
                s = part_val.s
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(s)
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # keep appending fnmatch_parts, but replace glob_parts with '*'
                discarded = []  # type: List[str]
                self._TranslateExtGlob(part_val.part_vals, w, discarded,
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1614
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate one word part, appending what it yields to part_vals.

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to
      flags: bit flags; QUOTED and IS_SUBST are consulted here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))

        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            part_vals.append(Piece(lexer.LazyStr(part), quoted, is_subst))

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            part_vals.append(Piece(part.ch, True, False))

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            part_vals.append(Piece(part.sval, True, False))

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            left_id = part.left_token.id
            if left_id in (Id.Left_DollarParen, Id.Left_AtParen,
                           Id.Left_Backtick):
                sub_val = self._EvalCommandSub(
                    part, quoted)  # type: part_value_t

            elif left_id in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sub_val = self._EvalProcessSub(part)

            else:
                raise AssertionError(left_id)

            part_vals.append(sub_val)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            expanded = self.tilde_ev.Eval(part)
            # NOT split even when unquoted!
            part_vals.append(Piece(expanded, True, False))

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            part_vals.append(Piece(mops.ToStr(num), quoted, not quoted))

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            # Note: the extglob option is not checked at runtime here.

            # Create a node to hold the flattened tree.  The caller decides
            # whether to pass it to fnmatch() or replace it with '*' and pass
            # it to glob().
            flattened = []  # type: List[part_value_t]
            self._EvalExtGlob(part, flattened)  # flattens tree
            part_vals.append(part_value.ExtGlob(flattened))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            spliced = self.mem.GetValue(part.var_name)
            part_vals.append(
                part_value.Array(self.expr_ev.SpliceValue(spliced, part)))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_vals.append(self.expr_ev.EvalExprSub(part))

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1731
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending its part values to part_vals.

    An Empty word yields one empty piece; a Compound word is handed to
    _EvalWordToParts with the same flags.
    """
    UP_w = w
    tag = w.tag()

    if tag == rhs_word_e.Empty:
        is_quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', is_quoted, not is_quoted))

    elif tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, UP_w)
        self._EvalWordToParts(compound, part_vals, eval_flags=eval_flags)

    else:
        raise AssertionError()
1747
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Returns:
      Appends to part_vals.  Note that this is a TREE.
    """
    # A word with an extended glob is a special case because of the way we
    # use glob() and then fnmatch(..., FNM_EXTMATCH) to implement extended
    # globs.  It's hard to carry that extra information all the way past the
    # word splitting stage.
    #
    # OSH semantic limitations: If a word has an extended glob part, then
    # 1. It can't have an array
    # 2. Word splitting of unquoted words isn't respected

    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # Caller REQUESTED extglob evaluation, AND we parsed word_part.ExtGlob()
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        # A negative count is reported as a failed glob
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))
        return

    if bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)
        return

    # e.g. simple_word_eval, assignment builtin
    e_die('Extended glob not allowed in this word', w)
1804
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the evaluated parts to convert
      w: the word, used as the error location (could just be a span ID)
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE select how quoted pieces are
        escaped
      strs: output param; one string is appended per String or Array part
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                text = part_val.s
                if part_val.quoted:
                    if eval_flags & QUOTE_FNMATCH:
                        # Quoted text in a pattern, e.g. in [[ x == pat ]],
                        # case, ${x%pat} or ${x//pat/}
                        text = glob_.GlobEscape(text)
                    elif eval_flags & QUOTE_ERE:
                        text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather
                    # than the parts.  Problem: the word is a TREE of parts,
                    # but we only have a flat list of part_vals.  The only
                    # case where we really get arrays is "$@", "${a[@]}",
                    # "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')

                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = []  # type: List[str]
                for elem in part_val.strs:
                    if elem is not None:
                        present.append(elem)
                strs.append(' '.join(present))

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(part_val.part_vals, w, eval_flags,
                                       strs)

            else:
                raise AssertionError()
1860
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: can contain a quoting algorithm, which is applied when the
        part values are converted to strings
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Fast path, only without flags: QUOTE_FNMATCH etc. breaks optimization
    if eval_flags == 0:
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            return value.Str(fast_str)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark
    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(p, evaluated, 0)

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, out)
    return value.Str(''.join(out))
1888
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    Returns:
      (pattern, has_extglob).  Quoted pieces are escaped per QUOTE_FNMATCH.
      An Empty word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(p, evaluated, 0)
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, out)
    pattern = value.Str(''.join(out))
    return pattern, saw_extglob
1909
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42)
    are handled here.  A failure is turned into a placeholder string
    such as '<Runtime error: ...>' rather than propagating.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers is there to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as e:
            result = value.Str('<Runtime error: %s>' % e.UserErrorString())

        except (IOError, OSError) as e:
            result = value.Str('<I/O error: %s>' % pyutil.strerror(e))

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
1932
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Returns:
      value.BashArray for a=(1 2), value.BashAssoc for an associative array
      literal, and value.Str for everything else (including an Empty RHS).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so check the tag against rhs_word_e, as
    # EvalWordToPattern does.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # Keys and values are each evaluated to a single string
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
1969
1970	def _EvalWordFrame(self, frame, argv):
1971	# type: (List[Piece], List[str]) -> None
1972	all_empty = True
1973	all_quoted = True
1974	any_quoted = False
1975
1976	#log('--- frame %s', frame)
1977
1978	for piece in frame:
1979	if len(piece.s):
1980	all_empty = False
1981
1982	if piece.quoted:
1983	any_quoted = True
1984	else:
1985	all_quoted = False
1986
1987	# Elision of ${empty}${empty} but not $empty"$empty" or $empty""
1988	if all_empty and not any_quoted:
1989	return
1990
1991	# If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
1992	# don't do word splitting or globbing.
1993	if all_quoted:
1994	tmp = [piece.s for piece in frame]
1995	a = ''.join(tmp)
1996	argv.append(a)
1997	return
1998
1999	will_glob = not self.exec_opts.noglob()
2000
2001	# Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
2002	frags = [] # type: List[str]
2003	for piece in frame:
2004	if will_glob and piece.quoted:
2005	frag = glob_.GlobEscape(piece.s)
2006	else:
2007	# If we have a literal \, then we turn it into \\\\.
2008	# Splitting takes \\\\ -> \\
2009	# Globbing takes \\ to \ if it doesn't match
2010	frag = _BackslashEscape(piece.s)
2011
2012	if piece.do_split:
2013	frag = _BackslashEscape(frag)
2014	else:
2015	frag = self.splitter.Escape(frag)
2016
2017	frags.append(frag)
2018
2019	flat = ''.join(frags)
2020	#log('flat: %r', flat)
2021
2022	args = self.splitter.SplitForWordEval(flat)
2023
2024	# space=' '; argv $space"". We have a quoted part, but we CANNOT elide.
2025	# Add it back and don't bother globbing.
2026	if len(args) == 0 and any_quoted:
2027	argv.append('')
2028	return
2029
2030	#log('split args: %r', args)
2031	for a in args:
2032	if glob_.LooksLikeGlob(a):
2033	n = self.globber.Expand(a, argv)
2034	if n < 0:
2035	# TODO: location info, with span IDs carried through the frame
2036	raise error.FailGlob('Pattern %r matched no files' % a,
2037	loc.Missing)
2038	else:
2039	argv.append(glob_.GlobUnescape(a))
2040
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Helper for _EvalAssignBuiltin.

    Splitting and globbing are disabled for assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]

    Returns:
      One string per non-empty frame of the evaluated word.
    """
    part_vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, part_vals, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(part_vals):
        if len(frame) == 0:
            continue  # empty array gives empty frame!

        texts = []  # type: List[str]
        for piece in frame:
            texts.append(piece.s)
        result.append(''.join(texts))  # no split or glob

    return result
2060
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Evaluate the words of an assignment builtin into a cmd_value.Assign.

    Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin; passed through to the result.
      arg0: The evaluated name of the builtin; becomes flags[0].
      words: All words of the command, including leading meta builtins.
      meta_offset: Index in `words` of the assignment builtin itself
        (0 for 'declare x', 1 for 'builtin declare x', etc.).  Evaluation
        starts at the word after it.

    Returns:
      cmd_value.Assign(builtin_id, flags, flag_locs, assign_args), where
      flag_locs is kept parallel to flags: flag_locs[i] is the word that
      produced flags[i].

    Raises:
      Whatever e_die() raises, when a statically detected assignment has a
      left-hand side that isn't a plain Id.Lit_VarLike token.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # Parallel lists.  flags holds initial flags like -p, and also the
    # names in '-f -F name1 name2'.  arg0 was evaluated from
    # words[meta_offset], so that is its location (same as words[0] when
    # there are no meta builtins).
    flags = [arg0]
    flag_locs = [words[meta_offset]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the left token, e.g. 'x='
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name after -f / -F.  Record its location too, so
                        # that flag_locs stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2147
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate `words` as an assignment builtin if arg0 names one.

    Returns:
      The result of _EvalAssignBuiltin() when consts.LookupAssignBuiltin()
      recognizes arg0; None otherwise, so the caller can continue with
      normal evaluation.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None  # not an assignment builtin

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2155
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from an evaluated part.

    Only an unquoted string part is looked up as an assignment builtin
    name; any other part, or a quoted string, gives None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2164
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Evaluated strings are never split.  A word is glob-expanded only when
    glob_.LooksLikeStaticGlob() says so, and never when it is the first
    word of a command that may be an assignment builtin.

    Args:
      words: The words to evaluate.
      is_last_cmd: Passed through to the resulting cmd_value.Argv.
      allow_assign: Whether the first word may name an assignment builtin.

    Returns:
      A cmd_value.Assign if an assignment builtin was detected, otherwise
      a cmd_value.Argv whose locations are parallel to its strings.

    Raises:
      error.FailGlob: if self.globber.Expand() reports failure for a static
        glob (negative return value).
    """
    out_strs = []  # type: List[str]
    out_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        is_first = allow_assign and i == meta_offset

        if not is_first and glob_.LooksLikeStaticGlob(w):
            val = self.EvalWordToString(w)  # respects strict-array
            num_globbed = self.globber.Expand(val.s, out_strs)
            if num_globbed < 0:
                raise error.FailGlob('Pattern %r matched no files' % val.s,
                                     w)
            for _ in xrange(num_globbed):
                out_locs.append(w)
            continue

        # Everything else: evaluate to parts, make frames, and join each
        # non-empty frame with no split or glob.  That is exactly what
        # _EvalWordToArgv() does.
        #
        # We will still allow x"${a[@]}"x, though it's deprecated by @a,
        # which disallows such expressions at parse time.
        word_strs = self._EvalWordToArgv(w)

        # TODO: Remove this because YSH will disallow assignment
        # builtins?  (including export?)
        if is_first and len(word_strs) == 1:
            cmd_val = self._DetectAssignBuiltinStr(word_strs[0], words,
                                                   meta_offset)
            if cmd_val:
                return cmd_val

        out_strs.extend(word_strs)
        for _ in word_strs:
            out_locs.append(w)

    return cmd_value.Argv(out_strs, out_locs, is_last_cmd, None, None)
2226
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: The words to evaluate.
      is_last_cmd: Passed through to the resulting cmd_value.Argv.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      A cmd_value.Assign if an assignment builtin was detected (statically
      or dynamically), otherwise a cmd_value.Argv whose locations are
      parallel to its strings: locs[k] is the word that produced strs[k].
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            # Fast path: exactly one string and one location for this word.
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Snapshot the length right here, rather than carrying a running
        # count across iterations.  The fast path above appends to strs and
        # locs itself, so a count only updated on this path would re-count
        # those entries and make locs longer than strs.
        num_before = len(strs)

        # Do splitting and globbing.  Each frame will append zero or more
        # args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.
        for _ in xrange(len(strs) - num_before):
            locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2330
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a flat list of strings, for arrays and for loops.

    Assignment builtins are not allowed in those contexts, so
    EvalWordSequence2() is called with allow_assign left at its default
    of False, and the result is asserted to be a cmd_value.Argv.
    """
    # is_last_cmd is irrelevant here, so just pass False
    result = self.EvalWordSequence2(words, False)
    assert result.tag() == cmd_value_e.Argv

    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2341
2342
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command subs and process subs.

    Both are delegated to self.shell_ex, which starts out as None and must
    be assigned before use (see CheckCircularDeps).
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Filled in after construction, because of circular dependencies
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators set after construction exist."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and wrap its stdout in a part_value.

        An @(...) sub (Id.Left_AtParen) gives a part_value.Array of J8
        lines.  Any other command sub gives a single Piece.

        Raises:
          error.Structured: with status 4 if the @(...) output can't be
            decoded as J8 lines.
        """
        captured = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # Subject to splitting only when it's not quoted
            return Piece(captured, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(captured)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path it yields as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2389
2390
# Placeholder that CompletionWordEvaluator substitutes for the output of a
# command sub, since it has no executor to run one.
_DUMMY = '__NO_COMMAND_SUB__'
2392
2393
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs are not run; fixed placeholder strings
    are returned in their place.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators set after construction exist."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder instead of running the command sub.

        The result has the same kind as a real evaluation would: an Array
        for @(...) (Id.Left_AtParen), a single Piece otherwise.
        """
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2433
2434
2435	# vim: sw=4