OILS / osh / word_eval.py View on Github | oils.pub

2548 lines, 1552 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 error_code_e,
37 AssignArg,
38 a_index,
39 a_index_e,
40 VTestPlace,
41 VarSubState,
42 Piece,
43)
44from _devbuild.gen.option_asdl import option_i, builtin_i
45from _devbuild.gen.value_asdl import (
46 value,
47 value_e,
48 value_t,
49 sh_lvalue,
50 sh_lvalue_t,
51)
52from core import bash_impl
53from core import error
54from core import pyos
55from core import pyutil
56from core import state
57from display import ui
58from core import util
59from data_lang import j8
60from data_lang import j8_lite
61from core.error import e_die
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from mycpp import mops
66from mycpp.mylib import log, tagswitch, NewDict
67from osh import braces
68from osh import glob_
69from osh import string_ops
70from osh import word_
71from ysh import expr_eval
72from ysh import val_ops
73
74from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76if TYPE_CHECKING:
77 from _devbuild.gen.syntax_asdl import word_part_t
78 from _devbuild.gen.option_asdl import builtin_t
79 from core import optview
80 from core.state import Mem
81 from core.vm import _Executor
82 from osh.split import SplitContext
83 from osh import prompt
84 from osh import sh_expr_eval
85
# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Each flag occupies its own bit, so several can be combined with |.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Return whether we should allow ${a} to mean ${a[0]}.

    Decay is always allowed when strict_array is off.  With strict_array on,
    it is only allowed for a plain variable substitution of one of the names
    in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True

    if not is_plain_var_sub:
        return False

    return var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    A BashArray is looked up at index 0 and a BashAssoc at the key '0'.  Any
    other value type is a caller error and raises AssertionError.

    Returns:
      value.Str wrapping the element, or value.Undef when the lookup produced
      None.
    """
    tag = val.tag()
    if tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        s, error_code = bash_impl.BashArray_GetElement(array_val, 0)

        # Note: index 0 should never cause the out-of-bound index error.
        assert error_code == error_code_e.OK

    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        s = bash_impl.BashAssoc_GetElement(assoc_val, '0')

    else:
        raise AssertionError(tag)

    if s is not None:
        return value.Str(s)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    All of these forms have to be recognized:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Assignment builtins have different WORD EVALUATION RULES for a=$x (no
    word splitting), so the detection happens here rather than in
    meta_oils.Builtin() or meta_oils.Command().
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    if builtin_id == builtin_i.builtin:
        return True
    return builtin_id == builtin_i.command
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an unquoted string piece naming a meta builtin.

    Anything that is not a part_value String, or that is quoted, is never
    treated as 'builtin' / 'command'.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False

    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse an argument to declare, export, etc.

    This is the runtime fallback for arguments that were not statically
    parsed.

    Args:
      arg: the evaluated argument string, e.g. 'NAME', 'NAME=' or 'NAME+=val'
      blame_word: word used as the error location and stored in the result

    Returns:
      AssignArg(name, value or None, is_append, blame_word).  Dies with a
      fatal error if arg doesn't match consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API
    # for that, and it probably isn't a bottleneck now
    m = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if m is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Group 1 is the name.  Group 2 only exists for grouping, because ERE has
    # no non-capturing groups.  Group 3 is the operator and group 4 the value.
    var_name = m[1]
    op = m[3]
    assert op is not None, op

    # Defaults describe 'declare NAME', i.e. no operator at all.
    val = None  # type: Optional[value_t]
    append = False
    if len(op) != 0:  # declare NAME= or NAME+=
        val = value.Str(m[4])
        append = (op[0] == '+')

    return AssignArg(var_name, val, append, blame_word)
186
187
188# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189def _BackslashEscape(s):
190 # type: (str) -> str
191 """Double up backslashes.
192
193 Useful for strings about to be globbed and strings about to be IFS
194 escaped.
195 """
196 return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a variable substitution into a part_value.

    Helper for VarSub evaluation, called by _EvalBracedVarSub and by
    _EvalWordPart for SimpleVarSub.

    Args:
      val: the value to substitute
      quoted: whether the substitution is quoted; string results are built
              as Piece(s, quoted, not quoted)
      part_loc: the word part, used as the location in error messages

    Raises:
      error.TypeErr for value types that can't be substituted into a word.
    """
    UP_val = val
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  _ProcessUndef was
            # skipped, but we still have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            return Piece(str_val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            return part_value.Array(bash_impl.BashArray_GetValues(array_val))

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(bash_impl.BashAssoc_GetValues(assoc_val))

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            s = val_ops.Stringify(val, loc.WordPart(part_loc), 'Word eval ')
            return Piece(s, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a run of pieces that results in zero or more args; frames are never
    joined with each other.  The concept exists because of arrays like "$@"
    and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    So the first array element joins the frame that is currently open, every
    later element opens a new frame, and string parts that follow the array
    attach to the frame of its last element.  Array entries that are None
    (undefined) are ignored.

    TODO:
      - Instead of List[List[Piece]], where List[Piece] is a Frame, change
        the representation to
          Frames = (List[Piece] pieces, List[int] break_indices)
          # where break_indices are the end
        This is probably a lot more efficient for a common case like "$x" or
        "${x}".
      - Then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    frames = [frame]

    for pv in part_vals:
        UP_pv = pv

        with tagswitch(pv) as case:
            if case(part_value_e.String):
                pv = cast(Piece, UP_pv)
                frame.append(pv)

            elif case(part_value_e.Array):
                pv = cast(part_value.Array, UP_pv)

                seen_elem = False
                for s in pv.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Array parts are always quoted; otherwise they would
                    # have decayed to a string.
                    piece = Piece(s, True, False)
                    if seen_elem:
                        frame = [piece]
                        frames.append(frame)  # singleton frame
                    else:
                        frame.append(piece)
                        seen_elem = True

            else:
                raise AssertionError()

    return frames
307
308
309# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into a single string, e.g. for ${a=x"$@"x}.

    String parts contribute their text as is.  Array parts contribute their
    non-None elements joined by join_char.  The contributions are then
    concatenated without any separator.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                pv = cast(Piece, UP_pv)
                chunks.append(pv.s)

            elif case(part_value_e.Array):
                pv = cast(part_value.Array, UP_pv)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for s in pv.strs:
                    if s is not None:
                        defined.append(s)
                chunks.append(join_char.join(defined))

            else:
                raise AssertionError()

    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Compute ${x:offset} or ${x:offset:length} for a string or an array.

    Args:
      val: the value being sliced (Str, BashArray or SparseArray)
      offset: start position; a negative offset counts from the end
      length: only meaningful when has_length is true.  For strings a
              negative length is an END POSITION counted from the end; for
              arrays a negative length is a fatal error.
      has_length: whether a length was given at all
      part: the var sub, used as the location of errors
      arg0_val: $0 when slicing positional arguments ($@ and $*), which count
                $0 as element 0; None otherwise

    Returns:
      value.Str for a string, value.BashArray for either array type.

    Raises:
      error.TypeErr for values that can't be sliced; associative arrays die
      with a fatal error.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            # Translate the character offset into a byte offset.
            begin = mops.BigTruncate(offset)
            if begin >= 0:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)
            else:
                # Walk backward from the end, one character at a time.
                byte_begin = n
                for _ in xrange(-begin):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)

            if not has_length:
                byte_end = n
            elif length >= 0:
                byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)
            else:
                # Confusing: a negative length is a POSITION from the end
                byte_end = n
                for _ in xrange(-length):
                    byte_end = string_ops.PreviousUtf8Char(s, byte_end)

            result = value.Str(s[byte_begin:byte_end])  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counting from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                offset = mops.Add(offset, array_length)

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # Still negative after adding the length: nothing to select.
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts
                # $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # Offset 0 selects $0 itself, which uses up one unit
                        # of the requested length.
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    items = bash_impl.BashArray_GetValues(val)
                    n = len(items)

                    strs = []
                    idx = mops.BigTruncate(offset)
                    count = 0
                    while idx < n:
                        if has_length and count == length:  # could be 0
                            break
                        s = items[idx]
                        # Unset elements don't count towards the length
                        if s is not None:
                            strs.append(s)
                            count += 1
                        idx += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort
                    # so that we only extract the first LENGTH elements of
                    # the indices greater or equal to OFFSET.
                    start = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        start = start + 1

                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[
                            start:start + length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[start:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    with_arg0 = [arg0_val.s]
                    with_arg0.extend(strs)
                    strs = with_arg0

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    The base class implements nothing: EvalWordToString() raises
    NotImplementedError and has to be overridden.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate the word w to a value.Str; must be overridden."""
        raise NotImplementedError()
480
481
482def _GetDollarHyphen(exec_opts):
483 # type: (optview.Exec) -> str
484 chars = [] # type: List[str]
485 if exec_opts.interactive():
486 chars.append('i')
487
488 if exec_opts.errexit():
489 chars.append('e')
490 if exec_opts.noglob():
491 chars.append('f')
492 if exec_opts.noexec():
493 chars.append('n')
494 if exec_opts.nounset():
495 chars.append('u')
496 # NO letter for pipefail?
497 if exec_opts.xtrace():
498 chars.append('x')
499 if exec_opts.noclobber():
500 chars.append('C')
501
502 # bash has:
503 # - c for sh -c, i for sh -i (mksh also has this)
504 # - h for hashing (mksh also has this)
505 # - B for brace expansion
506 return ''.join(chars)
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user into home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem  # consulted for HOME
        self.exec_opts = exec_opts  # consulted for strict_tilde

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            return pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the home directory can't be determined, this is a fatal error
        under strict_tilde; otherwise the ~ or ~user text is returned
        literally.
        """
        user = part.user_name
        if user is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
550
551
552class AbstractWordEvaluator(StringWordEvaluator):
553 """Abstract base class for word evaluators.
554
555 Public entry points:
556 EvalWordToString EvalForPlugin EvalRhsWord
557 EvalWordSequence EvalWordSequence2
558 """
559
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators and create the globber.

    The evaluators that start out as None are not constructor arguments, so
    they have to be assigned on the instance after construction.
    """
    # Dependencies passed in by the caller
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.tilde_ev = tilde_ev
    self.splitter = splitter
    self.errfmt = errfmt

    # Not available at construction time
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith

    self.globber = glob_.Globber(exec_opts)
585
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract; the base class only raises NotImplementedError."""
    raise NotImplementedError()
589
590 def _EvalCommandSub(self, cs_part, quoted):
591 # type: (CommandSub, bool) -> part_value_t
592 """Abstract since it has a side effect."""
593 raise NotImplementedError()
594
595 def _EvalProcessSub(self, cs_part):
596 # type: (CommandSub) -> part_value_t
597 """Abstract since it has a side effect."""
598 raise NotImplementedError()
599
600 def _EvalVarNum(self, var_num):
601 # type: (int) -> value_t
602 assert var_num >= 0
603 return self.mem.GetArgNum(var_num)
604
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate $? $@ $* $- and so forth.

    $@ and $* both produce the argv array; whether that array is joined into
    a string afterwards is recorded in vsub_state.join_array:

    - "$@" stays an ARRAY, while unquoted $@ is decayed to a STRING, which is
      then subject to splitting.
    - $* and "$*" are both decayed.

    Args:
      op_id: Id of the special variable
      quoted: whether the substitution is in a double quoted context
      vsub_state: out param; join_array is set for $@ and $*
    """
    if op_id == Id.VSub_At:
        argv = self.mem.GetArgv()
        # "$@" evaluates to an array, $@ should be decayed
        vsub_state.join_array = not quoted
        return value.BashArray(argv)

    if op_id == Id.VSub_Star:
        argv = self.mem.GetArgv()
        # $* "$*" are both decayed
        vsub_state.join_array = True
        return value.BashArray(argv)

    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    return self.mem.GetSpecialVar(op_id)
634
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
):
    # type: (...) -> bool
    """Apply a test operator: - :- + :+ = := ? :?

    The value is first classified as "falsey" or not:

    - Undef is always falsey.
    - A Str is falsey only when it's empty AND the operator has a colon.
    - A BashArray or BashAssoc is falsey when it has no entries.
    - Everything else (value.Eggex, etc.) is not falsey.

    Then, depending on the operator:

      ${a:-x}  falsey: the evaluated arg word is appended to part_vals
      ${a:+x}  NOT falsey: the evaluated arg word is appended to part_vals
      ${a:=x}  falsey: like :- but the result is also assigned to the place
               described by vtest_place (arrays decay to a string)
      ${a:?x}  falsey: fatal error, with the arg word as the message

    The arg word can produce multiple part values, each with its own
    quoting, which is why they are spliced into part_vals:

      echo X-${a:-'def'"ault"}-X
      echo ${a:-x"$@"x}

    Args:
      val: the value of the variable being tested
      op: the operator token and its argument word
      quoted: whether the substitution is quoted
      part_vals: out param that the new part values are appended to
      vtest_place: name (and optional index) to assign to for = and :=
      blame_token: location reported by fatal errors

    Returns:
      Whether part_vals was mutated
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            # Only the colon variants treat an empty string like unset.
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # TODO: allow undefined
            is_falsey = len(val.strs) == 0

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            is_falsey = len(val.d) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Blame the same token as the ${a:?} error below, so the
                # message points at the offending substitution.
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array'
                # could avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(var_name, var_index.i,
                                                     loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
806
def _Count(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    Strings are counted in UTF-8 characters.  If the string can't be decoded,
    the error is re-raised under strict_word_eval; otherwise a warning is
    printed and -1 is returned.

    Raises:
      error.TypeErr if the value is neither a string nor an array type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?

            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                count = string_ops.CountUtf8Chars(str_val.s)
            except error.Strict as e:
                # Add this here so we don't have to add it so far down the
                # stack.
                # TODO: It's better to show BOTH this CODE and the actual
                # DATA somehow.
                e.location = token

                if self.exec_opts.strict_word_eval():
                    raise

                # NOTE: Doesn't make the command exit with 1; it just
                # returns a length of -1.
                self.errfmt.PrettyPrintError(e, prefix='warning: ')
                return -1

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            count = bash_impl.BashArray_Count(array_val)

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            count = bash_impl.BashAssoc_Count(assoc_val)

        elif case(value_e.SparseArray):
            sparse_val = cast(value.SparseArray, UP_val)
            count = bash_impl.SparseArray_Count(sparse_val)

        else:
            raise error.TypeErr(
                val, "Length op expected Str, BashArray, BashAssoc", token)

    return count
853
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    Args:
      val: a BashArray or BashAssoc
      token: location used for the type error

    Returns:
      value.BashArray of the indices (as strings) or of the keys.

    Raises:
      error.TypeErr if val is any other type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            keys = bash_impl.BashAssoc_GetKeys(val)
            return value.BashArray(keys)

        else:
            # Only containers have keys.  The message lists the accepted
            # types in the same form as the other ops' type errors.
            raise error.TypeErr(val, 'Keys op expected BashArray, BashAssoc',
                                token)
875
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    The value is turned into a string, that string is parsed as a variable
    reference, and the referenced variable is evaluated.

    Args:
      val: value of the variable that holds the reference
      blame_tok: 'foo' for ${!foo}
      quoted, vsub_state, vtest_place: passed through to _VarRefValue

    Raises:
      error.TypeErr if val can't hold a reference.
      error.VarSubFailure if the reference string fails to parse.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            # bash-4.4 returned value.Undef here.  bash-5.0 started to treat
            # the variable name to be empty so that the indirection fails.
            ref_str = ''

        elif case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            ref_str = str_val.s

        elif case(value_e.BashArray):  # caught earlier but OK
            array_val = cast(value.BashArray, UP_val)
            # When there are more than one element in the array, this
            # produces a wrong variable name containing spaces.
            ref_str = ' '.join(bash_impl.BashArray_GetValues(array_val))

        elif case(value_e.BashAssoc):  # caught earlier but OK
            assoc_val = cast(value.BashAssoc, UP_val)
            ref_str = ' '.join(bash_impl.BashAssoc_GetValues(assoc_val))

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)

    try:
        bvs_part = self.unsafe_arith.ParseVarRef(ref_str, blame_tok)
    except error.FatalRuntime as e:
        # Report a bad reference as a failed var sub.
        raise error.VarSubFailure(e.msg, e.location)

    return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
913
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix op like ${x#prefix} or ${x^^} to a value.

    On a string the op is applied once.  On a BashArray or BashAssoc it is
    applied to each value, and the result is a BashArray.

    Args:
      val: the value to transform; must not be Undef
      op: operator token plus the argument word, evaluated as a pattern

    Raises:
      error.TypeErr for any other value type.
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)
    if op_kind != Kind.VOp1:
        raise AssertionError(Kind_str(op_kind))

    # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
    # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
    # shortcut for constant strings.
    arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
    assert arg_val.tag() == value_e.Str

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            s = string_ops.DoUnarySuffixOp(str_val.s, op.op, arg_val.s,
                                           has_extglob)
            #log('%r %r -> %r', str_val.s, arg_val.s, s)
            new_val = value.Str(s)  # type: value_t

        elif case(value_e.BashArray, value_e.BashAssoc):
            # get values
            if val.tag() == value_e.BashArray:
                array_val = cast(value.BashArray, UP_val)
                values = bash_impl.BashArray_GetValues(array_val)
            elif val.tag() == value_e.BashAssoc:
                assoc_val = cast(value.BashAssoc, UP_val)
                values = bash_impl.BashAssoc_GetValues(assoc_val)
            else:
                raise AssertionError()

            # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this
            # too.
            strs = []  # type: List[str]
            for elem in values:
                strs.append(
                    string_ops.DoUnarySuffixOp(elem, op.op, arg_val.s,
                                               has_extglob))
            new_val = value.BashArray(strs)

        else:
            raise error.TypeErr(
                val, 'Unary op expected Str, BashArray, BashAssoc', op.op)

    return new_val
963
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply a pattern substitution like ${x/pat/replace} to a value.

    The glob pattern is translated to an ERE.  On a string the replacement
    is done once; on a BashArray or BashAssoc it is done on each value, and
    the result is a BashArray.

    Raises:
      error.TypeErr for any other value type.  Extended globs in the pattern
      are a fatal error.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from !(*.py)
    # to ERE!  You would need an engine that supports negation?
    # (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    # No replacement word means the matches are replaced by nothing.
    replace_str = ''
    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    with tagswitch(val) as case2:
        if case2(value_e.Str):
            str_val = cast(value.Str, val)
            replaced = replacer.Replace(str_val.s, op)
            val = value.Str(replaced)

        elif case2(value_e.BashArray, value_e.BashAssoc):
            if val.tag() == value_e.BashArray:
                array_val = cast(value.BashArray, val)
                values = bash_impl.BashArray_GetValues(array_val)
            elif val.tag() == value_e.BashAssoc:
                assoc_val = cast(value.BashAssoc, val)
                values = bash_impl.BashAssoc_GetValues(assoc_val)
            else:
                raise AssertionError()

            strs = []  # type: List[str]
            for elem in values:
                strs.append(replacer.Replace(elem, op))
            val = value.BashArray(strs)

        else:
            raise error.TypeErr(
                val, 'Pat Sub op expected Str, BashArray, BashAssoc',
                op.slash_tok)

    return val
1017
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate ${x:begin} or ${x:begin:length} and slice val accordingly.

    Args:
      val: the value to slice
      op: the slice op; begin and the optional length are arithmetic
          expressions
      var_name: None for $* or $@, in which case $0 takes part in the slice
      part: the var sub, used as the location of errors

    Returns:
      The sliced value.  If slicing raises error.Strict and strict_word_eval
      is off, a warning is printed and an empty string or empty array is
      returned instead.
    """
    begin = self.arith_ev.EvalToBigInt(op.begin)

    # length stays at -1, and is ignored, unless the op has a length.
    length = -1
    has_length = False
    if op.length:
        length = self.arith_ev.EvalToInt(op.length)
        has_length = True

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        # Fall back to an empty value of the same kind.
        with tagswitch(val) as case2:
            if case2(value_e.Str):
                val = value.Str('')
            elif case2(value_e.BashArray):
                val = value.BashArray([])
            else:
                raise NotImplementedError()

    return val
1049
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value.Str, bool]
    """Apply a nullary op: ${x@P}, ${x@Q} or ${x@a}.

    - @P evaluates a string as a prompt.
    - @Q shell-encodes a string, or each value of an array.
    - @a yields attribute letters: 'a' or 'A' for the array types, then
      'r', 'x', 'n' for a readonly, exported or nameref cell.

    Args:
      val: the value the op applies to
      op: the operator token; also the location of "can't use" errors
      var_name: name of the variable, or None, e.g. ${?@a} is allowed
      vsub_token, vsub_state: passed through to _ProcessUndef

    Returns:
      (result string, whether the result should be treated as quoted)
    """
    quoted2 = False
    op_id = op.id

    if op_id == Id.VOp0_P:
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt_str = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                no_soh = prompt_str.replace('\x01', '')
                result = value.Str(no_soh.replace('\x02', ''))
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True

            elif case(value_e.BashArray, value_e.BashAssoc):
                if val.tag() == value_e.BashArray:
                    array_val = cast(value.BashArray, UP_val)
                    # Unset (None) array entries are left out.
                    values = [
                        s for s in bash_impl.BashArray_GetValues(array_val)
                        if s is not None
                    ]
                elif val.tag() == value_e.BashAssoc:
                    assoc_val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(assoc_val)
                else:
                    raise AssertionError()

                # TODO: should use fastfunc.ShellEncode
                encoded = [j8_lite.MaybeShellEncode(s) for s in values]
                result = value.Str(' '.join(encoded))

            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # The letters come from the value type first, and then from the
        # flags of the variable's cell.  See spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(val) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1135
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Process a whole-array bracket op, i.e. the [@] or [*] in ${a[@]}.

    The value is returned unchanged.  The effects are recorded in
    vsub_state: join_array is set according to the op and quoting, and
    array_ref is set to the name token when val is Undef.
    """
    whole_op = cast(bracket_op.WholeArray, part.bracket_op)
    which = whole_op.op_id

    if which == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
        sigil = '*'
    elif which == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
        sigil = '@'
    else:
        raise AssertionError(which)  # unknown

    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            # Remember which token named the undefined array, so a later
            # error message can point at it.
            vsub_state.array_ref = part.name_tok

        elif case2(value_e.Str):
            if self.exec_opts.strict_array():
                e_die("Can't index string with %s" % sigil,
                      loc.WordPart(part))

        else:
            # BashArray, SparseArray and BashAssoc need no processing here.
            # Other YSH types such as List, Dict, and Float are not
            # supported; error messages will be printed later, so they are
            # returned without modification too.
            pass

    return val
1168
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Evaluate an indexing bracket op like ${a[i+1]} or ${assoc[key]}.

    The evaluated index is stored in vtest_place.index (out param).  The
    result is value.Str for an element that is present, or value.Undef when
    the lookup yields None.  An Undef input is returned as is.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.name_tok)

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            i = self.arith_ev.EvalToInt(index_expr)
            vtest_place.index = a_index.Int(i)

            elem, status = bash_impl.BashArray_GetElement(array_val, i)
            if status == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                n = bash_impl.BashArray_Length(array_val)
                self.errfmt.Print_(
                    "Index %d out of bounds for array of length %d" % (i, n),
                    blame_loc=part.name_tok)

            if elem is not None:
                val = value.Str(elem)
            else:
                val = value.Undef

        elif case2(value_e.SparseArray):
            sparse_val = cast(value.SparseArray, UP_val)
            big_i = self.arith_ev.EvalToBigInt(index_expr)
            vtest_place.index = a_index.Int(mops.BigTruncate(big_i))

            sparse_elem, sparse_status = bash_impl.SparseArray_GetElement(
                sparse_val, big_i)
            if sparse_status == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                big_n = bash_impl.SparseArray_Length(sparse_val)
                self.errfmt.Print_(
                    "Index %s out of bounds for array of length %s" %
                    (mops.ToStr(big_i), mops.ToStr(big_n)),
                    blame_loc=part.name_tok)

            if sparse_elem is not None:
                val = value.Str(sparse_elem)
            else:
                val = value.Undef

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            blame = location.TokenForArith(index_expr)
            key = self.arith_ev.EvalWordToString(index_expr, blame_loc=blame)

            vtest_place.index = a_index.Str(key)  # out param
            assoc_elem = bash_impl.BashAssoc_GetElement(assoc_val, key)

            if assoc_elem is not None:
                val = value.Str(assoc_elem)
            else:
                val = value.Undef

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1247
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Args:
      parts: the word parts found between the double quotes
      part_vals: output param to append to.
    """
    # Example of returning array:
    # $ a=(1 2); b=(3); $ c=(4 5)
    # $ argv "${a[@]}${b[@]}${c[@]}"
    # ['1', '234', '5']
    #
    # Example of multiple parts
    # $ argv "${a[@]}${undef[@]:-${c[@]}}"
    # ['1', '24', '5']

    if len(parts):
        for inner in parts:
            self._EvalWordPart(inner, part_vals, QUOTED)
    else:
        # Special case for "".  The parser outputs (DoubleQuoted []), instead
        # of (DoubleQuoted [Literal '']).  This is better but it means we
        # have to check for it, and emit a quoted empty piece ourselves.
        part_vals.append(Piece('', True, False))
1274
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    # Errors are blamed on the left token of the double quoted part
    joined = self._ConcatPartVals(evaluated, dq_part.left)
    return joined
1284
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array like $* to a string.

    Entries that are None are dropped; the rest are joined with the
    splitter's join char.
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()
    present = [
        elem for elem in bash_impl.BashArray_GetValues(val)
        if elem is not None
    ]
    joined = join_char.join(present)
    return value.Str(joined)
1292
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty value, or fail under nounset.

    Any value other than Undef is returned unchanged.  For Undef:

    - When vsub_state.array_ref is set, the result is an empty BashArray;
      otherwise it's an empty Str.
    - With nounset on, e_die() is called instead, blaming the array token
      or the name token respectively.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    is_array_ref = vsub_state.array_ref is not None

    if not self.exec_opts.nounset():
        if is_array_ref:
            return value.BashArray([])
        return value.Str('')

    # nounset is on: this is a fatal error
    if is_array_ref:
        array_tok = vsub_state.array_ref
        e_die('Undefined array %r' % lexer.TokenVal(array_tok), array_tok)
    else:
        tok_str = lexer.TokenVal(name_tok)
        # Show 'x' rather than '$x' in the message
        if tok_str.startswith('$'):
            name = tok_str[1:]
        else:
            name = tok_str
        e_die('Undefined variable %r' % name, name_tok)
1314
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a braced var sub, if there is one.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().
    Without one, a named BashArray / BashAssoc that isn't the subject of a
    type query either decays (when ShouldArrayDecay() allows it) or is a
    fatal error.
    """
    bracket = part.bracket_op
    if bracket:
        with tagswitch(bracket) as case:
            if case(bracket_op_e.WholeArray):
                val = self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                val = self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(bracket.tag())

        return val

    # no bracket op
    var_name = vtest_place.name
    is_array = val.tag() in (value_e.BashArray, value_e.BashAssoc)
    if var_name is None or not is_array or vsub_state.is_type_query:
        return val

    no_other_ops = not (part.prefix_op or part.suffix_op)
    if not ShouldArrayDecay(var_name, self.exec_opts, no_other_ops):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)"
            % var_name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1344
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the variable of a braced var sub and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t.
    """
    tok_id = part.name_tok.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if tok_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # Evaluate the bracket op with _allow_command_sub turned off
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1374
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a braced var sub like ${x}, ${#x}, ${!ref}, or ${a[@]:-d}.

    Args:
      part: the BracedVarSub to evaluate
      part_vals: output param to append to.
      quoted: whether the var sub appears in a quoted context
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other form
                # is joined into one piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    # Inspect the suffix op BEFORE the bracket op, because _EvalBracketOp()
    # reads vsub_state.is_type_query.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        # UP_op was assigned in the first 'if suffix_op_' block above
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef check remains
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    # The result counts as quoted if either the context or @Q says so.
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1562
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate part values into a single str.

    Args:
      part_vals: the String and Array part values to flatten
      location: blamed when an array is found under strict_array

    Array parts are joined with a space (None entries are skipped), unless
    strict_array is on, in which case they're a fatal error.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                chunks.append(part_val.s)

            elif case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)

                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = [
                    elem for elem in part_val.strs if elem is not None
                ]
                chunks.append(' '.join(present))

            else:
                raise AssertionError()

    return ''.join(chunks)
1592
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a braced var sub like ${foo} to a single str.

    The var sub is evaluated as unquoted, and the resulting part values are
    concatenated.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    # blame ${ location
    joined = self._ConcatPartVals(evaluated, part.left)
    return joined
1603
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a simple var sub like $foo, $1, or $@.

    Args:
      part: the SimpleVarSub to evaluate
      part_vals: output param to append to.
      quoted: whether the var sub appears in a quoted context
    """
    tok = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok.id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok.id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok.id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._ProcessUndef(val, tok, vsub_state)

    # An array is joined into a string only when join_array was requested
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        array_val = cast(value.BashArray, UP_val)
        val = self._DecayArray(array_val)

    part_vals.append(_ValueToPartValue(val, quoted, part))
1644
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple var sub like $foo to a single str.

    The var sub is evaluated as unquoted, and the resulting part values are
    concatenated.  Errors are blamed on the var sub's token.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    joined = self._ConcatPartVals(evaluated, node.tok)
    return joined
1654
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Appends to part_vals: the opening operator, each evaluated arm with a
    '|' piece between arms, and then the closing ')'.
    """
    op = part.op
    # ExtGlob_Comma is emitted as '@('; other ops use their own text
    opener = '@(' if op.id == Id.ExtGlob_Comma else lexer.LazyStr(op)

    # Do NOT split these.
    part_vals.append(Piece(opener, False, False))

    is_first = True
    for arm in part.arms:
        if is_first:
            is_first = False
        else:
            part_vals.append(Piece('|', False, False))  # separator
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1672
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values to translate
      w: the word, used as the error location
      glob_parts: output param; gets '*' in place of each extglob
      fnmatch_parts: output param; gets the full extglob text
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_part_val)
                if piece.quoted and not self.exec_opts.noglob():
                    pat = glob_.GlobEscape(piece.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    pat = piece.s
                glob_parts.append(pat)
                fnmatch_parts.append(pat)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                nested = cast(part_value.ExtGlob, UP_part_val)
                # keep appending fnmatch_parts, but replace glob_parts with '*'
                self._TranslateExtGlob(nested.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1708
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, EvalWordToPattern, and
    _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to.
      flags: bit flags; only QUOTED and IS_SUBST are examined here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are only valid where the caller handles them
        # explicitly, so reaching one here is an error.
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            # An escaped char is a quoted piece that is not split
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            id_ = part.left_token.id
            # The left token distinguishes a command sub from a process sub
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The result is subject to splitting only when unquoted
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are emitted as unquoted pieces around the child
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1825
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending to part_vals.

    An Empty word yields a single empty piece; a Compound word is handed to
    _EvalWordToParts() with the same flags.
    """
    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Compound):
            w = cast(CompoundWord, UP_w)
            self._EvalWordToParts(w, part_vals, eval_flags)

        elif case(rhs_word_e.Empty):
            is_quoted = bool(eval_flags & QUOTED)
            part_vals.append(Piece('', is_quoted, not is_quoted))

        else:
            raise AssertionError()
1841
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Returns:
      Appends to part_vals.  Note that this is a TREE.
    """
    # Does the word have an extended glob?  This is a special case because
    # of the way we use glob() and then fnmatch(..., FNM_EXTMATCH) to
    # implement extended globs.  It's hard to carry that extra information
    # all the way past the word splitting stage.

    # OSH semantic limitations: If a word has an extended glob part, then
    # 1. It can't have an array
    # 2. Word splitting of unquoted words isn't respected

    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    # The common case: no word_part.ExtGlob() was parsed
    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    if not bool(eval_flags & EXTGLOB_FILES):
        if bool(eval_flags & EXTGLOB_NESTED):
            # We only glob at the TOP level of @(nested|@(pattern))
            part_vals.extend(evaluated)
        else:
            # e.g. simple_word_eval, assignment builtin
            e_die('Extended glob not allowed in this word', w)
        return

    # Caller REQUESTED extglob evaluation.  Treat the WHOLE word as a
    # pattern.  We need TWO VARIANTS of the word because of the way we use
    # libc:
    # 1. With '*' for extglob parts
    # 2. With _EvalExtGlob() for extglob parts
    glob_parts = []  # type: List[str]
    fnmatch_parts = []  # type: List[str]
    self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

    glob_pat = ''.join(glob_parts)
    fnmatch_pat = ''.join(fnmatch_parts)
    #log("glob %s fnmatch %s", glob_pat, fnmatch_pat)

    matches = []  # type: List[str]
    n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
    if n < 0:
        raise error.FailGlob(
            'Extended glob %r matched no files' % fnmatch_pat, w)

    part_vals.append(part_value.Array(matches))
1898
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the part values to flatten; ExtGlob parts are recursed into
      w: the word, used as the error location (could just be a span ID)
      eval_flags: QUOTE_FNMATCH / QUOTE_ERE select how quoted pieces are
        escaped
      strs: output param to append to.
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_part_val)
                if not piece.quoted:
                    strs.append(piece.s)
                elif eval_flags & QUOTE_FNMATCH:
                    # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                    strs.append(glob_.GlobEscape(piece.s))
                elif eval_flags & QUOTE_ERE:
                    strs.append(glob_.ExtendedRegexEscape(piece.s))
                else:
                    strs.append(piece.s)

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, UP_part_val)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather than the
                    # parts.  Problem: the word is a TREE of parts, but we only have a
                    # flat list of part_vals.  The only case where we really get arrays
                    # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')

                # It appears to not respect IFS
                present = [
                    elem for elem in array_part.strs if elem is not None
                ]
                # TODO: eliminate double join()?
                strs.append(' '.join(present))

            elif case(part_value_e.ExtGlob):
                extglob_part = cast(part_value.ExtGlob, UP_part_val)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(extglob_part.part_vals, w,
                                       eval_flags, strs)

            else:
                raise AssertionError()
1954
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: can contain a quoting algorithm, which is applied when the
        evaluated parts are turned into a string
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # QUOTE_FNMATCH etc. breaks optimization
    if eval_flags == 0:
        literal = word_.FastStrEval(w)
        if literal is not None:
            return value.Str(literal)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(p, evaluated, 0)

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, out)
    joined = ''.join(out)
    return value.Str(joined)
1982
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    Quoted pieces are escaped with the QUOTE_FNMATCH algorithm.  An Empty
    word evaluates to ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        # parts are evaluated with no flags; quoting is applied below
        self._EvalWordPart(p, evaluated, 0)
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, out)
    pattern = value.Str(''.join(out))
    return pattern, saw_extglob
2003
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42)
    are handled here: failures are turned into a placeholder string like
    '<Runtime error: ...>' instead of propagating.

    Similar to ExprEvaluator.PluginCall().
    """
    # to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as e:
            msg = '<Runtime error: %s>' % e.UserErrorString()
            result = value.Str(msg)

        except (IOError, OSError) as e:
            msg = '<I/O error: %s>' % pyutil.strerror(e)
            result = value.Str(msg)

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2026
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Returns:
      value.Str('') for an Empty word, value.BashArray for a=(1 2),
      value.BashAssoc for an associative array literal, and otherwise the
      value.Str produced by EvalWordToString().
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so its tag is checked against rhs_word_e, as in
    # EvalWordToPattern() and _EvalRhsWordToParts().
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # Keys and values are each evaluated to a string; a later
            # duplicate key overwrites an earlier one.
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2063
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args, appended to argv.

    Depending on the frame, this either elides it, joins it verbatim (when
    every piece is quoted), or escapes the pieces, splits the result, and
    globs each resulting field.

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count
        for a field that looks like a glob.
    """
    saw_nonempty = False
    saw_quoted = False
    saw_unquoted = False

    #log('--- frame %s', frame)

    for piece in frame:
        if len(piece.s) != 0:
            saw_nonempty = True

        if piece.quoted:
            saw_quoted = True
        else:
            saw_unquoted = True

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if not saw_nonempty and not saw_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if not saw_unquoted:
        quoted_strs = [piece.s for piece in frame]
        argv.append(''.join(quoted_strs))
        return

    will_glob = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        text = piece.s

        # First layer: protect the piece from the globber.
        if piece.quoted and will_glob:
            escaped = glob_.GlobEscape(text)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            escaped = _BackslashEscape(text)

        # Second layer: protect the piece from the splitter.
        if not piece.do_split:
            escaped = self.splitter.Escape(escaped)
        else:
            escaped = _BackslashEscape(escaped)

        frags.append(escaped)

    if 0:
        log('---')
        log('FRAGS')
        for i, frag in enumerate(frags):
            log('(%d) %s', i, frag)
        log('')

    joined = ''.join(frags)
    #log('flat: %r', joined)

    fields = self.splitter.SplitForWordEval(joined)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(fields) == 0 and saw_quoted:
        argv.append('')
        return

    #log('split args: %r', fields)
    for field in fields:
        if not glob_.LooksLikeGlob(field):
            argv.append(glob_.GlobUnescape(field))
            continue

        num_matched = self.globber.Expand(field, argv)
        if num_matched < 0:
            # TODO: location info, with span IDs carried through the frame
            raise error.FailGlob('Pattern %r matched no files' % field,
                                 loc.Missing)
2148
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of args, with NO splitting or globbing.

    Helper for _EvalAssignBuiltin, since splitting and globbing are disabled
    for assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]

    Returns:
      One string per non-empty frame of the word: the pieces of the frame
      joined together.
    """
    part_vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, part_vals, 0)  # not double quoted

    result = []  # type: List[str]
    word_frames = _MakeWordFrames(part_vals)
    for frame in word_frames:
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        frame_strs = [piece.s for piece in frame]
        result.append(''.join(frame_strs))  # no split or glob

    #log('argv: %s', result)
    return result
2168
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

        x='foo=bar'
        local a=(1 2) $x

    Grammar:

        ('builtin' | 'command')* keyword flag* pair*
        flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin; passed through to the result
      arg0: the already-evaluated keyword; becomes the first entry of flags
      words: all words of the command.  Only words after index meta_offset
        are evaluated here.
      meta_offset: index of the assignment keyword within words

    Returns:
      cmd_value.Assign(builtin_id, flags, flag_locs, assign_args), where
      flag_locs is parallel to flags: one location per flag string.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    # NOTE(review): the callers visible in this file take arg0 from
    # words[meta_offset], but its location is recorded as words[0].  Confirm
    # that blaming the first word is intended when meta_offset > 0.
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning "function" for
                    # all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name after -f / -F, e.g. name1 in
                        # 'declare -f name1'.  Record its location as well,
                        # so flag_locs stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2255
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate the command as an assignment builtin, if arg0 names one.

    Returns:
      The result of _EvalAssignBuiltin() when consts.LookupAssignBuiltin()
      recognizes arg0; otherwise None.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2263
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from a part value.

    Only an unquoted String part can name an assignment builtin.  Anything
    else gives None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2272
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Words are not split.  Only words for which glob_.LooksLikeStaticGlob()
    is true are globbed, and never the first word when allow_assign is set.

    Returns:
      cmd_value.Assign when allow_assign is set and the first word evaluates
      to exactly one string that names an assignment builtin; otherwise
      cmd_value.Argv, with one location per string.

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count.
    """
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        is_command_name = allow_assign and i == meta_offset

        if not is_command_name and glob_.LooksLikeStaticGlob(w):
            val = self.EvalWordToString(w)  # respects strict-array
            num_appended = self.globber.Expand(val.s, strs)
            if num_appended < 0:
                raise error.FailGlob('Pattern %r matched no files' % val.s,
                                     w)
            for _ in xrange(num_appended):
                locs.append(w)
            continue

        # No split or glob: one string per non-empty frame.  We will still
        # allow x"${a[@]}"x, though it's deprecated by @a, which disallows
        # such expressions at parse time.
        word_strs = self._EvalWordToArgv(w)

        # TODO: Remove this because YSH will disallow assignment
        # builtins?  (including export?)
        if is_command_name and len(word_strs) == 1:
            cmd_val = self._DetectAssignBuiltinStr(word_strs[0], words,
                                                   meta_offset)
            if cmd_val:
                return cmd_val

        strs.extend(word_strs)
        for _ in word_strs:
            locs.append(w)

    assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2335
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Evaluate a list of words to a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: the words to evaluate
      is_last_cmd: passed through to the resulting cmd_value.Argv
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Assign when allow_assign is set and the word at the command
      name position turns out to be an assignment builtin; otherwise
      cmd_value.Argv, with one location per string.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Stages of evaluation
    #
    # At parse time:
    #   1. brace expansion.  TODO: Do at parse time.
    #   2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is
    #      the first WordPart.
    #
    # At run time:
    #   3. tilde sub, var sub, command sub, arith sub.  These are all
    #      "concurrent" on WordParts.  (optional process sub with <() )
    #   4. word splitting.  Can turn this off with a shell option?  It's
    #      off under simple_word_eval, handled above.
    #   5. globbing -- several exec_opts affect this: nullglob, safeglob,
    #      etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # Index of the word that may name an assignment builtin:
    #   0 for declare x
    #   1 for builtin declare x
    #   2 for command builtin declare x
    #   etc.
    meta_offset = 0

    for i, w in enumerate(words):
        literal = word_.FastStrEval(w)
        if literal is not None:
            # Fast path: exactly one string, with no splitting or globbing
            strs.append(literal)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    literal, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(literal):
                meta_offset += 1

            assert len(strs) == len(locs), strs
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            first_val = part_vals[0]
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(first_val, words,
                                                    meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(first_val):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.  The two lists had equal
        # length before this word, so the difference is exactly the number
        # of args this word produced.
        num_new = len(strs) - len(locs)
        for _ in xrange(num_new):
            locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2443
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a plain list of strings.

    For arrays and for loops, which don't allow assignment builtins.  So
    the result of EvalWordSequence2() is always a cmd_value.Argv here.
    """
    # is_last_cmd is irrelevant, so pass False
    UP_result = self.EvalWordSequence2(words, False)
    assert UP_result.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, UP_result)
    return argv_val.argv
2454
2455
class NormalWordEvaluator(AbstractWordEvaluator):
    """A word evaluator that runs command subs and process subs.

    Both are delegated to self.shell_ex, which is None after construction
    and must be assigned before use; see CheckCircularDeps().
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Assigned after construction
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs are set."""
        assert self.arith_ev is not None
        # expr_ev is deliberately not checked: disabled for pure OSH
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and turn its stdout into a part value.

        Raises:
          error.Structured: with status 4, when the output of a command sub
            opened by Id.Left_AtParen can't be decoded as J8 lines.
        """
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub, and return the path it gives back as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2502
2503
# Placeholder that CompletionWordEvaluator returns in place of the output of a
# command sub, since it has no executor to run one.
_DUMMY = '__NO_COMMAND_SUB__'
2505
2506
class CompletionWordEvaluator(AbstractWordEvaluator):
    """A word evaluator with no executor, so it can't run anything.

    Command subs and process subs evaluate to fixed placeholder strings.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs are set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder instead of running the command sub."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        # A command sub opened by Id.Left_AtParen evaluates to an array
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2546
2547
2548# vim: sw=4