OILS / osh / word_eval.py View on Github | oilshell.org

2542 lines, 1549 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 error_code_e,
37 AssignArg,
38 a_index,
39 a_index_e,
40 VTestPlace,
41 VarSubState,
42 Piece,
43)
44from _devbuild.gen.option_asdl import option_i, builtin_i
45from _devbuild.gen.value_asdl import (
46 value,
47 value_e,
48 value_t,
49 sh_lvalue,
50 sh_lvalue_t,
51)
52from core import bash_impl
53from core import error
54from core import pyos
55from core import pyutil
56from core import state
57from display import ui
58from core import util
59from data_lang import j8
60from data_lang import j8_lite
61from core.error import e_die
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from mycpp import mops
66from mycpp.mylib import log, tagswitch, NewDict
67from osh import braces
68from osh import glob_
69from osh import string_ops
70from osh import word_
71from ysh import expr_eval
72from ysh import val_ops
73
74from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76if TYPE_CHECKING:
77 from _devbuild.gen.syntax_asdl import word_part_t
78 from _devbuild.gen.option_asdl import builtin_t
79 from core import optview
80 from core.state import Mem
81 from core.vm import _Executor
82 from osh.split import SplitContext
83 from osh import prompt
84 from osh import sh_expr_eval
85
86# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both)
87QUOTED = 1 << 0
88IS_SUBST = 1 << 1
89
90EXTGLOB_FILES = 1 << 2 # allow @(cc) from file system?
91EXTGLOB_MATCH = 1 << 3 # allow @(cc) in pattern matching?
92EXTGLOB_NESTED = 1 << 4 # for @(one|!(two|three))
93
94# For EvalWordToString
95QUOTE_FNMATCH = 1 << 5
96QUOTE_ERE = 1 << 6
97
98# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
99# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
100_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} is allowed to mean ${a[0]}.

    Decay is always allowed when strict_array is off.  When strict_array is
    on, it is only allowed for a plain var sub of one of the names listed in
    _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    return is_plain_var_sub and var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    For a BashArray, element 0 is looked up; for a BashAssoc, the key '0' is
    looked up.  Returns value.Undef when the lookup yields None, and a
    value.Str otherwise.  Any other kind of value raises AssertionError.
    """
    tag = val.tag()
    if tag == value_e.BashArray:
        s, error_code = bash_impl.BashArray_GetElement(
            cast(value.BashArray, val), 0)

        # Note: index 0 should never cause the out-of-bound index error.
        assert error_code == error_code_e.OK

    elif tag == value_e.BashAssoc:
        s = bash_impl.BashAssoc_GetElement(cast(value.BashAssoc, val), '0')

    else:
        raise AssertionError(tag)

    if s is not None:
        return value.Str(s)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    We need to detect all of these cases:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_oils.Builtin() or meta_oils.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return builtin_id in (builtin_i.builtin, builtin_i.command)
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether the part value is an unquoted 'builtin' or 'command'.

    Only an unquoted String part can match; anything else yields False.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False

    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse an argument to declare, export, etc.

    This is a fallback to the static parsing done below.

    The argument is matched against consts.ASSIGN_ARG_RE; a non-match is a
    fatal error blamed on blame_word.  An empty operator (declare NAME) gives
    an AssignArg with no value; otherwise (declare NAME= / NAME+=) the value
    is the text captured in group 4, and 'append' is set when the operator
    starts with '+'.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API
    # for that, and it probably isn't a bottleneck now
    m = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if m is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Group 1 is the name.  Group 2 is only used for grouping, since ERE
    # doesn't have non-capturing groups.  Group 3 is the operator.
    op = m[3]
    assert op is not None, op

    # Defaults describe 'declare NAME', i.e. no operator
    val = None  # type: Optional[value_t]
    append = False
    if len(op):  # declare NAME=
        val = value.Str(m[4])
        append = op[0] == '+'

    return AssignArg(m[1], val, append, blame_word)
186
187
188# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189def _BackslashEscape(s):
190 # type: (str) -> str
191 """Double up backslashes.
192
193 Useful for strings about to be globbed and strings about to be IFS
194 escaped.
195 """
196 return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a var sub into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Undef and Str become a Piece, BashArray and BashAssoc become a
    part_value.Array of their values, and the YSH scalar types are
    stringified.  Any other type raises error.TypeErr, located at part_loc.
    """
    UP_val = val
    unquoted = not quoted  # third Piece field is always the inverse

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _ProcessUndef, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            return Piece(str_val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            return part_value.Array(bash_impl.BashArray_GetValues(array_val))

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(bash_impl.BashAssoc_GetValues(assoc_val))

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            blame_loc = loc.WordPart(part_loc)
            s = val_ops.Stringify(val, blame_loc, 'Word eval ')
            return Piece(s, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part_value list of a word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A
    frame is a portion that results in zero or more args; pieces of
    different frames can never be joined.  This idea exists because of arrays
    like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    The first defined element of an array joins the current frame; each later
    defined element opens a new frame.  There is always at least one frame,
    even if it's empty.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
      Frames = (List[Piece] pieces, List[int] break_indices)
      # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this is a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    frames = [frame]

    for p in part_vals:
        UP_p = p

        with tagswitch(p) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_p)
                frame.append(piece)

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_p)

                seen_first = False
                for s in arr.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Array parts are always quoted; otherwise they would
                    # have decayed to a string.
                    elem = Piece(s, True, False)
                    if seen_first:
                        frame = [elem]  # singleton frame
                        frames.append(frame)
                    else:
                        frame.append(elem)
                        seen_first = True

            else:
                raise AssertionError()

    return frames
307
308
309# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into one string, e.g. to decay ${a=x"$@"x}.

    String parts contribute their text as is.  Array parts contribute their
    defined (non-None) elements joined by join_char.  The contributions are
    then concatenated with no separator.
    """
    chunks = []  # type: List[str]
    for p in part_vals:
        UP_p = p
        with tagswitch(p) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_p)
                chunks.append(piece.s)

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_p)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for s in arr.strs:
                    if s is not None:
                        defined.append(s)
                chunks.append(join_char.join(defined))

            else:
                raise AssertionError()

    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Implement ${x:offset} and ${x:offset:length} on a value.

    Args:
      val: Str, BashArray or SparseArray to slice
      offset: where the slice begins; negative counts from the end
      length: only meaningful when has_length is true
      has_length: whether a length was given
      part: the var sub, used for error locations
      arg0_val: not None when slicing $@ or $*, in which case $0 counts as
        the element at offset 0

    Returns:
      value.Str for strings (sliced by UTF-8 characters), and value.BashArray
      for both kinds of arrays.

    A negative length is a fatal error for arrays, slicing a BashAssoc is a
    fatal error, and other types raise error.TypeErr.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            str_val = cast(value.Str, UP_val)
            s = str_val.s
            n = len(s)

            begin = mops.BigTruncate(offset)
            if begin >= 0:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)
            else:  # Walk backward from the end, one character at a time
                byte_begin = n
                steps = -begin
                for _ in xrange(steps):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)

            if not has_length:
                byte_end = len(s)
            elif length >= 0:
                byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)
            else:
                # Confusing: a negative length is a POSITION relative to the
                # end of the string
                byte_end = n
                steps = -length
                for _ in xrange(steps):
                    byte_end = string_ops.PreviousUtf8Char(s, byte_end)

            result = value.Str(s[byte_begin:byte_end])  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counting from the end
                if val.tag() == value_e.BashArray:
                    array_val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(array_val))
                elif val.tag() == value_e.SparseArray:
                    sparse_val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(sparse_val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                offset = mops.Add(offset, array_length)

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # Still before the beginning: nothing to select
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts
                # $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # Offset 0 selects $0 itself, which uses up one slot
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    array_val = cast(value.BashArray, UP_val)
                    orig = bash_impl.BashArray_GetValues(array_val)
                    n = len(orig)

                    strs = []
                    i = mops.BigTruncate(offset)
                    count = 0
                    while i < n:
                        # length could be 0
                        if has_length and count == length:
                            break
                        s = orig[i]
                        # Unset elements don't count towards the length
                        if s is not None:
                            strs.append(s)
                            count += 1
                        i += 1

                elif val.tag() == value_e.SparseArray:
                    sparse_val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort
                    # so that we only extract the first LENGTH elements of
                    # the indices greater or equal to OFFSET.
                    i = 0
                    for index in bash_impl.SparseArray_GetKeys(sparse_val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        i = i + 1

                    all_values = bash_impl.SparseArray_GetValues(sparse_val)
                    if has_length:
                        strs = all_values[i:i + length]
                    else:
                        strs = all_values[i:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    new_list = [arg0_val.s]
                    new_list.extend(strs)
                    strs = new_list

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    Subclasses must override EvalWordToString.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate a word to a string.  Not implemented on this base."""
        raise NotImplementedError()
480
481
482def _GetDollarHyphen(exec_opts):
483 # type: (optview.Exec) -> str
484 chars = [] # type: List[str]
485 if exec_opts.interactive():
486 chars.append('i')
487
488 if exec_opts.errexit():
489 chars.append('e')
490 if exec_opts.noglob():
491 chars.append('f')
492 if exec_opts.noexec():
493 chars.append('n')
494 if exec_opts.nounset():
495 chars.append('u')
496 # NO letter for pipefail?
497 if exec_opts.xtrace():
498 chars.append('x')
499 if exec_opts.noclobber():
500 chars.append('C')
501
502 # bash has:
503 # - c for sh -c, i for sh -i (mksh also has this)
504 # - h for hashing (mksh also has this)
505 # - B for brace expansion
506 return ''.join(chars)
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user to a home directory."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            return pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When no home directory is found, it's a fatal error under
        strict_tilde; otherwise ~ or ~user is returned literally.
        """
        user_name = part.user_name
        if user_name is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user_name)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user_name is None:
            return '~'
        return '~' + user_name  # mycpp doesn't have +=
550
551
552class AbstractWordEvaluator(StringWordEvaluator):
553 """Abstract base class for word evaluators.
554
555 Public entry points:
556 EvalWordToString EvalForPlugin EvalRhsWord
557 EvalWordSequence EvalWordSequence2
558 """
559
560 def __init__(
561 self,
562 mem, # type: state.Mem
563 exec_opts, # type: optview.Exec
564 mutable_opts, # type: state.MutableOpts
565 tilde_ev, # type: TildeEvaluator
566 splitter, # type: SplitContext
567 errfmt, # type: ui.ErrorFormatter
568 ):
569 # type: (...) -> None
570 self.arith_ev = None # type: sh_expr_eval.ArithEvaluator
571 self.expr_ev = None # type: expr_eval.ExprEvaluator
572 self.prompt_ev = None # type: prompt.Evaluator
573
574 self.unsafe_arith = None # type: sh_expr_eval.UnsafeArith
575
576 self.tilde_ev = tilde_ev
577
578 self.mem = mem # for $HOME, $1, etc.
579 self.exec_opts = exec_opts # for nounset
580 self.mutable_opts = mutable_opts # for _allow_command_sub
581 self.splitter = splitter
582 self.errfmt = errfmt
583
584 self.globber = glob_.Globber(exec_opts)
585
586 def CheckCircularDeps(self):
587 # type: () -> None
588 raise NotImplementedError()
589
590 def _EvalCommandSub(self, cs_part, quoted):
591 # type: (CommandSub, bool) -> part_value_t
592 """Abstract since it has a side effect."""
593 raise NotImplementedError()
594
595 def _EvalProcessSub(self, cs_part):
596 # type: (CommandSub) -> part_value_t
597 """Abstract since it has a side effect."""
598 raise NotImplementedError()
599
600 def _EvalVarNum(self, var_num):
601 # type: (int) -> value_t
602 assert var_num >= 0
603 return self.mem.GetArgNum(var_num)
604
605 def _EvalSpecialVar(self, op_id, quoted, vsub_state):
606 # type: (int, bool, VarSubState) -> value_t
607 """Evaluate $?
608
609 and so forth
610 """
611 # $@ is special -- it need to know whether it is in a double quoted
612 # context.
613 #
614 # - If it's $@ in a double quoted context, return an ARRAY.
615 # - If it's $@ in a normal context, return a STRING, which then will be
616 # subject to splitting.
617
618 if op_id in (Id.VSub_At, Id.VSub_Star):
619 argv = self.mem.GetArgv()
620 val = value.BashArray(argv) # type: value_t
621 if op_id == Id.VSub_At:
622 # "$@" evaluates to an array, $@ should be decayed
623 vsub_state.join_array = not quoted
624 else: # $* "$*" are both decayed
625 vsub_state.join_array = True
626
627 elif op_id == Id.VSub_Hyphen:
628 val = value.Str(_GetDollarHyphen(self.exec_opts))
629
630 else:
631 val = self.mem.GetSpecialVar(op_id)
632
633 return val
634
635 def _ApplyTestOp(
636 self,
637 val, # type: value_t
638 op, # type: suffix_op.Unary
639 quoted, # type: bool
640 part_vals, # type: Optional[List[part_value_t]]
641 vtest_place, # type: VTestPlace
642 blame_token, # type: Token
643 ):
644 # type: (...) -> bool
645 """
646 Returns:
647 Whether part_vals was mutated
648
649 ${a:-} returns part_value[]
650 ${a:+} returns part_value[]
651 ${a:?error} returns error word?
652 ${a:=} returns part_value[] but also needs self.mem for side effects.
653
654 So I guess it should return part_value[], and then a flag for raising an
655 error, and then a flag for assigning it?
656 The original BracedVarSub will have the name.
657
658 Example of needing multiple part_value[]
659
660 echo X-${a:-'def'"ault"}-X
661
662 We return two part values from the BracedVarSub. Also consider:
663
664 echo ${a:-x"$@"x}
665 """
666 eval_flags = IS_SUBST
667 if quoted:
668 eval_flags |= QUOTED
669
670 tok = op.op
671 # NOTE: Splicing part_values is necessary because of code like
672 # ${undef:-'a b' c 'd # e'}. Each part_value can have a different
673 # do_glob/do_elide setting.
674 UP_val = val
675 with tagswitch(val) as case:
676 if case(value_e.Undef):
677 is_falsey = True
678
679 elif case(value_e.Str):
680 val = cast(value.Str, UP_val)
681 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
682 Id.VTest_ColonQMark, Id.VTest_ColonPlus):
683 is_falsey = len(val.s) == 0
684 else:
685 is_falsey = False
686
687 elif case(value_e.BashArray):
688 val = cast(value.BashArray, UP_val)
689 # TODO: allow undefined
690 is_falsey = len(val.strs) == 0
691
692 elif case(value_e.BashAssoc):
693 val = cast(value.BashAssoc, UP_val)
694 is_falsey = len(val.d) == 0
695
696 else:
697 # value.Eggex, etc. are all false
698 is_falsey = False
699
700 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
701 if is_falsey:
702 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
703 return True
704 else:
705 return False
706
707 # Inverse of the above.
708 elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
709 if is_falsey:
710 return False
711 else:
712 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
713 return True
714
715 # Splice and assign
716 elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
717 if is_falsey:
718 # Collect new part vals.
719 assign_part_vals = [] # type: List[part_value_t]
720 self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
721 eval_flags)
722 # Append them to out param AND return them.
723 part_vals.extend(assign_part_vals)
724
725 if vtest_place.name is None:
726 # TODO: error context
727 e_die("Can't assign to special variable")
728 else:
729 # NOTE: This decays arrays too! 'shopt -s strict_array' could
730 # avoid it.
731 rhs_str = _DecayPartValuesToString(
732 assign_part_vals, self.splitter.GetJoinChar())
733 if vtest_place.index is None: # using None when no index
734 lval = location.LName(
735 vtest_place.name) # type: sh_lvalue_t
736 else:
737 var_name = vtest_place.name
738 var_index = vtest_place.index
739 UP_var_index = var_index
740
741 with tagswitch(var_index) as case:
742 if case(a_index_e.Int):
743 var_index = cast(a_index.Int, UP_var_index)
744 lval = sh_lvalue.Indexed(
745 var_name, var_index.i, loc.Missing)
746 elif case(a_index_e.Str):
747 var_index = cast(a_index.Str, UP_var_index)
748 lval = sh_lvalue.Keyed(var_name, var_index.s,
749 loc.Missing)
750 else:
751 raise AssertionError()
752
753 state.OshLanguageSetValue(self.mem, lval,
754 value.Str(rhs_str))
755 return True
756
757 else:
758 return False
759
760 elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
761 if is_falsey:
762 # The arg is the error message
763 error_part_vals = [] # type: List[part_value_t]
764 self._EvalRhsWordToParts(op.arg_word, error_part_vals,
765 eval_flags)
766 error_str = _DecayPartValuesToString(
767 error_part_vals, self.splitter.GetJoinChar())
768
769 #
770 # Display fancy/helpful error
771 #
772 if vtest_place.name is None:
773 var_name = '???'
774 else:
775 var_name = vtest_place.name
776
777 if 0:
778 # This hint is nice, but looks too noisy for now
779 op_str = lexer.LazyStr(tok)
780 if tok.id == Id.VTest_ColonQMark:
781 why = 'empty or unset'
782 else:
783 why = 'unset'
784
785 self.errfmt.Print_(
786 "Hint: operator %s means a variable can't be %s" %
787 (op_str, why), tok)
788
789 if val.tag() == value_e.Undef:
790 actual = 'unset'
791 else:
792 actual = 'empty'
793
794 if len(error_str):
795 suffix = ': %r' % error_str
796 else:
797 suffix = ''
798 e_die("Var %s is %s%s" % (var_name, actual, suffix),
799 blame_token)
800
801 else:
802 return False
803
804 else:
805 raise AssertionError(tok.id)
806
807 def _Count(self, val, token):
808 # type: (value_t, Token) -> int
809 """Returns the length of the value, for ${#var}"""
810 UP_val = val
811 with tagswitch(val) as case:
812 if case(value_e.Str):
813 val = cast(value.Str, UP_val)
814 # NOTE: Whether bash counts bytes or chars is affected by LANG
815 # environment variables.
816 # Should we respect that, or another way to select? set -o
817 # count-bytes?
818
819 # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
820 try:
821 count = string_ops.CountUtf8Chars(val.s)
822 except error.Strict as e:
823 # Add this here so we don't have to add it so far down the stack.
824 # TODO: It's better to show BOTH this CODE an the actual DATA
825 # somehow.
826 e.location = token
827
828 if self.exec_opts.strict_word_eval():
829 raise
830 else:
831 # NOTE: Doesn't make the command exit with 1; it just returns a
832 # length of -1.
833 self.errfmt.PrettyPrintError(e, prefix='warning: ')
834 return -1
835
836 elif case(value_e.BashArray):
837 val = cast(value.BashArray, UP_val)
838 count = bash_impl.BashArray_Count(val)
839
840 elif case(value_e.BashAssoc):
841 val = cast(value.BashAssoc, UP_val)
842 count = bash_impl.BashAssoc_Count(val)
843
844 elif case(value_e.SparseArray):
845 val = cast(value.SparseArray, UP_val)
846 count = bash_impl.SparseArray_Count(val)
847
848 else:
849 raise error.TypeErr(
850 val, "Length op expected Str, BashArray, BashAssoc", token)
851
852 return count
853
854 def _Keys(self, val, token):
855 # type: (value_t, Token) -> value_t
856 """Return keys of a container, for ${!array[@]}"""
857
858 UP_val = val
859 with tagswitch(val) as case:
860 if case(value_e.BashArray):
861 val = cast(value.BashArray, UP_val)
862 indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
863 return value.BashArray(indices)
864
865 elif case(value_e.BashAssoc):
866 val = cast(value.BashAssoc, UP_val)
867 assert val.d is not None # for MyPy, so it's not Optional[]
868
869 # BUG: Keys aren't ordered according to insertion!
870 keys = bash_impl.BashAssoc_GetKeys(val)
871 return value.BashArray(keys)
872
873 else:
874 raise error.TypeErr(val, 'Keys op expected Str', token)
875
876 def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
877 # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
878 """Handles indirect expansion like ${!var} and ${!a[0]}.
879
880 Args:
881 blame_tok: 'foo' for ${!foo}
882 """
883 UP_val = val
884 with tagswitch(val) as case:
885 if case(value_e.Undef):
886 # bash-4.4 returned value.Undef here. bash-5.0 started to treat
887 # the variable name to be empty so that the indirection fails.
888 var_ref_str = ''
889
890 elif case(value_e.Str):
891 val = cast(value.Str, UP_val)
892 var_ref_str = val.s
893
894 elif case(value_e.BashArray): # caught earlier but OK
895 val = cast(value.BashArray, UP_val)
896 var_ref_str = ' '.join(bash_impl.BashArray_GetValues(val))
897
898 elif case(value_e.BashAssoc): # caught earlier but OK
899 val = cast(value.BashAssoc, UP_val)
900 var_ref_str = ' '.join(bash_impl.BashAssoc_GetValues(val))
901
902 else:
903 raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
904
905 bvs_part = self.unsafe_arith.ParseVarRef(var_ref_str, blame_tok)
906 return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
907
908 def _ApplyUnarySuffixOp(self, val, op):
909 # type: (value_t, suffix_op.Unary) -> value_t
910 assert val.tag() != value_e.Undef
911
912 op_kind = consts.GetKind(op.op.id)
913
914 if op_kind == Kind.VOp1:
915 # NOTE: glob syntax is supported in ^ ^^ , ,, ! As well as % %% # ##.
916 # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
917 # shortcut for constant strings.
918 arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
919 assert arg_val.tag() == value_e.Str
920
921 UP_val = val
922 with tagswitch(val) as case:
923 if case(value_e.Str):
924 val = cast(value.Str, UP_val)
925 s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
926 has_extglob)
927 #log('%r %r -> %r', val.s, arg_val.s, s)
928 new_val = value.Str(s) # type: value_t
929
930 elif case(value_e.BashArray, value_e.BashAssoc):
931 # get values
932 if val.tag() == value_e.BashArray:
933 val = cast(value.BashArray, UP_val)
934 values = bash_impl.BashArray_GetValues(val)
935 elif val.tag() == value_e.BashAssoc:
936 val = cast(value.BashAssoc, UP_val)
937 values = bash_impl.BashAssoc_GetValues(val)
938 else:
939 raise AssertionError()
940
941 # ${a[@]#prefix} is VECTORIZED on arrays. YSH should have this too.
942 strs = [
943 string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
944 has_extglob) for s in values
945 ]
946 new_val = value.BashArray(strs)
947
948 else:
949 raise error.TypeErr(
950 val, 'Unary op expected Str, BashArray, BashAssoc',
951 op.op)
952
953 else:
954 raise AssertionError(Kind_str(op_kind))
955
956 return new_val
957
958 def _PatSub(self, val, op):
959 # type: (value_t, suffix_op.PatSub) -> value_t
960
961 pat_val, has_extglob = self.EvalWordToPattern(op.pat)
962 # Extended globs aren't supported because we only translate * ? etc. to
963 # ERE. I don't think there's a straightforward translation from !(*.py) to
964 # ERE! You would need an engine that supports negation? (Derivatives?)
965 if has_extglob:
966 e_die('extended globs not supported in ${x//GLOB/}', op.pat)
967
968 if op.replace:
969 replace_val = self.EvalRhsWord(op.replace)
970 # Can't have an array, so must be a string
971 assert replace_val.tag() == value_e.Str, replace_val
972 replace_str = cast(value.Str, replace_val).s
973 else:
974 replace_str = ''
975
976 # note: doesn't support self.exec_opts.extglob()!
977 regex, warnings = glob_.GlobToERE(pat_val.s)
978 if len(warnings):
979 # TODO:
980 # - Add 'shopt -s strict_glob' mode and expose warnings.
981 # "Glob is not in CANONICAL FORM".
982 # - Propagate location info back to the 'op.pat' word.
983 pass
984 #log('regex %r', regex)
985 replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)
986
987 with tagswitch(val) as case2:
988 if case2(value_e.Str):
989 str_val = cast(value.Str, val)
990 s = replacer.Replace(str_val.s, op)
991 val = value.Str(s)
992
993 elif case2(value_e.BashArray, value_e.BashAssoc):
994 if val.tag() == value_e.BashArray:
995 array_val = cast(value.BashArray, val)
996 values = bash_impl.BashArray_GetValues(array_val)
997 elif val.tag() == value_e.BashAssoc:
998 assoc_val = cast(value.BashAssoc, val)
999 values = bash_impl.BashAssoc_GetValues(assoc_val)
1000 else:
1001 raise AssertionError()
1002 strs = [replacer.Replace(s, op) for s in values]
1003 val = value.BashArray(strs)
1004
1005 else:
1006 raise error.TypeErr(
1007 val, 'Pat Sub op expected Str, BashArray, BashAssoc',
1008 op.slash_tok)
1009
1010 return val
1011
1012 def _Slice(self, val, op, var_name, part):
1013 # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
1014
1015 begin = self.arith_ev.EvalToBigInt(op.begin)
1016
1017 # Note: bash allows lengths to be negative (with odd semantics), but
1018 # we don't allow that right now.
1019 has_length = False
1020 length = -1
1021 if op.length:
1022 has_length = True
1023 length = self.arith_ev.EvalToInt(op.length)
1024
1025 try:
1026 arg0_val = None # type: value.Str
1027 if var_name is None: # $* or $@
1028 arg0_val = self.mem.GetArg0()
1029 val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
1030 except error.Strict as e:
1031 if self.exec_opts.strict_word_eval():
1032 raise
1033 else:
1034 self.errfmt.PrettyPrintError(e, prefix='warning: ')
1035 with tagswitch(val) as case2:
1036 if case2(value_e.Str):
1037 val = value.Str('')
1038 elif case2(value_e.BashArray):
1039 val = value.BashArray([])
1040 else:
1041 raise NotImplementedError()
1042 return val
1043
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value.Str, bool]
    """Apply a nullary suffix operator: ${x@P}, ${x@Q}, or ${x@a}.

    Args:
      val: value the operator is applied to.
      op: the operator token; op.id selects the behavior and op is the
        location blamed in error messages.
      var_name: variable name used to look up the cell for @a, or None
        (e.g. ${?@a}).
      vsub_token: token passed to _ProcessUndef.
      vsub_state: state passed to _ProcessUndef.

    Returns:
      (result, quoted2).  quoted2 is True only for @Q applied to a Str.
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # @P: evaluate the string as a prompt
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # @Q: shell-encode the value
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.BashAssoc):
                # Collect the element strings; None entries of a BashArray
                # are skipped.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [s for s in bash_impl.BashArray_GetValues(val) if s is not None]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                # Encoded elements are joined with a single space
                result = value.Str(' '.join(tmp))
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # @a: attribute flags as a string
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # The 'a' / 'A' flags come from the type of the value; the 'r', 'x'
        # and 'n' flags come from the variable's cell below.  See
        # spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(val) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1129
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the [@] and [*] bracket ops of a braced var sub.

    Records on vsub_state whether the array should be joined later, and
    for an undefined value remembers the name token for a later error
    message.  The value itself is returned unchanged.
    """
    whole = cast(bracket_op.WholeArray, part.bracket_op)
    which = whole.op_id

    if which == Id.Lit_At:
        op_str = '@'
        should_join = not quoted  # ${a[@]} decays but "${a[@]}" doesn't
    elif which == Id.Arith_Star:
        op_str = '*'
        should_join = True  # both ${a[*]} and "${a[*]}" decay
    else:
        raise AssertionError(which)  # unknown
    vsub_state.join_array = should_join

    tag = val.tag()
    if tag == value_e.Undef:
        # Save the token of the array reference so that the error message
        # for an undefined array can point at it.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str:
        if self.exec_opts.strict_array():
            e_die("Can't index string with %s" % op_str,
                  loc.WordPart(part))

    # BashArray, SparseArray and BashAssoc need no work here.  Other YSH
    # types (List, Dict, Float, ...) are not supported; they are passed
    # through unmodified and error messages are printed later.
    return val
1162
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Evaluate an element lookup such as ${a[i+1]} or ${assoc[key]}.

    The evaluated index is stored in vtest_place.index (out param).  An
    out-of-range index only prints a message through self.errfmt; a missing
    element yields value.Undef.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    tag = val.tag()
    elem = None  # type: Optional[str]

    if tag == value_e.Undef:
        return val  # it will be checked later

    elif tag == value_e.Str:
        # Bash treats any string as an array, so we can't add our own
        # behavior here without making valid OSH invalid bash.
        e_die("Can't index string %r with integer" % part.var_name,
              part.name_tok)

    elif tag == value_e.BashArray:
        dense = cast(value.BashArray, UP_val)
        i = self.arith_ev.EvalToInt(index_expr)
        vtest_place.index = a_index.Int(i)

        elem, status = bash_impl.BashArray_GetElement(dense, i)
        if status == error_code_e.IndexOutOfRange:
            # Note: Bash outputs warning but does not make it a real
            # error.  We follow the Bash behavior here.
            self.errfmt.Print_(
                "Index %d out of bounds for array of length %d" %
                (i, bash_impl.BashArray_Length(dense)),
                blame_loc=part.name_tok)

    elif tag == value_e.SparseArray:
        sparse = cast(value.SparseArray, UP_val)
        big_i = self.arith_ev.EvalToBigInt(index_expr)
        vtest_place.index = a_index.Int(mops.BigTruncate(big_i))

        elem, status = bash_impl.SparseArray_GetElement(sparse, big_i)
        if status == error_code_e.IndexOutOfRange:
            # Same as above: report it, but it is not a real error.
            big_len = bash_impl.SparseArray_Length(sparse)
            self.errfmt.Print_(
                "Index %s out of bounds for array of length %s" %
                (mops.ToStr(big_i), mops.ToStr(big_len)),
                blame_loc=part.name_tok)

    elif tag == value_e.BashAssoc:
        assoc = cast(value.BashAssoc, UP_val)
        # Location could also be attached to bracket_op?  But
        # arith_expr.VarSub works OK too
        key = self.arith_ev.EvalWordToString(
            index_expr, blame_loc=location.TokenForArith(index_expr))

        vtest_place.index = a_index.Str(key)  # out param
        elem = bash_impl.BashAssoc_GetElement(assoc, key)

    else:
        raise error.TypeErr(val,
                            'Index op expected BashArray, BashAssoc',
                            loc.WordPart(part))

    # Shared tail for the three container cases
    if elem is None:
        return value.Undef
    return value.Str(elem)
1241
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Args:
      parts: the word parts found between the double quotes.
      part_vals: output param to append to.
    """
    # A double quoted part can yield an array:
    #   $ a=(1 2); b=(3); c=(4 5)
    #   $ argv "${a[@]}${b[@]}${c[@]}"
    #   ['1', '234', '5']
    #
    # And with multiple parts:
    #   $ argv "${a[@]}${undef[@]:-${c[@]}}"
    #   ['1', '24', '5']

    if len(parts) != 0:
        for sub_part in parts:
            self._EvalWordPart(sub_part, part_vals, QUOTED)
        return

    # Special case for "".  The parser outputs (DoubleQuoted []) rather
    # than (DoubleQuoted [Literal '']), so the empty quoted piece has to be
    # produced here.
    part_vals.append(Piece('', True, False))
1268
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string in a YSH expression to a str.

    Example: var x = "$foo-${foo}"

    Errors raised while concatenating are blamed on dq_part.left.
    """
    collected = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, collected)
    joined = self._ConcatPartVals(collected, dq_part.left)
    return joined
1278
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Join a BashArray into a single string, as when $* decays.

    None entries are dropped; the rest are joined with the splitter's join
    character.
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()
    present = []  # type: List[str]
    for item in bash_impl.BashArray_GetValues(val):
        if item is not None:
            present.append(item)
    return value.Str(join_char.join(present))
1286
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty value, or die under nounset.

    Values that are not Undef are returned as is.  If vsub_state.array_ref
    is set, the substitute is an empty BashArray and the error blames that
    token; otherwise the substitute is an empty Str and the error blames
    name_tok.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    array_tok = vsub_state.array_ref
    if array_tok is None:
        if self.exec_opts.nounset():
            tok_str = lexer.TokenVal(name_tok)
            # Strip the leading $ of $name for the message
            name = tok_str[1:] if tok_str.startswith('$') else tok_str
            e_die('Undefined variable %r' % name, name_tok)
        else:
            return value.Str('')
    else:
        if self.exec_opts.nounset():
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
        else:
            return value.BashArray([])
1308
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a braced var sub, if there is one.

    With a bracket op, this dispatches to _WholeArray or _ArrayIndex.
    Without one, a named array or assoc array either decays to a string or
    is rejected, unless the substitution is a type query.
    """
    bracket = part.bracket_op
    if bracket:
        b_tag = bracket.tag()
        if b_tag == bracket_op_e.WholeArray:
            val = self._WholeArray(val, part, quoted, vsub_state)
        elif b_tag == bracket_op_e.ArrayIndex:
            val = self._ArrayIndex(val, part, vtest_place)
        else:
            raise AssertionError(b_tag)
        return val

    # no bracket op
    var_name = vtest_place.name
    if var_name is None or vsub_state.is_type_query:
        return val
    if val.tag() not in (value_e.BashArray, value_e.BashAssoc):
        return val

    has_no_ops = not (part.prefix_op or part.suffix_op)
    if ShouldArrayDecay(var_name, self.exec_opts, has_no_ops):
        # for ${BASH_SOURCE}, etc.
        val = DecayArray(val)
    else:
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)"
            % var_name, loc.WordPart(part))

    return val
1338
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value a BracedVarSub refers to and apply its bracket op.

    This duplicates some logic from _EvalBracedVarSub, but returns a
    value_t.
    """
    # 1. Evaluate from (var_name, var_num, token Id) -> value
    name_id = part.name_tok.id
    if name_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)
    elif name_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))
    else:
        # $* decays
        val = self._EvalSpecialVar(name_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # Evaluate the bracket op with _allow_command_sub turned off
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1368
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the braced var sub, with optional prefix_op, bracket_op and
        suffix_op.
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # The quoted @ form yields an array of names; every other
                # form yields the names joined into one string.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    # Look at the suffix op BEFORE the bracket op, because _EvalBracketOp
    # reads vsub_state.is_type_query.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_State.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        # UP_op was bound in the first 'if suffix_op_' block above
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef conversion is left to do
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1556
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate String and Array part values into one str.

    Args:
      part_vals: the values to flatten.
      location: blamed when an array shows up under strict_array.
    """
    pieces = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_pv)
            text = piece.s

        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, UP_pv)
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"
                e_die("Illegal array word part (strict_array)",
                      location)
            else:
                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = [item for item in arr.strs if item is not None]
                text = ' '.join(present)

        else:
            raise AssertionError()

        pieces.append(text)

    return ''.join(pieces)
1586
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution to a single str.

    The substitution is evaluated as unquoted and the resulting part
    values are concatenated.  Per the original note, this serves YSH
    expressions, e.g. the ${foo} in: var x = "$foo-${foo}"
    """
    collected = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, collected, False)
    # blame ${ location
    blame = part.left
    return self._ConcatPartVals(collected, blame)
1597
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate $name, $1, or a special variable and append to part_vals.

    Args:
      part: the simple var sub; part.tok says which variable is meant.
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context.
    """
    tok = part.tok
    sub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    tok_id = tok.id
    if tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    elif tok_id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            else:
                # for $BASH_SOURCE, etc.
                val = DecayArray(val)

    else:
        val = self._EvalSpecialVar(tok_id, quoted, sub_state)

    val = self._ProcessUndef(val, tok, sub_state)
    UP_val = val
    if val.tag() == value_e.BashArray:
        as_array = cast(value.BashArray, UP_val)
        # Join only when the special var evaluation asked for it
        if sub_state.join_array:
            val = self._DecayArray(as_array)
        else:
            val = as_array

    part_vals.append(_ValueToPartValue(val, quoted, part))
1638
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a $name style substitution to a single str.

    The substitution is evaluated as unquoted and the resulting part
    values are concatenated, blaming node.tok on error.  Per the original
    note, this serves YSH expressions, e.g. the $foo in:
    var x = "$foo-${foo}"
    """
    collected = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, collected, False)
    blame = node.tok
    return self._ConcatPartVals(collected, blame)
1648
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Flatten an extended glob like @($x|'foo'|$(hostname)) into part_vals.

    The opening operator, the | separators and the closing paren are
    appended as unquoted pieces that are not split; each arm is evaluated
    with EXTGLOB_NESTED in between.
    """
    op_tok = part.op
    opener = '@(' if op_tok.id == Id.ExtGlob_Comma else lexer.LazyStr(op_tok)
    # Do NOT split these.
    part_vals.append(Piece(opener, False, False))

    need_sep = False
    for arm in part.arms:
        if need_sep:
            part_vals.append(Piece('|', False, False))  # separator
        need_sep = True
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1666
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Turn a flattened word containing an ExtGlob into two patterns.

    Both a glob pattern and an fnmatch pattern are needed; _EvalExtGlob
    does the flattening.

    Args:
      part_vals: the flattened values of the word.
      w: the word, blamed in error messages.
      glob_parts: output param; an ExtGlob contributes '*' here.
      fnmatch_parts: output param; an ExtGlob contributes its full
        translation here.
    """
    for pv in part_vals:
        UP_pv = pv
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_pv)
            # e.g. the @( and | in @(foo|bar) aren't quoted, so they are
            # used verbatim
            text = piece.s
            if piece.quoted and not self.exec_opts.noglob():
                text = glob_.GlobEscape(piece.s)
            glob_parts.append(text)
            fnmatch_parts.append(text)  # from _EvalExtGlob()

        elif tag == part_value_e.Array:
            # Disallow array
            e_die(
                "Extended globs and arrays can't appear in the same word",
                w)

        elif tag == part_value_e.ExtGlob:
            nested = cast(part_value.ExtGlob, UP_pv)
            # keep appending fnmatch_parts, but replace glob_parts with '*'
            self._TranslateExtGlob(nested.part_vals, w, [],
                                   fnmatch_parts)
            glob_parts.append('*')

        else:
            raise AssertionError()
1702
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate.
      part_vals: output param to append to.
      flags: bit flags; only QUOTED and IS_SUBST are read here.
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are never valid as an ordinary word part
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            # Always a quoted piece that is not split
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            # Always a quoted piece that is not split
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            # The left token tells command subs from process subs
            id_ = part.left_token.id
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The result is split only when it is unquoted
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1819
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word into part_vals.

    An Empty word contributes one empty piece, quoted according to the
    QUOTED flag; a Compound word is handed to _EvalWordToParts.
    """
    UP_w = w
    tag = w.tag()

    if tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, UP_w)
        self._EvalWordToParts(compound, part_vals, eval_flags=eval_flags)

    elif tag == rhs_word_e.Empty:
        is_quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', is_quoted, not is_quoted))

    else:
        raise AssertionError()
1835
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Evaluates every part of w and appends the results to part_vals.  Note
    that this is a TREE.

    A word with an extended glob part is a special case, because extended
    globs are implemented with glob() followed by
    fnmatch(..., FNM_EXTMATCH), and that information is hard to carry past
    the word splitting stage.  OSH semantic limitations for such a word:
      1. It can't have an array
      2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for wp in w.parts:
        if wp.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(wp, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # We parsed a word_part.ExtGlob; what happens next depends on what the
    # caller REQUESTED.
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1892
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: values to convert; ExtGlob values are handled recursively.
      w: the word blamed in error messages (could just be a span ID).
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE select how quoted pieces are
        escaped.
      strs: output param to append to.
    """
    for pv in part_vals:
        UP_pv = pv
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_pv)
            text = piece.s
            if piece.quoted:
                if eval_flags & QUOTE_FNMATCH:
                    # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                    text = glob_.GlobEscape(text)
                elif eval_flags & QUOTE_ERE:
                    text = glob_.ExtendedRegexEscape(text)
            strs.append(text)

        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, UP_pv)
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"

                # TODO: This attributes too coarsely, to the word rather than the
                # parts.  Problem: the word is a TREE of parts, but we only have a
                # flat list of part_vals.  The only case where we really get arrays
                # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                e_die(
                    "This word should yield a string, but it contains an array",
                    w)

                # TODO: Maybe add detail like this.
                #e_die('RHS of assignment should only have strings.  '
                #      'To assign arrays, use b=( "${a[@]}" )')
            else:
                # It appears to not respect IFS
                present = [item for item in arr.strs if item is not None]
                joined = ' '.join(present)  # TODO: eliminate double join()?
                strs.append(joined)

        elif tag == part_value_e.ExtGlob:
            ext = cast(part_value.ExtGlob, UP_pv)

            # Extended globs are only allowed where we expect them!
            if not bool(eval_flags & QUOTE_FNMATCH):
                e_die('extended glob not allowed in this word', w)

            # recursive call
            self._PartValsToString(ext.part_vals, w, eval_flags, strs)

        else:
            raise AssertionError()
1948
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Evaluate a compound word to a single value.Str.

    Args:
      UP_w: the word; it must be a CompoundWord.
      eval_flags: may select a quoting algorithm, which is applied when the
        part values are turned into strings.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    compound = cast(CompoundWord, UP_w)

    # The fast path is only valid without flags: QUOTE_FNMATCH etc. breaks
    # the optimization.
    if eval_flags == 0:
        shortcut = word_.FastStrEval(compound)
        if shortcut is not None:
            return value.Str(shortcut)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    vals = []  # type: List[part_value_t]
    for wp in compound.parts:
        # the parts are evaluated with flags 0, not eval_flags, which is
        # slightly confusing
        self._EvalWordPart(wp, vals, 0)

    out = []  # type: List[str]
    self._PartValsToString(vals, compound, eval_flags, out)
    return value.Str(''.join(out))
1976
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but also reports whether we got an ExtGlob.

    Returns:
      (pattern, has_extglob).  Quoted pieces are escaped with
      QUOTE_FNMATCH.  An Empty word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    compound = cast(CompoundWord, UP_w)

    saw_extglob = False
    vals = []  # type: List[part_value_t]
    for wp in compound.parts:
        # the parts themselves are evaluated with flags 0
        self._EvalWordPart(wp, vals, 0)
        if wp.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    out = []  # type: List[str]
    self._PartValsToString(vals, compound, QUOTE_FNMATCH, out)
    pattern = value.Str(''.join(out))
    return pattern, saw_extglob
1997
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) are turned into a placeholder string,
    and mutations of $? like $(exit 42) are contained by ctx_Registers.
    I/O errors and Ctrl-C are turned into placeholder strings as well.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers is there to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as fatal:
            msg = '<Runtime error: %s>' % fatal.UserErrorString()
            result = value.Str(msg)

        except (IOError, OSError) as io_err:
            msg = '<I/O error: %s>' % pyutil.strerror(io_err)
            result = value.Str(msg)

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2020
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Evaluate the RHS of an assignment.  There is no splitting.

    Returns:
      value.Str('') for an Empty word, a value.BashArray for a=(1 2), a
      value.BashAssoc for an associative array literal, and otherwise the
      value.Str produced by EvalWordToString.
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so compare against the rhs_word_e tag, as
    # EvalWordToPattern and _EvalRhsWordToParts do.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            # Brace expansion happens before the words are evaluated
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # A later pair with the same key overwrites an earlier one
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2057
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args appended to argv.

    Depending on how the pieces are quoted, the frame is elided, joined
    verbatim, or escaped, split by self.splitter and then glob-expanded.

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count.
    """
    has_text = False
    has_unquoted = False
    any_quoted = False

    for piece in frame:
        if len(piece.s):
            has_text = True

        if piece.quoted:
            any_quoted = True
        else:
            has_unquoted = True

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if not has_text and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if not has_unquoted:
        argv.append(''.join([piece.s for piece in frame]))
        return

    glob_enabled = not self.exec_opts.noglob()

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    escaped = []  # type: List[str]
    for piece in frame:
        if glob_enabled and piece.quoted:
            text = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            text = _BackslashEscape(piece.s)

        if piece.do_split:
            text = _BackslashEscape(text)
        else:
            text = self.splitter.Escape(text)

        escaped.append(text)

    split_args = self.splitter.SplitForWordEval(''.join(escaped))

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(split_args) == 0 and any_quoted:
        argv.append('')
        return

    for arg in split_args:
        if not glob_.LooksLikeGlob(arg):
            argv.append(glob_.GlobUnescape(arg))
            continue

        num_matched = self.globber.Expand(arg, argv)
        if num_matched < 0:
            # TODO: location info, with span IDs carried through the frame
            raise error.FailGlob('Pattern %r matched no files' % arg,
                                 loc.Missing)
2142
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of args, without splitting or globbing.

    Helper for _EvalAssignBuiltin; splitting and globbing are disabled for
    assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    part_vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, part_vals, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(part_vals):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        # Concatenate the pieces as-is: no split or glob
        result.append(''.join([piece.s for piece in frame]))
    return result
2162
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin; passed through to the result
      arg0: the already-evaluated keyword; becomes the first entry of flags
      words: all words of the command, including any leading meta builtins
      meta_offset: index of the keyword in words; evaluation starts after it

    Returns:
      cmd_value.Assign whose flags and flag_locs are parallel lists.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[0]]  # kept parallel to flags
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name after -f / -F.  Record its location too, so
                        # that flag_locs stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2249
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate the command as an assignment builtin if arg0 names one.

    Returns:
      The result of _EvalAssignBuiltin when consts.LookupAssignBuiltin()
      recognizes arg0, otherwise None.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None
    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2257
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from an evaluated part.

    Only an unquoted String part is considered; any other part value, or a
    quoted string, gives None.
    """
    UP_val0 = val0
    if val0.tag() != part_value_e.String:
        return None

    val0 = cast(Piece, UP_val0)
    if val0.quoted:
        return None

    return self._DetectAssignBuiltinStr(val0.s, words, meta_offset)
2266
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH: no splitting, only static globs.

    Returns:
      cmd_value.Assign if allow_assign is set and the first word evaluates
      to a single string naming an assignment builtin; otherwise a
      cmd_value.Argv whose strings and locations are parallel lists.

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count.
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    meta_offset = 0  # never advanced in this function
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if i == meta_offset and allow_assign:
            first_strs = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first_strs) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    first_strs[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            argv.extend(first_strs)
            for _ in first_strs:
                arg_locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            num_appended = self.globber.Expand(pat.s, argv)
            if num_appended < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            # One location per string that the glob appended
            for _ in xrange(num_appended):
                arg_locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        # Still need to process the part values into frames.
        # We will still allow x"${a[@]}"x, though it's deprecated by @a, which
        # disallows such expressions at parse time.
        for frame in _MakeWordFrames(part_vals):
            if len(frame) == 0:  # empty array gives empty frame!
                continue
            argv.append(''.join([piece.s for piece in frame]))  # no split or glob
            arg_locs.append(w)

    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2329
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Assign when an assignment builtin is detected in the first
      non-meta word, otherwise cmd_value.Argv.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    # Number of entries in argv that already have a location in arg_locs
    num_located = 0
    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            argv.append(fast_str)
            arg_locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            # Bug fix: the count must be updated on every loop iteration
            num_located = len(argv)
            assert len(argv) == len(arg_locs), argv
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            only_val = part_vals[0]
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(only_val, words,
                                                    meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(only_val):
                meta_offset += 1

        # Do splitting and globbing.  Each frame will append zero or more
        # args.
        for frame in _MakeWordFrames(part_vals):
            self._EvalWordFrame(frame, argv)

        # Fill in locations parallel to argv.
        num_now = len(argv)
        for _ in xrange(num_now - num_located):
            arg_locs.append(w)
        num_located = num_now

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2437
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a flat list of strings, for arrays and for loops.

    Assignment builtins aren't allowed here, so the result of
    EvalWordSequence2 is always an Argv.
    """
    # is_last_cmd is irrelevant, so pass False
    result = self.EvalWordSequence2(words, False)
    assert result.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2448
2449
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs via self.shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Not a constructor argument; must be assigned before
        # CheckCircularDeps() passes.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and convert its stdout to a part value.

        Raises:
          error.Structured: with status 4, when @() output fails to decode.
        """
        captured = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # Third argument is (not quoted); _EvalProcessSub's comment
            # suggests it controls splitting/globbing.
            return Piece(captured, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(captured)
        except error.Decode as err:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, err.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path it yields as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2496
2497
# Placeholder that CompletionWordEvaluator substitutes for the output of a
# command sub, since it has no executor to run one.
_DUMMY = '__NO_COMMAND_SUB__'
2499
2500
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings
    instead of being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end of
    the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return the _DUMMY placeholder in place of command sub output."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        # @() gives an array, so wrap the placeholder in one
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder Piece in place of a process sub path."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2540
2541
2542# vim: sw=4