OILS / osh / word_eval.py View on Github | oils.pub

2597 lines, 1600 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 error_code_e,
37 AssignArg,
38 a_index,
39 a_index_e,
40 VTestPlace,
41 VarSubState,
42 Piece,
43)
44from _devbuild.gen.option_asdl import option_i, builtin_i
45from _devbuild.gen.value_asdl import (
46 value,
47 value_e,
48 value_t,
49 sh_lvalue,
50 sh_lvalue_t,
51)
52from core import bash_impl
53from core import error
54from core import pyos
55from core import pyutil
56from core import state
57from display import ui
58from core import util
59from data_lang import j8
60from data_lang import j8_lite
61from core.error import e_die
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from mycpp import mops
66from mycpp.mylib import log, tagswitch, NewDict
67from osh import braces
68from osh import glob_
69from osh import string_ops
70from osh import word_
71from ysh import expr_eval
72from ysh import val_ops
73
74from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76if TYPE_CHECKING:
77 from _devbuild.gen.syntax_asdl import word_part_t
78 from _devbuild.gen.option_asdl import builtin_t
79 from core import optview
80 from core.state import Mem
81 from core.vm import _Executor
82 from osh.split import SplitContext
83 from osh import prompt
84 from osh import sh_expr_eval
85
# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both)
#
# Each flag occupies its own bit, so several can be OR'ed together into a
# single int (e.g. _ApplyTestOp starts from IS_SUBST and ORs in QUOTED when
# the substitution is quoted).
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} may be read as ${a[0]}.

    Decay is always allowed unless strict_array is on.  With strict_array on,
    it is still allowed for a plain variable substitution of one of the names
    in _STRING_AND_ARRAY, which are valid both as strings and as arrays.
    """
    if not exec_opts.strict_array():
        return True
    if not is_plain_var_sub:
        return False
    return var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Decay a container to its first element: ${array} becomes ${array[0]}.

    For a BashArray the element at index 0 is used; for a BashAssoc the
    element stored under the key '0'.  Any other value is a programming error.

    Returns:
      value.Str of that element, or value.Undef when there is none.
    """
    tag = val.tag()
    if tag == value_e.BashArray:
        first, status = bash_impl.BashArray_GetElement(
            cast(value.BashArray, val), 0)

        # Note: index 0 should never cause the out-of-bound index error.
        assert status == error_code_e.OK

    elif tag == value_e.BashAssoc:
        first = bash_impl.BashAssoc_GetElement(cast(value.BashAssoc, val), '0')

    else:
        raise AssertionError(tag)

    if first is not None:
        return value.Str(first)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' meta builtin.

    This is needed to detect all of these cases:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Assignment builtins have different WORD EVALUATION RULES for a=$x (no
    word splitting), so it seems hard to handle this later, inside
    meta_oils.Builtin() or meta_oils.Command().
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether the part value is an unquoted 'builtin' or 'command'.

    Only an unquoted string piece can name a meta builtin; arrays and quoted
    pieces never do.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse an argument to declare, export, etc. at runtime.

    This is the dynamic fallback for arguments that were not parsed
    statically.

    Args:
      arg: the evaluated argument string, e.g. 'NAME', 'NAME=val', 'NAME+=val'
      blame_word: word used for the error location and stored in the result

    Returns:
      AssignArg with the name, the value (None when there is no operator),
      and whether the operator appends (starts with '+').
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    match = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if match is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Group 2 only exists for grouping; ERE has no non-capturing groups.
    name = match[1]
    operator = match[3]
    assert operator is not None, operator

    if len(operator) == 0:  # declare NAME -- no operator, no value
        return AssignArg(name, None, False, blame_word)

    # declare NAME=  or  declare NAME+=
    rhs = value.Str(match[4])  # type: Optional[value_t]
    is_append = operator[0] == '+'
    return AssignArg(name, rhs, is_append, blame_word)
186
187
188# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189def _BackslashEscape(s):
190 # type: (str) -> str
191 """Double up backslashes.
192
193 Useful for strings about to be globbed and strings about to be IFS
194 escaped.
195 """
196 return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the substituted value
      quoted: whether the substitution is quoted; string pieces get
        (quoted, not quoted) as their last two fields
      part_loc: word part used for error locations

    Raises:
      error.TypeErr if the value can't be substituted into a word.
    """
    tag = val.tag()

    if tag == value_e.Undef:
        # This happens in the case of ${undef+foo}. We skipped _ProcessUndef,
        # but we have to append to the empty string.
        return Piece('', quoted, not quoted)

    if tag == value_e.Str:
        str_val = cast(value.Str, val)
        return Piece(str_val.s, quoted, not quoted)

    if tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        return part_value.Array(bash_impl.BashArray_GetValues(array_val))

    if tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        # bash behavior: splice values!
        return part_value.Array(bash_impl.BashAssoc_GetValues(assoc_val))

    # Cases added for YSH
    # value_e.List is also here - we use val_ops.Stringify()s err message
    if tag in (value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
               value_e.Eggex, value_e.List):
        text = val_ops.Stringify(val, loc.WordPart(part_loc), 'Word eval ')
        return Piece(text, quoted, not quoted)

    raise error.TypeErr(val, "Can't substitute into word",
                        loc.WordPart(part_loc))
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the part values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args, and frames can never be
    joined with each other.  The concept exists because of arrays like "$@"
    and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    frames = [frame]

    for part_val in part_vals:
        tag = part_val.tag()

        if tag == part_value_e.String:
            frame.append(cast(Piece, part_val))

        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, part_val)

            # Undefined array entries are ignored.
            defined = [s for s in arr.strs if s is not None]

            for i, s in enumerate(defined):
                # Arrays parts are always quoted; otherwise they would have
                # decayed to a string.
                piece = Piece(s, True, False)
                if i == 0:
                    # The first element joins whatever precedes the array.
                    frame.append(piece)
                else:
                    # Every later element starts its own frame.
                    frame = [piece]
                    frames.append(frame)

        else:
            raise AssertionError()

    return frames
307
308
# TODO: This could be _MakeWordFrames and then sep.join().  It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into a single string, e.g. for ${a=x"$@"x}.

    String pieces are concatenated as-is.  The defined entries of an array
    part are joined with join_char before being concatenated.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        tag = part_val.tag()

        if tag == part_value_e.String:
            chunks.append(cast(Piece, part_val).s)

        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, part_val)
            # TODO: Eliminate double join for speed?
            defined = []  # type: List[str]
            for s in arr.strs:
                if s is not None:
                    defined.append(s)
            chunks.append(join_char.join(defined))

        else:
            raise AssertionError()

    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Apply a slice (offset and optional length) to a string or an array.

    Args:
      val: value to slice.  Str, BashArray and SparseArray are handled.
      offset: start position.  A negative offset counts from the end.
      length: only consulted when has_length is True.  For strings a negative
        length is an end POSITION counted from the end of the string; for
        arrays a negative length is a fatal error.
      has_length: whether a length was given at all
      part: the var sub, used for error locations
      arg0_val: if not None, the array is treated as having this value as its
        element 0 (the caller passes it for $@ and $*)

    Returns:
      value.Str for a string; value.BashArray for either kind of array.

    Raises:
      Fatal error (e_die) for a negative array length or for BashAssoc;
      error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            # begin counts characters; byte_begin is the byte index it maps to
            begin = mops.BigTruncate(offset)
            if begin < 0:  # Compute offset with unicode
                # Walk backwards from the end, one character per iteration.
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    # Advance 'length' characters starting from byte_begin.
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash. It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                offset = mops.Add(offset, array_length)

            # Still negative after adjusting: the slice is empty.
            if bash_impl.BigInt_Less(offset, mops.ZERO):
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        # Shift so that offset indexes the argv array itself.
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # offset == 0: $0 is the first result and uses up one
                        # unit of length.
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    orig = bash_impl.BashArray_GetValues(val)
                    n = len(orig)

                    strs = []
                    i = mops.BigTruncate(offset)
                    count = 0
                    while i < n:
                        if has_length and count == length:  # length could be 0
                            break
                        s = orig[i]
                        if s is not None:  # Unset elements don't count towards the length
                            strs.append(s)
                            count += 1
                        i += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search. Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort so
                    # that we only extract the first LENGTH elements of the
                    # indices greater or equal to OFFSET.
                    # i becomes the number of keys that are less than offset.
                    i = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        i = i + 1

                    # NOTE(review): presumably SparseArray_GetValues() is in
                    # the same order as SparseArray_GetKeys() -- confirm.
                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[i:i +
                                                                    length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[i:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    new_list = [arg0_val.s]
                    new_list.extend(strs)
                    strs = new_list

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """Minimal word evaluator interface, as seen by ArithEvaluator and
    BoolEvaluator.

    Subclasses must override EvalWordToString().
    """

    def __init__(self):
        # type: () -> None
        """Do nothing; mycpp needs an explicit empty constructor."""

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate word w to a single string; must be overridden."""
        raise NotImplementedError()
480
481
482def _GetDollarHyphen(exec_opts):
483 # type: (optview.Exec) -> str
484 chars = [] # type: List[str]
485 if exec_opts.interactive():
486 chars.append('i')
487
488 if exec_opts.errexit():
489 chars.append('e')
490 if exec_opts.noglob():
491 chars.append('f')
492 if exec_opts.noexec():
493 chars.append('n')
494 if exec_opts.nounset():
495 chars.append('u')
496 # NO letter for pipefail?
497 if exec_opts.xtrace():
498 chars.append('x')
499 if exec_opts.noclobber():
500 chars.append('C')
501
502 # bash has:
503 # - c for sh -c, i for sh -i (mksh also has this)
504 # - h for hashing (mksh also has this)
505 # - B for brace expansion
506 return ''.join(chars)
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user to home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.exec_opts = exec_opts
        self.mem = mem

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home directory, or None if unknown.

        HOME (var, ENV.HOME, ...) is consulted first, and a libc call is made
        only when it is not set.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        from_env = self.mem.env_config.Get('HOME')
        if from_env is None:
            # Ask the OS.  This is what bash does.
            return pyos.GetMyHomeDir()
        return from_env

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluate ~ or ~user, given a Lit_TildeLike token.

        When the home directory can't be determined, this is a fatal error
        under strict_tilde; otherwise the tilde expression is returned
        literally.
        """
        user = part.user_name
        if user is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)
        else:
            # Return ~ or ~user literally
            home = '~'
            if user is not None:
                home = home + user  # mycpp doesn't have +=

        return home
550
551
552class AbstractWordEvaluator(StringWordEvaluator):
553 """Abstract base class for word evaluators.
554
555 Public entry points:
556 EvalWordToString EvalForPlugin EvalRhsWord
557 EvalWordSequence EvalWordSequence2
558 """
559
560 def __init__(
561 self,
562 mem, # type: state.Mem
563 exec_opts, # type: optview.Exec
564 mutable_opts, # type: state.MutableOpts
565 tilde_ev, # type: TildeEvaluator
566 splitter, # type: SplitContext
567 errfmt, # type: ui.ErrorFormatter
568 ):
569 # type: (...) -> None
570 self.arith_ev = None # type: sh_expr_eval.ArithEvaluator
571 self.expr_ev = None # type: expr_eval.ExprEvaluator
572 self.prompt_ev = None # type: prompt.Evaluator
573
574 self.unsafe_arith = None # type: sh_expr_eval.UnsafeArith
575
576 self.tilde_ev = tilde_ev
577
578 self.mem = mem # for $HOME, $1, etc.
579 self.exec_opts = exec_opts # for nounset
580 self.mutable_opts = mutable_opts # for _allow_command_sub
581 self.splitter = splitter
582 self.errfmt = errfmt
583
584 self.globber = glob_.Globber(exec_opts)
585
586 def CheckCircularDeps(self):
587 # type: () -> None
588 raise NotImplementedError()
589
590 def _EvalCommandSub(self, cs_part, quoted):
591 # type: (CommandSub, bool) -> part_value_t
592 """Abstract since it has a side effect."""
593 raise NotImplementedError()
594
595 def _EvalProcessSub(self, cs_part):
596 # type: (CommandSub) -> part_value_t
597 """Abstract since it has a side effect."""
598 raise NotImplementedError()
599
600 def _EvalVarNum(self, var_num):
601 # type: (int) -> value_t
602 assert var_num >= 0
603 return self.mem.GetArgNum(var_num)
604
605 def _EvalSpecialVar(self, op_id, quoted, vsub_state):
606 # type: (int, bool, VarSubState) -> value_t
607 """Evaluate $?
608
609 and so forth
610 """
611 # $@ is special -- it need to know whether it is in a double quoted
612 # context.
613 #
614 # - If it's $@ in a double quoted context, return an ARRAY.
615 # - If it's $@ in a normal context, return a STRING, which then will be
616 # subject to splitting.
617
618 if op_id in (Id.VSub_At, Id.VSub_Star):
619 argv = self.mem.GetArgv()
620 val = value.BashArray(argv) # type: value_t
621 if op_id == Id.VSub_At:
622 # "$@" evaluates to an array, $@ should be decayed
623 vsub_state.join_array = not quoted
624 else: # $* "$*" are both decayed
625 vsub_state.join_array = True
626
627 elif op_id == Id.VSub_Hyphen:
628 val = value.Str(_GetDollarHyphen(self.exec_opts))
629
630 else:
631 val = self.mem.GetSpecialVar(op_id)
632
633 return val
634
635 def _ApplyTestOp(
636 self,
637 val, # type: value_t
638 op, # type: suffix_op.Unary
639 quoted, # type: bool
640 part_vals, # type: Optional[List[part_value_t]]
641 vtest_place, # type: VTestPlace
642 blame_token, # type: Token
643 vsub_state, # type: VarSubState
644 ):
645 # type: (...) -> bool
646 """
647 Returns:
648 Whether part_vals was mutated
649
650 ${a:-} returns part_value[]
651 ${a:+} returns part_value[]
652 ${a:?error} returns error word?
653 ${a:=} returns part_value[] but also needs self.mem for side effects.
654
655 So I guess it should return part_value[], and then a flag for raising an
656 error, and then a flag for assigning it?
657 The original BracedVarSub will have the name.
658
659 Example of needing multiple part_value[]
660
661 echo X-${a:-'def'"ault"}-X
662
663 We return two part values from the BracedVarSub. Also consider:
664
665 echo ${a:-x"$@"x}
666 """
667 eval_flags = IS_SUBST
668 if quoted:
669 eval_flags |= QUOTED
670
671 tok = op.op
672 # NOTE: Splicing part_values is necessary because of code like
673 # ${undef:-'a b' c 'd # e'}. Each part_value can have a different
674 # do_glob/do_elide setting.
675 UP_val = val
676 with tagswitch(val) as case:
677 if case(value_e.Undef):
678 is_falsey = True
679
680 elif case(value_e.Str):
681 val = cast(value.Str, UP_val)
682 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
683 Id.VTest_ColonQMark, Id.VTest_ColonPlus):
684 is_falsey = len(val.s) == 0
685 else:
686 is_falsey = False
687
688 elif case(value_e.BashArray, value_e.BashAssoc):
689 if val.tag() == value_e.BashArray:
690 val = cast(value.BashArray, UP_val)
691 strs = bash_impl.BashArray_GetValues(val)
692 elif val.tag() == value_e.BashAssoc:
693 val = cast(value.BashAssoc, UP_val)
694 strs = bash_impl.BashAssoc_GetValues(val)
695 else:
696 raise AssertionError()
697
698 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
699 Id.VTest_ColonQMark, Id.VTest_ColonPlus):
700 if vsub_state.join_array:
701 sep_width = len(self.splitter.GetJoinChar())
702 else:
703 sep_width = 1 # we use ' ' for a[@]
704
705 if sep_width == 0:
706 is_falsey = True
707 for s in strs:
708 if len(s) != 0:
709 is_falsey = False
710 break
711 else:
712 is_falsey = len(strs) == 0 or (len(strs) == 1 and
713 len(strs[0]) == 0)
714 else:
715 # TODO: allow undefined
716 is_falsey = len(strs) == 0
717
718 else:
719 # value.Eggex, etc. are all false
720 is_falsey = False
721
722 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
723 if is_falsey:
724 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
725 return True
726 else:
727 return False
728
729 # Inverse of the above.
730 elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
731 if is_falsey:
732 return False
733 else:
734 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
735 return True
736
737 # Splice and assign
738 elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
739 if is_falsey:
740 # Collect new part vals.
741 assign_part_vals = [] # type: List[part_value_t]
742 self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
743 eval_flags)
744 # Append them to out param AND return them.
745 part_vals.extend(assign_part_vals)
746
747 if vtest_place.name is None:
748 # TODO: error context
749 e_die("Can't assign to special variable")
750 else:
751 # NOTE: This decays arrays too! 'shopt -s strict_array' could
752 # avoid it.
753 rhs_str = _DecayPartValuesToString(
754 assign_part_vals, self.splitter.GetJoinChar())
755 if vtest_place.index is None: # using None when no index
756 lval = location.LName(
757 vtest_place.name) # type: sh_lvalue_t
758 else:
759 var_name = vtest_place.name
760 var_index = vtest_place.index
761 UP_var_index = var_index
762
763 with tagswitch(var_index) as case:
764 if case(a_index_e.Int):
765 var_index = cast(a_index.Int, UP_var_index)
766 lval = sh_lvalue.Indexed(
767 var_name, var_index.i, loc.Missing)
768 elif case(a_index_e.Str):
769 var_index = cast(a_index.Str, UP_var_index)
770 lval = sh_lvalue.Keyed(var_name, var_index.s,
771 loc.Missing)
772 else:
773 raise AssertionError()
774
775 state.OshLanguageSetValue(self.mem, lval,
776 value.Str(rhs_str))
777 return True
778
779 else:
780 return False
781
782 elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
783 if is_falsey:
784 # The arg is the error message
785 error_part_vals = [] # type: List[part_value_t]
786 self._EvalRhsWordToParts(op.arg_word, error_part_vals,
787 eval_flags)
788 error_str = _DecayPartValuesToString(
789 error_part_vals, self.splitter.GetJoinChar())
790
791 #
792 # Display fancy/helpful error
793 #
794 if vtest_place.name is None:
795 var_name = '???'
796 else:
797 var_name = vtest_place.name
798
799 if 0:
800 # This hint is nice, but looks too noisy for now
801 op_str = lexer.LazyStr(tok)
802 if tok.id == Id.VTest_ColonQMark:
803 why = 'empty or unset'
804 else:
805 why = 'unset'
806
807 self.errfmt.Print_(
808 "Hint: operator %s means a variable can't be %s" %
809 (op_str, why), tok)
810
811 if val.tag() == value_e.Undef:
812 actual = 'unset'
813 else:
814 actual = 'empty'
815
816 if len(error_str):
817 suffix = ': %r' % error_str
818 else:
819 suffix = ''
820 e_die("Var %s is %s%s" % (var_name, actual, suffix),
821 blame_token)
822
823 else:
824 return False
825
826 else:
827 raise AssertionError(tok.id)
828
829 def _Count(self, val, token):
830 # type: (value_t, Token) -> int
831 """Returns the length of the value, for ${#var}"""
832 UP_val = val
833 with tagswitch(val) as case:
834 if case(value_e.Str):
835 val = cast(value.Str, UP_val)
836 # NOTE: Whether bash counts bytes or chars is affected by LANG
837 # environment variables.
838 # Should we respect that, or another way to select? set -o
839 # count-bytes?
840
841 # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
842 try:
843 count = string_ops.CountUtf8Chars(val.s)
844 except error.Strict as e:
845 # Add this here so we don't have to add it so far down the stack.
846 # TODO: It's better to show BOTH this CODE an the actual DATA
847 # somehow.
848 e.location = token
849
850 if self.exec_opts.strict_word_eval():
851 raise
852 else:
853 # NOTE: Doesn't make the command exit with 1; it just returns a
854 # length of -1.
855 self.errfmt.PrettyPrintError(e, prefix='warning: ')
856 return -1
857
858 elif case(value_e.BashArray):
859 val = cast(value.BashArray, UP_val)
860 count = bash_impl.BashArray_Count(val)
861
862 elif case(value_e.BashAssoc):
863 val = cast(value.BashAssoc, UP_val)
864 count = bash_impl.BashAssoc_Count(val)
865
866 elif case(value_e.SparseArray):
867 val = cast(value.SparseArray, UP_val)
868 count = bash_impl.SparseArray_Count(val)
869
870 else:
871 raise error.TypeErr(
872 val, "Length op expected Str, BashArray, BashAssoc", token)
873
874 return count
875
876 def _Keys(self, val, token):
877 # type: (value_t, Token) -> value_t
878 """Return keys of a container, for ${!array[@]}"""
879
880 UP_val = val
881 with tagswitch(val) as case:
882 if case(value_e.BashArray):
883 val = cast(value.BashArray, UP_val)
884 indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
885 return value.BashArray(indices)
886
887 elif case(value_e.BashAssoc):
888 val = cast(value.BashAssoc, UP_val)
889 assert val.d is not None # for MyPy, so it's not Optional[]
890
891 # BUG: Keys aren't ordered according to insertion!
892 keys = bash_impl.BashAssoc_GetKeys(val)
893 return value.BashArray(keys)
894
895 else:
896 raise error.TypeErr(val, 'Keys op expected Str', token)
897
898 def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
899 # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
900 """Handles indirect expansion like ${!var} and ${!a[0]}.
901
902 Args:
903 blame_tok: 'foo' for ${!foo}
904 """
905 UP_val = val
906 with tagswitch(val) as case:
907 if case(value_e.Undef):
908 # bash-4.4 returned value.Undef here. bash-5.0 started to treat
909 # the variable name to be empty so that the indirection fails.
910 var_ref_str = ''
911
912 elif case(value_e.Str):
913 val = cast(value.Str, UP_val)
914 var_ref_str = val.s
915
916 elif case(value_e.BashArray): # caught earlier but OK
917 val = cast(value.BashArray, UP_val)
918 # When there are more than one element in the array, this
919 # produces a wrong variable name containing spaces.
920 var_ref_str = ' '.join(bash_impl.BashArray_GetValues(val))
921
922 elif case(value_e.BashAssoc): # caught earlier but OK
923 val = cast(value.BashAssoc, UP_val)
924 var_ref_str = ' '.join(bash_impl.BashAssoc_GetValues(val))
925
926 else:
927 raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
928
929 try:
930 bvs_part = self.unsafe_arith.ParseVarRef(var_ref_str, blame_tok)
931 except error.FatalRuntime as e:
932 raise error.VarSubFailure(e.msg, e.location)
933
934 return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
935
936 def _ApplyUnarySuffixOp(self, val, op):
937 # type: (value_t, suffix_op.Unary) -> value_t
938 assert val.tag() != value_e.Undef
939
940 op_kind = consts.GetKind(op.op.id)
941
942 if op_kind == Kind.VOp1:
943 # NOTE: glob syntax is supported in ^ ^^ , ,, ! As well as % %% # ##.
944 # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
945 # shortcut for constant strings.
946 arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
947 assert arg_val.tag() == value_e.Str
948
949 UP_val = val
950 with tagswitch(val) as case:
951 if case(value_e.Str):
952 val = cast(value.Str, UP_val)
953 s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
954 has_extglob)
955 #log('%r %r -> %r', val.s, arg_val.s, s)
956 new_val = value.Str(s) # type: value_t
957
958 elif case(value_e.BashArray, value_e.BashAssoc):
959 # get values
960 if val.tag() == value_e.BashArray:
961 val = cast(value.BashArray, UP_val)
962 values = bash_impl.BashArray_GetValues(val)
963 elif val.tag() == value_e.BashAssoc:
964 val = cast(value.BashAssoc, UP_val)
965 values = bash_impl.BashAssoc_GetValues(val)
966 else:
967 raise AssertionError()
968
969 # ${a[@]#prefix} is VECTORIZED on arrays. YSH should have this too.
970 strs = [
971 string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
972 has_extglob) for s in values
973 ]
974 new_val = value.BashArray(strs)
975
976 else:
977 raise error.TypeErr(
978 val, 'Unary op expected Str, BashArray, BashAssoc',
979 op.op)
980
981 else:
982 raise AssertionError(Kind_str(op_kind))
983
984 return new_val
985
986 def _PatSub(self, val, op):
987 # type: (value_t, suffix_op.PatSub) -> value_t
988
989 pat_val, has_extglob = self.EvalWordToPattern(op.pat)
990 # Extended globs aren't supported because we only translate * ? etc. to
991 # ERE. I don't think there's a straightforward translation from !(*.py) to
992 # ERE! You would need an engine that supports negation? (Derivatives?)
993 if has_extglob:
994 e_die('extended globs not supported in ${x//GLOB/}', op.pat)
995
996 if op.replace:
997 replace_val = self.EvalRhsWord(op.replace)
998 # Can't have an array, so must be a string
999 assert replace_val.tag() == value_e.Str, replace_val
1000 replace_str = cast(value.Str, replace_val).s
1001 else:
1002 replace_str = ''
1003
1004 # note: doesn't support self.exec_opts.extglob()!
1005 regex, warnings = glob_.GlobToERE(pat_val.s)
1006 if len(warnings):
1007 # TODO:
1008 # - Add 'shopt -s strict_glob' mode and expose warnings.
1009 # "Glob is not in CANONICAL FORM".
1010 # - Propagate location info back to the 'op.pat' word.
1011 pass
1012 #log('regex %r', regex)
1013 replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)
1014
1015 with tagswitch(val) as case2:
1016 if case2(value_e.Str):
1017 str_val = cast(value.Str, val)
1018 s = replacer.Replace(str_val.s, op)
1019 val = value.Str(s)
1020
1021 elif case2(value_e.BashArray, value_e.BashAssoc):
1022 if val.tag() == value_e.BashArray:
1023 array_val = cast(value.BashArray, val)
1024 values = bash_impl.BashArray_GetValues(array_val)
1025 elif val.tag() == value_e.BashAssoc:
1026 assoc_val = cast(value.BashAssoc, val)
1027 values = bash_impl.BashAssoc_GetValues(assoc_val)
1028 else:
1029 raise AssertionError()
1030 strs = [replacer.Replace(s, op) for s in values]
1031 val = value.BashArray(strs)
1032
1033 else:
1034 raise error.TypeErr(
1035 val, 'Pat Sub op expected Str, BashArray, BashAssoc',
1036 op.slash_tok)
1037
1038 return val
1039
1040 def _Slice(self, val, op, var_name, part):
1041 # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
1042
1043 begin = self.arith_ev.EvalToBigInt(op.begin)
1044
1045 # Note: bash allows lengths to be negative (with odd semantics), but
1046 # we don't allow that right now.
1047 has_length = False
1048 length = -1
1049 if op.length:
1050 has_length = True
1051 length = self.arith_ev.EvalToInt(op.length)
1052
1053 try:
1054 arg0_val = None # type: value.Str
1055 if var_name is None: # $* or $@
1056 arg0_val = self.mem.GetArg0()
1057 val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
1058 except error.Strict as e:
1059 if self.exec_opts.strict_word_eval():
1060 raise
1061 else:
1062 self.errfmt.PrettyPrintError(e, prefix='warning: ')
1063 with tagswitch(val) as case2:
1064 if case2(value_e.Str):
1065 val = value.Str('')
1066 elif case2(value_e.BashArray):
1067 val = value.BashArray([])
1068 else:
1069 raise NotImplementedError()
1070 return val
1071
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value_t, bool]
    """Apply a nullary suffix operator (@P, @Q or @a) to val.

    Args:
      val: the value being transformed
      op: the operator token; op.id selects the transformation
      var_name: name of the variable, or None when there isn't one
                (e.g. ${?@a})
      vsub_token: token passed to _ProcessUndef() for error reporting
      vsub_state: state of this var sub (array_ref and h_value are read)

    Returns:
      (result, quoted2).  quoted2 is only set to True for @Q applied to a
      Str.

    Any other operator id, or an unsupported value type for @P / @Q, is a
    fatal error via e_die().
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # @P: evaluate the value (or each array element) as a prompt string
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')  # type: value_t
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val.s)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            elif case(value_e.BashArray, value_e.BashAssoc):
                # Collect the string values; unset (None) entries of a
                # BashArray are skipped.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                # Same treatment as the Str case, applied element-wise
                tmp = [
                    self.prompt_ev.EvalPrompt(s).replace(
                        '\x01', '').replace('\x02', '') for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # @Q: shell-quote the value (or each array element)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                if vsub_state.array_ref is not None:
                    result = value.BashArray([])
                else:
                    result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.BashAssoc):
                # Collect the string values; unset (None) entries of a
                # BashArray are skipped.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [s for s in bash_impl.BashArray_GetValues(val) if s is not None]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # @a: report attribute flags of the variable
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # The flags are simulated: 'a' / 'A' come from the type of the held
        # value (h_value), and 'r', 'x', 'n' from the variable's cell
        # below.  See spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(vsub_state.h_value) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        # The flag string is repeated once per element: 0 times for Undef,
        # once for any other non-array value.
        count = 1
        with tagswitch(val) as case:
            if case(value_e.Undef):
                count = 0
            elif case(value_e.BashArray):
                val = cast(value.BashArray, UP_val)
                count = bash_impl.BashArray_Count(val)
            elif case(value_e.BashAssoc):
                val = cast(value.BashAssoc, UP_val)
                count = bash_impl.BashAssoc_Count(val)

        result = value.BashArray([''.join(chars)] * count)

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1189
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the [@] and [*] bracket ops.

    val is returned unchanged; the results are side effects on vsub_state:

    - join_array says whether the array should later be joined ("decay")
    - array_ref records the name token when val is Undef, for a later
      error message

    Indexing a Str with @ or * is fatal under strict_array.
    """
    whole = cast(bracket_op.WholeArray, part.bracket_op)
    bracket_id = whole.op_id

    if bracket_id == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
        sigil = '@'
    elif bracket_id == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
        sigil = '*'
    else:
        raise AssertionError(bracket_id)  # unknown

    # BashArray, SparseArray and BashAssoc need no work here.  The other YSH
    # types such as List, Dict, and Float are not supported, but error
    # messages are printed later, so they pass through unmodified too.
    with tagswitch(val) as case:
        if case(value_e.Undef):
            # Save the token of the array reference for the later error
            # message.
            vsub_state.array_ref = part.name_tok
        elif case(value_e.Str):
            if self.exec_opts.strict_array():
                e_die("Can't index string with %s" % sigil,
                      loc.WordPart(part))

    return val
1222
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Look up a single element, e.g. ${a[i+1]} or ${A[key]}.

    The evaluated index or key is also stored in vtest_place.index (an out
    param).  A missing element yields value.Undef.  An out-of-range index
    only prints a message; it is not a fatal error.

    Undef is passed through, indexing a Str is fatal, and any other
    non-array type raises error.TypeErr.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            pass  # it will be checked later

        elif case(value_e.Str):
            # Note: bash itself treats any string as an array; here
            # indexing a string is rejected.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.name_tok)

        elif case(value_e.BashArray):
            arr = cast(value.BashArray, UP_val)
            i = self.arith_ev.EvalToInt(index_expr)
            vtest_place.index = a_index.Int(i)

            elem, status = bash_impl.BashArray_GetElement(arr, i)
            if status == error_code_e.IndexOutOfRange:
                # Bash outputs a warning but does not make it a real
                # error, and so do we.
                self.errfmt.Print_(
                    "Index %d out of bounds for array of length %d" %
                    (i, bash_impl.BashArray_Length(arr)),
                    blame_loc=part.name_tok)

            if elem is not None:
                val = value.Str(elem)
            else:
                val = value.Undef

        elif case(value_e.SparseArray):
            sparse = cast(value.SparseArray, UP_val)
            big_i = self.arith_ev.EvalToBigInt(index_expr)
            # The place only holds a truncated copy of the big index
            vtest_place.index = a_index.Int(mops.BigTruncate(big_i))

            elem, status = bash_impl.SparseArray_GetElement(sparse, big_i)
            if status == error_code_e.IndexOutOfRange:
                # Same as above: warn, don't fail
                big_len = bash_impl.SparseArray_Length(sparse)
                self.errfmt.Print_(
                    "Index %s out of bounds for array of length %s" %
                    (mops.ToStr(big_i), mops.ToStr(big_len)),
                    blame_loc=part.name_tok)

            if elem is not None:
                val = value.Str(elem)
            else:
                val = value.Undef

        elif case(value_e.BashAssoc):
            assoc = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                index_expr, blame_loc=location.TokenForArith(index_expr))

            vtest_place.index = a_index.Str(key)  # out param
            elem = bash_impl.BashAssoc_GetElement(assoc, key)

            if elem is not None:
                val = value.Str(elem)
            else:
                val = value.Undef

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1301
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part.

    Args:
      parts: the word parts between the double quotes
      part_vals: output param to append to.
    """
    # Example of returning array:
    # $ a=(1 2); b=(3); $ c=(4 5)
    # $ argv "${a[@]}${b[@]}${c[@]}"
    # ['1', '234', '5']
    #
    # Example of multiple parts
    # $ argv "${a[@]}${undef[@]:-${c[@]}}"
    # ['1', '24', '5']

    if len(parts) != 0:
        for part in parts:
            self._EvalWordPart(part, part_vals, QUOTED)
    else:
        # Special case for "".  The parser outputs (DoubleQuoted []),
        # instead of (DoubleQuoted [Literal '']).  This is better, but it
        # means we have to produce the empty quoted piece ourselves.
        part_vals.append(Piece('', True, False))
1328
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"
    """
    pieces = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, pieces)
    # Errors are blamed on the left token of the DoubleQuoted part
    joined = self._ConcatPartVals(pieces, dq_part.left)
    return joined
1338
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array like $* to a string.

    Unset (None) entries are dropped, and the rest are joined with the
    splitter's join char.
    """
    assert val.tag() == value_e.BashArray, val
    joiner = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for item in bash_impl.BashArray_GetValues(val):
        if item is not None:
            present.append(item)

    return value.Str(joiner.join(present))
1346
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty value, or fail under nounset.

    Values other than Undef are returned as is.  Undef becomes an empty
    BashArray if vsub_state.array_ref was set, and an empty Str otherwise.
    When the nounset option is on, Undef is a fatal error instead.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    array_tok = vsub_state.array_ref
    if array_tok is not None:
        if self.exec_opts.nounset():
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
        return value.BashArray([])

    if self.exec_opts.nounset():
        tok_str = lexer.TokenVal(name_tok)
        # Strip the leading $ of a token like $foo from the message
        if tok_str.startswith('$'):
            name = tok_str[1:]
        else:
            name = tok_str
        e_die('Undefined variable %r' % name, name_tok)

    return value.Str('')
1368
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a braced var sub, if there is one.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().

    Without one, a named BashArray or BashAssoc either decays to a scalar
    (when ShouldArrayDecay() allows it) or is a fatal error.  Everything
    else is returned unchanged.
    """
    if part.bracket_op:
        with tagswitch(part.bracket_op) as case:
            if case(bracket_op_e.WholeArray):
                val = self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                val = self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(part.bracket_op.tag())

        return val

    # No bracket op from here on
    name = vtest_place.name
    if name is None:
        return val

    tag = val.tag()
    if tag != value_e.BashArray and tag != value_e.BashAssoc:
        return val

    no_other_ops = not (part.prefix_op or part.suffix_op)
    if ShouldArrayDecay(name, self.exec_opts, no_other_ops):
        # for ${BASH_SOURCE}, etc.
        return DecayArray(val)

    e_die("Array %r can't be referred to as a scalar (without @ or *)" %
          name, loc.WordPart(part))
1397
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Evaluate the name and bracket op of a BracedVarSub to a value_t.

    Duplicates some logic from _EvalBracedVarSub.  Unless the
    eval_unsafe_arith option is on, the bracket op is evaluated with
    _allow_command_sub turned off.
    """
    name_id = part.name_tok.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if name_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif name_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(name_id, quoted, vsub_state)

    # update h-value (i.e., the holder of the current value)
    vsub_state.h_value = val

    # We don't need var_index because it's only for L-Values of test ops?
    if self.exec_opts.eval_unsafe_arith():
        result = self._EvalBracketOp(val, part, quoted, vsub_state,
                                     vtest_place)
    else:
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            result = self._EvalBracketOp(val, part, quoted, vsub_state,
                                         vtest_place)

    return result
1430
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a braced var sub like ${x}, ${#a[@]}, ${!ref} or ${x:-y}.

    Args:
      part: the BracedVarSub node
      part_vals: output param to append to.
      quoted: whether the var sub appears in a quoted context
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; otherwise the
                # names are joined into a single piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
    # Remember the value before the bracket op is applied; _Nullary() reads
    # it for @a.
    vsub_state.h_value = val

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_State.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok,
                                         vsub_state):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef check remains
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1611
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate a flat list of part values into one string.

    String pieces are used as is.  Arrays are joined with a space after
    dropping None entries, except under strict_array, where an array is a
    fatal error blamed on location.
    """
    out = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                out.append(piece.s)

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)

                # It appears to not respect IFS
                # TODO: eliminate double join()?
                items = [x for x in arr.strs if x is not None]
                out.append(' '.join(items))

            else:
                raise AssertionError()

    return ''.join(out)
1641
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a braced var sub like ${foo} to a single str.

    The var sub is evaluated as unquoted, and the resulting part values
    are concatenated.
    """
    pieces = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, pieces, False)
    # blame ${ location
    joined = self._ConcatPartVals(pieces, part.left)
    return joined
1652
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a simple var sub like $foo, $1 or $@.

    Args:
      part: the SimpleVarSub node
      part_vals: output param to append to.
      quoted: whether the var sub appears in a quoted context
    """
    tok = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok.id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok.id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok.id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._ProcessUndef(val, tok, vsub_state)

    # An array is only joined to a string when join_array was requested
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1693
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple var sub like $foo to a single str.

    The var sub is evaluated as unquoted, and the resulting part values
    are concatenated.  Errors are blamed on the var sub's token.
    """
    pieces = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, pieces, False)
    joined = self._ConcatPartVals(pieces, node.tok)
    return joined
1703
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Appends to part_vals: the opening operator, each evaluated arm with
    '|' pieces in between, and the closing paren.
    """
    op_tok = part.op
    if op_tok.id == Id.ExtGlob_Comma:
        opener = '@('
    else:
        opener = lexer.LazyStr(op_tok)
    # Do NOT split these.
    part_vals.append(Piece(opener, False, False))

    first = True
    for arm in part.arms:
        if not first:
            part_vals.append(Piece('|', False, False))  # separator
        first = False
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1721
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values of the word
      w: the word, only used to blame errors
      glob_parts: output param; nested extglob parts are replaced by '*'
      fnmatch_parts: output param; nested extglob parts are kept

    An Array part value is a fatal error.
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                # Quoted text must match literally, unless globbing is
                # turned off.
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(part_val.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    s = part_val.s
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # Keep appending to fnmatch_parts, but replace glob_parts
                # with '*'.  The glob strings of the nested parts go to a
                # throwaway list.
                self._TranslateExtGlob(part_val.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1757
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to.
      flags: bit flags; QUOTED and IS_SUBST are read here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are not valid as a part of a word evaluated here
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            # The left token tells command subs and process subs apart
            id_ = part.left_token.id
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The result is marked for splitting only when unquoted
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are emitted as unquoted pieces around the
            # evaluated child word, if any.
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1874
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending to part_vals.

    An Empty word yields a single empty piece, which is marked for
    splitting only when the QUOTED flag is not set.  A Compound word is
    handed to _EvalWordToParts().
    """
    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            is_quoted = bool(eval_flags & QUOTED)
            part_vals.append(Piece('', is_quoted, not is_quoted))

        elif case(rhs_word_e.Compound):
            compound = cast(CompoundWord, UP_w)
            self._EvalWordToParts(compound,
                                  part_vals,
                                  eval_flags=eval_flags)

        else:
            raise AssertionError()
1890
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Evaluates every part of the word and appends the results to part_vals.
    Note that this is a TREE.

    A word with an ExtGlob part is a special case because of the way we
    use glob() and then fnmatch(..., FNM_EXTMATCH) to implement extended
    globs.  It's hard to carry that extra information all the way past the
    word splitting stage.

    OSH semantic limitations: If a word has an extended glob part, then
    1. It can't have an array
    2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # From here on, we parsed a word_part.ExtGlob(); what happens depends
    # on what the caller REQUESTED.
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        for_glob = []  # type: List[str]
        for_fnmatch = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, for_glob, for_fnmatch)

        #log('word_part_vals %s', evaluated)
        glob_pat = ''.join(for_glob)
        fnmatch_pat = ''.join(for_fnmatch)
        #log("glob %s fnmatch %s", glob_pat, fnmatch_pat)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1947
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Appends one string per part value to the output param strs.  Quoted
    pieces are escaped according to QUOTE_FNMATCH or QUOTE_ERE in
    eval_flags.  Nested ExtGlob part values are processed recursively.

    Note: arg 'w' could just be a span ID
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                text = piece.s
                if piece.quoted:
                    if eval_flags & QUOTE_FNMATCH:
                        # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                        text = glob_.GlobEscape(text)
                    elif eval_flags & QUOTE_ERE:
                        text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather than the
                    # parts.  Problem: the word is a TREE of parts, but we only have a
                    # flat list of part_vals.  The only case where we really get arrays
                    # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')

                # It appears to not respect IFS
                items = [x for x in arr.strs if x is not None]
                strs.append(' '.join(items))  # TODO: eliminate double join()?

            elif case(part_value_e.ExtGlob):
                ext = cast(part_value.ExtGlob, UP_pv)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(ext.part_vals, w, eval_flags, strs)

            else:
                raise AssertionError()
2003
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: can contain a quoting algorithm, which is applied when
                  the part values are turned into strings
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Fast path, only without flags: QUOTE_FNMATCH etc. breaks this
    # optimization
    if eval_flags == 0:
        quick = word_.FastStrEval(w)
        if quick is not None:
            return value.Str(quick)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark
    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        # eval_flags is not passed here, which is slightly confusing; it's
        # only used by _PartValsToString() below
        self._EvalWordPart(p, evaluated, 0)

    chunks = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, chunks)
    return value.Str(''.join(chunks))
2031
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    The string is always built with QUOTE_FNMATCH.  An Empty word yields
    ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        # Parts are evaluated with no flags; the quoting flag is only
        # applied by _PartValsToString() below
        self._EvalWordPart(p, evaluated, 0)

    chunks = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, chunks)
    return value.Str(''.join(chunks)), saw_extglob
2052
2053 def EvalForPlugin(self, w):
2054 # type: (CompoundWord) -> value.Str
2055 """Wrapper around EvalWordToString that prevents errors.
2056
2057 Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42)
2058 are handled here.
2059
2060 Similar to ExprEvaluator.PluginCall().
2061 """
2062 with state.ctx_Registers(self.mem): # to "sandbox" $? and $PIPESTATUS
2063 try:
2064 val = self.EvalWordToString(w)
2065 except error.FatalRuntime as e:
2066 val = value.Str('<Runtime error: %s>' % e.UserErrorString())
2067
2068 except (IOError, OSError) as e:
2069 val = value.Str('<I/O error: %s>' % pyutil.strerror(e))
2070
2071 except KeyboardInterrupt:
2072 val = value.Str('<Ctrl-C>')
2073
2074 return val
2075
2076 def EvalRhsWord(self, UP_w):
2077 # type: (rhs_word_t) -> value_t
2078 """Used for RHS of assignment.
2079
2080 There is no splitting.
2081 """
2082 if UP_w.tag() == rhs_word_e.Empty:
2083 return value.Str('')
2084
2085 assert UP_w.tag() == word_e.Compound, UP_w
2086 w = cast(CompoundWord, UP_w)
2087
2088 if len(w.parts) == 1:
2089 part0 = w.parts[0]
2090 UP_part0 = part0
2091 tag = part0.tag()
2092 # Special case for a=(1 2). ShArrayLiteral won't appear in words that
2093 # don't look like assignments.
2094 if tag == word_part_e.ShArrayLiteral:
2095 part0 = cast(ShArrayLiteral, UP_part0)
2096 array_words = part0.words
2097 words = braces.BraceExpandWords(array_words)
2098 strs = self.EvalWordSequence(words)
2099 return value.BashArray(strs)
2100
2101 if tag == word_part_e.BashAssocLiteral:
2102 part0 = cast(word_part.BashAssocLiteral, UP_part0)
2103 d = NewDict() # type: Dict[str, str]
2104 for pair in part0.pairs:
2105 k = self.EvalWordToString(pair.key)
2106 v = self.EvalWordToString(pair.value)
2107 d[k.s] = v.s
2108 return value.BashAssoc(d)
2109
2110 # If RHS doesn't look like a=( ... ), then it must be a string.
2111 return self.EvalWordToString(w)
2112
    def _EvalWordFrame(self, frame, argv):
        # type: (List[Piece], List[str]) -> None
        """Turn one frame of pieces into zero or more args appended to argv.

        - If every piece is empty and none is quoted, nothing is appended.
        - If every piece is quoted, the joined text is appended as a single
          arg, with no splitting or globbing.
        - Otherwise each piece is escaped, the escaped pieces are joined, and
          the result is split with self.splitter.SplitForWordEval().  Each
          resulting arg for which glob_.LooksLikeGlob() is true is expanded
          with self.globber.Expand(); other args are appended after
          glob_.GlobUnescape().

        Args:
          frame: The pieces that make up one frame.
          argv: Output list; args are appended to it.

        Raises:
          error.FailGlob: if self.globber.Expand() returns a negative count.
        """
        all_empty = True
        all_quoted = True
        any_quoted = False

        #log('--- frame %s', frame)

        # One pass to classify the frame.
        for piece in frame:
            if len(piece.s):
                all_empty = False

            if piece.quoted:
                any_quoted = True
            else:
                all_quoted = False

        # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
        if all_empty and not any_quoted:
            return

        # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
        # don't do word splitting or globbing.
        if all_quoted:
            tmp = [piece.s for piece in frame]
            a = ''.join(tmp)
            argv.append(a)
            return

        will_glob = not self.exec_opts.noglob()

        # Disabled debug output
        if 0:
            log('---')
            log('FRAME')
            for i, piece in enumerate(frame):
                log('(%d) %s', i, piece)
            log('')

        # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
        frags = []  # type: List[str]
        for piece in frame:
            # First layer: escaping for the globbing step.
            if will_glob and piece.quoted:
                frag = glob_.GlobEscape(piece.s)
            else:
                # If we have a literal \, then we turn it into \\\\.
                # Splitting takes \\\\ -> \\
                # Globbing takes \\ to \ if it doesn't match
                frag = _BackslashEscape(piece.s)

            # Second layer: escaping for the splitting step.
            if piece.do_split:
                frag = _BackslashEscape(frag)
            else:
                frag = self.splitter.Escape(frag)

            frags.append(frag)

        # Disabled debug output
        if 0:
            log('---')
            log('FRAGS')
            for i, frag in enumerate(frags):
                log('(%d) %s', i, frag)
            log('')

        flat = ''.join(frags)
        #log('flat: %r', flat)

        args = self.splitter.SplitForWordEval(flat)

        # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
        # Add it back and don't bother globbing.
        if len(args) == 0 and any_quoted:
            argv.append('')
            return

        #log('split args: %r', args)
        for a in args:
            if glob_.LooksLikeGlob(a):
                # Expand() appends to argv; a negative return value is
                # treated as a failed glob.
                n = self.globber.Expand(a, argv)
                if n < 0:
                    # TODO: location info, with span IDs carried through the frame
                    raise error.FailGlob('Pattern %r matched no files' % a,
                                         loc.Missing)
            else:
                argv.append(glob_.GlobUnescape(a))
2197
2198 def _EvalWordToArgv(self, w):
2199 # type: (CompoundWord) -> List[str]
2200 """Helper for _EvalAssignBuiltin.
2201
2202 Splitting and globbing are disabled for assignment builtins.
2203
2204 Example: declare -"${a[@]}" b=(1 2)
2205 where a is [x b=a d=a]
2206 """
2207 part_vals = [] # type: List[part_value_t]
2208 self._EvalWordToParts(w, part_vals, 0) # not double quoted
2209 frames = _MakeWordFrames(part_vals)
2210 argv = [] # type: List[str]
2211 for frame in frames:
2212 if len(frame): # empty array gives empty frame!
2213 tmp = [piece.s for piece in frame]
2214 argv.append(''.join(tmp)) # no split or glob
2215 #log('argv: %s', argv)
2216 return argv
2217
2218 def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
2219 # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
2220 """Handles both static and dynamic assignment, e.g.
2221
2222 x='foo=bar'
2223 local a=(1 2) $x
2224
2225 Grammar:
2226
2227 ('builtin' | 'command')* keyword flag* pair*
2228 flag = [-+].*
2229
2230 There is also command -p, but we haven't implemented it. Maybe just
2231 punt on it.
2232 """
2233 eval_to_pairs = True # except for -f and -F
2234 started_pairs = False
2235
2236 flags = [arg0] # initial flags like -p, and -f -F name1 name2
2237 flag_locs = [words[0]]
2238 assign_args = [] # type: List[AssignArg]
2239
2240 n = len(words)
2241 for i in xrange(meta_offset + 1, n): # skip first word
2242 w = words[i]
2243
2244 if word_.IsVarLike(w):
2245 started_pairs = True # Everything from now on is an assign_pair
2246
2247 if started_pairs:
2248 left_token, close_token, part_offset = word_.DetectShAssignment(
2249 w)
2250 if left_token: # Detected statically
2251 if left_token.id != Id.Lit_VarLike:
2252 # (not guaranteed since started_pairs is set twice)
2253 e_die('LHS array not allowed in assignment builtin', w)
2254
2255 if lexer.IsPlusEquals(left_token):
2256 var_name = lexer.TokenSliceRight(left_token, -2)
2257 append = True
2258 else:
2259 var_name = lexer.TokenSliceRight(left_token, -1)
2260 append = False
2261
2262 if part_offset == len(w.parts):
2263 rhs = rhs_word.Empty # type: rhs_word_t
2264 else:
2265 # tmp is for intersection of C++/MyPy type systems
2266 tmp = CompoundWord(w.parts[part_offset:])
2267 word_.TildeDetectAssign(tmp)
2268 rhs = tmp
2269
2270 with state.ctx_AssignBuiltin(self.mutable_opts):
2271 right = self.EvalRhsWord(rhs)
2272
2273 arg2 = AssignArg(var_name, right, append, w)
2274 assign_args.append(arg2)
2275
2276 else: # e.g. export $dynamic
2277 argv = self._EvalWordToArgv(w)
2278 for arg in argv:
2279 arg2 = _SplitAssignArg(arg, w)
2280 assign_args.append(arg2)
2281
2282 else:
2283 argv = self._EvalWordToArgv(w)
2284 for arg in argv:
2285 if arg.startswith('-') or arg.startswith('+'):
2286 # e.g. declare -r +r
2287 flags.append(arg)
2288 flag_locs.append(w)
2289
2290 # Shortcut that relies on -f and -F always meaning "function" for
2291 # all assignment builtins
2292 if 'f' in arg or 'F' in arg:
2293 eval_to_pairs = False
2294
2295 else: # e.g. export $dynamic
2296 if eval_to_pairs:
2297 arg2 = _SplitAssignArg(arg, w)
2298 assign_args.append(arg2)
2299 started_pairs = True
2300 else:
2301 flags.append(arg)
2302
2303 return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2304
2305 def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
2306 # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
2307 builtin_id = consts.LookupAssignBuiltin(arg0)
2308 if builtin_id != consts.NO_INDEX:
2309 return self._EvalAssignBuiltin(builtin_id, arg0, words,
2310 meta_offset)
2311 return None
2312
2313 def _DetectAssignBuiltin(self, val0, words, meta_offset):
2314 # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
2315 UP_val0 = val0
2316 if val0.tag() == part_value_e.String:
2317 val0 = cast(Piece, UP_val0)
2318 if not val0.quoted:
2319 return self._DetectAssignBuiltinStr(val0.s, words, meta_offset)
2320 return None
2321
2322 def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
2323 # type: (List[CompoundWord], bool, bool) -> cmd_value_t
2324 """Simple word evaluation for YSH."""
2325 strs = [] # type: List[str]
2326 locs = [] # type: List[CompoundWord]
2327
2328 meta_offset = 0
2329 for i, w in enumerate(words):
2330 # No globbing in the first arg for command.Simple.
2331 if i == meta_offset and allow_assign:
2332 strs0 = self._EvalWordToArgv(w)
2333 # TODO: Remove this because YSH will disallow assignment
2334 # builtins? (including export?)
2335 if len(strs0) == 1:
2336 cmd_val = self._DetectAssignBuiltinStr(
2337 strs0[0], words, meta_offset)
2338 if cmd_val:
2339 return cmd_val
2340
2341 strs.extend(strs0)
2342 for _ in strs0:
2343 locs.append(w)
2344 continue
2345
2346 if glob_.LooksLikeStaticGlob(w):
2347 val = self.EvalWordToString(w) # respects strict-array
2348 num_appended = self.globber.Expand(val.s, strs)
2349 if num_appended < 0:
2350 raise error.FailGlob('Pattern %r matched no files' % val.s,
2351 w)
2352 for _ in xrange(num_appended):
2353 locs.append(w)
2354 continue
2355
2356 part_vals = [] # type: List[part_value_t]
2357 self._EvalWordToParts(w, part_vals, 0) # not quoted
2358
2359 if 0:
2360 log('')
2361 log('Static: part_vals after _EvalWordToParts:')
2362 for entry in part_vals:
2363 log(' %s', entry)
2364
2365 # Still need to process
2366 frames = _MakeWordFrames(part_vals)
2367
2368 if 0:
2369 log('')
2370 log('Static: frames after _MakeWordFrames:')
2371 for entry in frames:
2372 log(' %s', entry)
2373
2374 # We will still allow x"${a[@]"x, though it's deprecated by @a, which
2375 # disallows such expressions at parse time.
2376 for frame in frames:
2377 if len(frame): # empty array gives empty frame!
2378 tmp = [piece.s for piece in frame]
2379 strs.append(''.join(tmp)) # no split or glob
2380 locs.append(w)
2381
2382 assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
2383 return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2384
2385 def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
2386 # type: (List[CompoundWord], bool, bool) -> cmd_value_t
2387 """Turns a list of Words into a list of strings.
2388
2389 Unlike the EvalWord*() methods, it does globbing.
2390
2391 Args:
2392 allow_assign: True for command.Simple, False for BashArray a=(1 2 3)
2393 """
2394 if self.exec_opts.simple_word_eval():
2395 return self.SimpleEvalWordSequence2(words, is_last_cmd,
2396 allow_assign)
2397
2398 # Parse time:
2399 # 1. brace expansion. TODO: Do at parse time.
2400 # 2. Tilde detection. DONE at parse time. Only if Id.Lit_Tilde is the
2401 # first WordPart.
2402 #
2403 # Run time:
2404 # 3. tilde sub, var sub, command sub, arith sub. These are all
2405 # "concurrent" on WordParts. (optional process sub with <() )
2406 # 4. word splitting. Can turn this off with a shell option? Definitely
2407 # off for oil.
2408 # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.
2409
2410 #log('W %s', words)
2411 strs = [] # type: List[str]
2412 locs = [] # type: List[CompoundWord]
2413
2414 # 0 for declare x
2415 # 1 for builtin declare x
2416 # 2 for command builtin declare x
2417 # etc.
2418 meta_offset = 0
2419
2420 n = 0
2421 for i, w in enumerate(words):
2422 fast_str = word_.FastStrEval(w)
2423 if fast_str is not None:
2424 strs.append(fast_str)
2425 locs.append(w)
2426
2427 # e.g. the 'local' in 'local a=b c=d' will be here
2428 if allow_assign and i == meta_offset:
2429 cmd_val = self._DetectAssignBuiltinStr(
2430 fast_str, words, meta_offset)
2431 if cmd_val:
2432 return cmd_val
2433
2434 if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
2435 meta_offset += 1
2436
2437 # Bug fix: n must be updated on every loop iteration
2438 n = len(strs)
2439 assert len(strs) == len(locs), strs
2440 continue
2441
2442 part_vals = [] # type: List[part_value_t]
2443 self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)
2444
2445 # DYNAMICALLY detect if we're going to run an assignment builtin, and
2446 # change the rest of the evaluation algorithm if so.
2447 #
2448 # We want to allow:
2449 # e=export
2450 # $e foo=bar
2451 #
2452 # But we don't want to evaluate the first word twice in the case of:
2453 # $(some-command) --flag
2454 if len(part_vals) == 1:
2455 if allow_assign and i == meta_offset:
2456 cmd_val = self._DetectAssignBuiltin(
2457 part_vals[0], words, meta_offset)
2458 if cmd_val:
2459 return cmd_val
2460
2461 if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
2462 meta_offset += 1
2463
2464 if 0:
2465 log('')
2466 log('part_vals after _EvalWordToParts:')
2467 for entry in part_vals:
2468 log(' %s', entry)
2469
2470 frames = _MakeWordFrames(part_vals)
2471 if 0:
2472 log('')
2473 log('frames after _MakeWordFrames:')
2474 for entry in frames:
2475 log(' %s', entry)
2476
2477 # Do splitting and globbing. Each frame will append zero or more args.
2478 for frame in frames:
2479 self._EvalWordFrame(frame, strs)
2480
2481 # Fill in locations parallel to strs.
2482 n_next = len(strs)
2483 for _ in xrange(n_next - n):
2484 locs.append(w)
2485 n = n_next
2486
2487 # A non-assignment command.
2488 # NOTE: Can't look up builtins here like we did for assignment, because
2489 # functions can override builtins.
2490 assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
2491 return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2492
2493 def EvalWordSequence(self, words):
2494 # type: (List[CompoundWord]) -> List[str]
2495 """For arrays and for loops.
2496
2497 They don't allow assignment builtins.
2498 """
2499 # is_last_cmd is irrelevant
2500 cmd_val = self.EvalWordSequence2(words, False)
2501 assert cmd_val.tag() == cmd_value_e.Argv
2502 return cast(cmd_value.Argv, cmd_val).argv
2503
2504
class NormalWordEvaluator(AbstractWordEvaluator):
    """A word evaluator that runs command and process subs via shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Starts out as None; CheckCircularDeps() asserts it has been set.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators and executor have been filled in."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and wrap its stdout in a part value.

        For @( ) the output is split with j8.SplitJ8Lines() into a
        part_value.Array.  Otherwise a Piece is returned, marked for
        splitting only when it's not quoted.
        """
        output = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(output, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(output)
        except error.Decode as decode_err:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, decode_err.Message(),
                                   cs_part.left_token)

        #lines = self.splitter.SplitForWordEval(output)
        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the resulting path as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2551
2552
# Placeholder text that CompletionWordEvaluator._EvalCommandSub() returns in
# place of real command sub output.
_DUMMY = '__NO_COMMAND_SUB__'
2554
2555
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to placeholder strings instead of
    being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end of
    the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators this class relies on are filled in."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return the _DUMMY placeholder without running the command sub."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        # @( ) yields an array, so the placeholder is a one-element array.
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder Piece without running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2595
2596
2597# vim: sw=4