OILS / osh / word_eval.py View on Github | oils.pub

2574 lines, 1578 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 error_code_e,
37 AssignArg,
38 a_index,
39 a_index_e,
40 VTestPlace,
41 VarSubState,
42 Piece,
43)
44from _devbuild.gen.option_asdl import option_i, builtin_i
45from _devbuild.gen.value_asdl import (
46 value,
47 value_e,
48 value_t,
49 sh_lvalue,
50 sh_lvalue_t,
51)
52from core import bash_impl
53from core import error
54from core import pyos
55from core import pyutil
56from core import state
57from display import ui
58from core import util
59from data_lang import j8
60from data_lang import j8_lite
61from core.error import e_die
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from mycpp import mops
66from mycpp.mylib import log, tagswitch, NewDict
67from osh import braces
68from osh import glob_
69from osh import string_ops
70from osh import word_
71from ysh import expr_eval
72from ysh import val_ops
73
74from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76if TYPE_CHECKING:
77 from _devbuild.gen.syntax_asdl import word_part_t
78 from _devbuild.gen.option_asdl import builtin_t
79 from core import optview
80 from core.state import Mem
81 from core.vm import _Executor
82 from osh.split import SplitContext
83 from osh import prompt
84 from osh import sh_expr_eval
85
# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both).
# Each flag is a distinct bit, so they can be combined with | into a single
# eval_flags integer.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} is allowed to mean ${a[0]}.

    Decay is always allowed when strict_array is off.  With strict_array on,
    it is allowed only for a plain var sub whose name appears in
    _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True

    # strict_array is on: only the compatibility names may still decay
    return is_plain_var_sub and var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Turn an array value into the value of its element 0 / '0'.

    A BashArray is looked up at index 0 and a BashAssoc at key '0'.  Any other
    value type is a programming error (AssertionError).

    Returns:
      value.Str for the element, or value.Undef when there is no such element.
    """
    tag = val.tag()
    if tag == value_e.BashArray:
        indexed = cast(value.BashArray, val)
        first, status = bash_impl.BashArray_GetElement(indexed, 0)

        # Note: index 0 should never cause the out-of-bound index error.
        assert status == error_code_e.OK

    elif tag == value_e.BashAssoc:
        keyed = cast(value.BashAssoc, val)
        first = bash_impl.BashAssoc_GetElement(keyed, '0')

    else:
        raise AssertionError(tag)

    if first is not None:
        return value.Str(first)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    These prefixes have to be recognized in all of the following forms:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Assignment builtins have different WORD EVALUATION RULES for a=$x (no
    word splitting), so it seems hard to do this in meta_oils.Builtin() or
    meta_oils.Command().
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return builtin_id in (builtin_i.builtin, builtin_i.command)
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether a part value is an unquoted 'builtin' or 'command'.

    Only an unquoted String part (a Piece) can qualify; everything else is
    False.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False

    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse one argument of declare, export, etc. at runtime.

    This is the dynamic fallback to the static parsing done below.

    Args:
      arg: the evaluated argument string, e.g. NAME, NAME=value, NAME+=value
      blame_word: word used as the error location and stored in the result

    Returns:
      AssignArg(name, value or None, append flag, blame_word).  Dies with a
      fatal error if arg doesn't match consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API
    # for that, and it probably isn't a bottleneck now
    groups = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if groups is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    name = groups[1]
    # groups[2] is used for grouping; ERE doesn't have non-capturing groups
    op = groups[3]
    assert op is not None, op

    if len(op) == 0:  # declare NAME
        rhs = None  # type: Optional[value_t]
        is_append = False
    else:  # declare NAME=
        rhs = value.Str(groups[4])
        is_append = op[0] == '+'

    return AssignArg(name, rhs, is_append, blame_word)
186
187
188# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189def _BackslashEscape(s):
190 # type: (str) -> str
191 """Double up backslashes.
192
193 Useful for strings about to be globbed and strings about to be IFS
194 escaped.
195 """
196 return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert an evaluated value into a part_value; helper for VarSub.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the value to substitute
      quoted: whether the substitution is quoted; string results are built as
              Piece(s, quoted, not quoted)
      part_loc: word part used as the location for error messages

    Raises:
      error.TypeErr if the value can't be substituted into a word.
    """
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _ProcessUndef, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            str_val = cast(value.Str, val)
            return Piece(str_val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, val)
            return part_value.Array(bash_impl.BashArray_GetValues(array_val))

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, val)
            # bash behavior: splice values!
            return part_value.Array(bash_impl.BashAssoc_GetValues(assoc_val))

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            text = val_ops.Stringify(val, loc.WordPart(part_loc),
                                     'Word eval ')
            return Piece(text, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group a word's flat list of part values into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args; frames are never joined
    with each other.  They exist because of arrays like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    The first array element extends the current frame, every later element
    opens a new frame, and whatever follows the array is appended to the last
    frame opened.

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we should
    make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    all_frames = [frame]

    for pv in part_vals:
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                frame.append(cast(Piece, pv))

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, pv)

                seen_element = False
                for elem in array_part.strs:
                    if elem is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would
                    # have decayed to a string.
                    piece = Piece(elem, True, False)
                    if seen_element:
                        frame = [piece]
                        all_frames.append(frame)  # singleton frame
                    else:
                        frame.append(piece)
                        seen_element = True

            else:
                raise AssertionError()

    return all_frames
307
308
# TODO: This could be _MakeWordFrames and then sep.join().  It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values to one string, e.g. to decay ${a=x"$@"x}.

    String parts contribute their text as-is.  Array parts contribute their
    defined (non-None) entries joined by join_char.  The contributions are
    concatenated with no separator between parts.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                chunks.append(cast(Piece, pv).s)

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, pv)
                # TODO: Eliminate double join for speed?
                defined = [s for s in array_part.strs if s is not None]
                chunks.append(join_char.join(defined))

            else:
                raise AssertionError()

    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Compute ${x:offset} / ${x:offset:length} for a string or an array.

    Args:
      val: value.Str, value.BashArray or value.SparseArray to slice
      offset: start position; negative counts back from the end
      length: only meaningful when has_length is true.  For strings a
              negative length is an end POSITION counted from the end; for
              arrays a negative length is a fatal error.
      has_length: whether a length was given at all
      part: the var sub, used as the location for errors
      arg0_val: when not None, the slice treats $0 as element 0 (the
                positional-parameter quirk for $@ and $*)

    Returns:
      value.Str for a string, value.BashArray for either array type.

    Raises:
      Fatal error for BashAssoc or a negative array length; error.TypeErr for
      any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            str_val = cast(value.Str, UP_val)
            text = str_val.s
            num_bytes = len(text)

            char_offset = mops.BigTruncate(offset)
            if char_offset >= 0:
                byte_begin = string_ops.AdvanceUtf8Chars(text, char_offset, 0)
            else:  # Compute offset with unicode, walking back from the end
                byte_begin = num_bytes
                for _ in xrange(-char_offset):
                    byte_begin = string_ops.PreviousUtf8Char(text, byte_begin)

            if not has_length:
                byte_end = num_bytes
            elif length >= 0:
                byte_end = string_ops.AdvanceUtf8Chars(text, length,
                                                       byte_begin)
            else:
                # Confusing: a negative length is a POSITION from the end
                byte_end = num_bytes
                for _ in xrange(-length):
                    byte_end = string_ops.PreviousUtf8Char(text, byte_end)

            result = value.Str(text[byte_begin:byte_end])  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    array_val = cast(value.BashArray, UP_val)
                    total = mops.IntWiden(
                        bash_impl.BashArray_Length(array_val))
                elif val.tag() == value_e.SparseArray:
                    sparse_val = cast(value.SparseArray, UP_val)
                    total = bash_impl.SparseArray_Length(sparse_val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    total = mops.Add(total, mops.ONE)

                offset = mops.Add(offset, total)

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # Still before the start after adjusting: nothing selected
                picked = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts
                # $0.
                with_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        with_arg0 = True
                        length = length - 1  # $0 uses up one slot

                if has_length and length == 0:
                    picked = []

                elif val.tag() == value_e.BashArray:
                    array_val = cast(value.BashArray, UP_val)
                    elems = bash_impl.BashArray_GetValues(array_val)
                    num_elems = len(elems)

                    picked = []
                    pos = mops.BigTruncate(offset)
                    taken = 0
                    # length could be 0, so test it before taking anything
                    while pos < num_elems and not (has_length and
                                                   taken == length):
                        elem = elems[pos]
                        # Unset elements don't count towards the length
                        if elem is not None:
                            picked.append(elem)
                            taken += 1
                        pos += 1

                elif val.tag() == value_e.SparseArray:
                    sparse_val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort
                    # so that we only extract the first LENGTH elements of
                    # the indices greater or equal to OFFSET.
                    num_skipped = 0
                    for index in bash_impl.SparseArray_GetKeys(sparse_val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        num_skipped = num_skipped + 1

                    sparse_elems = bash_impl.SparseArray_GetValues(sparse_val)
                    if has_length:
                        picked = sparse_elems[num_skipped:num_skipped +
                                              length]
                    else:
                        picked = sparse_elems[num_skipped:]

                else:
                    raise AssertionError()

                if with_arg0:
                    combined = [arg0_val.s]
                    combined.extend(picked)
                    picked = combined

            result = value.BashArray(picked)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """Minimal word-evaluation interface; used by ArithEvaluator and
    BoolEvaluator."""

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate a word to a single string value.

        Not implemented here; this base method always raises
        NotImplementedError.
        """
        raise NotImplementedError()
480
481
482def _GetDollarHyphen(exec_opts):
483 # type: (optview.Exec) -> str
484 chars = [] # type: List[str]
485 if exec_opts.interactive():
486 chars.append('i')
487
488 if exec_opts.errexit():
489 chars.append('e')
490 if exec_opts.noglob():
491 chars.append('f')
492 if exec_opts.noexec():
493 chars.append('n')
494 if exec_opts.nounset():
495 chars.append('u')
496 # NO letter for pipefail?
497 if exec_opts.xtrace():
498 chars.append('x')
499 if exec_opts.noclobber():
500 chars.append('C')
501
502 # bash has:
503 # - c for sh -c, i for sh -i (mksh also has this)
504 # - h for hashing (mksh also has this)
505 # - B for brace expansion
506 return ''.join(chars)
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user word parts."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem  # consulted for HOME
        self.exec_opts = exec_opts  # consulted for strict_tilde

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home dir: $HOME first, then a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            return pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        If the lookup fails, this is a fatal error under strict_tilde;
        otherwise the literal ~ or ~user text is returned.
        """
        user = part.user_name
        if user is None:
            home_dir = self.GetMyHomeDir()
        else:
            home_dir = pyos.GetHomeDir(user)

        if home_dir is not None:
            return home_dir

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)
        else:
            # Return ~ or ~user literally
            home_dir = '~'
            if user is not None:
                home_dir = home_dir + user  # mycpp doesn't have +=

        return home_dir
550
551
552class AbstractWordEvaluator(StringWordEvaluator):
553 """Abstract base class for word evaluators.
554
555 Public entry points:
556 EvalWordToString EvalForPlugin EvalRhsWord
557 EvalWordSequence EvalWordSequence2
558 """
559
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators used during word evaluation.

    arith_ev, expr_ev, prompt_ev and unsafe_arith are not constructor
    arguments; they start as None here (presumably wired up by the caller
    afterwards -- see CheckCircularDeps; TODO confirm).
    """
    # Evaluators that are not passed in
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith

    # Constructor arguments
    self.tilde_ev = tilde_ev
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.splitter = splitter
    self.errfmt = errfmt

    # Built here from the same options
    self.globber = glob_.Globber(exec_opts)
585
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract: this base version always raises NotImplementedError."""
    raise NotImplementedError()
589
590 def _EvalCommandSub(self, cs_part, quoted):
591 # type: (CommandSub, bool) -> part_value_t
592 """Abstract since it has a side effect."""
593 raise NotImplementedError()
594
595 def _EvalProcessSub(self, cs_part):
596 # type: (CommandSub) -> part_value_t
597 """Abstract since it has a side effect."""
598 raise NotImplementedError()
599
600 def _EvalVarNum(self, var_num):
601 # type: (int) -> value_t
602 assert var_num >= 0
603 return self.mem.GetArgNum(var_num)
604
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate a special variable like $@, $*, $-, $? and so forth.

    $@ is special -- it needs to know whether it is in a double quoted
    context.

    - If it's $@ in a double quoted context, return an ARRAY.
    - If it's $@ in a normal context, return a STRING, which then will be
      subject to splitting.

    For $@ and $* this returns the argv as a BashArray and records in
    vsub_state.join_array whether the caller should join (decay) it.
    """
    if op_id in (Id.VSub_At, Id.VSub_Star):
        args_val = value.BashArray(self.mem.GetArgv())  # type: value_t
        # $* and "$*" are both decayed; "$@" stays an array, while unquoted
        # $@ is decayed
        vsub_state.join_array = (op_id == Id.VSub_Star) or not quoted
        return args_val

    if op_id == Id.VSub_Hyphen:
        return value.Str(_GetDollarHyphen(self.exec_opts))

    return self.mem.GetSpecialVar(op_id)
634
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
):
    # type: (...) -> bool
    """Apply a test operator: ${a:-x} ${a:+x} ${a:=x} ${a:?x}, and the
    variants without the colon.

    Args:
      val: the value being tested
      op: the operator token (op.op) and its argument word (op.arg_word)
      quoted: whether the substitution is quoted; passed on as the QUOTED
              eval flag when evaluating the argument word
      part_vals: out param that receives the argument's part values
      vtest_place: name and optional index to assign to for := and =
      blame_token: location used for the fatal errors raised here

    Returns:
      Whether part_vals was mutated

    ${a:-} returns part_value[]
    ${a:+} returns part_value[]
    ${a:?error} returns error word?
    ${a:=} returns part_value[] but also needs self.mem for side effects.

    So I guess it should return part_value[], and then a flag for raising an
    error, and then a flag for assigning it?
    The original BracedVarSub will have the name.

    Example of needing multiple part_value[]

      echo X-${a:-'def'"ault"}-X

    We return two part values from the BracedVarSub.  Also consider:

      echo ${a:-x"$@"x}
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            # Only the colon variants treat an empty string like unset
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            # TODO: allow undefined
            is_falsey = len(val.strs) == 0

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            is_falsey = len(val.d) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Blame the var sub, like the ${a:?} error below does
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array'
                # could avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
806
def _Count(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    Strings are counted in UTF-8 characters; arrays by their element count.
    If counting a string fails with error.Strict, the error is re-raised
    under strict_word_eval; otherwise a warning is printed and -1 is
    returned.

    Raises:
      error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?

            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                n = string_ops.CountUtf8Chars(str_val.s)
            except error.Strict as e:
                # Add this here so we don't have to add it so far down the
                # stack.
                # TODO: It's better to show BOTH this CODE an the actual
                # DATA somehow.
                e.location = token

                if self.exec_opts.strict_word_eval():
                    raise

                # NOTE: Doesn't make the command exit with 1; it just
                # returns a length of -1.
                self.errfmt.PrettyPrintError(e, prefix='warning: ')
                return -1

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            n = bash_impl.BashArray_Count(array_val)

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            n = bash_impl.BashAssoc_Count(assoc_val)

        elif case(value_e.SparseArray):
            sparse_val = cast(value.SparseArray, UP_val)
            n = bash_impl.SparseArray_Count(sparse_val)

        else:
            raise error.TypeErr(
                val, "Length op expected Str, BashArray, BashAssoc", token)

    return n
853
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    Args:
      val: a BashArray or BashAssoc
      token: location for the type error

    Returns:
      A BashArray of the indices (as decimal strings) or of the keys.

    Raises:
      error.TypeErr if val is neither a BashArray nor a BashAssoc.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            keys = bash_impl.BashAssoc_GetKeys(val)
            return value.BashArray(keys)

        else:
            # Str is NOT accepted here, so the message names the types that
            # are (matching the style of the other ops' messages).
            raise error.TypeErr(val, 'Keys op expected BashArray, BashAssoc',
                                token)
875
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    The value is turned into a string, parsed as a variable reference, and
    that reference is then evaluated.

    Args:
      val: value of the variable that holds the reference
      blame_tok: 'foo' for ${!foo}
      quoted, vsub_state, vtest_place: passed through to _VarRefValue

    Raises:
      error.TypeErr for unsupported value types; error.VarSubFailure if the
      string can't be parsed as a variable reference.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            # bash-4.4 returned value.Undef here.  bash-5.0 started to treat
            # the variable name to be empty so that the indirection fails.
            var_ref_str = ''

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            var_ref_str = val.s

        elif case(value_e.BashArray):  # caught earlier but OK
            val = cast(value.BashArray, UP_val)
            # Skip None (unset) entries before joining, as the other users
            # of BashArray_GetValues in this file do.
            defined = [
                s for s in bash_impl.BashArray_GetValues(val)
                if s is not None
            ]
            # When there are more than one element in the array, this
            # produces a wrong variable name containing spaces.
            var_ref_str = ' '.join(defined)

        elif case(value_e.BashAssoc):  # caught earlier but OK
            val = cast(value.BashAssoc, UP_val)
            var_ref_str = ' '.join(bash_impl.BashAssoc_GetValues(val))

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)

    try:
        bvs_part = self.unsafe_arith.ParseVarRef(var_ref_str, blame_tok)
    except error.FatalRuntime as e:
        # Report a bad reference as a var sub failure
        raise error.VarSubFailure(e.msg, e.location)

    return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
913
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix op of kind VOp1 (e.g. # ## % %% ^ ^^ , ,,).

    Args:
      val: a defined value (must not be Undef): Str, BashArray or BashAssoc
      op: the operator token and its argument word, which is evaluated as a
          pattern

    Returns:
      A value.Str for a string; for arrays, a value.BashArray with the op
      applied to each element.

    Raises:
      error.TypeErr for other value types.
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)

    if op_kind == Kind.VOp1:
        # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
        # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
        # shortcut for constant strings.
        arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
        assert arg_val.tag() == value_e.Str

        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Str):
                val = cast(value.Str, UP_val)
                s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
                                               has_extglob)
                #log('%r %r -> %r', val.s, arg_val.s, s)
                new_val = value.Str(s)  # type: value_t

            elif case(value_e.BashArray, value_e.BashAssoc):
                # get values
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = bash_impl.BashArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have
                # this too.
                # None (unset) entries are skipped rather than handed to
                # DoUnarySuffixOp, as the other users of
                # BashArray_GetValues in this file do.
                strs = [
                    string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
                                               has_extglob) for s in values
                    if s is not None
                ]
                new_val = value.BashArray(strs)

            else:
                raise error.TypeErr(
                    val, 'Unary op expected Str, BashArray, BashAssoc',
                    op.op)

    else:
        raise AssertionError(Kind_str(op_kind))

    return new_val
963
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply pattern substitution, ${x/pat/replace} and its variants.

    The glob pattern is translated to an ERE, which does the replacing.

    Args:
      val: Str, BashArray or BashAssoc
      op: holds the pattern word, the optional replacement word, and the
          slash token used for error locations

    Returns:
      A value.Str for a string; for arrays, a value.BashArray with the
      replacement applied to each element.

    Raises:
      Fatal error if the pattern contains an extended glob; error.TypeErr
      for other value types.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from
    # !(*.py) to ERE!  You would need an engine that supports negation?
    # (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s
    else:
        replace_str = ''

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    with tagswitch(val) as case2:
        if case2(value_e.Str):
            str_val = cast(value.Str, val)
            s = replacer.Replace(str_val.s, op)
            val = value.Str(s)

        elif case2(value_e.BashArray, value_e.BashAssoc):
            if val.tag() == value_e.BashArray:
                array_val = cast(value.BashArray, val)
                values = bash_impl.BashArray_GetValues(array_val)
            elif val.tag() == value_e.BashAssoc:
                assoc_val = cast(value.BashAssoc, val)
                values = bash_impl.BashAssoc_GetValues(assoc_val)
            else:
                raise AssertionError()
            # None (unset) entries are skipped rather than handed to the
            # replacer, as the other users of BashArray_GetValues in this
            # file do.
            strs = [
                replacer.Replace(s, op) for s in values if s is not None
            ]
            val = value.BashArray(strs)

        else:
            raise error.TypeErr(
                val, 'Pat Sub op expected Str, BashArray, BashAssoc',
                op.slash_tok)

    return val
1017
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate a slice op like ${x:begin} or ${x:begin:length}.

    The begin and length expressions are evaluated arithmetically, and the
    slicing itself is done by _PerformSlice.

    Args:
      val: the value to slice
      op: holds the begin expression and the optional length expression
      var_name: None for $* or $@, in which case $0 is passed along
      part: the var sub, for error locations

    If slicing fails with error.Strict, the error is re-raised under
    strict_word_eval; otherwise a warning is printed and an empty Str or
    BashArray is returned.
    """
    offset = self.arith_ev.EvalToBigInt(op.begin)

    # Note: bash allows lengths to be negative (with odd semantics), but
    # we don't allow that right now.
    if op.length:
        has_length = True
        length = self.arith_ev.EvalToInt(op.length)
    else:
        has_length = False
        length = -1

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        val = _PerformSlice(val, offset, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        # Fall back to an empty result of the same kind
        with tagswitch(val) as case2:
            if case2(value_e.Str):
                val = value.Str('')
            elif case2(value_e.BashArray):
                val = value.BashArray([])
            else:
                raise NotImplementedError()

    return val
1049
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value_t, bool]
    """Apply a nullary suffix operator: ${x@P}, ${x@Q} or ${x@a}.

    Args:
      val: value the operator is applied to.
      op: operator token; op.id selects the transformation.
      var_name: variable name, or None (e.g. ${?@a}); only used by @a to
        look up the cell's readonly/exported/nameref flags.
      vsub_token: token passed to _ProcessUndef() for error reporting.
      vsub_state: per-substitution state; array_ref is read by @Q and
        h_value is read by @a.

    Returns:
      (result, quoted2).  quoted2 is True only when @Q is applied to a
      string; otherwise it is False.

    Unsupported value types and unimplemented operators are reported with
    e_die().
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # @P: evaluate the value as a prompt string.
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')  # type: value_t
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val.s)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            elif case(value_e.BashArray, value_e.BashAssoc):
                # Vectorized: evaluate each element as a prompt.  Unset
                # (None) entries of an indexed array are dropped.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    self.prompt_ev.EvalPrompt(s).replace(
                        '\x01', '').replace('\x02', '') for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # @Q: shell-quote the value.
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                if vsub_state.array_ref is not None:
                    result = value.BashArray([])
                else:
                    result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.BashAssoc):
                # Vectorized: quote each element.  Unset (None) entries of
                # an indexed array are dropped.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [s for s in bash_impl.BashArray_GetValues(val) if s is not None]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # @a: report attribute flags.
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # Simulate the attribute flags: 'a' / 'A' come from the type of the
        # holder value (vsub_state.h_value); 'r', 'x' and 'n' come from the
        # variable's cell below.  See spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(vsub_state.h_value) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        # The flag string is repeated once per element: 0 times for Undef,
        # once for scalars, and Count() times for arrays.
        count = 1
        with tagswitch(val) as case:
            if case(value_e.Undef):
                count = 0
            elif case(value_e.BashArray):
                val = cast(value.BashArray, UP_val)
                count = bash_impl.BashArray_Count(val)
            elif case(value_e.BashAssoc):
                val = cast(value.BashAssoc, UP_val)
                count = bash_impl.BashAssoc_Count(val)

        result = value.BashArray([''.join(chars)] * count)

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1167
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the whole-array bracket ops [@] and [*].

    The value is returned unmodified.  The work happens through side
    effects on vsub_state: join_array is set according to the operator and
    quoting, and array_ref records the name token when val is Undef.
    """
    whole = cast(bracket_op.WholeArray, part.bracket_op)
    which = whole.op_id

    if which == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        op_str = '*'
        vsub_state.join_array = True
    elif which == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        op_str = '@'
        vsub_state.join_array = not quoted
    else:
        raise AssertionError(which)  # unknown

    tag = val.tag()
    if tag == value_e.Undef:
        # Remember the token of the array reference so that a later error
        # message about the undefined array can point at it.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str:
        if self.exec_opts.strict_array():
            e_die("Can't index string with %s" % op_str, loc.WordPart(part))

    # Nothing to do for BashArray, SparseArray and BashAssoc.  Other YSH
    # types (List, Dict, Float, ...) are unsupported; they are passed
    # through untouched here and reported later.
    return val
1200
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process a single-element index like ${a[i+1]} or ${assoc[key]}.

    Args:
      val: the container value being indexed.
      part: the ${} node; its bracket_op holds the index expression.
      vtest_place: out param; .index is set to the evaluated index for
        BashArray, SparseArray and BashAssoc.

    Returns:
      value.Str for a present element, value.Undef for a missing one, or
      val unchanged when it is already Undef.

    Indexing a string is a fatal error via e_die(); any other type raises
    error.TypeErr.
    """
    anode = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.name_tok)

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            # The index is an arithmetic expression, evaluated to an int.
            index = self.arith_ev.EvalToInt(anode)
            vtest_place.index = a_index.Int(index)

            s, error_code = bash_impl.BashArray_GetElement(
                array_val, index)
            if error_code == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                self.errfmt.Print_(
                    "Index %d out of bounds for array of length %d" %
                    (index, bash_impl.BashArray_Length(array_val)),
                    blame_loc=part.name_tok)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        elif case2(value_e.SparseArray):
            sparse_val = cast(value.SparseArray, UP_val)
            # Sparse arrays are indexed with a big integer; the index
            # recorded in vtest_place is truncated with mops.BigTruncate().
            big_index = self.arith_ev.EvalToBigInt(anode)
            vtest_place.index = a_index.Int(mops.BigTruncate(big_index))

            s, error_code = bash_impl.SparseArray_GetElement(
                sparse_val, big_index)
            if error_code == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                big_length = bash_impl.SparseArray_Length(sparse_val)
                self.errfmt.Print_(
                    "Index %s out of bounds for array of length %s" %
                    (mops.ToStr(big_index), mops.ToStr(big_length)),
                    blame_loc=part.name_tok)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                anode, blame_loc=location.TokenForArith(anode))

            vtest_place.index = a_index.Str(key)  # out param
            s = bash_impl.BashAssoc_GetElement(assoc_val, key)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1279
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Args:
      parts: the word parts between the double quotes.
      part_vals: output param; evaluated parts are appended to it.

    The result may contain arrays:
      $ a=(1 2); b=(3); $ c=(4 5)
      $ argv "${a[@]}${b[@]}${c[@]}"
      ['1', '234', '5']

    And it may come from nested parts:
      $ argv "${a[@]}${undef[@]:-${c[@]}}"
      ['1', '24', '5']
    """
    if len(parts) != 0:
        for inner in parts:
            self._EvalWordPart(inner, part_vals, QUOTED)
        return

    # Special case for "".  The parser outputs (DoubleQuoted []) rather
    # than (DoubleQuoted [Literal '']), so the empty quoted piece has to be
    # produced here.
    part_vals.append(Piece('', True, False))
1306
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string in a YSH expression to a str.

    Example: var x = "$foo-${foo}"

    Errors raised while concatenating are blamed on dq_part.left.
    """
    pieces = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, pieces)
    joined = self._ConcatPartVals(pieces, dq_part.left)
    return joined
1316
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array (e.g. $*) to a single string.

    Unset (None) elements are skipped; the rest are joined with the
    splitter's join character.
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for elem in bash_impl.BashArray_GetValues(val):
        if elem is not None:
            present.append(elem)

    return value.Str(join_char.join(present))
1324
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty value, or fail under nounset.

    Args:
      val: value to check; anything other than Undef is returned as is.
      name_tok: token of the variable name, used for the error message.
      vsub_state: if array_ref is set, the undefined thing is an array.

    Returns:
      val itself when defined; otherwise an empty BashArray (array
      reference) or an empty Str (scalar).  With nounset on, e_die() is
      called instead.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    array_tok = vsub_state.array_ref
    is_strict = self.exec_opts.nounset()

    if array_tok is None:
        # Scalar reference
        if not is_strict:
            return value.Str('')

        tok_str = lexer.TokenVal(name_tok)
        if tok_str.startswith('$'):
            name = tok_str[1:]
        else:
            name = tok_str
        e_die('Undefined variable %r' % name, name_tok)
    else:
        # Array reference like ${a[@]}
        if not is_strict:
            return value.BashArray([])

        e_die('Undefined array %r' % lexer.TokenVal(array_tok), array_tok)
1346
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a ${} substitution, if there is one.

    With a bracket op, dispatch to _WholeArray() or _ArrayIndex().  Without
    one, a named array or assoc array either decays to a scalar (when
    ShouldArrayDecay() allows it) or is a fatal error.
    """
    b_op = part.bracket_op
    if not b_op:  # no bracket op
        name = vtest_place.name
        if name is None:
            return val
        if val.tag() not in (value_e.BashArray, value_e.BashAssoc):
            return val

        no_other_ops = not (part.prefix_op or part.suffix_op)
        if not ShouldArrayDecay(name, self.exec_opts, no_other_ops):
            e_die(
                "Array %r can't be referred to as a scalar (without @ or *)"
                % name, loc.WordPart(part))

        # for ${BASH_SOURCE}, etc.
        return DecayArray(val)

    tag = b_op.tag()
    if tag == bracket_op_e.WholeArray:
        return self._WholeArray(val, part, quoted, vsub_state)
    if tag == bracket_op_e.ArrayIndex:
        return self._ArrayIndex(val, part, vtest_place)

    raise AssertionError(tag)
1375
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Evaluate a var ref to a value_t.

    Duplicates some logic from _EvalBracedVarSub.  The bracket op is
    evaluated with _allow_command_sub turned off unless the
    eval_unsafe_arith option is set.
    """
    name_id = part.name_tok.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if name_id == Id.VSub_Name:
        ref_name = part.var_name
        vtest_place.name = ref_name
        val = self.mem.GetValue(ref_name)
    elif name_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))
    else:
        # $* decays
        val = self._EvalSpecialVar(name_id, quoted, vsub_state)

    # update h-value (i.e., the holder of the current value)
    vsub_state.h_value = val

    # We don't need var_index because it's only for L-Values of test ops?
    if self.exec_opts.eval_unsafe_arith():
        return self._EvalBracketOp(val, part, quoted, vsub_state,
                                   vtest_place)

    with state.ctx_Option(self.mutable_opts, [option_i._allow_command_sub],
                          False):
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)
    return val
1408
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the BracedVarSub node to evaluate.
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only a quoted "${!prefix@}" yields an array; every other
                # form is joined into a single piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    suffix_op_ = part.suffix_op
    if suffix_op_:
        # Untyped alias, used for the cast() calls in the tagswitch below.
        UP_op = suffix_op_
    # Record the holder of the current value before the bracket op changes
    # val; _Nullary() reads it for ${x@a}.
    vsub_state.h_value = val

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef -> empty conversion remains.
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1588
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate evaluated parts into one string.

    Args:
      part_vals: String and Array part values; anything else is an
        internal error.
      location: blamed when an array appears under strict_array.

    Array parts are joined with a single space (IFS does not appear to be
    respected), skipping None entries.
    """
    out = []  # type: List[str]
    for pv in part_vals:
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, pv)
            out.append(piece.s)

        elif tag == part_value_e.Array:
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"
                e_die("Illegal array word part (strict_array)", location)

            arr = cast(part_value.Array, pv)
            # TODO: eliminate double join()?
            kept = []  # type: List[str]
            for item in arr.strs:
                if item is not None:
                    kept.append(item)
            out.append(' '.join(kept))

        else:
            raise AssertionError()

    return ''.join(out)
1618
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution to a str, for YSH expressions.

    Example: var x = "$foo-${foo}"

    The substitution is evaluated as unquoted.  Errors raised while
    concatenating are blamed on the ${ location (part.left).
    """
    pieces = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, pieces, False)
    joined = self._ConcatPartVals(pieces, part.left)
    return joined
1629
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate $name, $1, $@, etc. and append the result to part_vals.

    Args:
      part: the SimpleVarSub node.
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context.
    """
    tok = part.tok
    tok_id = tok.id
    sub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok_id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, sub_state)

    #log('SIMPLE %s', part)
    val = self._ProcessUndef(val, tok, sub_state)

    # Arrays are joined only when the special var asked for it; otherwise
    # the array is passed along unchanged.
    if val.tag() == value_e.BashArray and sub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1670
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a $name substitution to a str, for YSH expressions.

    Example: var x = "$foo-${foo}"

    The substitution is evaluated as unquoted.  Errors raised while
    concatenating are blamed on node.tok.
    """
    pieces = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, pieces, False)
    joined = self._ConcatPartVals(pieces, node.tok)
    return joined
1680
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it into part_vals.

    The opening operator, the '|' separators and the closing ')' are
    appended as unquoted pieces that are NOT split.
    """
    left = part.op
    opener = '@(' if left.id == Id.ExtGlob_Comma else lexer.LazyStr(left)
    part_vals.append(Piece(opener, False, False))

    need_sep = False
    for arm in part.arms:
        if need_sep:
            part_vals.append(Piece('|', False, False))
        need_sep = True
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))
1698
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values of the word.
      w: the word, used as the location for error messages.
      glob_parts: output param; each ExtGlob part contributes a single '*'.
      fnmatch_parts: output param; receives the full extended pattern.
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(part_val.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    s = part_val.s
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # keep appending fnmatch_parts, but replace glob_parts with
                # '*': the nested glob pieces go to a throwaway list.
                self._TranslateExtGlob(part_val.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1734
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate.
      part_vals: output param to append to.
      flags: bit flags; only QUOTED and IS_SUBST are read here.
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are not valid as ordinary word parts.
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            # Always quoted, never split
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            # Always quoted, never split
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            # The left token distinguishes command subs from process subs.
            id_ = part.left_token.id
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The result is subject to splitting only when unquoted.
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are emitted as unquoted pieces around the child
            # word, which is evaluated with no flags.
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1851
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending the resulting parts to part_vals.

    An Empty word yields one empty piece that is quoted according to the
    QUOTED flag; a Compound word is delegated to _EvalWordToParts().
    """
    tag = w.tag()

    if tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, w)
        self._EvalWordToParts(compound, part_vals, eval_flags=eval_flags)

    elif tag == rhs_word_e.Empty:
        is_quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', is_quoted, not is_quoted))

    else:
        raise AssertionError()
1867
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Returns:
      Appends to part_vals.  Note that this is a TREE.

    Words with an extended glob part are a special case, because extended
    globs are implemented with glob() followed by
    fnmatch(..., FNM_EXTMATCH), and it's hard to carry that information
    past the word splitting stage.  OSH semantic limitations for such
    words:
      1. They can't have an array
      2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for wp in w.parts:
        if wp.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(wp, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # From here on, we parsed a word_part.ExtGlob(); what happens depends on
    # what the caller REQUESTED.
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  Two variants of the word are
        # needed because of the way libc is used:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        for_glob = []  # type: List[str]
        for_fnmatch = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, for_glob, for_fnmatch)

        glob_pat = ''.join(for_glob)
        fnmatch_pat = ''.join(for_fnmatch)

        matches = []  # type: List[str]
        status = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if status < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))
        return

    if bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)
        return

    # e.g. simple_word_eval, assignment builtin
    e_die('Extended glob not allowed in this word', w)
1924
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: evaluated parts; ExtGlob parts are flattened recursively.
      w: word used as the error location (could just be a span ID).
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE selects how quoted pieces are
        escaped.
      strs: output param; the resulting strings are appended to it.
    """
    for pv in part_vals:
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, pv)
            text = piece.s
            if piece.quoted:
                if eval_flags & QUOTE_FNMATCH:
                    # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                    text = glob_.GlobEscape(text)
                elif eval_flags & QUOTE_ERE:
                    text = glob_.ExtendedRegexEscape(text)
            strs.append(text)

        elif tag == part_value_e.Array:
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"

                # TODO: This attributes too coarsely, to the word rather than the
                # parts.  Problem: the word is a TREE of parts, but we only have a
                # flat list of part_vals.  The only case where we really get arrays
                # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                e_die(
                    "This word should yield a string, but it contains an array",
                    w)

                # TODO: Maybe add detail like this.
                #e_die('RHS of assignment should only have strings.  '
                #      'To assign arrays, use b=( "${a[@]}" )')

            arr = cast(part_value.Array, pv)
            # It appears to not respect IFS
            # TODO: eliminate double join()?
            kept = []  # type: List[str]
            for item in arr.strs:
                if item is not None:
                    kept.append(item)
            strs.append(' '.join(kept))

        elif tag == part_value_e.ExtGlob:
            # Extended globs are only allowed where we expect them!
            if not bool(eval_flags & QUOTE_FNMATCH):
                e_die('extended glob not allowed in this word', w)

            nested = cast(part_value.ExtGlob, pv)
            # recursive call
            self._PartValsToString(nested.part_vals, w, eval_flags, strs)

        else:
            raise AssertionError()
1980
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Evaluate a compound word to a single string value.

    Args:
      UP_w: the word; must be a CompoundWord.
      eval_flags: may select a quoting algorithm for quoted pieces.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    compound = cast(CompoundWord, UP_w)

    # The fast path is only valid without flags: QUOTE_FNMATCH etc. breaks
    # the optimization.
    if eval_flags == 0:
        quick = word_.FastStrEval(compound)
        if quick is not None:
            return value.Str(quick)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for wp in compound.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(wp, evaluated, 0)

    out = []  # type: List[str]
    self._PartValsToString(evaluated, compound, eval_flags, out)
    return value.Str(''.join(out))
2008
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but also report whether we got an ExtGlob.

    Returns:
      (pattern, has_extglob).  Quoted pieces are escaped with
      QUOTE_FNMATCH.  An Empty word gives ('', False).
    """
    tag = UP_w.tag()
    if tag == rhs_word_e.Empty:
        return value.Str(''), False

    assert tag == rhs_word_e.Compound, UP_w
    compound = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for wp in compound.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(wp, evaluated, 0)
        if wp.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    out = []  # type: List[str]
    self._PartValsToString(evaluated, compound, QUOTE_FNMATCH, out)
    return value.Str(''.join(out)), saw_extglob
2029
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Evaluate a word for a plugin, turning failures into strings.

    Wraps EvalWordToString.  Runtime errors like $(( 1 / 0 )), I/O errors
    and Ctrl-C become placeholder strings instead of propagating, and
    mutations of $? like $(exit 42) are sandboxed.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers "sandboxes" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as err:
            result = value.Str('<Runtime error: %s>' % err.UserErrorString())

        except (IOError, OSError) as err:
            result = value.Str('<I/O error: %s>' % pyutil.strerror(err))

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2052
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Evaluate the right-hand side of an assignment.

    There is no splitting.

    Args:
      UP_w: either rhs_word.Empty or a CompoundWord.

    Returns:
      value.Str('') for an empty RHS; value.BashArray when the word is a
      single ShArrayLiteral part, e.g. a=(1 2); value.BashAssoc when it is
      a single BashAssocLiteral part; otherwise the value.Str produced by
      EvalWordToString().
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so check against the rhs_word_e tag.  This
    # matches the equivalent check in EvalWordToPattern().
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            # Brace expansion happens before the words are evaluated
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # Keys and values are each evaluated to a single string; a
            # later pair with the same key overwrites an earlier one.
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2089
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args, appended to argv.

    This is where word splitting and globbing are applied.

    Args:
      frame: the pieces of the frame.  Each piece has a string `s` and the
        flags `quoted` and `do_split`.
      argv: output list, which is mutated in place.

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count
        for a pattern.
    """
    all_empty = True
    all_quoted = True
    any_quoted = False

    #log('--- frame %s', frame)

    # One pass to classify the frame
    for piece in frame:
        if len(piece.s):
            all_empty = False

        if piece.quoted:
            any_quoted = True
        else:
            all_quoted = False

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if all_empty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if all_quoted:
        tmp = [piece.s for piece in frame]
        a = ''.join(tmp)
        argv.append(a)
        return

    will_glob = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # First layer: quoted pieces are glob-escaped when globbing is
        # enabled; everything else is backslash-escaped.
        if will_glob and piece.quoted:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        # Second layer, for the splitter.
        # NOTE(review): presumably splitter.Escape() protects a piece that
        # must not be split from SplitForWordEval() -- confirm in osh/split.py
        if piece.do_split:
            frag = _BackslashEscape(frag)
        else:
            frag = self.splitter.Escape(frag)

        frags.append(frag)

    if 0:
        log('---')
        log('FRAGS')
        for i, frag in enumerate(frags):
            log('(%d) %s', i, frag)
        log('')

    flat = ''.join(frags)
    #log('flat: %r', flat)

    args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(args) == 0 and any_quoted:
        argv.append('')
        return

    #log('split args: %r', args)
    for a in args:
        if glob_.LooksLikeGlob(a):
            # Expand() is given argv to append to; a negative return value
            # is reported as a failed glob.
            n = self.globber.Expand(a, argv)
            if n < 0:
                # TODO: location info, with span IDs carried through the frame
                raise error.FailGlob('Pattern %r matched no files' % a,
                                     loc.Missing)
        else:
            # Not a glob: undo the glob escaping and use the arg as is
            argv.append(glob_.GlobUnescape(a))
2174
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of args, without splitting or globbing.

    Helper for _EvalAssignBuiltin, where splitting and globbing are
    disabled.  A word can still produce several args, or none.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    part_vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, part_vals, 0)  # not double quoted

    result = []  # type: List[str]
    word_frames = _MakeWordFrames(part_vals)
    for frame in word_frames:
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        pieces = [piece.s for piece in frame]
        result.append(''.join(pieces))  # no split or glob

    #log('argv: %s', result)
    return result
2194
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin, stored in the result.
      arg0: the evaluated name of the builtin; it becomes flags[0].
      words: all words of the command, including leading meta builtins.
      meta_offset: index in `words` of the word that evaluated to arg0.
        Evaluation starts at the word after it.

    Returns:
      cmd_value.Assign with the flags (plus the names following -f / -F),
      a list of locations parallel to the flags, and the assignment pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    flags = [arg0]  # initial flags like -p, and -f -F name1 name2

    # flag_locs is kept parallel to flags: flag_locs[i] is the word that
    # produced flags[i].  arg0 came from words[meta_offset], which differs
    # from words[0] when meta builtins like 'builtin' precede the keyword.
    flag_locs = [words[meta_offset]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the '=', e.g. x=
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name after -f / -F.  Record its location too,
                        # so that flags and flag_locs stay the same length.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2281
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate the command as an assignment builtin, if arg0 names one.

    Returns:
      The cmd_value.Assign from _EvalAssignBuiltin(), or None when
      consts.LookupAssignBuiltin() doesn't know arg0.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2289
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr(), but for an evaluated part value.

    Only an unquoted string part can name an assignment builtin.  Any other
    part value, including a quoted string, gives None.
    """
    UP_val0 = val0
    if val0.tag() == part_value_e.String:
        # Narrow to Piece in order to read .quoted and .s
        val0 = cast(Piece, UP_val0)
        if not val0.quoted:
            return self._DetectAssignBuiltinStr(val0.s, words, meta_offset)
    return None
2298
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    There is no word splitting.  Globbing only happens for words where
    glob_.LooksLikeStaticGlob() is true.

    Args:
      words: the words to evaluate.
      is_last_cmd: passed through to cmd_value.Argv.
      allow_assign: whether the first word may name an assignment builtin.

    Returns:
      cmd_value.Assign if an assignment builtin was detected, otherwise
      cmd_value.Argv with parallel lists of strings and locations.

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count.
    """
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    meta_offset = 0  # never changed here, so only word 0 is special
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if i == meta_offset and allow_assign:
            strs0 = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(strs0) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    strs0[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            strs.extend(strs0)
            # One location per arg, to keep locs parallel to strs
            for _ in strs0:
                locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            val = self.EvalWordToString(w)  # respects strict-array
            # Expand() appends to strs and returns how many it appended
            num_appended = self.globber.Expand(val.s, strs)
            if num_appended < 0:
                raise error.FailGlob('Pattern %r matched no files' % val.s,
                                     w)
            for _ in xrange(num_appended):
                locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        if 0:
            log('')
            log('Static: part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        # Still need to process
        frames = _MakeWordFrames(part_vals)

        if 0:
            log('')
            log('Static: frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # We will still allow x"${a[@]}"x, though it's deprecated by @a, which
        # disallows such expressions at parse time.
        for frame in frames:
            if len(frame):  # empty array gives empty frame!
                tmp = [piece.s for piece in frame]
                strs.append(''.join(tmp))  # no split or glob
                locs.append(w)

    assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2361
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: the words to evaluate.
      is_last_cmd: passed through to cmd_value.Argv.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Assign if allow_assign is set and an assignment builtin was
      detected, otherwise cmd_value.Argv with parallel lists of strings and
      locations.
    """
    # The simple_word_eval option selects the YSH algorithm instead
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    # first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    # "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    # off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    n = 0  # len(strs) at the end of the previous iteration
    for i, w in enumerate(words):
        # Fast path: the word evaluates to exactly one string
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            # A leading meta builtin shifts where the real command starts
            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            # Bug fix: n must be updated on every loop iteration
            n = len(strs)
            assert len(strs) == len(locs), strs
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.
        n_next = len(strs)
        for _ in xrange(n_next - n):
            locs.append(w)
        n = n_next

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2469
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a list of strings, for arrays and for loops.

    Assignment builtins aren't allowed in these contexts, so the result of
    EvalWordSequence2() is always a cmd_value.Argv.
    """
    # The value of is_last_cmd is irrelevant here
    result = self.EvalWordSequence2(words, False)
    assert result.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2480
2481
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs with an executor.

    self.shell_ex starts out as None; CheckCircularDeps() asserts that it
    has been set.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs were set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and turn its stdout into a part value.

        @(...) gives an Array of J8 lines; anything else gives a Piece.
        """
        output = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # An unquoted result is subject to splitting
            return Piece(output, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(output)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        #lines = self.splitter.SplitForWordEval(output)
        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path from RunProcessSub()."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2528
2529
# Placeholder that CompletionWordEvaluator returns instead of the output of a
# command sub, since it has no executor to run one.
_DUMMY = '__NO_COMMAND_SUB__'
2531
2532
class CompletionWordEvaluator(AbstractWordEvaluator):
    """A word evaluator without an executor.

    Command subs and process subs aren't run; they evaluate to fixed
    placeholder strings.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs were set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder with the same shape as a real result."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        # @(...) evaluates to an array
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of a process sub path."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2572
2573
2574# vim: sw=4