OILS / osh / word_eval.py View on Github | oils.pub

2603 lines, 1600 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 error_code_e,
37 AssignArg,
38 a_index,
39 a_index_e,
40 VTestPlace,
41 VarSubState,
42 Piece,
43)
44from _devbuild.gen.option_asdl import option_i, builtin_i
45from _devbuild.gen.value_asdl import (
46 value,
47 value_e,
48 value_t,
49 sh_lvalue,
50 sh_lvalue_t,
51)
52from core import bash_impl
53from core import error
54from core import pyos
55from core import pyutil
56from core import state
57from display import ui
58from core import util
59from data_lang import j8
60from data_lang import j8_lite
61from core.error import e_die
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from mycpp import mops
66from mycpp.mylib import log, tagswitch, NewDict
67from osh import braces
68from osh import glob_
69from osh import string_ops
70from osh import word_
71from ysh import expr_eval
72from ysh import val_ops
73
74from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76if TYPE_CHECKING:
77 from _devbuild.gen.syntax_asdl import word_part_t
78 from _devbuild.gen.option_asdl import builtin_t
79 from core import optview
80 from core.state import Mem
81 from core.vm import _Executor
82 from osh.split import SplitContext
83 from osh import prompt
84 from osh import sh_expr_eval
85
# Bit flags understood by _EvalWordToParts and _EvalWordPart.  Each of those
# functions only looks at a subset of the flags.
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# Bit flags for EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# Variables that may be referenced both as a string and as an array: for
# compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid, and the
# same goes for ${FUNCNAME} and ${BASH_LINENO}.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Return whether we should allow ${a} to mean ${a[0]}.

    Decay is always allowed when strict_array is off.  With strict_array on,
    it is only allowed for a plain variable substitution of one of the names
    in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    return is_plain_var_sub and var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    A BashArray is read at index 0 and a BashAssoc at key '0'.  Returns
    value.Undef when that element does not exist, otherwise a value.Str.
    Any other value type is a programming error.
    """
    tag = val.tag()
    if tag == value_e.BashArray:
        elem, status = bash_impl.BashArray_GetElement(
            cast(value.BashArray, val), 0)

        # Note: index 0 should never cause the out-of-bound index error.
        assert status == error_code_e.OK

    elif tag == value_e.BashAssoc:
        elem = bash_impl.BashAssoc_GetElement(cast(value.BashAssoc, val), '0')

    else:
        raise AssertionError(tag)

    if elem is not None:
        return value.Str(elem)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    We need to detect all of these cases:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_oils.Builtin() or meta_oils.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether the first part value is an unquoted 'builtin'/'command'.

    Only an unquoted String part can be a meta builtin; everything else
    yields False.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse an argument to declare, export, etc.

    This is a fallback to the static parsing done below.

    Args:
      arg: the evaluated argument, e.g. 'NAME', 'NAME=value' or 'NAME+=value'
      blame_word: word used for error locations and stored in the result

    Dies with a fatal error when arg doesn't match consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    match = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if match is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    name = match[1]
    # match[2] is used for grouping; ERE doesn't have non-capturing groups

    op_str = match[3]
    assert op_str is not None, op_str

    if len(op_str) == 0:  # declare NAME
        rhs = None  # type: Optional[value_t]
        is_append = False  # no operator
    else:  # declare NAME=
        rhs = value.Str(match[4])
        is_append = op_str[0] == '+'

    return AssignArg(name, rhs, is_append, blame_word)
186
187
188# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189def _BackslashEscape(s):
190 # type: (str) -> str
191 """Double up backslashes.
192
193 Useful for strings about to be globbed and strings about to be IFS
194 escaped.
195 """
196 return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Strings (and stringifiable YSH values) become a Piece whose third field
    is the negation of 'quoted'; bash arrays become a part_value.Array of
    their values.  Other types raise error.TypeErr located at part_loc.
    """
    UP_val = val

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped _ProcessUndef,
            # but we have to append to the empty string.
            result = Piece('', quoted, not quoted)  # type: part_value_t

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            result = Piece(val.s, quoted, not quoted)

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            result = part_value.Array(bash_impl.BashArray_GetValues(val))

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            result = part_value.Array(bash_impl.BashAssoc_GetValues(val))

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            stringified = val_ops.Stringify(val, loc.WordPart(part_loc),
                                            'Word eval ')
            result = Piece(stringified, quoted, not quoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    return result
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group a flat list of part values (String or Array) into frames.

    A frame is a portion of a word that results in zero or more args.  It can
    never be joined with another frame.  This idea exists because of arrays
    like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    So the first array element joins the frame that is currently open, every
    later element starts a new frame, and whatever follows the array is
    appended to the frame of the last element.

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    frames = [frame]

    for part_val in part_vals:
        UP_part_val = part_val

        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                frame.append(part_val)

            elif case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)

                seen_first = False
                for s in part_val.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would have
                    # decayed to a string.
                    piece = Piece(s, True, False)
                    if seen_first:
                        frame = [piece]
                        frames.append(frame)  # singleton frame
                    else:
                        frame.append(piece)
                        seen_first = True

            else:
                raise AssertionError()

    return frames
307
308
309# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into one string, e.g. decay ${a=x"$@"x}.

    String parts are concatenated as is.  The defined (non-None) entries of
    an Array part are joined with join_char first.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                chunks.append(part_val.s)

            elif case(part_value_e.Array):
                part_val = cast(part_value.Array, UP_part_val)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for s in part_val.strs:
                    if s is not None:
                        defined.append(s)
                chunks.append(join_char.join(defined))

            else:
                raise AssertionError()

    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Evaluate the slice ${x:offset} or ${x:offset:length} on a value.

    Args:
      val: a Str (sliced by UTF-8 characters) or a BashArray / SparseArray
           (sliced by entries)
      offset: start position; a negative value counts from the end
      length: only meaningful when has_length is true
      has_length: whether a length was given at all
      part: used for error locations
      arg0_val: $0 when slicing positional arguments ($@ and $*), else None

    Returns:
      value.Str for a string, value.BashArray for either array type.

    Dies on a negative array length and on associative arrays; raises
    error.TypeErr on any other type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            begin = mops.BigTruncate(offset)
            if begin >= 0:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)
            else:  # Walk backwards from the end, one character at a time
                byte_begin = n
                steps_back = -begin
                for _ in xrange(steps_back):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)

            if not has_length:
                byte_end = len(s)
            elif length >= 0:
                byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)
            else:
                # Confusing: a negative length is a POSITION relative to the
                # end of the string
                byte_end = n
                steps_back = -length
                for _ in xrange(steps_back):
                    byte_end = string_ops.PreviousUtf8Char(s, byte_end)

            result = value.Str(s[byte_begin:byte_end])  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                offset = mops.Add(offset, array_length)

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # Still negative after adjusting: nothing to return
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # Offset 0 selects $0 itself, which uses up one
                        # entry of the requested length
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    elems = bash_impl.BashArray_GetValues(val)
                    n = len(elems)

                    strs = []
                    pos = mops.BigTruncate(offset)
                    taken = 0
                    while pos < n:
                        if has_length and taken == length:  # length could be 0
                            break
                        s = elems[pos]
                        if s is not None:  # Unset elements don't count towards the length
                            strs.append(s)
                            taken += 1
                        pos += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort so
                    # that we only extract the first LENGTH elements of the
                    # indices greater or equal to OFFSET.
                    first = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        first = first + 1

                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[
                            first:first + length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[first:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    with_arg0 = [arg0_val.s]
                    with_arg0.extend(strs)
                    strs = with_arg0

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    Subclasses must override EvalWordToString().
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate a word to a string value.  Abstract here."""
        raise NotImplementedError()
480
481
482def _GetDollarHyphen(exec_opts):
483 # type: (optview.Exec) -> str
484 chars = [] # type: List[str]
485 if exec_opts.interactive():
486 chars.append('i')
487
488 if exec_opts.errexit():
489 chars.append('e')
490 if exec_opts.noglob():
491 chars.append('f')
492 if exec_opts.noexec():
493 chars.append('n')
494 if exec_opts.nounset():
495 chars.append('u')
496 # NO letter for pipefail?
497 if exec_opts.xtrace():
498 chars.append('x')
499 if exec_opts.noclobber():
500 chars.append('C')
501
502 # bash has:
503 # - c for sh -c, i for sh -i (mksh also has this)
504 # - h for hashing (mksh also has this)
505 # - B for brace expansion
506 return ''.join(chars)
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user to home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See issue
        #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            return pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the lookup fails, this is a fatal error under strict_tilde;
        otherwise the ~ or ~user text is returned literally.
        """
        user_name = part.user_name

        if user_name is None:
            home_dir = self.GetMyHomeDir()
        else:
            home_dir = pyos.GetHomeDir(user_name)

        if home_dir is not None:
            return home_dir

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        literal = '~'
        if user_name is not None:
            literal = literal + user_name  # mycpp doesn't have +=
        return literal
550
551
552class AbstractWordEvaluator(StringWordEvaluator):
553 """Abstract base class for word evaluators.
554
555 Public entry points:
556 EvalWordToString EvalForPlugin EvalRhsWord
557 EvalWordSequence EvalWordSequence2
558 """
559
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store the collaborators needed for word evaluation.

    arith_ev, expr_ev, prompt_ev and unsafe_arith start out as None; this
    constructor does not set them to anything else.
    """
    # Collaborators passed in by the caller
    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.tilde_ev = tilde_ev
    self.splitter = splitter
    self.errfmt = errfmt

    # Evaluators that are not available at construction time
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator
    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith

    self.globber = glob_.Globber(exec_opts)
585
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract: must be overridden by subclasses."""
    raise NotImplementedError()
589
590 def _EvalCommandSub(self, cs_part, quoted):
591 # type: (CommandSub, bool) -> part_value_t
592 """Abstract since it has a side effect."""
593 raise NotImplementedError()
594
595 def _EvalProcessSub(self, cs_part):
596 # type: (CommandSub) -> part_value_t
597 """Abstract since it has a side effect."""
598 raise NotImplementedError()
599
600 def _EvalVarNum(self, var_num):
601 # type: (int) -> value_t
602 assert var_num >= 0
603 return self.mem.GetArgNum(var_num)
604
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate $? and so forth.

    $@ and $* yield the positional arguments as a BashArray and record in
    vsub_state.join_array whether that array must be joined afterwards.
    $@ is special: it needs to know whether it is in a double quoted context.

      - "$@" stays an ARRAY (join_array is False)
      - $@ in a normal context is joined to a STRING, which then will be
        subject to splitting
      - $* and "$*" are always joined
    """
    if op_id == Id.VSub_At:
        result = value.BashArray(self.mem.GetArgv())  # type: value_t
        # "$@" evaluates to an array, $@ should be decayed
        vsub_state.join_array = not quoted

    elif op_id == Id.VSub_Star:
        result = value.BashArray(self.mem.GetArgv())
        # $* "$*" are both decayed
        vsub_state.join_array = True

    elif op_id == Id.VSub_Hyphen:
        result = value.Str(_GetDollarHyphen(self.exec_opts))

    else:
        result = self.mem.GetSpecialVar(op_id)

    return result
634
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
        vsub_state,  # type: VarSubState
):
    # type: (...) -> bool
    """Apply a test operator like ${a:-x} ${a:+x} ${a:=x} ${a:?x}.

    The operators without ':' only test for an unset value (or an array
    without values); the ':' variants also treat an empty value as "falsey".

      ${a:-}       appends the argument's part values when 'a' is falsey
      ${a:+}       appends the argument's part values when 'a' is NOT falsey
      ${a:=}       like ${a:-}, but also assigns the decayed string to the
                   place described by vtest_place
      ${a:?error}  dies with a message when 'a' is falsey

    Multiple part values can result from one operator, e.g.

      echo X-${a:-'def'"ault"}-X
      echo ${a:-x"$@"x}

    Args:
      val: the value being tested
      op: the operator token and its argument word
      quoted: whether the substitution is in a double quoted context
      part_vals: out param that receives the argument's part values
      vtest_place: name (and optional index) to assign to for ${a=x}
      blame_token: location for errors
      vsub_state: consulted for join_array, which selects the separator used
                  by the emptiness test on arrays

    Returns:
      Whether part_vals was mutated
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # The ':' variants test for "unset or empty" rather than just "unset"
    is_colon_op = tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                             Id.VTest_ColonQMark, Id.VTest_ColonPlus)

    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            if is_colon_op:
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray, value_e.BashAssoc):
            if val.tag() == value_e.BashArray:
                val = cast(value.BashArray, UP_val)
                strs = bash_impl.BashArray_GetValues(val)
            elif val.tag() == value_e.BashAssoc:
                val = cast(value.BashAssoc, UP_val)
                strs = bash_impl.BashAssoc_GetValues(val)
            else:
                raise AssertionError()

            if is_colon_op:
                # "$*" - the separator is the first character of IFS
                # $* $@ "$@" - the separator is a space
                if quoted and vsub_state.join_array:
                    sep_width = len(self.splitter.GetJoinChar())
                else:
                    sep_width = 1

                # Unset array entries (None) contribute nothing to the
                # joined string, so they must not be measured or counted.
                defined = []  # type: List[str]
                for s in strs:
                    if s is not None:
                        defined.append(s)

                # We test whether the joined string will be empty.  When
                # the separator is empty, all the elements need to be
                # empty.  When the separator is non-empty, one element is
                # allowed at most and needs to be an empty string if any.
                if sep_width == 0:
                    is_falsey = True
                    for s in defined:
                        if len(s) != 0:
                            is_falsey = False
                            break
                else:
                    is_falsey = len(defined) == 0 or (
                        len(defined) == 1 and len(defined[0]) == 0)
            else:
                # TODO: allow undefined
                is_falsey = len(strs) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # Blame the substitution so the error has a location
                e_die("Can't assign to special variable", blame_token)
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array' could
                # avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
834
def _Count(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    Strings are counted in UTF-8 characters.  If the string can't be
    decoded, the error is re-raised under strict_word_eval; otherwise a
    warning is printed and -1 is returned.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?

            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                return string_ops.CountUtf8Chars(val.s)
            except error.Strict as e:
                # Add this here so we don't have to add it so far down the stack.
                # TODO: It's better to show BOTH this CODE an the actual DATA
                # somehow.
                e.location = token

                if not self.exec_opts.strict_word_eval():
                    # NOTE: Doesn't make the command exit with 1; it just returns a
                    # length of -1.
                    self.errfmt.PrettyPrintError(e, prefix='warning: ')
                    return -1
                raise

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            return bash_impl.BashArray_Count(val)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            return bash_impl.BashAssoc_Count(val)

        elif case(value_e.SparseArray):
            val = cast(value.SparseArray, UP_val)
            return bash_impl.SparseArray_Count(val)

        else:
            raise error.TypeErr(
                val, "Length op expected Str, BashArray, BashAssoc", token)

    raise AssertionError('for -Wreturn-type in C++')
881
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    Args:
      val: a BashArray (yields its indices as strings) or a BashAssoc
           (yields its keys)
      token: location for the type error

    Returns:
      A value.BashArray of the keys.

    Raises:
      error.TypeErr: if val is neither a BashArray nor a BashAssoc
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
            return value.BashArray(indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            keys = bash_impl.BashAssoc_GetKeys(val)
            return value.BashArray(keys)

        else:
            # The message names the types this op accepts.  It used to say
            # 'Str', which is not accepted here.
            raise error.TypeErr(val,
                                'Keys op expected BashArray or BashAssoc',
                                token)
903
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    The value is turned into the text of a variable reference, which is
    parsed and then evaluated with _VarRefValue().

    Args:
      blame_tok: 'foo' for ${!foo}

    Raises:
      error.TypeErr: if val can't hold a variable reference
      error.VarSubFailure: if the reference text fails to parse
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            ref_text = val.s

        elif case(value_e.Undef):
            # bash-4.4 returned value.Undef here.  bash-5.0 started to treat
            # the variable name to be empty so that the indirection fails.
            ref_text = ''

        elif case(value_e.BashArray):  # caught earlier but OK
            val = cast(value.BashArray, UP_val)
            # When there are more than one element in the array, this
            # produces a wrong variable name containing spaces.
            ref_text = ' '.join(bash_impl.BashArray_GetValues(val))

        elif case(value_e.BashAssoc):  # caught earlier but OK
            val = cast(value.BashAssoc, UP_val)
            ref_text = ' '.join(bash_impl.BashAssoc_GetValues(val))

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)

    try:
        ref_part = self.unsafe_arith.ParseVarRef(ref_text, blame_tok)
    except error.FatalRuntime as e:
        # Convert the fatal error into a VarSubFailure with the same
        # message and location
        raise error.VarSubFailure(e.msg, e.location)

    return self._VarRefValue(ref_part, quoted, vsub_state, vtest_place)
941
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix operator of kind VOp1, e.g. ${x#prefix}.

    On a string the operator is applied once.  On a BashArray or BashAssoc
    it is applied to each value, and the result is a BashArray.  Unset array
    entries (None) are skipped instead of being passed to the string
    operation.

    Args:
      val: the value to operate on; must not be Undef
      op: the operator token and its argument word (a pattern)

    Raises:
      error.TypeErr: if val is not a Str, BashArray or BashAssoc
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)

    if op_kind == Kind.VOp1:
        # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
        # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
        # shortcut for constant strings.
        arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
        assert arg_val.tag() == value_e.Str

        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Str):
                val = cast(value.Str, UP_val)
                s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
                                               has_extglob)
                #log('%r %r -> %r', val.s, arg_val.s, s)
                new_val = value.Str(s)  # type: value_t

            elif case(value_e.BashArray, value_e.BashAssoc):
                # get values
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = bash_impl.BashArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have this too.
                strs = []  # type: List[str]
                for elem in values:
                    if elem is None:
                        continue  # unset array entry: nothing to transform
                    strs.append(
                        string_ops.DoUnarySuffixOp(elem, op.op, arg_val.s,
                                                   has_extglob))
                new_val = value.BashArray(strs)

            else:
                raise error.TypeErr(
                    val, 'Unary op expected Str, BashArray, BashAssoc',
                    op.op)

    else:
        raise AssertionError(Kind_str(op_kind))

    return new_val
991
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply pattern substitution, e.g. ${x/pat/replace}.

    The glob pattern is translated to an ERE.  On a string the replacement
    is applied once.  On a BashArray or BashAssoc it is applied to each
    value, and the result is a BashArray.  Unset array entries (None) are
    skipped instead of being passed to the replacer.

    Dies if the pattern contains an extended glob.

    Raises:
      error.TypeErr: if val is not a Str, BashArray or BashAssoc
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from !(*.py) to
    # ERE!  You would need an engine that supports negation?  (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s
    else:
        replace_str = ''

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    with tagswitch(val) as case2:
        if case2(value_e.Str):
            str_val = cast(value.Str, val)
            s = replacer.Replace(str_val.s, op)
            val = value.Str(s)

        elif case2(value_e.BashArray, value_e.BashAssoc):
            if val.tag() == value_e.BashArray:
                array_val = cast(value.BashArray, val)
                values = bash_impl.BashArray_GetValues(array_val)
            elif val.tag() == value_e.BashAssoc:
                assoc_val = cast(value.BashAssoc, val)
                values = bash_impl.BashAssoc_GetValues(assoc_val)
            else:
                raise AssertionError()

            strs = []  # type: List[str]
            for elem in values:
                if elem is None:
                    continue  # unset array entry: nothing to replace
                strs.append(replacer.Replace(elem, op))
            val = value.BashArray(strs)

        else:
            raise error.TypeErr(
                val, 'Pat Sub op expected Str, BashArray, BashAssoc',
                op.slash_tok)

    return val
1045
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate ${x:begin} or ${x:begin:length}.

    The begin and length expressions are evaluated arithmetically, and the
    slicing itself is done by _PerformSlice().

    Args:
      var_name: None for $* and $@, in which case $0 takes part in the slice
      part: used for error locations

    An error.Strict raised while slicing is re-raised under
    strict_word_eval.  Otherwise a warning is printed and the result is an
    empty string or empty array.
    """
    begin = self.arith_ev.EvalToBigInt(op.begin)

    # Note: _PerformSlice() accepts a negative length for strings (it's an
    # end position), and dies on a negative length for arrays.
    if op.length:
        has_length = True
        length = self.arith_ev.EvalToInt(op.length)
    else:
        has_length = False
        length = -1

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise

        self.errfmt.PrettyPrintError(e, prefix='warning: ')
        # Fall back to an empty value of the same kind
        with tagswitch(val) as case2:
            if case2(value_e.Str):
                val = value.Str('')
            elif case2(value_e.BashArray):
                val = value.BashArray([])
            else:
                raise NotImplementedError()

    return val
1077
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value_t, bool]
    """Apply a nullary suffix op: ${x@P}, ${x@Q}, or ${x@a}.

    Args:
      val: the value the operator is applied to
      op: the operator token.  op.id selects the branch; the token is also
          the error location for unsupported types and operators.
      var_name: name used to look up the cell flags for @a, or None
          (e.g. ${?@a})
      vsub_token: forwarded to _ProcessUndef as the token to blame
      vsub_state: forwarded to _ProcessUndef; its array_ref and h_value
          fields are also read here

    Returns:
      A (result, quoted2) tuple.  quoted2 is set to True only by @Q applied
      to a Str.
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # ${x@P}: expand the value as a prompt string.
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # NOTE(review): _ProcessUndef above appears to either raise
                # or return a Str/BashArray, so this branch looks
                # unreachable -- confirm.
                result = value.Str('')  # type: value_t
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val.s)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            elif case(value_e.BashArray, value_e.BashAssoc):
                # Vectorized: evaluate each element as a prompt.  Unset
                # (None) slots of a BashArray are dropped first.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    self.prompt_ev.EvalPrompt(s).replace(
                        '\x01', '').replace('\x02', '') for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # ${x@Q}: shell-quote the value.
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                if vsub_state.array_ref is not None:
                    result = value.BashArray([])
                else:
                    result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.BashAssoc):
                # Vectorized: encode each element.  Unset (None) slots of a
                # BashArray are dropped first.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [s for s in bash_impl.BashArray_GetValues(val) if s is not None]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # ${x@a}: produce the attribute flags of the variable.
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # Simulated flags: 'a' / 'A' come from the type of the holder value
        # (vsub_state.h_value); 'r', 'x' and 'n' come from the variable's
        # cell, when there is one.  See spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(vsub_state.h_value) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        # The flag string is repeated once per element: 1 for a scalar,
        # the element count for arrays.
        count = 1
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # NOTE(review): val has already been through _ProcessUndef,
                # so this branch looks unreachable -- confirm.
                count = 0
            elif case(value_e.BashArray):
                val = cast(value.BashArray, UP_val)
                count = bash_impl.BashArray_Count(val)
            elif case(value_e.BashAssoc):
                val = cast(value.BashAssoc, UP_val)
                count = bash_impl.BashAssoc_Count(val)

        result = value.BashArray([''.join(chars)] * count)

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1195
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle the whole-array subscripts ${a[@]} and ${a[*]}.

    The value itself is returned unchanged; the work is done through side
    effects on vsub_state:

    - join_array records whether the array should later be joined
    - array_ref records the name token when val is Undef, for a later
      error message

    Raises (via e_die) if val is a Str and strict_array is on.
    """
    whole_op = cast(bracket_op.WholeArray, part.bracket_op)
    op_id = whole_op.op_id

    if op_id == Id.Lit_At:
        op_str = '@'
        # ${a[@]} decays but "${a[@]}" doesn't
        should_join = not quoted
    elif op_id == Id.Arith_Star:
        op_str = '*'
        # both ${a[*]} and "${a[*]}" decay
        should_join = True
    else:
        raise AssertionError(op_id)  # unknown
    vsub_state.join_array = should_join

    tag = val.tag()
    if tag == value_e.Undef:
        # For an undefined array, we save the token of the array
        # reference for the later error message.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str and self.exec_opts.strict_array():
        e_die("Can't index string with %s" % op_str, loc.WordPart(part))

    # BashArray, SparseArray and BashAssoc need no processing here.  Other
    # YSH types such as List, Dict, and Float are not supported; error
    # messages are printed later, so they are returned without
    # modification too.
    return val
1228
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Look up a single element for a subscript like ${a[i+1]} or ${A[k]}.

    Args:
      val: the value being indexed
      part: the BracedVarSub; its bracket_op holds the index expression
      vtest_place: out param.  Its index field is set to the evaluated
          integer index or string key.

    Returns:
      val itself if it is Undef (checked later by the caller); otherwise
      value.Str for a present element, or value.Undef for a missing one.

    Raises:
      A fatal error (e_die) for a Str, and error.TypeErr for any type
      other than BashArray, SparseArray or BashAssoc.
    """
    anode = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    tag = val.tag()
    if tag == value_e.Undef:
        return val  # it will be checked later

    elem = None  # type: Optional[str]
    if tag == value_e.Str:
        # Bash treats any string as an array, so we can't add our own
        # behavior here without making valid OSH invalid bash.
        e_die("Can't index string %r with integer" % part.var_name,
              part.name_tok)

    elif tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        index = self.arith_ev.EvalToInt(anode)
        vtest_place.index = a_index.Int(index)

        elem, error_code = bash_impl.BashArray_GetElement(array_val, index)
        if error_code == error_code_e.IndexOutOfRange:
            # Note: Bash outputs warning but does not make it a real
            # error.  We follow the Bash behavior here.
            self.errfmt.Print_(
                "Index %d out of bounds for array of length %d" %
                (index, bash_impl.BashArray_Length(array_val)),
                blame_loc=part.name_tok)

    elif tag == value_e.SparseArray:
        sparse_val = cast(value.SparseArray, val)
        big_index = self.arith_ev.EvalToBigInt(anode)
        vtest_place.index = a_index.Int(mops.BigTruncate(big_index))

        elem, error_code = bash_impl.SparseArray_GetElement(
            sparse_val, big_index)
        if error_code == error_code_e.IndexOutOfRange:
            # Same as above: warn, but do not fail.
            big_length = bash_impl.SparseArray_Length(sparse_val)
            self.errfmt.Print_(
                "Index %s out of bounds for array of length %s" %
                (mops.ToStr(big_index), mops.ToStr(big_length)),
                blame_loc=part.name_tok)

    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        # Location could also be attached to bracket_op?  But
        # arith_expr.VarSub works OK too
        key = self.arith_ev.EvalWordToString(
            anode, blame_loc=location.TokenForArith(anode))

        vtest_place.index = a_index.Str(key)  # out param
        elem = bash_impl.BashAssoc_GetElement(assoc_val, key)

    else:
        raise error.TypeErr(val, 'Index op expected BashArray, BashAssoc',
                            loc.WordPart(part))

    # A missing element becomes Undef; a present one becomes a Str.
    if elem is None:
        return value.Undef
    return value.Str(elem)
1307
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part.

    Args:
      parts: the word parts found between the double quotes
      part_vals: output param to append to.
    """
    # A double quoted part can yield an array:
    #   $ a=(1 2); b=(3); $ c=(4 5)
    #   $ argv "${a[@]}${b[@]}${c[@]}"
    #   ['1', '234', '5']
    #
    # and so can one with several parts:
    #   $ argv "${a[@]}${undef[@]:-${c[@]}}"
    #   ['1', '24', '5']

    if len(parts) == 0:
        # Special case for "".  The parser outputs (DoubleQuoted []),
        # instead of (DoubleQuoted [Literal '']).  This is better but it
        # means we have to check for it.
        part_vals.append(Piece('', True, False))

    # No iterations happen for the empty case handled above.
    for sub_part in parts:
        self._EvalWordPart(sub_part, part_vals, QUOTED)
1334
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    The left token of dq_part is passed to _ConcatPartVals as the
    location to blame.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    joined = self._ConcatPartVals(evaluated, dq_part.left)
    return joined
1344
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array such as $* to one string.

    Unset (None) elements are skipped; the remaining ones are joined with
    the splitter's join character.
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for item in bash_impl.BashArray_GetValues(val):
        if item is not None:
            present.append(item)

    return value.Str(join_char.join(present))
1352
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty value, or fail under nounset.

    Args:
      val: any value; anything other than Undef is returned as is
      name_tok: token of the variable name, blamed for an undefined
          variable (a leading '$' is stripped from the reported name)
      vsub_state: if its array_ref is set, the value is treated as an
          undefined array and that token is blamed instead

    Returns:
      val if it is defined; otherwise an empty BashArray (array reference)
      or an empty Str.

    Raises:
      A fatal error (e_die) if val is Undef and nounset is on.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    nounset = self.exec_opts.nounset()

    array_tok = vsub_state.array_ref
    if array_tok is not None:
        if nounset:
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
        return value.BashArray([])

    if nounset:
        tok_str = lexer.TokenVal(name_tok)
        if tok_str.startswith('$'):
            name = tok_str[1:]
        else:
            name = tok_str
        e_die('Undefined variable %r' % name, name_tok)
    return value.Str('')
1374
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a ${...} substitution, if there is one.

    With a bracket op, dispatch to _WholeArray (${a[@]}, ${a[*]}) or
    _ArrayIndex (${a[i]}).

    Without one, a named BashArray / BashAssoc is either decayed (when
    ShouldArrayDecay allows it) or rejected with a fatal error.  Any other
    value is returned unchanged.
    """
    b_op = part.bracket_op
    if b_op:
        op_tag = b_op.tag()
        if op_tag == bracket_op_e.WholeArray:
            return self._WholeArray(val, part, quoted, vsub_state)
        if op_tag == bracket_op_e.ArrayIndex:
            return self._ArrayIndex(val, part, vtest_place)
        raise AssertionError(op_tag)

    # no bracket op
    var_name = vtest_place.name
    if var_name is None:
        return val

    val_tag = val.tag()
    if val_tag != value_e.BashArray and val_tag != value_e.BashAssoc:
        return val

    has_no_ops = not (part.prefix_op or part.suffix_op)
    if not ShouldArrayDecay(var_name, self.exec_opts, has_no_ops):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)" %
            var_name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1403
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Evaluate the name and bracket op of a var sub, returning a value_t.

    Duplicates some logic from _EvalBracedVarSub.

    Side effects: sets vtest_place.name for a named variable, and records
    the looked-up value in vsub_state.h_value before the bracket op is
    applied.
    """
    name_id = part.name_tok.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if name_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)
    elif name_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))
    else:
        # $* decays
        val = self._EvalSpecialVar(name_id, quoted, vsub_state)

    # update h-value (i.e., the holder of the current value)
    vsub_state.h_value = val

    # We don't need var_index because it's only for L-Values of test ops?
    if self.exec_opts.eval_unsafe_arith():
        return self._EvalBracketOp(val, part, quoted, vsub_state,
                                   vtest_place)

    # Otherwise the _allow_command_sub option is switched off while the
    # bracket op is evaluated.
    with state.ctx_Option(self.mutable_opts,
                          [option_i._allow_command_sub], False):
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)
    return val
1436
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the BracedVarSub node (name token, optional prefix op, bracket
          op and suffix op)
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array of names; every
                # other form is joined into one Piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
    # Record the value that was looked up, before the bracket op changes
    # it; _Nullary reads h_value for @a.
    vsub_state.h_value = val

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok,
                                         vsub_state):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef -> empty conversion (or the nounset
        # error) applies.
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1617
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate a flat list of part values into one string.

    Args:
      part_vals: String (Piece) and Array part values
      location: blamed when an array shows up under strict_array

    Returns:
      The pieces joined with no separator.  Array parts are first joined
      on a single space, with unset (None) entries skipped.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        tag = pv.tag()
        if tag == part_value_e.String:
            piece = cast(Piece, pv)
            chunks.append(piece.s)

        elif tag == part_value_e.Array:
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"
                e_die("Illegal array word part (strict_array)", location)

            array_part = cast(part_value.Array, pv)
            # It appears to not respect IFS
            # TODO: eliminate double join()?
            present = [s for s in array_part.strs if s is not None]
            chunks.append(' '.join(present))

        else:
            raise AssertionError()

    return ''.join(chunks)
1647
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution to a single str.

    The substitution is evaluated as unquoted, and the resulting part
    values are concatenated with _ConcatPartVals.

    Example: the ${foo} in  var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)

    # blame ${ location
    joined = self._ConcatPartVals(evaluated, part.left)
    return joined
1658
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a simple substitution like $foo, $1 or $@.

    Args:
      part: the SimpleVarSub node
      part_vals: output param; exactly one part value is appended
      quoted: whether the substitution appears in a quoted context
    """
    tok = part.tok
    tok_id = tok.id
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok_id == Id.VSub_DollarName:
        var_name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(var_name)
        val_tag = val.tag()
        if val_tag == value_e.BashArray or val_tag == value_e.BashAssoc:
            if not ShouldArrayDecay(var_name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._ProcessUndef(val, tok, vsub_state)

    # An array is only joined into a string when vsub_state asks for it.
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1699
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple substitution like $foo to a single str.

    The substitution is evaluated as unquoted, and the resulting part
    values are concatenated with _ConcatPartVals, blaming the token of
    the substitution.

    Example: the $foo in  var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    joined = self._ConcatPartVals(evaluated, node.tok)
    return joined
1709
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Appends, in order: the opening operator, each evaluated arm (with a
    '|' piece between arms), and the closing ')'.  The operator, separator
    and closer pieces are unquoted and are not split.
    """
    op = part.op
    # The comma form of the operator is written out as '@('.
    op_str = '@(' if op.id == Id.ExtGlob_Comma else lexer.LazyStr(op)

    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    is_first = True
    for arm in part.arms:
        if not is_first:
            part_vals.append(Piece('|', False, False))  # separator
        is_first = False
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1727
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values of the word
      w: the word being translated; only used as the error location
      glob_parts: output param.  Gets every string piece, and a '*' in
          place of each nested ExtGlob.
      fnmatch_parts: output param.  Gets every string piece, including the
          ones inside nested ExtGlob parts.

    Raises:
      A fatal error (e_die) if an Array part value is present.
    """
    # The loop index was never used, so iterate over the values directly.
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(part_val.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    s = part_val.s
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # keep appending fnmatch_parts, but replace glob_parts with
                # '*': the nested glob pieces go to a throwaway list
                self._TranslateExtGlob(part_val.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1763
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate; its tag selects the branch below
      part_vals: output param to append to
      flags: bit flags; only QUOTED and IS_SUBST are read here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are not valid as an ordinary word part.
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            # Always treated as quoted, and never split.
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            # Always treated as quoted, and never split.
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            # The left token tells command subs apart from process subs.
            id_ = part.left_token.id
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The third Piece argument is the negation of quoted here;
            # going by the other comments in this function it is the
            # "do split" flag.
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are emitted as unquoted pieces around the
            # (optional) child word.
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1880
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending its part values to part_vals.

    An Empty word contributes one empty Piece whose quoting follows the
    QUOTED bit of eval_flags; a Compound word is handed to
    _EvalWordToParts with the same flags.
    """
    tag = w.tag()
    if tag == rhs_word_e.Empty:
        is_quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', is_quoted, not is_quoted))

    elif tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, w)
        self._EvalWordToParts(compound, part_vals, eval_flags)

    else:
        raise AssertionError()
1896
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Evaluates every part of w and appends the results to part_vals.  Note
    that this is a TREE.

    A word containing an extended glob part is a special case because of
    the way we use glob() and then fnmatch(..., FNM_EXTMATCH) to implement
    extended globs.  It's hard to carry that extra information all the
    way past the word splitting stage.

    OSH semantic limitations: If a word has an extended glob part, then
    1. It can't have an array
    2. Word splitting of unquoted words isn't respected

    Raises:
      error.FailGlob if ExpandExtended() reports a negative count, and a
      fatal error (e_die) if the word has an extended glob but neither
      EXTGLOB_FILES nor EXTGLOB_NESTED was requested.
    """
    evaluated = []  # type: List[part_value_t]
    has_extglob = False
    for word_part_ in w.parts:
        if word_part_.tag() == word_part_e.ExtGlob:
            has_extglob = True
        self._EvalWordPart(word_part_, evaluated, eval_flags)

    if not has_extglob:
        part_vals.extend(evaluated)
        return

    # Caller REQUESTED extglob evaluation, AND we parsed word_part.ExtGlob()
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))
        return

    if bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)
        return

    # e.g. simple_word_eval, assignment builtin
    e_die('Extended glob not allowed in this word', w)
1953
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the part values to convert; nested ExtGlob part values
          are handled recursively
      w: the word, used as the error location (it could just be a span ID)
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE selects how quoted pieces
          are escaped; QUOTE_FNMATCH is also required for ExtGlob parts
      strs: output param; one string is appended per String or Array part
    """
    for pv in part_vals:
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, pv)
            text = piece.s
            if piece.quoted:
                if eval_flags & QUOTE_FNMATCH:
                    # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                    text = glob_.GlobEscape(text)
                elif eval_flags & QUOTE_ERE:
                    text = glob_.ExtendedRegexEscape(text)
            strs.append(text)

        elif tag == part_value_e.Array:
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"

                # TODO: This attributes too coarsely, to the word rather than the
                # parts.  Problem: the word is a TREE of parts, but we only have a
                # flat list of part_vals.  The only case where we really get arrays
                # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                e_die(
                    "This word should yield a string, but it contains an array",
                    w)

                # TODO: Maybe add detail like this.
                #e_die('RHS of assignment should only have strings.  '
                #      'To assign arrays, use b=( "${a[@]}" )')

            array_part = cast(part_value.Array, pv)
            # It appears to not respect IFS
            present = [s for s in array_part.strs if s is not None]
            strs.append(' '.join(present))  # TODO: eliminate double join()?

        elif tag == part_value_e.ExtGlob:
            # Extended globs are only allowed where we expect them!
            if not bool(eval_flags & QUOTE_FNMATCH):
                e_die('extended glob not allowed in this word', w)

            # recursive call
            nested = cast(part_value.ExtGlob, pv)
            self._PartValsToString(nested.part_vals, w, eval_flags, strs)

        else:
            raise AssertionError()
2009
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: may contain a quoting algorithm (QUOTE_FNMATCH etc.),
          which is applied when the part values are turned into strings
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    compound = cast(CompoundWord, UP_w)

    # Fast path.  QUOTE_FNMATCH etc. breaks this optimization, so it is
    # only tried without flags.
    if eval_flags == 0:
        literal = word_.FastStrEval(compound)
        if literal is not None:
            return value.Str(literal)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark
    evaluated = []  # type: List[part_value_t]
    for word_part_ in compound.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(word_part_, evaluated, 0)

    pieces = []  # type: List[str]
    self._PartValsToString(evaluated, compound, eval_flags, pieces)
    return value.Str(''.join(pieces))
2037
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Evaluate a word to a pattern string, and report extended globs.

    Like EvalWordToString(), except that the parts are always joined with
    QUOTE_FNMATCH, and an empty RHS word is accepted.

    Args:
      UP_w: rhs_word.Empty or a CompoundWord.

    Returns:
      (pattern, has_extglob), where has_extglob is True if one of the
      word's own parts is a word_part ExtGlob.  An empty word gives
      ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    compound = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for part in compound.parts:
        if part.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        # Parts are evaluated with flags 0; QUOTE_FNMATCH only applies to
        # the join below.
        self._EvalWordPart(part, evaluated, 0)

    pieces = []  # type: List[str]
    self._PartValsToString(evaluated, compound, QUOTE_FNMATCH, pieces)
    pattern = value.Str(''.join(pieces))
    return pattern, saw_extglob
2058
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Evaluate a word to a string without letting errors escape.

    This wraps EvalWordToString().  Runtime errors like $(( 1 / 0 )) are
    turned into a placeholder string, and mutations of $? like $(exit 42)
    are confined by ctx_Registers.

    Similar to ExprEvaluator.PluginCall().

    Returns:
      The evaluated string, or one of these placeholders:
        '<Runtime error: ...>' on error.FatalRuntime
        '<I/O error: ...>' on IOError or OSError
        '<Ctrl-C>' on KeyboardInterrupt
    """
    # ctx_Registers is there to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as fatal:
            msg = '<Runtime error: %s>' % fatal.UserErrorString()
            result = value.Str(msg)

        except (IOError, OSError) as io_err:
            msg = '<I/O error: %s>' % pyutil.strerror(io_err)
            result = value.Str(msg)

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2081
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Evaluate the right-hand side of an assignment.

    There is no splitting.

    Args:
      UP_w: rhs_word.Empty or a CompoundWord.

    Returns:
      value.Str('') for an empty RHS
      value.BashArray when the word is a single ShArrayLiteral, a=(1 2)
      value.BashAssoc when the word is a single BashAssocLiteral
      value.Str in all other cases, via EvalWordToString()
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so compare against rhs_word_e (not word_e),
    # which is also what EvalWordToPattern() does.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words
        # that don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            # Brace expansion first, then regular evaluation of the items
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # Keys and values are each evaluated to a single string.  A
            # repeated key overwrites the earlier entry.
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2118
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args appended to argv.

    - A frame whose pieces are all empty and all unquoted is elided.
    - A frame whose pieces are all quoted is joined as is, with no
      splitting or globbing.
    - Otherwise the pieces are escaped, joined, split by self.splitter,
      and each resulting field is either expanded by self.globber or
      glob-unescaped.

    Args:
      frame: the pieces that make up the frame
      argv: output list, which is appended to

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count
    """
    has_content = False
    every_quoted = True
    some_quoted = False

    #log('--- frame %s', frame)

    for piece in frame:
        if len(piece.s) != 0:
            has_content = True

        if piece.quoted:
            some_quoted = True
        else:
            every_quoted = False

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if not has_content and not some_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if every_quoted:
        quoted_strs = [piece.s for piece in frame]
        argv.append(''.join(quoted_strs))
        return

    will_glob = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    escaped = []  # type: List[str]
    for piece in frame:
        # First layer: escaping for the globbing step
        if piece.quoted and will_glob:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        # Second layer: escaping for the splitting step
        if not piece.do_split:
            frag = self.splitter.Escape(frag)
        else:
            frag = _BackslashEscape(frag)

        escaped.append(frag)

    if 0:
        log('---')
        log('FRAGS')
        for i, frag in enumerate(escaped):
            log('(%d) %s', i, frag)
        log('')

    joined = ''.join(escaped)
    #log('flat: %r', joined)

    fields = self.splitter.SplitForWordEval(joined)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if some_quoted and len(fields) == 0:
        argv.append('')
        return

    #log('split args: %r', fields)
    for field in fields:
        if not glob_.LooksLikeGlob(field):
            argv.append(glob_.GlobUnescape(field))
            continue

        # Expand() appends matches to argv itself
        num_matches = self.globber.Expand(field, argv)
        if num_matches < 0:
            # TODO: location info, with span IDs carried through the frame
            raise error.FailGlob('Pattern %r matched no files' % field,
                                 loc.Missing)
2203
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of args, without splitting or globbing.

    Helper for _EvalAssignBuiltin, since splitting and globbing are
    disabled for assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]

    Returns:
      One string per non-empty frame of the word, where each string is the
      concatenation of the frame's pieces.
    """
    part_vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, part_vals, 0)  # not double quoted

    result = []  # type: List[str]
    frames = _MakeWordFrames(part_vals)
    for frame in frames:
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        piece_strs = [piece.s for piece in frame]
        result.append(''.join(piece_strs))  # no split or glob

    #log('argv: %s', result)
    return result
2223
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Evaluate the words of an assignment builtin into flags and pairs.

    Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin, stored in the result
      arg0: name of the builtin; it becomes the first entry of the flags
      words: all words of the command.  words[0] is recorded as the
        location of arg0, and evaluation starts at index meta_offset + 1.
      meta_offset: index of the assignment keyword in words, i.e. the
        number of 'builtin' / 'command' words before it

    Returns:
      cmd_value.Assign with the flags, their locations as a parallel list,
      and the evaluated AssignArg pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # flags and flag_locs are parallel lists
    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the '=', e.g. local x=
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # Names after -f / -F are passed through as flags.
                        # Record the location as well, so that flag_locs
                        # stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2310
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate words as an assignment builtin, if arg0 names one.

    Returns:
      The result of _EvalAssignBuiltin() when consts.LookupAssignBuiltin()
      finds arg0, and None otherwise.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None  # not an assignment builtin

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2318
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr(), but for an evaluated part value.

    Only an unquoted string part can name an assignment builtin.  Any other
    part value gives None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2327
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    There is no word splitting.  Only words for which
    glob_.LooksLikeStaticGlob() is true are expanded by the globber.

    Args:
      words: the words to evaluate
      is_last_cmd: passed through to cmd_value.Argv
      allow_assign: whether the first word may select an assignment builtin

    Returns:
      cmd_value.Assign if an assignment builtin was detected, and otherwise
      cmd_value.Argv with args and a parallel list of locations.

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]  # parallel to argv

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if allow_assign and i == meta_offset:
            first_argv = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first_argv) == 1:
                assign_val = self._DetectAssignBuiltinStr(
                    first_argv[0], words, meta_offset)
                if assign_val:
                    return assign_val

            argv.extend(first_argv)
            for _ in xrange(len(first_argv)):
                arg_locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pattern = self.EvalWordToString(w)  # respects strict-array
            num_matches = self.globber.Expand(pattern.s, argv)
            if num_matches < 0:
                raise error.FailGlob(
                    'Pattern %r matched no files' % pattern.s, w)
            for _ in xrange(num_matches):
                arg_locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        if 0:
            log('')
            log('Static: part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log(' %s', entry)

        # Still need to process
        frames = _MakeWordFrames(part_vals)

        if 0:
            log('')
            log('Static: frames after _MakeWordFrames:')
            for entry in frames:
                log(' %s', entry)

        # We will still allow x"${a[@]}"x, though it's deprecated by @a,
        # which disallows such expressions at parse time.
        for frame in frames:
            if len(frame) == 0:  # empty array gives empty frame!
                continue
            piece_strs = [piece.s for piece in frame]
            argv.append(''.join(piece_strs))  # no split or glob
            arg_locs.append(w)

    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2390
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of words into a list of strings, or an assignment.

    Unlike the EvalWord*() methods, it does splitting and globbing.  When
    the simple_word_eval option is on, the work is delegated to
    SimpleEvalWordSequence2().

    Args:
      words: the words to evaluate
      is_last_cmd: passed through to cmd_value.Argv
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Assign if allow_assign is set and an assignment builtin was
      detected, and otherwise cmd_value.Argv with args and a parallel list
      of locations.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]  # parallel to argv

    # Index of the word that could be an assignment keyword:
    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    # Number of entries in argv that already have a location
    num_located = 0
    for i, w in enumerate(words):
        literal = word_.FastStrEval(w)
        if literal is not None:
            # Fast path: exactly one arg, with no splitting or globbing
            argv.append(literal)
            arg_locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                assign_val = self._DetectAssignBuiltinStr(
                    literal, words, meta_offset)
                if assign_val:
                    return assign_val

            if i <= meta_offset and _DetectMetaBuiltinStr(literal):
                meta_offset += 1

            # Bug fix: the count must be updated on every loop iteration
            num_located = len(argv)
            assert len(argv) == len(arg_locs), argv
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin,
        # and change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            first_val = part_vals[0]
            if allow_assign and i == meta_offset:
                assign_val = self._DetectAssignBuiltin(
                    first_val, words, meta_offset)
                if assign_val:
                    return assign_val

            if i <= meta_offset and _DetectMetaBuiltin(first_val):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log(' %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log(' %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more
        # args.
        for frame in frames:
            self._EvalWordFrame(frame, argv)

        # Fill in locations parallel to argv: every arg that this word
        # produced gets the word as its location.
        num_args = len(argv)
        for _ in xrange(num_args - num_located):
            arg_locs.append(w)
        num_located = num_args

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2498
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a list of strings, for arrays and for loops.

    Assignment builtins aren't allowed here, so the result of
    EvalWordSequence2() is always a cmd_value.Argv.
    """
    # The is_last_cmd argument is irrelevant.  allow_assign keeps its
    # default of False.
    UP_cmd_val = self.EvalWordSequence2(words, False)
    assert UP_cmd_val.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, UP_cmd_val)
    return argv_val.argv
2509
2510
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs with an executor.

    self.shell_ex starts out as None and must be assigned after
    construction; CheckCircularDeps() asserts that it was.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        """Pass all args to the base class, and leave shell_ex unset."""
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators and executor have been wired up."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and turn its stdout into a part value.

        Returns:
          part_value.Array of J8 lines for the @() form (Id.Left_AtParen),
          and otherwise a Piece with the whole output, which is split only
          if it's not quoted.

        Raises:
          error.Structured: with status 4 when the @() output can't be
            decoded as J8 lines
        """
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            do_split = not quoted
            return Piece(stdout_str, quoted, do_split)

        # YSH splitting algorithm: does not depend on IFS, so this isn't
        # self.splitter.SplitForWordEval(stdout_str)
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path that RunProcessSub gives."""
        dev_path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(dev_path, True, False)
2557
2558
# Placeholder that CompletionWordEvaluator._EvalCommandSub() returns in place
# of real command sub output.
_DUMMY = '__NO_COMMAND_SUB__'
2560
2561
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings
    rather than being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        """Pass all args to the base class."""
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the other evaluators have been wired up."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return the _DUMMY placeholder instead of running the command.

        The shape of the result mirrors a real command sub: an Array for
        the @() form (Id.Left_AtParen), and a Piece otherwise.
        """
        if cs_part.left_token.id != Id.Left_AtParen:
            do_split = not quoted
            return Piece(_DUMMY, quoted, do_split)

        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of starting a process."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2601
2602
2603# vim: sw=4