OILS / osh / word_eval.py View on Github | oils.pub

2561 lines, 1566 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 error_code_e,
37 AssignArg,
38 a_index,
39 a_index_e,
40 VTestPlace,
41 VarSubState,
42 Piece,
43)
44from _devbuild.gen.option_asdl import option_i, builtin_i
45from _devbuild.gen.value_asdl import (
46 value,
47 value_e,
48 value_t,
49 sh_lvalue,
50 sh_lvalue_t,
51)
52from core import bash_impl
53from core import error
54from core import pyos
55from core import pyutil
56from core import state
57from display import ui
58from core import util
59from data_lang import j8
60from data_lang import j8_lite
61from core.error import e_die
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from mycpp import mops
66from mycpp.mylib import log, tagswitch, NewDict
67from osh import braces
68from osh import glob_
69from osh import string_ops
70from osh import word_
71from ysh import expr_eval
72from ysh import val_ops
73
74from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76if TYPE_CHECKING:
77 from _devbuild.gen.syntax_asdl import word_part_t
78 from _devbuild.gen.option_asdl import builtin_t
79 from core import optview
80 from core.state import Mem
81 from core.vm import _Executor
82 from osh.split import SplitContext
83 from osh import prompt
84 from osh import sh_expr_eval
85
# Bit flags passed to _EvalWordToParts and _EvalWordPart.  Each evaluator
# only looks at some of them.
QUOTED = 0x01
IS_SUBST = 0x02

# Extended glob handling
EXTGLOB_FILES = 0x04  # allow @(cc) from file system?
EXTGLOB_MATCH = 0x08  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 0x10  # for @(one|!(two|three))

# Bit flags for EvalWordToString
QUOTE_FNMATCH = 0x20
QUOTE_ERE = 0x40

# Variables that may be referenced both as a string and as an array: for
# compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid, and
# likewise for ${FUNCNAME} and ${BASH_LINENO}.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} is allowed to mean ${a[0]}.

    Without strict_array, decay is always allowed.  With strict_array, it
    is allowed only for a plain variable substitution whose name is listed
    in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True

    # strict_array is on: only the compatibility names may still decay.
    return is_plain_var_sub and var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    A BashArray yields its element 0 and a BashAssoc yields the entry
    stored under the key '0'.  When that element is missing the result is
    value.Undef.  Any other kind of value is a caller bug (AssertionError).
    """
    tag = val.tag()
    if tag == value_e.BashArray:
        elem, status = bash_impl.BashArray_GetElement(
            cast(value.BashArray, val), 0)

        # Note: index 0 should never cause the out-of-bound index error.
        assert status == error_code_e.OK

    elif tag == value_e.BashAssoc:
        elem = bash_impl.BashAssoc_GetElement(cast(value.BashAssoc, val), '0')

    else:
        raise AssertionError(tag)

    if elem is not None:
        return value.Str(elem)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    These "meta" builtins can prefix an assignment builtin, and all of the
    following have to be recognized:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_oils.Builtin() or meta_oils.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an UNQUOTED string naming a meta builtin.

    Array parts and quoted strings never qualify.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False

    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse one argument to declare, export, etc. at runtime.

    This is a fallback to the static parsing done below.

    Args:
      arg: the evaluated argument, e.g. 'NAME', 'NAME=value', 'NAME+=value'
      blame_word: the word to blame in errors; also stored in the result

    Returns:
      An AssignArg.  Its value is None when the argument has no operator,
      and its append flag is set when the operator starts with '+'.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API
    # for that, and it probably isn't a bottleneck now
    match = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if match is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Group 1 is the name, group 3 the operator and group 4 the value.
    # Group 2 is used for grouping only; ERE doesn't have non-capturing
    # groups.
    name = match[1]
    operator = match[3]
    assert operator is not None, operator

    if len(operator) == 0:  # declare NAME (no operator)
        return AssignArg(name, None, False, blame_word)

    # declare NAME=
    rhs = value.Str(match[4])  # type: Optional[value_t]
    return AssignArg(name, rhs, operator[0] == '+', blame_word)
186
187
188# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189def _BackslashEscape(s):
190 # type: (str) -> str
191 """Double up backslashes.
192
193 Useful for strings about to be globbed and strings about to be IFS
194 escaped.
195 """
196 return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Strings (and Undef, as the empty string) become a Piece whose second
    field is `quoted` and whose third field is its negation.  BashArray and
    BashAssoc become a part_value.Array of their values.  A few YSH types
    are stringified; anything else is a type error blamed on part_loc.
    """
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _ProcessUndef, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            str_val = cast(value.Str, val)
            return Piece(str_val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, val)
            return part_value.Array(bash_impl.BashArray_GetValues(array_val))

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, val)
            # bash behavior: splice values!
            return part_value.Array(bash_impl.BashAssoc_GetValues(assoc_val))

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            text = val_ops.Stringify(val, loc.WordPart(part_loc),
                                     'Word eval ')
            return Piece(text, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part_values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A
    frame is a portion that results in zero or more args, and frames can
    never be joined with each other.  The idea exists because of arrays
    like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    The first defined element of an array joins the frame that is currently
    open; every later element opens a new frame.  Undefined (None) array
    entries are skipped.

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    frames = [frame]

    for part in part_vals:
        with tagswitch(part) as case:
            if case(part_value_e.String):
                frame.append(cast(Piece, part))

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, part)

                seen_elem = False
                for s in array_part.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would
                    # have decayed to a string.
                    piece = Piece(s, True, False)
                    if seen_elem:
                        frame = [piece]
                        frames.append(frame)  # singleton frame
                    else:
                        frame.append(piece)
                        seen_elem = True

            else:
                raise AssertionError()

    return frames
307
308
309# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part_values to one string, e.g. to decay ${a=x"$@"x}.

    String parts are used as is.  The defined (non-None) elements of an
    Array part are joined with join_char.  The per-part results are then
    concatenated with no separator.
    """
    chunks = []  # type: List[str]
    for part in part_vals:
        with tagswitch(part) as case:
            if case(part_value_e.String):
                chunks.append(cast(Piece, part).s)

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, part)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for s in array_part.strs:
                    if s is not None:
                        defined.append(s)
                chunks.append(join_char.join(defined))

            else:
                raise AssertionError()

    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Evaluate ${x:offset} or ${x:offset:length} on a string or an array.

    Args:
      val: the Str, BashArray or SparseArray to slice
      offset: start position; a negative value counts back from the end
      length: only meaningful when has_length is true.  For strings a
        negative length is an end POSITION counted back from the end of
        the string.  For arrays a negative length is a fatal error.
      has_length: whether a length was given at all
      part: blamed in error messages
      arg0_val: when not None, its string is treated as element 0 of the
        array (the caller passes $0 when slicing $@ or $*)

    Returns:
      A value.Str for a string, or a value.BashArray for either array type.
    """
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            s = cast(value.Str, val).s
            n = len(s)

            # Turn the character offset into a byte position.
            begin = mops.BigTruncate(offset)
            if begin >= 0:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)
            else:
                # Walk back -begin characters from the end of the string.
                byte_begin = n
                steps = -begin
                while steps > 0:
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
                    steps -= 1

            if not has_length:
                byte_end = n
            elif length >= 0:
                byte_end = string_ops.AdvanceUtf8Chars(s, length, byte_begin)
            else:
                # Confusing: a negative length is a POSITION relative to
                # the end of the string.
                byte_end = n
                steps = -length
                while steps > 0:
                    byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                    steps -= 1

            result = value.Str(s[byte_begin:byte_end])  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            counts_arg0 = arg0_val is not None

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(
                            cast(value.BashArray, val)))
                elif val.tag() == value_e.SparseArray:
                    array_length = bash_impl.SparseArray_Length(
                        cast(value.SparseArray, val))
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if counts_arg0:
                    array_length = mops.Add(array_length, mops.ONE)

                offset = mops.Add(offset, array_length)

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # Still negative after adjusting: nothing is selected.
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*)
                # counts $0.
                prepends_arg0 = False
                if counts_arg0:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # Offset 0 selects $0 itself, which uses up one
                        # unit of the requested length.
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    array_val = cast(value.BashArray, val)
                    elems = bash_impl.BashArray_GetValues(array_val)
                    num_elems = len(elems)

                    strs = []
                    pos = mops.BigTruncate(offset)
                    taken = 0
                    while (pos < num_elems and
                           not (has_length and taken == length)):
                        elem = elems[pos]
                        # Unset elements don't count towards the length
                        if elem is not None:
                            strs.append(elem)
                            taken += 1
                        pos += 1

                elif val.tag() == value_e.SparseArray:
                    sparse_val = cast(value.SparseArray, val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort
                    # so that we only extract the first LENGTH elements of
                    # the indices greater or equal to OFFSET.
                    first = 0
                    for index in bash_impl.SparseArray_GetKeys(sparse_val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        first = first + 1

                    all_values = bash_impl.SparseArray_GetValues(sparse_val)
                    if has_length:
                        strs = all_values[first:first + length]
                    else:
                        strs = all_values[first:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    with_arg0 = [arg0_val.s]
                    with_arg0.extend(strs)
                    strs = with_arg0

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """The part of the word evaluator that ArithEvaluator / BoolEvaluator use.

    Subclasses must override EvalWordToString.
    """

    def __init__(self):
        # type: () -> None
        """Do nothing; an explicit empty constructor is needed for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate a word to a string value.  Abstract in this class."""
        raise NotImplementedError()
480
481
482def _GetDollarHyphen(exec_opts):
483 # type: (optview.Exec) -> str
484 chars = [] # type: List[str]
485 if exec_opts.interactive():
486 chars.append('i')
487
488 if exec_opts.errexit():
489 chars.append('e')
490 if exec_opts.noglob():
491 chars.append('f')
492 if exec_opts.noexec():
493 chars.append('n')
494 if exec_opts.nounset():
495 chars.append('u')
496 # NO letter for pipefail?
497 if exec_opts.xtrace():
498 chars.append('x')
499 if exec_opts.noclobber():
500 chars.append('C')
501
502 # bash has:
503 # - c for sh -c, i for sh -i (mksh also has this)
504 # - h for hashing (mksh also has this)
505 # - B for brace expansion
506 return ''.join(chars)
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user to a home directory."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home dir: $HOME if set, else ask libc.

        Important: the libc call can FAIL, which is why we prefer $HOME.
        See issue #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            return pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        If the lookup fails, it's a fatal error under strict_tilde.
        Otherwise the ~ or ~user text is returned literally.
        """
        user = part.user_name
        if user is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
550
551
552class AbstractWordEvaluator(StringWordEvaluator):
553 """Abstract base class for word evaluators.
554
555 Public entry points:
556 EvalWordToString EvalForPlugin EvalRhsWord
557 EvalWordSequence EvalWordSequence2
558 """
559
560 def __init__(
561 self,
562 mem, # type: state.Mem
563 exec_opts, # type: optview.Exec
564 mutable_opts, # type: state.MutableOpts
565 tilde_ev, # type: TildeEvaluator
566 splitter, # type: SplitContext
567 errfmt, # type: ui.ErrorFormatter
568 ):
569 # type: (...) -> None
570 self.arith_ev = None # type: sh_expr_eval.ArithEvaluator
571 self.expr_ev = None # type: expr_eval.ExprEvaluator
572 self.prompt_ev = None # type: prompt.Evaluator
573
574 self.unsafe_arith = None # type: sh_expr_eval.UnsafeArith
575
576 self.tilde_ev = tilde_ev
577
578 self.mem = mem # for $HOME, $1, etc.
579 self.exec_opts = exec_opts # for nounset
580 self.mutable_opts = mutable_opts # for _allow_command_sub
581 self.splitter = splitter
582 self.errfmt = errfmt
583
584 self.globber = glob_.Globber(exec_opts)
585
586 def CheckCircularDeps(self):
587 # type: () -> None
588 raise NotImplementedError()
589
590 def _EvalCommandSub(self, cs_part, quoted):
591 # type: (CommandSub, bool) -> part_value_t
592 """Abstract since it has a side effect."""
593 raise NotImplementedError()
594
595 def _EvalProcessSub(self, cs_part):
596 # type: (CommandSub) -> part_value_t
597 """Abstract since it has a side effect."""
598 raise NotImplementedError()
599
600 def _EvalVarNum(self, var_num):
601 # type: (int) -> value_t
602 assert var_num >= 0
603 return self.mem.GetArgNum(var_num)
604
605 def _EvalSpecialVar(self, op_id, quoted, vsub_state):
606 # type: (int, bool, VarSubState) -> value_t
607 """Evaluate $?
608
609 and so forth
610 """
611 # $@ is special -- it need to know whether it is in a double quoted
612 # context.
613 #
614 # - If it's $@ in a double quoted context, return an ARRAY.
615 # - If it's $@ in a normal context, return a STRING, which then will be
616 # subject to splitting.
617
618 if op_id in (Id.VSub_At, Id.VSub_Star):
619 argv = self.mem.GetArgv()
620 val = value.BashArray(argv) # type: value_t
621 if op_id == Id.VSub_At:
622 # "$@" evaluates to an array, $@ should be decayed
623 vsub_state.join_array = not quoted
624 else: # $* "$*" are both decayed
625 vsub_state.join_array = True
626
627 elif op_id == Id.VSub_Hyphen:
628 val = value.Str(_GetDollarHyphen(self.exec_opts))
629
630 else:
631 val = self.mem.GetSpecialVar(op_id)
632
633 return val
634
635 def _ApplyTestOp(
636 self,
637 val, # type: value_t
638 op, # type: suffix_op.Unary
639 quoted, # type: bool
640 part_vals, # type: Optional[List[part_value_t]]
641 vtest_place, # type: VTestPlace
642 blame_token, # type: Token
643 vsub_state, # type: VarSubState
644 ):
645 # type: (...) -> bool
646 """
647 Returns:
648 Whether part_vals was mutated
649
650 ${a:-} returns part_value[]
651 ${a:+} returns part_value[]
652 ${a:?error} returns error word?
653 ${a:=} returns part_value[] but also needs self.mem for side effects.
654
655 So I guess it should return part_value[], and then a flag for raising an
656 error, and then a flag for assigning it?
657 The original BracedVarSub will have the name.
658
659 Example of needing multiple part_value[]
660
661 echo X-${a:-'def'"ault"}-X
662
663 We return two part values from the BracedVarSub. Also consider:
664
665 echo ${a:-x"$@"x}
666 """
667 eval_flags = IS_SUBST
668 if quoted:
669 eval_flags |= QUOTED
670
671 tok = op.op
672 # NOTE: Splicing part_values is necessary because of code like
673 # ${undef:-'a b' c 'd # e'}. Each part_value can have a different
674 # do_glob/do_elide setting.
675 UP_val = val
676 with tagswitch(val) as case:
677 if case(value_e.Undef):
678 is_falsey = True
679
680 elif case(value_e.Str):
681 val = cast(value.Str, UP_val)
682 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
683 Id.VTest_ColonQMark, Id.VTest_ColonPlus):
684 is_falsey = len(val.s) == 0
685 else:
686 is_falsey = False
687
688 elif case(value_e.BashArray, value_e.BashAssoc):
689 if val.tag() == value_e.BashArray:
690 val = cast(value.BashArray, UP_val)
691 strs = bash_impl.BashArray_GetValues(val)
692 elif val.tag() == value_e.BashAssoc:
693 val = cast(value.BashAssoc, UP_val)
694 strs = bash_impl.BashAssoc_GetValues(val)
695 else:
696 raise AssertionError()
697
698 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
699 Id.VTest_ColonQMark, Id.VTest_ColonPlus):
700 if vsub_state.join_array:
701 sep_width = len(self.splitter.GetJoinChar())
702 else:
703 sep_width = 1 # we use ' ' for a[@]
704
705 if sep_width == 0:
706 is_falsey = all(len(s) == 0 for s in strs)
707 else:
708 is_falsey = len(strs) == 0 or (len(strs) == 1 and
709 len(strs[0]) == 0)
710 else:
711 # TODO: allow undefined
712 is_falsey = len(strs) == 0
713
714 else:
715 # value.Eggex, etc. are all false
716 is_falsey = False
717
718 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
719 if is_falsey:
720 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
721 return True
722 else:
723 return False
724
725 # Inverse of the above.
726 elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
727 if is_falsey:
728 return False
729 else:
730 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
731 return True
732
733 # Splice and assign
734 elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
735 if is_falsey:
736 # Collect new part vals.
737 assign_part_vals = [] # type: List[part_value_t]
738 self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
739 eval_flags)
740 # Append them to out param AND return them.
741 part_vals.extend(assign_part_vals)
742
743 if vtest_place.name is None:
744 # TODO: error context
745 e_die("Can't assign to special variable")
746 else:
747 # NOTE: This decays arrays too! 'shopt -s strict_array' could
748 # avoid it.
749 rhs_str = _DecayPartValuesToString(
750 assign_part_vals, self.splitter.GetJoinChar())
751 if vtest_place.index is None: # using None when no index
752 lval = location.LName(
753 vtest_place.name) # type: sh_lvalue_t
754 else:
755 var_name = vtest_place.name
756 var_index = vtest_place.index
757 UP_var_index = var_index
758
759 with tagswitch(var_index) as case:
760 if case(a_index_e.Int):
761 var_index = cast(a_index.Int, UP_var_index)
762 lval = sh_lvalue.Indexed(
763 var_name, var_index.i, loc.Missing)
764 elif case(a_index_e.Str):
765 var_index = cast(a_index.Str, UP_var_index)
766 lval = sh_lvalue.Keyed(var_name, var_index.s,
767 loc.Missing)
768 else:
769 raise AssertionError()
770
771 state.OshLanguageSetValue(self.mem, lval,
772 value.Str(rhs_str))
773 return True
774
775 else:
776 return False
777
778 elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
779 if is_falsey:
780 # The arg is the error message
781 error_part_vals = [] # type: List[part_value_t]
782 self._EvalRhsWordToParts(op.arg_word, error_part_vals,
783 eval_flags)
784 error_str = _DecayPartValuesToString(
785 error_part_vals, self.splitter.GetJoinChar())
786
787 #
788 # Display fancy/helpful error
789 #
790 if vtest_place.name is None:
791 var_name = '???'
792 else:
793 var_name = vtest_place.name
794
795 if 0:
796 # This hint is nice, but looks too noisy for now
797 op_str = lexer.LazyStr(tok)
798 if tok.id == Id.VTest_ColonQMark:
799 why = 'empty or unset'
800 else:
801 why = 'unset'
802
803 self.errfmt.Print_(
804 "Hint: operator %s means a variable can't be %s" %
805 (op_str, why), tok)
806
807 if val.tag() == value_e.Undef:
808 actual = 'unset'
809 else:
810 actual = 'empty'
811
812 if len(error_str):
813 suffix = ': %r' % error_str
814 else:
815 suffix = ''
816 e_die("Var %s is %s%s" % (var_name, actual, suffix),
817 blame_token)
818
819 else:
820 return False
821
822 else:
823 raise AssertionError(tok.id)
824
825 def _Count(self, val, token):
826 # type: (value_t, Token) -> int
827 """Returns the length of the value, for ${#var}"""
828 UP_val = val
829 with tagswitch(val) as case:
830 if case(value_e.Str):
831 val = cast(value.Str, UP_val)
832 # NOTE: Whether bash counts bytes or chars is affected by LANG
833 # environment variables.
834 # Should we respect that, or another way to select? set -o
835 # count-bytes?
836
837 # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
838 try:
839 count = string_ops.CountUtf8Chars(val.s)
840 except error.Strict as e:
841 # Add this here so we don't have to add it so far down the stack.
842 # TODO: It's better to show BOTH this CODE an the actual DATA
843 # somehow.
844 e.location = token
845
846 if self.exec_opts.strict_word_eval():
847 raise
848 else:
849 # NOTE: Doesn't make the command exit with 1; it just returns a
850 # length of -1.
851 self.errfmt.PrettyPrintError(e, prefix='warning: ')
852 return -1
853
854 elif case(value_e.BashArray):
855 val = cast(value.BashArray, UP_val)
856 count = bash_impl.BashArray_Count(val)
857
858 elif case(value_e.BashAssoc):
859 val = cast(value.BashAssoc, UP_val)
860 count = bash_impl.BashAssoc_Count(val)
861
862 elif case(value_e.SparseArray):
863 val = cast(value.SparseArray, UP_val)
864 count = bash_impl.SparseArray_Count(val)
865
866 else:
867 raise error.TypeErr(
868 val, "Length op expected Str, BashArray, BashAssoc", token)
869
870 return count
871
872 def _Keys(self, val, token):
873 # type: (value_t, Token) -> value_t
874 """Return keys of a container, for ${!array[@]}"""
875
876 UP_val = val
877 with tagswitch(val) as case:
878 if case(value_e.BashArray):
879 val = cast(value.BashArray, UP_val)
880 indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
881 return value.BashArray(indices)
882
883 elif case(value_e.BashAssoc):
884 val = cast(value.BashAssoc, UP_val)
885 assert val.d is not None # for MyPy, so it's not Optional[]
886
887 # BUG: Keys aren't ordered according to insertion!
888 keys = bash_impl.BashAssoc_GetKeys(val)
889 return value.BashArray(keys)
890
891 else:
892 raise error.TypeErr(val, 'Keys op expected Str', token)
893
894 def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
895 # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
896 """Handles indirect expansion like ${!var} and ${!a[0]}.
897
898 Args:
899 blame_tok: 'foo' for ${!foo}
900 """
901 UP_val = val
902 with tagswitch(val) as case:
903 if case(value_e.Undef):
904 # bash-4.4 returned value.Undef here. bash-5.0 started to treat
905 # the variable name to be empty so that the indirection fails.
906 var_ref_str = ''
907
908 elif case(value_e.Str):
909 val = cast(value.Str, UP_val)
910 var_ref_str = val.s
911
912 elif case(value_e.BashArray): # caught earlier but OK
913 val = cast(value.BashArray, UP_val)
914 # When there are more than one element in the array, this
915 # produces a wrong variable name containing spaces.
916 var_ref_str = ' '.join(bash_impl.BashArray_GetValues(val))
917
918 elif case(value_e.BashAssoc): # caught earlier but OK
919 val = cast(value.BashAssoc, UP_val)
920 var_ref_str = ' '.join(bash_impl.BashAssoc_GetValues(val))
921
922 else:
923 raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
924
925 try:
926 bvs_part = self.unsafe_arith.ParseVarRef(var_ref_str, blame_tok)
927 except error.FatalRuntime as e:
928 raise error.VarSubFailure(e.msg, e.location)
929
930 return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
931
932 def _ApplyUnarySuffixOp(self, val, op):
933 # type: (value_t, suffix_op.Unary) -> value_t
934 assert val.tag() != value_e.Undef
935
936 op_kind = consts.GetKind(op.op.id)
937
938 if op_kind == Kind.VOp1:
939 # NOTE: glob syntax is supported in ^ ^^ , ,, ! As well as % %% # ##.
940 # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
941 # shortcut for constant strings.
942 arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
943 assert arg_val.tag() == value_e.Str
944
945 UP_val = val
946 with tagswitch(val) as case:
947 if case(value_e.Str):
948 val = cast(value.Str, UP_val)
949 s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
950 has_extglob)
951 #log('%r %r -> %r', val.s, arg_val.s, s)
952 new_val = value.Str(s) # type: value_t
953
954 elif case(value_e.BashArray, value_e.BashAssoc):
955 # get values
956 if val.tag() == value_e.BashArray:
957 val = cast(value.BashArray, UP_val)
958 values = bash_impl.BashArray_GetValues(val)
959 elif val.tag() == value_e.BashAssoc:
960 val = cast(value.BashAssoc, UP_val)
961 values = bash_impl.BashAssoc_GetValues(val)
962 else:
963 raise AssertionError()
964
965 # ${a[@]#prefix} is VECTORIZED on arrays. YSH should have this too.
966 strs = [
967 string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
968 has_extglob) for s in values
969 ]
970 new_val = value.BashArray(strs)
971
972 else:
973 raise error.TypeErr(
974 val, 'Unary op expected Str, BashArray, BashAssoc',
975 op.op)
976
977 else:
978 raise AssertionError(Kind_str(op_kind))
979
980 return new_val
981
982 def _PatSub(self, val, op):
983 # type: (value_t, suffix_op.PatSub) -> value_t
984
985 pat_val, has_extglob = self.EvalWordToPattern(op.pat)
986 # Extended globs aren't supported because we only translate * ? etc. to
987 # ERE. I don't think there's a straightforward translation from !(*.py) to
988 # ERE! You would need an engine that supports negation? (Derivatives?)
989 if has_extglob:
990 e_die('extended globs not supported in ${x//GLOB/}', op.pat)
991
992 if op.replace:
993 replace_val = self.EvalRhsWord(op.replace)
994 # Can't have an array, so must be a string
995 assert replace_val.tag() == value_e.Str, replace_val
996 replace_str = cast(value.Str, replace_val).s
997 else:
998 replace_str = ''
999
1000 # note: doesn't support self.exec_opts.extglob()!
1001 regex, warnings = glob_.GlobToERE(pat_val.s)
1002 if len(warnings):
1003 # TODO:
1004 # - Add 'shopt -s strict_glob' mode and expose warnings.
1005 # "Glob is not in CANONICAL FORM".
1006 # - Propagate location info back to the 'op.pat' word.
1007 pass
1008 #log('regex %r', regex)
1009 replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)
1010
1011 with tagswitch(val) as case2:
1012 if case2(value_e.Str):
1013 str_val = cast(value.Str, val)
1014 s = replacer.Replace(str_val.s, op)
1015 val = value.Str(s)
1016
1017 elif case2(value_e.BashArray, value_e.BashAssoc):
1018 if val.tag() == value_e.BashArray:
1019 array_val = cast(value.BashArray, val)
1020 values = bash_impl.BashArray_GetValues(array_val)
1021 elif val.tag() == value_e.BashAssoc:
1022 assoc_val = cast(value.BashAssoc, val)
1023 values = bash_impl.BashAssoc_GetValues(assoc_val)
1024 else:
1025 raise AssertionError()
1026 strs = [replacer.Replace(s, op) for s in values]
1027 val = value.BashArray(strs)
1028
1029 else:
1030 raise error.TypeErr(
1031 val, 'Pat Sub op expected Str, BashArray, BashAssoc',
1032 op.slash_tok)
1033
1034 return val
1035
1036 def _Slice(self, val, op, var_name, part):
1037 # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
1038
1039 begin = self.arith_ev.EvalToBigInt(op.begin)
1040
1041 # Note: bash allows lengths to be negative (with odd semantics), but
1042 # we don't allow that right now.
1043 has_length = False
1044 length = -1
1045 if op.length:
1046 has_length = True
1047 length = self.arith_ev.EvalToInt(op.length)
1048
1049 try:
1050 arg0_val = None # type: value.Str
1051 if var_name is None: # $* or $@
1052 arg0_val = self.mem.GetArg0()
1053 val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
1054 except error.Strict as e:
1055 if self.exec_opts.strict_word_eval():
1056 raise
1057 else:
1058 self.errfmt.PrettyPrintError(e, prefix='warning: ')
1059 with tagswitch(val) as case2:
1060 if case2(value_e.Str):
1061 val = value.Str('')
1062 elif case2(value_e.BashArray):
1063 val = value.BashArray([])
1064 else:
1065 raise NotImplementedError()
1066 return val
1067
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value.Str, bool]
    """Apply a nullary suffix operator: ${x@P}, ${x@Q} or ${x@a}.

    Args:
        val: the value the operator is applied to
        op: operator token; its id selects the behavior
        var_name: variable name, or None (e.g. for ${?@a})
        vsub_token: token passed to _ProcessUndef() for undefined values
        vsub_state: substitution state; h_value is read for @a

    Returns:
        (result, quoted2).  quoted2 is True only for @Q applied to a Str,
        and asks the caller to treat the result as quoted.

    Any other operator id is a fatal error ('Var op ... not implemented').
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # @P: evaluate the string as a prompt
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # @Q: shell-quote the value
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.BashAssoc):
                # Collect the element strings; unset (None) slots of an
                # indexed array are skipped.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [s for s in bash_impl.BashArray_GetValues(val) if s is not None]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                # Each element is encoded separately, then joined with a
                # single space.
                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                result = value.Str(' '.join(tmp))
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # Simulate attribute flags: 'a' / 'A' come from the type of the
        # holder value (vsub_state.h_value); 'r', 'x' and 'n' come from
        # the variable's cell below.  See spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(vsub_state.h_value) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        result = value.Str(''.join(chars))

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1153
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Process a whole-array bracket op: ${a[@]} or ${a[*]}.

    The value itself is returned unmodified; the work is done through side
    effects on vsub_state:

    - join_array records whether the array should later be joined into a
      single string.
    - array_ref records the name token when the value is Undef, for a later
      error message.

    Indexing a string with @ or * is fatal only under strict_array.
    """
    which = cast(bracket_op.WholeArray, part.bracket_op).op_id

    if which == Id.Arith_Star:
        sigil = '*'
        decays = True  # both ${a[*]} and "${a[*]}" decay
    elif which == Id.Lit_At:
        sigil = '@'
        decays = not quoted  # ${a[@]} decays but "${a[@]}" doesn't
    else:
        raise AssertionError(which)  # unknown
    vsub_state.join_array = decays

    tag = val.tag()
    if tag == value_e.Undef:
        # Remember which array was referenced, for the later error message.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str and self.exec_opts.strict_array():
        e_die("Can't index string with %s" % sigil, loc.WordPart(part))
    # BashArray, SparseArray and BashAssoc need no processing.  Other YSH
    # types (List, Dict, Float, ...) aren't supported, but errors for them
    # are reported later, so they also pass through untouched.

    return val
1186
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process a numeric array index like ${a[i+1]}

    Args:
        val: the container being indexed
        part: the ${...} node; its bracket_op holds the index expression
        vtest_place: out param; .index is set to the evaluated index (Int
            for BashArray/SparseArray, Str for BashAssoc)

    Returns:
        value.Str for an element that is set, value.Undef for one that
        isn't.  An Undef input is returned as is.

    A Str input is a fatal error; any other type raises error.TypeErr.
    """
    anode = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.name_tok)

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            index = self.arith_ev.EvalToInt(anode)
            vtest_place.index = a_index.Int(index)  # out param

            s, error_code = bash_impl.BashArray_GetElement(
                array_val, index)
            if error_code == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                self.errfmt.Print_(
                    "Index %d out of bounds for array of length %d" %
                    (index, bash_impl.BashArray_Length(array_val)),
                    blame_loc=part.name_tok)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        elif case2(value_e.SparseArray):
            sparse_val = cast(value.SparseArray, UP_val)
            # The lookup uses the big integer; the out param gets a
            # truncated copy.
            big_index = self.arith_ev.EvalToBigInt(anode)
            vtest_place.index = a_index.Int(mops.BigTruncate(big_index))

            s, error_code = bash_impl.SparseArray_GetElement(
                sparse_val, big_index)
            if error_code == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                big_length = bash_impl.SparseArray_Length(sparse_val)
                self.errfmt.Print_(
                    "Index %s out of bounds for array of length %s" %
                    (mops.ToStr(big_index), mops.ToStr(big_length)),
                    blame_loc=part.name_tok)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                anode, blame_loc=location.TokenForArith(anode))

            vtest_place.index = a_index.Str(key)  # out param
            s = bash_impl.BashAssoc_GetElement(assoc_val, key)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1265
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted word part.

    Args:
        parts: the word parts between the double quotes
        part_vals: output param; results are appended to it
    """
    # A double quoted part can yield an array:
    #   $ a=(1 2); b=(3); $ c=(4 5)
    #   $ argv "${a[@]}${b[@]}${c[@]}"
    #   ['1', '234', '5']
    #
    # And with multiple parts:
    #   $ argv "${a[@]}${undef[@]:-${c[@]}}"
    #   ['1', '24', '5']

    if len(parts) != 0:
        for child in parts:
            self._EvalWordPart(child, part_vals, QUOTED)
        return

    # Special case for "".  The parser produces (DoubleQuoted []) rather
    # than (DoubleQuoted [Literal '']), so the empty quoted piece has to be
    # created here.
    part_vals.append(Piece('', True, False))
1292
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    Errors are blamed on the opening quote (dq_part.left).
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    result = self._ConcatPartVals(evaluated, dq_part.left)
    return result
1302
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Join an array into one string, the way $* decays.

    Elements that are None (unset slots) are skipped.  The separator is
    whatever self.splitter.GetJoinChar() returns.
    """
    assert val.tag() == value_e.BashArray, val
    joiner = self.splitter.GetJoinChar()
    present = []  # type: List[str]
    for item in bash_impl.BashArray_GetValues(val):
        if item is not None:
            present.append(item)
    return value.Str(joiner.join(present))
1310
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value by an empty one, or fail under nounset.

    Args:
        val: value to check; anything other than Undef is returned as is
        name_tok: token of the substitution, used in the error message
        vsub_state: if array_ref is set, the undefined thing was an array

    Returns:
        val itself, or value.BashArray([]) / value.Str('') for an undefined
        array / variable when nounset is off.  With nounset on, an undefined
        value is a fatal error.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    fatal = self.exec_opts.nounset()

    ref_tok = vsub_state.array_ref
    if ref_tok is not None:
        if not fatal:
            return value.BashArray([])
        e_die('Undefined array %r' % lexer.TokenVal(ref_tok), ref_tok)

    if not fatal:
        return value.Str('')

    text = lexer.TokenVal(name_tok)
    # Drop the leading $ of a simple substitution like $foo.
    shown = text[1:] if text.startswith('$') else text
    e_die('Undefined variable %r' % shown, name_tok)
1332
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the [...] part of a ${...} substitution, if there is one.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().

    Without one, a named BashArray / BashAssoc is either decayed (when
    ShouldArrayDecay() allows it, e.g. for ${BASH_SOURCE}) or rejected with
    a fatal error, since arrays can't be used as scalars.
    """
    bop = part.bracket_op
    if bop:
        bop_tag = bop.tag()
        if bop_tag == bracket_op_e.WholeArray:
            return self._WholeArray(val, part, quoted, vsub_state)
        if bop_tag == bracket_op_e.ArrayIndex:
            return self._ArrayIndex(val, part, vtest_place)
        raise AssertionError(bop_tag)

    # no bracket op
    name = vtest_place.name
    if name is None:
        return val

    tag = val.tag()
    if tag != value_e.BashArray and tag != value_e.BashAssoc:
        return val

    no_other_ops = not (part.prefix_op or part.suffix_op)
    if not ShouldArrayDecay(name, self.exec_opts, no_other_ops):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)"
            % name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1361
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value a ${...} node refers to, including its bracket op.

    This duplicates some logic from _EvalBracedVarSub, but returns a
    value_t rather than appending part values.

    Side effects: sets vtest_place.name for named variables, and
    vsub_state.h_value to the value found before the bracket op is applied.
    """
    # 1. Evaluate from (var_name, var_num, token Id) -> value
    tok_id = part.name_tok.id
    if tok_id == Id.VSub_Number:
        held = self._EvalVarNum(int(part.var_name))
    elif tok_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        held = self.mem.GetValue(part.var_name)
    else:
        # $* decays
        held = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # update h-value (i.e., the holder of the current value)
    vsub_state.h_value = held

    # We don't need var_index because it's only for L-Values of test ops?
    if self.exec_opts.eval_unsafe_arith():
        result = self._EvalBracketOp(held, part, quoted, vsub_state,
                                     vtest_place)
    else:
        # Turn off _allow_command_sub while the bracket op is evaluated.
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            result = self._EvalBracketOp(held, part, quoted, vsub_state,
                                         vtest_place)

    return result
1394
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
        part: the ${...} node (name token, optional prefix_op, bracket_op
            and suffix_op)
        part_vals: output param to append to.
        quoted: whether the substitution appears inside double quotes
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other form
                # is joined into one string.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
    # Record the value as found, before the bracket op is applied
    # (_Nullary reads this for @a).
    vsub_state.h_value = val

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok,
                                         vsub_state):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef check is left.
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1575
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate evaluated part values into one string.

    Args:
        part_vals: String and Array part values
        location: what to blame if an array shows up under strict_array

    String parts contribute their text.  Array parts are fatal under
    strict_array; otherwise their non-None elements are joined with a
    single space.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, pv)
            chunks.append(piece.s)

        elif tag == part_value_e.Array:
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"
                e_die("Illegal array word part (strict_array)", location)

            arr = cast(part_value.Array, pv)
            # It appears to not respect IFS
            # TODO: eliminate double join()?
            kept = []  # type: List[str]
            for elem in arr.strs:
                if elem is not None:
                    kept.append(elem)
            chunks.append(' '.join(kept))

        else:
            raise AssertionError()

    return ''.join(chunks)
1605
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution to a single str.

    The substitution is evaluated as unquoted, and the resulting part
    values are concatenated.  Errors are blamed on the ${ location.

    Example: the ${foo} in  var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    result = self._ConcatPartVals(evaluated, part.left)  # blame ${
    return result
1616
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate $name, $1, $@ and friends, appending to part_vals.

    Args:
        part: the substitution node; part.tok determines what is looked up
        part_vals: output param to append to
        quoted: whether the substitution is inside double quotes
    """
    tok = part.tok
    tok_id = tok.id
    sub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    elif tok_id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        tag = val.tag()
        if tag == value_e.BashArray or tag == value_e.BashAssoc:
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    else:
        val = self._EvalSpecialVar(tok_id, quoted, sub_state)

    #log('SIMPLE %s', part)
    val = self._ProcessUndef(val, tok, sub_state)

    # An array is only joined when the special var evaluation asked for it.
    if val.tag() == value_e.BashArray and sub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1657
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple substitution like $foo to a single str.

    The substitution is evaluated as unquoted, and the resulting part
    values are concatenated.  Errors are blamed on the token node.tok.

    Example: the $foo in  var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    result = self._ConcatPartVals(evaluated, node.tok)
    return result
1667
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it into part_vals.

    The opening operator, the | separators and the closing ) are appended
    as unquoted pieces that are not split.  Each arm is evaluated with
    EXTGLOB_NESTED and its part values are appended in between.
    """
    if part.op.id == Id.ExtGlob_Comma:
        opener = '@('
    else:
        opener = lexer.LazyStr(part.op)
    # Do NOT split these.
    part_vals.append(Piece(opener, False, False))

    need_separator = False
    for arm in part.arms:
        if need_separator:
            part_vals.append(Piece('|', False, False))
        need_separator = True
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1685
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened word with an ExtGlob part to string patterns.

    Both a glob pattern and an fnmatch pattern are needed; _EvalExtGlob
    does the flattening.

    Args:
        part_vals: flattened part values of the word
        w: the word, used as the error location
        glob_parts: output param; pieces of the glob pattern, where each
            extended glob is replaced by '*'
        fnmatch_parts: output param; pieces of the full fnmatch pattern
    """
    for pv in part_vals:
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, pv)
            text = piece.s
            # Quoted text is escaped, unless noglob is on.  The @( and | in
            # @(foo|bar) aren't quoted, so they stay as they are.
            if piece.quoted and not self.exec_opts.noglob():
                text = glob_.GlobEscape(text)
            glob_parts.append(text)
            fnmatch_parts.append(text)  # from _EvalExtGlob()

        elif tag == part_value_e.Array:
            # Disallow array
            e_die(
                "Extended globs and arrays can't appear in the same word",
                w)

        elif tag == part_value_e.ExtGlob:
            nested = cast(part_value.ExtGlob, pv)
            # Keep appending to fnmatch_parts, but the nested glob parts are
            # thrown away and replaced with a single '*'.
            self._TranslateExtGlob(nested.part_vals, w, [], fnmatch_parts)
            glob_parts.append('*')

        else:
            raise AssertionError()
1721
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
        part: the word part to evaluate
        part_vals: output param to append to
        flags: bit flags; only QUOTED and IS_SUBST are read here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are not handled by this method; reaching one here
        # is a fatal error.
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            # The left token tells command subs from process subs.
            id_ = part.left_token.id
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The result is subject to splitting only when unquoted.
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are emitted as unquoted pieces around the
            # evaluated child word, if there is one.
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1838
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending to part_vals.

    An Empty word yields one empty piece, which is quoted according to the
    QUOTED bit of eval_flags and marked for splitting only when unquoted.
    A Compound word is handed to _EvalWordToParts() with the same flags.
    """
    tag = w.tag()

    if tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, w)
        self._EvalWordToParts(compound, part_vals, eval_flags=eval_flags)

    elif tag == rhs_word_e.Empty:
        is_quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', is_quoted, not is_quoted))

    else:
        raise AssertionError()
1854
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Evaluates every part of the word and appends to part_vals.  Note that
    the result is a TREE.

    Words containing an extended glob are a special case, because extended
    globs are implemented with glob() followed by
    fnmatch(..., FNM_EXTMATCH), and it's hard to carry that information
    past the word splitting stage.

    OSH semantic limitations: if a word has an extended glob part, then
    1. It can't have an array
    2. Word splitting of unquoted words isn't respected
    """
    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for wp in w.parts:
        if wp.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(wp, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    if bool(eval_flags & EXTGLOB_FILES):
        # The caller REQUESTED extglob evaluation, and the word has one.
        # Treat the WHOLE word as a pattern.  TWO VARIANTS of it are needed
        # because of the way libc is used:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        for_glob = []  # type: List[str]
        for_fnmatch = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, for_glob, for_fnmatch)

        glob_pat = ''.join(for_glob)
        fnmatch_pat = ''.join(for_fnmatch)
        #log("glob %s fnmatch %s", glob_pat, fnmatch_pat)

        matches = []  # type: List[str]
        status = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if status < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))
        return

    if bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)
        return

    # e.g. simple_word_eval, assignment builtin
    e_die('Extended glob not allowed in this word', w)
1911
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
        part_vals: evaluated parts of the word
        w: the word, used only as an error location (it could just be a
            span ID)
        eval_flags: QUOTE_FNMATCH or QUOTE_ERE choose how quoted pieces are
            escaped
        strs: output param; string fragments are appended to it
    """
    for pv in part_vals:
        tag = pv.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, pv)
            text = piece.s
            if piece.quoted:
                if eval_flags & QUOTE_FNMATCH:
                    # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                    text = glob_.GlobEscape(text)
                elif eval_flags & QUOTE_ERE:
                    text = glob_.ExtendedRegexEscape(text)
            strs.append(text)

        elif tag == part_value_e.Array:
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"

                # TODO: This attributes too coarsely, to the word rather
                # than the parts.  Problem: the word is a TREE of parts,
                # but we only have a flat list of part_vals.  The only case
                # where we really get arrays is "$@", "${a[@]}",
                # "${a[@]//pat/replace}", etc.
                e_die(
                    "This word should yield a string, but it contains an array",
                    w)

                # TODO: Maybe add detail like this.
                #e_die('RHS of assignment should only have strings.  '
                #      'To assign arrays, use b=( "${a[@]}" )')

            arr = cast(part_value.Array, pv)
            # It appears to not respect IFS
            kept = []  # type: List[str]
            for elem in arr.strs:
                if elem is not None:
                    kept.append(elem)
            strs.append(' '.join(kept))  # TODO: eliminate double join()?

        elif tag == part_value_e.ExtGlob:
            # Extended globs are only allowed where we expect them!
            if not bool(eval_flags & QUOTE_FNMATCH):
                e_die('extended glob not allowed in this word', w)

            nested = cast(part_value.ExtGlob, pv)
            # recursive call
            self._PartValsToString(nested.part_vals, w, eval_flags, strs)

        else:
            raise AssertionError()
1967
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Evaluate a compound word to a single string.

    Args:
        UP_w: the word; it must be a CompoundWord
        eval_flags: may select a quoting algorithm (e.g. QUOTE_FNMATCH)
            that is applied to quoted pieces of the result
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Fast path, only without flags: QUOTE_FNMATCH etc. breaks the
    # optimization.
    if eval_flags == 0:
        literal = word_.FastStrEval(w)
        if literal is not None:
            return value.Str(literal)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for wp in w.parts:
        # The parts are evaluated with flags 0; eval_flags only matters
        # when stringifying below, which is slightly confusing.
        self._EvalWordPart(wp, evaluated, 0)

    fragments = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, fragments)
    return value.Str(''.join(fragments))
1995
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but also reports whether there's an ExtGlob.

    Returns:
        (pattern, has_extglob).  Quoted pieces of the pattern are escaped
        with QUOTE_FNMATCH.  An Empty word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for wp in w.parts:
        # Parts are evaluated with flags 0; QUOTE_FNMATCH is only applied
        # when stringifying below.
        self._EvalWordPart(wp, evaluated, 0)
        if wp.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    fragments = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, fragments)
    pattern = value.Str(''.join(fragments))
    return pattern, saw_extglob
2016
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )), I/O errors and Ctrl-C don't
    propagate; they're turned into a descriptive string instead.
    Mutations of $? like $(exit 42) are contained too.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers is there to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as fatal:
            result = value.Str('<Runtime error: %s>' %
                               fatal.UserErrorString())

        except (IOError, OSError) as io_err:
            result = value.Str('<I/O error: %s>' % pyutil.strerror(io_err))

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2039
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Evaluate the right-hand side of an assignment.

    There is no splitting of a string RHS.

    Args:
      UP_w: The RHS, either rhs_word.Empty or a CompoundWord.

    Returns:
      - value.Str('') when the RHS is empty
      - value.BashArray when the word is a single ShArrayLiteral part,
        e.g. a=(1 2)
      - value.BashAssoc when the word is a single BashAssocLiteral part
      - otherwise the value.Str produced by EvalWordToString
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so compare against the rhs_word_e tag, the
    # same way the Empty check above and EvalWordToPattern do.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()

        # Special case for a=(1 2).  ShArrayLiteral won't appear in words
        # that don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            # Brace-expand first, then evaluate the elements as a word
            # sequence.
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                # A repeated key overwrites the earlier entry
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2076
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args appended to argv.

    Args:
      frame: Pieces that together make up one word-in-progress.
      argv: Output list; results are appended to it.

    Raises:
      error.FailGlob: when self.globber.Expand() reports a negative count
        for a glob-like arg.
    """
    saw_text = False  # some piece has a non-empty string
    saw_quoted = False
    saw_unquoted = False

    #log('--- frame %s', frame)

    for piece in frame:
        if len(piece.s) != 0:
            saw_text = True

        if piece.quoted:
            saw_quoted = True
        else:
            saw_unquoted = True

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if not saw_text and not saw_quoted:
        return

    # If every piece is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # there is no word splitting or globbing: just concatenate.
    if not saw_unquoted:
        quoted_strs = [piece.s for piece in frame]
        argv.append(''.join(quoted_strs))
        return

    will_glob = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Strings, some of which are BOTH IFS-escaped and GLOB-escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # Step 1: escaping for the glob stage
        if will_glob and piece.quoted:
            escaped = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            escaped = _BackslashEscape(piece.s)

        # Step 2: escaping for the splitting stage
        if piece.do_split:
            escaped = _BackslashEscape(escaped)
        else:
            escaped = self.splitter.Escape(escaped)

        frags.append(escaped)

    if 0:
        log('---')
        log('FRAGS')
        for i, frag in enumerate(frags):
            log('(%d) %s', i, frag)
        log('')

    flat = ''.join(frags)
    #log('flat: %r', flat)

    split_args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(split_args) == 0 and saw_quoted:
        argv.append('')
        return

    #log('split args: %r', split_args)
    for arg in split_args:
        if not glob_.LooksLikeGlob(arg):
            argv.append(glob_.GlobUnescape(arg))
            continue

        num_matched = self.globber.Expand(arg, argv)
        if num_matched < 0:
            # TODO: location info, with span IDs carried through the frame
            raise error.FailGlob('Pattern %r matched no files' % arg,
                                 loc.Missing)
2161
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of strings, without split or glob.

    Helper for _EvalAssignBuiltin: splitting and globbing are disabled for
    assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]

    Returns:
      One string per non-empty frame, each the concatenation of that
      frame's pieces.
    """
    part_vals = []  # type: List[part_value_t]
    self._EvalWordToParts(w, part_vals, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(part_vals):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        piece_strs = [piece.s for piece in frame]
        result.append(''.join(piece_strs))  # no split or glob

    #log('argv: %s', result)
    return result
2181
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin, passed through to the
        result.
      arg0: The already-evaluated builtin name; it becomes flags[0].
      words: All words of the command, including any leading 'builtin' /
        'command' words.
      meta_offset: Index in `words` of the assignment keyword; evaluation
        starts at the word after it.

    Returns:
      cmd_value.Assign whose flag list and flag location list are
      parallel, plus the evaluated AssignArg pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[0]]  # kept parallel to flags
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the LHS
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name after -f / -F.  Record its location too,
                        # so that flag_locs[i] always describes flags[i].
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2268
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate `words` as an assignment builtin if arg0 names one.

    Returns:
      The result of _EvalAssignBuiltin when consts.LookupAssignBuiltin
      recognizes arg0, otherwise None.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2276
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from a part value.

    Only an unquoted String part value can name an assignment builtin;
    anything else gives None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None

    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2285
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Args:
      words: The words to evaluate.
      is_last_cmd: Passed through to cmd_value.Argv.
      allow_assign: Whether the first word may select an assignment
        builtin, in which case a cmd_value.Assign is returned instead.

    Raises:
      error.FailGlob: when a static glob word matches no files.
    """
    out_strs = []  # type: List[str]
    out_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if allow_assign and i == meta_offset:
            first_argv = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first_argv) == 1:
                assign_val = self._DetectAssignBuiltinStr(
                    first_argv[0], words, meta_offset)
                if assign_val:
                    return assign_val

            out_strs.extend(first_argv)
            for _ in first_argv:
                out_locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            num_appended = self.globber.Expand(pat.s, out_strs)
            if num_appended < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            for _ in xrange(num_appended):
                out_locs.append(w)
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, 0)  # not quoted

        if 0:
            log('')
            log('Static: part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        # Still need to process
        frames = _MakeWordFrames(part_vals)

        if 0:
            log('')
            log('Static: frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # We will still allow x"${a[@]}"x, though it's deprecated by @a,
        # which disallows such expressions at parse time.
        for frame in frames:
            if len(frame) == 0:  # empty array gives empty frame!
                continue
            piece_strs = [piece.s for piece in frame]
            out_strs.append(''.join(piece_strs))  # no split or glob
            out_locs.append(w)

    assert len(out_strs) == len(out_locs), '%s vs. %d' % (out_strs,
                                                         len(out_locs))
    return cmd_value.Argv(out_strs, out_locs, is_last_cmd, None, None)
2348
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: The words to evaluate.
      is_last_cmd: Passed through to cmd_value.Argv.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Argv normally, or the cmd_value.Assign from an assignment
      builtin detected in the word at meta_offset.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Stages of evaluation
    #
    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?
    #    Definitely off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob,
    #    etc.

    #log('W %s', words)
    out_strs = []  # type: List[str]
    out_locs = []  # type: List[CompoundWord]

    # Number of leading meta builtins seen so far:
    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    # How many entries of out_strs already have an entry in out_locs
    num_located = 0
    for i, w in enumerate(words):
        literal = word_.FastStrEval(w)
        if literal is not None:
            out_strs.append(literal)
            out_locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                assign_val = self._DetectAssignBuiltinStr(
                    literal, words, meta_offset)
                if assign_val:
                    return assign_val

            if i <= meta_offset and _DetectMetaBuiltinStr(literal):
                meta_offset += 1

            # The count must be kept up to date on every loop iteration,
            # including this fast path.
            num_located = len(out_strs)
            assert len(out_strs) == len(out_locs), out_strs
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin,
        # and change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case
        # of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            only_val = part_vals[0]
            if allow_assign and i == meta_offset:
                assign_val = self._DetectAssignBuiltin(
                    only_val, words, meta_offset)
                if assign_val:
                    return assign_val

            if i <= meta_offset and _DetectMetaBuiltin(only_val):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more
        # args.
        for frame in frames:
            self._EvalWordFrame(frame, out_strs)

        # Fill in locations parallel to out_strs.
        num_strs = len(out_strs)
        for _ in xrange(num_strs - num_located):
            out_locs.append(w)
        num_located = num_strs

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(out_strs) == len(out_locs), '%s vs. %d' % (out_strs,
                                                         len(out_locs))
    return cmd_value.Argv(out_strs, out_locs, is_last_cmd, None, None)
2456
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a flat list of strings.

    For arrays and for loops.  They don't allow assignment builtins, so
    the result of EvalWordSequence2 is always an Argv here.
    """
    # is_last_cmd is irrelevant, so pass False
    UP_cmd_val = self.EvalWordSequence2(words, False)
    assert UP_cmd_val.tag() == cmd_value_e.Argv

    argv_val = cast(cmd_value.Argv, UP_cmd_val)
    return argv_val.argv
2467
2468
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs via shell_ex.

    shell_ex starts out as None and must be assigned after construction;
    CheckCircularDeps() asserts that this has happened.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Filled in later, because of circular dependencies
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and wrap its stdout as a part value.

        Raises:
          error.Structured: with status 4, when the output of @(...) can't
            be decoded as J8 lines.
        """
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # Ordinary command sub: subject to splitting only if unquoted
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        #lines = self.splitter.SplitForWordEval(stdout_str)
        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path it yields as a Piece."""
        dev_path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(dev_path, True, False)
2515
2516
# Placeholder that stands in for the output of a command sub when there is
# no executor to run it.
_DUMMY = '__NO_COMMAND_SUB__'


class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings
    instead of being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder instead of running the command sub."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        # @(...) yields an array, so the placeholder is a one-element array
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2559
2560
2561# vim: sw=4