OILS / osh / word_eval.py View on Github | oils.pub

2607 lines, 1600 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 error_code_e,
37 AssignArg,
38 a_index,
39 a_index_e,
40 VTestPlace,
41 VarSubState,
42 Piece,
43)
44from _devbuild.gen.option_asdl import option_i, builtin_i
45from _devbuild.gen.value_asdl import (
46 value,
47 value_e,
48 value_t,
49 sh_lvalue,
50 sh_lvalue_t,
51)
52from core import bash_impl
53from core import error
54from core import pyos
55from core import pyutil
56from core import state
57from display import ui
58from core import util
59from data_lang import j8
60from data_lang import j8_lite
61from core.error import e_die
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from mycpp import mops
66from mycpp.mylib import log, tagswitch, NewDict
67from osh import braces
68from osh import glob_
69from osh import string_ops
70from osh import word_
71from ysh import expr_eval
72from ysh import val_ops
73
74from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76if TYPE_CHECKING:
77 from _devbuild.gen.syntax_asdl import word_part_t
78 from _devbuild.gen.option_asdl import builtin_t
79 from core import optview
80 from core.state import Mem
81 from core.vm import _Executor
82 from osh.split import SplitContext
83 from osh import prompt
84 from osh import sh_expr_eval
85
# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both)
#
# Every flag is a distinct bit, so a set of flags is a plain int built with |
# (for example, _ApplyTestOp starts from IS_SUBST and ORs in QUOTED).
QUOTED = 1 << 0  # _ApplyTestOp adds this when its 'quoted' argument is true
IS_SUBST = 1 << 1  # _ApplyTestOp always passes this for a test-op argument word

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# ShouldArrayDecay() consults this list when strict_array is on.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} may be read as ${a[0]}.

    Decay is always permitted while strict_array is off.  With strict_array
    on, it is only permitted for a plain variable substitution whose name is
    listed in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True

    if not is_plain_var_sub:
        return False

    return var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Turn an array value into its "first element", i.e. ${array} -> ${array[0]}.

    A BashArray yields the element at index 0 and a BashAssoc the element
    stored under the key '0'.  Any other value type is a programming error.

    Returns:
      value.Str for the element, or value.Undef when there is no such element.
    """
    tag = val.tag()

    if tag == value_e.BashArray:
        indexed = cast(value.BashArray, val)
        elem, status = bash_impl.BashArray_GetElement(indexed, 0)

        # Note: index 0 should never cause the out-of-bound index error.
        assert status == error_code_e.OK

    elif tag == value_e.BashAssoc:
        keyed = cast(value.BashAssoc, val)
        elem = bash_impl.BashAssoc_GetElement(keyed, '0')

    else:
        raise AssertionError(tag)

    if elem is not None:
        return value.Str(elem)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether the word s names the 'builtin' or 'command' builtin.

    These prefixes have to be recognized so that all of these are handled:

        builtin local
        command local
        builtin builtin local
        builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_oils.Builtin() or meta_oils.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether a part value is an unquoted 'builtin' / 'command' word.

    Only an unquoted string piece can qualify; everything else is False.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False

    return _DetectMetaBuiltinStr(piece.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse an argument to declare, export, etc. at runtime.

    This is the dynamic fallback for arguments that were not parsed
    statically.  The argument has to match consts.ASSIGN_ARG_RE; if it does
    not, a fatal error blaming blame_word is reported.

    Returns:
      An AssignArg holding the variable name, the value (None when there is no
      operator), whether the operator starts with '+', and blame_word.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    groups = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if groups is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # groups[2] is used for grouping only; ERE doesn't have non-capturing
    # groups.  The operator is groups[3] and the value is groups[4].
    assign_op = groups[3]
    assert assign_op is not None, assign_op

    if len(assign_op) == 0:  # declare NAME
        rhs = None  # type: Optional[value_t]
        is_append = False
    else:  # declare NAME=
        rhs = value.Str(groups[4])
        is_append = assign_op[0] == '+'

    return AssignArg(groups[1], rhs, is_append, blame_word)
186
187
188# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
def _BackslashEscape(s):
    # type: (str) -> str
    """Return s with every backslash doubled.

    Useful for strings about to be globbed and strings about to be IFS
    escaped.
    """
    backslash = '\\'
    doubled = '\\\\'
    return s.replace(backslash, doubled)
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the value of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Strings (and the stringifiable YSH types) become a Piece; BashArray and
    BashAssoc become a part_value.Array of their values.  Any other type
    raises error.TypeErr located at part_loc.
    """
    UP_val = val

    # Every Piece built here gets 'quoted' and its negation as 2nd/3rd args.
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _ProcessUndef, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            return Piece(str_val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            elems = bash_impl.BashArray_GetValues(array_val)
            return part_value.Array(elems)

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            assoc_elems = bash_impl.BashAssoc_GetValues(assoc_val)
            return part_value.Array(assoc_elems)

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            text = val_ops.Stringify(val, loc.WordPart(part_loc),
                                     'Word eval ')
            return Piece(text, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
238
239
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args, and frames are never
    joined with each other.  The concept exists because of arrays like "$@"
    and "${a[@]}": the first array element joins the frame in progress, and
    every later element opens a new frame.

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    all_frames = [frame]

    for part in part_vals:
        UP_part = part

        with tagswitch(part) as case:
            if case(part_value_e.String):
                part = cast(Piece, UP_part)
                frame.append(part)

            elif case(part_value_e.Array):
                part = cast(part_value.Array, UP_part)

                seen_elem = False
                for elem in part.strs:
                    if elem is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would
                    # have decayed to a string.
                    quoted_piece = Piece(elem, True, False)
                    if seen_elem:
                        # Every element after the first starts its own frame.
                        frame = [quoted_piece]
                        all_frames.append(frame)
                    else:
                        frame.append(quoted_piece)
                        seen_elem = True

            else:
                raise AssertionError()

    return all_frames
307
308
309# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into one string, e.g. to decay ${a=x"$@"x}.

    String pieces are used as is.  The defined elements of an Array part are
    joined with join_char.  All results are then concatenated with no
    separator.
    """
    chunks = []  # type: List[str]
    for part in part_vals:
        UP_part = part
        with tagswitch(part) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_part)
                chunks.append(piece.s)

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, UP_part)
                # TODO: Eliminate double join for speed?
                defined = [s for s in array_part.strs if s is not None]
                chunks.append(join_char.join(defined))

            else:
                raise AssertionError()

    return ''.join(chunks)
328
329
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Apply a slice (offset, optional length) to a string or an array.

    Args:
      val: The value to slice: Str, BashArray or SparseArray.
      offset: Start position.  A negative offset counts from the end.
      length: Only meaningful when has_length is true.  For strings a negative
        length is an end POSITION counted from the end of the string; for
        arrays a negative length is a fatal error.
      has_length: Whether a length was given at all.
      part: The BracedVarSub, used as the location for errors.
      arg0_val: $0 when slicing positional arguments, otherwise None (the
        _Slice method passes it only when the variable name is None, i.e. for
        $* or $@).  When set, $0 counts as element 0 of the array.

    Returns:
      value.Str for a string, value.BashArray for either array type.

    Raises:
      A fatal error (via e_die) for a negative array length or a BashAssoc,
      and error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            # Translate the character offset into a byte index.
            begin = mops.BigTruncate(offset)
            if begin < 0:  # Compute offset with unicode
                # Walk backwards from the end, one character per step.
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    # A non-negative length is counted from byte_begin.
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                # Rebase the negative offset so it counts from the start.
                offset = mops.Add(offset, array_length)

            # Still negative after rebasing: the offset lies before the first
            # element, so the slice is empty.
            if bash_impl.BigInt_Less(offset, mops.ZERO):
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        # Skip over $0 to get an index into the real array.
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # Offset 0 selects $0 itself; it uses up one slot of
                        # the requested length.
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    orig = bash_impl.BashArray_GetValues(val)
                    n = len(orig)

                    strs = []
                    i = mops.BigTruncate(offset)
                    count = 0
                    while i < n:
                        if has_length and count == length:  # length could be 0
                            break
                        s = orig[i]
                        if s is not None:  # Unset elements don't count towards the length
                            strs.append(s)
                            count += 1
                        i += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort so
                    # that we only extract the first LENGTH elements of the
                    # indices greater or equal to OFFSET.
                    # i = number of keys that are smaller than offset
                    i = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        i = i + 1

                    # NOTE(review): this assumes SparseArray_GetValues returns
                    # values in the same order as SparseArray_GetKeys -
                    # confirm in bash_impl.
                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[i:i +
                                                                    length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[i:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    new_list = [arg0_val.s]
                    new_list.extend(strs)
                    strs = new_list

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
467
468
class StringWordEvaluator(object):
    """Minimal word-evaluator interface.

    This is the interface used by ArithEvaluator / BoolEvaluator.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate a word to a single string value; subclasses implement it."""
        raise NotImplementedError()
480
481
def _GetDollarHyphen(exec_opts):
    # type: (optview.Exec) -> str
    """Build the string for $- from the shell options that are currently on.

    One letter is emitted per enabled option, always in the order
    i e f n u x C.
    """
    letters = []  # type: List[str]

    if exec_opts.interactive():
        letters.append('i')

    if exec_opts.errexit():
        letters.append('e')
    if exec_opts.noglob():
        letters.append('f')
    if exec_opts.noexec():
        letters.append('n')
    if exec_opts.nounset():
        letters.append('u')
    # NO letter for pipefail?
    if exec_opts.xtrace():
        letters.append('x')
    if exec_opts.noclobber():
        letters.append('C')

    # bash has:
    # - c for sh -c, i for sh -i (mksh also has this)
    # - h for hashing (mksh also has this)
    # - B for brace expansion
    result = ''.join(letters)
    return result
507
508
class TildeEvaluator(object):
    """Expands ~ and ~user into home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home directory, or None.

        $HOME is consulted first, and only then is a libc call made.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            return pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the directory can't be determined, this is a fatal error under
        strict_tilde; otherwise the literal text ~ or ~user is returned.
        """
        user = part.user_name

        if user is None:
            home_dir = self.GetMyHomeDir()
        else:
            home_dir = pyos.GetHomeDir(user)

        if home_dir is None:
            if self.exec_opts.strict_tilde():
                e_die("Error expanding tilde (e.g. invalid user)", part.left)
            else:
                # Return ~ or ~user literally
                home_dir = '~'
                if user is not None:
                    home_dir = home_dir + user  # mycpp doesn't have +=

        return home_dir
550
551
552class AbstractWordEvaluator(StringWordEvaluator):
553 """Abstract base class for word evaluators.
554
555 Public entry points:
556 EvalWordToString EvalForPlugin EvalRhsWord
557 EvalWordSequence EvalWordSequence2
558 """
559
560 def __init__(
561 self,
562 mem, # type: state.Mem
563 exec_opts, # type: optview.Exec
564 mutable_opts, # type: state.MutableOpts
565 tilde_ev, # type: TildeEvaluator
566 splitter, # type: SplitContext
567 errfmt, # type: ui.ErrorFormatter
568 ):
569 # type: (...) -> None
570 self.arith_ev = None # type: sh_expr_eval.ArithEvaluator
571 self.expr_ev = None # type: expr_eval.ExprEvaluator
572 self.prompt_ev = None # type: prompt.Evaluator
573
574 self.unsafe_arith = None # type: sh_expr_eval.UnsafeArith
575
576 self.tilde_ev = tilde_ev
577
578 self.mem = mem # for $HOME, $1, etc.
579 self.exec_opts = exec_opts # for nounset
580 self.mutable_opts = mutable_opts # for _allow_command_sub
581 self.splitter = splitter
582 self.errfmt = errfmt
583
584 self.globber = glob_.Globber(exec_opts)
585
586 def CheckCircularDeps(self):
587 # type: () -> None
588 raise NotImplementedError()
589
590 def _EvalCommandSub(self, cs_part, quoted):
591 # type: (CommandSub, bool) -> part_value_t
592 """Abstract since it has a side effect."""
593 raise NotImplementedError()
594
595 def _EvalProcessSub(self, cs_part):
596 # type: (CommandSub) -> part_value_t
597 """Abstract since it has a side effect."""
598 raise NotImplementedError()
599
600 def _EvalVarNum(self, var_num):
601 # type: (int) -> value_t
602 assert var_num >= 0
603 return self.mem.GetArgNum(var_num)
604
605 def _EvalSpecialVar(self, op_id, quoted, vsub_state):
606 # type: (int, bool, VarSubState) -> value_t
607 """Evaluate $?
608
609 and so forth
610 """
611 # $@ is special -- it need to know whether it is in a double quoted
612 # context.
613 #
614 # - If it's $@ in a double quoted context, return an ARRAY.
615 # - If it's $@ in a normal context, return a STRING, which then will be
616 # subject to splitting.
617
618 if op_id in (Id.VSub_At, Id.VSub_Star):
619 argv = self.mem.GetArgv()
620 val = value.BashArray(argv) # type: value_t
621 if op_id == Id.VSub_At:
622 # "$@" evaluates to an array, $@ should be decayed
623 vsub_state.join_array = not quoted
624 else: # $* "$*" are both decayed
625 vsub_state.join_array = True
626
627 elif op_id == Id.VSub_Hyphen:
628 val = value.Str(_GetDollarHyphen(self.exec_opts))
629
630 else:
631 val = self.mem.GetSpecialVar(op_id)
632
633 return val
634
    def _ApplyTestOp(
            self,
            val,  # type: value_t
            op,  # type: suffix_op.Unary
            quoted,  # type: bool
            part_vals,  # type: Optional[List[part_value_t]]
            vtest_place,  # type: VTestPlace
            blame_token,  # type: Token
            vsub_state,  # type: VarSubState
    ):
        # type: (...) -> bool
        """Apply a test operator: ${a:-x} ${a-x} ${a:+x} ${a+x} ${a:=x} ${a=x}
        ${a:?x} ${a?x}.

        The value is first classified as "falsey" or not.  Undef is always
        falsey.  For the colon operators an empty string, or an array whose
        joined form would be empty, is falsey too.  For the operators without
        a colon, a string is never falsey and an array only when it has no
        elements.  The operator then decides what happens with op.arg_word.

        Args:
          val: The value being tested.
          op: The operator; op.op is its token and op.arg_word its argument.
          quoted: Whether the argument word is evaluated with the QUOTED flag.
          part_vals: Out param.  The evaluated argument word is appended here
            by the - + = operators.
          vtest_place: Name and index of the variable, used by = for the
            assignment and by ? for the error message.
          blame_token: Location for the error raised by the ? operators.
          vsub_state: Its join_array field selects the separator width used
            to decide whether an array is empty.

        Returns:
          Whether part_vals was mutated

        ${a:-} returns part_value[]
        ${a:+} returns part_value[]
        ${a:?error} returns error word?
        ${a:=} returns part_value[] but also needs self.mem for side effects.

        So I guess it should return part_value[], and then a flag for raising an
        error, and then a flag for assigning it?
        The original BracedVarSub will have the name.

        Example of needing multiple part_value[]

          echo X-${a:-'def'"ault"}-X

        We return two part values from the BracedVarSub.  Also consider:

          echo ${a:-x"$@"x}
        """
        eval_flags = IS_SUBST
        if quoted:
            eval_flags |= QUOTED

        tok = op.op
        # NOTE: Splicing part_values is necessary because of code like
        # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
        # do_glob/do_elide setting.
        UP_val = val
        # Step 1: decide whether the value counts as unset/empty.
        with tagswitch(val) as case:
            if case(value_e.Undef):
                is_falsey = True

            elif case(value_e.Str):
                val = cast(value.Str, UP_val)
                # Only the colon variants treat the empty string as falsey.
                if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                              Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                    is_falsey = len(val.s) == 0
                else:
                    is_falsey = False

            elif case(value_e.BashArray, value_e.BashAssoc):
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    strs = bash_impl.BashArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    strs = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                              Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                    # The first character of IFS is used as a separator only
                    # for the double-quoted "$*", or otherwise, a space " " is
                    # used (for $*, $@, and "$@").
                    # TODO: We current do not check whether the current $* is
                    # double-quoted or not.  We should use IFS only when $* is
                    # double-quoted.
                    if vsub_state.join_array:
                        sep_width = len(self.splitter.GetJoinChar())
                    else:
                        sep_width = 1  # we use ' ' for a[@]

                    # We test whether the joined string will be empty.  When
                    # the separator is empty, all the elements need to be
                    # empty.  When the separator is non-empty, one element is
                    # allowed at most and needs to be an empty string if any.
                    #
                    # NOTE(review): other code in this file skips None entries
                    # in the result of BashArray_GetValues (unset elements).
                    # len(s) below would fail on such an entry - confirm
                    # whether unset elements can reach this point.
                    if sep_width == 0:
                        is_falsey = True
                        for s in strs:
                            if len(s) != 0:
                                is_falsey = False
                                break
                    else:
                        is_falsey = len(strs) == 0 or (len(strs) == 1 and
                                                       len(strs[0]) == 0)
                else:
                    # TODO: allow undefined
                    is_falsey = len(strs) == 0

            else:
                # value.Eggex, etc. are all false
                is_falsey = False

        # Step 2: act on the operator.
        if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
            if is_falsey:
                self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
                return True
            else:
                return False

        # Inverse of the above.
        elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
            if is_falsey:
                return False
            else:
                self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
                return True

        # Splice and assign
        elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
            if is_falsey:
                # Collect new part vals.
                assign_part_vals = []  # type: List[part_value_t]
                self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                         eval_flags)
                # Append them to out param AND return them.
                part_vals.extend(assign_part_vals)

                if vtest_place.name is None:
                    # TODO: error context
                    e_die("Can't assign to special variable")
                else:
                    # NOTE: This decays arrays too!  'shopt -s strict_array' could
                    # avoid it.
                    rhs_str = _DecayPartValuesToString(
                        assign_part_vals, self.splitter.GetJoinChar())
                    # Build the assignment target: a plain name, an indexed
                    # element, or a keyed element.
                    if vtest_place.index is None:  # using None when no index
                        lval = location.LName(
                            vtest_place.name)  # type: sh_lvalue_t
                    else:
                        var_name = vtest_place.name
                        var_index = vtest_place.index
                        UP_var_index = var_index

                        with tagswitch(var_index) as case:
                            if case(a_index_e.Int):
                                var_index = cast(a_index.Int, UP_var_index)
                                lval = sh_lvalue.Indexed(
                                    var_name, var_index.i, loc.Missing)
                            elif case(a_index_e.Str):
                                var_index = cast(a_index.Str, UP_var_index)
                                lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                       loc.Missing)
                            else:
                                raise AssertionError()

                    state.OshLanguageSetValue(self.mem, lval,
                                              value.Str(rhs_str))
                return True

            else:
                return False

        elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
            if is_falsey:
                # The arg is the error message
                error_part_vals = []  # type: List[part_value_t]
                self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                         eval_flags)
                error_str = _DecayPartValuesToString(
                    error_part_vals, self.splitter.GetJoinChar())

                #
                # Display fancy/helpful error
                #
                if vtest_place.name is None:
                    var_name = '???'
                else:
                    var_name = vtest_place.name

                # Deliberately disabled; kept for reference.
                if 0:
                    # This hint is nice, but looks too noisy for now
                    op_str = lexer.LazyStr(tok)
                    if tok.id == Id.VTest_ColonQMark:
                        why = 'empty or unset'
                    else:
                        why = 'unset'

                    self.errfmt.Print_(
                        "Hint: operator %s means a variable can't be %s" %
                        (op_str, why), tok)

                if val.tag() == value_e.Undef:
                    actual = 'unset'
                else:
                    actual = 'empty'

                if len(error_str):
                    suffix = ': %r' % error_str
                else:
                    suffix = ''
                e_die("Var %s is %s%s" % (var_name, actual, suffix),
                      blame_token)

            else:
                return False

        else:
            raise AssertionError(tok.id)
838
839 def _Count(self, val, token):
840 # type: (value_t, Token) -> int
841 """Returns the length of the value, for ${#var}"""
842 UP_val = val
843 with tagswitch(val) as case:
844 if case(value_e.Str):
845 val = cast(value.Str, UP_val)
846 # NOTE: Whether bash counts bytes or chars is affected by LANG
847 # environment variables.
848 # Should we respect that, or another way to select? set -o
849 # count-bytes?
850
851 # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
852 try:
853 count = string_ops.CountUtf8Chars(val.s)
854 except error.Strict as e:
855 # Add this here so we don't have to add it so far down the stack.
856 # TODO: It's better to show BOTH this CODE an the actual DATA
857 # somehow.
858 e.location = token
859
860 if self.exec_opts.strict_word_eval():
861 raise
862 else:
863 # NOTE: Doesn't make the command exit with 1; it just returns a
864 # length of -1.
865 self.errfmt.PrettyPrintError(e, prefix='warning: ')
866 return -1
867
868 elif case(value_e.BashArray):
869 val = cast(value.BashArray, UP_val)
870 count = bash_impl.BashArray_Count(val)
871
872 elif case(value_e.BashAssoc):
873 val = cast(value.BashAssoc, UP_val)
874 count = bash_impl.BashAssoc_Count(val)
875
876 elif case(value_e.SparseArray):
877 val = cast(value.SparseArray, UP_val)
878 count = bash_impl.SparseArray_Count(val)
879
880 else:
881 raise error.TypeErr(
882 val, "Length op expected Str, BashArray, BashAssoc", token)
883
884 return count
885
886 def _Keys(self, val, token):
887 # type: (value_t, Token) -> value_t
888 """Return keys of a container, for ${!array[@]}"""
889
890 UP_val = val
891 with tagswitch(val) as case:
892 if case(value_e.BashArray):
893 val = cast(value.BashArray, UP_val)
894 indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
895 return value.BashArray(indices)
896
897 elif case(value_e.BashAssoc):
898 val = cast(value.BashAssoc, UP_val)
899 assert val.d is not None # for MyPy, so it's not Optional[]
900
901 # BUG: Keys aren't ordered according to insertion!
902 keys = bash_impl.BashAssoc_GetKeys(val)
903 return value.BashArray(keys)
904
905 else:
906 raise error.TypeErr(val, 'Keys op expected Str', token)
907
908 def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
909 # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
910 """Handles indirect expansion like ${!var} and ${!a[0]}.
911
912 Args:
913 blame_tok: 'foo' for ${!foo}
914 """
915 UP_val = val
916 with tagswitch(val) as case:
917 if case(value_e.Undef):
918 # bash-4.4 returned value.Undef here. bash-5.0 started to treat
919 # the variable name to be empty so that the indirection fails.
920 var_ref_str = ''
921
922 elif case(value_e.Str):
923 val = cast(value.Str, UP_val)
924 var_ref_str = val.s
925
926 elif case(value_e.BashArray): # caught earlier but OK
927 val = cast(value.BashArray, UP_val)
928 # When there are more than one element in the array, this
929 # produces a wrong variable name containing spaces.
930 var_ref_str = ' '.join(bash_impl.BashArray_GetValues(val))
931
932 elif case(value_e.BashAssoc): # caught earlier but OK
933 val = cast(value.BashAssoc, UP_val)
934 var_ref_str = ' '.join(bash_impl.BashAssoc_GetValues(val))
935
936 else:
937 raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
938
939 try:
940 bvs_part = self.unsafe_arith.ParseVarRef(var_ref_str, blame_tok)
941 except error.FatalRuntime as e:
942 raise error.VarSubFailure(e.msg, e.location)
943
944 return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
945
946 def _ApplyUnarySuffixOp(self, val, op):
947 # type: (value_t, suffix_op.Unary) -> value_t
948 assert val.tag() != value_e.Undef
949
950 op_kind = consts.GetKind(op.op.id)
951
952 if op_kind == Kind.VOp1:
953 # NOTE: glob syntax is supported in ^ ^^ , ,, ! As well as % %% # ##.
954 # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
955 # shortcut for constant strings.
956 arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
957 assert arg_val.tag() == value_e.Str
958
959 UP_val = val
960 with tagswitch(val) as case:
961 if case(value_e.Str):
962 val = cast(value.Str, UP_val)
963 s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
964 has_extglob)
965 #log('%r %r -> %r', val.s, arg_val.s, s)
966 new_val = value.Str(s) # type: value_t
967
968 elif case(value_e.BashArray, value_e.BashAssoc):
969 # get values
970 if val.tag() == value_e.BashArray:
971 val = cast(value.BashArray, UP_val)
972 values = bash_impl.BashArray_GetValues(val)
973 elif val.tag() == value_e.BashAssoc:
974 val = cast(value.BashAssoc, UP_val)
975 values = bash_impl.BashAssoc_GetValues(val)
976 else:
977 raise AssertionError()
978
979 # ${a[@]#prefix} is VECTORIZED on arrays. YSH should have this too.
980 strs = [
981 string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
982 has_extglob) for s in values
983 ]
984 new_val = value.BashArray(strs)
985
986 else:
987 raise error.TypeErr(
988 val, 'Unary op expected Str, BashArray, BashAssoc',
989 op.op)
990
991 else:
992 raise AssertionError(Kind_str(op_kind))
993
994 return new_val
995
996 def _PatSub(self, val, op):
997 # type: (value_t, suffix_op.PatSub) -> value_t
998
999 pat_val, has_extglob = self.EvalWordToPattern(op.pat)
1000 # Extended globs aren't supported because we only translate * ? etc. to
1001 # ERE. I don't think there's a straightforward translation from !(*.py) to
1002 # ERE! You would need an engine that supports negation? (Derivatives?)
1003 if has_extglob:
1004 e_die('extended globs not supported in ${x//GLOB/}', op.pat)
1005
1006 if op.replace:
1007 replace_val = self.EvalRhsWord(op.replace)
1008 # Can't have an array, so must be a string
1009 assert replace_val.tag() == value_e.Str, replace_val
1010 replace_str = cast(value.Str, replace_val).s
1011 else:
1012 replace_str = ''
1013
1014 # note: doesn't support self.exec_opts.extglob()!
1015 regex, warnings = glob_.GlobToERE(pat_val.s)
1016 if len(warnings):
1017 # TODO:
1018 # - Add 'shopt -s strict_glob' mode and expose warnings.
1019 # "Glob is not in CANONICAL FORM".
1020 # - Propagate location info back to the 'op.pat' word.
1021 pass
1022 #log('regex %r', regex)
1023 replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)
1024
1025 with tagswitch(val) as case2:
1026 if case2(value_e.Str):
1027 str_val = cast(value.Str, val)
1028 s = replacer.Replace(str_val.s, op)
1029 val = value.Str(s)
1030
1031 elif case2(value_e.BashArray, value_e.BashAssoc):
1032 if val.tag() == value_e.BashArray:
1033 array_val = cast(value.BashArray, val)
1034 values = bash_impl.BashArray_GetValues(array_val)
1035 elif val.tag() == value_e.BashAssoc:
1036 assoc_val = cast(value.BashAssoc, val)
1037 values = bash_impl.BashAssoc_GetValues(assoc_val)
1038 else:
1039 raise AssertionError()
1040 strs = [replacer.Replace(s, op) for s in values]
1041 val = value.BashArray(strs)
1042
1043 else:
1044 raise error.TypeErr(
1045 val, 'Pat Sub op expected Str, BashArray, BashAssoc',
1046 op.slash_tok)
1047
1048 return val
1049
1050 def _Slice(self, val, op, var_name, part):
1051 # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
1052
1053 begin = self.arith_ev.EvalToBigInt(op.begin)
1054
1055 # Note: bash allows lengths to be negative (with odd semantics), but
1056 # we don't allow that right now.
1057 has_length = False
1058 length = -1
1059 if op.length:
1060 has_length = True
1061 length = self.arith_ev.EvalToInt(op.length)
1062
1063 try:
1064 arg0_val = None # type: value.Str
1065 if var_name is None: # $* or $@
1066 arg0_val = self.mem.GetArg0()
1067 val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
1068 except error.Strict as e:
1069 if self.exec_opts.strict_word_eval():
1070 raise
1071 else:
1072 self.errfmt.PrettyPrintError(e, prefix='warning: ')
1073 with tagswitch(val) as case2:
1074 if case2(value_e.Str):
1075 val = value.Str('')
1076 elif case2(value_e.BashArray):
1077 val = value.BashArray([])
1078 else:
1079 raise NotImplementedError()
1080 return val
1081
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value_t, bool]
    """Apply a nullary suffix op (VOp0_P, VOp0_Q or VOp0_a) to a value.

    Args:
      val: the value the op is applied to
      op: the operator token; its id selects the transformation
      var_name: name of the variable, or None when there is no name
                (e.g. ${?@a})
      vsub_token: token passed to _ProcessUndef() for error locations
      vsub_state: state of the enclosing substitution; array_ref and
                  h_value are read here

    Returns:
      (result, quoted2).  quoted2 is set to True only when @Q is applied
      to a Str.

    Any other operator id is a fatal error via e_die().
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # @P: evaluate each string as a prompt.
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')  # type: value_t
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val.s)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            elif case(value_e.BashArray, value_e.BashAssoc):
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    # Entries that are None are skipped.
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                # Same treatment as the Str case, applied per element.
                tmp = [
                    self.prompt_ev.EvalPrompt(s).replace(
                        '\x01', '').replace('\x02', '') for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # @Q: shell-encode each string.
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                if vsub_state.array_ref is not None:
                    result = value.BashArray([])
                else:
                    result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.BashAssoc):
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    # Entries that are None are skipped.
                    values = [s for s in bash_impl.BashArray_GetValues(val) if s is not None]
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # @a: build a string of attribute flag characters.
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # The flags simulated here are 'a' and 'A' (from the type of the
        # holder value) plus 'r', 'x' and 'n' (from the cell of a named
        # variable).  See spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(vsub_state.h_value) as case:
            if case(value_e.BashArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        # The flag string is repeated once per element: 1 for a scalar,
        # otherwise the count of the array.
        count = 1
        with tagswitch(val) as case:
            # NOTE(review): val already went through _ProcessUndef() above,
            # which does not return Undef, so this branch looks unreachable
            # -- confirm before relying on it.
            if case(value_e.Undef):
                count = 0
            elif case(value_e.BashArray):
                val = cast(value.BashArray, UP_val)
                count = bash_impl.BashArray_Count(val)
            elif case(value_e.BashAssoc):
                val = cast(value.BashAssoc, UP_val)
                count = bash_impl.BashAssoc_Count(val)

        result = value.BashArray([''.join(chars)] * count)

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1199
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Process a whole-array bracket op, i.e. [@] or [*].

    The value itself is returned unmodified; the effects are on vsub_state:
    join_array is set according to the operator and quoting, and array_ref
    records the name token when the value is Undef.
    """
    whole_op = cast(bracket_op.WholeArray, part.bracket_op)
    op_id = whole_op.op_id

    if op_id == Id.Lit_At:
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
        op_str = '@'
    elif op_id == Id.Arith_Star:
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
        op_str = '*'
    else:
        raise AssertionError(op_id)  # unknown

    tag = val.tag()
    if tag == value_e.Undef:
        # For an undefined array, we save the token of the array
        # reference for the later error message.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str and self.exec_opts.strict_array():
        e_die("Can't index string with %s" % op_str, loc.WordPart(part))

    # Everything else is a no-op here: BashArray, SparseArray and BashAssoc
    # are fine as they are, and other YSH types such as List, Dict, and
    # Float are not supported.  Error messages for those are printed later,
    # so the unsupported objects are returned without modification.
    return val
1232
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process an array index op like ${a[i+1]} or ${assoc[key]}.

    Args:
      val: the value being indexed
      part: the substitution; supplies the index expression and locations
      vtest_place: out param; its index field is set to the evaluated
                   index (Int for arrays, Str for associative arrays)

    Returns:
      Undef values are returned as-is.  Otherwise the element as a
      value.Str, or value.Undef when the lookup yields None.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    tag = val.tag()

    if tag == value_e.Undef:
        return val  # it will be checked later

    if tag == value_e.Str:
        # Bash treats any string as an array, so we can't add our own
        # behavior here without making valid OSH invalid bash.
        e_die("Can't index string %r with integer" % part.var_name,
              part.name_tok)

    elif tag == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        index = self.arith_ev.EvalToInt(index_expr)
        vtest_place.index = a_index.Int(index)

        s, error_code = bash_impl.BashArray_GetElement(array_val, index)
        if error_code == error_code_e.IndexOutOfRange:
            # Note: Bash outputs warning but does not make it a real
            # error.  We follow the Bash behavior here.
            array_len = bash_impl.BashArray_Length(array_val)
            self.errfmt.Print_(
                "Index %d out of bounds for array of length %d" %
                (index, array_len),
                blame_loc=part.name_tok)

    elif tag == value_e.SparseArray:
        sparse_val = cast(value.SparseArray, UP_val)
        big_index = self.arith_ev.EvalToBigInt(index_expr)
        vtest_place.index = a_index.Int(mops.BigTruncate(big_index))

        s, error_code = bash_impl.SparseArray_GetElement(
            sparse_val, big_index)
        if error_code == error_code_e.IndexOutOfRange:
            # Same as above: warn only, like Bash.
            big_length = bash_impl.SparseArray_Length(sparse_val)
            self.errfmt.Print_(
                "Index %s out of bounds for array of length %s" %
                (mops.ToStr(big_index), mops.ToStr(big_length)),
                blame_loc=part.name_tok)

    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, UP_val)
        # Location could also be attached to bracket_op?  But
        # arith_expr.VarSub works OK too
        key = self.arith_ev.EvalWordToString(
            index_expr, blame_loc=location.TokenForArith(index_expr))

        vtest_place.index = a_index.Str(key)  # out param
        s = bash_impl.BashAssoc_GetElement(assoc_val, key)

    else:
        raise error.TypeErr(val, 'Index op expected BashArray, BashAssoc',
                            loc.WordPart(part))

    # All three container kinds report a missing element as None.
    if s is None:
        return value.Undef
    return value.Str(s)
1311
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part.

    Every part is evaluated with the QUOTED flag.

    Args:
      parts: the word parts found between the double quotes
      part_vals: output param to append to.
    """
    # Example of returning array:
    # $ a=(1 2); b=(3); $ c=(4 5)
    # $ argv "${a[@]}${b[@]}${c[@]}"
    # ['1', '234', '5']
    #
    # Example of multiple parts
    # $ argv "${a[@]}${undef[@]:-${c[@]}}"
    # ['1', '24', '5']

    if len(parts) != 0:
        for sub_part in parts:
            self._EvalWordPart(sub_part, part_vals, QUOTED)
        return

    # Special case for "".  The parser outputs (DoubleQuoted []), instead
    # of (DoubleQuoted [Literal '']).  This is better but it means we
    # have to check for it.
    part_vals.append(Piece('', True, False))
1338
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string in a YSH expression to a str.

    Example: var x = "$foo-${foo}"

    The parts are evaluated with _EvalDoubleQuoted() and then concatenated
    with _ConcatPartVals(), using the left quote as the error location.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    return self._ConcatPartVals(evaluated, dq_part.left)
1348
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array (e.g. $*) to a single string.

    Entries that are None are dropped; the rest are joined with the
    splitter's join character.
    """
    assert val.tag() == value_e.BashArray, val
    sep = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for item in bash_impl.BashArray_GetValues(val):
        if item is not None:
            present.append(item)

    return value.Str(sep.join(present))
1356
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty value, or fail under nounset.

    Values that are not Undef are returned unchanged.  For Undef:

    - with nounset on, this is a fatal error ('Undefined array ...' when
      vsub_state.array_ref is set, otherwise 'Undefined variable ...')
    - with nounset off, an empty BashArray is returned when
      vsub_state.array_ref is set, otherwise an empty Str
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    nounset = self.exec_opts.nounset()
    array_tok = vsub_state.array_ref

    if array_tok is not None:
        if nounset:
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
        return value.BashArray([])

    if nounset:
        tok_str = lexer.TokenVal(name_tok)
        # Report the bare name, without a leading '$'.
        if tok_str.startswith('$'):
            name = tok_str[1:]
        else:
            name = tok_str
        e_die('Undefined variable %r' % name, name_tok)

    return value.Str('')
1378
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a braced var sub, if there is one.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().

    Without one, a named BashArray / BashAssoc either decays (when
    ShouldArrayDecay() allows it) or is a fatal error, since an array
    can't be referred to as a scalar.  Other values are returned as-is.
    """
    bracket = part.bracket_op
    if bracket:
        tag = bracket.tag()
        if tag == bracket_op_e.WholeArray:
            return self._WholeArray(val, part, quoted, vsub_state)
        if tag == bracket_op_e.ArrayIndex:
            return self._ArrayIndex(val, part, vtest_place)
        raise AssertionError(tag)

    # no bracket op
    var_name = vtest_place.name
    if var_name is None:
        return val
    if val.tag() not in (value_e.BashArray, value_e.BashAssoc):
        return val

    is_bare = not (part.prefix_op or part.suffix_op)
    if not ShouldArrayDecay(var_name, self.exec_opts, is_bare):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)" %
            var_name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1407
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value a BracedVarSub refers to and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t
    instead of appending part values.
    """
    # 1. Evaluate from (var_name, var_num, token Id) -> value
    tok_id = part.name_tok.id
    if tok_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)
    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))
    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # update h-value (i.e., the holder of the current value)
    vsub_state.h_value = val

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # The bracket op is evaluated with _allow_command_sub turned off.
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1440
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a braced var sub like ${x}, ${#x}, ${!ref} or ${x:-default}.

    Looks up the value, then applies the bracket op, prefix op and suffix
    op in that order, and finally joins the array if vsub_state.join_array
    was set along the way.

    Args:
      part: the substitution to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other
                # form is joined into one string.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
    # Remember the value before any op is applied; _Nullary reads it.
    vsub_state.h_value = val

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok,
                                         vsub_state):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef check remains.
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1621
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate part values into a single string.

    String parts contribute their text.  Array parts are a fatal error
    under strict_array (blamed on 'location'); otherwise their non-None
    entries are joined with a single space.
    """
    pieces = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        tag = part_val.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_part_val)
            s = piece.s

        elif tag == part_value_e.Array:
            array_part = cast(part_value.Array, UP_part_val)
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"
                e_die("Illegal array word part (strict_array)", location)
            else:
                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = [x for x in array_part.strs if x is not None]
                s = ' '.join(present)

        else:
            raise AssertionError()

        pieces.append(s)

    return ''.join(pieces)
1651
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a braced var sub like ${foo} to a single str.

    The substitution is evaluated as unquoted, and the resulting part
    values are concatenated with _ConcatPartVals().
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    # blame ${ location
    return self._ConcatPartVals(evaluated, part.left)
1662
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a simple var sub like $foo, $1 or $@.

    Args:
      part: the substitution to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context
    """
    tok = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    tok_id = tok.id
    if tok_id == Id.VSub_DollarName:
        var_name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(var_name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(var_name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._ProcessUndef(val, tok, vsub_state)

    # An array is joined into one string only when join_array was requested.
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1703
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple var sub like $foo to a single str.

    The substitution is evaluated as unquoted, and the resulting part
    values are concatenated with _ConcatPartVals(), using the node's token
    as the error location.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    return self._ConcatPartVals(evaluated, node.tok)
1713
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Appends the opening operator, the evaluated arms separated by '|', and
    the closing ')' to part_vals.
    """
    op = part.op
    # The ExtGlob_Comma operator is emitted as '@('.
    op_str = '@(' if op.id == Id.ExtGlob_Comma else lexer.LazyStr(op)

    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    need_sep = False
    for arm in part.arms:
        if need_sep:
            part_vals.append(Piece('|', False, False))  # separator
        need_sep = True
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1731
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values to translate
      w: the word; used as the location of the error raised for arrays
      glob_parts: output param.  Receives every string piece, and a '*'
                  in place of each nested ExtGlob.
      fnmatch_parts: output param.  Receives every string piece, including
                     those found inside nested ExtGlob parts.
    """
    # The position in part_vals is never needed, so iterate directly
    # instead of using enumerate().
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(part_val.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    s = part_val.s
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # keep appending fnmatch_parts, but replace glob_parts with
                # '*': the nested glob pieces go to a throwaway list
                self._TranslateExtGlob(part_val.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1767
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate one word part and append the result(s) to part_vals.

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to.
      flags: bit flags; QUOTED and IS_SUBST are read here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are not valid as word parts.
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))

        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            part_vals.append(Piece(lexer.LazyStr(part), quoted, is_subst))

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            part_vals.append(Piece(part.ch, True, False))

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            part_vals.append(Piece(part.sval, True, False))

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            left_id = part.left_token.id
            if left_id in (Id.Left_DollarParen, Id.Left_AtParen,
                           Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif left_id in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(left_id)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            expanded = self.tilde_ev.Eval(part)
            # NOT split even when unquoted!
            part_vals.append(Piece(expanded, True, False))

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            part_vals.append(Piece(mops.ToStr(num), quoted, not quoted))

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            # Note: the extglob option is not checked at runtime here.

            # Create a node to hold the flattened tree.  The caller decides
            # whether to pass it to fnmatch() or replace it with '*' and
            # pass it to glob().
            flattened = []  # type: List[part_value_t]
            self._EvalExtGlob(part, flattened)  # flattens tree
            part_vals.append(part_value.ExtGlob(flattened))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens themselves are not quoted.
            part_vals.append(Piece('(', False, False))
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)
            part_vals.append(
                part_value.Array(self.expr_ev.SpliceValue(val, part)))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_vals.append(self.expr_ev.EvalExprSub(part))

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1884
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending its part values to part_vals.

    An Empty word contributes a single empty Piece; a Compound word is
    handed to _EvalWordToParts() with the same flags.
    """
    UP_w = w
    tag = w.tag()

    if tag == rhs_word_e.Empty:
        is_quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', is_quoted, not is_quoted))

    elif tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, UP_w)
        self._EvalWordToParts(compound, part_vals, eval_flags=eval_flags)

    else:
        raise AssertionError()
1900
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Evaluates every part of the word.  A word without an extended glob
    part simply has its part values appended.  A word with one is handled
    according to eval_flags:

    - EXTGLOB_FILES: the whole word is expanded as a pattern, and the
      matches are appended as one part_value.Array
    - EXTGLOB_NESTED: the part values are appended as-is
    - otherwise it's a fatal error

    Returns:
      Appends to part_vals.  Note that this is a TREE.
    """
    # Does the word have an extended glob?  This is a special case because
    # of the way we use glob() and then fnmatch(..., FNM_EXTMATCH) to
    # implement extended globs.  It's hard to carry that extra information
    # all the way past the word splitting stage.

    # OSH semantic limitations: If a word has an extended glob part, then
    # 1. It can't have an array
    # 2. Word splitting of unquoted words isn't respected

    evaluated = []  # type: List[part_value_t]
    has_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            has_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not has_extglob:
        part_vals.extend(evaluated)
        return

    # Caller REQUESTED extglob evaluation, AND we parsed word_part.ExtGlob()
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)

        results = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, results)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(results))
        return

    if bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)
        return

    # e.g. simple_word_eval, assignment builtin
    e_die('Extended glob not allowed in this word', w)
1957
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: the part values to convert
      w: the word, used as the location of errors.  Note: this could just
         be a span ID
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE select how quoted pieces are
                  escaped; QUOTE_FNMATCH is also required for extended globs
      strs: output param; one string is appended per String or Array part
    """
    for part_val in part_vals:
        UP_part_val = part_val
        tag = part_val.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_part_val)
            s = piece.s
            if piece.quoted:
                if eval_flags & QUOTE_FNMATCH:
                    # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                    s = glob_.GlobEscape(s)
                elif eval_flags & QUOTE_ERE:
                    s = glob_.ExtendedRegexEscape(s)
            strs.append(s)

        elif tag == part_value_e.Array:
            array_part = cast(part_value.Array, UP_part_val)
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"

                # TODO: This attributes too coarsely, to the word rather than the
                # parts.  Problem: the word is a TREE of parts, but we only have a
                # flat list of part_vals.  The only case where we really get arrays
                # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                e_die(
                    "This word should yield a string, but it contains an array",
                    w)

                # TODO: Maybe add detail like this.
                #e_die('RHS of assignment should only have strings.  '
                #      'To assign arrays, use b=( "${a[@]}" )')
            else:
                # It appears to not respect IFS
                present = [x for x in array_part.strs if x is not None]
                # TODO: eliminate double join()?
                strs.append(' '.join(present))

        elif tag == part_value_e.ExtGlob:
            ext_part = cast(part_value.ExtGlob, UP_part_val)

            # Extended globs are only allowed where we expect them!
            if not bool(eval_flags & QUOTE_FNMATCH):
                e_die('extended glob not allowed in this word', w)

            # recursive call
            self._PartValsToString(ext_part.part_vals, w, eval_flags, strs)

        else:
            raise AssertionError()
2013
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; must be a CompoundWord
      eval_flags: can contain a quoting algorithm, which is applied when
                  the part values are converted to strings
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Fast path.  QUOTE_FNMATCH etc. breaks this optimization, so it's
    # only tried when there are no flags.
    if eval_flags == 0:
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            return value.Str(fast_str)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for word_part_ in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(word_part_, evaluated, 0)

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, out)
    return value.Str(''.join(out))
2041
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Evaluate a word as a pattern, and say whether it had an ExtGlob part.

    This is like EvalWordToString(), except that QUOTE_FNMATCH is always
    used to stringify the parts.

    Returns:
      (pattern, has_extglob).  rhs_word.Empty gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    compound = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for part in compound.parts:
        if part.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        # Parts are evaluated with flags 0; quoting is applied afterward
        self._EvalWordPart(part, evaluated, 0)

    pieces = []  # type: List[str]
    self._PartValsToString(evaluated, compound, QUOTE_FNMATCH, pieces)
    return value.Str(''.join(pieces)), saw_extglob
2062
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Evaluate a word to a string, without letting errors escape.

    A thin layer over EvalWordToString().  Runtime errors like
    $(( 1 / 0 )), I/O errors, and Ctrl-C become placeholder strings, and
    mutations of $? like $(exit 42) are contained.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers "sandboxes" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as err:
            result = value.Str('<Runtime error: %s>' %
                               err.UserErrorString())

        except (IOError, OSError) as io_err:
            result = value.Str('<I/O error: %s>' %
                               pyutil.strerror(io_err))

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2085
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Args:
      UP_w: rhs_word.Empty, or a CompoundWord.

    Returns:
      value.Str('') for an empty RHS.
      value.BashArray when the word is a single ShArrayLiteral part, e.g.
        a=(1 2).
      value.BashAssoc when the word is a single BashAssocLiteral part.
      Otherwise the value.Str from EvalWordToString().
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so it's checked against rhs_word_e, the same
    # way EvalWordToPattern() does it (this used to say word_e).
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            # Brace expansion happens before the words are evaluated
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            for pair in part0.pairs:
                # Keys and values are each evaluated to a single string
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2122
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Split and glob one frame, appending zero or more strings to argv.

    Args:
      frame: The pieces of one frame.  This method reads each piece's s,
        quoted, and do_split fields.
      argv: Output list; results are appended to it.

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count.
    """
    all_empty = True
    all_quoted = True
    any_quoted = False

    #log('--- frame %s', frame)

    # One pass over the frame to compute the three summary flags
    for piece in frame:
        if len(piece.s):
            all_empty = False

        if piece.quoted:
            any_quoted = True
        else:
            all_quoted = False

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if all_empty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if all_quoted:
        tmp = [piece.s for piece in frame]
        a = ''.join(tmp)
        argv.append(a)
        return

    # With noglob set, quoted pieces don't need to be glob-escaped below
    will_glob = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # First layer: escaping with respect to globbing
        if will_glob and piece.quoted:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        # Second layer: escaping with respect to splitting.  Pieces that
        # should not be split are escaped by the splitter itself.
        if piece.do_split:
            frag = _BackslashEscape(frag)
        else:
            frag = self.splitter.Escape(frag)

        frags.append(frag)

    if 0:
        log('---')
        log('FRAGS')
        for i, frag in enumerate(frags):
            log('(%d) %s', i, frag)
        log('')

    # The whole frame is split as ONE string, so that adjacent pieces can
    # end up in the same arg
    flat = ''.join(frags)
    #log('flat: %r', flat)

    args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(args) == 0 and any_quoted:
        argv.append('')
        return

    #log('split args: %r', args)
    for a in args:
        if glob_.LooksLikeGlob(a):
            # Expand() appends to argv itself; a negative return value is
            # reported as a failed glob
            n = self.globber.Expand(a, argv)
            if n < 0:
                # TODO: location info, with span IDs carried through the frame
                raise error.FailGlob('Pattern %r matched no files' % a,
                                     loc.Missing)
        else:
            # Not a glob: undo the glob escaping applied above
            argv.append(glob_.GlobUnescape(a))
2207
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of strings, without splitting or globbing.

    Helper for _EvalAssignBuiltin.  Splitting and globbing are disabled for
    assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalWordToParts(w, evaluated, 0)  # not double quoted

    result = []  # type: List[str]
    frames = _MakeWordFrames(evaluated)
    for frame in frames:
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        # Each non-empty frame becomes exactly one arg: no split or glob
        fragments = [piece.s for piece in frame]
        result.append(''.join(fragments))
    #log('argv: %s', result)
    return result
2227
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin, passed through to the result.
      arg0: Name of the assignment builtin; it becomes the first flag.
      words: All the words of the command.
      meta_offset: The words after index meta_offset are evaluated here.

    Returns:
      cmd_value.Assign with flags, their locations (a list parallel to the
      flags), and the assignment pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the LHS, e.g. x=
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning "function" for
                    # all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # e.g. the names in declare -f name1 name2
                        flags.append(arg)
                        # Keep flag_locs parallel to flags.  Previously the
                        # location was omitted here, so the two lists had
                        # different lengths.
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2314
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """If arg0 names an assignment builtin, evaluate the words as one.

    Returns:
      The cmd_value.Assign from _EvalAssignBuiltin(), or None when
      consts.LookupAssignBuiltin() doesn't recognize arg0.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None
    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2322
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr(), but takes an evaluated part value.

    Only an unquoted part_value String is looked up; anything else gives
    None.
    """
    if val0.tag() != part_value_e.String:
        return None

    piece = cast(Piece, val0)
    if piece.quoted:
        return None
    return self._DetectAssignBuiltinStr(piece.s, words, meta_offset)
2331
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Words are not split.  Only words for which
    glob_.LooksLikeStaticGlob() is true are globbed.

    Returns:
      cmd_value.Argv, or the cmd_value.Assign detected from the first word
      when allow_assign is true.
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        if i == meta_offset and allow_assign:
            # No globbing in the first arg for command.Simple.
            first_args = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first_args) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    first_args[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            argv.extend(first_args)
            for _ in xrange(len(first_args)):
                arg_locs.append(w)

        elif glob_.LooksLikeStaticGlob(w):
            pattern = self.EvalWordToString(w)  # respects strict-array
            num_matches = self.globber.Expand(pattern.s, argv)
            if num_matches < 0:
                raise error.FailGlob(
                    'Pattern %r matched no files' % pattern.s, w)
            # One location per string that Expand() appended
            for _ in xrange(num_matches):
                arg_locs.append(w)

        else:
            part_vals = []  # type: List[part_value_t]
            self._EvalWordToParts(w, part_vals, 0)  # not quoted

            if 0:
                log('')
                log('Static: part_vals after _EvalWordToParts:')
                for entry in part_vals:
                    log('  %s', entry)

            # Still need to process
            frames = _MakeWordFrames(part_vals)

            if 0:
                log('')
                log('Static: frames after _MakeWordFrames:')
                for entry in frames:
                    log('  %s', entry)

            # We will still allow x"${a[@]}"x, though it's deprecated by @a,
            # which disallows such expressions at parse time.
            for frame in frames:
                if len(frame) == 0:  # empty array gives empty frame!
                    continue
                fragments = [piece.s for piece in frame]
                argv.append(''.join(fragments))  # no split or glob
                arg_locs.append(w)

    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2394
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: The words to evaluate.
      is_last_cmd: Passed through to cmd_value.Argv.
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Argv with parallel lists of strings and locations, or the
      cmd_value.Assign returned by assignment builtin detection when
      allow_assign is true.
    """
    # With simple_word_eval on, the simpler algorithm is used for everything
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    # first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    # "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    # off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    # n is len(strs) as of the end of the previous iteration.  It's used to
    # compute how many strings the current word added.
    n = 0
    for i, w in enumerate(words):
        # Fast path: FastStrEval() gave a string, so the word contributes
        # exactly one arg, with no splitting or globbing
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            # A meta builtin in command position shifts the command
            # position to the next word
            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            # Bug fix: n must be updated on every loop iteration
            n = len(strs)
            assert len(strs) == len(locs), strs
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.  Every string this word
        # produced is attributed to the word itself.
        n_next = len(strs)
        for _ in xrange(n_next - n):
            locs.append(w)
        n = n_next

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2502
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a flat list of strings, for arrays and for loops.

    Assignment builtins aren't allowed here, so the result of
    EvalWordSequence2() is always a cmd_value.Argv.
    """
    # The is_last_cmd argument is irrelevant, so pass False
    UP_cmd_val = self.EvalWordSequence2(words, False)
    assert UP_cmd_val.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, UP_cmd_val)
    return argv_val.argv
2513
2514
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command subs and process subs.

    It delegates them to self.shell_ex, which is None after construction
    and must be set before CheckCircularDeps() is called.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Not a constructor argument; assigned later
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been assigned."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and turn its stdout into a part value."""
        captured = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # A single piece, which is split only when it's not quoted
            return Piece(captured, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(captured)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        #lines = self.splitter.SplitForWordEval(captured)
        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path it yields as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2561
2562
# Placeholder that CompletionWordEvaluator._EvalCommandSub() returns in place
# of command sub output.
_DUMMY = '__NO_COMMAND_SUB__'
2564
2565
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to placeholder strings instead of
    being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees the
    inner command as the last one, and knows that it is not at the end of the
    line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the late-bound collaborators have been assigned."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder with the same shape as a real result."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)
        # The @() form yields an array
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder Piece; nothing is run."""
        # pretend it's quoted; no split or glob
        placeholder = '__NO_PROCESS_SUB__'
        return Piece(placeholder, True, False)
2605
2606
2607# vim: sw=4