OILS / osh / word_eval.py View on Github | oils.pub

2633 lines, 1629 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 error_code_e,
37 AssignArg,
38 a_index,
39 a_index_e,
40 VTestPlace,
41 VarSubState,
42 Piece,
43)
44from _devbuild.gen.option_asdl import option_i, builtin_i
45from _devbuild.gen.value_asdl import (
46 value,
47 value_e,
48 value_t,
49 sh_lvalue,
50 sh_lvalue_t,
51)
52from core import bash_impl
53from core import error
54from core import pyos
55from core import pyutil
56from core import state
57from display import ui
58from core import util
59from data_lang import j8
60from data_lang import j8_lite
61from core.error import e_die
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from mycpp import mops
66from mycpp.mylib import log, tagswitch, NewDict
67from osh import braces
68from osh import glob_
69from osh import string_ops
70from osh import word_
71from ysh import expr_eval
72from ysh import val_ops
73
74from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76if TYPE_CHECKING:
77 from _devbuild.gen.syntax_asdl import word_part_t
78 from _devbuild.gen.option_asdl import builtin_t
79 from core import optview
80 from core.state import Mem
81 from core.vm import _Executor
82 from osh.split import SplitContext
83 from osh import prompt
84 from osh import sh_expr_eval
85
# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both)
#
# Each flag occupies its own bit, so several of them can be OR'd together into
# a single 'eval_flags' integer (for example IS_SUBST | QUOTED).
QUOTED = 1 << 0
IS_SUBST = 1 << 1

EXTGLOB_FILES = 1 << 2  # allow @(cc) from file system?
EXTGLOB_MATCH = 1 << 3  # allow @(cc) in pattern matching?
EXTGLOB_NESTED = 1 << 4  # for @(one|!(two|three))

# For EvalWordToString
# NOTE(review): presumably these select how literal text is escaped (fnmatch
# pattern vs. ERE) -- confirm against EvalWordToString.
QUOTE_FNMATCH = 1 << 5
QUOTE_ERE = 1 << 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
# Consulted by ShouldArrayDecay() to allow decay even under strict_array.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} may stand for ${a[0]}.

    Decay is always allowed when strict_array is off.  With strict_array on,
    it is allowed only for a plain variable substitution whose name is one of
    the names listed in _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True

    if not is_plain_var_sub:
        return False

    return var_name in _STRING_AND_ARRAY
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    A BashArray yields its element at index 0 and a BashAssoc its element at
    key '0'.  The result is value.Str, or value.Undef when that element does
    not exist.  Any other value type is a programming error.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            first_elem, error_code = bash_impl.BashArray_GetElement(val, 0)

            # Note: index 0 should never cause the out-of-bound index error.
            assert error_code == error_code_e.OK

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            first_elem = bash_impl.BashAssoc_GetElement(val, '0')

        else:
            raise AssertionError(val.tag())

    if first_elem is not None:
        return value.Str(first_elem)
    return value.Undef
130
131
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    All of these cases have to be detected:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Assignment builtins have different WORD EVALUATION RULES for a=$x (no
    word splitting), so it seems hard to do this in meta_oils.Builtin() or
    meta_oils.Command().
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return (builtin_id == builtin_i.builtin or
            builtin_id == builtin_i.command)
148
149
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an UNQUOTED string naming a meta builtin.

    Array parts and quoted string pieces never count.
    """
    UP_val0 = val0
    if val0.tag() != part_value_e.String:
        return False

    val0 = cast(Piece, UP_val0)
    if val0.quoted:
        return False

    return _DetectMetaBuiltinStr(val0.s)
158
159
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Parse an argument to declare, export, etc. at runtime.

    This is the dynamic fallback for arguments that were not parsed
    statically.  Dies, blaming blame_word, when arg does not match
    consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    match = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if match is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Group 2 exists only for grouping; ERE doesn't have non-capturing groups
    name = match[1]
    operator = match[3]
    assert operator is not None, operator

    if len(operator) == 0:  # declare NAME  (no operator at all)
        rhs = None  # type: Optional[value_t]
        is_append = False
    else:  # declare NAME=...  (a leading '+' on the operator means append)
        rhs = value.Str(match[4])
        is_append = operator[0] == '+'

    return AssignArg(name, rhs, is_append, blame_word)
186
187
188# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
189def _BackslashEscape(s):
190 # type: (str) -> str
191 """Double up backslashes.
192
193 Useful for strings about to be globbed and strings about to be IFS
194 escaped.
195 """
196 return s.replace('\\', '\\\\')
197
198
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Convert the result of a variable substitution into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Strings (and stringified YSH values) become a Piece; the three bash
    container types become a part_value.Array of their values.  Any other
    value type raises error.TypeErr located at part_loc.
    """
    UP_val = val
    # Third Piece argument: always the opposite of 'quoted' here.
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            return Piece(val.s, quoted, unquoted)

        elif case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _ProcessUndef, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            array_strs = bash_impl.BashArray_GetValues(val)
            return part_value.Array(array_strs)

        elif case(value_e.SparseArray):
            val = cast(value.SparseArray, UP_val)
            sparse_strs = bash_impl.SparseArray_GetValues(val)
            return part_value.Array(sparse_strs)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            assoc_strs = bash_impl.BashAssoc_GetValues(val)
            return part_value.Array(assoc_strs)

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            blame_loc = loc.WordPart(part_loc)
            s = val_ops.Stringify(val, blame_loc, 'Word eval ')
            return Piece(s, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
242
243
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the flat part_values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args; frames are never joined
    with each other.  The idea exists because of arrays like "$@" and
    "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    Rules implemented below:
    - A String part is appended to the current frame.
    - The first defined element of an Array part is appended to the current
      frame; every later defined element starts a new frame.
    - Undefined (None) array elements are skipped.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame, change the
      representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end
      This is probably a lot more efficient for a common case like "$x" or
      "${x}".
    - Then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    frame = []  # type: List[Piece]
    result = [frame]

    for part in part_vals:
        UP_part = part

        with tagswitch(part) as case:
            if case(part_value_e.String):
                part = cast(Piece, UP_part)
                frame.append(part)

            elif case(part_value_e.Array):
                part = cast(part_value.Array, UP_part)

                seen_elem = False
                for elem in part.strs:
                    if elem is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would
                    # have decayed to a string.
                    piece = Piece(elem, True, False)
                    if seen_elem:
                        # Every element after the first gets its own frame
                        frame = [piece]
                        result.append(frame)
                    else:
                        frame.append(piece)
                        seen_elem = True

            else:
                raise AssertionError()

    return result
311
312
313# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values to one string, e.g. decay ${a=x"$@"x} to string.

    String parts contribute their text as is.  Array parts contribute their
    defined elements joined by join_char.  The contributions are then
    concatenated with no separator.
    """
    chunks = []  # type: List[str]
    for part in part_vals:
        UP_part = part
        with tagswitch(part) as case:
            if case(part_value_e.Array):
                part = cast(part_value.Array, UP_part)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for elem in part.strs:
                    if elem is not None:
                        defined.append(elem)
                chunks.append(join_char.join(defined))

            elif case(part_value_e.String):
                part = cast(Piece, UP_part)
                chunks.append(part.s)

            else:
                raise AssertionError()

    return ''.join(chunks)
332
333
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Apply a slice (offset plus optional length) to a string or an array.

    Args:
      val: the value to slice; Str, BashArray and SparseArray are supported.
      offset: start position.  A negative offset counts back from the end.
      length: only consulted when has_length is true.
      has_length: whether a length was given at all.
      part: used only to attach a location to errors.
      arg0_val: when not None, the array is treated as positional arguments
        and $0 is counted as its first entry (for $@ and $*).

    Returns:
      value.Str for a Str input; value.BashArray for either array type.

    Fails via e_die() for a negative array length and for BashAssoc, and
    raises error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            begin = mops.BigTruncate(offset)
            if begin < 0:  # Compute offset with unicode
                # Walk backwards from the end, one character per step
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    # (counted in characters back from the end of the string)
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    # A non-negative length is counted forward from byte_begin
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                offset = mops.Add(offset, array_length)

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # Still negative after adding the length: nothing to return
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*) counts $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        # Shift to an index into the array, which excludes $0
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # Offset 0 selects $0 itself, which uses up one slot
                        # of the requested length.
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    orig = bash_impl.BashArray_GetValues(val)
                    n = len(orig)

                    strs = []
                    i = mops.BigTruncate(offset)
                    count = 0
                    while i < n:
                        if has_length and count == length:  # length could be 0
                            break
                        s = orig[i]
                        if s is not None:  # Unset elements don't count towards the length
                            strs.append(s)
                            count += 1
                        i += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort so
                    # that we only extract the first LENGTH elements of the
                    # indices greater or equal to OFFSET.
                    # i = number of keys that precede the first key >= offset
                    i = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        i = i + 1

                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[i:i +
                                                                    length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[i:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    new_list = [arg0_val.s]
                    new_list.extend(strs)
                    strs = new_list

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
471
472
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator

    It declares only EvalWordToString(); the implementation is supplied by
    subclasses.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate word w to a single string value.

        Abstract: this base version always raises NotImplementedError.
        eval_flags is a bit set that defaults to 0.
        """
        raise NotImplementedError()
484
485
486def _GetDollarHyphen(exec_opts):
487 # type: (optview.Exec) -> str
488 chars = [] # type: List[str]
489 if exec_opts.interactive():
490 chars.append('i')
491
492 if exec_opts.errexit():
493 chars.append('e')
494 if exec_opts.noglob():
495 chars.append('f')
496 if exec_opts.noexec():
497 chars.append('n')
498 if exec_opts.nounset():
499 chars.append('u')
500 # NO letter for pipefail?
501 if exec_opts.xtrace():
502 chars.append('x')
503 if exec_opts.noclobber():
504 chars.append('C')
505
506 # bash has:
507 # - c for sh -c, i for sh -i (mksh also has this)
508 # - h for hashing (mksh also has this)
509 # - B for brace expansion
510 return ''.join(chars)
511
512
class TildeEvaluator(object):
    """Expands ~ and ~user to a home directory."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Return the current user's home dir: $HOME first, then libc.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, ENV.HOME, ...
        home = self.mem.env_config.Get('HOME')
        if home is None:
            # Then ask the OS.  This is what bash does.
            home = pyos.GetMyHomeDir()
        return home

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the lookup fails, either die (under strict_tilde) or return the
        tilde expression literally.
        """
        if part.user_name is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(part.user_name)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if part.user_name is None:
            return '~'
        return '~' + part.user_name  # mycpp doesn't have +=
554
555
556class AbstractWordEvaluator(StringWordEvaluator):
557 """Abstract base class for word evaluators.
558
559 Public entry points:
560 EvalWordToString EvalForPlugin EvalRhsWord
561 EvalWordSequence EvalWordSequence2
562 """
563
    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        """Store the collaborators and create the Globber.

        arith_ev, expr_ev, prompt_ev and unsafe_arith start out as None here.
        NOTE(review): presumably they are assigned after construction because
        the evaluators refer to each other (see CheckCircularDeps) -- confirm
        against the code that wires up the shell.
        """
        self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
        self.expr_ev = None  # type: expr_eval.ExprEvaluator
        self.prompt_ev = None  # type: prompt.Evaluator

        self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith

        self.tilde_ev = tilde_ev

        self.mem = mem  # for $HOME, $1, etc.
        self.exec_opts = exec_opts  # for nounset
        self.mutable_opts = mutable_opts  # for _allow_command_sub
        self.splitter = splitter
        self.errfmt = errfmt

        # The Globber is built from the same option view
        self.globber = glob_.Globber(exec_opts)
589
    def CheckCircularDeps(self):
        # type: () -> None
        """Abstract: this base version always raises NotImplementedError.

        NOTE(review): presumably subclasses use this to verify that the
        attributes initialized to None in __init__ have been set -- confirm
        against the subclasses.
        """
        raise NotImplementedError()
593
    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Abstract since it has a side effect.

        This base version always raises NotImplementedError; subclasses
        supply the evaluation of the command substitution cs_part.
        """
        raise NotImplementedError()
598
    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> part_value_t
        """Abstract since it has a side effect.

        This base version always raises NotImplementedError; subclasses
        supply the evaluation of the process substitution cs_part.
        """
        raise NotImplementedError()
603
604 def _EvalVarNum(self, var_num):
605 # type: (int) -> value_t
606 assert var_num >= 0
607 return self.mem.GetArgNum(var_num)
608
609 def _EvalSpecialVar(self, op_id, quoted, vsub_state):
610 # type: (int, bool, VarSubState) -> value_t
611 """Evaluate $?
612
613 and so forth
614 """
615 # $@ is special -- it need to know whether it is in a double quoted
616 # context.
617 #
618 # - If it's $@ in a double quoted context, return an ARRAY.
619 # - If it's $@ in a normal context, return a STRING, which then will be
620 # subject to splitting.
621
622 if op_id in (Id.VSub_At, Id.VSub_Star):
623 argv = self.mem.GetArgv()
624 val = value.BashArray(argv) # type: value_t
625 if op_id == Id.VSub_At:
626 # "$@" evaluates to an array, $@ should be decayed
627 vsub_state.join_array = not quoted
628 else: # $* "$*" are both decayed
629 vsub_state.join_array = True
630
631 elif op_id == Id.VSub_Hyphen:
632 val = value.Str(_GetDollarHyphen(self.exec_opts))
633
634 else:
635 val = self.mem.GetSpecialVar(op_id)
636
637 return val
638
639 def _ApplyTestOp(
640 self,
641 val, # type: value_t
642 op, # type: suffix_op.Unary
643 quoted, # type: bool
644 part_vals, # type: Optional[List[part_value_t]]
645 vtest_place, # type: VTestPlace
646 blame_token, # type: Token
647 vsub_state, # type: VarSubState
648 ):
649 # type: (...) -> bool
650 """
651 Returns:
652 Whether part_vals was mutated
653
654 ${a:-} returns part_value[]
655 ${a:+} returns part_value[]
656 ${a:?error} returns error word?
657 ${a:=} returns part_value[] but also needs self.mem for side effects.
658
659 So I guess it should return part_value[], and then a flag for raising
660 an error, and then a flag for assigning it?
661 The original BracedVarSub will have the name.
662
663 Example of needing multiple part_value[]
664
665 echo X-${a:-'def'"ault"}-X
666
667 We return two part values from the BracedVarSub. Also consider:
668
669 echo ${a:-x"$@"x}
670 """
671 eval_flags = IS_SUBST
672 if quoted:
673 eval_flags |= QUOTED
674
675 tok = op.op
676 # NOTE: Splicing part_values is necessary because of code like
677 # ${undef:-'a b' c 'd # e'}. Each part_value can have a different
678 # do_glob/do_elide setting.
679 UP_val = val
680 with tagswitch(val) as case:
681 if case(value_e.Undef):
682 is_falsey = True
683
684 elif case(value_e.Str):
685 val = cast(value.Str, UP_val)
686 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
687 Id.VTest_ColonQMark, Id.VTest_ColonPlus):
688 is_falsey = len(val.s) == 0
689 else:
690 is_falsey = False
691
692 elif case(value_e.BashArray, value_e.SparseArray,
693 value_e.BashAssoc):
694 if val.tag() == value_e.BashArray:
695 val = cast(value.BashArray, UP_val)
696 strs = bash_impl.BashArray_GetValues(val)
697 elif val.tag() == value_e.SparseArray:
698 val = cast(value.SparseArray, UP_val)
699 strs = bash_impl.SparseArray_GetValues(val)
700 elif val.tag() == value_e.BashAssoc:
701 val = cast(value.BashAssoc, UP_val)
702 strs = bash_impl.BashAssoc_GetValues(val)
703 else:
704 raise AssertionError()
705
706 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
707 Id.VTest_ColonQMark, Id.VTest_ColonPlus):
708 # "$*" - the separator is the first character of IFS
709 # $* $@ "$@" - the separator is a space
710 if quoted and vsub_state.join_array:
711 sep_width = len(self.splitter.GetJoinChar())
712 else:
713 sep_width = 1
714
715 # We test whether the joined string will be empty. When
716 # the separator is empty, all the elements need to be
717 # empty. When the separator is non-empty, one element is
718 # allowed at most and needs to be an empty string if any.
719 if sep_width == 0:
720 is_falsey = True
721 for s in strs:
722 if len(s) != 0:
723 is_falsey = False
724 break
725 else:
726 is_falsey = len(strs) == 0 or (len(strs) == 1 and
727 len(strs[0]) == 0)
728 else:
729 # TODO: allow undefined
730 is_falsey = len(strs) == 0
731
732 else:
733 # value.Eggex, etc. are all false
734 is_falsey = False
735
736 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
737 if is_falsey:
738 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
739 return True
740 else:
741 return False
742
743 # Inverse of the above.
744 elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
745 if is_falsey:
746 return False
747 else:
748 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
749 return True
750
751 # Splice and assign
752 elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
753 if is_falsey:
754 # Collect new part vals.
755 assign_part_vals = [] # type: List[part_value_t]
756 self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
757 eval_flags)
758 # Append them to out param AND return them.
759 part_vals.extend(assign_part_vals)
760
761 if vtest_place.name is None:
762 # TODO: error context
763 e_die("Can't assign to special variable")
764 else:
765 # NOTE: This decays arrays too! 'shopt -s strict_array' could
766 # avoid it.
767 rhs_str = _DecayPartValuesToString(
768 assign_part_vals, self.splitter.GetJoinChar())
769 if vtest_place.index is None: # using None when no index
770 lval = location.LName(
771 vtest_place.name) # type: sh_lvalue_t
772 else:
773 var_name = vtest_place.name
774 var_index = vtest_place.index
775 UP_var_index = var_index
776
777 with tagswitch(var_index) as case:
778 if case(a_index_e.Int):
779 var_index = cast(a_index.Int, UP_var_index)
780 lval = sh_lvalue.Indexed(
781 var_name, var_index.i, loc.Missing)
782 elif case(a_index_e.Str):
783 var_index = cast(a_index.Str, UP_var_index)
784 lval = sh_lvalue.Keyed(var_name, var_index.s,
785 loc.Missing)
786 else:
787 raise AssertionError()
788
789 state.OshLanguageSetValue(self.mem, lval,
790 value.Str(rhs_str))
791 return True
792
793 else:
794 return False
795
796 elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
797 if is_falsey:
798 # The arg is the error message
799 error_part_vals = [] # type: List[part_value_t]
800 self._EvalRhsWordToParts(op.arg_word, error_part_vals,
801 eval_flags)
802 error_str = _DecayPartValuesToString(
803 error_part_vals, self.splitter.GetJoinChar())
804
805 #
806 # Display fancy/helpful error
807 #
808 if vtest_place.name is None:
809 var_name = '???'
810 else:
811 var_name = vtest_place.name
812
813 if 0:
814 # This hint is nice, but looks too noisy for now
815 op_str = lexer.LazyStr(tok)
816 if tok.id == Id.VTest_ColonQMark:
817 why = 'empty or unset'
818 else:
819 why = 'unset'
820
821 self.errfmt.Print_(
822 "Hint: operator %s means a variable can't be %s" %
823 (op_str, why), tok)
824
825 if val.tag() == value_e.Undef:
826 actual = 'unset'
827 else:
828 actual = 'empty'
829
830 if len(error_str):
831 suffix = ': %r' % error_str
832 else:
833 suffix = ''
834 e_die("Var %s is %s%s" % (var_name, actual, suffix),
835 blame_token)
836
837 else:
838 return False
839
840 else:
841 raise AssertionError(tok.id)
842
843 def _Count(self, val, token):
844 # type: (value_t, Token) -> int
845 """Returns the length of the value, for ${#var}"""
846 UP_val = val
847 with tagswitch(val) as case:
848 if case(value_e.Str):
849 val = cast(value.Str, UP_val)
850 # NOTE: Whether bash counts bytes or chars is affected by LANG
851 # environment variables.
852 # Should we respect that, or another way to select? set -o
853 # count-bytes?
854
855 # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
856 try:
857 count = string_ops.CountUtf8Chars(val.s)
858 except error.Strict as e:
859 # Add this here so we don't have to add it so far down the stack.
860 # TODO: It's better to show BOTH this CODE an the actual DATA
861 # somehow.
862 e.location = token
863
864 if self.exec_opts.strict_word_eval():
865 raise
866 else:
867 # NOTE: Doesn't make the command exit with 1; it just returns a
868 # length of -1.
869 self.errfmt.PrettyPrintError(e, prefix='warning: ')
870 return -1
871
872 elif case(value_e.BashArray):
873 val = cast(value.BashArray, UP_val)
874 count = bash_impl.BashArray_Count(val)
875
876 elif case(value_e.BashAssoc):
877 val = cast(value.BashAssoc, UP_val)
878 count = bash_impl.BashAssoc_Count(val)
879
880 elif case(value_e.SparseArray):
881 val = cast(value.SparseArray, UP_val)
882 count = bash_impl.SparseArray_Count(val)
883
884 else:
885 raise error.TypeErr(
886 val, "Length op expected Str, BashArray, BashAssoc", token)
887
888 return count
889
890 def _Keys(self, val, token):
891 # type: (value_t, Token) -> value_t
892 """Return keys of a container, for ${!array[@]}"""
893
894 UP_val = val
895 with tagswitch(val) as case:
896 if case(value_e.BashArray):
897 val = cast(value.BashArray, UP_val)
898 indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
899 return value.BashArray(indices)
900
901 elif case(value_e.BashAssoc):
902 val = cast(value.BashAssoc, UP_val)
903 assert val.d is not None # for MyPy, so it's not Optional[]
904
905 # BUG: Keys aren't ordered according to insertion!
906 keys = bash_impl.BashAssoc_GetKeys(val)
907 return value.BashArray(keys)
908
909 else:
910 raise error.TypeErr(val, 'Keys op expected Str', token)
911
912 def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
913 # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
914 """Handles indirect expansion like ${!var} and ${!a[0]}.
915
916 Args:
917 blame_tok: 'foo' for ${!foo}
918 """
919 UP_val = val
920 with tagswitch(val) as case:
921 if case(value_e.Undef):
922 # bash-4.4 returned value.Undef here. bash-5.0 started to treat
923 # the variable name to be empty so that the indirection fails.
924 var_ref_str = ''
925
926 elif case(value_e.Str):
927 val = cast(value.Str, UP_val)
928 var_ref_str = val.s
929
930 elif case(value_e.BashArray): # caught earlier but OK
931 val = cast(value.BashArray, UP_val)
932 # When there are more than one element in the array, this
933 # produces a wrong variable name containing spaces.
934 var_ref_str = ' '.join(bash_impl.BashArray_GetValues(val))
935
936 elif case(value_e.BashAssoc): # caught earlier but OK
937 val = cast(value.BashAssoc, UP_val)
938 var_ref_str = ' '.join(bash_impl.BashAssoc_GetValues(val))
939
940 else:
941 raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
942
943 try:
944 bvs_part = self.unsafe_arith.ParseVarRef(var_ref_str, blame_tok)
945 except error.FatalRuntime as e:
946 raise error.VarSubFailure(e.msg, e.location)
947
948 return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
949
950 def _ApplyUnarySuffixOp(self, val, op):
951 # type: (value_t, suffix_op.Unary) -> value_t
952 assert val.tag() != value_e.Undef
953
954 op_kind = consts.GetKind(op.op.id)
955
956 if op_kind == Kind.VOp1:
957 # NOTE: glob syntax is supported in ^ ^^ , ,, ! As well as % %% # ##.
958 # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
959 # shortcut for constant strings.
960 arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
961 assert arg_val.tag() == value_e.Str
962
963 UP_val = val
964 with tagswitch(val) as case:
965 if case(value_e.Str):
966 val = cast(value.Str, UP_val)
967 s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
968 has_extglob)
969 #log('%r %r -> %r', val.s, arg_val.s, s)
970 new_val = value.Str(s) # type: value_t
971
972 elif case(value_e.BashArray, value_e.SparseArray,
973 value_e.BashAssoc):
974 # get values
975 if val.tag() == value_e.BashArray:
976 val = cast(value.BashArray, UP_val)
977 values = bash_impl.BashArray_GetValues(val)
978 elif val.tag() == value_e.SparseArray:
979 val = cast(value.SparseArray, UP_val)
980 values = bash_impl.SparseArray_GetValues(val)
981 elif val.tag() == value_e.BashAssoc:
982 val = cast(value.BashAssoc, UP_val)
983 values = bash_impl.BashAssoc_GetValues(val)
984 else:
985 raise AssertionError()
986
987 # ${a[@]#prefix} is VECTORIZED on arrays. YSH should have this too.
988 strs = [
989 string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
990 has_extglob) for s in values
991 ]
992 new_val = value.BashArray(strs)
993
994 else:
995 raise error.TypeErr(
996 val, 'Unary op expected Str, BashArray, BashAssoc',
997 op.op)
998
999 else:
1000 raise AssertionError(Kind_str(op_kind))
1001
1002 return new_val
1003
1004 def _PatSub(self, val, op):
1005 # type: (value_t, suffix_op.PatSub) -> value_t
1006
1007 pat_val, has_extglob = self.EvalWordToPattern(op.pat)
1008 # Extended globs aren't supported because we only translate * ? etc. to
1009 # ERE. I don't think there's a straightforward translation from !(*.py) to
1010 # ERE! You would need an engine that supports negation? (Derivatives?)
1011 if has_extglob:
1012 e_die('extended globs not supported in ${x//GLOB/}', op.pat)
1013
1014 if op.replace:
1015 replace_val = self.EvalRhsWord(op.replace)
1016 # Can't have an array, so must be a string
1017 assert replace_val.tag() == value_e.Str, replace_val
1018 replace_str = cast(value.Str, replace_val).s
1019 else:
1020 replace_str = ''
1021
1022 # note: doesn't support self.exec_opts.extglob()!
1023 regex, warnings = glob_.GlobToERE(pat_val.s)
1024 if len(warnings):
1025 # TODO:
1026 # - Add 'shopt -s strict_glob' mode and expose warnings.
1027 # "Glob is not in CANONICAL FORM".
1028 # - Propagate location info back to the 'op.pat' word.
1029 pass
1030 #log('regex %r', regex)
1031 replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)
1032
1033 with tagswitch(val) as case2:
1034 if case2(value_e.Str):
1035 str_val = cast(value.Str, val)
1036 s = replacer.Replace(str_val.s, op)
1037 val = value.Str(s)
1038
1039 elif case2(value_e.BashArray, value_e.SparseArray,
1040 value_e.BashAssoc):
1041 if val.tag() == value_e.BashArray:
1042 array_val = cast(value.BashArray, val)
1043 values = bash_impl.BashArray_GetValues(array_val)
1044 elif val.tag() == value_e.SparseArray:
1045 sparse_val = cast(value.SparseArray, val)
1046 values = bash_impl.SparseArray_GetValues(sparse_val)
1047 elif val.tag() == value_e.BashAssoc:
1048 assoc_val = cast(value.BashAssoc, val)
1049 values = bash_impl.BashAssoc_GetValues(assoc_val)
1050 else:
1051 raise AssertionError()
1052 strs = [replacer.Replace(s, op) for s in values]
1053 val = value.BashArray(strs)
1054
1055 else:
1056 raise error.TypeErr(
1057 val, 'Pat Sub op expected Str, BashArray, BashAssoc',
1058 op.slash_tok)
1059
1060 return val
1061
    def _Slice(self, val, op, var_name, part):
        # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
        """Evaluate the slice expressions in op and apply them to val.

        The offset and optional length are evaluated as arithmetic; the
        slicing itself is delegated to _PerformSlice().

        Args:
          var_name: None means the slice is applied to $* or $@, in which
            case $0 is passed along so that it can be counted.
          part: passed to _PerformSlice().

        An error.Strict from the slicing is re-raised under strict_word_eval.
        Otherwise it is printed as a warning and an empty Str / BashArray is
        returned instead.
        """

        begin = self.arith_ev.EvalToBigInt(op.begin)

        # Note: a negative length is passed through as is.  _PerformSlice()
        # interprets it as an end position for strings and rejects it for
        # arrays.  length = -1 is only a placeholder for "no length given";
        # has_length is what callers must check.
        has_length = False
        length = -1
        if op.length:
            has_length = True
            length = self.arith_ev.EvalToInt(op.length)

        try:
            arg0_val = None  # type: value.Str
            if var_name is None:  # $* or $@
                arg0_val = self.mem.GetArg0()
            val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
        except error.Strict as e:
            if self.exec_opts.strict_word_eval():
                raise
            else:
                self.errfmt.PrettyPrintError(e, prefix='warning: ')
                # Substitute an empty value of the same kind as the input
                with tagswitch(val) as case2:
                    if case2(value_e.Str):
                        val = value.Str('')
                    elif case2(value_e.BashArray):
                        val = value.BashArray([])
                    else:
                        raise NotImplementedError()
        return val
1093
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value_t, bool]
    """Apply a nullary suffix op: ${x@P}, ${x@Q}, or ${x@a}.

    Args:
      val: value the operator is applied to
      op: operator token; op.id selects the transformation
      var_name: variable name, or None when there is none (e.g. ${?@a})
      vsub_token: token passed to _ProcessUndef for error reporting
      vsub_state: state of this substitution; array_ref and h_value are
        read here

    Returns:
      (result, quoted2).  quoted2 is True only when @Q is applied to a
      Str; it is False in every other case.

    Any other operator, and @P / @Q applied to an unsupported value type,
    fails through e_die().
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # @P: evaluate each string as a prompt
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')  # type: value_t
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val.s)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            elif case(value_e.BashArray, value_e.SparseArray,
                      value_e.BashAssoc):
                # Collect the element strings; None entries of a
                # BashArray are skipped.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    values = bash_impl.SparseArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    self.prompt_ev.EvalPrompt(s).replace(
                        '\x01', '').replace('\x02', '') for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # @Q: shell-quote each string
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                if vsub_state.array_ref is not None:
                    result = value.BashArray([])
                else:
                    result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.SparseArray,
                      value_e.BashAssoc):
                # Same element collection as for @P above.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    values = bash_impl.SparseArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # @a: attribute flags of the variable
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # We're ONLY simulating -a and -A, not -r -x -n for now.  See
        # spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        # The array-ness flag comes from the holder value (h_value), not
        # from val, which may already be a single element.
        with tagswitch(vsub_state.h_value) as case:
            if case(value_e.BashArray, value_e.SparseArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        # The flag string is repeated once per element: 0 times for
        # Undef, once for any value that is not an array type.
        count = 1
        with tagswitch(val) as case:
            if case(value_e.Undef):
                count = 0
            elif case(value_e.BashArray):
                val = cast(value.BashArray, UP_val)
                count = bash_impl.BashArray_Count(val)
            elif case(value_e.SparseArray):
                val = cast(value.SparseArray, UP_val)
                count = bash_impl.SparseArray_Count(val)
            elif case(value_e.BashAssoc):
                val = cast(value.BashAssoc, UP_val)
                count = bash_impl.BashAssoc_Count(val)

        result = value.BashArray([''.join(chars)] * count)

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1225
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Process the [@] or [*] bracket op of a braced var sub.

    Records in vsub_state.join_array whether the array should be joined
    later, and for an Undef value saves the name token in
    vsub_state.array_ref for a later error message.

    Returns:
      val, unmodified.
    """
    whole = cast(bracket_op.WholeArray, part.bracket_op)
    op_id = whole.op_id

    if op_id == Id.Arith_Star:
        op_str = '*'
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
    elif op_id == Id.Lit_At:
        op_str = '@'
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
    else:
        raise AssertionError(op_id)  # unknown

    tag = val.tag()
    if tag == value_e.Undef:
        # For an undefined array, we save the token of the array
        # reference for the later error message.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str:
        if self.exec_opts.strict_array():
            e_die("Can't index string with %s" % op_str,
                  loc.WordPart(part))

    # Nothing to do for BashArray, SparseArray, and BashAssoc.  The other
    # YSH types such as List, Dict, and Float are not supported; error
    # messages will be printed later, so they are returned as-is too.
    return val
1258
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process a numeric array index like ${a[i+1]}

    Args:
      val: value being indexed
      part: the BracedVarSub; its bracket_op holds the index expression
      vtest_place: out param; .index is set to the evaluated index or key
        for BashArray, SparseArray, and BashAssoc

    Returns:
      value.Str for the element, or value.Undef when the lookup yields
      None.  An Undef input is returned unchanged, without evaluating
      the index expression.

    A Str input fails through e_die(); any other unsupported type raises
    error.TypeErr.
    """
    anode = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.name_tok)

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            index = self.arith_ev.EvalToInt(anode)
            vtest_place.index = a_index.Int(index)  # out param

            s, error_code = bash_impl.BashArray_GetElement(
                array_val, index)
            if error_code == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                self.errfmt.Print_(
                    "Index %d out of bounds for array of length %d" %
                    (index, bash_impl.BashArray_Length(array_val)),
                    blame_loc=part.name_tok)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        elif case2(value_e.SparseArray):
            sparse_val = cast(value.SparseArray, UP_val)
            # The index is evaluated as a big integer here; the copy
            # stored in vtest_place is truncated.
            big_index = self.arith_ev.EvalToBigInt(anode)
            vtest_place.index = a_index.Int(mops.BigTruncate(big_index))

            s, error_code = bash_impl.SparseArray_GetElement(
                sparse_val, big_index)
            if error_code == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                big_length = bash_impl.SparseArray_Length(sparse_val)
                self.errfmt.Print_(
                    "Index %s out of bounds for array of length %s" %
                    (mops.ToStr(big_index), mops.ToStr(big_length)),
                    blame_loc=part.name_tok)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                anode, blame_loc=location.TokenForArith(anode))

            vtest_place.index = a_index.Str(key)  # out param
            s = bash_impl.BashAssoc_GetElement(assoc_val, key)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1337
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part, in QUOTED mode.

    Args:
      parts: the word parts between the double quotes
      part_vals: output param to append to.
    """
    # Example of returning array:
    # $ a=(1 2); b=(3); $ c=(4 5)
    # $ argv "${a[@]}${b[@]}${c[@]}"
    # ['1', '234', '5']
    #
    # Example of multiple parts
    # $ argv "${a[@]}${undef[@]:-${c[@]}}"
    # ['1', '24', '5']

    if len(parts) != 0:
        for p in parts:
            self._EvalWordPart(p, part_vals, QUOTED)
        return

    # Special case for "".  The parser outputs (DoubleQuoted []), instead
    # of (DoubleQuoted [Literal '']).  This is better but it means we
    # have to check for it.
    part_vals.append(Piece('', True, False))
1364
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a DoubleQuoted part and flatten the result to one string.

    For double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    # Errors are blamed on the left token of the part
    return self._ConcatPartVals(evaluated, dq_part.left)
1374
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay $* to a string.

    The elements that are not None are joined with the splitter's join
    character.
    """
    assert val.tag() == value_e.BashArray, val
    sep = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for item in bash_impl.BashArray_GetValues(val):
        if item is not None:
            present.append(item)
    return value.Str(sep.join(present))
1382
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty one, or fail under nounset.

    Args:
      val: value to check; anything other than Undef is returned as-is
      name_tok: token naming the variable, used in the error message
      vsub_state: if array_ref is set, the undefined thing is an array

    Returns:
      val itself when defined; otherwise an empty BashArray (array
      reference) or an empty Str.

    With nounset enabled, an Undef value fails through e_die() instead.
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    array_tok = vsub_state.array_ref
    if array_tok is not None:
        if self.exec_opts.nounset():
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
        return value.BashArray([])

    if self.exec_opts.nounset():
        name = lexer.TokenVal(name_tok)
        # Drop the leading sigil so the message shows the bare name
        if name.startswith('$'):
            name = name[1:]
        e_die('Undefined variable %r' % name, name_tok)
    return value.Str('')
1404
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a braced var sub, if there is one.

    With a bracket op, dispatches to _WholeArray or _ArrayIndex.  Without
    one, a named BashArray or BashAssoc either decays to a scalar or is
    rejected through e_die(); every other value is returned unchanged.
    """
    if part.bracket_op:
        with tagswitch(part.bracket_op) as case:
            if case(bracket_op_e.WholeArray):
                val = self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                val = self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(part.bracket_op.tag())
        return val

    # no bracket op
    var_name = vtest_place.name
    if var_name is None:
        return val
    if val.tag() not in (value_e.BashArray, value_e.BashAssoc):
        return val

    if not ShouldArrayDecay(var_name, self.exec_opts,
                            not (part.prefix_op or part.suffix_op)):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)"
            % var_name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1433
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value a BracedVarSub names and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t.
    """
    name_id = part.name_tok.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if name_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif name_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(name_id, quoted, vsub_state)

    # update h-value (i.e., the holder of the current value)
    vsub_state.h_value = val

    # We don't need var_index because it's only for L-Values of test ops?
    if self.exec_opts.eval_unsafe_arith():
        return self._EvalBracketOp(val, part, quoted, vsub_state,
                                   vtest_place)

    # Otherwise the bracket op is evaluated with _allow_command_sub
    # turned off.
    with state.ctx_Option(self.mutable_opts,
                          [option_i._allow_command_sub], False):
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)
    return val
1466
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a braced var sub like ${x}, ${#x}, ${!x}, ${x[i]}, ${x%pat}.

    Args:
      part: the BracedVarSub to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context

    Several paths append to part_vals and return early: the ${!prefix@}
    query, the ${#x} length, and a test op that already appended its
    result.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array of names; the
                # other forms are joined into one piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
    # Remember the value before the bracket op is applied; _Nullary reads
    # it for @a.
    vsub_state.h_value = val

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_State.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok,
                                         vsub_state):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1647
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Flatten a list of evaluated parts into a single string.

    Args:
      part_vals: String and Array part values; anything else is an
        AssertionError
      location: blamed if an Array part is found under strict_array

    Returns:
      The concatenation of all parts.  Without strict_array, the
      non-None elements of an Array part are joined with a space.
    """
    pieces = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                pieces.append(piece.s)

            elif case(part_value_e.Array):
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)

                arr = cast(part_value.Array, UP_pv)
                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = []  # type: List[str]
                for item in arr.strs:
                    if item is not None:
                        present.append(item)
                pieces.append(' '.join(present))

            else:
                raise AssertionError()

    return ''.join(pieces)
1677
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a braced var sub, unquoted, and flatten it to one string.

    Presumably used where a ${foo} substitution appears in a YSH
    expression -- confirm against callers.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    # blame ${ location
    return self._ConcatPartVals(evaluated, part.left)
1688
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a simple var sub like $foo, $1, or $@.

    Args:
      part: the SimpleVarSub to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context
    """
    tok = part.tok
    tok_id = tok.id

    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok_id == Id.VSub_DollarName:
        var_name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(var_name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(var_name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._ProcessUndef(val, tok, vsub_state)

    # An array is joined into a string only when join_array was requested
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1729
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple var sub, unquoted, and flatten it to one string.

    Presumably used where a $foo substitution appears in a YSH
    expression -- confirm against callers.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    # Errors are blamed on the token of the substitution
    return self._ConcatPartVals(evaluated, node.tok)
1739
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Appends the opening operator, each evaluated arm separated by '|',
    and the closing ')' to part_vals.
    """
    op = part.op
    # The ExtGlob_Comma form is emitted as '@('
    op_str = '@(' if op.id == Id.ExtGlob_Comma else lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(op_str, False, False))

    first = True
    for arm in part.arms:
        if not first:
            part_vals.append(Piece('|', False, False))  # separator
        first = False
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1757
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: flattened part values of the word
      w: the word, used only as the error location
      glob_parts: output param; each nested ExtGlob contributes a
        single '*'
      fnmatch_parts: output param; receives the full pattern text,
        including the contents of nested ExtGlob parts

    An Array part fails through e_die().
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(part_val.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    s = part_val.s
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # keep appending fnmatch_parts, but replace glob_parts
                # with '*': the nested glob parts go to a throwaway list
                self._TranslateExtGlob(part_val.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1793
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate
      part_vals: output param to append to.
      flags: bit flags; only QUOTED and IS_SUBST are read here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are not valid as a part of a word
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            # The left token tells command subs from process subs
            id_ = part.left_token.id
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are emitted as unquoted pieces around the
            # evaluated child word, if there is one.
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1910
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending its parts to part_vals.

    An Empty word contributes one empty Piece; a Compound word is handed
    to _EvalWordToParts with the same flags.
    """
    tag = w.tag()
    if tag == rhs_word_e.Compound:
        self._EvalWordToParts(cast(CompoundWord, w),
                              part_vals,
                              eval_flags=eval_flags)

    elif tag == rhs_word_e.Empty:
        quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', quoted, not quoted))

    else:
        raise AssertionError()
1926
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Evaluates every part of the word.  A word with an extended glob part
    is expanded against files (EXTGLOB_FILES), passed through flattened
    (EXTGLOB_NESTED), or rejected through e_die().

    Returns:
      Appends to part_vals.  Note that this is a TREE.
    """
    # Does the word have an extended glob?  This is a special case because
    # of the way we use glob() and then fnmatch(..., FNM_EXTMATCH) to
    # implement extended globs.  It's hard to carry that extra information
    # all the way past the word splitting stage.

    # OSH semantic limitations: If a word has an extended glob part, then
    # 1. It can't have an array
    # 2. Word splitting of unquoted words isn't respected

    word_part_vals = []  # type: List[part_value_t]
    has_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            has_extglob = True
        self._EvalWordPart(p, word_part_vals, eval_flags)

    if not has_extglob:
        part_vals.extend(word_part_vals)
        return

    # Caller REQUESTED extglob evaluation, AND we parsed word_part.ExtGlob()
    if bool(eval_flags & EXTGLOB_FILES):
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts

        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(word_part_vals, w, glob_parts,
                               fnmatch_parts)

        #log('word_part_vals %s', word_part_vals)
        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)
        #log("glob %s fnmatch %s", glob_pat, fnmatch_pat)

        results = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, results)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(results))
        return

    if bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(word_part_vals)
        return

    # e.g. simple_word_eval, assignment builtin
    e_die('Extended glob not allowed in this word', w)
1983
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Appends one string per String or Array part to strs and recurses
    into ExtGlob parts.  Quoted pieces are escaped according to
    QUOTE_FNMATCH or QUOTE_ERE in eval_flags.

    Note: arg 'w' could just be a span ID
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                if not piece.quoted:
                    strs.append(piece.s)
                elif eval_flags & QUOTE_FNMATCH:
                    # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                    strs.append(glob_.GlobEscape(piece.s))
                elif eval_flags & QUOTE_ERE:
                    strs.append(glob_.ExtendedRegexEscape(piece.s))
                else:
                    strs.append(piece.s)

            elif case(part_value_e.Array):
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather than the
                    # parts.  Problem: the word is a TREE of parts, but we only have a
                    # flat list of part_vals.  The only case where we really get arrays
                    # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')

                arr = cast(part_value.Array, UP_pv)
                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = []  # type: List[str]
                for item in arr.strs:
                    if item is not None:
                        present.append(item)
                strs.append(' '.join(present))

            elif case(part_value_e.ExtGlob):
                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                ext = cast(part_value.ExtGlob, UP_pv)
                # recursive call
                self._PartValsToString(ext.part_vals, w, eval_flags, strs)

            else:
                raise AssertionError()
2039
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Evaluate a compound word down to a single value.Str.

    Args:
      UP_w: the word to evaluate; asserted to be a CompoundWord
      eval_flags: 0, or a quoting algorithm (e.g. QUOTE_FNMATCH) that is
        forwarded to _PartValsToString()
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # The fast path is only valid without flags: QUOTE_FNMATCH etc. breaks
    # the optimization.
    if eval_flags == 0:
        literal = word_.FastStrEval(w)
        if literal is not None:
            return value.Str(literal)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for part in w.parts:
        # Parts are always evaluated with flags 0; eval_flags only affects
        # the stringification step below, which is slightly confusing.
        self._EvalWordPart(part, evaluated, 0)

    chunks = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, chunks)
    joined = ''.join(chunks)
    return value.Str(joined)
2067
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Evaluate a word to a pattern string, escaping quoted parts for fnmatch.

    Like EvalWordToString with QUOTE_FNMATCH, except that it also reports
    whether any top-level part of the word is an ExtGlob.

    Returns:
      (pattern, has_extglob).  An empty rhs_word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for part in w.parts:
        if part.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        # Parts are evaluated with flags 0; the quoting flag is applied
        # when converting to a string below.
        self._EvalWordPart(part, evaluated, 0)

    chunks = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, chunks)
    pattern = value.Str(''.join(chunks))
    return pattern, saw_extglob
2088
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Evaluate a word with EvalWordToString, turning failures into text.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42) are
    handled here: instead of propagating, each caught exception becomes a
    placeholder string describing it.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers is there to "sandbox" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)

        except error.FatalRuntime as e:
            msg = '<Runtime error: %s>' % e.UserErrorString()
            result = value.Str(msg)

        except (IOError, OSError) as e:
            msg = '<I/O error: %s>' % pyutil.strerror(e)
            result = value.Str(msg)

        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2111
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Returns:
      value.Str('') for an empty RHS, value.BashArray for a=(1 2),
      value.BashAssoc for an associative array literal, and otherwise the
      value.Str produced by EvalWordToString().
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so check it against the rhs_word_e tag, the same
    # way EvalWordToPattern() does.
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()
        # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
        # don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            # Brace expansion first, then the normal word sequence evaluation.
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # Keys and values are each evaluated to a single string; a later
            # duplicate key overwrites an earlier one.
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2148
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args, appended to argv.

    The frame is either elided, appended as a single joined string (when
    every piece is quoted), or escaped, split with self.splitter, and then
    glob-expanded with self.globber.

    Args:
      frame: the pieces that make up this frame
      argv: output list, appended to in place

    Raises:
      error.FailGlob: when self.globber.Expand() returns a negative count
    """
    all_empty = True
    all_quoted = True
    any_quoted = False

    #log('--- frame %s', frame)

    # First pass: classify the frame by emptiness and quoting.
    for piece in frame:
        if len(piece.s):
            all_empty = False

        if piece.quoted:
            any_quoted = True
        else:
            all_quoted = False

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if all_empty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if all_quoted:
        tmp = [piece.s for piece in frame]
        a = ''.join(tmp)
        argv.append(a)
        return

    will_glob = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # Step 1: protect against globbing.
        if will_glob and piece.quoted:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        # Step 2: protect against splitting.  Pieces that should be split
        # only get their backslashes escaped again; the others go through
        # the splitter's own Escape().
        if piece.do_split:
            frag = _BackslashEscape(frag)
        else:
            frag = self.splitter.Escape(frag)

        frags.append(frag)

    if 0:
        log('---')
        log('FRAGS')
        for i, frag in enumerate(frags):
            log('(%d) %s', i, frag)
        log('')

    flat = ''.join(frags)
    #log('flat: %r', flat)

    args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(args) == 0 and any_quoted:
        argv.append('')
        return

    #log('split args: %r', args)
    for a in args:
        if glob_.LooksLikeGlob(a):
            # Expand() appends matches to argv itself; a negative return
            # value is treated as a failed glob.
            n = self.globber.Expand(a, argv)
            if n < 0:
                # TODO: location info, with span IDs carried through the frame
                raise error.FailGlob('Pattern %r matched no files' % a,
                                     loc.Missing)
        else:
            # Not a glob: undo the glob escaping applied above.
            argv.append(glob_.GlobUnescape(a))
2233
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of strings, without splitting or globbing.

    Helper for _EvalAssignBuiltin: splitting and globbing are disabled for
    assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalWordToParts(w, evaluated, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(evaluated):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        texts = [piece.s for piece in frame]
        result.append(''.join(texts))  # no split or glob
    #log('argv: %s', result)
    return result
2253
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: ID of the assignment builtin, passed through to the result
      arg0: name of the builtin; becomes the first entry of the flags list
      words: all words of the command
      meta_offset: evaluation starts at words[meta_offset + 1]

    Returns:
      cmd_value.Assign with parallel lists of flags and flag locations, plus
      the list of AssignArg pairs.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # flags and flag_locs are parallel lists: every append to one must be
    # matched by an append to the other.
    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    # NOTE(review): arg0 appears to come from words[meta_offset], so
    # words[0] may point at 'builtin'/'command' instead -- confirm.
    flag_locs = [words[0]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name.
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the '=', e.g. x=
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning "function" for
                    # all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # Names after -f / -F are kept as plain args.  Record
                        # the location too, so flag_locs stays parallel to
                        # flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2340
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate the command as an assignment builtin if arg0 names one.

    Returns None when consts.LookupAssignBuiltin() doesn't recognize arg0.
    """
    assign_id = consts.LookupAssignBuiltin(arg0)
    if assign_id == consts.NO_INDEX:
        return None
    return self._EvalAssignBuiltin(assign_id, arg0, words, meta_offset)
2348
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but starting from an evaluated part.

    Only an UNQUOTED string part can name an assignment builtin; anything
    else gives None.
    """
    UP_val0 = val0
    if val0.tag() == part_value_e.String:
        # The narrowing cast is kept inside this block, as in the rest of
        # the file.
        val0 = cast(Piece, UP_val0)
        if val0.quoted:
            return None
        return self._DetectAssignBuiltinStr(val0.s, words, meta_offset)
    return None
2357
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Words are evaluated without splitting.  Only words that look like
    static globs are expanded with self.globber.

    Returns:
      cmd_value.Assign if allow_assign is set and the first word names an
      assignment builtin; otherwise cmd_value.Argv with parallel lists of
      strings and word locations.
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        # No globbing in the first arg for command.Simple.
        if i == meta_offset and allow_assign:
            first = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first) == 1:
                assign_val = self._DetectAssignBuiltinStr(
                    first[0], words, meta_offset)
                if assign_val:
                    return assign_val

            argv.extend(first)
            for _ in first:
                arg_locs.append(w)
            continue

        if glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            n_matched = self.globber.Expand(pat.s, argv)
            if n_matched < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            # One location per string that Expand() appended.
            for _ in xrange(n_matched):
                arg_locs.append(w)
            continue

        evaluated = []  # type: List[part_value_t]
        self._EvalWordToParts(w, evaluated, 0)  # not quoted

        if 0:
            log('')
            log('Static: part_vals after _EvalWordToParts:')
            for entry in evaluated:
                log('  %s', entry)

        # Still need to process
        word_frames = _MakeWordFrames(evaluated)

        if 0:
            log('')
            log('Static: frames after _MakeWordFrames:')
            for entry in word_frames:
                log('  %s', entry)

        # We will still allow x"${a[@]}"x, though it's deprecated by @a, which
        # disallows such expressions at parse time.
        for frame in word_frames:
            if len(frame) == 0:  # empty array gives empty frame!
                continue
            texts = [piece.s for piece in frame]
            argv.append(''.join(texts))  # no split or glob
            arg_locs.append(w)

    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2420
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: the words to evaluate
      is_last_cmd: passed through unchanged to cmd_value.Argv
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Assign when allow_assign is set and an assignment builtin is
      detected at position meta_offset; otherwise cmd_value.Argv, whose
      strings and locations are parallel lists.
    """
    # With simple_word_eval, the whole job is delegated to the YSH variant.
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    # first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    # "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    # off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    # n is len(strs) as of the end of the previous iteration.  It's used to
    # count how many strings the current word produced.
    n = 0
    for i, w in enumerate(words):
        # Fast path: FastStrEval() gives a string directly, so the word
        # contributes exactly one arg.
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            # Bug fix: n must be updated on every loop iteration
            n = len(strs)
            assert len(strs) == len(locs), strs
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.
        # Every string added since the last iteration is attributed to w.
        n_next = len(strs)
        for _ in xrange(n_next - n):
            locs.append(w)
        n = n_next

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(strs) == len(locs), '%s vs. %d' % (strs, len(locs))
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2528
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a plain list of strings, for arrays and for loops.

    Assignment builtins aren't allowed here, so the result of
    EvalWordSequence2() is always a cmd_value.Argv (asserted).
    """
    # is_last_cmd is irrelevant
    result = self.EvalWordSequence2(words, False)
    assert result.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2539
2540
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs through shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Starts out as None; CheckCircularDeps() asserts it has been set.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs were set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and wrap its stdout in a part value."""
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        #lines = self.splitter.SplitForWordEval(stdout_str)
        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the path it yields as a Piece."""
        path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(path, True, False)
2587
2588
# Placeholder text that CompletionWordEvaluator._EvalCommandSub() returns
# instead of running a command sub.
_DUMMY = '__NO_COMMAND_SUB__'
2590
2591
class CompletionWordEvaluator(AbstractWordEvaluator):
    """A word evaluator with no executor: subs yield placeholder strings.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees the
    inner command as the last one, and knows that it is not at the end of the
    line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators this evaluator needs were set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return the _DUMMY placeholder; no command is run."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)
        # @(...) gives an array, so the placeholder is wrapped in one too.
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a fixed placeholder; no process is started."""
        # pretend it's quoted; no split or glob
        placeholder = '__NO_PROCESS_SUB__'
        return Piece(placeholder, True, False)
2631
2632
2633# vim: sw=4