OILS / osh / word_eval.py View on Github | oils.pub

2642 lines, 1638 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 error_code_e,
37 AssignArg,
38 a_index,
39 a_index_e,
40 VTestPlace,
41 VarSubState,
42 Piece,
43)
44from _devbuild.gen.option_asdl import option_i, builtin_i
45from _devbuild.gen.value_asdl import (
46 value,
47 value_e,
48 value_t,
49 sh_lvalue,
50 sh_lvalue_t,
51)
52from core import bash_impl
53from core import error
54from core import pyos
55from core import pyutil
56from core import state
57from display import ui
58from core import util
59from data_lang import j8
60from data_lang import j8_lite
61from core.error import e_die
62from frontend import consts
63from frontend import lexer
64from frontend import location
65from mycpp import mops
66from mycpp.mylib import log, tagswitch, NewDict
67from osh import braces
68from osh import glob_
69from osh import string_ops
70from osh import word_
71from ysh import expr_eval
72from ysh import val_ops
73
74from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
75
76if TYPE_CHECKING:
77 from _devbuild.gen.syntax_asdl import word_part_t
78 from _devbuild.gen.option_asdl import builtin_t
79 from core import optview
80 from core.state import Mem
81 from core.vm import _Executor
82 from osh.split import SplitContext
83 from osh import prompt
84 from osh import sh_expr_eval
85
86# Flags for _EvalWordToParts and _EvalWordPart (not all are used for both)
87QUOTED = 1 << 0
88IS_SUBST = 1 << 1
89
90EXTGLOB_FILES = 1 << 2 # allow @(cc) from file system?
91EXTGLOB_MATCH = 1 << 3 # allow @(cc) in pattern matching?
92EXTGLOB_NESTED = 1 << 4 # for @(one|!(two|three))
93
94# For EvalWordToString
95QUOTE_FNMATCH = 1 << 5
96QUOTE_ERE = 1 << 6
97
98# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
99# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
100_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
101
102
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Return whether we should allow ${a} to mean ${a[0]}.

    Decay is allowed when either:
    - 'strict_array' is off, or
    - this is a plain var sub and var_name is one of the names in
      _STRING_AND_ARRAY (BASH_SOURCE, FUNCNAME, BASH_LINENO).
    """
    return (not exec_opts.strict_array() or
            (is_plain_var_sub and var_name in _STRING_AND_ARRAY))
108
109
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    Args:
      val: a BashArray, SparseArray or BashAssoc.  Any other tag is a
           programming error (AssertionError).

    Returns:
      value.Str for element 0 (key '0' for BashAssoc), or value.Undef when
      that element lookup yields None.
    """
    if val.tag() in (value_e.BashArray, value_e.SparseArray):
        if val.tag() == value_e.BashArray:
            array_val = cast(value.BashArray, val)
            s, error_code = bash_impl.BashArray_GetElement(array_val, 0)
        elif val.tag() == value_e.SparseArray:
            sparse_val = cast(value.SparseArray, val)
            s, error_code = bash_impl.SparseArray_GetElement(
                sparse_val, mops.ZERO)
        else:
            raise AssertionError(val.tag())

        # Note: index 0 should never cause the out-of-bound index error.
        assert error_code == error_code_e.OK

    elif val.tag() == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        # Associative arrays are keyed by string, so "index 0" is key '0'
        s = bash_impl.BashAssoc_GetElement(assoc_val, '0')
    else:
        raise AssertionError(val.tag())

    if s is None:
        return value.Undef
    else:
        return value.Str(s)
137
138
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    We need to detect all of these cases:

        builtin local
        command local
        builtin builtin local
        builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_oils.Builtin() or meta_oils.Command()
    """
    return (consts.LookupNormalBuiltin(s)
            in (builtin_i.builtin, builtin_i.command))
155
156
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether val0 is an UNQUOTED string naming 'builtin' or 'command'.

    Non-string part values and quoted strings never match.
    """
    UP_val0 = val0
    if val0.tag() == part_value_e.String:
        val0 = cast(Piece, UP_val0)
        if not val0.quoted:
            return _DetectMetaBuiltinStr(val0.s)
    return False
165
166
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse argument to declare, export, etc.

    This is a fallback to the static parsing done below.

    Args:
      arg: the evaluated argument string, e.g. 'NAME', 'NAME=val', 'NAME+=val'
      blame_word: word used as the error location, and stored in the result

    Returns:
      AssignArg(var_name, val, append, blame_word), where val is None when
      there's no operator, and append is True when the operator starts
      with '+'.

    Raises (via e_die):
      A fatal error when arg doesn't match consts.ASSIGN_ARG_RE.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    m = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if m is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    var_name = m[1]
    # m[2] is used for grouping; ERE doesn't have non-capturing groups

    op = m[3]
    assert op is not None, op
    if len(op):  # declare NAME=
        val = value.Str(m[4])  # type: Optional[value_t]
        append = op[0] == '+'
    else:  # declare NAME
        val = None  # no operator
        append = False

    return AssignArg(var_name, val, append, blame_word)
193
194
195# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
196def _BackslashEscape(s):
197 # type: (str) -> str
198 """Double up backslashes.
199
200 Useful for strings about to be globbed and strings about to be IFS
201 escaped.
202 """
203 return s.replace('\\', '\\\\')
204
205
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Helper for VarSub evaluation.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the value to substitute
      quoted: whether the substitution is in a quoted context.  For string
              results, the Piece is built as Piece(s, quoted, not quoted).
      part_loc: word part used as the location for errors

    Returns:
      A Piece for Undef, Str, and the stringifiable YSH types; a
      part_value.Array for BashArray, SparseArray and BashAssoc.

    Raises:
      error.TypeErr for any other value type.
    """
    UP_val = val

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _ProcessUndef, but we have to append to the empty string.
            return Piece('', quoted, not quoted)

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            return Piece(val.s, quoted, not quoted)

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            return part_value.Array(bash_impl.BashArray_GetValues(val))

        elif case(value_e.SparseArray):
            val = cast(value.SparseArray, UP_val)
            return part_value.Array(bash_impl.SparseArray_GetValues(val))

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(bash_impl.BashAssoc_GetValues(val))

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            s = val_ops.Stringify(val, loc.WordPart(part_loc), 'Word eval ')
            return Piece(s, quoted, not quoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
249
250
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group a flat list of part values into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args.  It can never be joined.
    This idea exists because of arrays like "$@" and "${a[@]}".

    Example:

        a=(1 '2 3' 4)
        x=x
        y=y

        # This word
        $x"${a[@]}"$y

        # Results in Three frames:
        [ ('x', False, True), ('1', True, False) ]
        [ ('2 3', True, False) ]
        [ ('4', True, False), ('y', False, True) ]

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    The first element of an array is appended to the current frame; every
    later element starts a new frame.  None (undefined) array entries are
    skipped.  There is always at least one frame, possibly empty.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    current = []  # type: List[Piece]
    frames = [current]

    for p in part_vals:
        UP_p = p

        with tagswitch(p) as case:
            if case(part_value_e.String):
                p = cast(Piece, UP_p)
                current.append(p)

            elif case(part_value_e.Array):
                p = cast(part_value.Array, UP_p)

                is_first = True
                for s in p.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would
                    # have decayed to a string.
                    piece = Piece(s, True, False)
                    if is_first:
                        current.append(piece)
                        is_first = False
                    else:
                        current = [piece]
                        frames.append(current)  # singleton frame

            else:
                raise AssertionError()

    return frames
318
319
# TODO: This could be _MakeWordFrames and then sep.join().  It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Decay part values like ${a=x"$@"x} to a single string.

    String parts are concatenated as-is.  Each Array part is first joined
    with join_char, skipping None entries, and then concatenated.
    """
    out = []  # type: List[str]
    for p in part_vals:
        UP_p = p
        with tagswitch(p) as case:
            if case(part_value_e.String):
                p = cast(Piece, UP_p)
                out.append(p.s)
            elif case(part_value_e.Array):
                p = cast(part_value.Array, UP_p)
                # TODO: Eliminate double join for speed?
                tmp = [s for s in p.strs if s is not None]
                out.append(join_char.join(tmp))
            else:
                raise AssertionError()
    return ''.join(out)
339
340
def _PerformSlice(
        val,  # type: value_t
        offset,  # type: mops.BigInt
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Apply a slice like ${x:offset:length} to a string or array.

    Args:
      val: Str, BashArray or SparseArray to slice
      offset: start position; negative values count from the end
      length: only consulted when has_length is True
      has_length: whether a length was given
      part: the var sub, used as the error location
      arg0_val: when not None, it is counted as element 0 and may be
                prepended to the result (the comments below describe this
                as the $@ / $* case)

    Returns:
      value.Str for a Str input (sliced by UTF-8 characters), or a
      value.BashArray for BashArray / SparseArray input.

    Raises:
      Fatal error (e_die) for a negative array length, or for BashAssoc.
      error.TypeErr for other value types.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            begin = mops.BigTruncate(offset)
            if begin < 0:  # Compute offset with unicode
                # Walk backward from the end, one character at a time
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = len(s)

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray,
                  value_e.SparseArray):  # Slice array entries.
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent
            # with strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # ${@:-3} starts counts from the end
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    array_length = mops.IntWiden(
                        bash_impl.BashArray_Length(val))
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    array_length = bash_impl.SparseArray_Length(val)
                else:
                    raise AssertionError()

                # The array length counts $0 for $@ and $*
                if arg0_val is not None:
                    array_length = mops.Add(array_length, mops.ONE)

                offset = mops.Add(offset, array_length)

            if bash_impl.BigInt_Less(offset, mops.ZERO):
                # Still negative after counting from the end: empty result
                strs = []  # type: List[str]
            else:
                # Quirk: "offset" for positional arguments ($@ and $*)
                # counts $0.
                prepends_arg0 = False
                if arg0_val is not None:
                    if bash_impl.BigInt_Greater(offset, mops.ZERO):
                        offset = mops.Sub(offset, mops.ONE)
                    elif not has_length or length >= 1:
                        # offset == 0: arg0 itself takes one slot
                        prepends_arg0 = True
                        length = length - 1

                if has_length and length == 0:
                    strs = []

                elif val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    orig = bash_impl.BashArray_GetValues(val)
                    n = len(orig)

                    strs = []
                    i = mops.BigTruncate(offset)
                    count = 0
                    while i < n:
                        if has_length and count == length:  # length could be 0
                            break
                        s = orig[i]
                        # Unset elements don't count towards the length
                        if s is not None:
                            strs.append(s)
                            count += 1
                        i += 1

                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)

                    # TODO: We may optimize this by finding the first index
                    # using the binary search.  Furthermore, the sorting by
                    # SparseArray_GetKeys can be replaced with the heap sort
                    # so that we only extract the first LENGTH elements of
                    # the indices greater or equal to OFFSET.
                    i = 0
                    for index in bash_impl.SparseArray_GetKeys(val):
                        if bash_impl.BigInt_GreaterEq(index, offset):
                            break
                        i = i + 1

                    if has_length:
                        strs = bash_impl.SparseArray_GetValues(val)[i:i +
                                                                    length]
                    else:
                        strs = bash_impl.SparseArray_GetValues(val)[i:]

                else:
                    raise AssertionError()

                if prepends_arg0:
                    new_list = [arg0_val.s]
                    new_list.extend(strs)
                    strs = new_list

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
478
479
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator"""

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate word w to a single string; subclasses must override.

        Raises:
          NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
491
492
493def _GetDollarHyphen(exec_opts):
494 # type: (optview.Exec) -> str
495 chars = [] # type: List[str]
496 if exec_opts.interactive():
497 chars.append('i')
498
499 if exec_opts.errexit():
500 chars.append('e')
501 if exec_opts.noglob():
502 chars.append('f')
503 if exec_opts.noexec():
504 chars.append('n')
505 if exec_opts.nounset():
506 chars.append('u')
507 # NO letter for pipefail?
508 if exec_opts.xtrace():
509 chars.append('x')
510 if exec_opts.noclobber():
511 chars.append('C')
512
513 # bash has:
514 # - c for sh -c, i for sh -i (mksh also has this)
515 # - h for hashing (mksh also has this)
516 # - B for brace expansion
517 return ''.join(chars)
518
519
class TildeEvaluator(object):
    """Expands ~ and ~user to home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem  # consulted for HOME
        self.exec_opts = exec_opts  # for strict_tilde

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.

        Returns:
          The home dir, or whatever pyos.GetMyHomeDir() returns when HOME
          isn't set (Eval() treats None as a failure).
        """
        # First look up the HOME var, ENV.HOME, ...
        s = self.mem.env_config.Get('HOME')
        if s is not None:
            return s

        # Then ask the OS.  This is what bash does.
        return pyos.GetMyHomeDir()

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the lookup fails: it's a fatal error under strict_tilde;
        otherwise the literal text ~ or ~user is returned.
        """
        if part.user_name is None:
            result = self.GetMyHomeDir()
        else:
            result = pyos.GetHomeDir(part.user_name)

        if result is None:
            if self.exec_opts.strict_tilde():
                e_die("Error expanding tilde (e.g. invalid user)", part.left)
            else:
                # Return ~ or ~user literally
                result = '~'
                if part.user_name is not None:
                    result = result + part.user_name  # mycpp doesn't have +=

        return result
561
562
563class AbstractWordEvaluator(StringWordEvaluator):
564 """Abstract base class for word evaluators.
565
566 Public entry points:
567 EvalWordToString EvalForPlugin EvalRhsWord
568 EvalWordSequence EvalWordSequence2
569 """
570
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: state.MutableOpts
        tilde_ev,  # type: TildeEvaluator
        splitter,  # type: SplitContext
        errfmt,  # type: ui.ErrorFormatter
):
    # type: (...) -> None
    """Store collaborators and create the Globber.

    The evaluators initialized to None below are not constructor arguments;
    they must be assigned on the instance before methods that use them are
    called.
    """
    self.arith_ev = None  # type: sh_expr_eval.ArithEvaluator
    self.expr_ev = None  # type: expr_eval.ExprEvaluator
    self.prompt_ev = None  # type: prompt.Evaluator

    self.unsafe_arith = None  # type: sh_expr_eval.UnsafeArith

    self.tilde_ev = tilde_ev

    self.mem = mem  # for $HOME, $1, etc.
    self.exec_opts = exec_opts  # for nounset
    self.mutable_opts = mutable_opts  # for _allow_command_sub
    self.splitter = splitter
    self.errfmt = errfmt

    self.globber = glob_.Globber(exec_opts)
596
def CheckCircularDeps(self):
    # type: () -> None
    """Abstract; subclasses must override.

    Raises:
      NotImplementedError: always, in this base class.
    """
    raise NotImplementedError()
600
601 def _EvalCommandSub(self, cs_part, quoted):
602 # type: (CommandSub, bool) -> part_value_t
603 """Abstract since it has a side effect."""
604 raise NotImplementedError()
605
606 def _EvalProcessSub(self, cs_part):
607 # type: (CommandSub) -> part_value_t
608 """Abstract since it has a side effect."""
609 raise NotImplementedError()
610
611 def _EvalVarNum(self, var_num):
612 # type: (int) -> value_t
613 assert var_num >= 0
614 return self.mem.GetArgNum(var_num)
615
def _EvalSpecialVar(self, op_id, quoted, vsub_state):
    # type: (int, bool, VarSubState) -> value_t
    """Evaluate $? and so forth.

    Args:
      op_id: Id of the special variable, e.g. Id.VSub_At
      quoted: whether the substitution is in a double quoted context
      vsub_state: OUT param; join_array is set for $@ and $*

    Returns:
      A BashArray of the argv for $@ and $*, a Str for $-, and otherwise
      whatever self.mem.GetSpecialVar() returns.
    """
    # $@ is special -- it need to know whether it is in a double quoted
    # context.
    #
    # - If it's $@ in a double quoted context, return an ARRAY.
    # - If it's $@ in a normal context, return a STRING, which then will be
    #   subject to splitting.

    if op_id in (Id.VSub_At, Id.VSub_Star):
        argv = self.mem.GetArgv()
        val = value.BashArray(argv)  # type: value_t
        if op_id == Id.VSub_At:
            # "$@" evaluates to an array, $@ should be decayed
            vsub_state.join_array = not quoted
        else:  # $* "$*" are both decayed
            vsub_state.join_array = True

    elif op_id == Id.VSub_Hyphen:
        val = value.Str(_GetDollarHyphen(self.exec_opts))

    else:
        val = self.mem.GetSpecialVar(op_id)

    return val
645
def _ApplyTestOp(
        self,
        val,  # type: value_t
        op,  # type: suffix_op.Unary
        quoted,  # type: bool
        part_vals,  # type: Optional[List[part_value_t]]
        vtest_place,  # type: VTestPlace
        blame_token,  # type: Token
        vsub_state,  # type: VarSubState
):
    # type: (...) -> bool
    """Apply a test operator: ${a-x} ${a:-x} ${a+x} ${a:+x} ${a=x} ${a?x} etc.

    Args:
      val: the value being tested
      op: the operator token (op.op) and its argument word (op.arg_word)
      quoted: whether we're in a quoted context
      part_vals: OUT param; the evaluated argument word is appended here
      vtest_place: name and optional index to assign to, for = and :=
      blame_token: location for the ? and :? error
      vsub_state: join_array is consulted when testing array emptiness

    Returns:
      Whether part_vals was mutated

    Raises (via e_die):
      A fatal error for ? and :? when the value is "falsey", and for = and
      := when there's no variable name to assign to.

    The argument word can produce multiple part values, e.g.

        echo X-${a:-'def'"ault"}-X

    We return two part values from the BracedVarSub.  Also consider:

        echo ${a:-x"$@"x}
    """
    eval_flags = IS_SUBST
    if quoted:
        eval_flags |= QUOTED

    tok = op.op
    # NOTE: Splicing part_values is necessary because of code like
    # ${undef:-'a b' c 'd # e'}.  Each part_value can have a different
    # do_glob/do_elide setting.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            is_falsey = True

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            # Only the colon variants treat the empty string as falsey
            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                is_falsey = len(val.s) == 0
            else:
                is_falsey = False

        elif case(value_e.BashArray, value_e.SparseArray,
                  value_e.BashAssoc):
            if val.tag() == value_e.BashArray:
                val = cast(value.BashArray, UP_val)
                strs = bash_impl.BashArray_GetValues(val)
            elif val.tag() == value_e.SparseArray:
                val = cast(value.SparseArray, UP_val)
                strs = bash_impl.SparseArray_GetValues(val)
            elif val.tag() == value_e.BashAssoc:
                val = cast(value.BashAssoc, UP_val)
                strs = bash_impl.BashAssoc_GetValues(val)
            else:
                raise AssertionError()

            if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
                          Id.VTest_ColonQMark, Id.VTest_ColonPlus):
                # "$*" - the separator is the first character of IFS
                # $* $@ "$@" - the separator is a space
                if quoted and vsub_state.join_array:
                    sep_width = len(self.splitter.GetJoinChar())
                else:
                    sep_width = 1

                # We test whether the joined string will be empty.  When
                # the separator is empty, all the elements need to be
                # empty.  When the separator is non-empty, one element is
                # allowed at most and needs to be an empty string if any.
                #
                # Entries may be None for unset elements (other code in
                # this file filters them out of BashArray values), so
                # guard before calling len().  None is treated as empty.
                if sep_width == 0:
                    is_falsey = True
                    for s in strs:
                        if s is not None and len(s) != 0:
                            is_falsey = False
                            break
                else:
                    is_falsey = len(strs) == 0 or (
                        len(strs) == 1 and
                        (strs[0] is None or len(strs[0]) == 0))
            else:
                # TODO: allow undefined
                is_falsey = len(strs) == 0

        else:
            # value.Eggex, etc. are all false
            is_falsey = False

    if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
        if is_falsey:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True
        else:
            return False

    # Inverse of the above.
    elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
        if is_falsey:
            return False
        else:
            self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
            return True

    # Splice and assign
    elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
        if is_falsey:
            # Collect new part vals.
            assign_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
                                     eval_flags)
            # Append them to out param AND return them.
            part_vals.extend(assign_part_vals)

            if vtest_place.name is None:
                # TODO: error context
                e_die("Can't assign to special variable")
            else:
                # NOTE: This decays arrays too!  'shopt -s strict_array'
                # could avoid it.
                rhs_str = _DecayPartValuesToString(
                    assign_part_vals, self.splitter.GetJoinChar())
                if vtest_place.index is None:  # using None when no index
                    lval = location.LName(
                        vtest_place.name)  # type: sh_lvalue_t
                else:
                    var_name = vtest_place.name
                    var_index = vtest_place.index
                    UP_var_index = var_index

                    with tagswitch(var_index) as case:
                        if case(a_index_e.Int):
                            var_index = cast(a_index.Int, UP_var_index)
                            lval = sh_lvalue.Indexed(
                                var_name, var_index.i, loc.Missing)
                        elif case(a_index_e.Str):
                            var_index = cast(a_index.Str, UP_var_index)
                            lval = sh_lvalue.Keyed(var_name, var_index.s,
                                                   loc.Missing)
                        else:
                            raise AssertionError()

                state.OshLanguageSetValue(self.mem, lval,
                                          value.Str(rhs_str))
            return True

        else:
            return False

    elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
        if is_falsey:
            # The arg is the error message
            error_part_vals = []  # type: List[part_value_t]
            self._EvalRhsWordToParts(op.arg_word, error_part_vals,
                                     eval_flags)
            error_str = _DecayPartValuesToString(
                error_part_vals, self.splitter.GetJoinChar())

            #
            # Display fancy/helpful error
            #
            if vtest_place.name is None:
                var_name = '???'
            else:
                var_name = vtest_place.name

            if 0:
                # This hint is nice, but looks too noisy for now
                op_str = lexer.LazyStr(tok)
                if tok.id == Id.VTest_ColonQMark:
                    why = 'empty or unset'
                else:
                    why = 'unset'

                self.errfmt.Print_(
                    "Hint: operator %s means a variable can't be %s" %
                    (op_str, why), tok)

            if val.tag() == value_e.Undef:
                actual = 'unset'
            else:
                actual = 'empty'

            if len(error_str):
                suffix = ': %r' % error_str
            else:
                suffix = ''
            e_die("Var %s is %s%s" % (var_name, actual, suffix),
                  blame_token)

        else:
            return False

    else:
        raise AssertionError(tok.id)
849
def _Count(self, val, token):
    # type: (value_t, Token) -> int
    """Returns the length of the value, for ${#var}

    Strings are counted in UTF-8 characters; arrays by their Count helper
    in bash_impl.

    Returns:
      The count, or -1 when the string isn't valid UTF-8 and
      strict_word_eval is off (a warning is printed in that case).

    Raises:
      error.Strict: on invalid UTF-8, when strict_word_eval is on
      error.TypeErr: for unsupported value types
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):
            val = cast(value.Str, UP_val)
            # NOTE: Whether bash counts bytes or chars is affected by LANG
            # environment variables.
            # Should we respect that, or another way to select?  set -o
            # count-bytes?

            # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
            try:
                count = string_ops.CountUtf8Chars(val.s)
            except error.Strict as e:
                # Add this here so we don't have to add it so far down the
                # stack.
                # TODO: It's better to show BOTH this CODE and the actual
                # DATA somehow.
                e.location = token

                if self.exec_opts.strict_word_eval():
                    raise
                else:
                    # NOTE: Doesn't make the command exit with 1; it just
                    # returns a length of -1.
                    self.errfmt.PrettyPrintError(e, prefix='warning: ')
                    return -1

        elif case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            count = bash_impl.BashArray_Count(val)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            count = bash_impl.BashAssoc_Count(val)

        elif case(value_e.SparseArray):
            val = cast(value.SparseArray, UP_val)
            count = bash_impl.SparseArray_Count(val)

        else:
            raise error.TypeErr(
                val, "Length op expected Str, BashArray, BashAssoc", token)

    return count
896
def _Keys(self, val, token):
    # type: (value_t, Token) -> value_t
    """Return keys of a container, for ${!array[@]}

    Returns:
      A BashArray of strings: the indices of a BashArray or SparseArray
      converted to strings, or the keys of a BashAssoc.

    Raises:
      error.TypeErr: for any other value type
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)
            indices = [str(i) for i in bash_impl.BashArray_GetKeys(val)]
            return value.BashArray(indices)

        elif case(value_e.SparseArray):
            val = cast(value.SparseArray, UP_val)
            # SparseArray indices are BigInt, so they need mops.ToStr()
            # rather than str()
            big_indices = [
                mops.ToStr(i) for i in bash_impl.SparseArray_GetKeys(val)
            ]
            return value.BashArray(big_indices)

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            assert val.d is not None  # for MyPy, so it's not Optional[]

            # BUG: Keys aren't ordered according to insertion!
            keys = bash_impl.BashAssoc_GetKeys(val)
            return value.BashArray(keys)

        else:
            # Str is NOT accepted here; only the array types above are
            raise error.TypeErr(val, 'Keys op expected BashArray, BashAssoc',
                                token)
918
def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
    # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
    """Handles indirect expansion like ${!var} and ${!a[0]}.

    The value is first turned into a string naming the referenced variable,
    which is parsed with self.unsafe_arith.ParseVarRef() and then evaluated
    with self._VarRefValue().

    Args:
      val: value holding the name of the variable to look up
      blame_tok: 'foo' for ${!foo}
      quoted, vsub_state, vtest_place: passed through to _VarRefValue()

    Raises:
      error.TypeErr: when val can't be used as a var ref
      error.VarSubFailure: when the var ref string fails to parse
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Undef):
            # bash-4.4 returned value.Undef here.  bash-5.0 started to treat
            # the variable name to be empty so that the indirection fails.
            var_ref_str = ''

        elif case(value_e.Str):
            val = cast(value.Str, UP_val)
            var_ref_str = val.s

        elif case(value_e.BashArray):  # caught earlier but OK
            val = cast(value.BashArray, UP_val)
            # When there are more than one element in the array, this
            # produces a wrong variable name containing spaces.
            var_ref_str = ' '.join(bash_impl.BashArray_GetValues(val))

        elif case(value_e.BashAssoc):  # caught earlier but OK
            val = cast(value.BashAssoc, UP_val)
            var_ref_str = ' '.join(bash_impl.BashAssoc_GetValues(val))

        else:
            raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)

    try:
        bvs_part = self.unsafe_arith.ParseVarRef(var_ref_str, blame_tok)
    except error.FatalRuntime as e:
        # Convert to the var-sub specific failure, keeping message/location
        raise error.VarSubFailure(e.msg, e.location)

    return self._VarRefValue(bvs_part, quoted, vsub_state, vtest_place)
956
def _ApplyUnarySuffixOp(self, val, op):
    # type: (value_t, suffix_op.Unary) -> value_t
    """Apply a unary suffix op like ${x#prefix} or ${x^^} to a value.

    Args:
      val: Str, BashArray, SparseArray or BashAssoc; must not be Undef
      op: operator token (op.op) and its pattern word (op.arg_word)

    Returns:
      value.Str for a Str input; for the array types, a value.BashArray
      with the op applied to each value.

    Raises:
      error.TypeErr: for other value types
    """
    assert val.tag() != value_e.Undef

    op_kind = consts.GetKind(op.op.id)

    if op_kind == Kind.VOp1:
        # NOTE: glob syntax is supported in ^ ^^ , ,, !  As well as % %% # ##.
        # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
        # shortcut for constant strings.
        arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
        assert arg_val.tag() == value_e.Str

        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Str):
                val = cast(value.Str, UP_val)
                s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
                                               has_extglob)
                #log('%r %r -> %r', val.s, arg_val.s, s)
                new_val = value.Str(s)  # type: value_t

            elif case(value_e.BashArray, value_e.SparseArray,
                      value_e.BashAssoc):
                # get values
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = bash_impl.BashArray_GetValues(val)
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    values = bash_impl.SparseArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                # ${a[@]#prefix} is VECTORIZED on arrays.  YSH should have
                # this too.
                strs = [
                    string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
                                               has_extglob) for s in values
                ]
                new_val = value.BashArray(strs)

            else:
                raise error.TypeErr(
                    val, 'Unary op expected Str, BashArray, BashAssoc',
                    op.op)

    else:
        raise AssertionError(Kind_str(op_kind))

    return new_val
1010
def _PatSub(self, val, op):
    # type: (value_t, suffix_op.PatSub) -> value_t
    """Apply pattern substitution like ${x/pat/replace} to a value.

    The glob pattern is translated to an ERE, and the replacement is applied
    to a Str, or to each value of BashArray / SparseArray / BashAssoc.

    Returns:
      value.Str for a Str input; value.BashArray for the array types.

    Raises:
      Fatal error (e_die) when the pattern has an extended glob.
      error.TypeErr for other value types.
    """
    pat_val, has_extglob = self.EvalWordToPattern(op.pat)
    # Extended globs aren't supported because we only translate * ? etc. to
    # ERE.  I don't think there's a straightforward translation from
    # !(*.py) to ERE!  You would need an engine that supports negation?
    # (Derivatives?)
    if has_extglob:
        e_die('extended globs not supported in ${x//GLOB/}', op.pat)

    if op.replace:
        replace_val = self.EvalRhsWord(op.replace)
        # Can't have an array, so must be a string
        assert replace_val.tag() == value_e.Str, replace_val
        replace_str = cast(value.Str, replace_val).s
    else:
        replace_str = ''

    # note: doesn't support self.exec_opts.extglob()!
    regex, warnings = glob_.GlobToERE(pat_val.s)
    if len(warnings):
        # TODO:
        # - Add 'shopt -s strict_glob' mode and expose warnings.
        #   "Glob is not in CANONICAL FORM".
        # - Propagate location info back to the 'op.pat' word.
        pass
    #log('regex %r', regex)
    replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)

    with tagswitch(val) as case2:
        if case2(value_e.Str):
            str_val = cast(value.Str, val)
            s = replacer.Replace(str_val.s, op)
            val = value.Str(s)

        elif case2(value_e.BashArray, value_e.SparseArray,
                   value_e.BashAssoc):
            if val.tag() == value_e.BashArray:
                array_val = cast(value.BashArray, val)
                values = bash_impl.BashArray_GetValues(array_val)
            elif val.tag() == value_e.SparseArray:
                sparse_val = cast(value.SparseArray, val)
                values = bash_impl.SparseArray_GetValues(sparse_val)
            elif val.tag() == value_e.BashAssoc:
                assoc_val = cast(value.BashAssoc, val)
                values = bash_impl.BashAssoc_GetValues(assoc_val)
            else:
                raise AssertionError()
            strs = [replacer.Replace(s, op) for s in values]
            val = value.BashArray(strs)

        else:
            raise error.TypeErr(
                val, 'Pat Sub op expected Str, BashArray, BashAssoc',
                op.slash_tok)

    return val
1068
def _Slice(self, val, op, var_name, part):
    # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
    """Evaluate the operands of ${x:begin:length} and apply the slice.

    Args:
      val: the value to slice
      op: has arithmetic expressions 'begin' and optional 'length'
      var_name: None for $* or $@, in which case arg 0 is passed along
      part: the var sub, used as the error location

    Returns:
      The sliced value from _PerformSlice().  If it raises error.Strict and
      strict_word_eval is off, a warning is printed and an empty Str or
      BashArray is returned instead.
    """
    begin = self.arith_ev.EvalToBigInt(op.begin)

    # Note: negative lengths are passed through as-is.  _PerformSlice()
    # treats them as an end position for strings, and rejects them for
    # arrays.
    has_length = False
    length = -1
    if op.length:
        has_length = True
        length = self.arith_ev.EvalToInt(op.length)

    try:
        arg0_val = None  # type: value.Str
        if var_name is None:  # $* or $@
            arg0_val = self.mem.GetArg0()
        val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
    except error.Strict as e:
        if self.exec_opts.strict_word_eval():
            raise
        else:
            self.errfmt.PrettyPrintError(e, prefix='warning: ')
            # 'val' is still the input here, since the assignment above
            # didn't complete
            with tagswitch(val) as case2:
                if case2(value_e.Str):
                    val = value.Str('')
                elif case2(value_e.BashArray):
                    val = value.BashArray([])
                else:
                    raise NotImplementedError()
    return val
1100
def _Nullary(self, val, op, var_name, vsub_token, vsub_state):
    # type: (value_t, Token, Optional[str], Token, VarSubState) -> Tuple[value_t, bool]
    """Apply a nullary suffix op (@P, @Q or @a) to a value.

    Args:
      val: the value the operator applies to.
      op: the operator token; op.id selects the behavior.
      var_name: name of the variable, or None.  Only @a uses it, to look
        up the cell's readonly / exported / nameref flags.
      vsub_token: token passed to _ProcessUndef for undefined values.
      vsub_state: passed to _ProcessUndef; @Q also reads its array_ref
        and @a reads its h_value.

    Returns:
      (result, quoted2), where quoted2 is True only for @Q applied to a
      Str.

    Any other operator id is reported with e_die().
    """

    quoted2 = False
    op_id = op.id
    if op_id == Id.VOp0_P:
        # @P: run each string through the prompt evaluator.
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                result = value.Str('')  # type: value_t
            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                prompt = self.prompt_ev.EvalPrompt(str_val.s)
                # readline gets rid of these, so we should too.
                p = prompt.replace('\x01', '').replace('\x02', '')
                result = value.Str(p)
            elif case(value_e.BashArray, value_e.SparseArray,
                      value_e.BashAssoc):
                # Collect the element strings of whichever container
                # type we have.  None entries of a BashArray are dropped.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    values = bash_impl.SparseArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                # Same transformation as the Str case, per element.  The
                # result is always a BashArray, whatever the input type.
                tmp = [
                    self.prompt_ev.EvalPrompt(s).replace(
                        '\x01', '').replace('\x02', '') for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @P on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_Q:
        # @Q: shell-encode each string.
        UP_val = val
        with tagswitch(val) as case:
            if case(value_e.Undef):
                # We need to issue an error when "-o nounset" is enabled.
                # Although we do not need to check val for value_e.Undef,
                # we call _ProcessUndef for consistency in the error
                # message.
                self._ProcessUndef(val, vsub_token, vsub_state)

                # For unset variables, we do not generate any quoted words.
                if vsub_state.array_ref is not None:
                    result = value.BashArray([])
                else:
                    result = value.Str('')

            elif case(value_e.Str):
                str_val = cast(value.Str, UP_val)
                result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
                # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
                # bash
                quoted2 = True
            elif case(value_e.BashArray, value_e.SparseArray,
                      value_e.BashAssoc):
                # Same element collection as in the @P branch.
                if val.tag() == value_e.BashArray:
                    val = cast(value.BashArray, UP_val)
                    values = [
                        s for s in bash_impl.BashArray_GetValues(val)
                        if s is not None
                    ]
                elif val.tag() == value_e.SparseArray:
                    val = cast(value.SparseArray, UP_val)
                    values = bash_impl.SparseArray_GetValues(val)
                elif val.tag() == value_e.BashAssoc:
                    val = cast(value.BashAssoc, UP_val)
                    values = bash_impl.BashAssoc_GetValues(val)
                else:
                    raise AssertionError()

                tmp = [
                    # TODO: should use fastfunc.ShellEncode
                    j8_lite.MaybeShellEncode(s) for s in values
                ]
                result = value.BashArray(tmp)
            else:
                e_die("Can't use @Q on %s" % ui.ValType(val), op)

    elif op_id == Id.VOp0_a:
        # @a: build a string of attribute flag characters.
        val = self._ProcessUndef(val, vsub_token, vsub_state)
        UP_val = val
        # We simulate the flags a / A (from the type of the holder value,
        # vsub_state.h_value) and r / x / n (from the variable's cell,
        # below).  See spec/ble-idioms.test.sh.
        chars = []  # type: List[str]
        with tagswitch(vsub_state.h_value) as case:
            if case(value_e.BashArray, value_e.SparseArray):
                chars.append('a')
            elif case(value_e.BashAssoc):
                chars.append('A')

        if var_name is not None:  # e.g. ${?@a} is allowed
            cell = self.mem.GetCell(var_name)
            if cell:
                if cell.readonly:
                    chars.append('r')
                if cell.exported:
                    chars.append('x')
                if cell.nameref:
                    chars.append('n')

        # The flag string is repeated once per element: 0 times for
        # Undef, the element count for containers, once otherwise.
        count = 1
        with tagswitch(val) as case:
            if case(value_e.Undef):
                count = 0
            elif case(value_e.BashArray):
                val = cast(value.BashArray, UP_val)
                count = bash_impl.BashArray_Count(val)
            elif case(value_e.SparseArray):
                val = cast(value.SparseArray, UP_val)
                count = bash_impl.SparseArray_Count(val)
            elif case(value_e.BashAssoc):
                val = cast(value.BashAssoc, UP_val)
                count = bash_impl.BashAssoc_Count(val)

        result = value.BashArray([''.join(chars)] * count)

    else:
        e_die('Var op %r not implemented' % lexer.TokenVal(op), op)

    return result, quoted2
1232
def _WholeArray(self, val, part, quoted, vsub_state):
    # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
    """Handle a whole-array bracket op, [@] or [*].

    Sets vsub_state.join_array according to the operator and quoting.  For
    an Undef value, the name token is recorded in vsub_state.array_ref.

    val itself is returned unmodified; a Str is rejected with e_die() only
    under strict_array.
    """
    whole = cast(bracket_op.WholeArray, part.bracket_op)
    op_id = whole.op_id

    if op_id == Id.Lit_At:
        op_str = '@'
        # ${a[@]} decays but "${a[@]}" doesn't
        vsub_state.join_array = not quoted
    elif op_id == Id.Arith_Star:
        op_str = '*'
        # both ${a[*]} and "${a[*]}" decay
        vsub_state.join_array = True
    else:
        raise AssertionError(op_id)  # unknown

    tag = val.tag()
    if tag == value_e.Undef:
        # For an undefined array, we save the token of the array
        # reference for the later error message.
        vsub_state.array_ref = part.name_tok
    elif tag == value_e.Str:
        if self.exec_opts.strict_array():
            e_die("Can't index string with %s" % op_str, loc.WordPart(part))
    # BashArray, SparseArray and BashAssoc need no processing here.  The
    # other YSH types such as List, Dict, and Float are not supported;
    # error messages will be printed later, so they are also returned
    # without modification.

    return val
1265
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Process an array index like ${a[i+1]} or an assoc key like ${A[k]}.

    Args:
      val: the value being indexed.
      part: the BracedVarSub; its bracket_op holds the index expression
        and its name_tok is used to locate errors and warnings.
      vtest_place: out param; .index is set to the evaluated index (Int
        for BashArray and SparseArray, Str for BashAssoc).

    Returns:
      value.Str for a found element, value.Undef when the element lookup
      yields None, or val unchanged when val is Undef.

    A Str value is rejected with e_die(); any other type raises
    error.TypeErr.
    """
    anode = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.name_tok)

        elif case2(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            index = self.arith_ev.EvalToInt(anode)
            vtest_place.index = a_index.Int(index)  # out param

            s, error_code = bash_impl.BashArray_GetElement(
                array_val, index)
            if error_code == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                self.errfmt.Print_(
                    "Index %d out of bounds for array of length %d" %
                    (index, bash_impl.BashArray_Length(array_val)),
                    blame_loc=part.name_tok)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        elif case2(value_e.SparseArray):
            sparse_val = cast(value.SparseArray, UP_val)
            # The lookup uses the big integer index; the out param
            # receives its truncated form.
            big_index = self.arith_ev.EvalToBigInt(anode)
            vtest_place.index = a_index.Int(mops.BigTruncate(big_index))

            s, error_code = bash_impl.SparseArray_GetElement(
                sparse_val, big_index)
            if error_code == error_code_e.IndexOutOfRange:
                # Note: Bash outputs warning but does not make it a real
                # error.  We follow the Bash behavior here.
                big_length = bash_impl.SparseArray_Length(sparse_val)
                self.errfmt.Print_(
                    "Index %s out of bounds for array of length %s" %
                    (mops.ToStr(big_index), mops.ToStr(big_length)),
                    blame_loc=part.name_tok)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        elif case2(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                anode, blame_loc=location.TokenForArith(anode))

            vtest_place.index = a_index.Str(key)  # out param
            s = bash_impl.BashAssoc_GetElement(assoc_val, key)

            if s is None:
                val = value.Undef
            else:
                val = value.Str(s)

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1344
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part, in QUOTED mode.

    Args:
      parts: the word parts between the double quotes.
      part_vals: output param to append to.
    """
    # Example of returning array:
    # $ a=(1 2); b=(3); $ c=(4 5)
    # $ argv "${a[@]}${b[@]}${c[@]}"
    # ['1', '234', '5']
    #
    # Example of multiple parts
    # $ argv "${a[@]}${undef[@]:-${c[@]}}"
    # ['1', '24', '5']

    if len(parts) > 0:
        for inner in parts:
            self._EvalWordPart(inner, part_vals, QUOTED)
        return

    # Special case for "".  The parser outputs (DoubleQuoted []), instead
    # of (DoubleQuoted [Literal '']).  This is better but it means we
    # have to check for it.
    part_vals.append(Piece('', True, False))
1371
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    For double quoted strings in YSH expressions, e.g.

        var x = "$foo-${foo}"

    The left token of dq_part is passed to _ConcatPartVals as the location.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, evaluated)
    result = self._ConcatPartVals(evaluated, dq_part.left)
    return result
1381
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array (e.g. $*) to one string.

    None entries are skipped; the remaining elements are joined with the
    splitter's join character.
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()

    present = []  # type: List[str]
    for item in bash_impl.BashArray_GetValues(val):
        if item is not None:
            present.append(item)

    return value.Str(join_char.join(present))
1389
def _ProcessUndef(self, val, name_tok, vsub_state):
    # type: (value_t, Token, VarSubState) -> value_t
    """Replace an Undef value with an empty one, or fail under nounset.

    Values other than Undef are returned as is.  For Undef:

    - If vsub_state.array_ref is set, the result is an empty BashArray,
      and a nounset failure blames that array token.
    - Otherwise the result is an empty Str, and a nounset failure blames
      name_tok (a leading '$' is stripped from the reported name).
    """
    assert name_tok is not None

    if val.tag() != value_e.Undef:
        return val

    array_tok = vsub_state.array_ref
    if array_tok is not None:
        if self.exec_opts.nounset():
            e_die('Undefined array %r' % lexer.TokenVal(array_tok),
                  array_tok)
        return value.BashArray([])

    if self.exec_opts.nounset():
        tok_str = lexer.TokenVal(name_tok)
        if tok_str.startswith('$'):
            name = tok_str[1:]
        else:
            name = tok_str
        e_die('Undefined variable %r' % name, name_tok)
    return value.Str('')
1411
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a BracedVarSub, if any, to val.

    With a bracket op, this dispatches to _WholeArray or _ArrayIndex.

    Without one, a named array or assoc array is either decayed (when
    ShouldArrayDecay allows it) or rejected with e_die(); everything else
    is returned unchanged.
    """
    bracket = part.bracket_op
    if bracket:
        tag = bracket.tag()
        if tag == bracket_op_e.WholeArray:
            return self._WholeArray(val, part, quoted, vsub_state)
        if tag == bracket_op_e.ArrayIndex:
            return self._ArrayIndex(val, part, vtest_place)
        raise AssertionError(tag)

    # no bracket op
    var_name = vtest_place.name
    if var_name is None:
        return val
    if val.tag() not in (value_e.BashArray, value_e.SparseArray,
                         value_e.BashAssoc):
        return val

    no_other_ops = not (part.prefix_op or part.suffix_op)
    if not ShouldArrayDecay(var_name, self.exec_opts, no_other_ops):
        e_die(
            "Array %r can't be referred to as a scalar (without @ or *)"
            % var_name, loc.WordPart(part))

    # for ${BASH_SOURCE}, etc.
    return DecayArray(val)
1441
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Look up the value a BracedVarSub refers to, and apply its bracket op.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t.

    Unless eval_unsafe_arith is on, the bracket op is evaluated with the
    _allow_command_sub option set to False.
    """
    # 1. Evaluate from (var_name, var_num, token Id) -> value
    name_id = part.name_tok.id
    if name_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)
    elif name_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))
    else:
        # $* decays
        val = self._EvalSpecialVar(name_id, quoted, vsub_state)

    # update h-value (i.e., the holder of the current value)
    vsub_state.h_value = val

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1474
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the BracedVarSub to evaluate.
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context.
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.name_tok.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other
                # form yields the names joined into one piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.name_tok.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.name_tok.id, quoted, vsub_state)

    suffix_op_ = part.suffix_op
    if suffix_op_:
        UP_op = suffix_op_
    # Record the holder of the current value before the bracket op
    # changes val; _Nullary reads it for @a.
    vsub_state.h_value = val

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            # undef -> '' BEFORE length
            val = self._ProcessUndef(val, part.name_tok, vsub_state)

            n = self._Count(val, part.name_tok)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray and
                    not suffix_op_):
                # undef -> empty array
                val = self._ProcessUndef(val, part.name_tok, vsub_state)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.name_tok)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.name_tok, quoted,
                                       vsub_state, vtest_place)

        else:
            raise AssertionError(part.prefix_op)

    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name,
                                             part.name_tok, vsub_state)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    # Note: _ProcessUndef (i.e., the conversion of undef ->
                    # '') is not applied to the VTest operators such as
                    # ${a:-def}, ${a+set}, etc.
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.name_tok,
                                         vsub_state):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ProcessUndef(val, part.name_tok,
                                             vsub_state)
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._ProcessUndef(val, part.name_tok, vsub_state)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()
    else:
        # No suffix op: only the undef conversion (or nounset error)
        # remains to be applied.
        val = self._ProcessUndef(val, part.name_tok, vsub_state)

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1655
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate String and Array part values into one str.

    Args:
      part_vals: part values to flatten; only String and Array are
        accepted, anything else raises AssertionError.
      location: blamed if an Array appears under strict_array.

    Array parts are joined with a single space, skipping None entries.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        tag = part_val.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_part_val)
            chunks.append(piece.s)

        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, UP_part_val)
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"
                e_die("Illegal array word part (strict_array)", location)

            # It appears to not respect IFS
            # TODO: eliminate double join()?
            present = []  # type: List[str]
            for item in arr.strs:
                if item is not None:
                    present.append(item)
            chunks.append(' '.join(present))

        else:
            raise AssertionError()

    return ''.join(chunks)
1685
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution, unquoted, to a single str.

    For double quoted strings in YSH expressions, e.g. the ${foo} in

        var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, evaluated, False)
    # blame ${ location
    result = self._ConcatPartVals(evaluated, part.left)
    return result
1696
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a SimpleVarSub and append the result.

    Args:
      part: the SimpleVarSub; its token selects a named, numbered or
        special variable.
      part_vals: output param to append to.
      quoted: whether the substitution appears in a quoted context.
    """
    tok = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    tok_id = tok.id
    if tok_id == Id.VSub_DollarName:
        var_name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(var_name)
        if val.tag() in (value_e.BashArray, value_e.SparseArray,
                         value_e.BashAssoc):
            if not ShouldArrayDecay(var_name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._ProcessUndef(val, tok, vsub_state)

    # A BashArray is joined into a string only when join_array was set.
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1738
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a $name substitution, unquoted, to a single str.

    For double quoted strings in YSH expressions, e.g. the $foo in

        var x = "$foo-${foo}"
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, evaluated, False)
    result = self._ConcatPartVals(evaluated, node.tok)
    return result
1748
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Appends the opening operator, each evaluated arm separated by '|', and
    the closing ')' to part_vals.
    """
    op = part.op
    # The ExtGlob_Comma operator is emitted as a plain '@('.
    opener = '@(' if op.id == Id.ExtGlob_Comma else lexer.LazyStr(op)
    # Do NOT split these.
    part_vals.append(Piece(opener, False, False))

    need_separator = False
    for arm in part.arms:
        if need_separator:
            part_vals.append(Piece('|', False, False))  # separator
        need_separator = True
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1766
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: the flattened part values of the word.
      w: the word, used only as the location for error messages.
      glob_parts: output param; pieces of the glob() pattern, where each
        ExtGlob part is replaced by '*'.
      fnmatch_parts: output param; pieces of the fnmatch() pattern, with
        ExtGlob parts expanded recursively.

    An Array part is rejected with e_die().
    """
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                part_val = cast(Piece, UP_part_val)
                # Quoted text is escaped, unless noglob is on.
                if part_val.quoted and not self.exec_opts.noglob():
                    s = glob_.GlobEscape(part_val.s)
                else:
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    s = part_val.s
                glob_parts.append(s)
                fnmatch_parts.append(s)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                part_val = cast(part_value.ExtGlob, UP_part_val)
                # keep appending fnmatch_parts, but replace glob_parts with '*'
                # (the nested glob parts go to a throwaway list)
                self._TranslateExtGlob(part_val.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1802
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate.
      part_vals: output param to append to.
      flags: bit flags; only QUOTED and IS_SUBST are read here.
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are not valid as word parts here.
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            # Always treated as quoted, and never split.
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            id_ = part.left_token.id
            # The left token tells command subs from process subs.
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # The result is subject to splitting only when unquoted.
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # Emit the parens as unquoted pieces around the evaluated
            # child, if there is one.
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1919
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending to part_vals.

    An Empty word contributes a single empty Piece; a Compound word is
    delegated to _EvalWordToParts with the same eval_flags.
    """
    UP_w = w
    tag = w.tag()

    if tag == rhs_word_e.Compound:
        compound = cast(CompoundWord, UP_w)
        self._EvalWordToParts(compound, part_vals, eval_flags=eval_flags)

    elif tag == rhs_word_e.Empty:
        is_quoted = bool(eval_flags & QUOTED)
        part_vals.append(Piece('', is_quoted, not is_quoted))

    else:
        raise AssertionError()
1935
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Evaluates every part of w and appends to part_vals.  Note that this is
    a TREE.

    A word containing an ExtGlob part is handled according to eval_flags:
    expanded against files (EXTGLOB_FILES), passed through for an
    enclosing extglob (EXTGLOB_NESTED), or rejected with e_die().
    """
    # Does the word have an extended glob?  This is a special case because
    # of the way we use glob() and then fnmatch(..., FNM_EXTMATCH) to
    # implement extended globs.  It's hard to carry that extra information
    # all the way past the word splitting stage.

    # OSH semantic limitations: If a word has an extended glob part, then
    # 1. It can't have an array
    # 2. Word splitting of unquoted words isn't respected

    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for p in w.parts:
        if p.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(p, evaluated, eval_flags)

    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    # From here on, we parsed at least one word_part.ExtGlob()
    if bool(eval_flags & EXTGLOB_FILES):
        # Caller REQUESTED extglob evaluation.  Treat the WHOLE word as a
        # pattern.  We need TWO VARIANTS of the word because of the way
        # we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts
        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        #log('word_part_vals %s', evaluated)
        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)
        #log("glob %s fnmatch %s", glob_pat, fnmatch_pat)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))
        return

    if bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)
        return

    # e.g. simple_word_eval, assignment builtin
    e_die('Extended glob not allowed in this word', w)
1992
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: part values to convert; ExtGlob parts are handled
        recursively.
      w: the word, used as the location for errors.  Note: it could just
        be a span ID.
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE selects how quoted pieces
        are escaped.
      strs: output param; the resulting strings are appended here.
    """
    for part_val in part_vals:
        UP_part_val = part_val
        tag = part_val.tag()

        if tag == part_value_e.String:
            piece = cast(Piece, UP_part_val)
            text = piece.s
            if piece.quoted:
                if eval_flags & QUOTE_FNMATCH:
                    # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                    text = glob_.GlobEscape(text)
                elif eval_flags & QUOTE_ERE:
                    text = glob_.ExtendedRegexEscape(text)
            strs.append(text)

        elif tag == part_value_e.Array:
            arr = cast(part_value.Array, UP_part_val)
            if self.exec_opts.strict_array():
                # Examples: echo f > "$@"; local foo="$@"

                # TODO: This attributes too coarsely, to the word rather than the
                # parts.  Problem: the word is a TREE of parts, but we only have a
                # flat list of part_vals.  The only case where we really get arrays
                # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                e_die(
                    "This word should yield a string, but it contains an array",
                    w)

                # TODO: Maybe add detail like this.
                #e_die('RHS of assignment should only have strings.  '
                #      'To assign arrays, use b=( "${a[@]}" )')

            # It appears to not respect IFS
            present = [s for s in arr.strs if s is not None]
            strs.append(' '.join(present))  # TODO: eliminate double join()?

        elif tag == part_value_e.ExtGlob:
            nested = cast(part_value.ExtGlob, UP_part_val)

            # Extended globs are only allowed where we expect them!
            if not bool(eval_flags & QUOTE_FNMATCH):
                e_die('extended glob not allowed in this word', w)

            # recursive call
            self._PartValsToString(nested.part_vals, w, eval_flags, strs)

        else:
            raise AssertionError()
2048
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Evaluate a compound word to a single string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: may select a quoting algorithm, which is applied when the
                  evaluated parts are turned into a string
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    compound = cast(CompoundWord, UP_w)

    # The fast path is only tried without flags: QUOTE_FNMATCH etc. breaks
    # the optimization.
    if eval_flags == 0:
        quick = word_.FastStrEval(compound)
        if quick is not None:
            return value.Str(quick)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark
    evaluated = []  # type: List[part_value_t]
    for part in compound.parts:
        # Parts are evaluated with flags 0; eval_flags only matters in the
        # string conversion below, which is slightly confusing.
        self._EvalWordPart(part, evaluated, 0)

    fragments = []  # type: List[str]
    self._PartValsToString(evaluated, compound, eval_flags, fragments)
    return value.Str(''.join(fragments))
2076
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but also report whether we got an ExtGlob.

    Returns:
      (pattern, has_extglob).  An empty word gives ('', False).  The
      pattern is always built with QUOTE_FNMATCH.
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    compound = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for part in compound.parts:
        # Only the syntactic tag of a top-level part is inspected.
        if part.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        # Parts are evaluated with flags 0; the quoting flag is applied in
        # the string conversion below.
        self._EvalWordPart(part, evaluated, 0)

    fragments = []  # type: List[str]
    self._PartValsToString(evaluated, compound, QUOTE_FNMATCH, fragments)
    return value.Str(''.join(fragments)), saw_extglob
2097
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Evaluate a word to a string without letting errors escape.

    This wraps EvalWordToString.  Runtime errors like $(( 1 / 0 )) and
    mutations of $? like $(exit 42) are handled here: fatal runtime
    errors, I/O errors and Ctrl-C are each turned into a placeholder
    string.

    Similar to ExprEvaluator.PluginCall().
    """
    # ctx_Registers "sandboxes" $? and $PIPESTATUS
    with state.ctx_Registers(self.mem):
        try:
            result = self.EvalWordToString(w)
        except error.FatalRuntime as runtime_err:
            result = value.Str('<Runtime error: %s>' %
                               runtime_err.UserErrorString())
        except (IOError, OSError) as io_err:
            result = value.Str('<I/O error: %s>' % pyutil.strerror(io_err))
        except KeyboardInterrupt:
            result = value.Str('<Ctrl-C>')

    return result
2120
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Evaluate the right-hand side of an assignment.

    There is no splitting.

    Returns:
      value.Str('') for an empty RHS, value.BashArray for a=(1 2),
      value.BashAssoc for an associative array literal, and otherwise the
      value.Str computed by EvalWordToString().
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # UP_w is an rhs_word_t, so the tag is checked against rhs_word_e, the
    # same enum used for the Empty test above and in EvalWordToPattern().
    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # Array literals only occur as the sole part of the word.
    if len(w.parts) == 1:
        part0 = w.parts[0]
        UP_part0 = part0
        tag = part0.tag()

        # Special case for a=(1 2).  ShArrayLiteral won't appear in words
        # that don't look like assignments.
        if tag == word_part_e.ShArrayLiteral:
            part0 = cast(ShArrayLiteral, UP_part0)
            array_words = part0.words
            # Brace expansion first, then ordinary word sequence evaluation.
            words = braces.BraceExpandWords(array_words)
            strs = self.EvalWordSequence(words)
            return value.BashArray(strs)

        if tag == word_part_e.BashAssocLiteral:
            part0 = cast(word_part.BashAssocLiteral, UP_part0)
            d = NewDict()  # type: Dict[str, str]
            # Keys and values are each evaluated to a single string.
            for pair in part0.pairs:
                k = self.EvalWordToString(pair.key)
                v = self.EvalWordToString(pair.value)
                d[k.s] = v.s
            return value.BashAssoc(d)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    return self.EvalWordToString(w)
2157
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of pieces into zero or more args appended to argv.

    An entirely quoted frame is joined as is.  Otherwise the pieces are
    escaped, joined, split with self.splitter, and each resulting arg that
    looks like a glob is expanded with self.globber.

    Raises:
      error.FailGlob: if self.globber.Expand() returns a negative count
    """
    saw_nonempty = False
    saw_quoted = False
    saw_unquoted = False

    #log('--- frame %s', frame)

    for piece in frame:
        if len(piece.s):
            saw_nonempty = True
        if piece.quoted:
            saw_quoted = True
        else:
            saw_unquoted = True

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if not saw_nonempty and not saw_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if not saw_unquoted:
        texts = [piece.s for piece in frame]
        argv.append(''.join(texts))
        return

    glob_enabled = not self.exec_opts.noglob()

    if 0:
        log('---')
        log('FRAME')
        for i, piece in enumerate(frame):
            log('(%d) %s', i, piece)
        log('')

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        if piece.quoted and glob_enabled:
            escaped = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            escaped = _BackslashEscape(piece.s)

        if piece.do_split:
            escaped = _BackslashEscape(escaped)
        else:
            escaped = self.splitter.Escape(escaped)

        frags.append(escaped)

    if 0:
        log('---')
        log('FRAGS')
        for i, frag in enumerate(frags):
            log('(%d) %s', i, frag)
        log('')

    flat = ''.join(frags)
    #log('flat: %r', flat)

    split_args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(split_args) == 0 and saw_quoted:
        argv.append('')
        return

    #log('split args: %r', split_args)
    for arg in split_args:
        if not glob_.LooksLikeGlob(arg):
            argv.append(glob_.GlobUnescape(arg))
            continue

        # Expand() appends matches to argv itself.
        num_matched = self.globber.Expand(arg, argv)
        if num_matched < 0:
            # TODO: location info, with span IDs carried through the frame
            raise error.FailGlob('Pattern %r matched no files' % arg,
                                 loc.Missing)
2242
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Evaluate one word to a list of args, without splitting or globbing.

    Helper for _EvalAssignBuiltin; splitting and globbing are disabled for
    assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalWordToParts(w, evaluated, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(evaluated):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        texts = [piece.s for piece in frame]
        result.append(''.join(texts))  # no split or glob
    #log('argv: %s', result)
    return result
2262
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: which assignment builtin is being run
      arg0: the evaluated name of the assignment builtin
      words: all words of the command
      meta_offset: index in 'words' of the assignment builtin; words after
                   it are evaluated as flags and pairs

    Returns:
      cmd_value.Assign, whose flags and flag locations are parallel lists.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # flags and flag_locs are parallel: every append to one must be matched
    # by an append to the other.  arg0 was evaluated from words[meta_offset]
    # (not words[0], which may be 'builtin' or 'command').
    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    flag_locs = [words[meta_offset]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip first word
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                # Strip the trailing '+=' or '=' to get the variable name.
                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # Names after -f / -F are passed through as flags.
                        # Record the location too, so the lists stay
                        # parallel.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2349
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate 'words' as an assignment builtin if arg0 names one.

    Returns None when consts.LookupAssignBuiltin() doesn't find arg0.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None
    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2357
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr, but for an evaluated part value.

    Only an unquoted string part is looked up; anything else gives None.
    """
    UP_val0 = val0
    if val0.tag() != part_value_e.String:
        return None

    val0 = cast(Piece, UP_val0)
    if val0.quoted:
        return None

    return self._DetectAssignBuiltinStr(val0.s, words, meta_offset)
2366
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    There is no word splitting; only words for which
    glob_.LooksLikeStaticGlob() is true are globbed.

    Returns:
      cmd_value.Assign if the first word is an assignment builtin and
      allow_assign is set; otherwise cmd_value.Argv, with one location per
      string.

    Raises:
      error.FailGlob: if self.globber.Expand() returns a negative count
    """
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        if allow_assign and i == meta_offset:
            # No globbing in the first arg for command.Simple.
            first_strs = self._EvalWordToArgv(w)
            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first_strs) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    first_strs[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            argv.extend(first_strs)
            for _ in first_strs:
                arg_locs.append(w)

        elif glob_.LooksLikeStaticGlob(w):
            pat = self.EvalWordToString(w)  # respects strict-array
            # Expand() appends the matches to argv and returns how many.
            num_matched = self.globber.Expand(pat.s, argv)
            if num_matched < 0:
                raise error.FailGlob('Pattern %r matched no files' % pat.s,
                                     w)
            for _ in xrange(num_matched):
                arg_locs.append(w)

        else:
            part_vals = []  # type: List[part_value_t]
            self._EvalWordToParts(w, part_vals, 0)  # not quoted

            if 0:
                log('')
                log('Static: part_vals after _EvalWordToParts:')
                for entry in part_vals:
                    log('  %s', entry)

            # Still need to process
            frames = _MakeWordFrames(part_vals)

            if 0:
                log('')
                log('Static: frames after _MakeWordFrames:')
                for entry in frames:
                    log('  %s', entry)

            # We will still allow x"${a[@]}"x, though it's deprecated by @a,
            # which disallows such expressions at parse time.
            for frame in frames:
                if len(frame) == 0:  # empty array gives empty frame!
                    continue
                texts = [piece.s for piece in frame]
                argv.append(''.join(texts))  # no split or glob
                arg_locs.append(w)

    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2429
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Assign if an assignment builtin is detected (only when
      allow_assign is set); otherwise cmd_value.Argv, with one location per
      string.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    # first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    # "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    # off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    argv = []  # type: List[str]
    arg_locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    for i, w in enumerate(words):
        quick = word_.FastStrEval(w)
        if quick is not None:
            argv.append(quick)
            arg_locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    quick, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(quick):
                meta_offset += 1

            assert len(argv) == len(arg_locs), argv
            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin,
        # and change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            only_val = part_vals[0]
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(only_val, words,
                                                    meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(only_val):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Do splitting and globbing.  Each frame will append zero or more
        # args, so remember where this word's args begin.
        before = len(argv)
        for frame in frames:
            self._EvalWordFrame(frame, argv)

        # Fill in locations parallel to argv.
        for _ in xrange(len(argv) - before):
            arg_locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    assert len(argv) == len(arg_locs), '%s vs. %d' % (argv, len(arg_locs))
    return cmd_value.Argv(argv, arg_locs, is_last_cmd, None, None)
2537
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a list of strings, for arrays and for loops.

    They don't allow assignment builtins, so EvalWordSequence2() is called
    with its default allow_assign, and the result must be an Argv.
    """
    # is_last_cmd is irrelevant
    result = self.EvalWordSequence2(words, False)
    assert result.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2548
2549
class NormalWordEvaluator(AbstractWordEvaluator):
    """A word evaluator that runs command and process subs with shell_ex."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Starts out unset; CheckCircularDeps() asserts it has been filled
        # in.
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the evaluators and executor have been set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and wrap its stdout as a part value.

        @() gives an array of J8 lines; anything else gives a Piece that is
        split only if it's not quoted.
        """
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            lines = j8.SplitJ8Lines(stdout_str)
        except error.Decode as decode_err:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, decode_err.Message(),
                                   cs_part.left_token)

        #lines = self.splitter.SplitForWordEval(stdout_str)
        return part_value.Array(lines)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return its path as a Piece."""
        dev_path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(dev_path, True, False)
2596
2597
2598_DUMMY = '__NO_COMMAND_SUB__'
2599
2600
class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to placeholder strings.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            tilde_ev,  # type: TildeEvaluator
            splitter,  # type: SplitContext
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the prompt, arith and expr evaluators have been set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder, shaped like a real command sub result."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)
        # @() yields an array, so the placeholder is a one-element array.
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of running the process sub."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2640
2641
2642# vim: sw=4