OILS / osh / word_eval.py View on Github | oilshell.org

2435 lines, 1474 significant
1"""
2word_eval.py - Evaluator for the word language.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id, Kind, Kind_str
6from _devbuild.gen.syntax_asdl import (
7 Token,
8 SimpleVarSub,
9 loc,
10 loc_t,
11 BracedVarSub,
12 CommandSub,
13 bracket_op,
14 bracket_op_e,
15 suffix_op,
16 suffix_op_e,
17 ShArrayLiteral,
18 SingleQuoted,
19 DoubleQuoted,
20 word_e,
21 word_t,
22 CompoundWord,
23 rhs_word,
24 rhs_word_e,
25 rhs_word_t,
26 word_part,
27 word_part_e,
28)
29from _devbuild.gen.runtime_asdl import (
30 part_value,
31 part_value_e,
32 part_value_t,
33 cmd_value,
34 cmd_value_e,
35 cmd_value_t,
36 AssignArg,
37 a_index,
38 a_index_e,
39 VTestPlace,
40 VarSubState,
41 Piece,
42)
43from _devbuild.gen.option_asdl import option_i, builtin_i
44from _devbuild.gen.value_asdl import (
45 value,
46 value_e,
47 value_t,
48 sh_lvalue,
49 sh_lvalue_t,
50)
51from core import error
52from core import pyos
53from core import pyutil
54from core import state
55from display import ui
56from core import util
57from data_lang import j8
58from data_lang import j8_lite
59from core.error import e_die
60from frontend import consts
61from frontend import lexer
62from frontend import location
63from mycpp import mops
64from mycpp.mylib import log, tagswitch, NewDict
65from osh import braces
66from osh import glob_
67from osh import string_ops
68from osh import word_
69from ysh import expr_eval
70from ysh import val_ops
71
72from typing import Optional, Tuple, List, Dict, cast, TYPE_CHECKING
73
74if TYPE_CHECKING:
75 from _devbuild.gen.syntax_asdl import word_part_t
76 from _devbuild.gen.option_asdl import builtin_t
77 from core import optview
78 from core.state import Mem
79 from core.vm import _Executor
80 from osh.split import SplitContext
81 from osh import prompt
82 from osh import sh_expr_eval
83
# Bit flags accepted by _EvalWordToParts and _EvalWordPart (not every flag is
# meaningful to both).  Each flag occupies its own bit so they can be OR'd.
QUOTED = 0x01  # bit 0
IS_SUBST = 0x02  # bit 1

EXTGLOB_FILES = 0x04  # bit 2: allow @(cc) from file system?
EXTGLOB_MATCH = 0x08  # bit 3: allow @(cc) in pattern matching?
EXTGLOB_NESTED = 0x10  # bit 4: for @(one|!(two|three))

# Bit flags for EvalWordToString
QUOTE_FNMATCH = 0x20  # bit 5
QUOTE_ERE = 0x40  # bit 6

# For compatibility, ${BASH_SOURCE} and ${BASH_SOURCE[@]} are both valid.
# Ditto for ${FUNCNAME} and ${BASH_LINENO}.
_STRING_AND_ARRAY = ['BASH_SOURCE', 'FUNCNAME', 'BASH_LINENO']
99
100
def ShouldArrayDecay(var_name, exec_opts, is_plain_var_sub=True):
    # type: (str, optview.Exec, bool) -> bool
    """Decide whether ${a} may be treated as ${a[0]}.

    Decay is always allowed when strict_array is off.  With strict_array on,
    it is only allowed for a plain var sub of one of the names listed in
    _STRING_AND_ARRAY.
    """
    if not exec_opts.strict_array():
        return True
    return is_plain_var_sub and var_name in _STRING_AND_ARRAY
106
107
def DecayArray(val):
    # type: (value_t) -> value_t
    """Resolve ${array} to ${array[0]}.

    For a BashArray the first element is used; for a BashAssoc the entry with
    key '0'.  A missing (or None) entry yields value.Undef.  Any other value
    type is a programming error and raises AssertionError.
    """
    tag = val.tag()
    if tag == value_e.BashArray:
        array_val = cast(value.BashArray, val)
        if len(array_val.strs):
            first = array_val.strs[0]
        else:
            first = None
    elif tag == value_e.BashAssoc:
        assoc_val = cast(value.BashAssoc, val)
        first = assoc_val.d.get('0')  # None when the key is absent
    else:
        raise AssertionError(val.tag())

    if first is not None:
        return value.Str(first)
    return value.Undef
124
125
def GetArrayItem(strs, index):
    # type: (List[str], int) -> Optional[str]
    """Look up one array element, allowing negative indices.

    A negative index counts from the end of the list.  Returns None when the
    (adjusted) index is out of range.  The returned element itself may also
    be None, because the array representation is sparse.
    """
    n = len(strs)
    pos = index + n if index < 0 else index

    if pos < 0 or pos >= n:
        return None

    # TODO: strs->index() has a redundant check for (i < 0)
    return strs[pos]
140
141
def _DetectMetaBuiltinStr(s):
    # type: (str) -> bool
    """Return whether s names the 'builtin' or 'command' builtin.

    We need to detect all of these cases:

       builtin local
       command local
       builtin builtin local
       builtin command local

    Fundamentally, assignment builtins have different WORD EVALUATION RULES
    for a=$x (no word splitting), so it seems hard to do this in
    meta_oils.Builtin() or meta_oils.Command()
    """
    builtin_id = consts.LookupNormalBuiltin(s)
    return builtin_id in (builtin_i.builtin, builtin_i.command)
158
159
def _DetectMetaBuiltin(val0):
    # type: (part_value_t) -> bool
    """Return whether a part value is an unquoted 'builtin' / 'command'.

    Only an unquoted string piece can qualify; arrays and quoted pieces
    never do.
    """
    if val0.tag() != part_value_e.String:
        return False

    piece = cast(Piece, val0)
    if piece.quoted:
        return False
    return _DetectMetaBuiltinStr(piece.s)
168
169
def _SplitAssignArg(arg, blame_word):
    # type: (str, CompoundWord) -> AssignArg
    """Dynamically parse an argument to declare, export, etc.

    This is a fallback to the static parsing done below.

    Args:
      arg: the evaluated argument string, e.g. 'NAME', 'NAME=' or 'NAME+=x'
      blame_word: word used for error location and stored in the result

    Returns:
      An AssignArg.  Its value is None when there is no operator at all
      (declare NAME); its append flag is set when the operator starts
      with '+'.
    """
    # Note: it would be better to cache regcomp(), but we don't have an API for
    # that, and it probably isn't a bottleneck now
    match = util.RegexSearch(consts.ASSIGN_ARG_RE, arg)
    if match is None:
        e_die("Assignment builtin expected NAME=value, got %r" % arg,
              blame_word)

    # Group 2 only exists for grouping; ERE has no non-capturing groups.
    name = match[1]
    op = match[3]
    assert op is not None, op

    if len(op) == 0:  # declare NAME -- no operator, so no value
        return AssignArg(name, None, False, blame_word)

    # declare NAME=  or  declare NAME+=
    rhs = value.Str(match[4])  # type: Optional[value_t]
    return AssignArg(name, rhs, op[0] == '+', blame_word)
196
197
198# NOTE: Could be done with util.BackslashEscape like glob_.GlobEscape().
def _BackslashEscape(s):
    # type: (str) -> str
    """Return s with every backslash doubled.

    Useful for strings about to be globbed and strings about to be IFS
    escaped.
    """
    backslash = '\\'
    return s.replace(backslash, backslash + backslash)
207
208
def _ValueToPartValue(val, quoted, part_loc):
    # type: (value_t, bool, word_part_t) -> part_value_t
    """Turn an evaluated variable value into a part_value.

    Called by _EvalBracedVarSub and _EvalWordPart for SimpleVarSub.

    Args:
      val: the value to substitute
      quoted: whether the substitution is in a quoted context
      part_loc: word part blamed when the value can't be substituted

    Returns:
      A Piece for Undef, Str and the stringifiable YSH types, or a
      part_value.Array for BashArray and BashAssoc.

    Raises:
      error.TypeErr: for any other value type.
    """
    UP_val = val
    # Every Piece built here gets (quoted, not quoted) as its two flags.
    unquoted = not quoted

    with tagswitch(val) as case:
        if case(value_e.Undef):
            # This happens in the case of ${undef+foo}.  We skipped
            # _EmptyStrOrError, but we have to append to the empty string.
            return Piece('', quoted, unquoted)

        elif case(value_e.Str):
            str_val = cast(value.Str, UP_val)
            return Piece(str_val.s, quoted, unquoted)

        elif case(value_e.BashArray):
            array_val = cast(value.BashArray, UP_val)
            return part_value.Array(array_val.strs)

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            # bash behavior: splice values!
            return part_value.Array(assoc_val.d.values())

        # Cases added for YSH
        # value_e.List is also here - we use val_ops.Stringify()s err message
        elif case(value_e.Null, value_e.Bool, value_e.Int, value_e.Float,
                  value_e.Eggex, value_e.List):
            text = val_ops.Stringify(val, loc.Missing, 'Word eval ')
            return Piece(text, quoted, unquoted)

        else:
            raise error.TypeErr(val, "Can't substitute into word",
                                loc.WordPart(part_loc))

    raise AssertionError('for -Wreturn-type in C++')
248
249
def _MakeWordFrames(part_vals):
    # type: (List[part_value_t]) -> List[List[Piece]]
    """Group the part values of one word into frames.

    A word evaluates to a flat list of part_value (String or Array).  A frame
    is a portion that results in zero or more args.  It can never be joined.
    This idea exists because of arrays like "$@" and "${a[@]}".

    Example:

      a=(1 '2 3' 4)
      x=x
      y=y

      # This word
      $x"${a[@]}"$y

      # Results in Three frames:
      [ ('x', False, True), ('1', True, False) ]
      [ ('2 3', True, False) ]
      [ ('4', True, False), ('y', False, True) ]

    The first defined array element joins the frame that is currently open;
    each later element starts a new frame, which then stays open for whatever
    follows.  None entries in an array are ignored.

    Note: A frame is a 3-tuple that's identical to Piece()?  Maybe we
    should make that top level type.

    TODO:
    - Instead of List[List[Piece]], where List[Piece] is a Frame
    - Change this representation to
        Frames = (List[Piece] pieces, List[int] break_indices)
        # where break_indices are the end

      Consider a common case like "$x" or "${x}" - I think this a lot more
      efficient?

    And then change _EvalWordFrame(pieces: List[Piece], start: int, end: int)
    """
    open_frame = []  # type: List[Piece]
    frames = [open_frame]

    for part_val in part_vals:
        UP_part_val = part_val

        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                open_frame.append(cast(Piece, UP_part_val))

            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, UP_part_val)

                started = False  # has a defined element been placed yet?
                for s in array_part.strs:
                    if s is None:
                        continue  # ignore undefined array entries

                    # Arrays parts are always quoted; otherwise they would
                    # have decayed to a string.
                    piece = Piece(s, True, False)
                    if started:
                        open_frame = [piece]
                        frames.append(open_frame)  # singleton frame
                    else:
                        open_frame.append(piece)
                        started = True

            else:
                raise AssertionError()

    return frames
317
318
319# TODO: This could be _MakeWordFrames and then sep.join(). It's redundant.
def _DecayPartValuesToString(part_vals, join_char):
    # type: (List[part_value_t], str) -> str
    """Flatten part values into a single string, e.g. for ${a=x"$@"x}.

    String pieces contribute their text as-is.  Array parts contribute their
    non-None elements joined with join_char.  The contributions are then
    concatenated with no separator.
    """
    chunks = []  # type: List[str]
    for part_val in part_vals:
        UP_part_val = part_val
        with tagswitch(part_val) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_part_val)
                chunks.append(piece.s)
            elif case(part_value_e.Array):
                array_part = cast(part_value.Array, UP_part_val)
                # TODO: Eliminate double join for speed?
                defined = []  # type: List[str]
                for s in array_part.strs:
                    if s is not None:
                        defined.append(s)
                chunks.append(join_char.join(defined))
            else:
                raise AssertionError()
    return ''.join(chunks)
338
339
def _PerformSlice(
        val,  # type: value_t
        begin,  # type: int
        length,  # type: int
        has_length,  # type: bool
        part,  # type: BracedVarSub
        arg0_val,  # type: value.Str
):
    # type: (...) -> value_t
    """Apply ${x:begin:length} to a string or an array.

    Args:
      val: the value being sliced (Str or BashArray)
      begin: offset; a negative value counts from the end
      length: only meaningful when has_length is true.  For strings, a
        negative length is an end POSITION counted from the end.
      has_length: whether a length was given at all
      part: the var sub, used for error locations
      arg0_val: $0 when slicing positional params ($@ and $*), else None

    Returns:
      value.Str for a string, value.BashArray for an array.

    Raises:
      A fatal error (via e_die) for a negative array length or for an
      associative array; error.TypeErr for any other value type.
    """
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.Str):  # Slice UTF-8 characters in a string.
            val = cast(value.Str, UP_val)
            s = val.s
            n = len(s)

            if begin < 0:  # Compute offset with unicode
                byte_begin = n
                num_iters = -begin
                for _ in xrange(num_iters):
                    byte_begin = string_ops.PreviousUtf8Char(s, byte_begin)
            else:
                byte_begin = string_ops.AdvanceUtf8Chars(s, begin, 0)

            if has_length:
                if length < 0:  # Compute offset with unicode
                    # Confusing: this is a POSITION
                    byte_end = n
                    num_iters = -length
                    for _ in xrange(num_iters):
                        byte_end = string_ops.PreviousUtf8Char(s, byte_end)
                else:
                    byte_end = string_ops.AdvanceUtf8Chars(
                        s, length, byte_begin)
            else:
                byte_end = n

            substr = s[byte_begin:byte_end]
            result = value.Str(substr)  # type: value_t

        elif case(value_e.BashArray):  # Slice array entries.
            val = cast(value.BashArray, UP_val)
            # NOTE: This error is ALWAYS fatal in bash.  It's inconsistent with
            # strings.
            if has_length and length < 0:
                e_die("Array slice can't have negative length: %d" % length,
                      loc.WordPart(part))

            # Quirk: "begin" for positional arguments ($@ and $*) counts $0.
            if arg0_val is not None:
                orig = [arg0_val.s]
                orig.extend(val.strs)
            else:
                orig = val.strs

            n = len(orig)
            if begin < 0:
                i = n + begin  # ${@:-3} starts counts from the end
                if i < 0:
                    # The offset points before the first element.  Without
                    # this guard, orig[i] would wrap around from the end (or
                    # be out of bounds).  Produce an empty result instead,
                    # like bash does.
                    i = n
            else:
                i = begin
            strs = []  # type: List[str]
            count = 0
            while i < n:
                if has_length and count == length:  # length could be 0
                    break
                s = orig[i]
                if s is not None:  # Unset elements don't count towards the length
                    strs.append(s)
                    count += 1
                i += 1

            result = value.BashArray(strs)

        elif case(value_e.BashAssoc):
            e_die("Can't slice associative arrays", loc.WordPart(part))

        else:
            raise error.TypeErr(val, 'Slice op expected Str or BashArray',
                                loc.WordPart(part))

    return result
421
422
class StringWordEvaluator(object):
    """Interface used by ArithEvaluator / BoolEvaluator.

    Subclasses must override EvalWordToString.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        pass

    def EvalWordToString(self, w, eval_flags=0):
        # type: (word_t, int) -> value.Str
        """Evaluate a word to a string value; not implemented here."""
        raise NotImplementedError()
434
435
def _GetDollarHyphen(exec_opts):
    # type: (optview.Exec) -> str
    """Build the value of $- from the currently enabled shell options.

    Each enabled option contributes one letter, always in the order
    i e f n u x C.  Disabled options contribute nothing.
    """
    # bash has:
    # - c for sh -c, i for sh -i (mksh also has this)
    # - h for hashing (mksh also has this)
    # - B for brace expansion
    letters = [
        'i' if exec_opts.interactive() else '',
        'e' if exec_opts.errexit() else '',
        'f' if exec_opts.noglob() else '',
        'n' if exec_opts.noexec() else '',
        'u' if exec_opts.nounset() else '',
        # NO letter for pipefail?
        'x' if exec_opts.xtrace() else '',
        'C' if exec_opts.noclobber() else '',
    ]  # type: List[str]
    return ''.join(letters)
461
462
class TildeEvaluator(object):
    """Expands ~ and ~user to home directories."""

    def __init__(self, mem, exec_opts):
        # type: (Mem, optview.Exec) -> None
        self.mem = mem
        self.exec_opts = exec_opts

    def GetMyHomeDir(self):
        # type: () -> Optional[str]
        """Consult $HOME first, and then make a libc call.

        Important: the libc call can FAIL, which is why we prefer $HOME.  See
        issue #1578.
        """
        # First look up the HOME var, then ask the OS.  This is what bash does.
        home_val = self.mem.GetValue('HOME')
        if home_val.tag() != value_e.Str:
            return pyos.GetMyHomeDir()
        return cast(value.Str, home_val).s

    def Eval(self, part):
        # type: (word_part.TildeSub) -> str
        """Evaluates ~ and ~user, given a Lit_TildeLike token.

        When the home directory can't be determined, this is a fatal error
        under strict_tilde; otherwise the literal ~ or ~user is returned.
        """
        user = part.user_name
        if user is None:
            home = self.GetMyHomeDir()
        else:
            home = pyos.GetHomeDir(user)

        if home is not None:
            return home

        if self.exec_opts.strict_tilde():
            e_die("Error expanding tilde (e.g. invalid user)", part.left)

        # Return ~ or ~user literally
        if user is None:
            return '~'
        return '~' + user  # mycpp doesn't have +=
504
505
506class AbstractWordEvaluator(StringWordEvaluator):
507 """Abstract base class for word evaluators.
508
509 Public entry points:
510 EvalWordToString EvalForPlugin EvalRhsWord
511 EvalWordSequence EvalWordSequence2
512 """
513
514 def __init__(
515 self,
516 mem, # type: state.Mem
517 exec_opts, # type: optview.Exec
518 mutable_opts, # type: state.MutableOpts
519 tilde_ev, # type: TildeEvaluator
520 splitter, # type: SplitContext
521 errfmt, # type: ui.ErrorFormatter
522 ):
523 # type: (...) -> None
524 self.arith_ev = None # type: sh_expr_eval.ArithEvaluator
525 self.expr_ev = None # type: expr_eval.ExprEvaluator
526 self.prompt_ev = None # type: prompt.Evaluator
527
528 self.unsafe_arith = None # type: sh_expr_eval.UnsafeArith
529
530 self.tilde_ev = tilde_ev
531
532 self.mem = mem # for $HOME, $1, etc.
533 self.exec_opts = exec_opts # for nounset
534 self.mutable_opts = mutable_opts # for _allow_command_sub
535 self.splitter = splitter
536 self.errfmt = errfmt
537
538 self.globber = glob_.Globber(exec_opts)
539
540 def CheckCircularDeps(self):
541 # type: () -> None
542 raise NotImplementedError()
543
544 def _EvalCommandSub(self, cs_part, quoted):
545 # type: (CommandSub, bool) -> part_value_t
546 """Abstract since it has a side effect."""
547 raise NotImplementedError()
548
549 def _EvalProcessSub(self, cs_part):
550 # type: (CommandSub) -> part_value_t
551 """Abstract since it has a side effect."""
552 raise NotImplementedError()
553
554 def _EvalVarNum(self, var_num):
555 # type: (int) -> value_t
556 assert var_num >= 0
557 return self.mem.GetArgNum(var_num)
558
559 def _EvalSpecialVar(self, op_id, quoted, vsub_state):
560 # type: (int, bool, VarSubState) -> value_t
561 """Evaluate $?
562
563 and so forth
564 """
565 # $@ is special -- it need to know whether it is in a double quoted
566 # context.
567 #
568 # - If it's $@ in a double quoted context, return an ARRAY.
569 # - If it's $@ in a normal context, return a STRING, which then will be
570 # subject to splitting.
571
572 if op_id in (Id.VSub_At, Id.VSub_Star):
573 argv = self.mem.GetArgv()
574 val = value.BashArray(argv) # type: value_t
575 if op_id == Id.VSub_At:
576 # "$@" evaluates to an array, $@ should be decayed
577 vsub_state.join_array = not quoted
578 else: # $* "$*" are both decayed
579 vsub_state.join_array = True
580
581 elif op_id == Id.VSub_Hyphen:
582 val = value.Str(_GetDollarHyphen(self.exec_opts))
583
584 else:
585 val = self.mem.GetSpecialVar(op_id)
586
587 return val
588
589 def _ApplyTestOp(
590 self,
591 val, # type: value_t
592 op, # type: suffix_op.Unary
593 quoted, # type: bool
594 part_vals, # type: Optional[List[part_value_t]]
595 vtest_place, # type: VTestPlace
596 blame_token, # type: Token
597 ):
598 # type: (...) -> bool
599 """
600 Returns:
601 Whether part_vals was mutated
602
603 ${a:-} returns part_value[]
604 ${a:+} returns part_value[]
605 ${a:?error} returns error word?
606 ${a:=} returns part_value[] but also needs self.mem for side effects.
607
608 So I guess it should return part_value[], and then a flag for raising an
609 error, and then a flag for assigning it?
610 The original BracedVarSub will have the name.
611
612 Example of needing multiple part_value[]
613
614 echo X-${a:-'def'"ault"}-X
615
616 We return two part values from the BracedVarSub. Also consider:
617
618 echo ${a:-x"$@"x}
619 """
620 eval_flags = IS_SUBST
621 if quoted:
622 eval_flags |= QUOTED
623
624 tok = op.op
625 # NOTE: Splicing part_values is necessary because of code like
626 # ${undef:-'a b' c 'd # e'}. Each part_value can have a different
627 # do_glob/do_elide setting.
628 UP_val = val
629 with tagswitch(val) as case:
630 if case(value_e.Undef):
631 is_falsey = True
632
633 elif case(value_e.Str):
634 val = cast(value.Str, UP_val)
635 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_ColonEquals,
636 Id.VTest_ColonQMark, Id.VTest_ColonPlus):
637 is_falsey = len(val.s) == 0
638 else:
639 is_falsey = False
640
641 elif case(value_e.BashArray):
642 val = cast(value.BashArray, UP_val)
643 # TODO: allow undefined
644 is_falsey = len(val.strs) == 0
645
646 elif case(value_e.BashAssoc):
647 val = cast(value.BashAssoc, UP_val)
648 is_falsey = len(val.d) == 0
649
650 else:
651 # value.Eggex, etc. are all false
652 is_falsey = False
653
654 if tok.id in (Id.VTest_ColonHyphen, Id.VTest_Hyphen):
655 if is_falsey:
656 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
657 return True
658 else:
659 return False
660
661 # Inverse of the above.
662 elif tok.id in (Id.VTest_ColonPlus, Id.VTest_Plus):
663 if is_falsey:
664 return False
665 else:
666 self._EvalRhsWordToParts(op.arg_word, part_vals, eval_flags)
667 return True
668
669 # Splice and assign
670 elif tok.id in (Id.VTest_ColonEquals, Id.VTest_Equals):
671 if is_falsey:
672 # Collect new part vals.
673 assign_part_vals = [] # type: List[part_value_t]
674 self._EvalRhsWordToParts(op.arg_word, assign_part_vals,
675 eval_flags)
676 # Append them to out param AND return them.
677 part_vals.extend(assign_part_vals)
678
679 if vtest_place.name is None:
680 # TODO: error context
681 e_die("Can't assign to special variable")
682 else:
683 # NOTE: This decays arrays too! 'shopt -s strict_array' could
684 # avoid it.
685 rhs_str = _DecayPartValuesToString(
686 assign_part_vals, self.splitter.GetJoinChar())
687 if vtest_place.index is None: # using None when no index
688 lval = location.LName(
689 vtest_place.name) # type: sh_lvalue_t
690 else:
691 var_name = vtest_place.name
692 var_index = vtest_place.index
693 UP_var_index = var_index
694
695 with tagswitch(var_index) as case:
696 if case(a_index_e.Int):
697 var_index = cast(a_index.Int, UP_var_index)
698 lval = sh_lvalue.Indexed(
699 var_name, var_index.i, loc.Missing)
700 elif case(a_index_e.Str):
701 var_index = cast(a_index.Str, UP_var_index)
702 lval = sh_lvalue.Keyed(var_name, var_index.s,
703 loc.Missing)
704 else:
705 raise AssertionError()
706
707 state.OshLanguageSetValue(self.mem, lval,
708 value.Str(rhs_str))
709 return True
710
711 else:
712 return False
713
714 elif tok.id in (Id.VTest_ColonQMark, Id.VTest_QMark):
715 if is_falsey:
716 # The arg is the error message
717 error_part_vals = [] # type: List[part_value_t]
718 self._EvalRhsWordToParts(op.arg_word, error_part_vals,
719 eval_flags)
720 error_str = _DecayPartValuesToString(
721 error_part_vals, self.splitter.GetJoinChar())
722
723 #
724 # Display fancy/helpful error
725 #
726 if vtest_place.name is None:
727 var_name = '???'
728 else:
729 var_name = vtest_place.name
730
731 if 0:
732 # This hint is nice, but looks too noisy for now
733 op_str = lexer.LazyStr(tok)
734 if tok.id == Id.VTest_ColonQMark:
735 why = 'empty or unset'
736 else:
737 why = 'unset'
738
739 self.errfmt.Print_(
740 "Hint: operator %s means a variable can't be %s" %
741 (op_str, why), tok)
742
743 if val.tag() == value_e.Undef:
744 actual = 'unset'
745 else:
746 actual = 'empty'
747
748 if len(error_str):
749 suffix = ': %r' % error_str
750 else:
751 suffix = ''
752 e_die("Var %s is %s%s" % (var_name, actual, suffix),
753 blame_token)
754
755 else:
756 return False
757
758 else:
759 raise AssertionError(tok.id)
760
761 def _Length(self, val, token):
762 # type: (value_t, Token) -> int
763 """Returns the length of the value, for ${#var}"""
764 UP_val = val
765 with tagswitch(val) as case:
766 if case(value_e.Str):
767 val = cast(value.Str, UP_val)
768 # NOTE: Whether bash counts bytes or chars is affected by LANG
769 # environment variables.
770 # Should we respect that, or another way to select? set -o
771 # count-bytes?
772
773 # https://stackoverflow.com/questions/17368067/length-of-string-in-bash
774 try:
775 length = string_ops.CountUtf8Chars(val.s)
776 except error.Strict as e:
777 # Add this here so we don't have to add it so far down the stack.
778 # TODO: It's better to show BOTH this CODE an the actual DATA
779 # somehow.
780 e.location = token
781
782 if self.exec_opts.strict_word_eval():
783 raise
784 else:
785 # NOTE: Doesn't make the command exit with 1; it just returns a
786 # length of -1.
787 self.errfmt.PrettyPrintError(e, prefix='warning: ')
788 return -1
789
790 elif case(value_e.BashArray):
791 val = cast(value.BashArray, UP_val)
792 # There can be empty placeholder values in the array.
793 length = 0
794 for s in val.strs:
795 if s is not None:
796 length += 1
797
798 elif case(value_e.BashAssoc):
799 val = cast(value.BashAssoc, UP_val)
800 length = len(val.d)
801
802 else:
803 raise error.TypeErr(
804 val, "Length op expected Str, BashArray, BashAssoc", token)
805
806 return length
807
808 def _Keys(self, val, token):
809 # type: (value_t, Token) -> value_t
810 """Return keys of a container, for ${!array[@]}"""
811
812 UP_val = val
813 with tagswitch(val) as case:
814 if case(value_e.BashArray):
815 val = cast(value.BashArray, UP_val)
816 # translation issue: tuple indices not supported in list comprehensions
817 #indices = [str(i) for i, s in enumerate(val.strs) if s is not None]
818 indices = [] # type: List[str]
819 for i, s in enumerate(val.strs):
820 if s is not None:
821 indices.append(str(i))
822 return value.BashArray(indices)
823
824 elif case(value_e.BashAssoc):
825 val = cast(value.BashAssoc, UP_val)
826 assert val.d is not None # for MyPy, so it's not Optional[]
827
828 # BUG: Keys aren't ordered according to insertion!
829 return value.BashArray(val.d.keys())
830
831 else:
832 raise error.TypeErr(val, 'Keys op expected Str', token)
833
834 def _EvalVarRef(self, val, blame_tok, quoted, vsub_state, vtest_place):
835 # type: (value_t, Token, bool, VarSubState, VTestPlace) -> value_t
836 """Handles indirect expansion like ${!var} and ${!a[0]}.
837
838 Args:
839 blame_tok: 'foo' for ${!foo}
840 """
841 UP_val = val
842 with tagswitch(val) as case:
843 if case(value_e.Undef):
844 return value.Undef # ${!undef} is just weird bash behavior
845
846 elif case(value_e.Str):
847 val = cast(value.Str, UP_val)
848 bvs_part = self.unsafe_arith.ParseVarRef(val.s, blame_tok)
849 return self._VarRefValue(bvs_part, quoted, vsub_state,
850 vtest_place)
851
852 elif case(value_e.BashArray): # caught earlier but OK
853 e_die('Indirect expansion of array')
854
855 elif case(value_e.BashAssoc): # caught earlier but OK
856 e_die('Indirect expansion of assoc array')
857
858 else:
859 raise error.TypeErr(val, 'Var Ref op expected Str', blame_tok)
860
861 def _ApplyUnarySuffixOp(self, val, op):
862 # type: (value_t, suffix_op.Unary) -> value_t
863 assert val.tag() != value_e.Undef
864
865 op_kind = consts.GetKind(op.op.id)
866
867 if op_kind == Kind.VOp1:
868 # NOTE: glob syntax is supported in ^ ^^ , ,, ! As well as % %% # ##.
869 # Detect has_extglob so that DoUnarySuffixOp doesn't use the fast
870 # shortcut for constant strings.
871 arg_val, has_extglob = self.EvalWordToPattern(op.arg_word)
872 assert arg_val.tag() == value_e.Str
873
874 UP_val = val
875 with tagswitch(val) as case:
876 if case(value_e.Str):
877 val = cast(value.Str, UP_val)
878 s = string_ops.DoUnarySuffixOp(val.s, op.op, arg_val.s,
879 has_extglob)
880 #log('%r %r -> %r', val.s, arg_val.s, s)
881 new_val = value.Str(s) # type: value_t
882
883 elif case(value_e.BashArray):
884 val = cast(value.BashArray, UP_val)
885 # ${a[@]#prefix} is VECTORIZED on arrays. YSH should have this too.
886 strs = [] # type: List[str]
887 for s in val.strs:
888 if s is not None:
889 strs.append(
890 string_ops.DoUnarySuffixOp(
891 s, op.op, arg_val.s, has_extglob))
892 new_val = value.BashArray(strs)
893
894 elif case(value_e.BashAssoc):
895 val = cast(value.BashAssoc, UP_val)
896 strs = []
897 for s in val.d.values():
898 strs.append(
899 string_ops.DoUnarySuffixOp(s, op.op, arg_val.s,
900 has_extglob))
901 new_val = value.BashArray(strs)
902
903 else:
904 raise error.TypeErr(
905 val, 'Unary op expected Str, BashArray, BashAssoc',
906 op.op)
907
908 else:
909 raise AssertionError(Kind_str(op_kind))
910
911 return new_val
912
913 def _PatSub(self, val, op):
914 # type: (value_t, suffix_op.PatSub) -> value_t
915
916 pat_val, has_extglob = self.EvalWordToPattern(op.pat)
917 # Extended globs aren't supported because we only translate * ? etc. to
918 # ERE. I don't think there's a straightforward translation from !(*.py) to
919 # ERE! You would need an engine that supports negation? (Derivatives?)
920 if has_extglob:
921 e_die('extended globs not supported in ${x//GLOB/}', op.pat)
922
923 if op.replace:
924 replace_val = self.EvalRhsWord(op.replace)
925 # Can't have an array, so must be a string
926 assert replace_val.tag() == value_e.Str, replace_val
927 replace_str = cast(value.Str, replace_val).s
928 else:
929 replace_str = ''
930
931 # note: doesn't support self.exec_opts.extglob()!
932 regex, warnings = glob_.GlobToERE(pat_val.s)
933 if len(warnings):
934 # TODO:
935 # - Add 'shopt -s strict_glob' mode and expose warnings.
936 # "Glob is not in CANONICAL FORM".
937 # - Propagate location info back to the 'op.pat' word.
938 pass
939 #log('regex %r', regex)
940 replacer = string_ops.GlobReplacer(regex, replace_str, op.slash_tok)
941
942 with tagswitch(val) as case2:
943 if case2(value_e.Str):
944 str_val = cast(value.Str, val)
945 s = replacer.Replace(str_val.s, op)
946 val = value.Str(s)
947
948 elif case2(value_e.BashArray):
949 array_val = cast(value.BashArray, val)
950 strs = [] # type: List[str]
951 for s in array_val.strs:
952 if s is not None:
953 strs.append(replacer.Replace(s, op))
954 val = value.BashArray(strs)
955
956 elif case2(value_e.BashAssoc):
957 assoc_val = cast(value.BashAssoc, val)
958 strs = []
959 for s in assoc_val.d.values():
960 strs.append(replacer.Replace(s, op))
961 val = value.BashArray(strs)
962
963 else:
964 raise error.TypeErr(
965 val, 'Pat Sub op expected Str, BashArray, BashAssoc',
966 op.slash_tok)
967
968 return val
969
970 def _Slice(self, val, op, var_name, part):
971 # type: (value_t, suffix_op.Slice, Optional[str], BracedVarSub) -> value_t
972
973 begin = self.arith_ev.EvalToInt(op.begin)
974
975 # Note: bash allows lengths to be negative (with odd semantics), but
976 # we don't allow that right now.
977 has_length = False
978 length = -1
979 if op.length:
980 has_length = True
981 length = self.arith_ev.EvalToInt(op.length)
982
983 try:
984 arg0_val = None # type: value.Str
985 if var_name is None: # $* or $@
986 arg0_val = self.mem.GetArg0()
987 val = _PerformSlice(val, begin, length, has_length, part, arg0_val)
988 except error.Strict as e:
989 if self.exec_opts.strict_word_eval():
990 raise
991 else:
992 self.errfmt.PrettyPrintError(e, prefix='warning: ')
993 with tagswitch(val) as case2:
994 if case2(value_e.Str):
995 val = value.Str('')
996 elif case2(value_e.BashArray):
997 val = value.BashArray([])
998 else:
999 raise NotImplementedError()
1000 return val
1001
1002 def _Nullary(self, val, op, var_name):
1003 # type: (value_t, Token, Optional[str]) -> Tuple[value.Str, bool]
1004
1005 UP_val = val
1006 quoted2 = False
1007 op_id = op.id
1008 if op_id == Id.VOp0_P:
1009 with tagswitch(val) as case:
1010 if case(value_e.Str):
1011 str_val = cast(value.Str, UP_val)
1012 prompt = self.prompt_ev.EvalPrompt(str_val)
1013 # readline gets rid of these, so we should too.
1014 p = prompt.replace('\x01', '').replace('\x02', '')
1015 result = value.Str(p)
1016 else:
1017 e_die("Can't use @P on %s" % ui.ValType(val), op)
1018
1019 elif op_id == Id.VOp0_Q:
1020 with tagswitch(val) as case:
1021 if case(value_e.Str):
1022 str_val = cast(value.Str, UP_val)
1023 result = value.Str(j8_lite.MaybeShellEncode(str_val.s))
1024 # oddly, 'echo ${x@Q}' is equivalent to 'echo "${x@Q}"' in
1025 # bash
1026 quoted2 = True
1027 elif case(value_e.BashArray):
1028 array_val = cast(value.BashArray, UP_val)
1029
1030 # TODO: should use fastfunc.ShellEncode
1031 tmp = [j8_lite.MaybeShellEncode(s) for s in array_val.strs]
1032 result = value.Str(' '.join(tmp))
1033 else:
1034 e_die("Can't use @Q on %s" % ui.ValType(val), op)
1035
1036 elif op_id == Id.VOp0_a:
1037 # We're ONLY simluating -a and -A, not -r -x -n for now. See
1038 # spec/ble-idioms.test.sh.
1039 chars = [] # type: List[str]
1040 with tagswitch(val) as case:
1041 if case(value_e.BashArray):
1042 chars.append('a')
1043 elif case(value_e.BashAssoc):
1044 chars.append('A')
1045
1046 if var_name is not None: # e.g. ${?@a} is allowed
1047 cell = self.mem.GetCell(var_name)
1048 if cell:
1049 if cell.readonly:
1050 chars.append('r')
1051 if cell.exported:
1052 chars.append('x')
1053 if cell.nameref:
1054 chars.append('n')
1055
1056 result = value.Str(''.join(chars))
1057
1058 else:
1059 e_die('Var op %r not implemented' % lexer.TokenVal(op), op)
1060
1061 return result, quoted2
1062
1063 def _WholeArray(self, val, part, quoted, vsub_state):
1064 # type: (value_t, BracedVarSub, bool, VarSubState) -> value_t
1065 op_id = cast(bracket_op.WholeArray, part.bracket_op).op_id
1066
1067 if op_id == Id.Lit_At:
1068 vsub_state.join_array = not quoted # ${a[@]} decays but "${a[@]}" doesn't
1069 UP_val = val
1070 with tagswitch(val) as case2:
1071 if case2(value_e.Undef):
1072 if not vsub_state.has_test_op:
1073 val = self._EmptyBashArrayOrError(part.token)
1074 elif case2(value_e.Str):
1075 if self.exec_opts.strict_array():
1076 e_die("Can't index string with @", loc.WordPart(part))
1077 elif case2(value_e.BashArray):
1078 pass # no-op
1079
1080 elif op_id == Id.Arith_Star:
1081 vsub_state.join_array = True # both ${a[*]} and "${a[*]}" decay
1082 UP_val = val
1083 with tagswitch(val) as case2:
1084 if case2(value_e.Undef):
1085 if not vsub_state.has_test_op:
1086 val = self._EmptyBashArrayOrError(part.token)
1087 elif case2(value_e.Str):
1088 if self.exec_opts.strict_array():
1089 e_die("Can't index string with *", loc.WordPart(part))
1090 elif case2(value_e.BashArray):
1091 pass # no-op
1092
1093 else:
1094 raise AssertionError(op_id) # unknown
1095
1096 return val
1097
def _ArrayIndex(self, val, part, vtest_place):
    # type: (value_t, BracedVarSub, VTestPlace) -> value_t
    """Evaluate an index bracket op such as ${a[i+1]} or ${assoc[key]}.

    Args:
      val: value of the variable being indexed
      part: the ${} part; its bracket_op is a bracket_op.ArrayIndex
      vtest_place: out param; .index is set to the evaluated index or key
        when val is a BashArray or BashAssoc

    Returns:
      value.Str for an item that was found, value.Undef for a missing item,
      or val unchanged when it is already Undef.
    """
    index_expr = cast(bracket_op.ArrayIndex, part.bracket_op).expr

    UP_val = val
    with tagswitch(val) as case2:
        if case2(value_e.Undef):
            pass  # it will be checked later

        elif case2(value_e.Str):
            # Bash treats any string as an array, so we can't add our own
            # behavior here without making valid OSH invalid bash.
            e_die("Can't index string %r with integer" % part.var_name,
                  part.token)

        elif case2(value_e.BashArray):
            arr = cast(value.BashArray, UP_val)
            int_index = self.arith_ev.EvalToInt(index_expr)
            vtest_place.index = a_index.Int(int_index)  # out param

            item = GetArrayItem(arr.strs, int_index)
            if item is not None:
                val = value.Str(item)
            else:
                val = value.Undef

        elif case2(value_e.BashAssoc):
            assoc = cast(value.BashAssoc, UP_val)
            # Location could also be attached to bracket_op?  But
            # arith_expr.VarSub works OK too
            key = self.arith_ev.EvalWordToString(
                index_expr, blame_loc=location.TokenForArith(index_expr))

            vtest_place.index = a_index.Str(key)  # out param

            entry = assoc.d.get(key)
            if entry is not None:
                val = value.Str(entry)
            else:
                val = value.Undef

        else:
            raise error.TypeErr(val,
                                'Index op expected BashArray, BashAssoc',
                                loc.WordPart(part))

    return val
1147
def _EvalDoubleQuoted(self, parts, part_vals):
    # type: (List[word_part_t], List[part_value_t]) -> None
    """Evaluate the parts inside a DoubleQuoted part.

    Args:
      parts: the word parts between the double quotes
      part_vals: output param to append to.
    """
    # Example of returning array:
    # $ a=(1 2); b=(3); c=(4 5)
    # $ argv "${a[@]}${b[@]}${c[@]}"
    # ['1', '234', '5']
    #
    # Example of multiple parts
    # $ argv "${a[@]}${undef[@]:-${c[@]}}"
    # ['1', '24', '5']

    if len(parts):
        for child in parts:
            self._EvalWordPart(child, part_vals, QUOTED)
    else:
        # Special case for "".  The parser outputs (DoubleQuoted []), instead
        # of (DoubleQuoted [Literal '']).  This is better but it means we
        # have to check for it.
        part_vals.append(Piece('', True, False))
1174
def EvalDoubleQuotedToString(self, dq_part):
    # type: (DoubleQuoted) -> str
    """Evaluate a double quoted string to a single str.

    Used for double quoted strings in YSH expressions.

    Example: var x = "$foo-${foo}"

    The parts are evaluated with _EvalDoubleQuoted() and then joined by
    _ConcatPartVals(), which is given dq_part.left as the location.
    """
    pieces = []  # type: List[part_value_t]
    self._EvalDoubleQuoted(dq_part.parts, pieces)
    result = self._ConcatPartVals(pieces, dq_part.left)
    return result
1184
def _DecayArray(self, val):
    # type: (value.BashArray) -> value.Str
    """Decay an array to a string, as for $*.

    None entries in val.strs are skipped; the remaining elements are joined
    with the splitter's join char.
    """
    assert val.tag() == value_e.BashArray, val
    join_char = self.splitter.GetJoinChar()
    present = [s for s in val.strs if s is not None]
    joined = join_char.join(present)
    return value.Str(joined)
1192
def _EmptyStrOrError(self, val, token):
    # type: (value_t, Token) -> value_t
    """Replace an Undef value with '' or fail, depending on nounset.

    Args:
      val: value to check; returned unchanged unless it is Undef
      token: used for the variable name in the error message and as the
        error location

    Returns:
      val, or value.Str('') when val is Undef and nounset is off.  When
      nounset is on, e_die() is called instead.
    """
    if val.tag() == value_e.Undef:
        if self.exec_opts.nounset():
            tok_str = lexer.TokenVal(token)
            # Report $foo as 'foo'
            if tok_str.startswith('$'):
                name = tok_str[1:]
            else:
                name = tok_str
            e_die('Undefined variable %r' % name, token)

        return value.Str('')

    return val
1204
def _EmptyBashArrayOrError(self, token):
    # type: (Token) -> value_t
    """Return an empty BashArray for an undefined array, or fail.

    With nounset on, e_die() is called with the token's text in the message
    and the token as the location.
    """
    assert token is not None
    if not self.exec_opts.nounset():
        return value.BashArray([])

    e_die('Undefined array %r' % lexer.TokenVal(token), token)
1212
def _EvalBracketOp(self, val, part, quoted, vsub_state, vtest_place):
    # type: (value_t, BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Apply the bracket op of a ${} part, if any, to val.

    With a bracket op, this dispatches to _WholeArray() or _ArrayIndex().
    Without one, a named array or assoc array either decays to a string or
    is a fatal error, unless this is a type query.
    """
    if part.bracket_op:
        with tagswitch(part.bracket_op) as case:
            if case(bracket_op_e.WholeArray):
                val = self._WholeArray(val, part, quoted, vsub_state)

            elif case(bracket_op_e.ArrayIndex):
                val = self._ArrayIndex(val, part, vtest_place)

            else:
                raise AssertionError(part.bracket_op.tag())

        return val

    # no bracket op
    var_name = vtest_place.name
    if var_name is not None:
        if (val.tag() in (value_e.BashArray, value_e.BashAssoc) and
                not vsub_state.is_type_query):
            # True when the part has neither a prefix op nor a suffix op
            no_other_ops = not (part.prefix_op or part.suffix_op)
            if ShouldArrayDecay(var_name, self.exec_opts, no_other_ops):
                # for ${BASH_SOURCE}, etc.
                val = DecayArray(val)
            else:
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % var_name, loc.WordPart(part))

    return val
1242
def _VarRefValue(self, part, quoted, vsub_state, vtest_place):
    # type: (BracedVarSub, bool, VarSubState, VTestPlace) -> value_t
    """Evaluate the variable and bracket op of a ${} part to a value_t.

    Duplicates some logic from _EvalBracedVarSub, but returns a value_t.
    """
    tok_id = part.token.id

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if tok_id == Id.VSub_Name:
        vtest_place.name = part.var_name
        val = self.mem.GetValue(part.var_name)

    elif tok_id == Id.VSub_Number:
        val = self._EvalVarNum(int(part.var_name))

    else:
        # $* decays
        val = self._EvalSpecialVar(tok_id, quoted, vsub_state)

    # We don't need var_index because it's only for L-Values of test ops?
    if not self.exec_opts.eval_unsafe_arith():
        # Evaluate the bracket op with _allow_command_sub set to False
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                      vtest_place)
    else:
        val = self._EvalBracketOp(val, part, quoted, vsub_state,
                                  vtest_place)

    return val
1272
def _EvalBracedVarSub(self, part, part_vals, quoted):
    # type: (BracedVarSub, List[part_value_t], bool) -> None
    """Evaluate a ${...} substitution and append the result to part_vals.

    Args:
      part: the braced var sub to evaluate
      part_vals: output param to append to.
      quoted: whether the substitution appears inside double quotes
    """
    # We have different operators that interact in a non-obvious order.
    #
    # 1. bracket_op: value -> value, with side effect on vsub_state
    #
    # 2. prefix_op
    #    a. length  ${#x}: value -> value
    #    b. var ref ${!ref}: can expand to an array
    #
    # 3. suffix_op:
    #    a. no operator: you have a value
    #    b. Test: value -> part_value[]
    #    c. Other Suffix: value -> value
    #
    # 4. Process vsub_state.join_array here before returning.
    #
    # These cases are hard to distinguish:
    # - ${!prefix@}   prefix query
    # - ${!array[@]}  keys
    # - ${!ref}       named reference
    # - ${!ref[0]}    named reference
    #
    # I think we need several stages:
    #
    # 1. value: name, number, special, prefix query
    # 2. bracket_op
    # 3. prefix length -- this is TERMINAL
    # 4. indirection?  Only for some of the ! cases
    # 5. string transformation suffix ops like ##
    # 6. test op
    # 7. vsub_state.join_array

    # vsub_state.join_array is for joining "${a[*]}" and unquoted ${a[@]} AFTER
    # suffix ops are applied.  If we take the length with a prefix op, the
    # distinction is ignored.

    var_name = None  # type: Optional[str]  # used throughout the function
    vtest_place = VTestPlace(var_name, None)  # For ${foo=default}
    vsub_state = VarSubState.CreateNull()  # for $*, ${a[*]}, etc.

    # 1. Evaluate from (var_name, var_num, token Id) -> value
    if part.token.id == Id.VSub_Name:
        # Handle ${!prefix@} first, since that looks at names and not values
        # Do NOT handle ${!A[@]@a} here!
        if (part.prefix_op is not None and part.bracket_op is None and
                part.suffix_op is not None and
                part.suffix_op.tag() == suffix_op_e.Nullary):
            nullary_op = cast(Token, part.suffix_op)
            # ${!x@} but not ${!x@P}
            if consts.GetKind(nullary_op.id) == Kind.VOp3:
                names = self.mem.VarNamesStartingWith(part.var_name)
                names.sort()

                # Only the quoted @ form yields an array; every other form is
                # joined into one Piece.
                if quoted and nullary_op.id == Id.VOp3_At:
                    part_vals.append(part_value.Array(names))
                else:
                    sep = self.splitter.GetJoinChar()
                    part_vals.append(Piece(sep.join(names), quoted, True))
                return  # EARLY RETURN

        var_name = part.var_name
        vtest_place.name = var_name  # for _ApplyTestOp

        val = self.mem.GetValue(var_name)

    elif part.token.id == Id.VSub_Number:
        var_num = int(part.var_name)
        val = self._EvalVarNum(var_num)
    else:
        # $* decays
        val = self._EvalSpecialVar(part.token.id, quoted, vsub_state)

    # First pass over the suffix op: only record flags in vsub_state.  The op
    # itself is applied further down, after the bracket and prefix ops.
    suffix_op_ = part.suffix_op
    if suffix_op_:
        # UP_op is only bound when there is a suffix op; later uses are all
        # reached only in that case.
        UP_op = suffix_op_
        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                suffix_op_ = cast(Token, UP_op)

                # Type query ${array@a} is a STRING, not an array
                # NOTE: ${array@Q} is ${array[0]@Q} in bash, which is different than
                # ${array[@]@Q}
                if suffix_op_.id == Id.VOp0_a:
                    vsub_state.is_type_query = True

            elif case(suffix_op_e.Unary):
                suffix_op_ = cast(suffix_op.Unary, UP_op)

                # Do the _EmptyStrOrError/_EmptyBashArrayOrError up front, EXCEPT in
                # the case of Kind.VTest
                if consts.GetKind(suffix_op_.op.id) == Kind.VTest:
                    vsub_state.has_test_op = True

    # 2. Bracket Op
    val = self._EvalBracketOp(val, part, quoted, vsub_state, vtest_place)

    if part.prefix_op:
        if part.prefix_op.id == Id.VSub_Pound:  # ${#var} for length
            if not vsub_state.has_test_op:  # undef -> '' BEFORE length
                val = self._EmptyStrOrError(val, part.token)

            n = self._Length(val, part.token)
            part_vals.append(Piece(str(n), quoted, False))
            return  # EARLY EXIT: nothing else can come after length

        elif part.prefix_op.id == Id.VSub_Bang:
            if (part.bracket_op and
                    part.bracket_op.tag() == bracket_op_e.WholeArray):
                if vsub_state.has_test_op:
                    # ${!a[@]-'default'} is a non-fatal runtime error in bash.  Here
                    # it's fatal.
                    op_tok = cast(suffix_op.Unary, UP_op).op
                    e_die('Test operation not allowed with ${!array[@]}',
                          op_tok)

                # ${!array[@]} to get indices/keys
                val = self._Keys(val, part.token)
                # already set vsub_state.join_array ABOVE
            else:
                # Process ${!ref}.  SURPRISE: ${!a[0]} is an indirect expansion unlike
                # ${!a[@]} !
                # ${!ref} can expand into an array if ref='array[@]'

                # Clear it now that we have a var ref
                vtest_place.name = None
                vtest_place.index = None

                val = self._EvalVarRef(val, part.token, quoted, vsub_state,
                                       vtest_place)

            if not vsub_state.has_test_op:  # undef -> '' AFTER indirection
                val = self._EmptyStrOrError(val, part.token)

        else:
            raise AssertionError(part.prefix_op)

    else:
        if not vsub_state.has_test_op:  # undef -> '' if no prefix op
            val = self._EmptyStrOrError(val, part.token)

    # Second pass over the suffix op: actually apply it.
    quoted2 = False  # another bit for @Q
    if suffix_op_:
        op = suffix_op_  # could get rid of this alias

        with tagswitch(suffix_op_) as case:
            if case(suffix_op_e.Nullary):
                op = cast(Token, UP_op)
                val, quoted2 = self._Nullary(val, op, var_name)

            elif case(suffix_op_e.Unary):
                op = cast(suffix_op.Unary, UP_op)
                if consts.GetKind(op.op.id) == Kind.VTest:
                    if self._ApplyTestOp(val, op, quoted, part_vals,
                                         vtest_place, part.token):
                        # e.g. to evaluate ${undef:-'default'}, we already appended
                        # what we need
                        return

                else:
                    # Other suffix: value -> value
                    val = self._ApplyUnarySuffixOp(val, op)

            elif case(suffix_op_e.PatSub):  # PatSub, vectorized
                op = cast(suffix_op.PatSub, UP_op)
                val = self._PatSub(val, op)

            elif case(suffix_op_e.Slice):
                op = cast(suffix_op.Slice, UP_op)
                val = self._Slice(val, op, var_name, part)

            elif case(suffix_op_e.Static):
                op = cast(suffix_op.Static, UP_op)
                e_die('Not implemented', op.tok)

            else:
                raise AssertionError()

    # After applying suffixes, process join_array here.
    UP_val = val
    if val.tag() == value_e.BashArray:
        array_val = cast(value.BashArray, UP_val)
        if vsub_state.join_array:
            val = self._DecayArray(array_val)
        else:
            val = array_val

    # For example, ${a} evaluates to value.Str(), but we want a
    # Piece().
    part_val = _ValueToPartValue(val, quoted or quoted2, part)
    part_vals.append(part_val)
1468
def _ConcatPartVals(self, part_vals, location):
    # type: (List[part_value_t], loc_t) -> str
    """Concatenate evaluated parts into one string.

    Args:
      part_vals: String and Array part values; any other variant is an
        AssertionError
      location: blamed when an array is found under strict_array

    Returns:
      The concatenation.  Array elements are joined with a single space,
      skipping None entries.
    """
    chunks = []  # type: List[str]
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                chunks.append(piece.s)

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"
                    e_die("Illegal array word part (strict_array)",
                          location)

                # It appears to not respect IFS
                # TODO: eliminate double join()?
                present = [s for s in arr.strs if s is not None]
                chunks.append(' '.join(present))

            else:
                raise AssertionError()

    return ''.join(chunks)
1498
def EvalBracedVarSubToString(self, part):
    # type: (BracedVarSub) -> str
    """Evaluate a ${...} substitution to a single str.

    Used for braced var subs in YSH expressions, e.g. the ${foo} in

        var x = "$foo-${foo}"

    The part is evaluated as unquoted, and errors from the concatenation
    blame the ${ location (part.left).
    """
    pieces = []  # type: List[part_value_t]
    self._EvalBracedVarSub(part, pieces, False)
    # blame ${ location
    result = self._ConcatPartVals(pieces, part.left)
    return result
1509
def _EvalSimpleVarSub(self, part, part_vals, quoted):
    # type: (SimpleVarSub, List[part_value_t], bool) -> None
    """Evaluate a simple substitution like $foo, $1 or $@.

    Args:
      part: the substitution; part.tok determines what is looked up
      part_vals: output param to append to.
      quoted: whether the substitution appears inside double quotes
    """
    tok = part.tok
    vsub_state = VarSubState.CreateNull()

    # 1. Evaluate from (var_name, var_num, Token) -> defined, value
    if tok.id == Id.VSub_DollarName:
        name = lexer.LazyStr(tok)
        # TODO: Special case for LINENO
        val = self.mem.GetValue(name)
        if val.tag() in (value_e.BashArray, value_e.BashAssoc):
            if not ShouldArrayDecay(name, self.exec_opts):
                e_die(
                    "Array %r can't be referred to as a scalar (without @ or *)"
                    % name, tok)
            # for $BASH_SOURCE, etc.
            val = DecayArray(val)

    elif tok.id == Id.VSub_Number:
        val = self._EvalVarNum(int(lexer.LazyStr(tok)))

    else:
        val = self._EvalSpecialVar(tok.id, quoted, vsub_state)

    #log('SIMPLE %s', part)
    val = self._EmptyStrOrError(val, tok)

    # An array is joined into a string only when join_array was set on
    # vsub_state; otherwise it is passed through as an array.
    UP_val = val
    if val.tag() == value_e.BashArray and vsub_state.join_array:
        val = self._DecayArray(cast(value.BashArray, UP_val))

    part_vals.append(_ValueToPartValue(val, quoted, part))
1550
def EvalSimpleVarSubToString(self, node):
    # type: (SimpleVarSub) -> str
    """Evaluate a simple substitution like $foo to a single str.

    Used for simple var subs in YSH expressions, e.g. the $foo in

        var x = "$foo-${foo}"

    The node is evaluated as unquoted, and node.tok is passed to
    _ConcatPartVals() as the location.
    """
    pieces = []  # type: List[part_value_t]
    self._EvalSimpleVarSub(node, pieces, False)
    result = self._ConcatPartVals(pieces, node.tok)
    return result
1560
def _EvalExtGlob(self, part, part_vals):
    # type: (word_part.ExtGlob, List[part_value_t]) -> None
    """Evaluate @($x|'foo'|$(hostname)) and flatten it.

    Appends the opening operator, each evaluated arm separated by '|', and
    the closing ')' to part_vals.
    """
    op_tok = part.op
    if op_tok.id != Id.ExtGlob_Comma:
        opener = lexer.LazyStr(op_tok)
    else:
        # The comma form is emitted as @(
        opener = '@('
    # Do NOT split these.
    part_vals.append(Piece(opener, False, False))

    first = True
    for arm in part.arms:
        if not first:
            part_vals.append(Piece('|', False, False))  # separator
        first = False
        # FLATTEN the tree of extglob "arms".
        self._EvalWordToParts(arm, part_vals, EXTGLOB_NESTED)

    part_vals.append(Piece(')', False, False))  # closing )
1578
def _TranslateExtGlob(self, part_vals, w, glob_parts, fnmatch_parts):
    # type: (List[part_value_t], CompoundWord, List[str], List[str]) -> None
    """Translate a flattened WORD with an ExtGlob part to string patterns.

    We need both glob and fnmatch patterns.  _EvalExtGlob does the
    flattening.

    Args:
      part_vals: flattened part values of the word
      w: the word, used as the error location
      glob_parts: output param; nested ExtGlob parts contribute '*'
      fnmatch_parts: output param; nested ExtGlob parts are expanded
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                if not piece.quoted or self.exec_opts.noglob():
                    # e.g. the @( and | in @(foo|bar) aren't quoted
                    pat = piece.s
                else:
                    pat = glob_.GlobEscape(piece.s)
                glob_parts.append(pat)
                fnmatch_parts.append(pat)  # from _EvalExtGlob()

            elif case(part_value_e.Array):
                # Disallow array
                e_die(
                    "Extended globs and arrays can't appear in the same word",
                    w)

            elif case(part_value_e.ExtGlob):
                nested = cast(part_value.ExtGlob, UP_pv)
                # keep appending fnmatch_parts, but replace glob_parts with '*'
                self._TranslateExtGlob(nested.part_vals, w, [],
                                       fnmatch_parts)
                glob_parts.append('*')

            else:
                raise AssertionError()
1614
def _EvalWordPart(self, part, part_vals, flags):
    # type: (word_part_t, List[part_value_t], int) -> None
    """Evaluate a word part, appending to part_vals

    Called by _EvalWordToParts, EvalWordToString, and _EvalDoubleQuoted.

    Args:
      part: the word part to evaluate; dispatched on its tag
      part_vals: output param to append to.
      flags: bit flags; only the QUOTED and IS_SUBST bits are read here
    """
    quoted = bool(flags & QUOTED)
    is_subst = bool(flags & IS_SUBST)

    UP_part = part
    with tagswitch(part) as case:
        # Array literals are rejected here with a fatal error
        if case(word_part_e.ShArrayLiteral):
            part = cast(ShArrayLiteral, UP_part)
            e_die("Unexpected array literal", loc.WordPart(part))
        elif case(word_part_e.BashAssocLiteral):
            part = cast(word_part.BashAssocLiteral, UP_part)
            e_die("Unexpected associative array literal",
                  loc.WordPart(part))

        elif case(word_part_e.Literal):
            part = cast(Token, UP_part)
            # Split if it's in a substitution.
            # That is: echo is not split, but ${foo:-echo} is split
            v = Piece(lexer.LazyStr(part), quoted, is_subst)
            part_vals.append(v)

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            # Always quoted and never split
            v = Piece(part.ch, True, False)
            part_vals.append(v)

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            # Always quoted and never split
            v = Piece(part.sval, True, False)
            part_vals.append(v)

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            self._EvalDoubleQuoted(part.parts, part_vals)

        elif case(word_part_e.CommandSub):
            part = cast(CommandSub, UP_part)
            id_ = part.left_token.id
            # The left token distinguishes command subs from process subs
            if id_ in (Id.Left_DollarParen, Id.Left_AtParen,
                       Id.Left_Backtick):
                sv = self._EvalCommandSub(part,
                                          quoted)  # type: part_value_t

            elif id_ in (Id.Left_ProcSubIn, Id.Left_ProcSubOut):
                sv = self._EvalProcessSub(part)

            else:
                raise AssertionError(id_)

            part_vals.append(sv)

        elif case(word_part_e.SimpleVarSub):
            part = cast(SimpleVarSub, UP_part)
            self._EvalSimpleVarSub(part, part_vals, quoted)

        elif case(word_part_e.BracedVarSub):
            part = cast(BracedVarSub, UP_part)
            self._EvalBracedVarSub(part, part_vals, quoted)

        elif case(word_part_e.TildeSub):
            part = cast(word_part.TildeSub, UP_part)
            # We never parse a quoted string into a TildeSub.
            assert not quoted
            s = self.tilde_ev.Eval(part)
            v = Piece(s, True, False)  # NOT split even when unquoted!
            part_vals.append(v)

        elif case(word_part_e.ArithSub):
            part = cast(word_part.ArithSub, UP_part)
            num = self.arith_ev.EvalToBigInt(part.anode)
            # do_split is the inverse of quoted for arithmetic results
            v = Piece(mops.ToStr(num), quoted, not quoted)
            part_vals.append(v)

        elif case(word_part_e.ExtGlob):
            part = cast(word_part.ExtGlob, UP_part)
            #if not self.exec_opts.extglob():
            #  die()  # disallow at runtime?  Don't just decay

            # Create a node to hold the flattened tree.  The caller decides whether
            # to pass it to fnmatch() or replace it with '*' and pass it to glob().
            part_vals2 = []  # type: List[part_value_t]
            self._EvalExtGlob(part, part_vals2)  # flattens tree
            part_vals.append(part_value.ExtGlob(part_vals2))

        elif case(word_part_e.BashRegexGroup):
            part = cast(word_part.BashRegexGroup, UP_part)

            # The parens are emitted as unquoted pieces around the child word
            part_vals.append(Piece('(', False, False))  # not quoted
            if part.child:
                self._EvalWordToParts(part.child, part_vals, 0)
            part_vals.append(Piece(')', False, False))

        elif case(word_part_e.Splice):
            part = cast(word_part.Splice, UP_part)
            val = self.mem.GetValue(part.var_name)

            strs = self.expr_ev.SpliceValue(val, part)
            part_vals.append(part_value.Array(strs))

        elif case(word_part_e.ExprSub):
            part = cast(word_part.ExprSub, UP_part)
            part_val = self.expr_ev.EvalExprSub(part)
            part_vals.append(part_val)

        elif case(word_part_e.ZshVarSub):
            part = cast(word_part.ZshVarSub, UP_part)
            e_die("ZSH var subs are parsed, but can't be evaluated",
                  part.left)

        else:
            raise AssertionError(part.tag())
1731
def _EvalRhsWordToParts(self, w, part_vals, eval_flags=0):
    # type: (rhs_word_t, List[part_value_t], int) -> None
    """Evaluate an rhs_word, appending to part_vals.

    An Empty word yields one empty Piece whose quoted bit comes from the
    QUOTED flag; a Compound word is passed on to _EvalWordToParts().
    """
    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            is_quoted = bool(eval_flags & QUOTED)
            part_vals.append(Piece('', is_quoted, not is_quoted))

        elif case(rhs_word_e.Compound):
            compound = cast(CompoundWord, UP_w)
            self._EvalWordToParts(compound,
                                  part_vals,
                                  eval_flags=eval_flags)

        else:
            raise AssertionError()
1747
def _EvalWordToParts(self, w, part_vals, eval_flags=0):
    # type: (CompoundWord, List[part_value_t], int) -> None
    """Helper for EvalRhsWord, EvalWordSequence, etc.

    Args:
      w: the word whose parts are evaluated
      part_vals: output param to append to.  Note that this is a TREE.
      eval_flags: passed to _EvalWordPart(); EXTGLOB_FILES and
        EXTGLOB_NESTED decide how a word with an extended glob is handled
    """
    # Does the word have an extended glob?  This is a special case because
    # of the way we use glob() and then fnmatch(..., FNM_EXTMATCH) to
    # implement extended globs.  It's hard to carry that extra information
    # all the way past the word splitting stage.

    # OSH semantic limitations: If a word has an extended glob part, then
    # 1. It can't have an array
    # 2. Word splitting of unquoted words isn't respected

    evaluated = []  # type: List[part_value_t]
    saw_extglob = False
    for part in w.parts:
        if part.tag() == word_part_e.ExtGlob:
            saw_extglob = True
        self._EvalWordPart(part, evaluated, eval_flags)

    # Common case: no word_part.ExtGlob() was parsed
    if not saw_extglob:
        part_vals.extend(evaluated)
        return

    if bool(eval_flags & EXTGLOB_FILES):
        # Caller REQUESTED extglob evaluation.
        #
        # Treat the WHOLE word as a pattern.  We need TWO VARIANTS of the
        # word because of the way we use libc:
        # 1. With '*' for extglob parts
        # 2. With _EvalExtGlob() for extglob parts

        glob_parts = []  # type: List[str]
        fnmatch_parts = []  # type: List[str]
        self._TranslateExtGlob(evaluated, w, glob_parts, fnmatch_parts)

        #log('evaluated %s', evaluated)
        glob_pat = ''.join(glob_parts)
        fnmatch_pat = ''.join(fnmatch_parts)
        #log("glob %s fnmatch %s", glob_pat, fnmatch_pat)

        matches = []  # type: List[str]
        n = self.globber.ExpandExtended(glob_pat, fnmatch_pat, matches)
        if n < 0:
            raise error.FailGlob(
                'Extended glob %r matched no files' % fnmatch_pat, w)

        part_vals.append(part_value.Array(matches))

    elif bool(eval_flags & EXTGLOB_NESTED):
        # We only glob at the TOP level of @(nested|@(pattern))
        part_vals.extend(evaluated)

    else:
        # e.g. simple_word_eval, assignment builtin
        e_die('Extended glob not allowed in this word', w)
1804
def _PartValsToString(self, part_vals, w, eval_flags, strs):
    # type: (List[part_value_t], CompoundWord, int, List[str]) -> None
    """Helper for EvalWordToString, similar to _ConcatPartVals() above.

    Args:
      part_vals: evaluated parts; ExtGlob parts are handled recursively
      w: the word, used as the error location.  Note: this could just be a
        span ID
      eval_flags: QUOTE_FNMATCH or QUOTE_ERE select how quoted pieces are
        escaped; QUOTE_FNMATCH is also required for ExtGlob parts
      strs: output param to append to.
    """
    for pv in part_vals:
        UP_pv = pv
        with tagswitch(pv) as case:
            if case(part_value_e.String):
                piece = cast(Piece, UP_pv)
                text = piece.s
                if piece.quoted:
                    if eval_flags & QUOTE_FNMATCH:
                        # [[ foo == */"*".py ]] or case (*.py) or ${x%*.py} or ${x//*.py/}
                        text = glob_.GlobEscape(text)
                    elif eval_flags & QUOTE_ERE:
                        text = glob_.ExtendedRegexEscape(text)
                strs.append(text)

            elif case(part_value_e.Array):
                arr = cast(part_value.Array, UP_pv)
                if self.exec_opts.strict_array():
                    # Examples: echo f > "$@"; local foo="$@"

                    # TODO: This attributes too coarsely, to the word rather than the
                    # parts.  Problem: the word is a TREE of parts, but we only have a
                    # flat list of part_vals.  The only case where we really get arrays
                    # is "$@", "${a[@]}", "${a[@]//pat/replace}", etc.
                    e_die(
                        "This word should yield a string, but it contains an array",
                        w)

                    # TODO: Maybe add detail like this.
                    #e_die('RHS of assignment should only have strings.  '
                    #      'To assign arrays, use b=( "${a[@]}" )')

                # It appears to not respect IFS
                present = [s for s in arr.strs if s is not None]
                # TODO: eliminate double join()?
                strs.append(' '.join(present))

            elif case(part_value_e.ExtGlob):
                ext = cast(part_value.ExtGlob, UP_pv)

                # Extended globs are only allowed where we expect them!
                if not bool(eval_flags & QUOTE_FNMATCH):
                    e_die('extended glob not allowed in this word', w)

                # recursive call
                self._PartValsToString(ext.part_vals, w, eval_flags, strs)

            else:
                raise AssertionError()
1860
def EvalWordToString(self, UP_w, eval_flags=0):
    # type: (word_t, int) -> value.Str
    """Given a word, return a string.

    Args:
      UP_w: the word; it must be a CompoundWord
      eval_flags: can contain a quoting algorithm, which is applied when
        the evaluated parts are joined

    Returns:
      value.Str with the evaluated contents of the word
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # The fast path is only tried without flags: QUOTE_FNMATCH etc. breaks
    # the optimization
    if eval_flags == 0:
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            return value.Str(fast_str)

        # Could we additionally optimize a=$b, if we know $b isn't an array
        # etc.?

    # Note: these empty lists are hot in fib benchmark

    evaluated = []  # type: List[part_value_t]
    for part in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(part, evaluated, 0)

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, eval_flags, out)
    joined = ''.join(out)
    return value.Str(joined)
1888
def EvalWordToPattern(self, UP_w):
    # type: (rhs_word_t) -> Tuple[value.Str, bool]
    """Like EvalWordToString, but returns whether we got ExtGlob.

    Returns:
      (pattern, has_extglob).  The pattern is built with QUOTE_FNMATCH, and
      has_extglob is True if any part of the word is an ExtGlob.  An Empty
      word gives ('', False).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str(''), False

    assert UP_w.tag() == rhs_word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    saw_extglob = False
    evaluated = []  # type: List[part_value_t]
    for part in w.parts:
        # this doesn't use eval_flags, which is slightly confusing
        self._EvalWordPart(part, evaluated, 0)
        if part.tag() == word_part_e.ExtGlob:
            saw_extglob = True

    out = []  # type: List[str]
    self._PartValsToString(evaluated, w, QUOTE_FNMATCH, out)
    pattern = value.Str(''.join(out))
    return pattern, saw_extglob
1909
def EvalForPlugin(self, w):
    # type: (CompoundWord) -> value.Str
    """Wrapper around EvalWordToString that prevents errors.

    Runtime errors like $(( 1 / 0 )) and mutating $? like $(exit 42)
    are handled here.

    Similar to ExprEvaluator.PluginCall().

    Returns:
      The evaluated string, or a placeholder string describing the failure
      when a runtime error, I/O error or Ctrl-C occurs.
    """
    with state.ctx_Registers(self.mem):  # to "sandbox" $? and $PIPESTATUS
        try:
            val = self.EvalWordToString(w)
        except error.FatalRuntime as e:
            # Report the error in the result rather than propagating it
            val = value.Str('<Runtime error: %s>' % e.UserErrorString())

        except (IOError, OSError) as e:
            val = value.Str('<I/O error: %s>' % pyutil.strerror(e))

        except KeyboardInterrupt:
            val = value.Str('<Ctrl-C>')

    return val
1932
def EvalRhsWord(self, UP_w):
    # type: (rhs_word_t) -> value_t
    """Used for RHS of assignment.

    There is no splitting.

    Returns:
      value.BashArray for a=(1 2), value.BashAssoc for an associative array
      literal, and value.Str otherwise (the empty string for an Empty word).
    """
    if UP_w.tag() == rhs_word_e.Empty:
        return value.Str('')

    # NOTE(review): this asserts against word_e.Compound although UP_w is an
    # rhs_word_t; EvalWordToPattern compares with rhs_word_e.Compound.
    # Presumably both tags are equal -- confirm.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    # If RHS doesn't look like a=( ... ), then it must be a string.
    if len(w.parts) != 1:
        return self.EvalWordToString(w)

    only_part = w.parts[0]
    UP_only_part = only_part
    part_tag = only_part.tag()

    # Special case for a=(1 2).  ShArrayLiteral won't appear in words that
    # don't look like assignments.
    if part_tag == word_part_e.ShArrayLiteral:
        array_lit = cast(ShArrayLiteral, UP_only_part)
        expanded = braces.BraceExpandWords(array_lit.words)
        items = self.EvalWordSequence(expanded)
        return value.BashArray(items)

    if part_tag == word_part_e.BashAssocLiteral:
        assoc_lit = cast(word_part.BashAssocLiteral, UP_only_part)
        d = NewDict()  # type: Dict[str, str]
        for pair in assoc_lit.pairs:
            # Evaluate the key before the value
            k = self.EvalWordToString(pair.key)
            v = self.EvalWordToString(pair.value)
            d[k.s] = v.s
        return value.BashAssoc(d)

    return self.EvalWordToString(w)
1969
def _EvalWordFrame(self, frame, argv):
    # type: (List[Piece], List[str]) -> None
    """Turn one frame of Pieces into zero or more argv entries.

    The frame is elided, joined as-is, or escaped, split and globbed,
    depending on which of its pieces are empty and quoted.

    Args:
      frame: the Pieces to evaluate together
      argv: output param to append to.

    Raises:
      error.FailGlob: if the globber reports a negative count for a pattern
    """
    all_empty = True
    all_quoted = True
    any_quoted = False

    #log('--- frame %s', frame)

    for piece in frame:
        if len(piece.s):
            all_empty = False

        if piece.quoted:
            any_quoted = True
        else:
            all_quoted = False

    # Elision of ${empty}${empty} but not $empty"$empty" or $empty""
    if all_empty and not any_quoted:
        return

    # If every frag is quoted, e.g. "$a$b" or any part in "${a[@]}"x, then
    # don't do word splitting or globbing.
    if all_quoted:
        tmp = [piece.s for piece in frame]
        a = ''.join(tmp)
        argv.append(a)
        return

    will_glob = not self.exec_opts.noglob()

    # Array of strings, some of which are BOTH IFS-escaped and GLOB escaped!
    frags = []  # type: List[str]
    for piece in frame:
        # First level of escaping: for globbing
        if will_glob and piece.quoted:
            frag = glob_.GlobEscape(piece.s)
        else:
            # If we have a literal \, then we turn it into \\\\.
            # Splitting takes \\\\ -> \\
            # Globbing takes \\ to \ if it doesn't match
            frag = _BackslashEscape(piece.s)

        # Second level of escaping: for splitting
        if piece.do_split:
            frag = _BackslashEscape(frag)
        else:
            frag = self.splitter.Escape(frag)

        frags.append(frag)

    flat = ''.join(frags)
    #log('flat: %r', flat)

    args = self.splitter.SplitForWordEval(flat)

    # space=' '; argv $space"".  We have a quoted part, but we CANNOT elide.
    # Add it back and don't bother globbing.
    if len(args) == 0 and any_quoted:
        argv.append('')
        return

    #log('split args: %r', args)
    for a in args:
        if glob_.LooksLikeGlob(a):
            # Expand() appends to argv; a negative result is a failure
            n = self.globber.Expand(a, argv)
            if n < 0:
                # TODO: location info, with span IDs carried through the frame
                raise error.FailGlob('Pattern %r matched no files' % a,
                                     loc.Missing)
        else:
            argv.append(glob_.GlobUnescape(a))
2040
def _EvalWordToArgv(self, w):
    # type: (CompoundWord) -> List[str]
    """Helper for _EvalAssignBuiltin.

    Splitting and globbing are disabled for assignment builtins.

    Example: declare -"${a[@]}" b=(1 2)
    where a is [x b=a d=a]

    Returns:
      One string per non-empty frame of the word, each being the
      concatenation of that frame's pieces.
    """
    evaluated = []  # type: List[part_value_t]
    self._EvalWordToParts(w, evaluated, 0)  # not double quoted

    result = []  # type: List[str]
    for frame in _MakeWordFrames(evaluated):
        if len(frame) == 0:  # empty array gives empty frame!
            continue
        tmp = [piece.s for piece in frame]
        result.append(''.join(tmp))  # no split or glob

    #log('argv: %s', result)
    return result
2060
def _EvalAssignBuiltin(self, builtin_id, arg0, words, meta_offset):
    # type: (builtin_t, str, List[CompoundWord], int) -> cmd_value.Assign
    """Evaluate the words of an assignment builtin into flags and pairs.

    Handles both static and dynamic assignment, e.g.

      x='foo=bar'
      local a=(1 2) $x

    Grammar:

      ('builtin' | 'command')* keyword flag* pair*
      flag = [-+].*

    There is also command -p, but we haven't implemented it.  Maybe just
    punt on it.

    Args:
      builtin_id: id of the assignment builtin; passed through to the result
      arg0: the already-evaluated keyword, i.e. the value of
            words[meta_offset]
      words: every word of the command, including the leading
             'builtin' / 'command' words and the keyword itself
      meta_offset: index of the keyword in words

    Returns:
      cmd_value.Assign.  The flag strings and the flag locations are
      parallel lists: one location per string.
    """
    eval_to_pairs = True  # except for -f and -F
    started_pairs = False

    # INVARIANT: flags[k] was produced by the word flag_locs[k].
    flags = [arg0]  # initial flags like -p, and -f -F name1 name2
    # arg0 comes from words[meta_offset], not necessarily words[0]
    # (e.g. 'builtin declare x').
    flag_locs = [words[meta_offset]]
    assign_args = []  # type: List[AssignArg]

    n = len(words)
    for i in xrange(meta_offset + 1, n):  # skip the keyword itself
        w = words[i]

        if word_.IsVarLike(w):
            started_pairs = True  # Everything from now on is an assign_pair

        if started_pairs:
            left_token, close_token, part_offset = word_.DetectShAssignment(
                w)
            if left_token:  # Detected statically
                if left_token.id != Id.Lit_VarLike:
                    # (not guaranteed since started_pairs is set twice)
                    e_die('LHS array not allowed in assignment builtin', w)

                if lexer.IsPlusEquals(left_token):
                    var_name = lexer.TokenSliceRight(left_token, -2)
                    append = True
                else:
                    var_name = lexer.TokenSliceRight(left_token, -1)
                    append = False

                if part_offset == len(w.parts):
                    # Nothing after the '=': empty right-hand side
                    rhs = rhs_word.Empty  # type: rhs_word_t
                else:
                    # tmp is for intersection of C++/MyPy type systems
                    tmp = CompoundWord(w.parts[part_offset:])
                    word_.TildeDetectAssign(tmp)
                    rhs = tmp

                with state.ctx_AssignBuiltin(self.mutable_opts):
                    right = self.EvalRhsWord(rhs)

                arg2 = AssignArg(var_name, right, append, w)
                assign_args.append(arg2)

            else:  # e.g. export $dynamic
                argv = self._EvalWordToArgv(w)
                for arg in argv:
                    arg2 = _SplitAssignArg(arg, w)
                    assign_args.append(arg2)

        else:
            argv = self._EvalWordToArgv(w)
            for arg in argv:
                if arg.startswith('-') or arg.startswith('+'):
                    # e.g. declare -r +r
                    flags.append(arg)
                    flag_locs.append(w)

                    # Shortcut that relies on -f and -F always meaning
                    # "function" for all assignment builtins
                    if 'f' in arg or 'F' in arg:
                        eval_to_pairs = False

                else:  # e.g. export $dynamic
                    if eval_to_pairs:
                        arg2 = _SplitAssignArg(arg, w)
                        assign_args.append(arg2)
                        started_pairs = True
                    else:
                        # A name after -f / -F.  Record its location too,
                        # so that flag_locs stays parallel to flags.
                        flags.append(arg)
                        flag_locs.append(w)

    return cmd_value.Assign(builtin_id, flags, flag_locs, assign_args)
2147
def _DetectAssignBuiltinStr(self, arg0, words, meta_offset):
    # type: (str, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Evaluate words as an assignment builtin if arg0 names one.

    Returns:
      The cmd_value.Assign produced by _EvalAssignBuiltin(), or None when
      the lookup of arg0 yields consts.NO_INDEX.
    """
    builtin_id = consts.LookupAssignBuiltin(arg0)
    if builtin_id == consts.NO_INDEX:
        return None  # not an assignment builtin

    return self._EvalAssignBuiltin(builtin_id, arg0, words, meta_offset)
2155
def _DetectAssignBuiltin(self, val0, words, meta_offset):
    # type: (part_value_t, List[CompoundWord], int) -> Optional[cmd_value.Assign]
    """Like _DetectAssignBuiltinStr(), but starting from an evaluated part.

    Only an unquoted string part can select an assignment builtin; any
    other kind of part, or a quoted string, yields None.
    """
    UP_val0 = val0
    if val0.tag() != part_value_e.String:
        return None

    val0 = cast(Piece, UP_val0)
    if val0.quoted:
        return None

    return self._DetectAssignBuiltinStr(val0.s, words, meta_offset)
2164
def SimpleEvalWordSequence2(self, words, is_last_cmd, allow_assign):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Simple word evaluation for YSH.

    Words are never split.  Only words that look like static globs are
    globbed, and the first word is not globbed when allow_assign is set.

    Returns:
      cmd_value.Assign if the first word evaluates to the name of an
      assignment builtin (and allow_assign is set); otherwise
      cmd_value.Argv with parallel lists of strings and locations.
    """
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    meta_offset = 0
    for i, w in enumerate(words):
        if allow_assign and i == meta_offset:
            # No globbing in the first arg for command.Simple.
            first = self._EvalWordToArgv(w)

            # TODO: Remove this because YSH will disallow assignment
            # builtins?  (including export?)
            if len(first) == 1:
                cmd_val = self._DetectAssignBuiltinStr(
                    first[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            strs.extend(first)
            for _ in first:
                locs.append(w)

        elif glob_.LooksLikeStaticGlob(w):
            val = self.EvalWordToString(w)  # respects strict-array
            num_appended = self.globber.Expand(val.s, strs)
            if num_appended < 0:
                raise error.FailGlob('Pattern %r matched no files' % val.s,
                                     w)
            for _ in xrange(num_appended):
                locs.append(w)

        else:
            # No split or glob: each non-empty frame becomes one arg.
            #
            # We will still allow x"${a[@]}"x, though it's deprecated by
            # @a, which disallows such expressions at parse time.
            expanded = self._EvalWordToArgv(w)
            strs.extend(expanded)
            for _ in expanded:
                locs.append(w)

    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2226
def EvalWordSequence2(self, words, is_last_cmd, allow_assign=False):
    # type: (List[CompoundWord], bool, bool) -> cmd_value_t
    """Turns a list of Words into a list of strings.

    Unlike the EvalWord*() methods, it does globbing.

    Args:
      words: the words to evaluate
      is_last_cmd: passed through to the resulting cmd_value.Argv
      allow_assign: True for command.Simple, False for BashArray a=(1 2 3)

    Returns:
      cmd_value.Assign if an assignment builtin is detected (only when
      allow_assign is set); otherwise cmd_value.Argv, whose strings and
      locations are parallel lists.
    """
    if self.exec_opts.simple_word_eval():
        return self.SimpleEvalWordSequence2(words, is_last_cmd,
                                            allow_assign)

    # Parse time:
    # 1. brace expansion.  TODO: Do at parse time.
    # 2. Tilde detection.  DONE at parse time.  Only if Id.Lit_Tilde is the
    #    first WordPart.
    #
    # Run time:
    # 3. tilde sub, var sub, command sub, arith sub.  These are all
    #    "concurrent" on WordParts.  (optional process sub with <() )
    # 4. word splitting.  Can turn this off with a shell option?  Definitely
    #    off for oil.
    # 5. globbing -- several exec_opts affect this: nullglob, safeglob, etc.

    #log('W %s', words)
    strs = []  # type: List[str]
    locs = []  # type: List[CompoundWord]

    # 0 for declare x
    # 1 for builtin declare x
    # 2 for command builtin declare x
    # etc.
    meta_offset = 0

    for i, w in enumerate(words):
        fast_str = word_.FastStrEval(w)
        if fast_str is not None:
            # Fast path: exactly one string, hence exactly one location.
            strs.append(fast_str)
            locs.append(w)

            # e.g. the 'local' in 'local a=b c=d' will be here
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltinStr(
                    fast_str, words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltinStr(fast_str):
                meta_offset += 1

            continue

        part_vals = []  # type: List[part_value_t]
        self._EvalWordToParts(w, part_vals, EXTGLOB_FILES)

        # DYNAMICALLY detect if we're going to run an assignment builtin, and
        # change the rest of the evaluation algorithm if so.
        #
        # We want to allow:
        #   e=export
        #   $e foo=bar
        #
        # But we don't want to evaluate the first word twice in the case of:
        #   $(some-command) --flag
        if len(part_vals) == 1:
            if allow_assign and i == meta_offset:
                cmd_val = self._DetectAssignBuiltin(
                    part_vals[0], words, meta_offset)
                if cmd_val:
                    return cmd_val

            if i <= meta_offset and _DetectMetaBuiltin(part_vals[0]):
                meta_offset += 1

        if 0:
            log('')
            log('part_vals after _EvalWordToParts:')
            for entry in part_vals:
                log('  %s', entry)

        frames = _MakeWordFrames(part_vals)
        if 0:
            log('')
            log('frames after _MakeWordFrames:')
            for entry in frames:
                log('  %s', entry)

        # Measure from the current length, so that strings appended by the
        # fast path above (which already recorded their own location) are
        # not counted a second time.
        num_before = len(strs)

        # Do splitting and globbing.  Each frame will append zero or more args.
        for frame in frames:
            self._EvalWordFrame(frame, strs)

        # Fill in locations parallel to strs.
        for _ in xrange(len(strs) - num_before):
            locs.append(w)

    # A non-assignment command.
    # NOTE: Can't look up builtins here like we did for assignment, because
    # functions can override builtins.
    return cmd_value.Argv(strs, locs, is_last_cmd, None, None)
2330
def EvalWordSequence(self, words):
    # type: (List[CompoundWord]) -> List[str]
    """Evaluate words to a flat list of strings, for arrays and for loops.

    Assignment builtins are not allowed here, so the result is always an
    Argv; only its strings are returned.
    """
    # The second argument is is_last_cmd, which is irrelevant here.
    # allow_assign keeps its default of False.
    result = self.EvalWordSequence2(words, False)
    assert result.tag() == cmd_value_e.Argv
    argv_val = cast(cmd_value.Argv, result)
    return argv_val.argv
2341
2342
class NormalWordEvaluator(AbstractWordEvaluator):
    """Word evaluator that runs command and process subs via a shell executor.

    self.shell_ex starts out as None and must be assigned before use;
    CheckCircularDeps() asserts that it has been.
    """

    def __init__(self, mem, exec_opts, mutable_opts, tilde_ev, splitter,
                 errfmt):
        # type: (state.Mem, optview.Exec, state.MutableOpts, TildeEvaluator, SplitContext, ui.ErrorFormatter) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)
        # Filled in after construction
        self.shell_ex = None  # type: _Executor

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators assigned after construction are set."""
        assert self.arith_ev is not None
        # Disabled for pure OSH
        #assert self.expr_ev is not None
        assert self.shell_ex is not None
        assert self.prompt_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Run a command sub and turn its stdout into a part value.

        Raises:
          error.Structured: with status 4, if the output of @(...) can't be
            decoded as J8 lines.
        """
        stdout_str = self.shell_ex.RunCommandSub(cs_part)

        if cs_part.left_token.id != Id.Left_AtParen:
            # An unquoted result is subject to splitting
            return Piece(stdout_str, quoted, not quoted)

        # YSH splitting algorithm: does not depend on IFS
        try:
            strs = j8.SplitJ8Lines(stdout_str)
        except error.Decode as e:
            # status code 4 is special, for encode/decode errors.
            raise error.Structured(4, e.Message(), cs_part.left_token)

        #strs = self.splitter.SplitForWordEval(stdout_str)
        return part_value.Array(strs)

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Run a process sub and return the resulting path as a piece."""
        dev_path = self.shell_ex.RunProcessSub(cs_part)
        # pretend it's quoted; no split or glob
        return Piece(dev_path, True, False)
2389
2390
# Placeholder that stands in for the output of a command sub when there is
# no executor to run it.
_DUMMY = '__NO_COMMAND_SUB__'


class CompletionWordEvaluator(AbstractWordEvaluator):
    """An evaluator that has no access to an executor.

    Command subs and process subs evaluate to fixed placeholder strings
    instead of being run.

    NOTE: core/completion.py doesn't actually try to use these strings to
    complete.  If you have something like 'echo $(echo hi)/f<TAB>', it sees
    the inner command as the last one, and knows that it is not at the end
    of the line.
    """

    def __init__(self, mem, exec_opts, mutable_opts, tilde_ev, splitter,
                 errfmt):
        # type: (state.Mem, optview.Exec, state.MutableOpts, TildeEvaluator, SplitContext, ui.ErrorFormatter) -> None
        AbstractWordEvaluator.__init__(self, mem, exec_opts, mutable_opts,
                                       tilde_ev, splitter, errfmt)

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that the collaborators assigned after construction are set."""
        assert self.prompt_ev is not None
        assert self.arith_ev is not None
        assert self.expr_ev is not None

    def _EvalCommandSub(self, cs_part, quoted):
        # type: (CommandSub, bool) -> part_value_t
        """Return a placeholder of the same kind a real command sub gives."""
        if cs_part.left_token.id != Id.Left_AtParen:
            return Piece(_DUMMY, quoted, not quoted)

        # @(...) evaluates to an array
        return part_value.Array([_DUMMY])

    def _EvalProcessSub(self, cs_part):
        # type: (CommandSub) -> Piece
        """Return a placeholder instead of starting a process."""
        # pretend it's quoted; no split or glob
        return Piece('__NO_PROCESS_SUB__', True, False)
2433
2434
2435# vim: sw=4