OILS / osh / sh_expr_eval.py View on Github | oils.pub

1299 lines, 839 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9sh_expr_eval.py -- Shell boolean and arithmetic expressions.
10"""
11from __future__ import print_function
12
13from _devbuild.gen.id_kind_asdl import Id
14from _devbuild.gen.runtime_asdl import error_code_e, scope_t
15from _devbuild.gen.syntax_asdl import (
16 word_t,
17 CompoundWord,
18 Token,
19 loc,
20 loc_t,
21 source,
22 arith_expr,
23 arith_expr_e,
24 arith_expr_t,
25 bool_expr,
26 bool_expr_e,
27 bool_expr_t,
28 sh_lhs,
29 sh_lhs_e,
30 sh_lhs_t,
31 BracedVarSub,
32)
33from _devbuild.gen.option_asdl import option_i
34from _devbuild.gen.types_asdl import bool_arg_type_e
35from _devbuild.gen.value_asdl import (
36 value,
37 value_e,
38 value_t,
39 sh_lvalue,
40 sh_lvalue_e,
41 sh_lvalue_t,
42 LeftName,
43 eggex_ops,
44 regex_match,
45 RegexMatch,
46)
47from core import alloc
48from core import bash_impl
49from core import error
50from core.error import e_die, e_die_status, e_strict, e_usage
51from core import num
52from core import state
53from display import ui
54from core import util
55from frontend import consts
56from frontend import lexer
57from frontend import location
58from frontend import match
59from frontend import reader
60from mycpp import mops
61from mycpp import mylib
62from mycpp.mylib import log, tagswitch, switch, str_cmp
63from osh import bool_stat
64from osh import word_eval
65
66import libc # for fnmatch
67# Import these names directly because the C++ translation uses macros literally.
68from libc import FNM_CASEFOLD, REG_ICASE
69
70from typing import Tuple, Optional, cast, TYPE_CHECKING
71if TYPE_CHECKING:
72 from core import optview
73 from frontend import parse_lib
74
75_ = log
76
77#
78# Arith and Command/Word variants of assignment
79#
80# Calls EvalShellLhs()
81# a[$key]=$val # osh/cmd_eval.py:814 (command_e.ShAssignment)
82# Calls EvalArithLhs()
83# (( a[key] = val )) # osh/sh_expr_eval.py:326 (_EvalLhsArith)
84#
85# Calls OldValue()
86# a[$key]+=$val # osh/cmd_eval.py:795 (assign_op_e.PlusEqual)
87# (( a[key] += val )) # osh/sh_expr_eval.py:308 (_EvalLhsAndLookupArith)
88#
89# RHS Indexing
90# val=${a[$key]} # osh/word_eval.py:639 (bracket_op_e.ArrayIndex)
91# (( val = a[key] )) # osh/sh_expr_eval.py:509 (Id.Arith_LBracket)
92#
93
94
def OldValue(lval, mem, exec_opts):
    # type: (sh_lvalue_t, state.Mem, Optional[optview.Exec]) -> value_t
    """Fetch the value currently stored at an lvalue, for augmented assignment.

    Serves both s+=val and (( i += 1 )).

    Args:
      lval: the location being updated: a plain name, an indexed array
            element, or a keyed assoc array element
      mem: variable storage that is read with GetValue()
      exec_opts: can be None if we don't want to check set -u!
                 Because s+=val doesn't check it.

    Returns:
      For a plain name, whatever mem.GetValue() returned.  For an indexed or
      keyed location, a value.Str holding the element, or value.Str('') when
      there is no such element.

    TODO: A stricter and less ambiguous version for YSH.
    - Problem: why does sh_lvalue have Indexed and Keyed, while sh_lhs only has
      IndexedName?
      - should I have location.LName and sh_lvalue.Indexed only?
      - and Indexed uses the index_t type?
        - well that might be Str or Int
    """
    assert isinstance(lval, sh_lvalue_t), lval

    # Step 1: every variant carries a name; pull it out.
    # TODO: refactor sh_lvalue_t to make this simpler
    UP_lval = lval
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):  # (( i++ ))
            lval = cast(LeftName, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Indexed):  # (( a[i]++ ))
            lval = cast(sh_lvalue.Indexed, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Keyed):  # (( A['K']++ )) ?  I think this works
            lval = cast(sh_lvalue.Keyed, UP_lval)
            var_name = lval.name
        else:
            raise AssertionError()

    # Step 2: look up the whole variable, honoring 'set -u' if requested.
    cur = mem.GetValue(var_name)
    if exec_opts and exec_opts.nounset() and cur.tag() == value_e.Undef:
        e_die('Undefined variable %r' % var_name)  # TODO: location info

    # Step 3: narrow down to the element the lvalue points at.
    UP_cur = cur
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):
            return cur

        elif case(sh_lvalue_e.Indexed):
            lval = cast(sh_lvalue.Indexed, UP_lval)

            with tagswitch(cur) as case2:
                if case2(value_e.Undef):
                    elem = None  # type: Optional[str]
                elif case2(value_e.BashArray):
                    array_val = cast(value.BashArray, UP_cur)
                    # Note: We ignore error_code in the return value of
                    # BashArray_GetElement because an invalid index will be
                    # reported on the assignment stage anyway.
                    elem, _ = bash_impl.BashArray_GetElement(
                        array_val, lval.index)
                elif case2(value_e.SparseArray):
                    sparse_val = cast(value.SparseArray, UP_cur)
                    elem, _ = bash_impl.SparseArray_GetElement(
                        sparse_val, mops.IntWiden(lval.index))
                else:
                    e_die("Can't use [] on value of type %s" %
                          ui.ValType(cur))

            if elem is None:
                # NOTE: Other logic is value.Undef?  0?
                return value.Str('')

            assert isinstance(elem, str), elem
            return value.Str(elem)

        elif case(sh_lvalue_e.Keyed):
            lval = cast(sh_lvalue.Keyed, UP_lval)

            assoc_val = None  # type: value.BashAssoc
            with tagswitch(cur) as case2:
                if case2(value_e.Undef):
                    # This never happens, because undef[x]+= is assumed to
                    raise AssertionError()
                elif case2(value_e.BashAssoc):
                    tmp2 = cast(value.BashAssoc, UP_cur)
                    # mycpp rewrite: add tmp.  cast() creates a new var in
                    # inner scope
                    assoc_val = tmp2
                    elem = bash_impl.BashAssoc_GetElement(assoc_val, lval.key)
                else:
                    e_die("Can't use [] on value of type %s" %
                          ui.ValType(cur))

            if elem is None:
                return value.Str('')
            return value.Str(elem)

        else:
            raise AssertionError()

    # Every branch above returns or raises.
    raise AssertionError()
190
191
192# TODO: Should refactor for int/char-based processing
if mylib.PYTHON:

    def IsLower(ch):
        # type: (str) -> bool
        """Return True if ch compares within the range 'a' through 'z'."""
        return 'a' <= ch <= 'z'

    def IsUpper(ch):
        # type: (str) -> bool
        """Return True if ch compares within the range 'A' through 'Z'."""
        return 'A' <= ch <= 'Z'
202
203
class UnsafeArith(object):
    """For parsing a[i] at RUNTIME."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            parse_ctx,  # type: parse_lib.ParseContext
            arith_ev,  # type: ArithEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        """Store the collaborators; the arena is taken from parse_ctx."""
        self.mem = mem
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        self.parse_ctx = parse_ctx
        self.arith_ev = arith_ev
        self.errfmt = errfmt

        self.arena = self.parse_ctx.arena

    def ParseLValue(self, s, location):
        # type: (str, loc_t) -> sh_lvalue_t
        """Turn a string into an sh_lvalue, for 'unset' and 'printf -v'.

        The string goes through the arith parser, so it behaves like the LHS
        of (( a[i] = x )).  When parse_sh_arith is off, only a plain variable
        name is accepted.

        Args:
          s: text of the lvalue, e.g. 'a' or 'a[i]'
          location: blamed for errors, and recorded as the origin of the
                    dynamically parsed code
        """
        if not self.parse_ctx.parse_opts.parse_sh_arith():
            # Do something simpler for YSH
            if match.IsValidVarName(s):
                return LeftName(s, location)
            e_die('Invalid variable name %r (parse_sh_arith is off)' % s,
                  location)

        arith_parser = self.parse_ctx.MakeArithParser(s)
        dynamic_src = source.Dynamic('dynamic LHS', location)

        with alloc.ctx_SourceCode(self.arena, dynamic_src):
            try:
                anode = arith_parser.Parse()
            except error.Parse as e:
                self.errfmt.PrettyPrintError(e)
                # Exception for builtins 'unset' and 'printf'
                e_usage('got invalid LHS expression', location)

        # Note: we parse '1+2', and then it becomes a runtime error because
        # it's not a valid LHS.  Could be a parse error.

        if self.exec_opts.eval_unsafe_arith():
            return self.arith_ev.EvalArithLhs(anode)

        # Prevent attacks like these by default:
        #
        # unset -v 'A["$(echo K; rm *)"]'
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            lval = self.arith_ev.EvalArithLhs(anode)
        return lval

    def ParseVarRef(self, ref_str, blame_tok):
        # type: (str, Token) -> BracedVarSub
        """Parse the string held by a variable, for ${!ref}.

        This supports:
        - 0 to 9 for $0 to $9
        - @ for "$@" etc.

        See the related grammar in osh/word_parse.py _ReadBracedVarSub.

        Note: declare -n allows 'varname' and 'varname[i]' and 'varname[@]',
        but it does NOT allow 0 to 9, @, *

        NamerefExpr = NAME Subscript?   # this allows @ and * too

        _ResolveNameOrRef currently gives you a 'cell'.  So it might not
        support sh_lvalue.Indexed?

        Args:
          ref_str: the text to parse as a var ref
          blame_tok: token blamed when the text doesn't parse
        """
        line_reader = reader.StringLineReader(ref_str, self.arena)
        # Named 'lx' so the imported 'lexer' module isn't shadowed
        lx = self.parse_ctx.MakeLexer(line_reader)
        word_parser = self.parse_ctx.MakeWordParser(lx, line_reader)

        with alloc.ctx_SourceCode(self.arena, source.VarRef(blame_tok)):
            try:
                bvs_part = word_parser.ParseVarRef()
            except error.Parse as e:
                # This prints the inner location
                self.errfmt.PrettyPrintError(e)

                # this affects builtins 'unset' and 'printf'
                e_die("Invalid var ref expression", blame_tok)

        return bvs_part
300
301
def _ParseOshInteger(s, blame_loc):
    # type: (str, loc_t) -> Tuple[bool, mops.BigInt]
    """Convert a string to a BigInt if it is spelled like a shell integer.

    Args:
      s: the candidate text; the caller is expected to strip whitespace
      blame_loc: location attached to any error raised here

    Returns:
      (True, value) when the string looks like an integer
      (False, ...)  when it doesn't

    Raises:
      A fatal error (e_die) when a decimal, octal, or hex constant is too
      big.  A strict error (e_strict) when the base of base#digits is outside
      2..64, or a digit is out of range for the base.

    Integer formats that are recognized:
      0xAB     hex
      042      octal
      42       decimal
      64#z     arbitrary base
    """
    id_, pos = match.MatchShNumberToken(s, 0)  # use re2c lexer
    if pos != len(s):
        # trailing data isn't allowed
        return (False, mops.BigInt(0))

    # Do conversions

    if id_ == Id.ShNumber_Dec:
        # Normal base 10 integer.
        ok, big_int = mops.FromStr2(s)
        if not ok:
            e_die('Integer too big: %s' % s, blame_loc)
        return (True, big_int)

    elif id_ == Id.ShNumber_Oct:
        # 0123, offset by 1
        ok, big_int = mops.FromStr2(s[1:], 8)
        if not ok:
            e_die('Octal integer too big: %s' % s, blame_loc)
        return (True, big_int)

    elif id_ == Id.ShNumber_Hex:
        # 0xff, offset by 2
        ok, big_int = mops.FromStr2(s[2:], 16)
        if not ok:
            e_die('Hex integer too big: %s' % s, blame_loc)
        return (True, big_int)

    elif id_ == Id.ShNumber_BaseN:
        b, digits = mylib.split_once(s, '#')
        assert digits is not None, digits  # assured by lexer

        try:
            base = int(b)  # machine integer, not BigInt
        except ValueError:
            # Unreachable per the regex validation above
            raise AssertionError()

        if base > 64:
            e_strict('Base %d cannot be larger than 64' % base, blame_loc)
        if base < 2:
            # Base 2 itself is accepted, so the limit is "at least 2"
            e_strict('Base %d must be at least 2' % base, blame_loc)

        # Digit values: 0-9, then a-z (10-35), A-Z (36-61), @ (62), _ (63)
        integer = mops.ZERO
        for ch in digits:
            if IsLower(ch):
                digit = ord(ch) - ord('a') + 10
            elif IsUpper(ch):
                digit = ord(ch) - ord('A') + 36
            elif ch == '@':  # horrible syntax
                digit = 62
            elif ch == '_':
                digit = 63
            elif ch.isdigit():
                digit = int(ch)
            else:
                # Unreachable per the regex validation above
                raise AssertionError()

            if digit >= base:
                e_strict('Digits %r out of range for base %d' % (digits, base),
                         blame_loc)

            # formula is:
            # integer = integer * base + digit
            integer = mops.Add(mops.Mul(integer, mops.IntWiden(base)),
                               mops.IntWiden(digit))
        return (True, integer)

    else:
        # Id.Unknown_Tok or Id.Eol_Tok
        return (False, mops.BigInt(0))  # not an integer
387
388
class ArithEvaluator(object):
    """Shared between arith and bool evaluators.

    They both:

    1. Convert strings to integers, respecting shopt -s strict_arith.
    2. Look up variables and evaluate words.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            parse_ctx,  # type: Optional[parse_lib.ParseContext]
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        """Store the collaborators.

        word_ev starts as None and must be assigned afterwards; see
        CheckCircularDeps().  parse_ctx may be None, which disables recursive
        evaluation of strings in _StringToBigInt().
        """
        self.word_ev = None  # type: word_eval.StringWordEvaluator
        self.mem = mem
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        self.parse_ctx = parse_ctx
        self.errfmt = errfmt

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that word_ev was assigned after construction."""
        assert self.word_ev is not None

    def _StringToBigInt(self, s, blame_loc):
        # type: (str, loc_t) -> mops.BigInt
        """Use bash-like rules to coerce a string to an integer.

        Runtime parsing enables silly stuff like $(( $(echo 1)$(echo 2) + 1 )) => 13

        bare word: variable
        quoted word: string (not done?)

        Args:
          s: text to convert; surrounding whitespace is stripped
          blame_loc: location attached to errors

        Returns:
          The integer.  An empty string gives zero when parse_ctx is set.

        Raises:
          error.Strict or a fatal error when the string isn't an integer and
          can't be evaluated as an arithmetic expression.
        """
        s = s.strip()

        ok, i = _ParseOshInteger(s, blame_loc)
        if ok:
            return i

        # Doesn't look like an integer

        # note: 'test' and '[' never evaluate recursively
        if self.parse_ctx is None:
            if len(s) == 0 or match.IsValidVarName(s):
                # x42 could evaluate to 0
                e_strict("Invalid integer constant %r" % s, blame_loc)
            else:
                # 42x is always fatal!
                e_die("Invalid integer constant %r" % s, blame_loc)

        # Special case so we don't get EOF error
        if len(s) == 0:
            return mops.ZERO

        # For compatibility: Try to parse it as an expression and evaluate it.
        a_parser = self.parse_ctx.MakeArithParser(s)

        try:
            node2 = a_parser.Parse()  # may raise error.Parse
        except error.Parse as e:
            self.errfmt.PrettyPrintError(e)
            e_die('Parse error in recursive arithmetic', e.location)

        # Prevent infinite recursion of $(( 1x )) -- it's a word that evaluates
        # to itself, and you don't want to reparse it as a word.
        if node2.tag() == arith_expr_e.Word:
            e_die("Invalid integer constant %r" % s, blame_loc)

        if self.exec_opts.eval_unsafe_arith():
            integer = self.EvalToBigInt(node2)
        else:
            # BoolEvaluator doesn't have parse_ctx or mutable_opts
            assert self.mutable_opts is not None

            # We don't need to flip _allow_process_sub, because they can't be
            # parsed.  See spec/bugs.test.sh.
            with state.ctx_Option(self.mutable_opts,
                                  [option_i._allow_command_sub], False):
                integer = self.EvalToBigInt(node2)

        return integer

    def _ValToIntOrError(self, val, blame):
        # type: (value_t, arith_expr_t) -> mops.BigInt
        """Convert an Undef, Int, or Str value to an integer.

        error.Strict raised during the conversion is re-raised only under
        strict_arith; otherwise the result is zero.  Any other value type is
        a fatal error regardless of strict_arith.

        Args:
          val: the value to convert
          blame: expression used as the error location
        """
        try:
            UP_val = val
            with tagswitch(val) as case:
                if case(value_e.Undef):
                    # 'nounset' already handled before got here
                    # Happens upon a[undefined]=42, which unfortunately turns
                    # into a[0]=42.
                    e_strict('Undefined value in arithmetic context',
                             loc.Arith(blame))

                elif case(value_e.Int):
                    val = cast(value.Int, UP_val)
                    return val.i

                elif case(value_e.Str):
                    val = cast(value.Str, UP_val)
                    # calls e_strict
                    return self._StringToBigInt(val.s, loc.Arith(blame))

        except error.Strict as e:
            if self.exec_opts.strict_arith():
                raise
            else:
                return mops.ZERO

        # Arrays and associative arrays always fail -- not controlled by
        # strict_arith.
        # In bash, (( a )) is like (( a[0] )), but I don't want that.
        # And returning '0' gives different results.
        e_die(
            "Expected a value convertible to integer, got %s" %
            ui.ValType(val), loc.Arith(blame))

    def _EvalLhsAndLookupArith(self, node):
        # type: (arith_expr_t) -> Tuple[mops.BigInt, sh_lvalue_t]
        """Evaluate an LHS and read its old value, for x += y and ++x.

        Plain (( x = y )) does not come through here, since it has no need
        for the old value.

        Returns:
          (old integer value, lvalue to store the new value into)
        """
        lval = self.EvalArithLhs(node)
        val = OldValue(lval, self.mem, self.exec_opts)

        # BASH_LINENO, arr (array name without strict_array), etc.
        if (val.tag() in (value_e.BashArray, value_e.BashAssoc,
                          value_e.SparseArray) and
                lval.tag() == sh_lvalue_e.Var):
            named_lval = cast(LeftName, lval)
            if word_eval.ShouldArrayDecay(named_lval.name, self.exec_opts):
                # The bare array name stands for element 0 / key '0', so the
                # store must target that element too.
                if val.tag() in (value_e.BashArray, value_e.SparseArray):
                    lval = sh_lvalue.Indexed(named_lval.name, 0, loc.Missing)
                elif val.tag() == value_e.BashAssoc:
                    lval = sh_lvalue.Keyed(named_lval.name, '0', loc.Missing)
                val = word_eval.DecayArray(val)

        # This error message could be better, but we already have one
        #if val.tag() in (value_e.BashArray, value_e.SparseArray):
        #  e_die("Can't use assignment like ++ or += on arrays")

        i = self._ValToIntOrError(val, node)
        return i, lval

    def _Store(self, lval, new_int):
        # type: (sh_lvalue_t, mops.BigInt) -> None
        """Write an integer to lval, stored as its string form."""
        val = value.Str(mops.ToStr(new_int))
        state.OshLanguageSetValue(self.mem, lval, val)

    def EvalToBigInt(self, node):
        # type: (arith_expr_t) -> mops.BigInt
        """Evaluate an expression and convert the result to an integer.

        Used externally by ${a[i+1]} and ${a:start:len}.

        Also used internally.
        """
        val = self.Eval(node)

        # BASH_LINENO, arr (array name without strict_array), etc.
        if (val.tag() in (value_e.BashArray, value_e.BashAssoc,
                          value_e.SparseArray) and
                node.tag() == arith_expr_e.VarSub):
            vsub = cast(Token, node)
            if word_eval.ShouldArrayDecay(lexer.LazyStr(vsub), self.exec_opts):
                val = word_eval.DecayArray(val)

        i = self._ValToIntOrError(val, node)
        return i

    def EvalToInt(self, node):
        # type: (arith_expr_t) -> int
        """Like EvalToBigInt(), truncated to a machine integer."""
        return mops.BigTruncate(self.EvalToBigInt(node))

    def Eval(self, node):
        # type: (arith_expr_t) -> value_t
        """Evaluate an arithmetic expression to a value.

        Returns:
          value.Int for empty expressions, assignments, unary and binary
            operators
          the variable's stored value for a bare name, which can be an array
          the result of word evaluation for a word like $x
          value.Str or value.Undef for indexing a[i]
          whatever the chosen branch evaluates to for the ternary operator

        Raises:
          Fatal errors for an undefined variable under nounset, division by
          zero, a negative exponent, and indexing a value that can't be
          indexed.  error.Expr for a negative shift count.

        NOTE: (( A['x'] = 'x' )) and (( x = A['x'] )) are syntactically valid in
        bash, but don't do what you'd think.  'x' sometimes a variable name and
        sometimes a key.
        """
        # OSH semantics: Variable NAMES cannot be formed dynamically; but INTEGERS
        # can.  ${foo:-3}4 is OK.  $? will be a compound word too, so we don't have
        # to handle that as a special case.

        UP_node = node
        with tagswitch(node) as case:
            if case(arith_expr_e.EmptyZero):  # $(( ))
                return value.Int(mops.ZERO)  # Weird axiom

            elif case(arith_expr_e.EmptyOne):  # for (( ; ; ))
                return value.Int(mops.ONE)

            elif case(arith_expr_e.VarSub):  # $(( x ))  (can be array)
                vsub = cast(Token, UP_node)
                var_name = lexer.LazyStr(vsub)
                val = self.mem.GetValue(var_name)
                if val.tag() == value_e.Undef and self.exec_opts.nounset():
                    e_die('Undefined variable %r' % var_name, vsub)
                return val

            elif case(arith_expr_e.Word):  # $(( $x )) $(( ${x}${y} )), etc.
                w = cast(CompoundWord, UP_node)
                return self.word_ev.EvalWordToString(w)

            elif case(arith_expr_e.UnaryAssign):  # a++
                node = cast(arith_expr.UnaryAssign, UP_node)

                op_id = node.op_id
                old_big, lval = self._EvalLhsAndLookupArith(node.child)

                if op_id == Id.Node_PostDPlus:  # post-increment
                    new_big = mops.Add(old_big, mops.ONE)
                    result = old_big

                elif op_id == Id.Node_PostDMinus:  # post-decrement
                    new_big = mops.Sub(old_big, mops.ONE)
                    result = old_big

                elif op_id == Id.Arith_DPlus:  # pre-increment
                    new_big = mops.Add(old_big, mops.ONE)
                    result = new_big

                elif op_id == Id.Arith_DMinus:  # pre-decrement
                    new_big = mops.Sub(old_big, mops.ONE)
                    result = new_big

                else:
                    raise AssertionError(op_id)

                self._Store(lval, new_big)
                return value.Int(result)

            elif case(arith_expr_e.BinaryAssign):  # a=1, a+=5, a[1]+=5
                node = cast(arith_expr.BinaryAssign, UP_node)
                op_id = node.op_id

                if op_id == Id.Arith_Equal:
                    # Don't really need a span ID here, because tdop.CheckLhsExpr should
                    # have done all the validation.
                    lval = self.EvalArithLhs(node.left)
                    rhs_big = self.EvalToBigInt(node.right)

                    self._Store(lval, rhs_big)
                    return value.Int(rhs_big)

                old_big, lval = self._EvalLhsAndLookupArith(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                if op_id == Id.Arith_PlusEqual:
                    new_big = mops.Add(old_big, rhs_big)
                elif op_id == Id.Arith_MinusEqual:
                    new_big = mops.Sub(old_big, rhs_big)
                elif op_id == Id.Arith_StarEqual:
                    new_big = mops.Mul(old_big, rhs_big)

                elif op_id == Id.Arith_SlashEqual:
                    if mops.Equal(rhs_big, mops.ZERO):
                        # Blame the divisor
                        e_die('Divide by zero', loc.Arith(node.right))
                    new_big = mops.Div(old_big, rhs_big)

                elif op_id == Id.Arith_PercentEqual:
                    if mops.Equal(rhs_big, mops.ZERO):
                        # Blame the divisor
                        e_die('Divide by zero', loc.Arith(node.right))
                    new_big = mops.Rem(old_big, rhs_big)

                elif op_id == Id.Arith_DGreatEqual:
                    # Same guard as the binary >> operator below
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't right shift by negative number",
                            loc.Arith(node.right))
                    new_big = mops.RShift(old_big, rhs_big)
                elif op_id == Id.Arith_DLessEqual:
                    # Same guard as the binary << operator below
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't left shift by negative number",
                            loc.Arith(node.right))
                    new_big = mops.LShift(old_big, rhs_big)
                elif op_id == Id.Arith_AmpEqual:
                    new_big = mops.BitAnd(old_big, rhs_big)
                elif op_id == Id.Arith_PipeEqual:
                    new_big = mops.BitOr(old_big, rhs_big)
                elif op_id == Id.Arith_CaretEqual:
                    new_big = mops.BitXor(old_big, rhs_big)
                else:
                    raise AssertionError(op_id)  # shouldn't get here

                self._Store(lval, new_big)
                return value.Int(new_big)

            elif case(arith_expr_e.Unary):
                node = cast(arith_expr.Unary, UP_node)
                op_id = node.op_id

                i = self.EvalToBigInt(node.child)

                if op_id == Id.Node_UnaryPlus:  # +i
                    result = i
                elif op_id == Id.Node_UnaryMinus:  # -i
                    result = mops.Sub(mops.ZERO, i)

                elif op_id == Id.Arith_Bang:  # logical negation
                    if mops.Equal(i, mops.ZERO):
                        result = mops.ONE
                    else:
                        result = mops.ZERO
                elif op_id == Id.Arith_Tilde:  # bitwise complement
                    result = mops.BitNot(i)
                else:
                    raise AssertionError(op_id)  # shouldn't get here

                return value.Int(result)

            elif case(arith_expr_e.Binary):
                node = cast(arith_expr.Binary, UP_node)
                op_id = node.op.id

                # Short-circuit evaluation for || and &&.
                if op_id == Id.Arith_DPipe:
                    lhs_big = self.EvalToBigInt(node.left)
                    if mops.Equal(lhs_big, mops.ZERO):
                        rhs_big = self.EvalToBigInt(node.right)
                        if mops.Equal(rhs_big, mops.ZERO):
                            result = mops.ZERO  # false
                        else:
                            result = mops.ONE  # true
                    else:
                        result = mops.ONE  # true
                    return value.Int(result)

                if op_id == Id.Arith_DAmp:
                    lhs_big = self.EvalToBigInt(node.left)
                    if mops.Equal(lhs_big, mops.ZERO):
                        result = mops.ZERO  # false
                    else:
                        rhs_big = self.EvalToBigInt(node.right)
                        if mops.Equal(rhs_big, mops.ZERO):
                            result = mops.ZERO  # false
                        else:
                            result = mops.ONE  # true
                    return value.Int(result)

                if op_id == Id.Arith_LBracket:
                    # NOTE: Similar to bracket_op_e.ArrayIndex in osh/word_eval.py

                    left = self.Eval(node.left)
                    UP_left = left
                    with tagswitch(left) as case:
                        if case(value_e.BashArray):
                            array_val = cast(value.BashArray, UP_left)
                            small_i = mops.BigTruncate(
                                self.EvalToBigInt(node.right))
                            s, error_code = bash_impl.BashArray_GetElement(
                                array_val, small_i)
                            if error_code == error_code_e.IndexOutOfRange:
                                # Note: Bash outputs warning but does not make
                                # it a real error.  We follow the Bash behavior
                                # here.
                                small_length = bash_impl.BashArray_Length(
                                    array_val)
                                self.errfmt.Print_(
                                    "Index %d out of bounds for array of length %d"
                                    % (small_i, small_length),
                                    blame_loc=node.op)

                        elif case(value_e.SparseArray):
                            sparse_val = cast(value.SparseArray, UP_left)
                            i = self.EvalToBigInt(node.right)
                            s, error_code = bash_impl.SparseArray_GetElement(
                                sparse_val, i)
                            if error_code == error_code_e.IndexOutOfRange:
                                # Note: Bash outputs warning but does not make
                                # it a real error.  We follow the Bash behavior
                                # here.
                                length = bash_impl.SparseArray_Length(
                                    sparse_val)
                                self.errfmt.Print_(
                                    "Index %s out of bounds for array of length %s"
                                    % (mops.ToStr(i), mops.ToStr(length)),
                                    blame_loc=node.op)

                        elif case(value_e.BashAssoc):
                            left = cast(value.BashAssoc, UP_left)
                            key = self.EvalWordToString(node.right)
                            s = bash_impl.BashAssoc_GetElement(left, key)

                        elif case(value_e.Str):
                            left = cast(value.Str, UP_left)
                            if self.exec_opts.strict_arith():
                                e_die(
                                    "Value of type Str can't be indexed (strict_arith)",
                                    node.op)
                            index = self.EvalToBigInt(node.right)
                            # s[0] evaluates to s
                            # s[1] evaluates to Undef
                            s = left.s if mops.Equal(index,
                                                     mops.ZERO) else None

                        elif case(value_e.Undef):
                            if self.exec_opts.strict_arith():
                                e_die(
                                    "Value of type Undef can't be indexed (strict_arith)",
                                    node.op)
                            s = None  # value.Undef

                            # There isn't a way to distinguish Undef vs. empty
                            # string, even with set -o nounset?
                            # s = ''

                        else:
                            # TODO: Add error context
                            e_die(
                                "Value of type %s can't be indexed" %
                                ui.ValType(left), node.op)

                    if s is None:
                        val = value.Undef
                    else:
                        val = value.Str(s)

                    return val

                if op_id == Id.Arith_Comma:
                    self.EvalToBigInt(node.left)  # throw away result
                    result = self.EvalToBigInt(node.right)
                    return value.Int(result)

                # Rest are integers
                lhs_big = self.EvalToBigInt(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                if op_id == Id.Arith_Plus:
                    result = mops.Add(lhs_big, rhs_big)
                elif op_id == Id.Arith_Minus:
                    result = mops.Sub(lhs_big, rhs_big)
                elif op_id == Id.Arith_Star:
                    result = mops.Mul(lhs_big, rhs_big)
                elif op_id == Id.Arith_Slash:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', node.op)
                    result = mops.Div(lhs_big, rhs_big)

                elif op_id == Id.Arith_Percent:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', node.op)
                    result = mops.Rem(lhs_big, rhs_big)

                elif op_id == Id.Arith_DStar:
                    if mops.Greater(mops.ZERO, rhs_big):
                        e_die("Exponent can't be a negative number",
                              loc.Arith(node.right))
                    result = num.Exponent(lhs_big, rhs_big)

                elif op_id == Id.Arith_DEqual:
                    result = mops.FromBool(mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_NEqual:
                    result = mops.FromBool(not mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_Great:
                    result = mops.FromBool(mops.Greater(lhs_big, rhs_big))
                elif op_id == Id.Arith_GreatEqual:
                    result = mops.FromBool(
                        mops.Greater(lhs_big, rhs_big) or
                        mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_Less:
                    result = mops.FromBool(mops.Greater(rhs_big, lhs_big))
                elif op_id == Id.Arith_LessEqual:
                    result = mops.FromBool(
                        mops.Greater(rhs_big, lhs_big) or
                        mops.Equal(lhs_big, rhs_big))

                elif op_id == Id.Arith_Pipe:
                    result = mops.BitOr(lhs_big, rhs_big)
                elif op_id == Id.Arith_Amp:
                    result = mops.BitAnd(lhs_big, rhs_big)
                elif op_id == Id.Arith_Caret:
                    result = mops.BitXor(lhs_big, rhs_big)

                # Note: how to define shift of negative numbers?
                elif op_id == Id.Arith_DLess:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr("Can't left shift by negative number",
                                         node.op)
                    result = mops.LShift(lhs_big, rhs_big)
                elif op_id == Id.Arith_DGreat:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't right shift by negative number", node.op)
                    result = mops.RShift(lhs_big, rhs_big)
                else:
                    raise AssertionError(op_id)

                return value.Int(result)

            elif case(arith_expr_e.TernaryOp):
                node = cast(arith_expr.TernaryOp, UP_node)

                cond = self.EvalToBigInt(node.cond)
                if mops.Equal(cond, mops.ZERO):
                    return self.Eval(node.false_expr)
                else:
                    return self.Eval(node.true_expr)

            else:
                raise AssertionError(node.tag())

        raise AssertionError('for -Wreturn-type in C++')

    def EvalWordToString(self, node, blame_loc=loc.Missing):
        # type: (arith_expr_t, loc_t) -> str
        """Evaluate an expression that must be a word, e.g. an assoc array key.

        Args:
          node: the expression; only arith_expr.Word is accepted
          blame_loc: location for the error when node isn't a word

        Raises:
          error.FatalRuntime if the expression isn't a string
            or if it contains a bare variable like a[x]

        These are allowed because they're unambiguous, unlike a[x]

        a[$x] a["$x"] a["x"] a['x']
        """
        UP_node = node
        if node.tag() == arith_expr_e.Word:  # $(( $x )) $(( ${x}${y} )), etc.
            w = cast(CompoundWord, UP_node)
            val = self.word_ev.EvalWordToString(w)
            return val.s
        else:
            # A[x] is the "Parsing Bash is Undecidable" problem
            # It is a string or var name?
            # (It's parsed as arith_expr.VarSub)
            e_die(
                "Assoc array keys must be strings: $x 'x' \"$x\" etc. (OILS-ERR-101)",
                blame_loc)

    def EvalShellLhs(self, node, which_scopes):
        # type: (sh_lhs_t, scope_t) -> sh_lvalue_t
        """Evaluate a shell LHS expression

        For a=b and a[x]=b etc.

        For a[x], the subscript is evaluated as a string key when the
        variable is an assoc array, and as an integer index otherwise.
        """
        assert isinstance(node, sh_lhs_t), node

        UP_node = node
        lval = None  # type: sh_lvalue_t
        with tagswitch(node) as case:
            if case(sh_lhs_e.Name):  # a=x
                node = cast(sh_lhs.Name, UP_node)
                assert node.name is not None

                lval1 = LeftName(node.name, node.left)
                lval = lval1

            elif case(sh_lhs_e.IndexedName):  # a[1+2]=x
                node = cast(sh_lhs.IndexedName, UP_node)
                assert node.name is not None

                if self.mem.IsBashAssoc(node.name):
                    key = self.EvalWordToString(node.index,
                                                blame_loc=node.left)
                    # node.left points to A[ in A[x]=1
                    lval2 = sh_lvalue.Keyed(node.name, key, node.left)
                    lval = lval2
                else:
                    index = mops.BigTruncate(self.EvalToBigInt(node.index))
                    lval3 = sh_lvalue.Indexed(node.name, index, node.left)
                    lval = lval3

            else:
                raise AssertionError(node.tag())

        return lval

    def _VarNameOrWord(self, anode):
        # type: (arith_expr_t) -> Tuple[Optional[str], loc_t]
        """
        Returns a variable name if the arith node can be interpreted that way.

        A bare name gives its text; a word is evaluated to a string.  Anything
        else gives (None, loc.Missing).
        """
        UP_anode = anode
        with tagswitch(anode) as case:
            if case(arith_expr_e.VarSub):
                tok = cast(Token, UP_anode)
                return (lexer.LazyStr(tok), tok)

            elif case(arith_expr_e.Word):
                w = cast(CompoundWord, UP_anode)
                var_name = self.EvalWordToString(w)
                return (var_name, w)

        no_str = None  # type: Optional[str]
        return (no_str, loc.Missing)

    def EvalArithLhs(self, anode):
        # type: (arith_expr_t) -> sh_lvalue_t
        """Evaluate the left-hand side of an arithmetic assignment.

        For (( a[x] = 1 )) etc.

        Returns:
          sh_lvalue.Keyed or sh_lvalue.Indexed for a[x], depending on whether
          'a' is an assoc array; LeftName for a plain name.

        Raises:
          A fatal error for an invalid variable name like 1[2], and status 2
          when the expression can't be an LHS at all.
        """
        UP_anode = anode
        if anode.tag() == arith_expr_e.Binary:
            anode = cast(arith_expr.Binary, UP_anode)
            if anode.op.id == Id.Arith_LBracket:
                var_name, blame_loc = self._VarNameOrWord(anode.left)

                # Test for None before validating the name, so a missing name
                # falls through to the 'Invalid LHS' error below.
                if var_name is not None:
                    # (( 1[2] = 3 )) isn't valid
                    if not match.IsValidVarName(var_name):
                        e_die('Invalid variable name %r' % var_name,
                              blame_loc)

                    if self.mem.IsBashAssoc(var_name):
                        arith_loc = location.TokenForArith(anode)
                        key = self.EvalWordToString(anode.right,
                                                    blame_loc=arith_loc)
                        return sh_lvalue.Keyed(var_name, key, blame_loc)
                    else:
                        index = mops.BigTruncate(
                            self.EvalToBigInt(anode.right))
                        return sh_lvalue.Indexed(var_name, index, blame_loc)

        var_name, blame_loc = self._VarNameOrWord(anode)
        if var_name is not None:
            return LeftName(var_name, blame_loc)

        # e.g. unset 'x-y'.  status 2 for runtime parse error
        e_die_status(2, 'Invalid LHS to modify', blame_loc)
1008
1009
1010class BoolEvaluator(ArithEvaluator):
1011 """This is also an ArithEvaluator because it has to understand.
1012
1013 [[ x -eq 3 ]]
1014
1015 where x='1+2'
1016 """
1017
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: Optional[state.MutableOpts]
        parse_ctx,  # type: Optional[parse_lib.ParseContext]
        errfmt,  # type: ui.ErrorFormatter
        bracket=False  # type: bool
):
    # type: (...) -> None
    """Initialize the ArithEvaluator base, then record the bracket flag.

    Args:
      bracket: selects between [ and [[ behavior, which differ slightly
    """
    ArithEvaluator.__init__(
        self,
        mem,
        exec_opts,
        mutable_opts,
        parse_ctx,
        errfmt,
    )
    # [ and [[ are slightly different
    self.bracket = bracket
1031
def _IsDefined(self, s, blame_loc):
    # type: (str, loc_t) -> bool
    """Decide whether a name or name[index] refers to something defined.

    Args:
      s: the operand, expected to match consts.TEST_V_RE
      blame_loc: location attached to errors

    Returns:
      True if the variable, or the addressed element, exists.  False
      otherwise, including for malformed operands when strict_word_eval
      is off.

    Raises:
      A fatal error under strict_word_eval for a malformed operand or a
      non-integer array index, and always for an array index that is out of
      bounds.  error.TypeErr under strict_word_eval when a subscript is
      applied to a value that is not an array or assoc array.
    """
    groups = util.RegexSearch(consts.TEST_V_RE, s)
    if groups is None:
        if self.exec_opts.strict_word_eval():
            e_die('-v expected name or name[index]', blame_loc)
        return False

    name = groups[1]
    subscript = groups[3]

    val = self.mem.GetValue(name)
    if len(subscript) == 0:  # it's just a variable name
        return val.tag() != value_e.Undef

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray, value_e.SparseArray):
            try:
                # could use mops.FromStr?
                index = int(subscript)
            except ValueError:
                if self.exec_opts.strict_word_eval():
                    e_die(
                        '-v got BashArray and invalid index %r' %
                        subscript, blame_loc)
                return False

            if val.tag() == value_e.BashArray:
                array_val = cast(value.BashArray, UP_val)
                found, error_code = bash_impl.BashArray_HasElement(
                    array_val, index)
                if error_code == error_code_e.IndexOutOfRange:
                    length = bash_impl.BashArray_Length(array_val)
                    e_die(
                        '-v got index %s, which is out of bounds for array of length %d'
                        % (subscript, length), blame_loc)
                return found

            elif val.tag() == value_e.SparseArray:
                sparse_val = cast(value.SparseArray, UP_val)
                found, error_code = bash_impl.SparseArray_HasElement(
                    sparse_val, mops.IntWiden(index))
                if error_code == error_code_e.IndexOutOfRange:
                    big_length = bash_impl.SparseArray_Length(sparse_val)
                    e_die(
                        '-v got index %s, which is out of bounds for array of length %s'
                        % (subscript, mops.ToStr(big_length)), blame_loc)
                return found

            else:
                raise AssertionError()

        elif case(value_e.BashAssoc):
            assoc_val = cast(value.BashAssoc, UP_val)
            return bash_impl.BashAssoc_HasElement(assoc_val, subscript)

        else:
            # work around mycpp bug!  parses as 'elif'
            pass

    # A subscript was given, but the value can't be subscripted
    if self.exec_opts.strict_word_eval():
        raise error.TypeErr(
            val, 'Expected BashArray, SparseArray, or BashAssoc',
            blame_loc)
    return False
1100
1101 def _StringToBigIntOrError(self, s, blame_loc):
1102 # type: (str, loc_t) -> mops.BigInt
1103
1104 # Used by [ $x -gt 3 ]
1105 if self.bracket:
1106 if match.LooksLikeInteger(s):
1107 ok, i = mops.FromStr2(s)
1108 else:
1109 ok = False
1110
1111 if not ok:
1112 # builtin_bracket.py catches this and return status 2, so it's
1113 # not fatal
1114 e_die('Invalid integer %r' % s, blame_loc)
1115
1116 return i
1117
1118 # Used by both [[ $x -gt 3 ]] and $(( x ))
1119 else:
1120 try:
1121 i = self._StringToBigInt(s, blame_loc)
1122 except error.Strict as e:
1123 if self.bracket or self.exec_opts.strict_arith():
1124 raise
1125 else:
1126 i = mops.ZERO
1127 return i
1128
1129 def _EvalCompoundWord(self, word, eval_flags=0):
1130 # type: (word_t, int) -> str
1131 val = self.word_ev.EvalWordToString(word, eval_flags)
1132 return val.s
1133
1134 def EvalB(self, node):
1135 # type: (bool_expr_t) -> bool
1136
1137 UP_node = node
1138 with tagswitch(node) as case:
1139 if case(bool_expr_e.WordTest):
1140 node = cast(bool_expr.WordTest, UP_node)
1141 s = self._EvalCompoundWord(node.w)
1142 return bool(s)
1143
1144 elif case(bool_expr_e.LogicalNot):
1145 node = cast(bool_expr.LogicalNot, UP_node)
1146 b = self.EvalB(node.child)
1147 return not b
1148
1149 elif case(bool_expr_e.LogicalAnd):
1150 node = cast(bool_expr.LogicalAnd, UP_node)
1151 # Short-circuit evaluation
1152 if self.EvalB(node.left):
1153 return self.EvalB(node.right)
1154 else:
1155 return False
1156
1157 elif case(bool_expr_e.LogicalOr):
1158 node = cast(bool_expr.LogicalOr, UP_node)
1159 if self.EvalB(node.left):
1160 return True
1161 else:
1162 return self.EvalB(node.right)
1163
1164 elif case(bool_expr_e.Unary):
1165 node = cast(bool_expr.Unary, UP_node)
1166 op_id = node.op_id
1167 s = self._EvalCompoundWord(node.child)
1168
1169 # Now dispatch on arg type. (arg_type could be static in the
1170 # LST?)
1171 arg_type = consts.BoolArgType(op_id)
1172
1173 if arg_type == bool_arg_type_e.Path:
1174 return bool_stat.DoUnaryOp(op_id, s)
1175
1176 if arg_type == bool_arg_type_e.Str:
1177 if op_id == Id.BoolUnary_z:
1178 return not bool(s)
1179 if op_id == Id.BoolUnary_n:
1180 return bool(s)
1181 if op_id == Id.BoolUnary_true:
1182 return s == 'true'
1183 if op_id == Id.BoolUnary_false:
1184 return s == 'false'
1185
1186 raise AssertionError(op_id) # should never happen
1187
1188 if arg_type == bool_arg_type_e.Other:
1189 if op_id == Id.BoolUnary_t:
1190 return bool_stat.isatty(s, node.child)
1191
1192 # See whether 'set -o' options have been set
1193 if op_id == Id.BoolUnary_o:
1194 index = consts.OptionNum(s)
1195 if index == 0:
1196 return False
1197 else:
1198 return self.exec_opts.opt0_array[index]
1199
1200 if op_id == Id.BoolUnary_v:
1201 return self._IsDefined(s, loc.Word(node.child))
1202
1203 e_die("%s isn't implemented" %
1204 ui.PrettyId(op_id)) # implicit location
1205
1206 raise AssertionError(arg_type)
1207
1208 elif case(bool_expr_e.Binary):
1209 node = cast(bool_expr.Binary, UP_node)
1210
1211 op_id = node.op_id
1212 # Whether to glob escape
1213 eval_flags = 0
1214 with switch(op_id) as case2:
1215 if case2(Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
1216 Id.BoolBinary_GlobNEqual):
1217 eval_flags |= word_eval.QUOTE_FNMATCH
1218 elif case2(Id.BoolBinary_EqualTilde):
1219 eval_flags |= word_eval.QUOTE_ERE
1220
1221 s1 = self._EvalCompoundWord(node.left)
1222 s2 = self._EvalCompoundWord(node.right, eval_flags)
1223
1224 # Now dispatch on arg type
1225 arg_type = consts.BoolArgType(op_id)
1226
1227 if arg_type == bool_arg_type_e.Path:
1228 return bool_stat.DoBinaryOp(op_id, s1, s2)
1229
1230 if arg_type == bool_arg_type_e.Int:
1231 # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
1232 # Bash also allows [[ 1+2 -eq 3 ]].
1233 i1 = self._StringToBigIntOrError(s1, loc.Word(node.left))
1234 i2 = self._StringToBigIntOrError(s2, loc.Word(node.right))
1235
1236 if op_id == Id.BoolBinary_eq:
1237 return mops.Equal(i1, i2)
1238 if op_id == Id.BoolBinary_ne:
1239 return not mops.Equal(i1, i2)
1240 if op_id == Id.BoolBinary_gt:
1241 return mops.Greater(i1, i2)
1242 if op_id == Id.BoolBinary_ge:
1243 return mops.Greater(i1, i2) or mops.Equal(i1, i2)
1244 if op_id == Id.BoolBinary_lt:
1245 return mops.Greater(i2, i1)
1246 if op_id == Id.BoolBinary_le:
1247 return mops.Greater(i2, i1) or mops.Equal(i1, i2)
1248
1249 raise AssertionError(op_id) # should never happen
1250
1251 if arg_type == bool_arg_type_e.Str:
1252 fnmatch_flags = (FNM_CASEFOLD
1253 if self.exec_opts.nocasematch() else 0)
1254
1255 if op_id in (Id.BoolBinary_GlobEqual,
1256 Id.BoolBinary_GlobDEqual):
1257 #log('Matching %s against pattern %s', s1, s2)
1258 return libc.fnmatch(s2, s1, fnmatch_flags)
1259
1260 if op_id == Id.BoolBinary_GlobNEqual:
1261 return not libc.fnmatch(s2, s1, fnmatch_flags)
1262
1263 if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
1264 return s1 == s2
1265
1266 if op_id == Id.BoolBinary_NEqual:
1267 return s1 != s2
1268
1269 if op_id == Id.BoolBinary_EqualTilde:
1270 # TODO: This should go to --debug-file
1271 #log('Matching %r against regex %r', s1, s2)
1272 regex_flags = (REG_ICASE
1273 if self.exec_opts.nocasematch() else 0)
1274
1275 try:
1276 indices = libc.regex_search(s2, regex_flags, s1, 0)
1277 except ValueError as e:
1278 # Status 2 indicates a regex parse error. This is
1279 # fatal in OSH but not in bash, which treats [[
1280 # like a command with an exit code.
1281 e_die_status(2, e.message, loc.Word(node.right))
1282
1283 if indices is not None:
1284 self.mem.SetRegexMatch(
1285 RegexMatch(s1, indices, eggex_ops.No))
1286 return True
1287 else:
1288 self.mem.SetRegexMatch(regex_match.No)
1289 return False
1290
1291 if op_id == Id.Op_Less:
1292 return str_cmp(s1, s2) < 0
1293
1294 if op_id == Id.Op_Great:
1295 return str_cmp(s1, s2) > 0
1296
1297 raise AssertionError(op_id) # should never happen
1298
1299 raise AssertionError(node.tag())