osh/sh_expr_eval.py

OILS / osh / sh_expr_eval.py View on Github | oilshell.org

1229 lines, 786 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	sh_expr_eval.py -- Shell boolean and arithmetic expressions.
10	"""
11	from __future__ import print_function
12
13	from _devbuild.gen.id_kind_asdl import Id
14	from _devbuild.gen.runtime_asdl import scope_t
15	from _devbuild.gen.syntax_asdl import (
16	word_t,
17	CompoundWord,
18	Token,
19	loc,
20	loc_t,
21	source,
22	arith_expr,
23	arith_expr_e,
24	arith_expr_t,
25	bool_expr,
26	bool_expr_e,
27	bool_expr_t,
28	sh_lhs,
29	sh_lhs_e,
30	sh_lhs_t,
31	BracedVarSub,
32	)
33	from _devbuild.gen.option_asdl import option_i
34	from _devbuild.gen.types_asdl import bool_arg_type_e
35	from _devbuild.gen.value_asdl import (
36	value,
37	value_e,
38	value_t,
39	sh_lvalue,
40	sh_lvalue_e,
41	sh_lvalue_t,
42	LeftName,
43	eggex_ops,
44	regex_match,
45	RegexMatch,
46	)
47	from core import alloc
48	from core import error
49	from core.error import e_die, e_die_status, e_strict, e_usage
50	from core import num
51	from core import state
52	from display import ui
53	from core import util
54	from frontend import consts
55	from frontend import lexer
56	from frontend import location
57	from frontend import match
58	from frontend import reader
59	from mycpp import mops
60	from mycpp import mylib
61	from mycpp.mylib import log, tagswitch, switch, str_cmp
62	from osh import bool_stat
63	from osh import word_eval
64
65	import libc # for fnmatch
66	# Import these names directly because the C++ translation uses macros literally.
67	from libc import FNM_CASEFOLD, REG_ICASE
68
69	from typing import Tuple, Optional, cast, TYPE_CHECKING
70	if TYPE_CHECKING:
71	from core import optview
72	from frontend import parse_lib
73
74	_ = log
75
76	#
77	# Arith and Command/Word variants of assignment
78	#
79	# Calls EvalShellLhs()
80	# a[$key]=$val # osh/cmd_eval.py:814 (command_e.ShAssignment)
81	# Calls EvalArithLhs()
82	# (( a[key] = val )) # osh/sh_expr_eval.py:326 (_EvalLhsArith)
83	#
84	# Calls OldValue()
85	# a[$key]+=$val # osh/cmd_eval.py:795 (assign_op_e.PlusEqual)
86	# (( a[key] += val )) # osh/sh_expr_eval.py:308 (_EvalLhsAndLookupArith)
87	#
88	# RHS Indexing
89	# val=${a[$key]} # osh/word_eval.py:639 (bracket_op_e.ArrayIndex)
90	# (( val = a[key] )) # osh/sh_expr_eval.py:509 (Id.Arith_LBracket)
91	#
92
93
def OldValue(lval, mem, exec_opts):
    # type: (sh_lvalue_t, state.Mem, Optional[optview.Exec]) -> value_t
    """Fetch the current value of an lvalue, for augmented assignment.

    Used for s+=val and (( i += 1 ))

    Args:
      lval: the location being updated -- a plain variable, an indexed array
            element, or an associative array element
      mem: variable storage, queried by variable name
      exec_opts: can be None if we don't want to check set -u!
                 Because s+=val doesn't check it.

    Returns:
      The variable's value as-is for a plain name.  For a[i] and A[k], a
      value.Str holding the element, or an empty value.Str when the element
      isn't set.

    TODO: A stricter and less ambiguous version for YSH.
    - Problem: why does sh_lvalue have Indexed and Keyed, while sh_lhs only has
      IndexedName?
      - should I have location.LName and sh_lvalue.Indexed only?
      - and Indexed uses the index_t type?
        - well that might be Str or Int
    """
    assert isinstance(lval, sh_lvalue_t), lval

    # First pass: every variant carries a name; pull it out.
    # TODO: refactor sh_lvalue_t to make this simpler
    UP_lval = lval
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):  # (( i++ ))
            lval = cast(LeftName, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Indexed):  # (( a[i]++ ))
            lval = cast(sh_lvalue.Indexed, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Keyed):  # (( A['K']++ )) ?  I think this works
            lval = cast(sh_lvalue.Keyed, UP_lval)
            var_name = lval.name
        else:
            raise AssertionError()

    val = mem.GetValue(var_name)
    if exec_opts and exec_opts.nounset() and val.tag() == value_e.Undef:
        e_die('Undefined variable %r' % var_name)  # TODO: location info

    # Second pass: narrow the looked-up value down to the addressed element.
    UP_val = val
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):
            return val

        elif case(sh_lvalue_e.Indexed):
            lval = cast(sh_lvalue.Indexed, UP_lval)

            array_val = None  # type: value.BashArray
            with tagswitch(val) as case2:
                if case2(value_e.Undef):
                    # An undefined name is treated like an empty array
                    array_val = value.BashArray([])
                elif case2(value_e.BashArray):
                    arr = cast(value.BashArray, UP_val)
                    # mycpp rewrite: add a temporary.  cast() creates a new
                    # var in inner scope
                    array_val = arr
                else:
                    e_die("Can't use [] on value of type %s" % ui.ValType(val))

            item = word_eval.GetArrayItem(array_val.strs, lval.index)

            if item is not None:
                assert isinstance(item, str), item
                val = value.Str(item)
            else:
                val = value.Str('')  # NOTE: Other logic is value.Undef?  0?

        elif case(sh_lvalue_e.Keyed):
            lval = cast(sh_lvalue.Keyed, UP_lval)

            assoc_val = None  # type: value.BashAssoc
            with tagswitch(val) as case2:
                if case2(value_e.Undef):
                    # Treated as unreachable: callers are assumed never to
                    # produce a Keyed lvalue for an undefined name.
                    raise AssertionError()
                elif case2(value_e.BashAssoc):
                    assoc = cast(value.BashAssoc, UP_val)
                    # mycpp rewrite: add a temporary.  cast() creates a new
                    # var in inner scope
                    assoc_val = assoc
                else:
                    e_die("Can't use [] on value of type %s" % ui.ValType(val))

            found = assoc_val.d.get(lval.key)
            if found is not None:
                val = value.Str(found)
            else:
                val = value.Str('')

        else:
            raise AssertionError()

    return val
185
186
187	# TODO: Should refactor for int/char-based processing
if mylib.PYTHON:

    def IsLower(ch):
        # type: (str) -> bool
        """Return True when ch sorts between 'a' and 'z' inclusive."""
        return 'a' <= ch <= 'z'

    def IsUpper(ch):
        # type: (str) -> bool
        """Return True when ch sorts between 'A' and 'Z' inclusive."""
        return 'A' <= ch <= 'Z'
197
198
class UnsafeArith(object):
    """For parsing a[i] at RUNTIME."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            parse_ctx,  # type: parse_lib.ParseContext
            arith_ev,  # type: ArithEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        """Store the collaborators needed to parse and evaluate at runtime."""
        self.mem = mem
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        self.parse_ctx = parse_ctx
        self.arith_ev = arith_ev
        self.errfmt = errfmt

        # Shared with the parse context, so dynamically parsed code is
        # registered in the same arena
        self.arena = parse_ctx.arena

    def ParseLValue(self, s, location):
        # type: (str, loc_t) -> sh_lvalue_t
        """Parse sh_lvalue for 'unset' and 'printf -v'.

        It uses the arith parser, so it behaves like the LHS of (( a[i] = x ))

        Args:
          s: the text naming the place, e.g. 'x' or 'a[i]'
          location: where to blame errors

        With parse_sh_arith off, only a plain variable name is accepted.
        Unless eval_unsafe_arith is on, command subs are disabled while the
        LHS is evaluated.
        """
        if not self.parse_ctx.parse_opts.parse_sh_arith():
            # Do something simpler for YSH
            if not match.IsValidVarName(s):
                e_die('Invalid variable name %r (parse_sh_arith is off)' % s,
                      location)
            return LeftName(s, location)

        a_parser = self.parse_ctx.MakeArithParser(s)
        dynamic_src = source.Dynamic('dynamic LHS', location)

        with alloc.ctx_SourceCode(self.arena, dynamic_src):
            try:
                anode = a_parser.Parse()
            except error.Parse as e:
                self.errfmt.PrettyPrintError(e)
                # Exception for builtins 'unset' and 'printf'
                e_usage('got invalid LHS expression', location)

        # Note: we parse '1+2', and then it becomes a runtime error because
        # it's not a valid LHS.  Could be a parse error.

        if self.exec_opts.eval_unsafe_arith():
            return self.arith_ev.EvalArithLhs(anode)

        # Prevent attacks like these by default:
        #
        # unset -v 'A["$(echo K; rm *)"]'
        with state.ctx_Option(self.mutable_opts,
                              [option_i._allow_command_sub], False):
            return self.arith_ev.EvalArithLhs(anode)

    def ParseVarRef(self, ref_str, blame_tok):
        # type: (str, Token) -> BracedVarSub
        """Parse and evaluate value for ${!ref}

        This supports:
        - 0 to 9 for $0 to $9
        - @ for "$@" etc.

        See the related grammar in osh/word_parse.py _ReadBracedVarSub

        Note: declare -n allows 'varname' and 'varname[i]' and 'varname[@]', but it
        does NOT allow 0 to 9, @, *

        NamerefExpr = NAME Subscript?   # this allows @ and * too

        _ResolveNameOrRef currently gives you a 'cell'.  So it might not support
        sh_lvalue.Indexed?
        """
        line_reader = reader.StringLineReader(ref_str, self.arena)
        # Named lx so it doesn't shadow the imported 'lexer' module
        lx = self.parse_ctx.MakeLexer(line_reader)
        w_parser = self.parse_ctx.MakeWordParser(lx, line_reader)

        with alloc.ctx_SourceCode(self.arena, source.VarRef(blame_tok)):
            try:
                bvs_part = w_parser.ParseVarRef()
            except error.Parse as e:
                # This prints the inner location
                self.errfmt.PrettyPrintError(e)

                # this affects builtins 'unset' and 'printf'
                e_die("Invalid var ref expression", blame_tok)

        return bvs_part
295
296
def _MaybeParseInt(s, blame_loc):
    # type: (str, loc_t) -> Tuple[bool, mops.BigInt]
    """Try to interpret a whole string as a shell integer constant.

    Args:
      s: candidate text; the entire string must be consumed by the number
         lexer
      blame_loc: location used for e_strict() errors

    Returns:
      (True, value) when the string looks like an integer
      (False, ...)  when it doesn't

    Raises:
      error.Strict (via e_strict) for a base outside 2..64, or a digit that
      is too large for the base.

    Integer formats that are recognized:
      0xAB     hex
      042      octal
      42       decimal
      64#z     arbitrary base

    In base#digits, digit values are 0-9, a-z (10-35), A-Z (36-61), @ (62)
    and _ (63).  As in bash, when the base is 36 or less, uppercase letters
    mean the same as lowercase ones, so 16#FF == 16#ff == 255.
    """
    id_, pos = match.MatchShNumberToken(s, 0)  # use re2c lexer
    if pos != len(s):
        # trailing data isn't allowed
        return (False, mops.BigInt(0))

    # Do conversions

    if id_ == Id.ShNumber_Dec:
        # Normal base 10 integer.
        return (True, mops.FromStr(s))

    elif id_ == Id.ShNumber_Oct:
        # 0123, offset by 1
        return (True, mops.FromStr(s[1:], 8))

    elif id_ == Id.ShNumber_Hex:
        # 0xff, offset by 2
        return (True, mops.FromStr(s[2:], 16))

    elif id_ == Id.ShNumber_BaseN:
        b, digits = mylib.split_once(s, '#')
        assert digits is not None, digits  # assured by lexer

        try:
            base = int(b)  # machine integer, not BigInt
        except ValueError:
            # Unreachable per the regex validation above
            raise AssertionError()

        if base > 64:
            e_strict('Base %d cannot be larger than 64' % base, blame_loc)
        if base < 2:
            # Base 2 itself is valid, so the bound is "at least 2"
            e_strict('Base %d must be at least 2' % base, blame_loc)

        integer = mops.ZERO
        for ch in digits:
            if IsLower(ch):
                digit = ord(ch) - ord('a') + 10
            elif IsUpper(ch):
                if base <= 36:
                    # Case-insensitive digits for small bases, like bash
                    digit = ord(ch) - ord('A') + 10
                else:
                    digit = ord(ch) - ord('A') + 36
            elif ch == '@':  # horrible syntax
                digit = 62
            elif ch == '_':
                digit = 63
            elif ch.isdigit():
                digit = int(ch)
            else:
                # Unreachable per the regex validation above
                raise AssertionError()

            if digit >= base:
                e_strict('Digits %r out of range for base %d' % (digits, base),
                         blame_loc)

            # formula is:
            # integer = integer * base + digit
            integer = mops.Add(mops.Mul(integer, mops.BigInt(base)),
                               mops.BigInt(digit))
        return (True, integer)

    else:
        # Id.Unknown_Tok or Id.Eol_Tok
        return (False, mops.BigInt(0))  # not an integer
373
374
class ArithEvaluator(object):
    """Shared between arith and bool evaluators.

    They both:

    1. Convert strings to integers, respecting shopt -s strict_arith.
    2. Look up variables and evaluate words.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            parse_ctx,  # type: Optional[parse_lib.ParseContext]
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        """
        Args:
          parse_ctx: None disables recursive evaluation of strings as
                     arithmetic expressions (see _StringToBigInt)
        """
        # Must be assigned after construction; see CheckCircularDeps()
        self.word_ev = None  # type: word_eval.StringWordEvaluator
        self.mem = mem
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        self.parse_ctx = parse_ctx
        self.errfmt = errfmt

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that word_ev was wired up after construction."""
        assert self.word_ev is not None

    def _StringToBigInt(self, s, blame_loc):
        # type: (str, loc_t) -> mops.BigInt
        """Use bash-like rules to coerce a string to an integer.

        Runtime parsing enables silly stuff like $(( $(echo 1)$(echo 2) + 1 )) => 13

        bare word: variable
        quoted word: string (not done?)

        Order of attempts:
        1. An integer constant (decimal, octal, hex, base#digits)
        2. Without a parse_ctx: always an error -- e_strict for '' or a
           name like x42, e_die otherwise
        3. The empty string is zero
        4. Parse the string as an arithmetic expression and evaluate it, with
           command subs disabled unless eval_unsafe_arith is on
        """
        s = s.strip()

        ok, i = _MaybeParseInt(s, blame_loc)
        if ok:
            return i

        # Doesn't look like an integer

        # note: 'test' and '[' never evaluate recursively
        if self.parse_ctx is None:
            if len(s) == 0 or match.IsValidVarName(s):
                # x42 could evaluate to 0
                e_strict("Invalid integer constant %r" % s, blame_loc)
            else:
                # 42x is always fatal!
                e_die("Invalid integer constant %r" % s, blame_loc)

        # Special case so we don't get EOF error
        if len(s) == 0:
            return mops.ZERO

        # For compatibility: Try to parse it as an expression and evaluate it.
        a_parser = self.parse_ctx.MakeArithParser(s)

        try:
            node2 = a_parser.Parse()  # may raise error.Parse
        except error.Parse as e:
            self.errfmt.PrettyPrintError(e)
            e_die('Parse error in recursive arithmetic', e.location)

        # Prevent infinite recursion of $(( 1x )) -- it's a word that evaluates
        # to itself, and you don't want to reparse it as a word.
        if node2.tag() == arith_expr_e.Word:
            e_die("Invalid integer constant %r" % s, blame_loc)

        if self.exec_opts.eval_unsafe_arith():
            integer = self.EvalToBigInt(node2)
        else:
            # BoolEvaluator doesn't have parse_ctx or mutable_opts
            assert self.mutable_opts is not None

            # We don't need to flip _allow_process_sub, because they can't be
            # parsed.  See spec/bugs.test.sh.
            with state.ctx_Option(self.mutable_opts,
                                  [option_i._allow_command_sub], False):
                integer = self.EvalToBigInt(node2)

        return integer

    def _ValToIntOrError(self, val, blame):
        # type: (value_t, arith_expr_t) -> mops.BigInt
        """Convert a value to an integer, or fail.

        Undef and unparseable strings are "strict" errors: they raise only
        when strict_arith is on, and otherwise become zero.  Values that are
        not Undef, Int or Str are always fatal.
        """
        try:
            UP_val = val
            with tagswitch(val) as case:
                if case(value_e.Undef):
                    # 'nounset' already handled before got here
                    # Happens upon a[undefined]=42, which unfortunately turns into a[0]=42.
                    e_strict('Undefined value in arithmetic context',
                             loc.Arith(blame))

                elif case(value_e.Int):
                    val = cast(value.Int, UP_val)
                    return val.i

                elif case(value_e.Str):
                    val = cast(value.Str, UP_val)
                    # calls e_strict
                    return self._StringToBigInt(val.s, loc.Arith(blame))

        except error.Strict as e:
            if self.exec_opts.strict_arith():
                raise
            else:
                return mops.ZERO

        # Arrays and associative arrays always fail -- not controlled by
        # strict_arith.
        # In bash, (( a )) is like (( a[0] )), but I don't want that.
        # And returning '0' gives different results.
        e_die(
            "Expected a value convertible to integer, got %s" %
            ui.ValType(val), loc.Arith(blame))

    def _EvalLhsAndLookupArith(self, node):
        # type: (arith_expr_t) -> Tuple[mops.BigInt, sh_lvalue_t]
        """ For x = y and x += y and ++x

        Returns:
          (old integer value, lvalue to store the new value into)
        """

        lval = self.EvalArithLhs(node)
        val = OldValue(lval, self.mem, self.exec_opts)

        # BASH_LINENO, arr (array name without strict_array), etc.
        # A bare array name decays to element 0 / key '0', for both the read
        # and the later store.
        if (val.tag() in (value_e.BashArray, value_e.BashAssoc) and
                lval.tag() == sh_lvalue_e.Var):
            named_lval = cast(LeftName, lval)
            if word_eval.ShouldArrayDecay(named_lval.name, self.exec_opts):
                if val.tag() == value_e.BashArray:
                    lval = sh_lvalue.Indexed(named_lval.name, 0, loc.Missing)
                elif val.tag() == value_e.BashAssoc:
                    lval = sh_lvalue.Keyed(named_lval.name, '0', loc.Missing)
                val = word_eval.DecayArray(val)

        # This error message could be better, but we already have one
        #if val.tag() == value_e.BashArray:
        #  e_die("Can't use assignment like ++ or += on arrays")

        i = self._ValToIntOrError(val, node)
        return i, lval

    def _Store(self, lval, new_int):
        # type: (sh_lvalue_t, mops.BigInt) -> None
        """Store an integer into lval, as its string representation."""
        val = value.Str(mops.ToStr(new_int))
        state.OshLanguageSetValue(self.mem, lval, val)

    def EvalToBigInt(self, node):
        # type: (arith_expr_t) -> mops.BigInt
        """Used externally by ${a[i+1]} and ${a:start:len}.

        Also used internally.
        """
        val = self.Eval(node)

        # BASH_LINENO, arr (array name without strict_array), etc.
        if (val.tag() in (value_e.BashArray, value_e.BashAssoc) and
                node.tag() == arith_expr_e.VarSub):
            vsub = cast(Token, node)
            if word_eval.ShouldArrayDecay(lexer.LazyStr(vsub), self.exec_opts):
                val = word_eval.DecayArray(val)

        i = self._ValToIntOrError(val, node)
        return i

    def EvalToInt(self, node):
        # type: (arith_expr_t) -> int
        """Like EvalToBigInt(), but truncated to a machine integer."""
        return mops.BigTruncate(self.EvalToBigInt(node))

    def Eval(self, node):
        # type: (arith_expr_t) -> value_t
        """Evaluate an arithmetic expression to a value.

        Returns:
          value.Int for empty expressions, assignments, unary and binary
            operators (other than indexing)
          whatever mem.GetValue() holds for a bare name like x, which may be
            Undef, a BashArray or a BashAssoc
          the word evaluator's string result for a word like $x
          value.Str or value.Undef for indexing like a[i]
          the value of the chosen branch for a ? b : c

        Callers that need an integer should use EvalToBigInt().

        NOTE: (( A['x'] = 'x' )) and (( x = A['x'] )) are syntactically valid in
        bash, but don't do what you'd think.  'x' sometimes a variable name and
        sometimes a key.
        """
        # OSH semantics: Variable NAMES cannot be formed dynamically; but INTEGERS
        # can.  ${foo:-3}4 is OK.  $? will be a compound word too, so we don't have
        # to handle that as a special case.

        UP_node = node
        with tagswitch(node) as case:
            if case(arith_expr_e.EmptyZero):  # $(( ))
                return value.Int(mops.ZERO)  # Weird axiom

            elif case(arith_expr_e.EmptyOne):  # for (( ; ; ))
                return value.Int(mops.ONE)

            elif case(arith_expr_e.VarSub):  # $(( x ))  (can be array)
                vsub = cast(Token, UP_node)
                var_name = lexer.LazyStr(vsub)
                val = self.mem.GetValue(var_name)
                if val.tag() == value_e.Undef and self.exec_opts.nounset():
                    e_die('Undefined variable %r' % var_name, vsub)
                return val

            elif case(arith_expr_e.Word):  # $(( $x )) $(( ${x}${y} )), etc.
                w = cast(CompoundWord, UP_node)
                return self.word_ev.EvalWordToString(w)

            elif case(arith_expr_e.UnaryAssign):  # a++
                node = cast(arith_expr.UnaryAssign, UP_node)

                op_id = node.op_id
                old_big, lval = self._EvalLhsAndLookupArith(node.child)

                if op_id == Id.Node_PostDPlus:  # post-increment
                    new_big = mops.Add(old_big, mops.ONE)
                    result = old_big

                elif op_id == Id.Node_PostDMinus:  # post-decrement
                    new_big = mops.Sub(old_big, mops.ONE)
                    result = old_big

                elif op_id == Id.Arith_DPlus:  # pre-increment
                    new_big = mops.Add(old_big, mops.ONE)
                    result = new_big

                elif op_id == Id.Arith_DMinus:  # pre-decrement
                    new_big = mops.Sub(old_big, mops.ONE)
                    result = new_big

                else:
                    raise AssertionError(op_id)

                self._Store(lval, new_big)
                return value.Int(result)

            elif case(arith_expr_e.BinaryAssign):  # a=1, a+=5, a[1]+=5
                node = cast(arith_expr.BinaryAssign, UP_node)
                op_id = node.op_id

                if op_id == Id.Arith_Equal:
                    # Don't really need a span ID here, because tdop.CheckLhsExpr should
                    # have done all the validation.
                    lval = self.EvalArithLhs(node.left)
                    rhs_big = self.EvalToBigInt(node.right)

                    self._Store(lval, rhs_big)
                    return value.Int(rhs_big)

                old_big, lval = self._EvalLhsAndLookupArith(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                if op_id == Id.Arith_PlusEqual:
                    new_big = mops.Add(old_big, rhs_big)
                elif op_id == Id.Arith_MinusEqual:
                    new_big = mops.Sub(old_big, rhs_big)
                elif op_id == Id.Arith_StarEqual:
                    new_big = mops.Mul(old_big, rhs_big)

                elif op_id == Id.Arith_SlashEqual:
                    if mops.Equal(rhs_big, mops.ZERO):
                        # Blame the divisor expression
                        e_die('Divide by zero', loc.Arith(node.right))
                    new_big = mops.Div(old_big, rhs_big)

                elif op_id == Id.Arith_PercentEqual:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', loc.Arith(node.right))
                    new_big = mops.Rem(old_big, rhs_big)

                # Same negative-count checks as the binary << and >> operators
                elif op_id == Id.Arith_DGreatEqual:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't right shift by negative number",
                            loc.Arith(node.right))
                    new_big = mops.RShift(old_big, rhs_big)
                elif op_id == Id.Arith_DLessEqual:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't left shift by negative number",
                            loc.Arith(node.right))
                    new_big = mops.LShift(old_big, rhs_big)

                elif op_id == Id.Arith_AmpEqual:
                    new_big = mops.BitAnd(old_big, rhs_big)
                elif op_id == Id.Arith_PipeEqual:
                    new_big = mops.BitOr(old_big, rhs_big)
                elif op_id == Id.Arith_CaretEqual:
                    new_big = mops.BitXor(old_big, rhs_big)
                else:
                    raise AssertionError(op_id)  # shouldn't get here

                self._Store(lval, new_big)
                return value.Int(new_big)

            elif case(arith_expr_e.Unary):
                node = cast(arith_expr.Unary, UP_node)
                op_id = node.op_id

                i = self.EvalToBigInt(node.child)

                if op_id == Id.Node_UnaryPlus:  # +i
                    result = i
                elif op_id == Id.Node_UnaryMinus:  # -i
                    result = mops.Sub(mops.ZERO, i)

                elif op_id == Id.Arith_Bang:  # logical negation
                    if mops.Equal(i, mops.ZERO):
                        result = mops.ONE
                    else:
                        result = mops.ZERO
                elif op_id == Id.Arith_Tilde:  # bitwise complement
                    result = mops.BitNot(i)
                else:
                    raise AssertionError(op_id)  # shouldn't get here

                return value.Int(result)

            elif case(arith_expr_e.Binary):
                node = cast(arith_expr.Binary, UP_node)
                op_id = node.op.id

                # Short-circuit evaluation for || and &&.
                if op_id == Id.Arith_DPipe:
                    lhs_big = self.EvalToBigInt(node.left)
                    if mops.Equal(lhs_big, mops.ZERO):
                        rhs_big = self.EvalToBigInt(node.right)
                        if mops.Equal(rhs_big, mops.ZERO):
                            result = mops.ZERO  # false
                        else:
                            result = mops.ONE  # true
                    else:
                        result = mops.ONE  # true
                    return value.Int(result)

                if op_id == Id.Arith_DAmp:
                    lhs_big = self.EvalToBigInt(node.left)
                    if mops.Equal(lhs_big, mops.ZERO):
                        result = mops.ZERO  # false
                    else:
                        rhs_big = self.EvalToBigInt(node.right)
                        if mops.Equal(rhs_big, mops.ZERO):
                            result = mops.ZERO  # false
                        else:
                            result = mops.ONE  # true
                    return value.Int(result)

                if op_id == Id.Arith_LBracket:
                    # NOTE: Similar to bracket_op_e.ArrayIndex in osh/word_eval.py

                    left = self.Eval(node.left)
                    UP_left = left
                    with tagswitch(left) as case:
                        if case(value_e.BashArray):
                            array_val = cast(value.BashArray, UP_left)
                            small_i = mops.BigTruncate(
                                self.EvalToBigInt(node.right))
                            s = word_eval.GetArrayItem(array_val.strs, small_i)

                        elif case(value_e.BashAssoc):
                            left = cast(value.BashAssoc, UP_left)
                            # Blame the [ token if the key isn't a string
                            key = self.EvalWordToString(node.right,
                                                        blame_loc=node.op)
                            s = left.d.get(key)

                        elif case(value_e.Str):
                            left = cast(value.Str, UP_left)
                            if self.exec_opts.strict_arith():
                                e_die(
                                    "Value of type Str can't be indexed (strict_arith)",
                                    node.op)
                            index = self.EvalToBigInt(node.right)
                            # s[0] evaluates to s
                            # s[1] evaluates to Undef
                            s = left.s if mops.Equal(index,
                                                     mops.ZERO) else None

                        elif case(value_e.Undef):
                            if self.exec_opts.strict_arith():
                                e_die(
                                    "Value of type Undef can't be indexed (strict_arith)",
                                    node.op)
                            s = None  # value.Undef

                            # There isn't a way to distinguish Undef vs. empty
                            # string, even with set -o nounset?
                            # s = ''

                        else:
                            # TODO: Add error context
                            e_die(
                                "Value of type %s can't be indexed" %
                                ui.ValType(left), node.op)

                    if s is None:
                        val = value.Undef
                    else:
                        val = value.Str(s)

                    return val

                if op_id == Id.Arith_Comma:
                    self.EvalToBigInt(node.left)  # throw away result
                    result = self.EvalToBigInt(node.right)
                    return value.Int(result)

                # Rest are integers
                lhs_big = self.EvalToBigInt(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                if op_id == Id.Arith_Plus:
                    result = mops.Add(lhs_big, rhs_big)
                elif op_id == Id.Arith_Minus:
                    result = mops.Sub(lhs_big, rhs_big)
                elif op_id == Id.Arith_Star:
                    result = mops.Mul(lhs_big, rhs_big)
                elif op_id == Id.Arith_Slash:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', node.op)
                    result = mops.Div(lhs_big, rhs_big)

                elif op_id == Id.Arith_Percent:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', node.op)
                    result = mops.Rem(lhs_big, rhs_big)

                elif op_id == Id.Arith_DStar:
                    if mops.Greater(mops.ZERO, rhs_big):
                        e_die("Exponent can't be a negative number",
                              loc.Arith(node.right))
                    result = num.Exponent(lhs_big, rhs_big)

                elif op_id == Id.Arith_DEqual:
                    result = mops.FromBool(mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_NEqual:
                    result = mops.FromBool(not mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_Great:
                    result = mops.FromBool(mops.Greater(lhs_big, rhs_big))
                elif op_id == Id.Arith_GreatEqual:
                    result = mops.FromBool(
                        mops.Greater(lhs_big, rhs_big) or
                        mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_Less:
                    result = mops.FromBool(mops.Greater(rhs_big, lhs_big))
                elif op_id == Id.Arith_LessEqual:
                    result = mops.FromBool(
                        mops.Greater(rhs_big, lhs_big) or
                        mops.Equal(lhs_big, rhs_big))

                elif op_id == Id.Arith_Pipe:
                    result = mops.BitOr(lhs_big, rhs_big)
                elif op_id == Id.Arith_Amp:
                    result = mops.BitAnd(lhs_big, rhs_big)
                elif op_id == Id.Arith_Caret:
                    result = mops.BitXor(lhs_big, rhs_big)

                # Note: how to define shift of negative numbers?
                elif op_id == Id.Arith_DLess:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr("Can't left shift by negative number",
                                         node.op)
                    result = mops.LShift(lhs_big, rhs_big)
                elif op_id == Id.Arith_DGreat:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't right shift by negative number", node.op)
                    result = mops.RShift(lhs_big, rhs_big)
                else:
                    raise AssertionError(op_id)

                return value.Int(result)

            elif case(arith_expr_e.TernaryOp):
                node = cast(arith_expr.TernaryOp, UP_node)

                # Only the chosen branch is evaluated
                cond = self.EvalToBigInt(node.cond)
                if mops.Equal(cond, mops.ZERO):
                    return self.Eval(node.false_expr)
                else:
                    return self.Eval(node.true_expr)

            else:
                raise AssertionError(node.tag())

        raise AssertionError('for -Wreturn-type in C++')

    def EvalWordToString(self, node, blame_loc=loc.Missing):
        # type: (arith_expr_t, loc_t) -> str
        """Evaluate an associative array key, which must be a word.

        Args:
          node: the expression in the subscript position
          blame_loc: location for the error when node isn't a word

        Raises:
          error.FatalRuntime if the expression isn't a string
            or if it contains a bare variable like a[x]

        These are allowed because they're unambiguous, unlike a[x]

        a[$x] a["$x"] a["x"] a['x']
        """
        UP_node = node
        if node.tag() == arith_expr_e.Word:  # $(( $x )) $(( ${x}${y} )), etc.
            w = cast(CompoundWord, UP_node)
            val = self.word_ev.EvalWordToString(w)
            return val.s
        else:
            # A[x] is the "Parsing Bash is Undecidable" problem
            # It is a string or var name?
            # (It's parsed as arith_expr.VarSub)
            e_die(
                "Assoc array keys must be strings: $x 'x' \"$x\" etc. (OILS-ERR-101)",
                blame_loc)

    def EvalShellLhs(self, node, which_scopes):
        # type: (sh_lhs_t, scope_t) -> sh_lvalue_t
        """Evaluate a shell LHS expression

        For a=b and a[x]=b etc.

        For a[x]=b, the subscript is a string key when the name currently
        refers to an associative array, and an integer index otherwise.
        """
        assert isinstance(node, sh_lhs_t), node

        UP_node = node
        lval = None  # type: sh_lvalue_t
        with tagswitch(node) as case:
            if case(sh_lhs_e.Name):  # a=x
                node = cast(sh_lhs.Name, UP_node)
                assert node.name is not None

                lval1 = LeftName(node.name, node.left)
                lval = lval1

            elif case(sh_lhs_e.IndexedName):  # a[1+2]=x
                node = cast(sh_lhs.IndexedName, UP_node)
                assert node.name is not None

                if self.mem.IsBashAssoc(node.name):
                    key = self.EvalWordToString(node.index,
                                                blame_loc=node.left)
                    # node.left points to A[ in A[x]=1
                    lval2 = sh_lvalue.Keyed(node.name, key, node.left)
                    lval = lval2
                else:
                    index = mops.BigTruncate(self.EvalToBigInt(node.index))
                    lval3 = sh_lvalue.Indexed(node.name, index, node.left)
                    lval = lval3

            else:
                raise AssertionError(node.tag())

        return lval

    def _VarNameOrWord(self, anode):
        # type: (arith_expr_t) -> Tuple[Optional[str], loc_t]
        """
        Returns a variable name if the arith node can be interpreted that way.

        Returns:
          (name, location of the node) for a bare name or a word
          (None, loc.Missing) for any other kind of node
        """
        UP_anode = anode
        with tagswitch(anode) as case:
            if case(arith_expr_e.VarSub):
                tok = cast(Token, UP_anode)
                return (lexer.LazyStr(tok), tok)

            elif case(arith_expr_e.Word):
                w = cast(CompoundWord, UP_anode)
                var_name = self.EvalWordToString(w)
                return (var_name, w)

        no_str = None  # type: str
        return (no_str, loc.Missing)

    def EvalArithLhs(self, anode):
        # type: (arith_expr_t) -> sh_lvalue_t
        """
        For (( a[x] = 1 )) etc.

        Returns:
          sh_lvalue.Keyed or sh_lvalue.Indexed for name[subscript], depending
          on whether the name currently refers to an associative array;
          LeftName for a bare name or a word.

        Fails with status 2 when the expression can't name a place to store
        to, including when the thing being subscripted is neither a name nor
        a word.
        """
        UP_anode = anode
        if anode.tag() == arith_expr_e.Binary:
            anode = cast(arith_expr.Binary, UP_anode)
            if anode.op.id == Id.Arith_LBracket:
                var_name, blame_loc = self._VarNameOrWord(anode.left)

                # Check for None BEFORE validating the name, so that a
                # non-name on the left of [ falls through to the generic
                # 'Invalid LHS' error instead of passing None to the matcher.
                if var_name is not None:
                    # (( 1[2] = 3 )) isn't valid
                    if not match.IsValidVarName(var_name):
                        e_die('Invalid variable name %r' % var_name,
                              blame_loc)

                    if self.mem.IsBashAssoc(var_name):
                        arith_loc = location.TokenForArith(anode)
                        key = self.EvalWordToString(anode.right,
                                                    blame_loc=arith_loc)
                        return sh_lvalue.Keyed(var_name, key, blame_loc)
                    else:
                        index = mops.BigTruncate(self.EvalToBigInt(
                            anode.right))
                        return sh_lvalue.Indexed(var_name, index, blame_loc)

        var_name, blame_loc = self._VarNameOrWord(anode)
        if var_name is not None:
            return LeftName(var_name, blame_loc)

        # e.g. unset 'x-y'.  status 2 for runtime parse error
        e_die_status(2, 'Invalid LHS to modify', blame_loc)
965
966
967	class BoolEvaluator(ArithEvaluator):
968	"""This is also an ArithEvaluator because it has to understand.
969
970	[[ x -eq 3 ]]
971
972	where x='1+2'
973	"""
974
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: Optional[state.MutableOpts]
        parse_ctx,  # type: Optional[parse_lib.ParseContext]
        errfmt,  # type: ui.ErrorFormatter
        always_strict=False  # type: bool
):
    # type: (...) -> None
    """Set up the shared arithmetic state, plus a strictness override.

    Args:
      always_strict: when True, strict integer-conversion errors are always
                     re-raised, regardless of the strict_arith option (see
                     _StringToBigIntOrError)
    """
    ArithEvaluator.__init__(self, mem, exec_opts, mutable_opts, parse_ctx,
                            errfmt)
    self.always_strict = always_strict
988
def _IsDefined(self, s, blame_loc):
    # type: (str, loc_t) -> bool
    """Implement the -v test: is 'name' or 'name[index]' defined?

    Args:
      s: the operand, matched against consts.TEST_V_RE
      blame_loc: location for errors

    Malformed operands and type mismatches are fatal only under
    strict_word_eval; otherwise the answer is simply False.
    """
    found = util.RegexSearch(consts.TEST_V_RE, s)
    if found is None:
        if self.exec_opts.strict_word_eval():
            e_die('-v expected name or name[index]', blame_loc)
        return False

    var_name = found[1]
    index_str = found[3]

    val = self.mem.GetValue(var_name)
    if len(index_str) == 0:  # it's just a variable name
        return val.tag() != value_e.Undef

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray):
            val = cast(value.BashArray, UP_val)

            # TODO: use mops.BigStr
            try:
                index = int(index_str)
            except ValueError as e:
                if self.exec_opts.strict_word_eval():
                    e_die(
                        '-v got BashArray and invalid index %r' %
                        index_str, blame_loc)
                return False

            if index < 0:
                if self.exec_opts.strict_word_eval():
                    e_die('-v got invalid negative index %s' % index_str,
                          blame_loc)
                return False

            # An out of range index counts as not defined
            return index < len(val.strs) and val.strs[index] is not None

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            return index_str in val.d

        else:
            # work around mycpp bug!  parses as 'elif'
            pass

    # name[index] on something that isn't an array
    if self.exec_opts.strict_word_eval():
        raise error.TypeErr(val, 'Expected BashArray or BashAssoc',
                            blame_loc)
    return False
    raise AssertionError()
1045
def _StringToBigIntOrError(self, s, blame_word=None):
    # type: (str, Optional[word_t]) -> mops.BigInt
    """Used by both [[ $x -gt 3 ]] and (( $x )).

    Strict conversion errors are re-raised when always_strict or
    strict_arith is set; otherwise the result is zero.
    """
    # Named blame_loc so it doesn't shadow the imported 'location' module
    blame_loc = loc.Missing  # type: loc_t
    if blame_word:
        blame_loc = loc.Word(blame_word)

    try:
        result = self._StringToBigInt(s, blame_loc)
    except error.Strict as e:
        if self.always_strict or self.exec_opts.strict_arith():
            raise
        else:
            result = mops.ZERO
    return result
1062
def _EvalCompoundWord(self, word, eval_flags=0):
    # type: (word_t, int) -> str
    """Evaluate a word with the word evaluator and return its string."""
    return self.word_ev.EvalWordToString(word, eval_flags).s
1067
def EvalB(self, node):
    # type: (bool_expr_t) -> bool
    """Evaluate a boolean expression node and return its truth value.

    Dispatches on the tag of node:

    - WordTest: true when the word evaluates to a non-empty string.
    - LogicalNot / LogicalAnd / LogicalOr: evaluated recursively.  And/Or
      short-circuit, so the right child is only evaluated when needed.
    - Unary / Binary: the operand word(s) are evaluated to strings, and the
      operator is then dispatched on consts.BoolArgType(op_id).

    Side effects:
      For Id.BoolBinary_EqualTilde, the outcome of the regex search is
      recorded with self.mem.SetRegexMatch(), on both match and non-match.

    Raises:
      AssertionError: when a node tag, arg type, or op_id is reached that
        no branch handles.

    Also calls e_die() for a bool_arg_type_e.Other unary operator that has
    no branch here, and e_die_status(2, ...) when libc.regex_search()
    raises ValueError.
    """
    # Keep the un-narrowed reference: each case re-binds 'node' with cast().
    UP_node = node
    with tagswitch(node) as case:
        if case(bool_expr_e.WordTest):
            node = cast(bool_expr.WordTest, UP_node)
            s = self._EvalCompoundWord(node.w)
            return bool(s)

        elif case(bool_expr_e.LogicalNot):
            node = cast(bool_expr.LogicalNot, UP_node)
            b = self.EvalB(node.child)
            return not b

        elif case(bool_expr_e.LogicalAnd):
            node = cast(bool_expr.LogicalAnd, UP_node)
            # Short-circuit evaluation
            if self.EvalB(node.left):
                return self.EvalB(node.right)
            else:
                return False

        elif case(bool_expr_e.LogicalOr):
            node = cast(bool_expr.LogicalOr, UP_node)
            # Short-circuit evaluation
            if self.EvalB(node.left):
                return True
            else:
                return self.EvalB(node.right)

        elif case(bool_expr_e.Unary):
            node = cast(bool_expr.Unary, UP_node)
            op_id = node.op_id
            s = self._EvalCompoundWord(node.child)

            # Now dispatch on arg type. (arg_type could be static in the
            # LST?)
            arg_type = consts.BoolArgType(op_id)

            if arg_type == bool_arg_type_e.Path:
                return bool_stat.DoUnaryOp(op_id, s)

            if arg_type == bool_arg_type_e.Str:
                if op_id == Id.BoolUnary_z:
                    return not bool(s)
                if op_id == Id.BoolUnary_n:
                    return bool(s)

                raise AssertionError(op_id)  # should never happen

            if arg_type == bool_arg_type_e.Other:
                if op_id == Id.BoolUnary_t:
                    return bool_stat.isatty(s, node.child)

                # See whether 'set -o' options have been set
                if op_id == Id.BoolUnary_o:
                    index = consts.OptionNum(s)
                    # OptionNum() == 0 is treated as "no such option".
                    if index == 0:
                        return False
                    else:
                        return self.exec_opts.opt0_array[index]

                if op_id == Id.BoolUnary_v:
                    return self._IsDefined(s, loc.Word(node.child))

                e_die("%s isn't implemented" %
                      ui.PrettyId(op_id))  # implicit location

            raise AssertionError(arg_type)

        elif case(bool_expr_e.Binary):
            node = cast(bool_expr.Binary, UP_node)

            op_id = node.op_id
            # Whether to glob escape
            eval_flags = 0
            with switch(op_id) as case2:
                if case2(Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
                         Id.BoolBinary_GlobNEqual):
                    eval_flags |= word_eval.QUOTE_FNMATCH
                elif case2(Id.BoolBinary_EqualTilde):
                    eval_flags |= word_eval.QUOTE_ERE

            # Only the right operand is evaluated with the quoting flags;
            # it is the one used as the pattern / regex below.
            s1 = self._EvalCompoundWord(node.left)
            s2 = self._EvalCompoundWord(node.right, eval_flags)

            # Now dispatch on arg type
            arg_type = consts.BoolArgType(op_id)

            if arg_type == bool_arg_type_e.Path:
                return bool_stat.DoBinaryOp(op_id, s1, s2)

            if arg_type == bool_arg_type_e.Int:
                # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
                # Bash also allows [[ 1+2 -eq 3 ]].
                i1 = self._StringToBigIntOrError(s1, blame_word=node.left)
                i2 = self._StringToBigIntOrError(s2, blame_word=node.right)

                # Every comparison is built from mops.Equal and
                # mops.Greater; 'less than' swaps the operands.
                if op_id == Id.BoolBinary_eq:
                    return mops.Equal(i1, i2)
                if op_id == Id.BoolBinary_ne:
                    return not mops.Equal(i1, i2)
                if op_id == Id.BoolBinary_gt:
                    return mops.Greater(i1, i2)
                if op_id == Id.BoolBinary_ge:
                    return mops.Greater(i1, i2) or mops.Equal(i1, i2)
                if op_id == Id.BoolBinary_lt:
                    return mops.Greater(i2, i1)
                if op_id == Id.BoolBinary_le:
                    return mops.Greater(i2, i1) or mops.Equal(i1, i2)

                raise AssertionError(op_id)  # should never happen

            if arg_type == bool_arg_type_e.Str:
                fnmatch_flags = (FNM_CASEFOLD
                                 if self.exec_opts.nocasematch() else 0)

                if op_id in (Id.BoolBinary_GlobEqual,
                             Id.BoolBinary_GlobDEqual):
                    #log('Matching %s against pattern %s', s1, s2)
                    # The pattern (right operand) is the first argument.
                    return libc.fnmatch(s2, s1, fnmatch_flags)

                if op_id == Id.BoolBinary_GlobNEqual:
                    return not libc.fnmatch(s2, s1, fnmatch_flags)

                if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
                    return s1 == s2

                if op_id == Id.BoolBinary_NEqual:
                    return s1 != s2

                if op_id == Id.BoolBinary_EqualTilde:
                    # TODO: This should go to --debug-file
                    #log('Matching %r against regex %r', s1, s2)
                    regex_flags = (REG_ICASE
                                   if self.exec_opts.nocasematch() else 0)

                    try:
                        indices = libc.regex_search(s2, regex_flags, s1, 0)
                    except ValueError as e:
                        # Status 2 indicates a regex parse error. This is
                        # fatal in OSH but not in bash, which treats [[
                        # like a command with an exit code.
                        e_die_status(2, e.message, loc.Word(node.right))

                    # Record the result either way, so a failed match
                    # replaces whatever an earlier match stored.
                    if indices is not None:
                        self.mem.SetRegexMatch(
                            RegexMatch(s1, indices, eggex_ops.No))
                        return True
                    else:
                        self.mem.SetRegexMatch(regex_match.No)
                        return False

                if op_id == Id.Op_Less:
                    return str_cmp(s1, s2) < 0

                if op_id == Id.Op_Great:
                    return str_cmp(s1, s2) > 0

                raise AssertionError(op_id)  # should never happen

    raise AssertionError(node.tag())