osh/sh_expr_eval.py

OILS / osh / sh_expr_eval.py View on Github | oils.pub

1299 lines, 839 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	sh_expr_eval.py -- Shell boolean and arithmetic expressions.
10	"""
11	from __future__ import print_function
12
13	from _devbuild.gen.id_kind_asdl import Id
14	from _devbuild.gen.runtime_asdl import error_code_e, scope_t
15	from _devbuild.gen.syntax_asdl import (
16	word_t,
17	CompoundWord,
18	Token,
19	loc,
20	loc_t,
21	source,
22	arith_expr,
23	arith_expr_e,
24	arith_expr_t,
25	bool_expr,
26	bool_expr_e,
27	bool_expr_t,
28	sh_lhs,
29	sh_lhs_e,
30	sh_lhs_t,
31	BracedVarSub,
32	)
33	from _devbuild.gen.option_asdl import option_i
34	from _devbuild.gen.types_asdl import bool_arg_type_e
35	from _devbuild.gen.value_asdl import (
36	value,
37	value_e,
38	value_t,
39	sh_lvalue,
40	sh_lvalue_e,
41	sh_lvalue_t,
42	LeftName,
43	eggex_ops,
44	regex_match,
45	RegexMatch,
46	)
47	from core import alloc
48	from core import bash_impl
49	from core import error
50	from core.error import e_die, e_die_status, e_strict, e_usage
51	from core import num
52	from core import state
53	from display import ui
54	from core import util
55	from frontend import consts
56	from frontend import lexer
57	from frontend import location
58	from frontend import match
59	from frontend import reader
60	from mycpp import mops
61	from mycpp import mylib
62	from mycpp.mylib import log, tagswitch, switch, str_cmp
63	from osh import bool_stat
64	from osh import word_eval
65
66	import libc # for fnmatch
67	# Import these names directly because the C++ translation uses macros literally.
68	from libc import FNM_CASEFOLD, REG_ICASE
69
70	from typing import Tuple, Optional, cast, TYPE_CHECKING
71	if TYPE_CHECKING:
72	from core import optview
73	from frontend import parse_lib
74
75	_ = log
76
77	#
78	# Arith and Command/Word variants of assignment
79	#
80	# Calls EvalShellLhs()
81	# a[$key]=$val # osh/cmd_eval.py:814 (command_e.ShAssignment)
82	# Calls EvalArithLhs()
83	# (( a[key] = val )) # osh/sh_expr_eval.py:326 (_EvalLhsArith)
84	#
85	# Calls OldValue()
86	# a[$key]+=$val # osh/cmd_eval.py:795 (assign_op_e.PlusEqual)
87	# (( a[key] += val )) # osh/sh_expr_eval.py:308 (_EvalLhsAndLookupArith)
88	#
89	# RHS Indexing
90	# val=${a[$key]} # osh/word_eval.py:639 (bracket_op_e.ArrayIndex)
91	# (( val = a[key] )) # osh/sh_expr_eval.py:509 (Id.Arith_LBracket)
92	#
93
94
def OldValue(lval, mem, exec_opts):
    # type: (sh_lvalue_t, state.Mem, Optional[optview.Exec]) -> value_t
    """Fetch the value currently stored at an lvalue, for augmented assignment.

    For s+=val and (( i += 1 ))

    Args:
      lval: the location whose existing value we need to look up: a plain
            name, name[index], or name[key]
      mem: variable storage, queried with GetValue()
      exec_opts: can be None if we don't want to check set -u!
                 Because s+=val doesn't check it.

    Returns:
      For a plain name, whatever mem.GetValue() returned.  For an indexed or
      keyed lvalue, a value.Str with the element, or value.Str('') when there
      is no such element.

    TODO: A stricter and less ambiguous version for YSH.
    - Problem: why does sh_lvalue have Indexed and Keyed, while sh_lhs only has
      IndexedName?
      - should I have location.LName and sh_lvalue.Indexed only?
      - and Indexed uses the index_t type?
        - well that might be Str or Int
    """
    assert isinstance(lval, sh_lvalue_t), lval

    # TODO: refactor sh_lvalue_t to make this simpler
    # Pass 1: every variant carries a name; extract it after narrowing.
    UP_lval = lval
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):  # (( i++ ))
            lval = cast(LeftName, UP_lval)
            name = lval.name
        elif case(sh_lvalue_e.Indexed):  # (( a[i]++ ))
            lval = cast(sh_lvalue.Indexed, UP_lval)
            name = lval.name
        elif case(sh_lvalue_e.Keyed):  # (( A['K']++ )) ? I think this works
            lval = cast(sh_lvalue.Keyed, UP_lval)
            name = lval.name
        else:
            raise AssertionError()

    cur = mem.GetValue(name)
    # set -u is only enforced when the caller handed us exec_opts
    if exec_opts and exec_opts.nounset():
        if cur.tag() == value_e.Undef:
            e_die('Undefined variable %r' % name)  # TODO: location info

    # Pass 2: for a[i] and A[k], dig the element out of the container.
    UP_cur = cur
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):
            return cur

        elif case(sh_lvalue_e.Indexed):
            lval = cast(sh_lvalue.Indexed, UP_lval)

            with tagswitch(cur) as case2:
                if case2(value_e.Undef):
                    elem = None  # type: Optional[str]
                elif case2(value_e.BashArray):
                    arr = cast(value.BashArray, UP_cur)
                    # Note: We ignore error_code in the return value of
                    # BashArray_GetElement because an invalid index will be
                    # reported on the assignment stage anyway.
                    elem, _ = bash_impl.BashArray_GetElement(arr, lval.index)
                elif case2(value_e.SparseArray):
                    sparse = cast(value.SparseArray, UP_cur)
                    elem, _ = bash_impl.SparseArray_GetElement(
                        sparse, mops.IntWiden(lval.index))
                else:
                    e_die("Can't use [] on value of type %s" %
                          ui.ValType(cur))

            if elem is not None:
                assert isinstance(elem, str), elem
                cur = value.Str(elem)
            else:
                cur = value.Str('')  # NOTE: Other logic is value.Undef?  0?

        elif case(sh_lvalue_e.Keyed):
            lval = cast(sh_lvalue.Keyed, UP_lval)

            assoc = None  # type: value.BashAssoc
            with tagswitch(cur) as case2:
                if case2(value_e.Undef):
                    # This branch is asserted to be unreachable: an undefined
                    # name is not expected to arrive here as a Keyed lvalue.
                    raise AssertionError()
                elif case2(value_e.BashAssoc):
                    narrowed = cast(value.BashAssoc, UP_cur)
                    # mycpp rewrite: add tmp.  cast() creates a new var in
                    # inner scope
                    assoc = narrowed
                    elem = bash_impl.BashAssoc_GetElement(assoc, lval.key)
                else:
                    e_die("Can't use [] on value of type %s" %
                          ui.ValType(cur))

            if elem is not None:
                cur = value.Str(elem)
            else:
                cur = value.Str('')

        else:
            raise AssertionError()

    return cur
190
191
192	# TODO: Should refactor for int/char-based processing
if mylib.PYTHON:

    def IsLower(ch):
        # type: (str) -> bool
        """Return True when ch sorts within 'a'..'z' (inclusive)."""
        return 'a' <= ch <= 'z'

    def IsUpper(ch):
        # type: (str) -> bool
        """Return True when ch sorts within 'A'..'Z' (inclusive)."""
        return 'A' <= ch <= 'Z'
202
203
class UnsafeArith(object):
    """For parsing a[i] at RUNTIME."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            parse_ctx,  # type: parse_lib.ParseContext
            arith_ev,  # type: ArithEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        """Store collaborators; the arena is taken from parse_ctx."""
        self.parse_ctx = parse_ctx
        self.arena = self.parse_ctx.arena

        self.mem = mem
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        self.arith_ev = arith_ev
        self.errfmt = errfmt

    def ParseLValue(self, s, location):
        # type: (str, loc_t) -> sh_lvalue_t
        """Parse sh_lvalue for 'unset' and 'printf -v'.

        It uses the arith parser, so it behaves like the LHS of (( a[i] = x ))

        Args:
          s: the text to interpret as an lvalue
          location: blamed in error messages, and recorded as the origin of
                    the dynamically parsed source

        Returns:
          LeftName when parse_sh_arith is off; otherwise whatever
          ArithEvaluator.EvalArithLhs() produces for the parsed expression.
        """
        if not self.parse_ctx.parse_opts.parse_sh_arith():
            # Do something simpler for YSH: only a bare name is accepted
            if match.IsValidVarName(s):
                return LeftName(s, location)
            e_die('Invalid variable name %r (parse_sh_arith is off)' % s,
                  location)

        arith_parser = self.parse_ctx.MakeArithParser(s)

        dynamic_src = source.Dynamic('dynamic LHS', location)
        with alloc.ctx_SourceCode(self.arena, dynamic_src):
            try:
                lhs_node = arith_parser.Parse()
            except error.Parse as e:
                self.errfmt.PrettyPrintError(e)
                # Exception for builtins 'unset' and 'printf'
                e_usage('got invalid LHS expression', location)

        # Note: we parse '1+2', and then it becomes a runtime error because
        # it's not a valid LHS.  Could be a parse error.

        if not self.exec_opts.eval_unsafe_arith():
            # Prevent attacks like these by default:
            #
            # unset -v 'A["$(echo K; rm *)"]'
            with state.ctx_Option(self.mutable_opts,
                                  [option_i._allow_command_sub], False):
                result = self.arith_ev.EvalArithLhs(lhs_node)
        else:
            result = self.arith_ev.EvalArithLhs(lhs_node)

        return result

    def ParseVarRef(self, ref_str, blame_tok):
        # type: (str, Token) -> BracedVarSub
        """Parse and evaluate value for ${!ref}

        This supports:
        - 0 to 9 for $0 to $9
        - @ for "$@" etc.

        See the related grammar in osh/word_parse.py _ReadBracedVarSub

        Note: declare -n allows 'varname' and 'varname[i]' and 'varname[@]', but it
        does NOT allow 0 to 9, @, *

        NamerefExpr = NAME Subscript?   # this allows @ and * too

        _ResolveNameOrRef currently gives you a 'cell'.  So it might not support
        sh_lvalue.Indexed?
        """
        rdr = reader.StringLineReader(ref_str, self.arena)
        lx = self.parse_ctx.MakeLexer(rdr)
        word_parser = self.parse_ctx.MakeWordParser(lx, rdr)

        with alloc.ctx_SourceCode(self.arena, source.VarRef(blame_tok)):
            try:
                parsed = word_parser.ParseVarRef()
            except error.Parse as e:
                # This prints the inner location
                self.errfmt.PrettyPrintError(e)

                # this affects builtins 'unset' and 'printf'
                e_die("Invalid var ref expression", blame_tok)

        return parsed
300
301
def _ParseOshInteger(s, blame_loc):
    # type: (str, loc_t) -> Tuple[bool, mops.BigInt]
    """Try to interpret the whole string as a shell integer literal.

    Args:
      s: candidate text; it must be consumed entirely by a single number
         token, trailing data isn't allowed
      blame_loc: location attached to any error raised

    Returns:
      (True, value) when the string looks like an integer
      (False, ...)  when it doesn't

    Raises (via e_die / e_strict):
      a fatal error when a decimal, octal or hex literal is too big;
      a strict error when the base of BASE#DIGITS is outside 2..64, or a
      digit is not valid for that base.

    Integer formats that are recognized:
      0xAB    hex
      042     octal
      42      decimal
      64#z    arbitrary base
    """
    id_, pos = match.MatchShNumberToken(s, 0)  # use re2c lexer
    if pos != len(s):
        # trailing data isn't allowed
        return (False, mops.BigInt(0))

    # Do conversions

    if id_ == Id.ShNumber_Dec:
        # Normal base 10 integer.
        ok, big_int = mops.FromStr2(s)
        if not ok:
            e_die('Integer too big: %s' % s, blame_loc)
        return (True, big_int)

    elif id_ == Id.ShNumber_Oct:
        # 0123, offset by 1
        ok, big_int = mops.FromStr2(s[1:], 8)
        if not ok:
            e_die('Octal integer too big: %s' % s, blame_loc)
        return (True, big_int)

    elif id_ == Id.ShNumber_Hex:
        # 0xff, offset by 2
        ok, big_int = mops.FromStr2(s[2:], 16)
        if not ok:
            e_die('Hex integer too big: %s' % s, blame_loc)
        return (True, big_int)

    elif id_ == Id.ShNumber_BaseN:
        b, digits = mylib.split_once(s, '#')
        assert digits is not None, digits  # assured by lexer

        try:
            base = int(b)  # machine integer, not BigInt
        except ValueError:
            # Unreachable per the regex validation above
            raise AssertionError()

        if base > 64:
            e_strict('Base %d cannot be larger than 64' % base, blame_loc)
        if base < 2:
            # Base 2 itself passes the check above, so the message must say
            # "at least", not "larger than".
            e_strict('Base %d must be at least 2' % base, blame_loc)

        big_base = mops.IntWiden(base)  # loop invariant
        integer = mops.ZERO
        for ch in digits:
            # Digit values: 0-9, then a-z (10..35), A-Z (36..61), @ and _
            if IsLower(ch):
                digit = ord(ch) - ord('a') + 10
            elif IsUpper(ch):
                digit = ord(ch) - ord('A') + 36
            elif ch == '@':  # horrible syntax
                digit = 62
            elif ch == '_':
                digit = 63
            elif ch.isdigit():
                digit = int(ch)
            else:
                # Unreachable per the regex validation above
                raise AssertionError()

            if digit >= base:
                e_strict('Digits %r out of range for base %d' % (digits, base),
                         blame_loc)

            # formula is:
            # integer = integer * base + digit
            integer = mops.Add(mops.Mul(integer, big_base),
                               mops.IntWiden(digit))
        return (True, integer)

    else:
        # Id.Unknown_Tok or Id.Eol_Tok
        return (False, mops.BigInt(0))  # not an integer
387
388
class ArithEvaluator(object):
    """Shared between arith and bool evaluators.

    They both:

    1. Convert strings to integers, respecting shopt -s strict_arith.
    2. Look up variables and evaluate words.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            parse_ctx,  # type: Optional[parse_lib.ParseContext]
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        """Store collaborators.

        word_ev starts out as None and has to be assigned afterwards; see
        CheckCircularDeps().  parse_ctx may be None, in which case strings
        are never re-parsed as arithmetic expressions.
        """
        self.word_ev = None  # type: word_eval.StringWordEvaluator
        self.mem = mem
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        self.parse_ctx = parse_ctx
        self.errfmt = errfmt

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that word_ev has been assigned after construction."""
        assert self.word_ev is not None

    def _StringToBigInt(self, s, blame_loc):
        # type: (str, loc_t) -> mops.BigInt
        """Use bash-like rules to coerce a string to an integer.

        Runtime parsing enables silly stuff like $(( $(echo 1)$(echo 2) + 1 )) => 13

        bare word: variable
        quoted word: string (not done?)

        The string is stripped, then tried as an integer literal.  If that
        fails and there is a parse_ctx, it is parsed and evaluated as an
        arithmetic expression; command subs are disallowed during that
        evaluation unless eval_unsafe_arith is on.
        """
        s = s.strip()

        ok, i = _ParseOshInteger(s, blame_loc)
        if ok:
            return i

        # Doesn't look like an integer

        # note: 'test' and '[' never evaluate recursively
        if self.parse_ctx is None:
            if len(s) == 0 or match.IsValidVarName(s):
                # x42 could evaluate to 0
                e_strict("Invalid integer constant %r" % s, blame_loc)
            else:
                # 42x is always fatal!
                e_die("Invalid integer constant %r" % s, blame_loc)

        # Special case so we don't get EOF error
        if len(s) == 0:
            return mops.ZERO

        # For compatibility: Try to parse it as an expression and evaluate it.
        a_parser = self.parse_ctx.MakeArithParser(s)

        try:
            node2 = a_parser.Parse()  # may raise error.Parse
        except error.Parse as e:
            self.errfmt.PrettyPrintError(e)
            e_die('Parse error in recursive arithmetic', e.location)

        # Prevent infinite recursion of $(( 1x )) -- it's a word that evaluates
        # to itself, and you don't want to reparse it as a word.
        if node2.tag() == arith_expr_e.Word:
            e_die("Invalid integer constant %r" % s, blame_loc)

        if self.exec_opts.eval_unsafe_arith():
            integer = self.EvalToBigInt(node2)
        else:
            # BoolEvaluator doesn't have parse_ctx or mutable_opts
            assert self.mutable_opts is not None

            # We don't need to flip _allow_process_sub, because they can't be
            # parsed.  See spec/bugs.test.sh.
            with state.ctx_Option(self.mutable_opts,
                                  [option_i._allow_command_sub], False):
                integer = self.EvalToBigInt(node2)

        return integer

    def _ValToIntOrError(self, val, blame):
        # type: (value_t, arith_expr_t) -> mops.BigInt
        """Convert a value to an integer, or fail.

        Undef and non-integer strings produce a strict error, which becomes
        zero unless strict_arith is on.  Values that are not Undef, Int or
        Str are always fatal.
        """
        try:
            UP_val = val
            with tagswitch(val) as case:
                if case(value_e.Undef):
                    # 'nounset' already handled before got here
                    # Happens upon a[undefined]=42, which unfortunately turns into a[0]=42.
                    e_strict('Undefined value in arithmetic context',
                             loc.Arith(blame))

                elif case(value_e.Int):
                    val = cast(value.Int, UP_val)
                    return val.i

                elif case(value_e.Str):
                    val = cast(value.Str, UP_val)
                    # calls e_strict
                    return self._StringToBigInt(val.s, loc.Arith(blame))

        except error.Strict as e:
            if self.exec_opts.strict_arith():
                raise
            else:
                return mops.ZERO

        # Arrays and associative arrays always fail -- not controlled by
        # strict_arith.
        # In bash, (( a )) is like (( a[0] )), but I don't want that.
        # And returning '0' gives different results.
        e_die(
            "Expected a value convertible to integer, got %s" %
            ui.ValType(val), loc.Arith(blame))

    def _EvalLhsAndLookupArith(self, node):
        # type: (arith_expr_t) -> Tuple[mops.BigInt, sh_lvalue_t]
        """ For x = y and x += y and ++x

        Returns:
          (old integer value, lvalue to store the new value into)
        """

        lval = self.EvalArithLhs(node)
        val = OldValue(lval, self.mem, self.exec_opts)

        # BASH_LINENO, arr (array name without strict_array), etc.
        if (val.tag() in (value_e.BashArray, value_e.BashAssoc,
                          value_e.SparseArray) and
                lval.tag() == sh_lvalue_e.Var):
            named_lval = cast(LeftName, lval)
            if word_eval.ShouldArrayDecay(named_lval.name, self.exec_opts):
                # A bare array name is redirected to element 0 / key '0'
                if val.tag() in (value_e.BashArray, value_e.SparseArray):
                    lval = sh_lvalue.Indexed(named_lval.name, 0, loc.Missing)
                elif val.tag() == value_e.BashAssoc:
                    lval = sh_lvalue.Keyed(named_lval.name, '0', loc.Missing)
                val = word_eval.DecayArray(val)

        # This error message could be better, but we already have one
        #if val.tag() in (value_e.BashArray, value_e.SparseArray):
        #  e_die("Can't use assignment like ++ or += on arrays")

        i = self._ValToIntOrError(val, node)
        return i, lval

    def _Store(self, lval, new_int):
        # type: (sh_lvalue_t, mops.BigInt) -> None
        """Write an integer to an lvalue, stored as its string form."""
        val = value.Str(mops.ToStr(new_int))
        state.OshLanguageSetValue(self.mem, lval, val)

    def EvalToBigInt(self, node):
        # type: (arith_expr_t) -> mops.BigInt
        """Used externally by ${a[i+1]} and ${a:start:len}.

        Also used internally.
        """
        val = self.Eval(node)

        # BASH_LINENO, arr (array name without strict_array), etc.
        if (val.tag() in (value_e.BashArray, value_e.BashAssoc,
                          value_e.SparseArray) and
                node.tag() == arith_expr_e.VarSub):
            vsub = cast(Token, node)
            if word_eval.ShouldArrayDecay(lexer.LazyStr(vsub), self.exec_opts):
                val = word_eval.DecayArray(val)

        i = self._ValToIntOrError(val, node)
        return i

    def EvalToInt(self, node):
        # type: (arith_expr_t) -> int
        """Like EvalToBigInt(), truncated with mops.BigTruncate()."""
        return mops.BigTruncate(self.EvalToBigInt(node))

    def Eval(self, node):
        # type: (arith_expr_t) -> value_t
        """Evaluate an arithmetic expression node to a value.

        Returns:
          value.Int for the empty expressions, operators and assignments
          the value from mem.GetValue() for a bare name (may be Undef or an
            array value)
          the word evaluator's result for a word like $x
          value.Str or value.Undef for the indexing operator a[i]
          for the ternary operator, the result of the selected branch

        NOTE: (( A['x'] = 'x' )) and (( x = A['x'] )) are syntactically valid in
        bash, but don't do what you'd think.  'x' sometimes a variable name and
        sometimes a key.
        """
        # OSH semantics: Variable NAMES cannot be formed dynamically; but INTEGERS
        # can.  ${foo:-3}4 is OK.  $? will be a compound word too, so we don't have
        # to handle that as a special case.

        UP_node = node
        with tagswitch(node) as case:
            if case(arith_expr_e.EmptyZero):  # $(( ))
                return value.Int(mops.ZERO)  # Weird axiom

            elif case(arith_expr_e.EmptyOne):  # for (( ; ; ))
                return value.Int(mops.ONE)

            elif case(arith_expr_e.VarSub):  # $(( x ))  (can be array)
                vsub = cast(Token, UP_node)
                var_name = lexer.LazyStr(vsub)
                val = self.mem.GetValue(var_name)
                if val.tag() == value_e.Undef and self.exec_opts.nounset():
                    e_die('Undefined variable %r' % var_name, vsub)
                return val

            elif case(arith_expr_e.Word):  # $(( $x )) $(( ${x}${y} )), etc.
                w = cast(CompoundWord, UP_node)
                return self.word_ev.EvalWordToString(w)

            elif case(arith_expr_e.UnaryAssign):  # a++
                node = cast(arith_expr.UnaryAssign, UP_node)

                op_id = node.op_id
                old_big, lval = self._EvalLhsAndLookupArith(node.child)

                if op_id == Id.Node_PostDPlus:  # post-increment
                    new_big = mops.Add(old_big, mops.ONE)
                    result = old_big

                elif op_id == Id.Node_PostDMinus:  # post-decrement
                    new_big = mops.Sub(old_big, mops.ONE)
                    result = old_big

                elif op_id == Id.Arith_DPlus:  # pre-increment
                    new_big = mops.Add(old_big, mops.ONE)
                    result = new_big

                elif op_id == Id.Arith_DMinus:  # pre-decrement
                    new_big = mops.Sub(old_big, mops.ONE)
                    result = new_big

                else:
                    raise AssertionError(op_id)

                self._Store(lval, new_big)
                return value.Int(result)

            elif case(arith_expr_e.BinaryAssign):  # a=1, a+=5, a[1]+=5
                node = cast(arith_expr.BinaryAssign, UP_node)
                op_id = node.op_id

                if op_id == Id.Arith_Equal:
                    # Don't really need a span ID here, because tdop.CheckLhsExpr should
                    # have done all the validation.
                    lval = self.EvalArithLhs(node.left)
                    rhs_big = self.EvalToBigInt(node.right)

                    self._Store(lval, rhs_big)
                    return value.Int(rhs_big)

                old_big, lval = self._EvalLhsAndLookupArith(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                if op_id == Id.Arith_PlusEqual:
                    new_big = mops.Add(old_big, rhs_big)
                elif op_id == Id.Arith_MinusEqual:
                    new_big = mops.Sub(old_big, rhs_big)
                elif op_id == Id.Arith_StarEqual:
                    new_big = mops.Mul(old_big, rhs_big)

                elif op_id == Id.Arith_SlashEqual:
                    if mops.Equal(rhs_big, mops.ZERO):
                        # Blame the divisor expression
                        e_die('Divide by zero', loc.Arith(node.right))
                    new_big = mops.Div(old_big, rhs_big)

                elif op_id == Id.Arith_PercentEqual:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', loc.Arith(node.right))
                    new_big = mops.Rem(old_big, rhs_big)

                elif op_id == Id.Arith_DGreatEqual:
                    # Same guard as the binary >> operator below
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't right shift by negative number",
                            loc.Arith(node.right))
                    new_big = mops.RShift(old_big, rhs_big)
                elif op_id == Id.Arith_DLessEqual:
                    # Same guard as the binary << operator below
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't left shift by negative number",
                            loc.Arith(node.right))
                    new_big = mops.LShift(old_big, rhs_big)
                elif op_id == Id.Arith_AmpEqual:
                    new_big = mops.BitAnd(old_big, rhs_big)
                elif op_id == Id.Arith_PipeEqual:
                    new_big = mops.BitOr(old_big, rhs_big)
                elif op_id == Id.Arith_CaretEqual:
                    new_big = mops.BitXor(old_big, rhs_big)
                else:
                    raise AssertionError(op_id)  # shouldn't get here

                self._Store(lval, new_big)
                return value.Int(new_big)

            elif case(arith_expr_e.Unary):
                node = cast(arith_expr.Unary, UP_node)
                op_id = node.op_id

                i = self.EvalToBigInt(node.child)

                if op_id == Id.Node_UnaryPlus:  # +i
                    result = i
                elif op_id == Id.Node_UnaryMinus:  # -i
                    result = mops.Sub(mops.ZERO, i)

                elif op_id == Id.Arith_Bang:  # logical negation
                    if mops.Equal(i, mops.ZERO):
                        result = mops.ONE
                    else:
                        result = mops.ZERO
                elif op_id == Id.Arith_Tilde:  # bitwise complement
                    result = mops.BitNot(i)
                else:
                    raise AssertionError(op_id)  # shouldn't get here

                return value.Int(result)

            elif case(arith_expr_e.Binary):
                node = cast(arith_expr.Binary, UP_node)
                op_id = node.op.id

                # Short-circuit evaluation for || and &&.
                if op_id == Id.Arith_DPipe:
                    lhs_big = self.EvalToBigInt(node.left)
                    if mops.Equal(lhs_big, mops.ZERO):
                        rhs_big = self.EvalToBigInt(node.right)
                        if mops.Equal(rhs_big, mops.ZERO):
                            result = mops.ZERO  # false
                        else:
                            result = mops.ONE  # true
                    else:
                        result = mops.ONE  # true
                    return value.Int(result)

                if op_id == Id.Arith_DAmp:
                    lhs_big = self.EvalToBigInt(node.left)
                    if mops.Equal(lhs_big, mops.ZERO):
                        result = mops.ZERO  # false
                    else:
                        rhs_big = self.EvalToBigInt(node.right)
                        if mops.Equal(rhs_big, mops.ZERO):
                            result = mops.ZERO  # false
                        else:
                            result = mops.ONE  # true
                    return value.Int(result)

                if op_id == Id.Arith_LBracket:
                    # NOTE: Similar to bracket_op_e.ArrayIndex in osh/word_eval.py

                    left = self.Eval(node.left)
                    UP_left = left
                    with tagswitch(left) as case:
                        if case(value_e.BashArray):
                            array_val = cast(value.BashArray, UP_left)
                            small_i = mops.BigTruncate(
                                self.EvalToBigInt(node.right))
                            s, error_code = bash_impl.BashArray_GetElement(
                                array_val, small_i)
                            if error_code == error_code_e.IndexOutOfRange:
                                # Note: Bash outputs warning but does not make
                                # it a real error.  We follow the Bash behavior
                                # here.
                                small_length = bash_impl.BashArray_Length(
                                    array_val)
                                self.errfmt.Print_(
                                    "Index %d out of bounds for array of length %d"
                                    % (small_i, small_length),
                                    blame_loc=node.op)

                        elif case(value_e.SparseArray):
                            sparse_val = cast(value.SparseArray, UP_left)
                            i = self.EvalToBigInt(node.right)
                            s, error_code = bash_impl.SparseArray_GetElement(
                                sparse_val, i)
                            if error_code == error_code_e.IndexOutOfRange:
                                # Note: Bash outputs warning but does not make
                                # it a real error.  We follow the Bash behavior
                                # here.
                                length = bash_impl.SparseArray_Length(
                                    sparse_val)
                                self.errfmt.Print_(
                                    "Index %s out of bounds for array of length %s"
                                    % (mops.ToStr(i), mops.ToStr(length)),
                                    blame_loc=node.op)

                        elif case(value_e.BashAssoc):
                            left = cast(value.BashAssoc, UP_left)
                            # Blame the [ token if the key isn't a string word
                            key = self.EvalWordToString(node.right,
                                                        blame_loc=node.op)
                            s = bash_impl.BashAssoc_GetElement(left, key)

                        elif case(value_e.Str):
                            left = cast(value.Str, UP_left)
                            if self.exec_opts.strict_arith():
                                e_die(
                                    "Value of type Str can't be indexed (strict_arith)",
                                    node.op)
                            index = self.EvalToBigInt(node.right)
                            # s[0] evaluates to s
                            # s[1] evaluates to Undef
                            s = left.s if mops.Equal(index,
                                                     mops.ZERO) else None

                        elif case(value_e.Undef):
                            if self.exec_opts.strict_arith():
                                e_die(
                                    "Value of type Undef can't be indexed (strict_arith)",
                                    node.op)
                            s = None  # value.Undef

                            # There isn't a way to distinguish Undef vs. empty
                            # string, even with set -o nounset?
                            # s = ''

                        else:
                            # TODO: Add error context
                            e_die(
                                "Value of type %s can't be indexed" %
                                ui.ValType(left), node.op)

                    if s is None:
                        val = value.Undef
                    else:
                        val = value.Str(s)

                    return val

                if op_id == Id.Arith_Comma:
                    self.EvalToBigInt(node.left)  # throw away result
                    result = self.EvalToBigInt(node.right)
                    return value.Int(result)

                # Rest are integers
                lhs_big = self.EvalToBigInt(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                if op_id == Id.Arith_Plus:
                    result = mops.Add(lhs_big, rhs_big)
                elif op_id == Id.Arith_Minus:
                    result = mops.Sub(lhs_big, rhs_big)
                elif op_id == Id.Arith_Star:
                    result = mops.Mul(lhs_big, rhs_big)
                elif op_id == Id.Arith_Slash:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', node.op)
                    result = mops.Div(lhs_big, rhs_big)

                elif op_id == Id.Arith_Percent:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', node.op)
                    result = mops.Rem(lhs_big, rhs_big)

                elif op_id == Id.Arith_DStar:
                    if mops.Greater(mops.ZERO, rhs_big):
                        e_die("Exponent can't be a negative number",
                              loc.Arith(node.right))
                    result = num.Exponent(lhs_big, rhs_big)

                elif op_id == Id.Arith_DEqual:
                    result = mops.FromBool(mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_NEqual:
                    result = mops.FromBool(not mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_Great:
                    result = mops.FromBool(mops.Greater(lhs_big, rhs_big))
                elif op_id == Id.Arith_GreatEqual:
                    result = mops.FromBool(
                        mops.Greater(lhs_big, rhs_big) or
                        mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_Less:
                    result = mops.FromBool(mops.Greater(rhs_big, lhs_big))
                elif op_id == Id.Arith_LessEqual:
                    result = mops.FromBool(
                        mops.Greater(rhs_big, lhs_big) or
                        mops.Equal(lhs_big, rhs_big))

                elif op_id == Id.Arith_Pipe:
                    result = mops.BitOr(lhs_big, rhs_big)
                elif op_id == Id.Arith_Amp:
                    result = mops.BitAnd(lhs_big, rhs_big)
                elif op_id == Id.Arith_Caret:
                    result = mops.BitXor(lhs_big, rhs_big)

                # Note: how to define shift of negative numbers?
                elif op_id == Id.Arith_DLess:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr("Can't left shift by negative number",
                                         node.op)
                    result = mops.LShift(lhs_big, rhs_big)
                elif op_id == Id.Arith_DGreat:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't right shift by negative number", node.op)
                    result = mops.RShift(lhs_big, rhs_big)
                else:
                    raise AssertionError(op_id)

                return value.Int(result)

            elif case(arith_expr_e.TernaryOp):
                node = cast(arith_expr.TernaryOp, UP_node)

                # Only the selected branch is evaluated
                cond = self.EvalToBigInt(node.cond)
                if mops.Equal(cond, mops.ZERO):
                    return self.Eval(node.false_expr)
                else:
                    return self.Eval(node.true_expr)

            else:
                raise AssertionError(node.tag())

        raise AssertionError('for -Wreturn-type in C++')

    def EvalWordToString(self, node, blame_loc=loc.Missing):
        # type: (arith_expr_t, loc_t) -> str
        """Evaluate an arith node that must be a word, and return its string.

        Args:
          node: the expression; only arith_expr_e.Word is accepted
          blame_loc: location for the error raised when it's not a word

        Raises:
          error.FatalRuntime if the expression isn't a string
          or if it contains a bare variable like a[x]

        These are allowed because they're unambiguous, unlike a[x]

        a[$x] a["$x"] a["x"] a['x']
        """
        UP_node = node
        if node.tag() == arith_expr_e.Word:  # $(( $x )) $(( ${x}${y} )), etc.
            w = cast(CompoundWord, UP_node)
            val = self.word_ev.EvalWordToString(w)
            return val.s
        else:
            # A[x] is the "Parsing Bash is Undecidable" problem
            # It is a string or var name?
            # (It's parsed as arith_expr.VarSub)
            e_die(
                "Assoc array keys must be strings: $x 'x' \"$x\" etc. (OILS-ERR-101)",
                blame_loc)

    def EvalShellLhs(self, node, which_scopes):
        # type: (sh_lhs_t, scope_t) -> sh_lvalue_t
        """Evaluate a shell LHS expression

        For a=b and a[x]=b etc.

        A name yields LeftName.  name[index] yields sh_lvalue.Keyed when the
        name currently holds an associative array, else sh_lvalue.Indexed
        with the index evaluated arithmetically.
        """
        assert isinstance(node, sh_lhs_t), node

        UP_node = node
        lval = None  # type: sh_lvalue_t
        with tagswitch(node) as case:
            if case(sh_lhs_e.Name):  # a=x
                node = cast(sh_lhs.Name, UP_node)
                assert node.name is not None

                lval1 = LeftName(node.name, node.left)
                lval = lval1

            elif case(sh_lhs_e.IndexedName):  # a[1+2]=x
                node = cast(sh_lhs.IndexedName, UP_node)
                assert node.name is not None

                if self.mem.IsBashAssoc(node.name):
                    key = self.EvalWordToString(node.index,
                                                blame_loc=node.left)
                    # node.left points to A[ in A[x]=1
                    lval2 = sh_lvalue.Keyed(node.name, key, node.left)
                    lval = lval2
                else:
                    index = mops.BigTruncate(self.EvalToBigInt(node.index))
                    lval3 = sh_lvalue.Indexed(node.name, index, node.left)
                    lval = lval3

            else:
                raise AssertionError(node.tag())

        return lval

    def _VarNameOrWord(self, anode):
        # type: (arith_expr_t) -> Tuple[Optional[str], loc_t]
        """
        Returns a variable name if the arith node can be interpreted that way.

        Returns:
          (name, location) for a bare name or a word (the word is evaluated
          to get the name); (None, loc.Missing) for any other node.
        """
        UP_anode = anode
        with tagswitch(anode) as case:
            if case(arith_expr_e.VarSub):
                tok = cast(Token, UP_anode)
                return (lexer.LazyStr(tok), tok)

            elif case(arith_expr_e.Word):
                w = cast(CompoundWord, UP_anode)
                var_name = self.EvalWordToString(w)
                return (var_name, w)

        no_str = None  # type: Optional[str]
        return (no_str, loc.Missing)

    def EvalArithLhs(self, anode):
        # type: (arith_expr_t) -> sh_lvalue_t
        """
        For (( a[x] = 1 )) etc.

        Returns:
          sh_lvalue.Keyed or sh_lvalue.Indexed for name[...], LeftName for a
          bare name or word.

        Anything else is a fatal 'Invalid LHS to modify' error with status 2.
        """
        UP_anode = anode
        if anode.tag() == arith_expr_e.Binary:
            anode = cast(arith_expr.Binary, UP_anode)
            if anode.op.id == Id.Arith_LBracket:
                var_name, blame_loc = self._VarNameOrWord(anode.left)

                # The name is None when the left side isn't a name or word;
                # check that BEFORE validating, and fall through to the
                # 'Invalid LHS' error below.
                if var_name is not None:
                    # (( 1[2] = 3 )) isn't valid
                    if not match.IsValidVarName(var_name):
                        e_die('Invalid variable name %r' % var_name,
                              blame_loc)

                    if self.mem.IsBashAssoc(var_name):
                        arith_loc = location.TokenForArith(anode)
                        key = self.EvalWordToString(anode.right,
                                                    blame_loc=arith_loc)
                        return sh_lvalue.Keyed(var_name, key, blame_loc)
                    else:
                        index = mops.BigTruncate(self.EvalToBigInt(
                            anode.right))
                        return sh_lvalue.Indexed(var_name, index, blame_loc)

        var_name, blame_loc = self._VarNameOrWord(anode)
        if var_name is not None:
            return LeftName(var_name, blame_loc)

        # e.g. unset 'x-y'.  status 2 for runtime parse error
        e_die_status(2, 'Invalid LHS to modify', blame_loc)
1008
1009
1010	class BoolEvaluator(ArithEvaluator):
1011	"""This is also an ArithEvaluator because it has to understand.
1012
1013	[[ x -eq 3 ]]
1014
1015	where x='1+2'
1016	"""
1017
def __init__(
        self,
        mem,  # type: state.Mem
        exec_opts,  # type: optview.Exec
        mutable_opts,  # type: Optional[state.MutableOpts]
        parse_ctx,  # type: Optional[parse_lib.ParseContext]
        errfmt,  # type: ui.ErrorFormatter
        bracket=False  # type: bool
):
    # type: (...) -> None
    """Set up the shared ArithEvaluator state, plus the [ vs. [[ flag."""
    # [ and [[ are slightly different
    self.bracket = bracket
    ArithEvaluator.__init__(self, mem, exec_opts, mutable_opts, parse_ctx,
                            errfmt)
1031
def _IsDefined(self, s, blame_loc):
    # type: (str, loc_t) -> bool
    """Implement the -v test: is the name, or name[index], defined?

    Args:
      s: text matched against consts.TEST_V_RE
      blame_loc: location for errors

    Returns:
      False for malformed input and for containers that can't be indexed,
      unless strict_word_eval is on, in which case those are errors.
      An out-of-bounds array index is always a fatal error.
    """
    groups = util.RegexSearch(consts.TEST_V_RE, s)
    if groups is None:
        if self.exec_opts.strict_word_eval():
            e_die('-v expected name or name[index]', blame_loc)
        return False

    name = groups[1]
    subscript = groups[3]

    val = self.mem.GetValue(name)
    if len(subscript) == 0:  # it's just a variable name
        return val.tag() != value_e.Undef

    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.BashArray, value_e.SparseArray):
            try:
                # could use mops.FromStr?
                idx = int(subscript)
            except ValueError:
                if self.exec_opts.strict_word_eval():
                    e_die(
                        '-v got BashArray and invalid index %r' %
                        subscript, blame_loc)
                return False

            if val.tag() == value_e.BashArray:
                arr = cast(value.BashArray, UP_val)
                found, err = bash_impl.BashArray_HasElement(arr, idx)
                if err == error_code_e.IndexOutOfRange:
                    n = bash_impl.BashArray_Length(arr)
                    e_die(
                        '-v got index %s, which is out of bounds for array of length %d'
                        % (subscript, n), blame_loc)

            elif val.tag() == value_e.SparseArray:
                sparse = cast(value.SparseArray, UP_val)
                found, err = bash_impl.SparseArray_HasElement(
                    sparse, mops.IntWiden(idx))
                if err == error_code_e.IndexOutOfRange:
                    big_n = bash_impl.SparseArray_Length(sparse)
                    e_die(
                        '-v got index %s, which is out of bounds for array of length %s'
                        % (subscript, mops.ToStr(big_n)), blame_loc)

            else:
                raise AssertionError()

            return found

        elif case(value_e.BashAssoc):
            val = cast(value.BashAssoc, UP_val)
            return bash_impl.BashAssoc_HasElement(val, subscript)

        else:
            # work around mycpp bug!  parses as 'elif'
            pass

    # Reached only for values that are neither arrays nor assoc arrays
    if self.exec_opts.strict_word_eval():
        raise error.TypeErr(
            val, 'Expected BashArray, SparseArray, or BashAssoc',
            blame_loc)
    return False
    raise AssertionError()
1100
1101	def _StringToBigIntOrError(self, s, blame_loc):
1102	# type: (str, loc_t) -> mops.BigInt
1103
1104	# Used by [ $x -gt 3 ]
1105	if self.bracket:
1106	if match.LooksLikeInteger(s):
1107	ok, i = mops.FromStr2(s)
1108	else:
1109	ok = False
1110
1111	if not ok:
1112	# builtin_bracket.py catches this and return status 2, so it's
1113	# not fatal
1114	e_die('Invalid integer %r' % s, blame_loc)
1115
1116	return i
1117
1118	# Used by both [[ $x -gt 3 ]] and $(( x ))
1119	else:
1120	try:
1121	i = self._StringToBigInt(s, blame_loc)
1122	except error.Strict as e:
1123	if self.bracket or self.exec_opts.strict_arith():
1124	raise
1125	else:
1126	i = mops.ZERO
1127	return i
1128
1129	def _EvalCompoundWord(self, word, eval_flags=0):
1130	# type: (word_t, int) -> str
1131	val = self.word_ev.EvalWordToString(word, eval_flags)
1132	return val.s
1133
1134	def EvalB(self, node):
1135	# type: (bool_expr_t) -> bool
1136
1137	UP_node = node
1138	with tagswitch(node) as case:
1139	if case(bool_expr_e.WordTest):
1140	node = cast(bool_expr.WordTest, UP_node)
1141	s = self._EvalCompoundWord(node.w)
1142	return bool(s)
1143
1144	elif case(bool_expr_e.LogicalNot):
1145	node = cast(bool_expr.LogicalNot, UP_node)
1146	b = self.EvalB(node.child)
1147	return not b
1148
1149	elif case(bool_expr_e.LogicalAnd):
1150	node = cast(bool_expr.LogicalAnd, UP_node)
1151	# Short-circuit evaluation
1152	if self.EvalB(node.left):
1153	return self.EvalB(node.right)
1154	else:
1155	return False
1156
1157	elif case(bool_expr_e.LogicalOr):
1158	node = cast(bool_expr.LogicalOr, UP_node)
1159	if self.EvalB(node.left):
1160	return True
1161	else:
1162	return self.EvalB(node.right)
1163
1164	elif case(bool_expr_e.Unary):
1165	node = cast(bool_expr.Unary, UP_node)
1166	op_id = node.op_id
1167	s = self._EvalCompoundWord(node.child)
1168
1169	# Now dispatch on arg type. (arg_type could be static in the
1170	# LST?)
1171	arg_type = consts.BoolArgType(op_id)
1172
1173	if arg_type == bool_arg_type_e.Path:
1174	return bool_stat.DoUnaryOp(op_id, s)
1175
1176	if arg_type == bool_arg_type_e.Str:
1177	if op_id == Id.BoolUnary_z:
1178	return not bool(s)
1179	if op_id == Id.BoolUnary_n:
1180	return bool(s)
1181	if op_id == Id.BoolUnary_true:
1182	return s == 'true'
1183	if op_id == Id.BoolUnary_false:
1184	return s == 'false'
1185
1186	raise AssertionError(op_id) # should never happen
1187
1188	if arg_type == bool_arg_type_e.Other:
1189	if op_id == Id.BoolUnary_t:
1190	return bool_stat.isatty(s, node.child)
1191
1192	# See whether 'set -o' options have been set
1193	if op_id == Id.BoolUnary_o:
1194	index = consts.OptionNum(s)
1195	if index == 0:
1196	return False
1197	else:
1198	return self.exec_opts.opt0_array[index]
1199
1200	if op_id == Id.BoolUnary_v:
1201	return self._IsDefined(s, loc.Word(node.child))
1202
1203	e_die("%s isn't implemented" %
1204	ui.PrettyId(op_id)) # implicit location
1205
1206	raise AssertionError(arg_type)
1207
1208	elif case(bool_expr_e.Binary):
1209	node = cast(bool_expr.Binary, UP_node)
1210
1211	op_id = node.op_id
1212	# Whether to glob escape
1213	eval_flags = 0
1214	with switch(op_id) as case2:
1215	if case2(Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
1216	Id.BoolBinary_GlobNEqual):
1217	eval_flags \|= word_eval.QUOTE_FNMATCH
1218	elif case2(Id.BoolBinary_EqualTilde):
1219	eval_flags \|= word_eval.QUOTE_ERE
1220
1221	s1 = self._EvalCompoundWord(node.left)
1222	s2 = self._EvalCompoundWord(node.right, eval_flags)
1223
1224	# Now dispatch on arg type
1225	arg_type = consts.BoolArgType(op_id)
1226
1227	if arg_type == bool_arg_type_e.Path:
1228	return bool_stat.DoBinaryOp(op_id, s1, s2)
1229
1230	if arg_type == bool_arg_type_e.Int:
1231	# NOTE: We assume they are constants like [[ 3 -eq 3 ]].
1232	# Bash also allows [[ 1+2 -eq 3 ]].
1233	i1 = self._StringToBigIntOrError(s1, loc.Word(node.left))
1234	i2 = self._StringToBigIntOrError(s2, loc.Word(node.right))
1235
1236	if op_id == Id.BoolBinary_eq:
1237	return mops.Equal(i1, i2)
1238	if op_id == Id.BoolBinary_ne:
1239	return not mops.Equal(i1, i2)
1240	if op_id == Id.BoolBinary_gt:
1241	return mops.Greater(i1, i2)
1242	if op_id == Id.BoolBinary_ge:
1243	return mops.Greater(i1, i2) or mops.Equal(i1, i2)
1244	if op_id == Id.BoolBinary_lt:
1245	return mops.Greater(i2, i1)
1246	if op_id == Id.BoolBinary_le:
1247	return mops.Greater(i2, i1) or mops.Equal(i1, i2)
1248
1249	raise AssertionError(op_id) # should never happen
1250
1251	if arg_type == bool_arg_type_e.Str:
1252	fnmatch_flags = (FNM_CASEFOLD
1253	if self.exec_opts.nocasematch() else 0)
1254
1255	if op_id in (Id.BoolBinary_GlobEqual,
1256	Id.BoolBinary_GlobDEqual):
1257	#log('Matching %s against pattern %s', s1, s2)
1258	return libc.fnmatch(s2, s1, fnmatch_flags)
1259
1260	if op_id == Id.BoolBinary_GlobNEqual:
1261	return not libc.fnmatch(s2, s1, fnmatch_flags)
1262
1263	if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
1264	return s1 == s2
1265
1266	if op_id == Id.BoolBinary_NEqual:
1267	return s1 != s2
1268
1269	if op_id == Id.BoolBinary_EqualTilde:
1270	# TODO: This should go to --debug-file
1271	#log('Matching %r against regex %r', s1, s2)
1272	regex_flags = (REG_ICASE
1273	if self.exec_opts.nocasematch() else 0)
1274
1275	try:
1276	indices = libc.regex_search(s2, regex_flags, s1, 0)
1277	except ValueError as e:
1278	# Status 2 indicates a regex parse error. This is
1279	# fatal in OSH but not in bash, which treats [[
1280	# like a command with an exit code.
1281	e_die_status(2, e.message, loc.Word(node.right))
1282
1283	if indices is not None:
1284	self.mem.SetRegexMatch(
1285	RegexMatch(s1, indices, eggex_ops.No))
1286	return True
1287	else:
1288	self.mem.SetRegexMatch(regex_match.No)
1289	return False
1290
1291	if op_id == Id.Op_Less:
1292	return str_cmp(s1, s2) < 0
1293
1294	if op_id == Id.Op_Great:
1295	return str_cmp(s1, s2) > 0
1296
1297	raise AssertionError(op_id) # should never happen
1298
1299	raise AssertionError(node.tag())