osh/sh_expr_eval.py

OILS / osh / sh_expr_eval.py View on Github | oils.pub

1309 lines, 848 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	sh_expr_eval.py -- Shell boolean and arithmetic expressions.
10	"""
11	from __future__ import print_function
12
13	from _devbuild.gen.id_kind_asdl import Id
14	from _devbuild.gen.runtime_asdl import error_code_e, scope_t
15	from _devbuild.gen.syntax_asdl import (
16	word_t,
17	CompoundWord,
18	Token,
19	loc,
20	loc_t,
21	source,
22	arith_expr,
23	arith_expr_e,
24	arith_expr_t,
25	bool_expr,
26	bool_expr_e,
27	bool_expr_t,
28	sh_lhs,
29	sh_lhs_e,
30	sh_lhs_t,
31	BracedVarSub,
32	)
33	from _devbuild.gen.option_asdl import option_i
34	from _devbuild.gen.types_asdl import bool_arg_type_e
35	from _devbuild.gen.value_asdl import (
36	value,
37	value_e,
38	value_t,
39	sh_lvalue,
40	sh_lvalue_e,
41	sh_lvalue_t,
42	LeftName,
43	eggex_ops,
44	regex_match,
45	RegexMatch,
46	)
47	from core import alloc
48	from core import bash_impl
49	from core import error
50	from core.error import e_die, e_die_status, e_strict, e_usage
51	from core import num
52	from core import state
53	from display import ui
54	from core import util
55	from frontend import consts
56	from frontend import lexer
57	from frontend import location
58	from frontend import match
59	from frontend import reader
60	from mycpp import mops
61	from mycpp import mylib
62	from mycpp.mylib import log, tagswitch, switch, str_cmp
63	from osh import bool_stat
64	from osh import word_eval
65
66	import libc # for fnmatch
67	# Import these names directly because the C++ translation uses macros literally.
68	from libc import FNM_CASEFOLD, REG_ICASE
69
70	from typing import Tuple, Optional, cast, TYPE_CHECKING
71	if TYPE_CHECKING:
72	from core import optview
73	from frontend import parse_lib
74
75	_ = log
76
77	#
78	# Arith and Command/Word variants of assignment
79	#
80	# Calls EvalShellLhs()
81	# a[$key]=$val # osh/cmd_eval.py:814 (command_e.ShAssignment)
82	# Calls EvalArithLhs()
83	# (( a[key] = val )) # osh/sh_expr_eval.py:326 (_EvalLhsArith)
84	#
85	# Calls OldValue()
86	# a[$key]+=$val # osh/cmd_eval.py:795 (assign_op_e.PlusEqual)
87	# (( a[key] += val )) # osh/sh_expr_eval.py:308 (_EvalLhsAndLookupArith)
88	#
89	# RHS Indexing
90	# val=${a[$key]} # osh/word_eval.py:639 (bracket_op_e.ArrayIndex)
91	# (( val = a[key] )) # osh/sh_expr_eval.py:509 (Id.Arith_LBracket)
92	#
93
94
def OldValue(lval, mem, exec_opts, blame_loc):
    # type: (sh_lvalue_t, state.Mem, Optional[optview.Exec], loc_t) -> value_t
    """Look up the current value of an lvalue, for augmented assignment.

    For s+=val and (( i += 1 ))

    Args:
      lval: the place being modified: a plain name, name[index] or name[key]
      mem: variable store that the name is looked up in
      exec_opts: can be None if we don't want to check set -u!
                 Because s+=val doesn't check it.
      blame_loc: location attached to every fatal error raised here

    Returns:
      For sh_lvalue.Var, the cell's value (value.Undef if there is no cell).
      For Indexed / Keyed, a value.Str holding the element, or value.Str('')
      when the element does not exist.

    Raises (via e_die):
      when the variable is readonly, when nounset is on and the variable is
      undefined, or when [] is applied to a value that isn't an array.

    TODO: A stricter and less ambiguous version for YSH.
    - Problem: why does sh_lvalue have Indexed and Keyed, while sh_lhs only has
      IndexedName?
      - should I have location.LName and sh_lvalue.Indexed only?
      - and Indexed uses the index_t type?
        - well that might be Str or Int
    """
    assert isinstance(lval, sh_lvalue_t), lval

    # TODO: refactor sh_lvalue_t to make this simpler
    UP_lval = lval
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):  # (( i++ ))
            lval = cast(LeftName, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Indexed):  # (( a[i]++ ))
            lval = cast(sh_lvalue.Indexed, UP_lval)
            var_name = lval.name
        elif case(sh_lvalue_e.Keyed):  # (( A['K']++ )) ?  I think this works
            lval = cast(sh_lvalue.Keyed, UP_lval)
            var_name = lval.name
        else:
            raise AssertionError()

    cell = mem.GetCellDeref(var_name)
    if cell is not None:
        # The caller is about to write back, so reject readonly cells now.
        if cell.readonly:
            e_die("Can't assign to readonly variable %r" % var_name, blame_loc)
        val = cell.val
    else:
        val = value.Undef

    if exec_opts and exec_opts.nounset() and val.tag() == value_e.Undef:
        e_die('Undefined variable %r' % var_name, blame_loc)

    UP_val = val
    with tagswitch(lval) as case:
        if case(sh_lvalue_e.Var):
            return val

        elif case(sh_lvalue_e.Indexed):
            lval = cast(sh_lvalue.Indexed, UP_lval)

            with tagswitch(val) as case2:
                if case2(value_e.Undef):
                    s = None  # type: Optional[str]
                elif case2(value_e.InternalStringArray):
                    array_val = cast(value.InternalStringArray, UP_val)
                    s, _ = bash_impl.InternalStringArray_GetElement(
                        array_val, lval.index)
                    # Note: We ignore error_code in the return value of
                    # InternalStringArray_GetElement because an invalid index
                    # will be reported on the assignment stage anyway.
                elif case2(value_e.BashArray):
                    sparse_val = cast(value.BashArray, UP_val)
                    s, _ = bash_impl.BashArray_GetElement(
                        sparse_val, mops.IntWiden(lval.index))
                else:
                    # Report where the bad [] is, not just what went wrong.
                    e_die(
                        "Can't use [] on value of type %s" % ui.ValType(val),
                        blame_loc)

            if s is None:
                val = value.Str('')  # NOTE: Other logic is value.Undef?  0?
            else:
                assert isinstance(s, str), s
                val = value.Str(s)

        elif case(sh_lvalue_e.Keyed):
            lval = cast(sh_lvalue.Keyed, UP_lval)

            assoc_val = None  # type: value.BashAssoc
            with tagswitch(val) as case2:
                if case2(value_e.Undef):
                    # This never happens: EvalShellLhs() and EvalArithLhs()
                    # only build a Keyed lvalue when mem.IsBashAssoc(name) is
                    # true, so undef[x]+= arrives here as Indexed instead.
                    raise AssertionError()
                elif case2(value_e.BashAssoc):
                    tmp2 = cast(value.BashAssoc, UP_val)
                    # mycpp rewrite: add tmp.  cast() creates a new var in
                    # inner scope
                    assoc_val = tmp2
                    s = bash_impl.BashAssoc_GetElement(assoc_val, lval.key)
                else:
                    # Report where the bad [] is, not just what went wrong.
                    e_die(
                        "Can't use [] on value of type %s" % ui.ValType(val),
                        blame_loc)

            if s is None:
                val = value.Str('')
            else:
                val = value.Str(s)

        else:
            raise AssertionError()

    return val
197
198
199	# TODO: Should refactor for int/char-based processing
if mylib.PYTHON:

    def IsLower(ch):
        # type: (str) -> bool
        """Return True when ch sorts between 'a' and 'z' inclusive."""
        return 'a' <= ch <= 'z'

    def IsUpper(ch):
        # type: (str) -> bool
        """Return True when ch sorts between 'A' and 'Z' inclusive."""
        return 'A' <= ch <= 'Z'
209
210
class UnsafeArith(object):
    """Parses strings like a[i] at RUNTIME, rather than at parse time."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            parse_ctx,  # type: parse_lib.ParseContext
            arith_ev,  # type: ArithEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.mem = mem
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        self.parse_ctx = parse_ctx
        self.arith_ev = arith_ev
        self.errfmt = errfmt

        # Dynamically parsed code is registered in the parse context's arena.
        self.arena = self.parse_ctx.arena

    def ParseLValue(self, s, location):
        # type: (str, loc_t) -> sh_lvalue_t
        """Turn a string into an sh_lvalue, for 'unset' and 'printf -v'.

        The string goes through the arith parser, so it's treated like the
        LHS of (( a[i] = x )).  When parse_sh_arith is off, only a plain
        variable name is accepted.

        Unless eval_unsafe_arith is on, command subs are disabled while the
        LHS is evaluated.
        """
        if not self.parse_ctx.parse_opts.parse_sh_arith():
            # Do something simpler for YSH
            if match.IsValidVarName(s):
                return LeftName(s, location)
            e_die('Invalid variable name %r (parse_sh_arith is off)' % s,
                  location)

        arith_parser = self.parse_ctx.MakeArithParser(s)
        dynamic_src = source.Dynamic('dynamic LHS', location)

        with alloc.ctx_SourceCode(self.arena, dynamic_src):
            try:
                lhs_node = arith_parser.Parse()
            except error.Parse as parse_err:
                self.errfmt.PrettyPrintError(parse_err)
                # Exception for builtins 'unset' and 'printf'
                e_usage('got invalid LHS expression', location)

        # Note: we parse '1+2', and then it becomes a runtime error because
        # it's not a valid LHS.  Could be a parse error.

        if not self.exec_opts.eval_unsafe_arith():
            # Prevent attacks like these by default:
            #
            # unset -v 'A["$(echo K; rm *)"]'
            with state.ctx_Option(self.mutable_opts,
                                  [option_i._allow_command_sub], False):
                result = self.arith_ev.EvalArithLhs(lhs_node)
        else:
            result = self.arith_ev.EvalArithLhs(lhs_node)

        return result

    def ParseVarRef(self, ref_str, blame_tok):
        # type: (str, Token) -> BracedVarSub
        """Parse the string that ${!ref} points at into a BracedVarSub.

        This supports:
        - 0 to 9 for $0 to $9
        - @ for "$@" etc.

        The grammar is related to _ReadBracedVarSub in osh/word_parse.py.

        Note: declare -n allows 'varname' and 'varname[i]' and 'varname[@]',
        but it does NOT allow 0 to 9, @, *

        NamerefExpr = NAME Subscript?   # this allows @ and * too

        _ResolveNameOrRef currently gives you a 'cell'.  So it might not
        support sh_lvalue.Indexed?
        """
        ref_reader = reader.StringLineReader(ref_str, self.arena)
        lx = self.parse_ctx.MakeLexer(ref_reader)
        word_parser = self.parse_ctx.MakeWordParser(lx, ref_reader)

        with alloc.ctx_SourceCode(self.arena, source.VarRef(blame_tok)):
            try:
                var_sub = word_parser.ParseVarRef()
            except error.Parse as parse_err:
                # This prints the inner location
                self.errfmt.PrettyPrintError(parse_err)

                # this affects builtins 'unset' and 'printf'
                e_die("Invalid var ref expression", blame_tok)

        return var_sub
307
308
def _ParseOshInteger(s, blame_loc):
    # type: (str, loc_t) -> Tuple[bool, mops.BigInt]
    """Try to interpret the whole string as a shell integer constant.

    Args:
      s: candidate string; the entire string must be consumed by the number
         lexer, so trailing data makes it "not an integer"
      blame_loc: location attached to errors

    Returns:
      (True, value) when the string looks like an integer
      (False, ...)  when it doesn't

    Raises:
      via e_die when a decimal/octal/hex constant is too big, and via
      e_strict when a base#digits constant has a bad base or digit.

    Integer formats that are recognized:
      0xAB     hex
      042      octal
      42       decimal
      64#z     arbitrary base
    """
    id_, pos = match.MatchShNumberToken(s, 0)  # use re2c lexer
    if pos != len(s):
        # trailing data isn't allowed
        return (False, mops.BigInt(0))

    # Do conversions

    if id_ == Id.ShNumber_Dec:
        # Normal base 10 integer.
        ok, big_int = mops.FromStr2(s)
        if not ok:
            e_die('Integer too big: %s' % s, blame_loc)
        return (True, big_int)

    elif id_ == Id.ShNumber_Oct:
        # 0123, offset by 1
        ok, big_int = mops.FromStr2(s[1:], 8)
        if not ok:
            e_die('Octal integer too big: %s' % s, blame_loc)
        return (True, big_int)

    elif id_ == Id.ShNumber_Hex:
        # 0xff, offset by 2
        ok, big_int = mops.FromStr2(s[2:], 16)
        if not ok:
            e_die('Hex integer too big: %s' % s, blame_loc)
        return (True, big_int)

    elif id_ == Id.ShNumber_BaseN:
        b, digits = mylib.split_once(s, '#')
        assert digits is not None, digits  # assured by lexer

        try:
            base = int(b)  # machine integer, not BigInt
        except ValueError:
            # Unreachable per the regex validation above
            raise AssertionError()

        if base > 64:
            e_strict('Base %d cannot be larger than 64' % base, blame_loc)
        if base < 2:
            # Base 2 itself is accepted, so the bound is "at least 2".
            e_strict('Base %d must be at least 2' % base, blame_loc)

        integer = mops.ZERO
        for ch in digits:
            # Digit values: 0-9, then a-z = 10..35, A-Z = 36..61, @ = 62,
            # _ = 63
            if IsLower(ch):
                digit = ord(ch) - ord('a') + 10
            elif IsUpper(ch):
                digit = ord(ch) - ord('A') + 36
            elif ch == '@':  # horrible syntax
                digit = 62
            elif ch == '_':
                digit = 63
            elif ch.isdigit():
                digit = int(ch)
            else:
                # Unreachable per the regex validation above
                raise AssertionError()

            if digit >= base:
                e_strict('Digits %r out of range for base %d' % (digits, base),
                         blame_loc)

            # formula is:
            # integer = integer * base + digit
            integer = mops.Add(mops.Mul(integer, mops.IntWiden(base)),
                               mops.IntWiden(digit))
        return (True, integer)

    else:
        # Id.Unknown_Tok or Id.Eol_Tok
        return (False, mops.BigInt(0))  # not an integer
394
395
class ArithEvaluator(bash_impl.ArrayIndexEvaluator):
    """Shared between arith and bool evaluators.

    They both:

    1. Convert strings to integers, respecting shopt -s strict_arith.
    2. Look up variables and evaluate words.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            parse_ctx,  # type: Optional[parse_lib.ParseContext]
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        bash_impl.ArrayIndexEvaluator.__init__(self)
        # Set after construction; see CheckCircularDeps()
        self.word_ev = None  # type: word_eval.StringWordEvaluator
        self.mem = mem
        self.exec_opts = exec_opts
        self.mutable_opts = mutable_opts
        # None disables recursive parsing of strings in StringToBigInt()
        self.parse_ctx = parse_ctx
        self.errfmt = errfmt

    def CheckCircularDeps(self):
        # type: () -> None
        """Assert that word_ev was wired up after construction."""
        assert self.word_ev is not None

    def StringToBigInt(self, s, blame_loc):
        # type: (str, loc_t) -> mops.BigInt
        """Use bash-like rules to coerce a string to an integer.

        Runtime parsing enables silly stuff like $(( $(echo 1)$(echo 2) + 1 )) => 13

        bare word: variable
        quoted word: string (not done?)

        A string that isn't an integer constant is parsed and evaluated as an
        arithmetic expression, unless self.parse_ctx is None.  Command subs
        are disabled during that evaluation unless eval_unsafe_arith is on.
        """
        s = s.strip()

        ok, i = _ParseOshInteger(s, blame_loc)
        if ok:
            return i

        # Doesn't look like an integer

        # note: 'test' and '[' never evaluate recursively
        if self.parse_ctx is None:
            if len(s) == 0 or match.IsValidVarName(s):
                # x42 could evaluate to 0
                e_strict("Invalid integer constant %r" % s, blame_loc)
            else:
                # 42x is always fatal!
                e_die("Invalid integer constant %r" % s, blame_loc)

        # Special case so we don't get EOF error
        if len(s) == 0:
            return mops.ZERO

        # For compatibility: Try to parse it as an expression and evaluate it.
        a_parser = self.parse_ctx.MakeArithParser(s)

        try:
            node2 = a_parser.Parse()  # may raise error.Parse
        except error.Parse as e:
            self.errfmt.PrettyPrintError(e)
            e_die('Parse error in recursive arithmetic', e.location)

        # Prevent infinite recursion of $(( 1x )) -- it's a word that evaluates
        # to itself, and you don't want to reparse it as a word.
        if node2.tag() == arith_expr_e.Word:
            e_die("Invalid integer constant %r" % s, blame_loc)

        if self.exec_opts.eval_unsafe_arith():
            integer = self.EvalToBigInt(node2)
        else:
            # BoolEvaluator doesn't have parse_ctx or mutable_opts
            assert self.mutable_opts is not None

            # We don't need to flip _allow_process_sub, because they can't be
            # parsed.  See spec/bugs.test.sh.
            with state.ctx_Option(self.mutable_opts,
                                  [option_i._allow_command_sub], False):
                integer = self.EvalToBigInt(node2)

        return integer

    def _ValToIntOrError(self, val, blame):
        # type: (value_t, arith_expr_t) -> mops.BigInt
        """Coerce a value to a BigInt, blaming the given arith node.

        Undef and non-integer strings raise error.Strict internally; that
        becomes 0 unless strict_arith is on.  Any other value type (e.g. an
        array) is always fatal.
        """
        try:
            UP_val = val
            with tagswitch(val) as case:
                if case(value_e.Undef):
                    # 'nounset' already handled before got here
                    # Happens upon a[undefined]=42, which unfortunately turns
                    # into a[0]=42.
                    e_strict('Undefined value in arithmetic context',
                             loc.Arith(blame))

                elif case(value_e.Int):
                    val = cast(value.Int, UP_val)
                    return val.i

                elif case(value_e.Str):
                    val = cast(value.Str, UP_val)
                    # calls e_strict
                    return self.StringToBigInt(val.s, loc.Arith(blame))

        except error.Strict as e:
            if self.exec_opts.strict_arith():
                raise
            else:
                return mops.ZERO

        # Arrays and associative arrays always fail -- not controlled by
        # strict_arith.
        # In bash, (( a )) is like (( a[0] )), but I don't want that.
        # And returning '0' gives different results.
        e_die(
            "Expected a value convertible to integer, got %s" %
            ui.ValType(val), loc.Arith(blame))

    def _EvalLhsAndLookupArith(self, node):
        # type: (arith_expr_t) -> Tuple[mops.BigInt, sh_lvalue_t]
        """ For x = y and x += y and ++x

        Returns:
          (old value as an integer, lvalue to store the new value into)
        """

        lval = self.EvalArithLhs(node)
        val = OldValue(lval, self.mem, self.exec_opts,
                       location.TokenForArith(node))

        # BASH_LINENO, arr (array name without strict_array), etc.
        if (val.tag() in (value_e.InternalStringArray, value_e.BashAssoc,
                          value_e.BashArray) and
                lval.tag() == sh_lvalue_e.Var):
            named_lval = cast(LeftName, lval)
            if word_eval.ShouldArrayDecay(named_lval.name, self.exec_opts):
                # A bare array name decays to element 0 / key '0', so the
                # store has to target that element too.
                if val.tag() in (value_e.InternalStringArray,
                                 value_e.BashArray):
                    lval = sh_lvalue.Indexed(named_lval.name, 0, loc.Missing)
                elif val.tag() == value_e.BashAssoc:
                    lval = sh_lvalue.Keyed(named_lval.name, '0', loc.Missing)
                val = word_eval.DecayArray(val)

        # This error message could be better, but we already have one
        #if val.tag() in (value_e.InternalStringArray, value_e.BashArray):
        #  e_die("Can't use assignment like ++ or += on arrays")

        i = self._ValToIntOrError(val, node)
        return i, lval

    def _Store(self, lval, new_int):
        # type: (sh_lvalue_t, mops.BigInt) -> None
        """Write an integer back to the lvalue, as a string."""
        val = value.Str(mops.ToStr(new_int))
        state.OshLanguageSetValue(self.mem, lval, val)

    def EvalToBigInt(self, node):
        # type: (arith_expr_t) -> mops.BigInt
        """Used externally by ${a[i+1]} and ${a:start:len}.

        Also used internally.
        """
        val = self.Eval(node)

        # BASH_LINENO, arr (array name without strict_array), etc.
        if (val.tag() in (value_e.InternalStringArray, value_e.BashAssoc,
                          value_e.BashArray) and
                node.tag() == arith_expr_e.VarSub):
            vsub = cast(Token, node)
            if word_eval.ShouldArrayDecay(lexer.LazyStr(vsub), self.exec_opts):
                val = word_eval.DecayArray(val)

        i = self._ValToIntOrError(val, node)
        return i

    def EvalToInt(self, node):
        # type: (arith_expr_t) -> int
        """Like EvalToBigInt(), but truncated with mops.BigTruncate()."""
        return mops.BigTruncate(self.EvalToBigInt(node))

    def Eval(self, node):
        # type: (arith_expr_t) -> value_t
        """Evaluate an arithmetic expression node to a value.

        Returns:
          value.Int for the empty expression, assignments, and unary/binary
            operators other than indexing
          whatever the variable holds for a bare name (can be an array)
          the evaluated word for a word
          value.Str or value.Undef for indexing a[i]
          the value of the chosen branch for the ternary operator

        Assignment nodes store the new value as a side effect.

        NOTE: (( A['x'] = 'x' )) and (( x = A['x'] )) are syntactically valid in
        bash, but don't do what you'd think.  'x' sometimes a variable name and
        sometimes a key.
        """
        # OSH semantics: Variable NAMES cannot be formed dynamically; but INTEGERS
        # can.  ${foo:-3}4 is OK.  $? will be a compound word too, so we don't have
        # to handle that as a special case.

        UP_node = node
        with tagswitch(node) as case:
            if case(arith_expr_e.EmptyZero):  # $(( ))
                return value.Int(mops.ZERO)  # Weird axiom

            elif case(arith_expr_e.EmptyOne):  # for (( ; ; ))
                return value.Int(mops.ONE)

            elif case(arith_expr_e.VarSub):  # $(( x ))  (can be array)
                vsub = cast(Token, UP_node)
                var_name = lexer.LazyStr(vsub)
                val = self.mem.GetValue(var_name)
                if val.tag() == value_e.Undef and self.exec_opts.nounset():
                    e_die('Undefined variable %r' % var_name, vsub)
                return val

            elif case(arith_expr_e.Word):  # $(( $x )) $(( ${x}${y} )), etc.
                w = cast(CompoundWord, UP_node)
                return self.word_ev.EvalWordToString(w)

            elif case(arith_expr_e.UnaryAssign):  # a++
                node = cast(arith_expr.UnaryAssign, UP_node)

                op_id = node.op_id
                old_big, lval = self._EvalLhsAndLookupArith(node.child)

                if op_id == Id.Node_PostDPlus:  # post-increment
                    new_big = mops.Add(old_big, mops.ONE)
                    result = old_big

                elif op_id == Id.Node_PostDMinus:  # post-decrement
                    new_big = mops.Sub(old_big, mops.ONE)
                    result = old_big

                elif op_id == Id.Arith_DPlus:  # pre-increment
                    new_big = mops.Add(old_big, mops.ONE)
                    result = new_big

                elif op_id == Id.Arith_DMinus:  # pre-decrement
                    new_big = mops.Sub(old_big, mops.ONE)
                    result = new_big

                else:
                    raise AssertionError(op_id)

                self._Store(lval, new_big)
                return value.Int(result)

            elif case(arith_expr_e.BinaryAssign):  # a=1, a+=5, a[1]+=5
                node = cast(arith_expr.BinaryAssign, UP_node)
                op_id = node.op_id

                if op_id == Id.Arith_Equal:
                    # Don't really need a span ID here, because
                    # tdop.CheckLhsExpr should have done all the validation.
                    lval = self.EvalArithLhs(node.left)
                    rhs_big = self.EvalToBigInt(node.right)

                    self._Store(lval, rhs_big)
                    return value.Int(rhs_big)

                old_big, lval = self._EvalLhsAndLookupArith(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                if op_id == Id.Arith_PlusEqual:
                    new_big = mops.Add(old_big, rhs_big)
                elif op_id == Id.Arith_MinusEqual:
                    new_big = mops.Sub(old_big, rhs_big)
                elif op_id == Id.Arith_StarEqual:
                    new_big = mops.Mul(old_big, rhs_big)

                elif op_id == Id.Arith_SlashEqual:
                    if mops.Equal(rhs_big, mops.ZERO):
                        # Blame the divisor, since this node has no op token
                        e_die('Divide by zero', loc.Arith(node.right))
                    new_big = mops.Div(old_big, rhs_big)

                elif op_id == Id.Arith_PercentEqual:
                    if mops.Equal(rhs_big, mops.ZERO):
                        # Blame the divisor, since this node has no op token
                        e_die('Divide by zero', loc.Arith(node.right))
                    new_big = mops.Rem(old_big, rhs_big)

                elif op_id == Id.Arith_DGreatEqual:
                    # Same check as the >> operator
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't right shift by negative number",
                            loc.Arith(node.right))
                    new_big = mops.RShift(old_big, rhs_big)
                elif op_id == Id.Arith_DLessEqual:
                    # Same check as the << operator
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't left shift by negative number",
                            loc.Arith(node.right))
                    new_big = mops.LShift(old_big, rhs_big)
                elif op_id == Id.Arith_AmpEqual:
                    new_big = mops.BitAnd(old_big, rhs_big)
                elif op_id == Id.Arith_PipeEqual:
                    new_big = mops.BitOr(old_big, rhs_big)
                elif op_id == Id.Arith_CaretEqual:
                    new_big = mops.BitXor(old_big, rhs_big)
                else:
                    raise AssertionError(op_id)  # shouldn't get here

                self._Store(lval, new_big)
                return value.Int(new_big)

            elif case(arith_expr_e.Unary):
                node = cast(arith_expr.Unary, UP_node)
                op_id = node.op_id

                i = self.EvalToBigInt(node.child)

                if op_id == Id.Node_UnaryPlus:  # +i
                    result = i
                elif op_id == Id.Node_UnaryMinus:  # -i
                    result = mops.Sub(mops.ZERO, i)

                elif op_id == Id.Arith_Bang:  # logical negation
                    if mops.Equal(i, mops.ZERO):
                        result = mops.ONE
                    else:
                        result = mops.ZERO
                elif op_id == Id.Arith_Tilde:  # bitwise complement
                    result = mops.BitNot(i)
                else:
                    raise AssertionError(op_id)  # shouldn't get here

                return value.Int(result)

            elif case(arith_expr_e.Binary):
                node = cast(arith_expr.Binary, UP_node)
                op_id = node.op.id

                # Short-circuit evaluation for || and &&.
                if op_id == Id.Arith_DPipe:
                    lhs_big = self.EvalToBigInt(node.left)
                    if mops.Equal(lhs_big, mops.ZERO):
                        rhs_big = self.EvalToBigInt(node.right)
                        if mops.Equal(rhs_big, mops.ZERO):
                            result = mops.ZERO  # false
                        else:
                            result = mops.ONE  # true
                    else:
                        result = mops.ONE  # true
                    return value.Int(result)

                if op_id == Id.Arith_DAmp:
                    lhs_big = self.EvalToBigInt(node.left)
                    if mops.Equal(lhs_big, mops.ZERO):
                        result = mops.ZERO  # false
                    else:
                        rhs_big = self.EvalToBigInt(node.right)
                        if mops.Equal(rhs_big, mops.ZERO):
                            result = mops.ZERO  # false
                        else:
                            result = mops.ONE  # true
                    return value.Int(result)

                if op_id == Id.Arith_LBracket:
                    # NOTE: Similar to bracket_op_e.ArrayIndex in
                    # osh/word_eval.py

                    left = self.Eval(node.left)
                    UP_left = left
                    with tagswitch(left) as case:
                        if case(value_e.InternalStringArray):
                            array_val = cast(value.InternalStringArray,
                                             UP_left)
                            small_i = mops.BigTruncate(
                                self.EvalToBigInt(node.right))
                            s, error_code = bash_impl.InternalStringArray_GetElement(
                                array_val, small_i)
                            if error_code == error_code_e.IndexOutOfRange:
                                # Note: Bash outputs warning but does not make
                                # it a real error.  We follow the Bash behavior
                                # here.
                                small_length = bash_impl.InternalStringArray_Length(
                                    array_val)
                                self.errfmt.Print_(
                                    "Index %d out of bounds for array of length %d"
                                    % (small_i, small_length),
                                    blame_loc=node.op)

                        elif case(value_e.BashArray):
                            sparse_val = cast(value.BashArray, UP_left)
                            i = self.EvalToBigInt(node.right)
                            s, error_code = bash_impl.BashArray_GetElement(
                                sparse_val, i)
                            if error_code == error_code_e.IndexOutOfRange:
                                # Note: Bash outputs warning but does not make
                                # it a real error.  We follow the Bash behavior
                                # here.
                                length = bash_impl.BashArray_Length(sparse_val)
                                self.errfmt.Print_(
                                    "Index %s out of bounds for array of length %s"
                                    % (mops.ToStr(i), mops.ToStr(length)),
                                    blame_loc=node.op)

                        elif case(value_e.BashAssoc):
                            left = cast(value.BashAssoc, UP_left)
                            key = self.EvalWordToString(node.right)
                            s = bash_impl.BashAssoc_GetElement(left, key)

                        elif case(value_e.Str):
                            left = cast(value.Str, UP_left)
                            if self.exec_opts.strict_arith():
                                e_die(
                                    "Value of type Str can't be indexed (strict_arith)",
                                    node.op)
                            index = self.EvalToBigInt(node.right)
                            # s[0] evaluates to s
                            # s[1] evaluates to Undef
                            s = left.s if mops.Equal(index,
                                                     mops.ZERO) else None

                        elif case(value_e.Undef):
                            if self.exec_opts.strict_arith():
                                e_die(
                                    "Value of type Undef can't be indexed (strict_arith)",
                                    node.op)
                            s = None  # value.Undef

                            # There isn't a way to distinguish Undef vs. empty
                            # string, even with set -o nounset?
                            # s = ''

                        else:
                            # TODO: Add error context
                            e_die(
                                "Value of type %s can't be indexed" %
                                ui.ValType(left), node.op)

                    if s is None:
                        val = value.Undef
                    else:
                        val = value.Str(s)

                    return val

                if op_id == Id.Arith_Comma:
                    self.EvalToBigInt(node.left)  # throw away result
                    result = self.EvalToBigInt(node.right)
                    return value.Int(result)

                # Rest are integers
                lhs_big = self.EvalToBigInt(node.left)
                rhs_big = self.EvalToBigInt(node.right)

                if op_id == Id.Arith_Plus:
                    result = mops.Add(lhs_big, rhs_big)
                elif op_id == Id.Arith_Minus:
                    result = mops.Sub(lhs_big, rhs_big)
                elif op_id == Id.Arith_Star:
                    result = mops.Mul(lhs_big, rhs_big)
                elif op_id == Id.Arith_Slash:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', node.op)
                    result = mops.Div(lhs_big, rhs_big)

                elif op_id == Id.Arith_Percent:
                    if mops.Equal(rhs_big, mops.ZERO):
                        e_die('Divide by zero', node.op)
                    result = mops.Rem(lhs_big, rhs_big)

                elif op_id == Id.Arith_DStar:
                    if mops.Greater(mops.ZERO, rhs_big):
                        e_die("Exponent can't be a negative number",
                              loc.Arith(node.right))
                    result = num.Exponent(lhs_big, rhs_big)

                elif op_id == Id.Arith_DEqual:
                    result = mops.FromBool(mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_NEqual:
                    result = mops.FromBool(not mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_Great:
                    result = mops.FromBool(mops.Greater(lhs_big, rhs_big))
                elif op_id == Id.Arith_GreatEqual:
                    result = mops.FromBool(
                        mops.Greater(lhs_big, rhs_big) or
                        mops.Equal(lhs_big, rhs_big))
                elif op_id == Id.Arith_Less:
                    result = mops.FromBool(mops.Greater(rhs_big, lhs_big))
                elif op_id == Id.Arith_LessEqual:
                    result = mops.FromBool(
                        mops.Greater(rhs_big, lhs_big) or
                        mops.Equal(lhs_big, rhs_big))

                elif op_id == Id.Arith_Pipe:
                    result = mops.BitOr(lhs_big, rhs_big)
                elif op_id == Id.Arith_Amp:
                    result = mops.BitAnd(lhs_big, rhs_big)
                elif op_id == Id.Arith_Caret:
                    result = mops.BitXor(lhs_big, rhs_big)

                # Note: how to define shift of negative numbers?
                elif op_id == Id.Arith_DLess:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr("Can't left shift by negative number",
                                         node.op)
                    result = mops.LShift(lhs_big, rhs_big)
                elif op_id == Id.Arith_DGreat:
                    if mops.Greater(mops.ZERO, rhs_big):  # rhs_big < 0
                        raise error.Expr(
                            "Can't right shift by negative number", node.op)
                    result = mops.RShift(lhs_big, rhs_big)
                else:
                    raise AssertionError(op_id)

                return value.Int(result)

            elif case(arith_expr_e.TernaryOp):
                node = cast(arith_expr.TernaryOp, UP_node)

                # Only the selected branch is evaluated
                cond = self.EvalToBigInt(node.cond)
                if mops.Equal(cond, mops.ZERO):
                    return self.Eval(node.false_expr)
                else:
                    return self.Eval(node.true_expr)

            else:
                raise AssertionError(node.tag())

        raise AssertionError('for -Wreturn-type in C++')

    def EvalWordToString(self, node, blame_loc=loc.Missing):
        # type: (arith_expr_t, loc_t) -> str
        """Evaluate an arith node that must be a word, e.g. an assoc array key.

        Raises:
          error.FatalRuntime if the expression isn't a string
          or if it contains a bare variable like a[x]

        These are allowed because they're unambiguous, unlike a[x]

          a[$x] a["$x"] a["x"] a['x']
        """
        UP_node = node
        if node.tag() == arith_expr_e.Word:  # $(( $x )) $(( ${x}${y} )), etc.
            w = cast(CompoundWord, UP_node)
            val = self.word_ev.EvalWordToString(w)
            return val.s
        else:
            # A[x] is the "Parsing Bash is Undecidable" problem
            # It is a string or var name?
            # (It's parsed as arith_expr.VarSub)
            e_die(
                "Assoc array keys must be strings: $x 'x' \"$x\" etc. (OILS-ERR-101)",
                blame_loc)

    def EvalShellLhs(self, node, which_scopes):
        # type: (sh_lhs_t, scope_t) -> sh_lvalue_t
        """Evaluate a shell LHS expression

        For  a=b  and  a[x]=b  etc.

        An indexed name becomes Keyed when the variable is currently an
        associative array, and Indexed otherwise.
        """
        assert isinstance(node, sh_lhs_t), node

        UP_node = node
        lval = None  # type: sh_lvalue_t
        with tagswitch(node) as case:
            if case(sh_lhs_e.Name):  # a=x
                node = cast(sh_lhs.Name, UP_node)
                assert node.name is not None

                lval1 = LeftName(node.name, node.left)
                lval = lval1

            elif case(sh_lhs_e.IndexedName):  # a[1+2]=x
                node = cast(sh_lhs.IndexedName, UP_node)
                assert node.name is not None

                if self.mem.IsBashAssoc(node.name):
                    key = self.EvalWordToString(node.index,
                                                blame_loc=node.left)
                    # node.left points to A[ in A[x]=1
                    lval2 = sh_lvalue.Keyed(node.name, key, node.left)
                    lval = lval2
                else:
                    index = mops.BigTruncate(self.EvalToBigInt(node.index))
                    lval3 = sh_lvalue.Indexed(node.name, index, node.left)
                    lval = lval3

            else:
                raise AssertionError(node.tag())

        return lval

    def _VarNameOrWord(self, anode):
        # type: (arith_expr_t) -> Tuple[Optional[str], loc_t]
        """
        Returns a variable name if the arith node can be interpreted that way.

        Otherwise returns (None, loc.Missing).
        """
        UP_anode = anode
        with tagswitch(anode) as case:
            if case(arith_expr_e.VarSub):
                tok = cast(Token, UP_anode)
                return (lexer.LazyStr(tok), tok)

            elif case(arith_expr_e.Word):
                w = cast(CompoundWord, UP_anode)
                var_name = self.EvalWordToString(w)
                return (var_name, w)

        no_str = None  # type: Optional[str]
        return (no_str, loc.Missing)

    def EvalArithLhs(self, anode):
        # type: (arith_expr_t) -> sh_lvalue_t
        """
        For (( a[x] = 1 )) etc.

        Returns Keyed / Indexed for name[...], and LeftName for a plain name.
        Anything else is fatal with status 2.
        """
        UP_anode = anode
        if anode.tag() == arith_expr_e.Binary:
            anode = cast(arith_expr.Binary, UP_anode)
            if anode.op.id == Id.Arith_LBracket:
                var_name, blame_loc = self._VarNameOrWord(anode.left)

                # Check for None BEFORE validating the name: the left side
                # may not be a name at all (e.g. another a[i] expression), and
                # then it's reported below as an invalid LHS.
                if var_name is not None:
                    # (( 1[2] = 3 )) isn't valid
                    if not match.IsValidVarName(var_name):
                        e_die('Invalid variable name %r' % var_name,
                              blame_loc)

                    if self.mem.IsBashAssoc(var_name):
                        arith_loc = location.TokenForArith(anode)
                        key = self.EvalWordToString(anode.right,
                                                    blame_loc=arith_loc)
                        return sh_lvalue.Keyed(var_name, key, blame_loc)
                    else:
                        index = mops.BigTruncate(
                            self.EvalToBigInt(anode.right))
                        return sh_lvalue.Indexed(var_name, index, blame_loc)

        var_name, blame_loc = self._VarNameOrWord(anode)
        if var_name is not None:
            return LeftName(var_name, blame_loc)

        # e.g. unset 'x-y'.  status 2 for runtime parse error
        e_die_status(2, 'Invalid LHS to modify', blame_loc)
1018
1019
1020	class BoolEvaluator(ArithEvaluator):
1021	"""This is also an ArithEvaluator because it has to understand.
1022
1023	[[ x -eq 3 ]]
1024
1025	where x='1+2'
1026	"""
1027
1028	def __init__(
1029	self,
1030	mem, # type: state.Mem
1031	exec_opts, # type: optview.Exec
1032	mutable_opts, # type: Optional[state.MutableOpts]
1033	parse_ctx, # type: Optional[parse_lib.ParseContext]
1034	errfmt, # type: ui.ErrorFormatter
1035	bracket=False # type: bool
1036	):
1037	# type: (...) -> None
1038	ArithEvaluator.__init__(self, mem, exec_opts, mutable_opts, parse_ctx,
1039	errfmt)
1040	self.bracket = bracket # [ and [[ are slightly different
1041
def _IsDefined(self, s, blame_loc):
    # type: (str, loc_t) -> bool
    """Implement the -v test: is the variable or element named by s set?

    Args:
      s: Text matched against consts.TEST_V_RE; the error message below
         describes the accepted forms as 'name or name[index]'.
      blame_loc: Location attached to any error reported here.

    Returns:
      True if the variable / element exists.  False if it doesn't, or if s
      is malformed, the index isn't an integer, or the variable isn't an
      array, while strict_word_eval is off.

    Errors:
      With strict_word_eval on, the malformed / bad-index cases call
      e_die() and the wrong-type case raises error.TypeErr.  An index that
      the array reports as IndexOutOfRange always calls e_die().
    """

    m = util.RegexSearch(consts.TEST_V_RE, s)
    if m is None:
        if self.exec_opts.strict_word_eval():
            e_die('-v expected name or name[index]', blame_loc)
        return False

    # Group 1 is the variable name; group 3 is the text of the index, which
    # is empty when s is a bare name.
    var_name = m[1]
    index_str = m[3]

    val = self.mem.GetValue(var_name)
    if len(index_str) == 0:  # it's just a variable name
        return val.tag() != value_e.Undef

    # Keep the un-narrowed reference so each branch can cast() it.
    UP_val = val
    with tagswitch(val) as case:
        if case(value_e.InternalStringArray, value_e.BashArray):
            # Both array representations take an integer index.
            try:
                # could use mops.FromStr?
                index = int(index_str)
            except ValueError as e:
                if self.exec_opts.strict_word_eval():
                    e_die(
                        '-v got BashArray and invalid index %r' %
                        index_str, blame_loc)
                return False

            if val.tag() == value_e.InternalStringArray:
                array_val = cast(value.InternalStringArray, UP_val)
                result, error_code = bash_impl.InternalStringArray_HasElement(
                    array_val, index)
                if error_code == error_code_e.IndexOutOfRange:
                    length = bash_impl.InternalStringArray_Length(
                        array_val)
                    e_die(
                        '-v got index %s, which is out of bounds for array of length %d'
                        % (index_str, length), blame_loc)

            elif val.tag() == value_e.BashArray:
                sparse_val = cast(value.BashArray, UP_val)
                result, error_code = bash_impl.BashArray_HasElement(
                    sparse_val, mops.IntWiden(index))
                if error_code == error_code_e.IndexOutOfRange:
                    # This length is a BigInt, hence mops.ToStr() and %s.
                    big_length = bash_impl.BashArray_Length(sparse_val)
                    e_die(
                        '-v got index %s, which is out of bounds for array of length %s'
                        % (index_str, mops.ToStr(big_length)), blame_loc)

            else:
                # The enclosing case() only admits the two tags above.
                raise AssertionError()

            return result

        elif case(value_e.BashAssoc):
            # Associative arrays are probed with the raw index text as key.
            val = cast(value.BashAssoc, UP_val)
            return bash_impl.BashAssoc_HasElement(val, index_str)

        else:
            # work around mycpp bug!  parses as 'elif'
            pass

    # name[index] was used on something that is neither kind of array.
    if self.exec_opts.strict_word_eval():
        raise error.TypeErr(val, 'Expected BashArray or BashAssoc',
                            blame_loc)
    return False
    # Unreachable: every path above returns or raises.
    raise AssertionError()
1110
1111	def _StringToBigIntOrError(self, s, blame_loc):
1112	# type: (str, loc_t) -> mops.BigInt
1113
1114	# Used by [ $x -gt 3 ]
1115	if self.bracket:
1116	if match.LooksLikeInteger(s):
1117	ok, i = mops.FromStr2(s)
1118	else:
1119	ok = False
1120
1121	if not ok:
1122	# builtin_bracket.py catches this and return status 2, so it's
1123	# not fatal
1124	e_die('Invalid integer %r' % s, blame_loc)
1125
1126	return i
1127
1128	# Used by both [[ $x -gt 3 ]] and $(( x ))
1129	else:
1130	try:
1131	i = self.StringToBigInt(s, blame_loc)
1132	except error.Strict as e:
1133	if self.bracket or self.exec_opts.strict_arith():
1134	raise
1135	else:
1136	i = mops.ZERO
1137	return i
1138
1139	def _EvalCompoundWord(self, word, eval_flags=0):
1140	# type: (word_t, int) -> str
1141	val = self.word_ev.EvalWordToString(word, eval_flags)
1142	return val.s
1143
def EvalB(self, node):
    # type: (bool_expr_t) -> bool
    """Evaluate a boolean expression tree and return its truth value.

    Dispatches on the node tag:
      WordTest    - true if the evaluated word is a non-empty string.
      LogicalNot / LogicalAnd / LogicalOr - recurse; And / Or short-circuit.
      Unary       - e.g. -z, -n, -t, -o, -v, and path tests.
      Binary      - path, integer, string, glob and regex comparisons.

    Side effects:
      The =~ operator records its outcome with self.mem.SetRegexMatch().

    Errors:
      e_die() for an unimplemented unary operator; e_die_status(2, ...) when
      libc.regex_search() raises ValueError.  Integer operands go through
      _StringToBigIntOrError(), which may also report an error.
    """

    # Keep the un-narrowed reference so each branch can cast() it.
    UP_node = node
    with tagswitch(node) as case:
        if case(bool_expr_e.WordTest):
            node = cast(bool_expr.WordTest, UP_node)
            s = self._EvalCompoundWord(node.w)
            return bool(s)

        elif case(bool_expr_e.LogicalNot):
            node = cast(bool_expr.LogicalNot, UP_node)
            b = self.EvalB(node.child)
            return not b

        elif case(bool_expr_e.LogicalAnd):
            node = cast(bool_expr.LogicalAnd, UP_node)
            # Short-circuit evaluation
            if self.EvalB(node.left):
                return self.EvalB(node.right)
            else:
                return False

        elif case(bool_expr_e.LogicalOr):
            node = cast(bool_expr.LogicalOr, UP_node)
            # Short-circuit evaluation: the right side is skipped when the
            # left side is already true.
            if self.EvalB(node.left):
                return True
            else:
                return self.EvalB(node.right)

        elif case(bool_expr_e.Unary):
            node = cast(bool_expr.Unary, UP_node)
            op_id = node.op_id
            s = self._EvalCompoundWord(node.child)

            # Now dispatch on arg type.  (arg_type could be static in the
            # LST?)
            arg_type = consts.BoolArgType(op_id)

            if arg_type == bool_arg_type_e.Path:
                return bool_stat.DoUnaryOp(op_id, s)

            if arg_type == bool_arg_type_e.Str:
                if op_id == Id.BoolUnary_z:
                    return not bool(s)
                if op_id == Id.BoolUnary_n:
                    return bool(s)
                if op_id == Id.BoolUnary_true:
                    return s == 'true'
                if op_id == Id.BoolUnary_false:
                    return s == 'false'

                raise AssertionError(op_id)  # should never happen

            if arg_type == bool_arg_type_e.Other:
                if op_id == Id.BoolUnary_t:
                    return bool_stat.isatty(s, node.child)

                # See whether 'set -o' options have been set
                if op_id == Id.BoolUnary_o:
                    index = consts.OptionNum(s)
                    # NOTE(review): 0 looks like the "no such option"
                    # result of OptionNum() -- confirm in frontend/consts.
                    if index == 0:
                        return False
                    else:
                        return self.exec_opts.opt0_array[index]

                if op_id == Id.BoolUnary_v:
                    return self._IsDefined(s, loc.Word(node.child))

                e_die("%s isn't implemented" %
                      ui.PrettyId(op_id))  # implicit location

            raise AssertionError(arg_type)

        elif case(bool_expr_e.Binary):
            node = cast(bool_expr.Binary, UP_node)

            op_id = node.op_id
            # Whether to glob escape
            eval_flags = 0
            with switch(op_id) as case2:
                if case2(Id.BoolBinary_GlobEqual, Id.BoolBinary_GlobDEqual,
                         Id.BoolBinary_GlobNEqual):
                    eval_flags |= word_eval.QUOTE_FNMATCH
                elif case2(Id.BoolBinary_EqualTilde):
                    eval_flags |= word_eval.QUOTE_ERE

            # Only the right operand (the pattern) gets the quoting flags.
            s1 = self._EvalCompoundWord(node.left)
            s2 = self._EvalCompoundWord(node.right, eval_flags)

            # Now dispatch on arg type
            arg_type = consts.BoolArgType(op_id)

            if arg_type == bool_arg_type_e.Path:
                return bool_stat.DoBinaryOp(op_id, s1, s2)

            if arg_type == bool_arg_type_e.Int:
                # NOTE: We assume they are constants like [[ 3 -eq 3 ]].
                # Bash also allows [[ 1+2 -eq 3 ]].
                i1 = self._StringToBigIntOrError(s1, loc.Word(node.left))
                i2 = self._StringToBigIntOrError(s2, loc.Word(node.right))

                # Every comparison is expressed with mops.Equal() and
                # mops.Greater(); "less than" swaps the operands.
                if op_id == Id.BoolBinary_eq:
                    return mops.Equal(i1, i2)
                if op_id == Id.BoolBinary_ne:
                    return not mops.Equal(i1, i2)
                if op_id == Id.BoolBinary_gt:
                    return mops.Greater(i1, i2)
                if op_id == Id.BoolBinary_ge:
                    return mops.Greater(i1, i2) or mops.Equal(i1, i2)
                if op_id == Id.BoolBinary_lt:
                    return mops.Greater(i2, i1)
                if op_id == Id.BoolBinary_le:
                    return mops.Greater(i2, i1) or mops.Equal(i1, i2)

                raise AssertionError(op_id)  # should never happen

            if arg_type == bool_arg_type_e.Str:
                fnmatch_flags = (FNM_CASEFOLD
                                 if self.exec_opts.nocasematch() else 0)

                if op_id in (Id.BoolBinary_GlobEqual,
                             Id.BoolBinary_GlobDEqual):
                    #log('Matching %s against pattern %s', s1, s2)
                    # Argument order is (pattern, string).
                    return libc.fnmatch(s2, s1, fnmatch_flags)

                if op_id == Id.BoolBinary_GlobNEqual:
                    return not libc.fnmatch(s2, s1, fnmatch_flags)

                if op_id in (Id.BoolBinary_Equal, Id.BoolBinary_DEqual):
                    return s1 == s2

                if op_id == Id.BoolBinary_NEqual:
                    return s1 != s2

                if op_id == Id.BoolBinary_EqualTilde:
                    # TODO: This should go to --debug-file
                    #log('Matching %r against regex %r', s1, s2)
                    regex_flags = (REG_ICASE
                                   if self.exec_opts.nocasematch() else 0)

                    try:
                        indices = libc.regex_search(s2, regex_flags, s1, 0)
                    except ValueError as e:
                        # Status 2 indicates a regex parse error.  This is
                        # fatal in OSH but not in bash, which treats [[
                        # like a command with an exit code.
                        e_die_status(2, e.message, loc.Word(node.right))

                    # Record the outcome either way, so a failed match
                    # replaces whatever an earlier match stored.
                    if indices is not None:
                        self.mem.SetRegexMatch(
                            RegexMatch(s1, indices, eggex_ops.No))
                        return True
                    else:
                        self.mem.SetRegexMatch(regex_match.No)
                        return False

                if op_id == Id.Op_Less:
                    return str_cmp(s1, s2) < 0

                if op_id == Id.Op_Great:
                    return str_cmp(s1, s2) > 0

                raise AssertionError(op_id)  # should never happen

    raise AssertionError(node.tag())