osh/bool_parse.py

OILS / osh / bool_parse.py View on Github | oils.pub

286 lines, 142 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	bool_parse.py - Parse boolean expressions.
10
11	In contrast to test / [, the parsing of [[ expressions is done BEFORE
12	evaluation. So we are parsing a list of Word instances to an AST, rather than
13	a list of strings.
14
15	Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
16	syntax.
17
18	Expr : Term (OR Term)*
19	Term : Negated (AND Negated)*
20	Negated : '!'? Factor
21	Factor : WORD
22	\| UNARY_OP WORD
23	\| WORD BINARY_OP WORD
24	\| '(' Expr ')'
25
26	OR = \|\| -o
27	AND = && -a
28	WORD = any word
29	UNARY_OP: -z -n, etc.
30	BINARY_OP: -gt, -ot, ==, etc.
31	"""
32
33	from _devbuild.gen.id_kind_asdl import Id, Kind
34	#from _devbuild.gen.id_kind_asdl import Id_str, Kind_str # for debugging
35	from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
36	from _devbuild.gen.syntax_asdl import (loc, word_t, word_e, bool_expr,
37	bool_expr_t, Token)
38	from core.error import p_die
39	from display import ui
40	from frontend import consts
41	from mycpp.mylib import log
42	from osh import word_
43
44	from typing import List, Optional, Tuple, TYPE_CHECKING
45	if TYPE_CHECKING:
46	from osh.word_parse import WordEmitter
47
48	# import libc # for regex_parse
49
50	_ = log
51
52
53	class BoolParser(object):
54	"""Parses [[ at compile time and [ at runtime."""
55
56	def __init__(self, w_parser):
57	# type: (WordEmitter) -> None
58	self.w_parser = w_parser
59	# Either one word or two words for lookahead
60	self.words = [] # type: List[word_t]
61
62	self.cur_word = None # type: Optional[word_t]
63	self.bool_id = Id.Undefined_Tok
64	self.bool_kind = Kind.Undefined
65
66	def _NextOne(self, lex_mode=lex_mode_e.DBracket):
67	# type: (lex_mode_t) -> None
68	n = len(self.words)
69	if n == 2:
70	assert lex_mode == lex_mode_e.DBracket
71	self.words[0] = self.words[1]
72	self.cur_word = self.words[0]
73	self.words.pop()
74	elif n in (0, 1):
75	w = self.w_parser.ReadWord(lex_mode) # may raise
76	if n == 0:
77	self.words.append(w)
78	else:
79	self.words[0] = w
80	self.cur_word = w
81
82	assert self.cur_word is not None
83	self.bool_id = word_.BoolId(self.cur_word)
84	self.bool_kind = consts.GetKind(self.bool_id)
85	#log('--- word %s', self.cur_word)
86	#log('bool_id %s %s %s', Id_str(self.bool_id), Kind_str(self.bool_kind), lex_mode)
87
88	def _Next(self, lex_mode=lex_mode_e.DBracket):
89	# type: (lex_mode_t) -> None
90	"""Advance to the next token, skipping newlines.
91
92	We don't handle newlines in the lexer because we want the
93	newline after ]] to be Id.Op_Newline rather than Id.WS_Newline.
94	It's more complicated if it's Id.WS_Newline -- we might have to
95	unread tokens, etc.
96	"""
97	while True:
98	self._NextOne(lex_mode=lex_mode)
99	if self.bool_id != Id.Op_Newline:
100	break
101
102	def _LookAhead(self):
103	# type: () -> word_t
104	n = len(self.words)
105	if n != 1:
106	raise AssertionError(n)
107
108	w = self.w_parser.ReadWord(lex_mode_e.DBracket) # may raise
109	self.words.append(w) # Save it for _Next()
110	return w
111
112	def Parse(self):
113	# type: () -> Tuple[bool_expr_t, Token]
114	self._Next()
115
116	node = self.ParseExpr()
117	if self.bool_id != Id.Lit_DRightBracket:
118	#p_die("Expected ]], got %r", self.cur_word, word=self.cur_word)
119	# NOTE: This might be better as unexpected token, since ]] doesn't always
120	# make sense.
121	p_die('Expected ]]', loc.Word(self.cur_word))
122
123	# Extract the ']]' keyword and return it's token for location tracking
124	right = word_.LiteralToken(self.cur_word)
125	assert right is not None
126
127	return node, right
128
129	def _TestAtEnd(self):
130	# type: () -> bool
131	"""For unit tests only."""
132	return self.bool_id == Id.Lit_DRightBracket
133
134	def ParseForBuiltin(self):
135	# type: () -> bool_expr_t
136	"""For test builtin."""
137	self._Next()
138
139	node = self.ParseExpr()
140	if self.bool_id != Id.Eof_Real:
141	p_die('Unexpected trailing word %s' % word_.Pretty(self.cur_word),
142	loc.Word(self.cur_word))
143
144	return node
145
146	def ParseExpr(self):
147	# type: () -> bool_expr_t
148	"""
149	Iterative:
150	Expr : Term (OR Term)*
151
152	Right recursion:
153	Expr : Term (OR Expr)?
154	"""
155	left = self.ParseTerm()
156	# [[ uses \|\| but [ uses -o
157	if self.bool_id in (Id.Op_DPipe, Id.BoolUnary_o):
158	self._Next()
159	right = self.ParseExpr()
160	return bool_expr.LogicalOr(left, right)
161	else:
162	return left
163
164	def ParseTerm(self):
165	# type: () -> bool_expr_t
166	"""
167	Term : Negated (AND Negated)*
168
169	Right recursion:
170	Term : Negated (AND Term)?
171	"""
172	left = self.ParseNegatedFactor()
173	# [[ uses && but [ uses -a
174	if self.bool_id in (Id.Op_DAmp, Id.BoolUnary_a):
175	self._Next()
176	right = self.ParseTerm()
177	return bool_expr.LogicalAnd(left, right)
178	else:
179	return left
180
181	def ParseNegatedFactor(self):
182	# type: () -> bool_expr_t
183	"""
184	Negated : '!'? Factor
185	"""
186	if self.bool_id == Id.KW_Bang:
187	self._Next()
188	child = self.ParseFactor()
189	return bool_expr.LogicalNot(child)
190	else:
191	return self.ParseFactor()
192
193	def ParseFactor(self):
194	# type: () -> bool_expr_t
195	"""
196	Factor : WORD
197	\| UNARY_OP WORD
198	\| WORD =~ Regex
199	\| WORD BINARY_OP WORD
200	\| '(' Expr ')'
201
202	Note: this grammar is ambiguous, and the design of the 'test' builtin
203	is terrible in general.
204
205	We roughly follow bash's ordering rules, which gives a result that
206	almost all shells agree on:
207
208	1. Look for ( first
209	2. Then look ahead to see if you got Kind.BoolBinary, like =
210	3. Then unary operators: test -d / [[ -d / ]]
211	4. Then non-empty string tests: test foo [[ foo ]]
212	"""
213	if self.bool_id == Id.Op_LParen:
214	self._Next()
215	node = self.ParseExpr() # type: bool_expr_t
216	if self.bool_id != Id.Op_RParen:
217	p_die('Expected ), got %s' % word_.Pretty(self.cur_word),
218	loc.Word(self.cur_word))
219	self._Next()
220	return node
221
222	# Peek ahead another token.
223	t2 = self._LookAhead()
224	t2_bool_id = word_.BoolId(t2)
225	t2_bool_kind = consts.GetKind(t2_bool_id)
226
227	#log('t2 %s / t2_bool_id %s / t2_bool_kind %s', t2, t2_bool_id, t2_bool_kind)
228	# Op for < and >, -a and -o pun
229	if t2_bool_kind == Kind.BoolBinary or t2_bool_id in (Id.Op_Less,
230	Id.Op_Great):
231	left = self.cur_word
232
233	self._Next()
234	op = self.bool_id
235
236	if t2_bool_id == Id.BoolBinary_EqualTilde:
237	self._Next(lex_mode=lex_mode_e.BashRegex)
238	else:
239	self._Next()
240
241	right = self.cur_word
242	self._Next()
243
244	tilde = word_.TildeDetect(left)
245	if tilde:
246	left = tilde
247	tilde = word_.TildeDetect(right)
248	if tilde:
249	right = tilde
250
251	return bool_expr.Binary(op, left, right)
252
253	if self.bool_kind == Kind.BoolUnary:
254	# Just save the type and not the token itself?
255	op = self.bool_id
256	self._Next()
257	w = self.cur_word
258	# e.g. [[ -f < ]]. But [[ -f '<' ]] is OK
259
260	tag = w.tag()
261	if tag != word_e.Compound and tag != word_e.String:
262	p_die('Invalid argument to unary operator', loc.Word(w))
263	self._Next()
264
265	tilde = word_.TildeDetect(w)
266	if tilde:
267	w = tilde
268
269	node = bool_expr.Unary(op, w)
270	return node
271
272	# [[ foo ]]
273	# Note: ( = ) and ( == ) may also hit this path, but they are NOT Kind.Word
274	if self.bool_kind != Kind.Op:
275	w = self.cur_word
276	tilde = word_.TildeDetect(w)
277	if tilde:
278	w = tilde
279	self._Next()
280	return bool_expr.WordTest(w)
281
282	# Error for [[ ) ]]
283	# It's not WORD, UNARY_OP, or '('
284	p_die(
285	'Unexpected token in boolean expression (%s)' %
286	ui.PrettyId(self.bool_id), loc.Word(self.cur_word))