osh/bool_parse.py

OILS / osh / bool_parse.py View on Github | oils.pub

296 lines, 145 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	bool_parse.py - Parse boolean expressions.
10
11	In contrast to test / [, the parsing of [[ expressions is done BEFORE
12	evaluation. So we are parsing a list of Word instances to an AST, rather than
13	a list of strings.
14
15	Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
16	syntax.
17
18	Expr : Term (OR Term)*
19	Term : Negated (AND Negated)*
20	Negated : '!'? Factor
21	Factor : WORD
22	\| UNARY_OP WORD
23	\| WORD BINARY_OP WORD
24	\| '(' Expr ')'
25
26	OR = \|\| -o
27	AND = && -a
28	WORD = any word
29	UNARY_OP: -z -n, etc.
30	BINARY_OP: -gt, -ot, ==, etc.
31	"""
32
33	from _devbuild.gen.id_kind_asdl import Id, Kind
34	#from _devbuild.gen.id_kind_asdl import Id_str, Kind_str # for debugging
35	from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
36	from _devbuild.gen.syntax_asdl import (loc, word_t, word_e, bool_expr,
37	bool_expr_t, Token)
38	from core.error import p_die
39	from display import ui
40	from frontend import consts
41	from mycpp.mylib import log
42	from osh import word_
43
44	from typing import List, Optional, Tuple, TYPE_CHECKING
45	if TYPE_CHECKING:
46	from osh.word_parse import WordEmitter
47
48	# import libc # for regex_parse
49
50	_ = log
51
52
53	class BoolParser(object):
54	"""Parses [[ at compile time and [ at runtime."""
55
56	def __init__(self, w_parser):
57	# type: (WordEmitter) -> None
58	self.w_parser = w_parser
59	# Either one word or two words for lookahead
60	self.words = [] # type: List[word_t]
61
62	self.cur_word = None # type: Optional[word_t]
63	self.bool_id = Id.Undefined_Tok
64	self.bool_kind = Kind.Undefined
65
66	def _NextOne(self, lex_mode=lex_mode_e.DBracket):
67	# type: (lex_mode_t) -> None
68	n = len(self.words)
69	if n == 2:
70	assert lex_mode == lex_mode_e.DBracket
71	self.words[0] = self.words[1]
72	self.cur_word = self.words[0]
73	self.words.pop()
74	elif n == 1:
75	w = self.w_parser.ReadWord(lex_mode) # may raise
76	self.words[0] = w
77	self.cur_word = w
78	elif n == 0:
79	w = self.w_parser.ReadWord(lex_mode) # may raise
80	self.words.append(w)
81	self.cur_word = w
82	else:
83	raise AssertionError('Unexpected n = %d' % n)
84
85	assert self.cur_word is not None
86	self.bool_id = word_.BoolId(self.cur_word)
87	self.bool_kind = consts.GetKind(self.bool_id)
88	#log('--- word %s', self.cur_word)
89	#log('bool_id %s %s %s', Id_str(self.bool_id), Kind_str(self.bool_kind), lex_mode)
90
91	def _Next(self, lex_mode=lex_mode_e.DBracket):
92	# type: (lex_mode_t) -> None
93	"""Advance to the next token, skipping newlines.
94
95	We don't handle newlines in the lexer because we want the
96	newline after ]] to be Id.Op_Newline rather than Id.WS_Newline.
97	It's more complicated if it's Id.WS_Newline -- we might have to
98	unread tokens, etc.
99	"""
100	while True:
101	self._NextOne(lex_mode=lex_mode)
102	if self.bool_id != Id.Op_Newline:
103	break
104
105	def _LookAhead(self):
106	# type: () -> word_t
107	"""
108	TODO: Look ahead 2 tokens for
109
110	[ -f foo ] # unary
111	[ -f = -f ] # binary
112	[ -f = ] # unary
113	"""
114	n = len(self.words)
115	if n != 1:
116	raise AssertionError(n)
117
118	w = self.w_parser.ReadWord(lex_mode_e.DBracket) # may raise
119	self.words.append(w) # Save it for _Next()
120	return w
121
122	def Parse(self):
123	# type: () -> Tuple[bool_expr_t, Token]
124	self._Next()
125
126	node = self.ParseExpr()
127	if self.bool_id != Id.Lit_DRightBracket:
128	#p_die("Expected ]], got %r", self.cur_word, word=self.cur_word)
129	# NOTE: This might be better as unexpected token, since ]] doesn't always
130	# make sense.
131	p_die('Expected ]]', loc.Word(self.cur_word))
132
133	# Extract the ']]' keyword and return it's token for location tracking
134	right = word_.LiteralToken(self.cur_word)
135	assert right is not None
136
137	return node, right
138
139	def _TestAtEnd(self):
140	# type: () -> bool
141	"""For unit tests only."""
142	return self.bool_id == Id.Lit_DRightBracket
143
144	def ParseForBuiltin(self):
145	# type: () -> bool_expr_t
146	"""For test builtin."""
147	self._Next()
148
149	node = self.ParseExpr()
150	if self.bool_id != Id.Eof_Real:
151	p_die('Unexpected trailing word %s' % word_.Pretty(self.cur_word),
152	loc.Word(self.cur_word))
153
154	return node
155
156	def ParseExpr(self):
157	# type: () -> bool_expr_t
158	"""
159	Iterative:
160	Expr : Term (OR Term)*
161
162	Right recursion:
163	Expr : Term (OR Expr)?
164	"""
165	left = self.ParseTerm()
166	# [[ uses \|\| but [ uses -o
167	if self.bool_id in (Id.Op_DPipe, Id.BoolUnary_o):
168	self._Next()
169	right = self.ParseExpr()
170	return bool_expr.LogicalOr(left, right)
171	else:
172	return left
173
174	def ParseTerm(self):
175	# type: () -> bool_expr_t
176	"""
177	Term : Negated (AND Negated)*
178
179	Right recursion:
180	Term : Negated (AND Term)?
181	"""
182	left = self.ParseNegatedFactor()
183	# [[ uses && but [ uses -a
184	if self.bool_id in (Id.Op_DAmp, Id.BoolUnary_a):
185	self._Next()
186	right = self.ParseTerm()
187	return bool_expr.LogicalAnd(left, right)
188	else:
189	return left
190
191	def ParseNegatedFactor(self):
192	# type: () -> bool_expr_t
193	"""
194	Negated : '!'? Factor
195	"""
196	if self.bool_id == Id.KW_Bang:
197	self._Next()
198	child = self.ParseFactor()
199	return bool_expr.LogicalNot(child)
200	else:
201	return self.ParseFactor()
202
203	def ParseFactor(self):
204	# type: () -> bool_expr_t
205	"""
206	Factor : WORD
207	\| UNARY_OP WORD
208	\| WORD =~ Regex
209	\| WORD BINARY_OP WORD
210	\| '(' Expr ')'
211
212	Note: this grammar is ambiguous, and the design of the 'test' builtin
213	is terrible in general.
214
215	We roughly follow bash's ordering rules, which gives a result that
216	almost all shells agree on:
217
218	1. Look for ( first
219	2. Then look ahead to see if you got Kind.BoolBinary, like =
220	3. Then unary operators: test -d / [[ -d / ]]
221	4. Then non-empty string tests: test foo [[ foo ]]
222	"""
223	if self.bool_id == Id.Op_LParen:
224	self._Next()
225	node = self.ParseExpr() # type: bool_expr_t
226	if self.bool_id != Id.Op_RParen:
227	p_die('Expected ), got %s' % word_.Pretty(self.cur_word),
228	loc.Word(self.cur_word))
229	self._Next()
230	return node
231
232	# Peek ahead another token.
233	t2 = self._LookAhead()
234	t2_bool_id = word_.BoolId(t2)
235	t2_bool_kind = consts.GetKind(t2_bool_id)
236
237	#log('t2 %s / t2_bool_id %s / t2_bool_kind %s', t2, t2_bool_id, t2_bool_kind)
238	# Op for < and >, -a and -o pun
239	if t2_bool_kind == Kind.BoolBinary or t2_bool_id in (Id.Op_Less,
240	Id.Op_Great):
241	left = self.cur_word
242
243	self._Next()
244	op = self.bool_id
245
246	if t2_bool_id == Id.BoolBinary_EqualTilde:
247	self._Next(lex_mode=lex_mode_e.BashRegex)
248	else:
249	self._Next()
250
251	right = self.cur_word
252	self._Next()
253
254	tilde = word_.TildeDetect(left)
255	if tilde:
256	left = tilde
257	tilde = word_.TildeDetect(right)
258	if tilde:
259	right = tilde
260
261	return bool_expr.Binary(op, left, right)
262
263	if self.bool_kind == Kind.BoolUnary:
264	# Just save the type and not the token itself?
265	op = self.bool_id
266	self._Next()
267	w = self.cur_word
268	# e.g. [[ -f < ]]. But [[ -f '<' ]] is OK
269
270	tag = w.tag()
271	if tag != word_e.Compound and tag != word_e.String:
272	p_die('Invalid argument to unary operator', loc.Word(w))
273	self._Next()
274
275	tilde = word_.TildeDetect(w)
276	if tilde:
277	w = tilde
278
279	node = bool_expr.Unary(op, w)
280	return node
281
282	# [[ foo ]]
283	# Note: ( = ) and ( == ) may also hit this path, but they are NOT Kind.Word
284	if self.bool_kind != Kind.Op:
285	w = self.cur_word
286	tilde = word_.TildeDetect(w)
287	if tilde:
288	w = tilde
289	self._Next()
290	return bool_expr.WordTest(w)
291
292	# Error for [[ ) ]]
293	# It's not WORD, UNARY_OP, or '('
294	p_die(
295	'Unexpected token in boolean expression (%s)' %
296	ui.PrettyId(self.bool_id), loc.Word(self.cur_word))