osh/bool_parse.py

OILS / osh / bool_parse.py View on Github | oils.pub

302 lines, 144 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	bool_parse.py - Parse boolean expressions.
10
11	In contrast to test / [, the parsing of [[ expressions is done BEFORE
12	evaluation. So we are parsing a list of Word instances to an AST, rather than
13	a list of strings.
14
15	Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
16	syntax.
17
18	Expr : Term (OR Term)*
19	Term : Negated (AND Negated)*
20	Negated : '!'? Factor
21	Factor : WORD
22	       | UNARY_OP WORD
23	       | WORD BINARY_OP WORD
24	       | '(' Expr ')'
25
26	OR = || -o
27	AND = && -a
28	WORD = any word
29	UNARY_OP: -z -n, etc.
30	BINARY_OP: -gt, -ot, ==, etc.
31	"""
32
33	from _devbuild.gen.id_kind_asdl import Id, Kind
34	from _devbuild.gen.id_kind_asdl import Id_str # for debugging
35	from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
36	from _devbuild.gen.syntax_asdl import (loc, word_t, word_e, bool_expr,
37	bool_expr_t, Token)
38	from core.error import p_die
39	from display import ui
40	from frontend import consts
41	from mycpp import mylib
42	from mycpp.mylib import log
43	from osh import word_
44
45	from typing import Optional, Tuple, TYPE_CHECKING
46	if TYPE_CHECKING:
47	from osh.word_parse import WordEmitter
48
49	# import libc # for regex_parse
50
51	_ = log
52
53
class BoolParser(object):
    """Parses [[ at compile time and [ at runtime.

    A recursive-descent parser that pulls words from a WordEmitter.  The
    word being examined is kept in self.cur_word; self.bool_id and
    self.bool_kind hold its classification as computed by word_.BoolId()
    and consts.GetKind().
    """

    def __init__(self, w_parser):
        # type: (WordEmitter) -> None
        # Source of words; this class only calls its ReadWord(lex_mode).
        self.w_parser = w_parser

        # Current word and its classification; refreshed by _NextOne().
        self.cur_word = None  # type: Optional[word_t]
        self.bool_id = Id.Undefined_Tok
        self.bool_kind = Kind.Undefined

        # Words that were read ahead of cur_word, nearest first.
        self.ahead1 = None  # type: Optional[word_t]
        self.ahead2 = None  # type: Optional[word_t]

    if mylib.PYTHON:
        # For unit tests only

        def _Dump(self):
            # type: () -> None
            """Log the current word, its id, and both lookahead slots."""
            log('cur_word = %s', self.cur_word)
            log('bool_id = %s', Id_str(self.bool_id))
            log('ahead1 = %s', self.ahead1)
            log('ahead2 = %s', self.ahead2)
            log(' ///')

    def _TestAtEnd(self):
        # type: () -> bool
        """Return True when the current word is the closing ]].

        NOTE(review): the name suggests a unit-test helper; confirm whether
        it is meant to live under the mylib.PYTHON guard.
        """
        at_end = (self.bool_id == Id.Lit_DRightBracket)
        return at_end

    def _NextOne(self, lex_mode=lex_mode_e.DBracket):
        # type: (lex_mode_t) -> None
        """Advance by exactly one word.

        Sets self.cur_word, self.bool_id, self.bool_kind.

        Words already buffered in self.ahead1 / self.ahead2 are consumed
        first; a fresh word is read from the word parser (in the given lex
        mode) only when both lookahead slots are empty.
        """
        if not self.ahead2 and not self.ahead1:
            # Nothing buffered
            self.cur_word = self.w_parser.ReadWord(lex_mode)  # may raise
        elif self.ahead2:
            # Two slots in use: shift everything down by one
            self.cur_word = self.ahead1
            self.ahead1 = self.ahead2
            self.ahead2 = None
        else:
            # Only ahead1 is set
            self.cur_word = self.ahead1
            self.ahead1 = None

        new_id = word_.BoolId(self.cur_word)
        self.bool_id = new_id
        self.bool_kind = consts.GetKind(new_id)

    def _Next(self, lex_mode=lex_mode_e.DBracket):
        # type: (lex_mode_t) -> None
        """Advance to the next token, skipping newlines.

        Newlines are skipped here rather than in the lexer because we want
        the newline after ]] to be Id.Op_Newline rather than Id.WS_Newline.
        It's more complicated if it's Id.WS_Newline -- we might have to
        unread tokens, etc.
        """
        self._NextOne(lex_mode=lex_mode)
        while self.bool_id == Id.Op_Newline:
            self._NextOne(lex_mode=lex_mode)

    def _LookAhead(self):
        # type: () -> word_t
        """Read one more word, store it in self.ahead1, and return it.

        The current word is left untouched.

        TODO: change to LookAheadForBinary, with 2 tokens, for

        [ -f foo ]   # unary
        [ -f = -f ]  # binary
        [ -f = ]     # unary
        """
        peeked = self.w_parser.ReadWord(lex_mode_e.DBracket)  # may raise
        self.ahead1 = peeked
        return peeked

    def Parse(self):
        # type: () -> Tuple[bool_expr_t, Token]
        """Parse a [[ expression.

        Returns the expression node together with the token of the closing
        ]], which is kept for location tracking.  Dies with a parse error if
        the expression is not followed by ]].
        """
        self._Next()
        expr = self.ParseExpr()

        if self.bool_id != Id.Lit_DRightBracket:
            # NOTE: This might be better as unexpected token, since ]] doesn't
            # always make sense.
            p_die('Expected ]]', loc.Word(self.cur_word))

        # Extract the ']]' keyword and return its token for location tracking
        closing_tok = word_.LiteralToken(self.cur_word)
        assert closing_tok is not None

        return expr, closing_tok

    def ParseForBuiltin(self):
        # type: () -> bool_expr_t
        """For test builtin.

        Parses one expression and dies if anything other than Id.Eof_Real
        follows it.
        """
        self._Next()
        expr = self.ParseExpr()

        if self.bool_id != Id.Eof_Real:
            p_die('Unexpected trailing word %s' % word_.Pretty(self.cur_word),
                  loc.Word(self.cur_word))

        return expr

    def ParseExpr(self):
        # type: () -> bool_expr_t
        """
        Iterative:
        Expr : Term (OR Term)*

        Right recursion (what is implemented here):
        Expr : Term (OR Expr)?
        """
        first = self.ParseTerm()

        # [[ uses || but [ uses -o
        if self.bool_id != Id.Op_DPipe and self.bool_id != Id.BoolUnary_o:
            return first

        self._Next()  # past the OR operator
        rest = self.ParseExpr()
        return bool_expr.LogicalOr(first, rest)

    def ParseTerm(self):
        # type: () -> bool_expr_t
        """
        Iterative:
        Term : Negated (AND Negated)*

        Right recursion (what is implemented here):
        Term : Negated (AND Term)?
        """
        first = self.ParseNegatedFactor()

        # [[ uses && but [ uses -a
        if self.bool_id != Id.Op_DAmp and self.bool_id != Id.BoolUnary_a:
            return first

        self._Next()  # past the AND operator
        rest = self.ParseTerm()
        return bool_expr.LogicalAnd(first, rest)

    def ParseNegatedFactor(self):
        # type: () -> bool_expr_t
        """
        Negated : '!'? Factor
        """
        if self.bool_id != Id.KW_Bang:
            return self.ParseFactor()

        self._Next()  # past !
        negated = self.ParseFactor()
        return bool_expr.LogicalNot(negated)

    def ParseFactor(self):
        # type: () -> bool_expr_t
        """
        Factor : WORD
               | UNARY_OP WORD
               | WORD =~ Regex
               | WORD BINARY_OP WORD
               | '(' Expr ')'

        Note: this grammar is ambiguous, and the design of the 'test' builtin
        is terrible in general.

        We roughly follow bash's ordering rules, which gives a result that
        almost all shells agree on:

        1. Look for ( first
        2. Then look ahead to see if you got Kind.BoolBinary, like =
        3. Then unary operators: test -d /      [[ -d / ]]
        4. Then non-empty string tests: test foo      [[ foo ]]
        """
        # 1. Parenthesized sub-expression
        if self.bool_id == Id.Op_LParen:
            self._Next()  # past (
            inner = self.ParseExpr()  # type: bool_expr_t
            if self.bool_id != Id.Op_RParen:
                p_die('Expected ), got %s' % word_.Pretty(self.cur_word),
                      loc.Word(self.cur_word))
            self._Next()  # past )
            return inner

        # 2. Peek at the following word to see whether it is a binary operator.
        peeked = self._LookAhead()
        peeked_id = word_.BoolId(peeked)
        peeked_kind = consts.GetKind(peeked_id)

        # < and > are matched by id (Op_Less / Op_Great) in addition to the
        # Kind.BoolBinary check.
        is_binary = (peeked_kind == Kind.BoolBinary or
                     peeked_id == Id.Op_Less or peeked_id == Id.Op_Great)
        if is_binary:
            lhs = self.cur_word

            self._Next()  # now on the operator
            binary_op = self.bool_id

            # The right-hand side of =~ is read in regex lex mode.
            if peeked_id != Id.BoolBinary_EqualTilde:
                self._Next()
            else:
                self._Next(lex_mode=lex_mode_e.BashRegex)

            rhs = self.cur_word
            self._Next()  # past the right operand

            # Use the word returned by TildeDetect() when there is one.
            lhs_tilde = word_.TildeDetect(lhs)
            if lhs_tilde:
                lhs = lhs_tilde
            rhs_tilde = word_.TildeDetect(rhs)
            if rhs_tilde:
                rhs = rhs_tilde

            return bool_expr.Binary(binary_op, lhs, rhs)

        # 3. Unary operator followed by its operand
        if self.bool_kind == Kind.BoolUnary:
            # Only the operator's id is kept, not the word itself.
            unary_op = self.bool_id
            self._Next()
            operand = self.cur_word

            # e.g. [[ -f < ]].  But [[ -f '<' ]] is OK
            operand_tag = operand.tag()
            if not (operand_tag == word_e.Compound or
                    operand_tag == word_e.String):
                p_die('Invalid argument to unary operator', loc.Word(operand))
            self._Next()  # past the operand

            operand_tilde = word_.TildeDetect(operand)
            if operand_tilde:
                operand = operand_tilde

            return bool_expr.Unary(unary_op, operand)

        # Error for [[ ) ]]
        # It's not WORD, UNARY_OP, or '('
        if self.bool_kind == Kind.Op:
            p_die(
                'Unexpected token in boolean expression (%s)' %
                ui.PrettyId(self.bool_id), loc.Word(self.cur_word))

        # 4. [[ foo ]]
        # Note: ( = ) and ( == ) may also hit this path, but they are NOT
        # Kind.Word
        single = self.cur_word
        single_tilde = word_.TildeDetect(single)
        if single_tilde:
            single = single_tilde
        self._Next()
        return bool_expr.WordTest(single)