osh/bool_parse.py

OILS / osh / bool_parse.py View on Github | oilshell.org

272 lines, 143 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	bool_parse.py - Parse boolean expressions.
10
11	In contrast to test / [, the parsing of [[ expressions is done BEFORE
12	evaluation. So we are parsing a list of Word instances to an AST, rather than
13	a list of strings.
14
15	Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
16	syntax.
17
18	Expr : Term (OR Term)*
19	Term : Negated (AND Negated)*
20	Negated : '!'? Factor
21	Factor : WORD
22	\| UNARY_OP WORD
23	\| WORD BINARY_OP WORD
24	\| '(' Expr ')'
25
26	OR = \|\| -o
27	AND = && -a
28	WORD = any word
29	UNARY_OP: -z -n, etc.
30	BINARY_OP: -gt, -ot, ==, etc.
31	"""
32
33	from _devbuild.gen.id_kind_asdl import Id, Kind
34	from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
35	from _devbuild.gen.syntax_asdl import (loc, word_t, word_e, bool_expr,
36	bool_expr_t, Token)
37	from display import ui
38	from core.error import p_die
39	from frontend import consts
40	from mycpp.mylib import log
41	from osh import word_
42
43	from typing import List, Optional, Tuple, TYPE_CHECKING
44	if TYPE_CHECKING:
45	from osh.word_parse import WordEmitter
46
47	# import libc # for regex_parse
48
49	_ = log
50
51
52	class BoolParser(object):
53	"""Parses [[ at compile time and [ at runtime."""
54
55	def __init__(self, w_parser):
56	# type: (WordEmitter) -> None
57	self.w_parser = w_parser
58	# Either one word or two words for lookahead
59	self.words = [] # type: List[word_t]
60
61	self.cur_word = None # type: Optional[word_t]
62	self.bool_id = Id.Undefined_Tok
63	self.bool_kind = Kind.Undefined
64
65	def _NextOne(self, lex_mode=lex_mode_e.DBracket):
66	# type: (lex_mode_t) -> None
67	n = len(self.words)
68	if n == 2:
69	assert lex_mode == lex_mode_e.DBracket
70	self.words[0] = self.words[1]
71	self.cur_word = self.words[0]
72	self.words.pop()
73	elif n in (0, 1):
74	w = self.w_parser.ReadWord(lex_mode) # may raise
75	if n == 0:
76	self.words.append(w)
77	else:
78	self.words[0] = w
79	self.cur_word = w
80
81	assert self.cur_word is not None
82	self.bool_id = word_.BoolId(self.cur_word)
83	self.bool_kind = consts.GetKind(self.bool_id)
84	#log('--- word %s', self.cur_word)
85	#log('bool_id %s %s %s', Id_str(self.bool_id), Kind_str(self.bool_kind), lex_mode)
86
87	def _Next(self, lex_mode=lex_mode_e.DBracket):
88	# type: (lex_mode_t) -> None
89	"""Advance to the next token, skipping newlines.
90
91	We don't handle newlines in the lexer because we want the
92	newline after ]] to be Id.Op_Newline rather than Id.WS_Newline.
93	It's more complicated if it's Id.WS_Newline -- we might have to
94	unread tokens, etc.
95	"""
96	while True:
97	self._NextOne(lex_mode=lex_mode)
98	if self.bool_id != Id.Op_Newline:
99	break
100
101	def _LookAhead(self):
102	# type: () -> word_t
103	n = len(self.words)
104	if n != 1:
105	raise AssertionError(n)
106
107	w = self.w_parser.ReadWord(lex_mode_e.DBracket) # may raise
108	self.words.append(w) # Save it for _Next()
109	return w
110
111	def Parse(self):
112	# type: () -> Tuple[bool_expr_t, Token]
113	self._Next()
114
115	node = self.ParseExpr()
116	if self.bool_id != Id.Lit_DRightBracket:
117	#p_die("Expected ]], got %r", self.cur_word, word=self.cur_word)
118	# NOTE: This might be better as unexpected token, since ]] doesn't always
119	# make sense.
120	p_die('Expected ]]', loc.Word(self.cur_word))
121
122	# Extract the ']]' keyword and return it's token for location tracking
123	right = word_.LiteralToken(self.cur_word)
124	assert right is not None
125
126	return node, right
127
128	def _TestAtEnd(self):
129	# type: () -> bool
130	"""For unit tests only."""
131	return self.bool_id == Id.Lit_DRightBracket
132
133	def ParseForBuiltin(self):
134	# type: () -> bool_expr_t
135	"""For test builtin."""
136	self._Next()
137
138	node = self.ParseExpr()
139	if self.bool_id != Id.Eof_Real:
140	p_die('Unexpected trailing word %s' % word_.Pretty(self.cur_word),
141	loc.Word(self.cur_word))
142
143	return node
144
145	def ParseExpr(self):
146	# type: () -> bool_expr_t
147	"""
148	Iterative:
149	Expr : Term (OR Term)*
150
151	Right recursion:
152	Expr : Term (OR Expr)?
153	"""
154	left = self.ParseTerm()
155	# [[ uses \|\| but [ uses -o
156	if self.bool_id in (Id.Op_DPipe, Id.BoolUnary_o):
157	self._Next()
158	right = self.ParseExpr()
159	return bool_expr.LogicalOr(left, right)
160	else:
161	return left
162
163	def ParseTerm(self):
164	# type: () -> bool_expr_t
165	"""
166	Term : Negated (AND Negated)*
167
168	Right recursion:
169	Term : Negated (AND Term)?
170	"""
171	left = self.ParseNegatedFactor()
172	# [[ uses && but [ uses -a
173	if self.bool_id in (Id.Op_DAmp, Id.BoolUnary_a):
174	self._Next()
175	right = self.ParseTerm()
176	return bool_expr.LogicalAnd(left, right)
177	else:
178	return left
179
180	def ParseNegatedFactor(self):
181	# type: () -> bool_expr_t
182	"""
183	Negated : '!'? Factor
184	"""
185	if self.bool_id == Id.KW_Bang:
186	self._Next()
187	child = self.ParseFactor()
188	return bool_expr.LogicalNot(child)
189	else:
190	return self.ParseFactor()
191
192	def ParseFactor(self):
193	# type: () -> bool_expr_t
194	"""
195	Factor : WORD
196	\| UNARY_OP WORD
197	\| WORD =~ Regex
198	\| WORD BINARY_OP WORD
199	\| '(' Expr ')'
200	"""
201	if self.bool_kind == Kind.BoolUnary:
202	# Just save the type and not the token itself?
203	op = self.bool_id
204	self._Next()
205	w = self.cur_word
206	# e.g. [[ -f < ]]. But [[ -f '<' ]] is OK
207
208	tag = w.tag()
209	if tag != word_e.Compound and tag != word_e.String:
210	p_die('Invalid argument to unary operator', loc.Word(w))
211	self._Next()
212
213	tilde = word_.TildeDetect(w)
214	if tilde:
215	w = tilde
216
217	node = bool_expr.Unary(op, w) # type: bool_expr_t
218	return node
219
220	if self.bool_kind == Kind.Word:
221	# Peek ahead another token.
222	t2 = self._LookAhead()
223	t2_bool_id = word_.BoolId(t2)
224	t2_bool_kind = consts.GetKind(t2_bool_id)
225
226	#log('t2 %s / t2_bool_id %s / t2_bool_kind %s', t2, t2_bool_id, t2_bool_kind)
227	# Op for < and >, -a and -o pun
228	if t2_bool_kind == Kind.BoolBinary or t2_bool_id in (Id.Op_Less,
229	Id.Op_Great):
230	left = self.cur_word
231
232	self._Next()
233	op = self.bool_id
234
235	if t2_bool_id == Id.BoolBinary_EqualTilde:
236	self._Next(lex_mode=lex_mode_e.BashRegex)
237	else:
238	self._Next()
239
240	right = self.cur_word
241	self._Next()
242
243	tilde = word_.TildeDetect(left)
244	if tilde:
245	left = tilde
246	tilde = word_.TildeDetect(right)
247	if tilde:
248	right = tilde
249
250	return bool_expr.Binary(op, left, right)
251
252	else: # [[ foo ]]
253	w = self.cur_word
254	tilde = word_.TildeDetect(w)
255	if tilde:
256	w = tilde
257	self._Next()
258	return bool_expr.WordTest(w)
259
260	if self.bool_id == Id.Op_LParen:
261	self._Next()
262	node = self.ParseExpr()
263	if self.bool_id != Id.Op_RParen:
264	p_die('Expected ), got %s' % word_.Pretty(self.cur_word),
265	loc.Word(self.cur_word))
266	self._Next()
267	return node
268
269	# It's not WORD, UNARY_OP, or '('
270	p_die(
271	'Unexpected token in boolean expression (%s)' %
272	ui.PrettyId(self.bool_id), loc.Word(self.cur_word))