OILS / osh / bool_parse.py View on Github | oils.pub

296 lines, 145 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9bool_parse.py - Parse boolean expressions.
10
11In contrast to test / [, the parsing of [[ expressions is done BEFORE
12evaluation. So we are parsing a list of Word instances to an AST, rather than
13a list of strings.
14
15Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
16syntax.
17
18 Expr : Term (OR Term)*
19 Term : Negated (AND Negated)*
20 Negated : '!'? Factor
21 Factor : WORD
22 | UNARY_OP WORD
23 | WORD BINARY_OP WORD
24 | '(' Expr ')'
25
26OR = || -o
27AND = && -a
28WORD = any word
29UNARY_OP: -z -n, etc.
30BINARY_OP: -gt, -ot, ==, etc.
31"""
32
33from _devbuild.gen.id_kind_asdl import Id, Kind
34#from _devbuild.gen.id_kind_asdl import Id_str, Kind_str # for debugging
35from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
36from _devbuild.gen.syntax_asdl import (loc, word_t, word_e, bool_expr,
37 bool_expr_t, Token)
38from core.error import p_die
39from display import ui
40from frontend import consts
41from mycpp.mylib import log
42from osh import word_
43
44from typing import List, Optional, Tuple, TYPE_CHECKING
45if TYPE_CHECKING:
46 from osh.word_parse import WordEmitter
47
48# import libc # for regex_parse
49
50_ = log
51
52
53class BoolParser(object):
54 """Parses [[ at compile time and [ at runtime."""
55
56 def __init__(self, w_parser):
57 # type: (WordEmitter) -> None
58 self.w_parser = w_parser
59 # Either one word or two words for lookahead
60 self.words = [] # type: List[word_t]
61
62 self.cur_word = None # type: Optional[word_t]
63 self.bool_id = Id.Undefined_Tok
64 self.bool_kind = Kind.Undefined
65
66 def _NextOne(self, lex_mode=lex_mode_e.DBracket):
67 # type: (lex_mode_t) -> None
68 n = len(self.words)
69 if n == 2:
70 assert lex_mode == lex_mode_e.DBracket
71 self.words[0] = self.words[1]
72 self.cur_word = self.words[0]
73 self.words.pop()
74 elif n == 1:
75 w = self.w_parser.ReadWord(lex_mode) # may raise
76 self.words[0] = w
77 self.cur_word = w
78 elif n == 0:
79 w = self.w_parser.ReadWord(lex_mode) # may raise
80 self.words.append(w)
81 self.cur_word = w
82 else:
83 raise AssertionError('Unexpected n = %d' % n)
84
85 assert self.cur_word is not None
86 self.bool_id = word_.BoolId(self.cur_word)
87 self.bool_kind = consts.GetKind(self.bool_id)
88 #log('--- word %s', self.cur_word)
89 #log('bool_id %s %s %s', Id_str(self.bool_id), Kind_str(self.bool_kind), lex_mode)
90
91 def _Next(self, lex_mode=lex_mode_e.DBracket):
92 # type: (lex_mode_t) -> None
93 """Advance to the next token, skipping newlines.
94
95 We don't handle newlines in the lexer because we want the
96 newline after ]] to be Id.Op_Newline rather than Id.WS_Newline.
97 It's more complicated if it's Id.WS_Newline -- we might have to
98 unread tokens, etc.
99 """
100 while True:
101 self._NextOne(lex_mode=lex_mode)
102 if self.bool_id != Id.Op_Newline:
103 break
104
105 def _LookAhead(self):
106 # type: () -> word_t
107 """
108 TODO: Look ahead 2 tokens for
109
110 [ -f foo ] # unary
111 [ -f = -f ] # binary
112 [ -f = ] # unary
113 """
114 n = len(self.words)
115 if n != 1:
116 raise AssertionError(n)
117
118 w = self.w_parser.ReadWord(lex_mode_e.DBracket) # may raise
119 self.words.append(w) # Save it for _Next()
120 return w
121
122 def Parse(self):
123 # type: () -> Tuple[bool_expr_t, Token]
124 self._Next()
125
126 node = self.ParseExpr()
127 if self.bool_id != Id.Lit_DRightBracket:
128 #p_die("Expected ]], got %r", self.cur_word, word=self.cur_word)
129 # NOTE: This might be better as unexpected token, since ]] doesn't always
130 # make sense.
131 p_die('Expected ]]', loc.Word(self.cur_word))
132
133 # Extract the ']]' keyword and return it's token for location tracking
134 right = word_.LiteralToken(self.cur_word)
135 assert right is not None
136
137 return node, right
138
139 def _TestAtEnd(self):
140 # type: () -> bool
141 """For unit tests only."""
142 return self.bool_id == Id.Lit_DRightBracket
143
144 def ParseForBuiltin(self):
145 # type: () -> bool_expr_t
146 """For test builtin."""
147 self._Next()
148
149 node = self.ParseExpr()
150 if self.bool_id != Id.Eof_Real:
151 p_die('Unexpected trailing word %s' % word_.Pretty(self.cur_word),
152 loc.Word(self.cur_word))
153
154 return node
155
156 def ParseExpr(self):
157 # type: () -> bool_expr_t
158 """
159 Iterative:
160 Expr : Term (OR Term)*
161
162 Right recursion:
163 Expr : Term (OR Expr)?
164 """
165 left = self.ParseTerm()
166 # [[ uses || but [ uses -o
167 if self.bool_id in (Id.Op_DPipe, Id.BoolUnary_o):
168 self._Next()
169 right = self.ParseExpr()
170 return bool_expr.LogicalOr(left, right)
171 else:
172 return left
173
174 def ParseTerm(self):
175 # type: () -> bool_expr_t
176 """
177 Term : Negated (AND Negated)*
178
179 Right recursion:
180 Term : Negated (AND Term)?
181 """
182 left = self.ParseNegatedFactor()
183 # [[ uses && but [ uses -a
184 if self.bool_id in (Id.Op_DAmp, Id.BoolUnary_a):
185 self._Next()
186 right = self.ParseTerm()
187 return bool_expr.LogicalAnd(left, right)
188 else:
189 return left
190
191 def ParseNegatedFactor(self):
192 # type: () -> bool_expr_t
193 """
194 Negated : '!'? Factor
195 """
196 if self.bool_id == Id.KW_Bang:
197 self._Next()
198 child = self.ParseFactor()
199 return bool_expr.LogicalNot(child)
200 else:
201 return self.ParseFactor()
202
203 def ParseFactor(self):
204 # type: () -> bool_expr_t
205 """
206 Factor : WORD
207 | UNARY_OP WORD
208 | WORD =~ Regex
209 | WORD BINARY_OP WORD
210 | '(' Expr ')'
211
212 Note: this grammar is ambiguous, and the design of the 'test' builtin
213 is terrible in general.
214
215 We roughly follow bash's ordering rules, which gives a result that
216 almost all shells agree on:
217
218 1. Look for ( first
219 2. Then look ahead to see if you got Kind.BoolBinary, like =
220 3. Then unary operators: test -d / [[ -d / ]]
221 4. Then non-empty string tests: test foo [[ foo ]]
222 """
223 if self.bool_id == Id.Op_LParen:
224 self._Next()
225 node = self.ParseExpr() # type: bool_expr_t
226 if self.bool_id != Id.Op_RParen:
227 p_die('Expected ), got %s' % word_.Pretty(self.cur_word),
228 loc.Word(self.cur_word))
229 self._Next()
230 return node
231
232 # Peek ahead another token.
233 t2 = self._LookAhead()
234 t2_bool_id = word_.BoolId(t2)
235 t2_bool_kind = consts.GetKind(t2_bool_id)
236
237 #log('t2 %s / t2_bool_id %s / t2_bool_kind %s', t2, t2_bool_id, t2_bool_kind)
238 # Op for < and >, -a and -o pun
239 if t2_bool_kind == Kind.BoolBinary or t2_bool_id in (Id.Op_Less,
240 Id.Op_Great):
241 left = self.cur_word
242
243 self._Next()
244 op = self.bool_id
245
246 if t2_bool_id == Id.BoolBinary_EqualTilde:
247 self._Next(lex_mode=lex_mode_e.BashRegex)
248 else:
249 self._Next()
250
251 right = self.cur_word
252 self._Next()
253
254 tilde = word_.TildeDetect(left)
255 if tilde:
256 left = tilde
257 tilde = word_.TildeDetect(right)
258 if tilde:
259 right = tilde
260
261 return bool_expr.Binary(op, left, right)
262
263 if self.bool_kind == Kind.BoolUnary:
264 # Just save the type and not the token itself?
265 op = self.bool_id
266 self._Next()
267 w = self.cur_word
268 # e.g. [[ -f < ]]. But [[ -f '<' ]] is OK
269
270 tag = w.tag()
271 if tag != word_e.Compound and tag != word_e.String:
272 p_die('Invalid argument to unary operator', loc.Word(w))
273 self._Next()
274
275 tilde = word_.TildeDetect(w)
276 if tilde:
277 w = tilde
278
279 node = bool_expr.Unary(op, w)
280 return node
281
282 # [[ foo ]]
283 # Note: ( = ) and ( == ) may also hit this path, but they are NOT Kind.Word
284 if self.bool_kind != Kind.Op:
285 w = self.cur_word
286 tilde = word_.TildeDetect(w)
287 if tilde:
288 w = tilde
289 self._Next()
290 return bool_expr.WordTest(w)
291
292 # Error for [[ ) ]]
293 # It's not WORD, UNARY_OP, or '('
294 p_die(
295 'Unexpected token in boolean expression (%s)' %
296 ui.PrettyId(self.bool_id), loc.Word(self.cur_word))