OILS / osh / bool_parse.py View on Github | oils.pub

302 lines, 144 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
bool_parse.py - Parse boolean expressions.

In contrast to test / [, the parsing of [[ expressions is done BEFORE
evaluation.  So we are parsing a list of Word instances to an AST, rather than
a list of strings.

Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
syntax.

  Expr    : Term (OR Term)*
  Term    : Negated (AND Negated)*
  Negated : '!'? Factor
  Factor  : WORD
          | UNARY_OP WORD
          | WORD BINARY_OP WORD
          | '(' Expr ')'

OR = || -o
AND = && -a
WORD = any word
UNARY_OP: -z -n, etc.
BINARY_OP: -gt, -ot, ==, etc.
31"""
32
33from _devbuild.gen.id_kind_asdl import Id, Kind
34from _devbuild.gen.id_kind_asdl import Id_str # for debugging
35from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
36from _devbuild.gen.syntax_asdl import (loc, word_t, word_e, bool_expr,
37 bool_expr_t, Token)
38from core.error import p_die
39from display import ui
40from frontend import consts
41from mycpp import mylib
42from mycpp.mylib import log
43from osh import word_
44
45from typing import Optional, Tuple, TYPE_CHECKING
46if TYPE_CHECKING:
47 from osh.word_parse import WordEmitter
48
49# import libc # for regex_parse
50
51_ = log
52
53
class BoolParser(object):
    """Parses [[ at compile time and [ at runtime.

    This is a recursive-descent parser.  It pulls words from a WordEmitter one
    at a time and builds a bool_expr_t tree:

    - Parse() is the entry point that expects the expression to end with ]]
    - ParseForBuiltin() is the entry point that expects it to end at EOF
    """

    def __init__(self, w_parser):
        # type: (WordEmitter) -> None
        self.w_parser = w_parser

        # The word being examined, plus its bool Id and Kind.  All three are
        # updated together by _NextOne().
        self.cur_word = None  # type: Optional[word_t]
        self.bool_id = Id.Undefined_Tok
        self.bool_kind = Kind.Undefined

        # Words that were read ahead of cur_word and not consumed yet.
        # Within this class only ahead1 is ever filled, by _LookAhead().
        self.ahead1 = None  # type: Optional[word_t]
        self.ahead2 = None  # type: Optional[word_t]

    if mylib.PYTHON:
        # For unit tests only

        def _Dump(self):
            # type: () -> None
            """Log the current word, its bool Id, and both lookahead slots."""
            log('cur_word = %s', self.cur_word)
            log('bool_id = %s', Id_str(self.bool_id))
            log('ahead1 = %s', self.ahead1)
            log('ahead2 = %s', self.ahead2)
            log(' ///')

        def _TestAtEnd(self):
            # type: () -> bool
            """Return True when the current word is the closing ]]."""
            return self.bool_id == Id.Lit_DRightBracket

    def _DetectTilde(self, w):
        # type: (word_t) -> word_t
        """Return the result of word_.TildeDetect(w) if truthy, otherwise w."""
        detected = word_.TildeDetect(w)
        if detected:
            return detected
        return w

    def _NextOne(self, lex_mode=lex_mode_e.DBracket):
        # type: (lex_mode_t) -> None
        """Advance by exactly one word.

        Sets self.cur_word, self.bool_id, self.bool_kind.

        Buffered lookahead words are consumed first; the word parser is only
        asked for a new word (in the given lex_mode) when nothing is buffered.
        """
        pending = self.ahead1
        if self.ahead2:
            # Two words buffered: shift both slots down by one
            self.ahead1 = self.ahead2
            self.ahead2 = None
            self.cur_word = pending
        elif pending:
            # Only ahead1 is buffered
            self.ahead1 = None
            self.cur_word = pending
        else:
            self.cur_word = self.w_parser.ReadWord(lex_mode)  # may raise

        word_id = word_.BoolId(self.cur_word)
        self.bool_id = word_id
        self.bool_kind = consts.GetKind(word_id)

        #log('bool_id %s %s %s', Id_str(self.bool_id), Kind_str(self.bool_kind), lex_mode)

    def _Next(self, lex_mode=lex_mode_e.DBracket):
        # type: (lex_mode_t) -> None
        """Advance to the next token, skipping newlines.

        We don't handle newlines in the lexer because we want the
        newline after ]] to be Id.Op_Newline rather than Id.WS_Newline.
        It's more complicated if it's Id.WS_Newline -- we might have to
        unread tokens, etc.
        """
        self._NextOne(lex_mode=lex_mode)
        while self.bool_id == Id.Op_Newline:
            self._NextOne(lex_mode=lex_mode)

    def _LookAhead(self):
        # type: () -> word_t
        """Read one more word, buffer it in self.ahead1, and return it.

        The current word is left untouched.

        TODO: change to LookAheadForBinary, with 2 tokens, for

        [ -f foo ]   # unary
        [ -f = -f ]  # binary
        [ -f = ]     # unary
        """
        peeked = self.w_parser.ReadWord(lex_mode_e.DBracket)  # may raise
        self.ahead1 = peeked
        return peeked

    def Parse(self):
        # type: () -> Tuple[bool_expr_t, Token]
        """Parse an expression that must be terminated by ]].

        Returns the expression tree and the token of the closing ]].  Calls
        p_die() if the word after the expression is not ]].
        """
        self._Next()

        expr = self.ParseExpr()
        if self.bool_id != Id.Lit_DRightBracket:
            #p_die("Expected ]], got %r", self.cur_word, word=self.cur_word)
            # NOTE: This might be better as unexpected token, since ]] doesn't
            # always make sense.
            p_die('Expected ]]', loc.Word(self.cur_word))

        # Extract the ']]' keyword and return its token for location tracking
        closing = word_.LiteralToken(self.cur_word)
        assert closing is not None

        return expr, closing

    def ParseForBuiltin(self):
        # type: () -> bool_expr_t
        """For test builtin.

        Parses an expression that must be followed by Id.Eof_Real; calls
        p_die() on any trailing word.
        """
        self._Next()

        expr = self.ParseExpr()
        if self.bool_id != Id.Eof_Real:
            p_die('Unexpected trailing word %s' % word_.Pretty(self.cur_word),
                  loc.Word(self.cur_word))

        return expr

    def ParseExpr(self):
        # type: () -> bool_expr_t
        """
        Grammar, iterative form:
          Expr : Term (OR Term)*

        Implemented here with right recursion:
          Expr : Term (OR Expr)?
        """
        first = self.ParseTerm()

        # [[ uses || but [ uses -o
        at_or = self.bool_id in (Id.Op_DPipe, Id.BoolUnary_o)
        if not at_or:
            return first

        self._Next()
        rest = self.ParseExpr()
        return bool_expr.LogicalOr(first, rest)

    def ParseTerm(self):
        # type: () -> bool_expr_t
        """
        Grammar, iterative form:
          Term : Negated (AND Negated)*

        Implemented here with right recursion:
          Term : Negated (AND Term)?
        """
        first = self.ParseNegatedFactor()

        # [[ uses && but [ uses -a
        at_and = self.bool_id in (Id.Op_DAmp, Id.BoolUnary_a)
        if not at_and:
            return first

        self._Next()
        rest = self.ParseTerm()
        return bool_expr.LogicalAnd(first, rest)

    def ParseNegatedFactor(self):
        # type: () -> bool_expr_t
        """
        Negated : '!'? Factor
        """
        if self.bool_id != Id.KW_Bang:
            return self.ParseFactor()

        self._Next()  # past !
        operand = self.ParseFactor()
        return bool_expr.LogicalNot(operand)

    def ParseFactor(self):
        # type: () -> bool_expr_t
        """
        Factor : WORD
               | UNARY_OP WORD
               | WORD =~ Regex
               | WORD BINARY_OP WORD
               | '(' Expr ')'

        Note: this grammar is ambiguous, and the design of the 'test' builtin
        is terrible in general.

        We roughly follow bash's ordering rules, which gives a result that
        almost all shells agree on:

        1. Look for ( first
        2. Then look ahead to see if you got Kind.BoolBinary, like =
        3. Then unary operators: test -d /   [[ -d / ]]
        4. Then non-empty string tests: test foo   [[ foo ]]
        """
        # 1. Parenthesized sub-expression
        if self.bool_id == Id.Op_LParen:
            self._Next()  # past (
            inner = self.ParseExpr()  # type: bool_expr_t
            if self.bool_id != Id.Op_RParen:
                p_die('Expected ), got %s' % word_.Pretty(self.cur_word),
                      loc.Word(self.cur_word))
            self._Next()  # past )
            return inner

        # Peek ahead another token.
        peeked = self._LookAhead()
        peek_id = word_.BoolId(peeked)
        peek_kind = consts.GetKind(peek_id)

        #log('peeked %s / peek_id %s / peek_kind %s', peeked, peek_id, peek_kind)

        # 2. Binary operator in second position.
        # Op for < and >, -a and -o pun
        is_binary = (peek_kind == Kind.BoolBinary or
                     peek_id in (Id.Op_Less, Id.Op_Great))
        if is_binary:
            lhs = self.cur_word

            self._Next()  # now at the operator
            binary_op = self.bool_id

            # The right-hand side of =~ is read in the regex lexer mode; for
            # every other operator this is the same mode _Next() defaults to.
            rhs_mode = lex_mode_e.DBracket  # type: lex_mode_t
            if peek_id == Id.BoolBinary_EqualTilde:
                rhs_mode = lex_mode_e.BashRegex
            self._Next(lex_mode=rhs_mode)

            rhs = self.cur_word
            self._Next()  # past the right operand

            lhs = self._DetectTilde(lhs)
            rhs = self._DetectTilde(rhs)

            return bool_expr.Binary(binary_op, lhs, rhs)

        # 3. Unary operator followed by its argument
        if self.bool_kind == Kind.BoolUnary:
            # Just save the type and not the token itself?
            unary_op = self.bool_id
            self._Next()
            arg = self.cur_word

            # e.g. [[ -f < ]].  But [[ -f '<' ]] is OK
            arg_tag = arg.tag()
            if not (arg_tag == word_e.Compound or arg_tag == word_e.String):
                p_die('Invalid argument to unary operator', loc.Word(arg))
            self._Next()  # past the argument

            arg = self._DetectTilde(arg)
            return bool_expr.Unary(unary_op, arg)

        # Error for [[ ) ]]
        # It's not WORD, UNARY_OP, or '('
        if self.bool_kind == Kind.Op:
            p_die(
                'Unexpected token in boolean expression (%s)' %
                ui.PrettyId(self.bool_id), loc.Word(self.cur_word))

        # 4. [[ foo ]]
        # Note: ( = ) and ( == ) may also hit this path, but they are NOT
        # Kind.Word
        single = self._DetectTilde(self.cur_word)
        self._Next()
        return bool_expr.WordTest(single)