OILS / osh / bool_parse.py View on Github | oils.pub

286 lines, 142 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9bool_parse.py - Parse boolean expressions.
10
11In contrast to test / [, the parsing of [[ expressions is done BEFORE
12evaluation. So we are parsing a list of Word instances to an AST, rather than
13a list of strings.
14
15Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
16syntax.
17
18 Expr : Term (OR Term)*
19 Term : Negated (AND Negated)*
20 Negated : '!'? Factor
21 Factor : WORD
22 | UNARY_OP WORD
23 | WORD BINARY_OP WORD
24 | '(' Expr ')'
25
26OR = || -o
27AND = && -a
28WORD = any word
29UNARY_OP: -z -n, etc.
30BINARY_OP: -gt, -ot, ==, etc.
31"""
32
33from _devbuild.gen.id_kind_asdl import Id, Kind
34#from _devbuild.gen.id_kind_asdl import Id_str, Kind_str # for debugging
35from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
36from _devbuild.gen.syntax_asdl import (loc, word_t, word_e, bool_expr,
37 bool_expr_t, Token)
38from core.error import p_die
39from display import ui
40from frontend import consts
41from mycpp.mylib import log
42from osh import word_
43
44from typing import List, Optional, Tuple, TYPE_CHECKING
45if TYPE_CHECKING:
46 from osh.word_parse import WordEmitter
47
48# import libc # for regex_parse
49
50_ = log
51
52
53class BoolParser(object):
54 """Parses [[ at compile time and [ at runtime."""
55
56 def __init__(self, w_parser):
57 # type: (WordEmitter) -> None
58 self.w_parser = w_parser
59 # Either one word or two words for lookahead
60 self.words = [] # type: List[word_t]
61
62 self.cur_word = None # type: Optional[word_t]
63 self.bool_id = Id.Undefined_Tok
64 self.bool_kind = Kind.Undefined
65
66 def _NextOne(self, lex_mode=lex_mode_e.DBracket):
67 # type: (lex_mode_t) -> None
68 n = len(self.words)
69 if n == 2:
70 assert lex_mode == lex_mode_e.DBracket
71 self.words[0] = self.words[1]
72 self.cur_word = self.words[0]
73 self.words.pop()
74 elif n in (0, 1):
75 w = self.w_parser.ReadWord(lex_mode) # may raise
76 if n == 0:
77 self.words.append(w)
78 else:
79 self.words[0] = w
80 self.cur_word = w
81
82 assert self.cur_word is not None
83 self.bool_id = word_.BoolId(self.cur_word)
84 self.bool_kind = consts.GetKind(self.bool_id)
85 #log('--- word %s', self.cur_word)
86 #log('bool_id %s %s %s', Id_str(self.bool_id), Kind_str(self.bool_kind), lex_mode)
87
88 def _Next(self, lex_mode=lex_mode_e.DBracket):
89 # type: (lex_mode_t) -> None
90 """Advance to the next token, skipping newlines.
91
92 We don't handle newlines in the lexer because we want the
93 newline after ]] to be Id.Op_Newline rather than Id.WS_Newline.
94 It's more complicated if it's Id.WS_Newline -- we might have to
95 unread tokens, etc.
96 """
97 while True:
98 self._NextOne(lex_mode=lex_mode)
99 if self.bool_id != Id.Op_Newline:
100 break
101
102 def _LookAhead(self):
103 # type: () -> word_t
104 n = len(self.words)
105 if n != 1:
106 raise AssertionError(n)
107
108 w = self.w_parser.ReadWord(lex_mode_e.DBracket) # may raise
109 self.words.append(w) # Save it for _Next()
110 return w
111
112 def Parse(self):
113 # type: () -> Tuple[bool_expr_t, Token]
114 self._Next()
115
116 node = self.ParseExpr()
117 if self.bool_id != Id.Lit_DRightBracket:
118 #p_die("Expected ]], got %r", self.cur_word, word=self.cur_word)
119 # NOTE: This might be better as unexpected token, since ]] doesn't always
120 # make sense.
121 p_die('Expected ]]', loc.Word(self.cur_word))
122
123 # Extract the ']]' keyword and return it's token for location tracking
124 right = word_.LiteralToken(self.cur_word)
125 assert right is not None
126
127 return node, right
128
129 def _TestAtEnd(self):
130 # type: () -> bool
131 """For unit tests only."""
132 return self.bool_id == Id.Lit_DRightBracket
133
134 def ParseForBuiltin(self):
135 # type: () -> bool_expr_t
136 """For test builtin."""
137 self._Next()
138
139 node = self.ParseExpr()
140 if self.bool_id != Id.Eof_Real:
141 p_die('Unexpected trailing word %s' % word_.Pretty(self.cur_word),
142 loc.Word(self.cur_word))
143
144 return node
145
146 def ParseExpr(self):
147 # type: () -> bool_expr_t
148 """
149 Iterative:
150 Expr : Term (OR Term)*
151
152 Right recursion:
153 Expr : Term (OR Expr)?
154 """
155 left = self.ParseTerm()
156 # [[ uses || but [ uses -o
157 if self.bool_id in (Id.Op_DPipe, Id.BoolUnary_o):
158 self._Next()
159 right = self.ParseExpr()
160 return bool_expr.LogicalOr(left, right)
161 else:
162 return left
163
164 def ParseTerm(self):
165 # type: () -> bool_expr_t
166 """
167 Term : Negated (AND Negated)*
168
169 Right recursion:
170 Term : Negated (AND Term)?
171 """
172 left = self.ParseNegatedFactor()
173 # [[ uses && but [ uses -a
174 if self.bool_id in (Id.Op_DAmp, Id.BoolUnary_a):
175 self._Next()
176 right = self.ParseTerm()
177 return bool_expr.LogicalAnd(left, right)
178 else:
179 return left
180
181 def ParseNegatedFactor(self):
182 # type: () -> bool_expr_t
183 """
184 Negated : '!'? Factor
185 """
186 if self.bool_id == Id.KW_Bang:
187 self._Next()
188 child = self.ParseFactor()
189 return bool_expr.LogicalNot(child)
190 else:
191 return self.ParseFactor()
192
193 def ParseFactor(self):
194 # type: () -> bool_expr_t
195 """
196 Factor : WORD
197 | UNARY_OP WORD
198 | WORD =~ Regex
199 | WORD BINARY_OP WORD
200 | '(' Expr ')'
201
202 Note: this grammar is ambiguous, and the design of the 'test' builtin
203 is terrible in general.
204
205 We roughly follow bash's ordering rules, which gives a result that
206 almost all shells agree on:
207
208 1. Look for ( first
209 2. Then look ahead to see if you got Kind.BoolBinary, like =
210 3. Then unary operators: test -d / [[ -d / ]]
211 4. Then non-empty string tests: test foo [[ foo ]]
212 """
213 if self.bool_id == Id.Op_LParen:
214 self._Next()
215 node = self.ParseExpr() # type: bool_expr_t
216 if self.bool_id != Id.Op_RParen:
217 p_die('Expected ), got %s' % word_.Pretty(self.cur_word),
218 loc.Word(self.cur_word))
219 self._Next()
220 return node
221
222 # Peek ahead another token.
223 t2 = self._LookAhead()
224 t2_bool_id = word_.BoolId(t2)
225 t2_bool_kind = consts.GetKind(t2_bool_id)
226
227 #log('t2 %s / t2_bool_id %s / t2_bool_kind %s', t2, t2_bool_id, t2_bool_kind)
228 # Op for < and >, -a and -o pun
229 if t2_bool_kind == Kind.BoolBinary or t2_bool_id in (Id.Op_Less,
230 Id.Op_Great):
231 left = self.cur_word
232
233 self._Next()
234 op = self.bool_id
235
236 if t2_bool_id == Id.BoolBinary_EqualTilde:
237 self._Next(lex_mode=lex_mode_e.BashRegex)
238 else:
239 self._Next()
240
241 right = self.cur_word
242 self._Next()
243
244 tilde = word_.TildeDetect(left)
245 if tilde:
246 left = tilde
247 tilde = word_.TildeDetect(right)
248 if tilde:
249 right = tilde
250
251 return bool_expr.Binary(op, left, right)
252
253 if self.bool_kind == Kind.BoolUnary:
254 # Just save the type and not the token itself?
255 op = self.bool_id
256 self._Next()
257 w = self.cur_word
258 # e.g. [[ -f < ]]. But [[ -f '<' ]] is OK
259
260 tag = w.tag()
261 if tag != word_e.Compound and tag != word_e.String:
262 p_die('Invalid argument to unary operator', loc.Word(w))
263 self._Next()
264
265 tilde = word_.TildeDetect(w)
266 if tilde:
267 w = tilde
268
269 node = bool_expr.Unary(op, w)
270 return node
271
272 # [[ foo ]]
273 # Note: ( = ) and ( == ) may also hit this path, but they are NOT Kind.Word
274 if self.bool_kind != Kind.Op:
275 w = self.cur_word
276 tilde = word_.TildeDetect(w)
277 if tilde:
278 w = tilde
279 self._Next()
280 return bool_expr.WordTest(w)
281
282 # Error for [[ ) ]]
283 # It's not WORD, UNARY_OP, or '('
284 p_die(
285 'Unexpected token in boolean expression (%s)' %
286 ui.PrettyId(self.bool_id), loc.Word(self.cur_word))