OILS / osh / bool_parse.py View on Github | oilshell.org

272 lines, 143 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9bool_parse.py - Parse boolean expressions.
10
11In contrast to test / [, the parsing of [[ expressions is done BEFORE
12evaluation. So we are parsing a list of Word instances to an AST, rather than
13a list of strings.
14
15Grammar from http://compilers.iecc.com/crenshaw/tutor6.txt, adapted to ANTLR
16syntax.
17
18 Expr : Term (OR Term)*
19 Term : Negated (AND Negated)*
20 Negated : '!'? Factor
21 Factor : WORD
22 | UNARY_OP WORD
23 | WORD BINARY_OP WORD
24 | '(' Expr ')'
25
26OR = || -o
27AND = && -a
28WORD = any word
29UNARY_OP: -z -n, etc.
30BINARY_OP: -gt, -ot, ==, etc.
31"""
32
33from _devbuild.gen.id_kind_asdl import Id, Kind
34from _devbuild.gen.types_asdl import lex_mode_t, lex_mode_e
35from _devbuild.gen.syntax_asdl import (loc, word_t, word_e, bool_expr,
36 bool_expr_t, Token)
37from display import ui
38from core.error import p_die
39from frontend import consts
40from mycpp.mylib import log
41from osh import word_
42
43from typing import List, Optional, Tuple, TYPE_CHECKING
44if TYPE_CHECKING:
45 from osh.word_parse import WordEmitter
46
47# import libc # for regex_parse
48
49_ = log
50
51
52class BoolParser(object):
53 """Parses [[ at compile time and [ at runtime."""
54
55 def __init__(self, w_parser):
56 # type: (WordEmitter) -> None
57 self.w_parser = w_parser
58 # Either one word or two words for lookahead
59 self.words = [] # type: List[word_t]
60
61 self.cur_word = None # type: Optional[word_t]
62 self.bool_id = Id.Undefined_Tok
63 self.bool_kind = Kind.Undefined
64
65 def _NextOne(self, lex_mode=lex_mode_e.DBracket):
66 # type: (lex_mode_t) -> None
67 n = len(self.words)
68 if n == 2:
69 assert lex_mode == lex_mode_e.DBracket
70 self.words[0] = self.words[1]
71 self.cur_word = self.words[0]
72 self.words.pop()
73 elif n in (0, 1):
74 w = self.w_parser.ReadWord(lex_mode) # may raise
75 if n == 0:
76 self.words.append(w)
77 else:
78 self.words[0] = w
79 self.cur_word = w
80
81 assert self.cur_word is not None
82 self.bool_id = word_.BoolId(self.cur_word)
83 self.bool_kind = consts.GetKind(self.bool_id)
84 #log('--- word %s', self.cur_word)
85 #log('bool_id %s %s %s', Id_str(self.bool_id), Kind_str(self.bool_kind), lex_mode)
86
87 def _Next(self, lex_mode=lex_mode_e.DBracket):
88 # type: (lex_mode_t) -> None
89 """Advance to the next token, skipping newlines.
90
91 We don't handle newlines in the lexer because we want the
92 newline after ]] to be Id.Op_Newline rather than Id.WS_Newline.
93 It's more complicated if it's Id.WS_Newline -- we might have to
94 unread tokens, etc.
95 """
96 while True:
97 self._NextOne(lex_mode=lex_mode)
98 if self.bool_id != Id.Op_Newline:
99 break
100
101 def _LookAhead(self):
102 # type: () -> word_t
103 n = len(self.words)
104 if n != 1:
105 raise AssertionError(n)
106
107 w = self.w_parser.ReadWord(lex_mode_e.DBracket) # may raise
108 self.words.append(w) # Save it for _Next()
109 return w
110
111 def Parse(self):
112 # type: () -> Tuple[bool_expr_t, Token]
113 self._Next()
114
115 node = self.ParseExpr()
116 if self.bool_id != Id.Lit_DRightBracket:
117 #p_die("Expected ]], got %r", self.cur_word, word=self.cur_word)
118 # NOTE: This might be better as unexpected token, since ]] doesn't always
119 # make sense.
120 p_die('Expected ]]', loc.Word(self.cur_word))
121
122 # Extract the ']]' keyword and return it's token for location tracking
123 right = word_.LiteralToken(self.cur_word)
124 assert right is not None
125
126 return node, right
127
128 def _TestAtEnd(self):
129 # type: () -> bool
130 """For unit tests only."""
131 return self.bool_id == Id.Lit_DRightBracket
132
133 def ParseForBuiltin(self):
134 # type: () -> bool_expr_t
135 """For test builtin."""
136 self._Next()
137
138 node = self.ParseExpr()
139 if self.bool_id != Id.Eof_Real:
140 p_die('Unexpected trailing word %s' % word_.Pretty(self.cur_word),
141 loc.Word(self.cur_word))
142
143 return node
144
145 def ParseExpr(self):
146 # type: () -> bool_expr_t
147 """
148 Iterative:
149 Expr : Term (OR Term)*
150
151 Right recursion:
152 Expr : Term (OR Expr)?
153 """
154 left = self.ParseTerm()
155 # [[ uses || but [ uses -o
156 if self.bool_id in (Id.Op_DPipe, Id.BoolUnary_o):
157 self._Next()
158 right = self.ParseExpr()
159 return bool_expr.LogicalOr(left, right)
160 else:
161 return left
162
163 def ParseTerm(self):
164 # type: () -> bool_expr_t
165 """
166 Term : Negated (AND Negated)*
167
168 Right recursion:
169 Term : Negated (AND Term)?
170 """
171 left = self.ParseNegatedFactor()
172 # [[ uses && but [ uses -a
173 if self.bool_id in (Id.Op_DAmp, Id.BoolUnary_a):
174 self._Next()
175 right = self.ParseTerm()
176 return bool_expr.LogicalAnd(left, right)
177 else:
178 return left
179
180 def ParseNegatedFactor(self):
181 # type: () -> bool_expr_t
182 """
183 Negated : '!'? Factor
184 """
185 if self.bool_id == Id.KW_Bang:
186 self._Next()
187 child = self.ParseFactor()
188 return bool_expr.LogicalNot(child)
189 else:
190 return self.ParseFactor()
191
192 def ParseFactor(self):
193 # type: () -> bool_expr_t
194 """
195 Factor : WORD
196 | UNARY_OP WORD
197 | WORD =~ Regex
198 | WORD BINARY_OP WORD
199 | '(' Expr ')'
200 """
201 if self.bool_kind == Kind.BoolUnary:
202 # Just save the type and not the token itself?
203 op = self.bool_id
204 self._Next()
205 w = self.cur_word
206 # e.g. [[ -f < ]]. But [[ -f '<' ]] is OK
207
208 tag = w.tag()
209 if tag != word_e.Compound and tag != word_e.String:
210 p_die('Invalid argument to unary operator', loc.Word(w))
211 self._Next()
212
213 tilde = word_.TildeDetect(w)
214 if tilde:
215 w = tilde
216
217 node = bool_expr.Unary(op, w) # type: bool_expr_t
218 return node
219
220 if self.bool_kind == Kind.Word:
221 # Peek ahead another token.
222 t2 = self._LookAhead()
223 t2_bool_id = word_.BoolId(t2)
224 t2_bool_kind = consts.GetKind(t2_bool_id)
225
226 #log('t2 %s / t2_bool_id %s / t2_bool_kind %s', t2, t2_bool_id, t2_bool_kind)
227 # Op for < and >, -a and -o pun
228 if t2_bool_kind == Kind.BoolBinary or t2_bool_id in (Id.Op_Less,
229 Id.Op_Great):
230 left = self.cur_word
231
232 self._Next()
233 op = self.bool_id
234
235 if t2_bool_id == Id.BoolBinary_EqualTilde:
236 self._Next(lex_mode=lex_mode_e.BashRegex)
237 else:
238 self._Next()
239
240 right = self.cur_word
241 self._Next()
242
243 tilde = word_.TildeDetect(left)
244 if tilde:
245 left = tilde
246 tilde = word_.TildeDetect(right)
247 if tilde:
248 right = tilde
249
250 return bool_expr.Binary(op, left, right)
251
252 else: # [[ foo ]]
253 w = self.cur_word
254 tilde = word_.TildeDetect(w)
255 if tilde:
256 w = tilde
257 self._Next()
258 return bool_expr.WordTest(w)
259
260 if self.bool_id == Id.Op_LParen:
261 self._Next()
262 node = self.ParseExpr()
263 if self.bool_id != Id.Op_RParen:
264 p_die('Expected ), got %s' % word_.Pretty(self.cur_word),
265 loc.Word(self.cur_word))
266 self._Next()
267 return node
268
269 # It's not WORD, UNARY_OP, or '('
270 p_die(
271 'Unexpected token in boolean expression (%s)' %
272 ui.PrettyId(self.bool_id), loc.Word(self.cur_word))