OILS / frontend / parse_lib.py View on Github | oilshell.org

405 lines, 224 significant
1"""
2parse_lib.py - Consolidate various parser instantiations here.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id_t
6from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
7 ArgList, Proc, Func, command, pat_t)
8from _devbuild.gen.types_asdl import lex_mode_e
9from _devbuild.gen import grammar_nt
10
11from asdl import format as fmt
12from core import state
13from frontend import lexer
14from frontend import reader
15from osh import tdop
16from osh import arith_parse
17from osh import cmd_parse
18from osh import word_parse
19from mycpp import mylib
20from mycpp.mylib import log
21from ysh import expr_parse
22from ysh import expr_to_ast
23from ysh.expr_parse import ctx_PNodeAllocator
24
25_ = log
26
27from typing import Any, List, Tuple, Dict, TYPE_CHECKING
28if TYPE_CHECKING:
29 from core.alloc import Arena
30 from core.util import _DebugFile
31 from core import optview
32 from frontend.lexer import Lexer
33 from frontend.reader import _Reader
34 from osh.tdop import TdopParser
35 from osh.word_parse import WordParser
36 from osh.cmd_parse import CommandParser
37 from pgen2.grammar import Grammar
38
39
class _BaseTrail(object):
    """Null version of the parser "trail".

    It declares every data member, but all of its mutating methods do
    nothing, so parsers can call them unconditionally.  The Trail subclass
    overrides them with versions that actually record state.
    """

    def __init__(self):
        # type: () -> None

        # Words (and redirects) from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list,
        # but I ran into the "case classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation.  Used for variable
        # name completion.  Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        #   $ echo $\
        #   f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more
        # than one line!
        self.tokens = []  # type: List[Token]

        # Words INSIDE an alias expansion.
        self.alias_words = []  # type: List[CompoundWord]

        # Toggled by ctx_Alias; Trail consults it to decide what to record.
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        """No-op in the base class."""
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        """No-op in the base class."""
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        """No-op in the base class."""
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        """No-op in the base class."""
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        """No-op in the base class."""
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None
            """Dump each of the four lists to debug_f, one section apiece.

            Every section is a header line, the pretty-printed entries, and
            then an empty line.
            """
            # note: could cast DebugFile to IO[str] instead of ignoring?
            debug_f.writeln(' words:')
            for word in self.words:
                fmt.PrettyPrint(word, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' redirects:')
            for redir in self.redirects:
                fmt.PrettyPrint(redir, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' tokens:')
            for tok in self.tokens:
                fmt.PrettyPrint(tok, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' alias_words:')
            for alias_word in self.alias_words:
                fmt.PrettyPrint(alias_word, f=debug_f)  # type: ignore
            debug_f.writeln('')

        def __repr__(self):
            # type: () -> str
            """Show the four lists in a single '<Trail ...>' string."""
            parts = (self.words, self.redirects, self.tokens,
                     self.alias_words)
            return '<Trail %s %s %s %s>' % parts
117
118
class ctx_Alias(object):
    """Marks a trail as "expanding an alias" for the lifetime of this object.

    CommandParser uses it so that we're ready for the FIRST alias word.  For
    example, given

        alias ll='ls -l'

    we want to capture 'ls' as the first word.

    While the flag is set, SetLatestWords and AppendToken should NOT be
    active, because we don't need the other tokens from 'ls -l'.

    Recording them would also probably cause bugs in history expansion, e.g.
    echo !1 should be the first word the user typed, not the first word after
    alias expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        # The flag is raised here, at construction time; __enter__ does
        # nothing.
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Lower the flag again, whether or not an exception is propagating.
        trail = self.trail
        trail._expanding_alias = False
147
148
class Trail(_BaseTrail):
    """Breadcrumbs left by the parser, used to complete shell syntax and
    commands.

    History expansion uses it too.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        """Empty all four lists in place (the list objects are kept)."""
        del self.words[:]
        del self.redirects[:]
        # The other ones don't need to be reset?
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        """Remember the words and redirects of the latest command.

        During an alias expansion only the words are kept, in alias_words;
        the redirects are dropped, and self.words / self.redirects are left
        as they were.
        """
        if not self._expanding_alias:
            self.words = words
            self.redirects = redirects
        else:
            self.alias_words = words  # Save these separately

    def AppendToken(self, token):
        # type: (Token) -> None
        """Record a token, unless it comes from inside an alias expansion."""
        if not self._expanding_alias:  # We don't want tokens inside aliases
            self.tokens.append(token)
181
182
if TYPE_CHECKING:
    # Type alias that is only defined for the type checker, so it can appear
    # in type comments but not in runtime expressions.  Each entry is a
    # (str, int) pair -- presumably an alias name plus a position; TODO
    # confirm against the code that uses this type.
    AliasesInFlight = List[Tuple[str, int]]
185
186
class ParseContext(object):
    """What the mutually recursive Command and Word parsers have in common.

    This object carries the shared, long-lived pieces: the arena, parse
    options, alias table, YSH grammar and its transformer, and the trail.
    Parsing STATE, in contrast, is stored in the CommandParser and WordParser
    instances that the Make* methods create.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if ysh_grammar:
            self.tr = expr_to_ast.Transformer(ysh_grammar)
        else:  # hack for unit tests, which pass None
            self.tr = None

        # The parse tree printer only exists in the Python build.
        if mylib.PYTHON:
            if self.tr:
                self.p_printer = self.tr.p_printer
            else:
                self.p_printer = None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        """Replace the default no-op trail with the given one."""
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper: wrap a line reader in a LineLexer + Lexer pair.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't
        perform better.
        """
        # The arena is taken from the line reader, not from self.
        return lexer.Lexer(lexer.LineLexer(line_reader.arena), line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        """Build a CommandParser that uses this context's parse options.

        With emit_comp_dummy set, the lexer is asked to emit the completion
        dummy token.
        """
        lx = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lx.EmitCompDummy()  # A special token before EOF!

        word_parser = word_parse.WordParser(self, lx, line_reader)
        return cmd_parse.CommandParser(self, self.parse_opts, word_parser,
                                       lx, line_reader)

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        """Like MakeOshParser(), but with the options from
        state.MakeYshParseOpts() in place of self.parse_opts."""
        lx = self.MakeLexer(line_reader)
        ysh_opts = state.MakeYshParseOpts()
        word_parser = word_parse.WordParser(self, lx, line_reader)
        return cmd_parse.CommandParser(self, ysh_opts, word_parser, lx,
                                       line_reader)

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        """Build a WordParser over a fresh lexer for line_reader."""
        return word_parse.WordParser(self, self.MakeLexer(line_reader),
                                     line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        """Build a WordParser over a lexer the caller already has."""
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str):
        # type: (str) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser."""
        str_reader = reader.StringLineReader(code_str, self.arena)
        word_parser = word_parse.WordParser(self, self.MakeLexer(str_reader),
                                            str_reader)
        word_parser.Init(lex_mode_e.Arith)  # Special initialization
        return tdop.TdopParser(arith_parse.Spec(), word_parser,
                               self.parse_opts)

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        word_parser = word_parse.WordParser(self, lexer, line_reader)
        return cmd_parse.CommandParser(self,
                                       self.parse_opts,
                                       word_parser,
                                       lexer,
                                       line_reader,
                                       eof_id=eof_id)

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        str_reader = reader.StringLineReader(code_str, self.arena)
        return word_parse.WordParser(self, self.MakeLexer(str_reader),
                                     str_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        """Create a new expression parser for the YSH grammar."""
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.VarDecl, Token]
        """Parse a declaration like:  var mylist = [1, 2, 3]

        Returns the VarDecl node and the last token the expression parser
        returned.
        """
        expr_parser = self._YshParser()
        # The parse tree is transformed while the allocator context is still
        # active.
        with ctx_PNodeAllocator(expr_parser):
            tree, last_tok = expr_parser.Parse(lexer,
                                               grammar_nt.ysh_var_decl)

            if 0:
                self.p_printer.Print(tree)

            decl = self.tr.MakeVarDecl(tree)
            decl.keyword = kw_token  # VarDecl didn't fill this in

        return decl, last_tok

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.Mutation, Token]
        """Parse a mutation like:  setvar d['a'] += 1

        Returns the Mutation node and the last token the expression parser
        returned.
        """
        expr_parser = self._YshParser()
        with ctx_PNodeAllocator(expr_parser):
            tree, last_tok = expr_parser.Parse(lexer,
                                               grammar_nt.ysh_mutation)
            if 0:
                self.p_printer.Print(tree)
            mutation = self.tr.MakeMutation(tree)
            mutation.keyword = kw_token  # Mutation didn't fill this in

        return mutation, last_tok

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """Parse args like  json write (x, foo=1)  and  assert [42 === x]

        The result is stored in `out`; its `right` field is set to the last
        token the expression parser returned.
        """
        expr_parser = self._YshParser()
        with ctx_PNodeAllocator(expr_parser):
            tree, last_tok = expr_parser.Parse(lx, start_symbol)

            if 0:
                self.p_printer.Print(tree)

            self.tr.ProcCallArgs(tree, out)
            out.right = last_tok

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """Parse an expression, as in

            if (x > 0) { ...
            }

        and while, etc.  Returns the expression and the last token the
        expression parser returned.
        """
        expr_parser = self._YshParser()
        with ctx_PNodeAllocator(expr_parser):
            tree, last_tok = expr_parser.Parse(lx, start_symbol)
            if 0:
                self.p_printer.Print(tree)

            node = self.tr.Expr(tree)

        return node, last_tok

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """Parse a case pattern: (6) | (7), / dot* '.py' /, (else), etc.

        Returns three things: the pattern, the first token in the pattern,
        and the LBrace token at the start of the case arm body.
        """
        expr_parser = self._YshParser()
        with ctx_PNodeAllocator(expr_parser):
            tree, last_tok = expr_parser.Parse(lexer,
                                               grammar_nt.ysh_case_pat)

            first_tok = tree.GetChild(0).tok
            pat = self.tr.YshCasePattern(tree)

        return pat, first_tok, last_tok

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """Parse a signature like:  proc f(x, y, @args) {

        The signature is stored in out.sig; the last token the expression
        parser returned is the return value.
        """
        expr_parser = self._YshParser()
        with ctx_PNodeAllocator(expr_parser):
            tree, last_tok = expr_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0:
                self.p_printer.Print(tree)

            out.sig = self.tr.Proc(tree)

        return last_tok

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """Parse a signature like:

            func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int

        The transformer fills in `out`; the last token the expression parser
        returned is the return value.
        """
        expr_parser = self._YshParser()
        with ctx_PNodeAllocator(expr_parser):
            tree, last_tok = expr_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0:
                self.p_printer.Print(tree)

            self.tr.YshFunc(tree, out)
        return last_tok
401
402
403# Another parser instantiation:
404# - For Array Literal in word_parse.py WordParser:
405# w_parser = WordParser(self.lexer, self.line_reader)