# OILS / frontend / parse_lib.py
1"""
2parse_lib.py - Consolidate various parser instantiations here.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id_t
6from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
7 ArgList, Proc, Func, pat_t, VarDecl,
8 Mutation)
9from _devbuild.gen.types_asdl import lex_mode_e
10from _devbuild.gen import grammar_nt
11
12from asdl import format as fmt
13from core import state
14from frontend import lexer
15from frontend import reader
16from osh import tdop
17from osh import arith_parse
18from osh import cmd_parse
19from osh import word_parse
20from mycpp import mylib
21from mycpp.mylib import log
22from ysh import expr_parse
23from ysh import expr_to_ast
24from ysh.expr_parse import ctx_PNodeAllocator
25
26_ = log
27
28from typing import Any, List, Tuple, Dict, TYPE_CHECKING
29if TYPE_CHECKING:
30 from core.alloc import Arena
31 from core.util import _DebugFile
32 from core import optview
33 from frontend.lexer import Lexer
34 from frontend.reader import _Reader
35 from osh.tdop import TdopParser
36 from osh.word_parse import WordParser
37 from osh.cmd_parse import CommandParser
38 from pgen2.grammar import Grammar
39
40
class _BaseTrail(object):
    """Base class has members, but no-ops for methods.

    The parsers call these hooks unconditionally; this default implementation
    records nothing.  The Trail subclass below actually records state, for
    interactive completion and history expansion.
    """

    def __init__(self):
        # type: () -> None
        # Words from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list, but
        # I ran into the "case classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation.  Used for variable name
        # completion.  Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more than one
        # line!
        self.tokens = []  # type: List[Token]

        # Words INSIDE an alias expansion; kept separate from self.words.
        self.alias_words = [
        ]  # type: List[CompoundWord]
        # Set by ctx_Alias while the CommandParser expands an alias.
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        """No-op: subclass resets the recorded lists."""
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        """No-op: subclass records the most recent simple command."""
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        """No-op: subclass records non-ignored tokens."""
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:
        # Only present in the Python build (excluded from mycpp translation
        # by the mylib.PYTHON guard).

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None
            """Dump the recorded words/redirects/tokens to a debug file."""

            # note: could cast DebugFile to IO[str] instead of ignoring?
            debug_f.writeln('  words:')
            for w in self.words:
                fmt.PrettyPrint(w, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln('  redirects:')
            for r in self.redirects:
                fmt.PrettyPrint(r, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln('  tokens:')
            for p in self.tokens:
                fmt.PrettyPrint(p, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln('  alias_words:')
            for w in self.alias_words:
                fmt.PrettyPrint(w, f=debug_f)  # type: ignore
            debug_f.writeln('')

    def __repr__(self):
        # type: () -> str
        return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                        self.tokens, self.alias_words)
118
119
class ctx_Alias(object):
    """Context manager that marks a Trail as being inside an alias expansion.

    Used by CommandParser so we know to be ready for the FIRST alias word.

    For example, for

        alias ll='ls -l'

    we want to capture 'ls' as the first word, but we do NOT want
    SetLatestWords or AppendToken to be active for the other tokens of
    'ls -l'.

    It would also probably cause bugs in history expansion, e.g. echo !1
    should be the first word the user typed, not the first word after alias
    expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        # Flag is raised at construction time, not in __enter__, so the very
        # first alias word is captured.
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.trail._expanding_alias = False
148
149
class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        # Reset all recorded state in place (del x[:] keeps the list objects).
        del self.words[:]
        del self.redirects[:]
        # The other ones don't need to be reset?
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            # Words inside an alias expansion are saved separately.
            self.alias_words = words
        else:
            self.words = words
            self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        # We don't want tokens inside aliases.
        if not self._expanding_alias:
            self.tokens.append(token)
182
183
if TYPE_CHECKING:
    # Aliases currently being expanded, as (name, int) pairs -- presumably to
    # detect recursive alias expansion; confirm against osh/cmd_parse.py.
    AliasesInFlight = List[Tuple[str, int]]
186
187
class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, STATE is stored in the CommandParser and WordParser
    instances.

    Acts as a factory for the various parser combinations (OSH command,
    word, arith, config), and as the entry point for parsing YSH
    expressions with the pgen2-style grammar.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases  # alias name -> expansion string
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if ysh_grammar:
            self.tr = expr_to_ast.Transformer(ysh_grammar)
        else:  # hack for unit tests, which pass None
            self.tr = None

        if mylib.PYTHON:
            if self.tr:
                self.p_printer = self.tr.p_printer
            else:
                self.p_printer = None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        # Install a real Trail, e.g. when interactive completion is enabled.
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper function.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't perform
        better.
        """
        # Take Arena from LineReader
        line_lexer = lexer.LineLexer(line_reader.arena)
        return lexer.Lexer(line_lexer, line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        """Make the main shell parser; uses this context's parse options."""
        lx = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lx.EmitCompDummy()  # A special token before EOF!

        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, self.parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        # Like MakeOshParser, but with fresh YSH parse options rather than
        # this context's self.parse_opts.
        lx = self.MakeLexer(line_reader)
        parse_opts = state.MakeYshParseOpts()
        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str):
        # type: (str) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        w_parser = word_parse.WordParser(self, lx, line_reader)
        w_parser.Init(lex_mode_e.Arith)  # Special initialization
        a_parser = tdop.TdopParser(arith_parse.Spec(), w_parser,
                                   self.parse_opts)
        return a_parser

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        # Note: reuses the caller's lexer; eof_id terminates the sub-parse.
        w_parser = word_parse.WordParser(self, lexer, line_reader)
        c_parser = cmd_parse.CommandParser(self,
                                           self.parse_opts,
                                           w_parser,
                                           lexer,
                                           line_reader,
                                           eof_id=eof_id)
        return c_parser

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        # Fresh expression parser over the shared YSH grammar.
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        e_parser = self._YshParser()
        # ctx_PNodeAllocator frees the parse nodes when the block exits.
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0:  # change to 1 to debug-print the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
        ast_node.keyword = kw_token  # VarDecl didn't fill this in

        return ast_node, last_token

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[Mutation, Token]
        """ setvar d['a'] += 1 """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_mutation)
            if 0:  # change to 1 to debug-print the parse tree
                self.p_printer.Print(pnode)
            ast_node = self.tr.MakeMutation(pnode)
        ast_node.keyword = kw_token  # Mutation didn't fill this in

        return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)

            if 0:  # change to 1 to debug-print the parse tree
                self.p_printer.Print(pnode)

            # Mutates 'out' in place rather than returning a new node.
            self.tr.ProcCallArgs(pnode, out)
        out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ...

        }, while, etc.
        """

        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)
            if 0:  # change to 1 to debug-print the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

        return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern and
        the LBrace token at the start of the case arm body.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

        return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0:  # change to 1 to debug-print the parse tree
                self.p_printer.Print(pnode)

            # Fills in out.sig; the rest of 'out' is the caller's job.
            out.sig = self.tr.Proc(pnode)

        return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0:  # change to 1 to debug-print the parse tree
                self.p_printer.Print(pnode)

            # Mutates 'out' in place.
            self.tr.YshFunc(pnode, out)
        return last_token
402

# Another parser instantiation:
# - For Array Literal in word_parse.py WordParser:
#   w_parser = WordParser(self.lexer, self.line_reader)