OILS / frontend / parse_lib.py View on Github | oils.pub

410 lines, 228 significant
1"""
2parse_lib.py - Consolidate various parser instantiations here.
3"""
4
5from _devbuild.gen.id_kind_asdl import Id_t
6from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
7 ArgList, Proc, Func, pat_t, VarDecl,
8 Mutation, source, loc, loc_t)
9from _devbuild.gen.types_asdl import lex_mode_e
10from _devbuild.gen import grammar_nt
11
12from asdl import format as fmt
13from core import alloc
14from core import state
15from frontend import lexer
16from frontend import reader
17from osh import tdop
18from osh import arith_parse
19from osh import cmd_parse
20from osh import word_parse
21from mycpp import mylib
22from mycpp.mylib import log
23from ysh import expr_parse
24from ysh import expr_to_ast
25from ysh.expr_parse import ctx_PNodeAllocator
26
27_ = log
28
29from typing import Any, List, Tuple, Dict, TYPE_CHECKING
30if TYPE_CHECKING:
31 from core.util import _DebugFile
32 from core import optview
33 from frontend.lexer import Lexer
34 from frontend.reader import _Reader
35 from osh.tdop import TdopParser
36 from osh.word_parse import WordParser
37 from osh.cmd_parse import CommandParser
38 from pgen2.grammar import Grammar
39
40
class _BaseTrail(object):
    """No-op Trail: declares the members, but every mutator does nothing."""

    def __init__(self):
        # type: () -> None
        # Words from a partially completed command.  Filled in by
        # _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list, but
        # that I ran into the "cases classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation.  Used for variable
        # name completion; filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more than
        # one line!
        self.tokens = []  # type: List[Token]

        # Words INSIDE an alias expansion, kept separate from self.words.
        self.alias_words = []  # type: List[CompoundWord]
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None

            # note: could cast DebugFile to IO[str] instead of ignoring?
            debug_f.writeln(' words:')
            for word in self.words:
                fmt.PrettyPrint(word, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' redirects:')
            for redir in self.redirects:
                fmt.PrettyPrint(redir, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' tokens:')
            for tok in self.tokens:
                fmt.PrettyPrint(tok, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln(' alias_words:')
            for word in self.alias_words:
                fmt.PrettyPrint(word, f=debug_f)  # type: ignore
            debug_f.writeln('')

    def __repr__(self):
        # type: () -> str
        return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                        self.tokens, self.alias_words)
118
119
class ctx_Alias(object):
    """Tells the CommandParser to capture only the FIRST word of an alias.

    For example, given

        alias ll='ls -l'

    we want to capture 'ls' as the first word.

    While this context is active, SetLatestWords and AppendToken should do
    nothing, because the remaining tokens of 'ls -l' aren't needed.

    Capturing them would also likely break history expansion: echo !1 should
    refer to the first word the user typed, not the first word after alias
    expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        # The flag is raised immediately, not in __enter__.
        self.trail = trail
        trail._expanding_alias = True

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.trail._expanding_alias = False
148
149
class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        # Empty all four lists in place, keeping the same list objects.
        del self.words[:]
        del self.redirects[:]
        # Unclear whether these two strictly need to be reset
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            # Words from alias expansions are kept out of the main list
            self.alias_words = words
        else:
            self.words = words
            self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        # Tokens inside aliases aren't wanted
        if not self._expanding_alias:
            self.tokens.append(token)
182
183
if TYPE_CHECKING:
    # (str, int) pairs -- presumably (alias name, position in the expansion);
    # NOTE(review): confirm the int's meaning against usage in osh/cmd_parse.py.
    AliasesInFlight = List[Tuple[str, int]]
186
187
class ParseContext(object):
    """State shared between the mutually recursive Command and Word parsers.

    Only configuration and shared services live here; per-parse STATE lives
    in the individual CommandParser and WordParser instances.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (alloc.Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.  Unit tests pass
        # None for the grammar, so tolerate that.
        self.tr = expr_to_ast.Transformer(ysh_grammar) if ysh_grammar else None

        if mylib.PYTHON:
            self.p_printer = self.tr.p_printer if self.tr else None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        self.trail = trail

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Create a Lexer layered on top of a LineLexer.

        NOTE: Combining the LineLexer and Lexer was tried, and it didn't
        perform better.
        """
        # The LineLexer borrows the Arena from the line reader
        return lexer.Lexer(lexer.LineLexer(line_reader.arena), line_reader)

    def MakeOshParser(self, line_reader, interactive, emit_comp_dummy=False):
        # type: (_Reader, bool, bool) -> CommandParser
        lex = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lex.EmitCompDummy()  # A special token before EOF!

        wp = word_parse.WordParser(self, lex, line_reader)
        return cmd_parse.CommandParser(self, self.parse_opts, wp, lex,
                                       line_reader, interactive)

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        lex = self.MakeLexer(line_reader)
        # Config files always use YSH parse options, non-interactively
        ysh_opts = state.MakeYshParseOpts()
        wp = word_parse.WordParser(self, lex, line_reader)
        return cmd_parse.CommandParser(self, ysh_opts, wp, lex, line_reader,
                                       False)

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        lex = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lex, line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str, blame_loc=loc.Missing):
        # type: (str, loc_t) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser, unset, printf -v"""
        # Dynamically parsed code gets its own temporary arena for its lines
        arena = alloc.Arena()
        arena.PushSource(source.Dynamic('sh arith expr', blame_loc))
        line_reader = reader.StringLineReader(code_str, arena)
        lex = self.MakeLexer(line_reader)
        wp = word_parse.WordParser(self, lex, line_reader)
        wp.Init(lex_mode_e.Arith)  # Special initialization
        return tdop.TdopParser(arith_parse.Spec(), wp, self.parse_opts)

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """Command subs share the Lexer, but get fresh word parser state."""
        wp = word_parse.WordParser(self, lexer, line_reader)
        return cmd_parse.CommandParser(self,
                                       self.parse_opts,
                                       wp,
                                       lexer,
                                       line_reader,
                                       eof_id=eof_id,
                                       interactive=False)

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lex = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lex, line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0:  # flip on to dump the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
            ast_node.keyword = kw_token  # the grammar didn't fill this in

            return ast_node, last_token

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[Mutation, Token]
        """ setvar d['a'] += 1 """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_mutation)
            if 0:  # flip on to dump the parse tree
                self.p_printer.Print(pnode)
            ast_node = self.tr.MakeMutation(pnode)
            ast_node.keyword = kw_token  # the grammar didn't fill this in

            return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)

            if 0:  # flip on to dump the parse tree
                self.p_printer.Print(pnode)

            self.tr.ProcCallArgs(pnode, out)
            out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """if (x > 0) { ...

        }, while, etc.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)
            if 0:  # flip on to dump the parse tree
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

            return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern and
        the LBrace token at the start of the case arm body.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

            return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0:  # flip on to dump the parse tree
                self.p_printer.Print(pnode)

            out.sig = self.tr.Proc(pnode)

            return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0:  # flip on to dump the parse tree
                self.p_printer.Print(pnode)

            self.tr.YshFunc(pnode, out)
            return last_token
406
407
408# Another parser instantiation:
409# - For Array Literal in word_parse.py WordParser:
410# w_parser = WordParser(self.lexer, self.line_reader)