1 | """
|
2 | parse_lib.py - Consolidate various parser instantiations here.
|
3 | """
|
4 |
|
5 | from _devbuild.gen.id_kind_asdl import Id_t
|
6 | from _devbuild.gen.syntax_asdl import (Token, CompoundWord, expr_t, Redir,
|
7 | ArgList, Proc, Func, command, pat_t)
|
8 | from _devbuild.gen.types_asdl import lex_mode_e
|
9 | from _devbuild.gen import grammar_nt
|
10 |
|
11 | from asdl import format as fmt
|
12 | from core import state
|
13 | from frontend import lexer
|
14 | from frontend import reader
|
15 | from osh import tdop
|
16 | from osh import arith_parse
|
17 | from osh import cmd_parse
|
18 | from osh import word_parse
|
19 | from mycpp import mylib
|
20 | from mycpp.mylib import log
|
21 | from ysh import expr_parse
|
22 | from ysh import expr_to_ast
|
23 | from ysh.expr_parse import ctx_PNodeAllocator
|
24 |
|
25 | _ = log
|
26 |
|
27 | from typing import Any, List, Tuple, Dict, TYPE_CHECKING
|
28 | if TYPE_CHECKING:
|
29 | from core.alloc import Arena
|
30 | from core.util import _DebugFile
|
31 | from core import optview
|
32 | from frontend.lexer import Lexer
|
33 | from frontend.reader import _Reader
|
34 | from osh.tdop import TdopParser
|
35 | from osh.word_parse import WordParser
|
36 | from osh.cmd_parse import CommandParser
|
37 | from pgen2.grammar import Grammar
|
38 |
|
39 |
|
40 | class _BaseTrail(object):
|
41 | """Base class has members, but no-ops for methods."""
|
42 |
|
43 | def __init__(self):
|
44 | # type: () -> None
|
        # Words from a partially completed command.
        # Filled in by _ScanSimpleCommand in osh/cmd_parse.py.
        self.words = []  # type: List[CompoundWord]
        self.redirects = []  # type: List[Redir]
        # TODO: We should maintain the LST invariant and have a single list,
        # but I ran into the "case classes are better than variants" problem.

        # Non-ignored tokens, after PushHint translation.  Used for variable
        # name completion.  Filled in by _Peek() in osh/word_parse.py.
        #
        # Example:
        # $ echo $\
        # f<TAB>
        # This could complete $foo.
        # Problem: readline doesn't even allow that, because it spans more
        # than one line!
        self.tokens = []  # type: List[Token]

        # Words INSIDE an alias expansion
        self.alias_words = []  # type: List[CompoundWord]
        self._expanding_alias = False

    def Clear(self):
        # type: () -> None
        pass

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        pass

    def AppendToken(self, token):
        # type: (Token) -> None
        pass

    def BeginAliasExpansion(self):
        # type: () -> None
        pass

    def EndAliasExpansion(self):
        # type: () -> None
        pass

    if mylib.PYTHON:

        def PrintDebugString(self, debug_f):
            # type: (_DebugFile) -> None

            # Note: we could cast DebugFile to IO[str] instead of using
            # "type: ignore" below.
            debug_f.writeln('  words:')
            for w in self.words:
                fmt.PrettyPrint(w, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln('  redirects:')
            for r in self.redirects:
                fmt.PrettyPrint(r, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln('  tokens:')
            for p in self.tokens:
                fmt.PrettyPrint(p, f=debug_f)  # type: ignore
            debug_f.writeln('')

            debug_f.writeln('  alias_words:')
            for w in self.alias_words:
                fmt.PrettyPrint(w, f=debug_f)  # type: ignore
            debug_f.writeln('')

    def __repr__(self):
        # type: () -> str
        return '<Trail %s %s %s %s>' % (self.words, self.redirects,
                                        self.tokens, self.alias_words)


class ctx_Alias(object):
    """Used by CommandParser so we know to be ready for the FIRST alias word.

    For example, given

        alias ll='ls -l'

    we want to capture 'ls' as the first word.

    We do NOT want SetLatestWords or AppendToken to be active, because we
    don't need the other tokens from 'ls -l'.

    Leaving them active would also probably cause bugs in history expansion:
    'echo !1' should refer to the first word the user typed, not the first
    word after alias expansion.
    """

    def __init__(self, trail):
        # type: (_BaseTrail) -> None
        trail._expanding_alias = True
        self.trail = trail

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.trail._expanding_alias = False
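
# Example (editor's sketch; osh/cmd_parse.py is the intended user, and the
# call site is paraphrased here): while parsing the expansion of 'll', the
# CommandParser would do something like:
#
#   with ctx_Alias(trail):
#       ...  # parse 'ls -l'; the Trail saves words into alias_words
#            # and ignores tokens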


class Trail(_BaseTrail):
    """Info left by the parser to help us complete shell syntax and commands.

    It's also used for history expansion.
    """

    def __init__(self):
        # type: () -> None
        """Empty constructor for mycpp."""
        _BaseTrail.__init__(self)

    def Clear(self):
        # type: () -> None
        del self.words[:]
        del self.redirects[:]
        # Not sure the last two need to be reset, but clear them to be safe.
        del self.tokens[:]
        del self.alias_words[:]

    def SetLatestWords(self, words, redirects):
        # type: (List[CompoundWord], List[Redir]) -> None
        if self._expanding_alias:
            self.alias_words = words  # Save these separately
            return
        self.words = words
        self.redirects = redirects

    def AppendToken(self, token):
        # type: (Token) -> None
        if self._expanding_alias:  # We don't want tokens inside aliases
            return
        self.tokens.append(token)
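
# Example (editor's sketch; ParseLogicalLine is an assumed CommandParser
# entry point): the completion engine parses a partial line, lets the parse
# fail, then reads what the Trail collected:
#
#   trail.Clear()
#   try:
#       c_parser.ParseLogicalLine()
#   except Exception:
#       pass  # incomplete input is expected here
#   if len(trail.words):
#       last_word = trail.words[-1]  # e.g. a candidate for path completion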


if TYPE_CHECKING:
    AliasesInFlight = List[Tuple[str, int]]


class ParseContext(object):
    """Context shared between the mutually recursive Command and Word parsers.

    In contrast, parse STATE is stored in the CommandParser and WordParser
    instances.
    """

    def __init__(self,
                 arena,
                 parse_opts,
                 aliases,
                 ysh_grammar,
                 do_lossless=False):
        # type: (Arena, optview.Parse, Dict[str, str], Grammar, bool) -> None
        self.arena = arena
        self.parse_opts = parse_opts
        self.aliases = aliases
        self.ysh_grammar = ysh_grammar
        self.do_lossless = do_lossless

        # NOTE: The transformer is really a pure function.
        if ysh_grammar:
            self.tr = expr_to_ast.Transformer(ysh_grammar)
        else:  # hack for unit tests, which pass None
            self.tr = None

        if mylib.PYTHON:
            if self.tr:
                self.p_printer = self.tr.p_printer
            else:
                self.p_printer = None

        # Completion state lives here since it may span multiple parsers.
        self.trail = _BaseTrail()  # no-op by default

    def Init_Trail(self, trail):
        # type: (_BaseTrail) -> None
        self.trail = trail
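
    # Example (editor's sketch; the argument values are assumptions): a shell
    # typically builds one ParseContext and reuses it for every parser:
    #
    #   parse_ctx = ParseContext(arena, parse_opts, aliases, ysh_grammar)
    #   parse_ctx.Init_Trail(Trail())  # only needed for completion/history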

    def MakeLexer(self, line_reader):
        # type: (_Reader) -> Lexer
        """Helper function.

        NOTE: I tried to combine the LineLexer and Lexer, and it didn't
        perform any better.
        """
        # Take the Arena from the line_reader
        line_lexer = lexer.LineLexer(line_reader.arena)
        return lexer.Lexer(line_lexer, line_reader)

    def MakeOshParser(self, line_reader, emit_comp_dummy=False):
        # type: (_Reader, bool) -> CommandParser
        lx = self.MakeLexer(line_reader)
        if emit_comp_dummy:
            lx.EmitCompDummy()  # A special token before EOF!

        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, self.parse_opts, w_parser,
                                           lx, line_reader)
        return c_parser
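
    # Example (editor's sketch; ParseLogicalLine is an assumed CommandParser
    # entry point): parsing a string of OSH code end to end:
    #
    #   line_reader = reader.StringLineReader('echo hi', parse_ctx.arena)
    #   c_parser = parse_ctx.MakeOshParser(line_reader)
    #   node = c_parser.ParseLogicalLine()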

    def MakeConfigParser(self, line_reader):
        # type: (_Reader) -> CommandParser
        lx = self.MakeLexer(line_reader)
        parse_opts = state.MakeYshParseOpts()
        w_parser = word_parse.WordParser(self, lx, line_reader)
        c_parser = cmd_parse.CommandParser(self, parse_opts, w_parser, lx,
                                           line_reader)
        return c_parser

    def MakeWordParserForHereDoc(self, line_reader):
        # type: (_Reader) -> WordParser
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def MakeWordParser(self, lx, line_reader):
        # type: (Lexer, _Reader) -> WordParser
        return word_parse.WordParser(self, lx, line_reader)

    def MakeArithParser(self, code_str):
        # type: (str) -> TdopParser
        """Used for a[x+1]=foo in the CommandParser."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        w_parser = word_parse.WordParser(self, lx, line_reader)
        w_parser.Init(lex_mode_e.Arith)  # Special initialization
        a_parser = tdop.TdopParser(arith_parse.Spec(), w_parser,
                                   self.parse_opts)
        return a_parser
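
    # Example (editor's sketch; Parse() is assumed to be the TdopParser entry
    # point): parsing the index expression of a[i+1]=x:
    #
    #   a_parser = parse_ctx.MakeArithParser('i+1')
    #   anode = a_parser.Parse()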

    def MakeParserForCommandSub(self, line_reader, lexer, eof_id):
        # type: (_Reader, Lexer, Id_t) -> CommandParser
        """To parse command sub, we want a fresh word parser state."""
        w_parser = word_parse.WordParser(self, lexer, line_reader)
        c_parser = cmd_parse.CommandParser(self,
                                           self.parse_opts,
                                           w_parser,
                                           lexer,
                                           line_reader,
                                           eof_id=eof_id)
        return c_parser

    def MakeWordParserForPlugin(self, code_str):
        # type: (str) -> WordParser
        """For $PS1, $PS4, etc."""
        line_reader = reader.StringLineReader(code_str, self.arena)
        lx = self.MakeLexer(line_reader)
        return word_parse.WordParser(self, lx, line_reader)

    def _YshParser(self):
        # type: () -> expr_parse.ExprParser
        return expr_parse.ExprParser(self, self.ysh_grammar)

    def ParseVarDecl(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.VarDecl, Token]
        """ var mylist = [1, 2, 3] """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_var_decl)

            if 0:
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeVarDecl(pnode)
            ast_node.keyword = kw_token  # VarDecl didn't fill this in

        return ast_node, last_token
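
    # Example (editor's sketch; kw_token and lx come from the CommandParser
    # in practice): after the 'var' keyword has been consumed, the rest of
    # 'var x = 42' is parsed with:
    #
    #   ast_node, last_token = parse_ctx.ParseVarDecl(var_kw_token, lx)
    #   # ast_node.keyword is var_kw_token; the grammar didn't fill it in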

    def ParseMutation(self, kw_token, lexer):
        # type: (Token, Lexer) -> Tuple[command.Mutation, Token]
        """ setvar d['a'] += 1 """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_mutation)

            if 0:
                self.p_printer.Print(pnode)

            ast_node = self.tr.MakeMutation(pnode)
            ast_node.keyword = kw_token  # MakeMutation didn't fill this in

        return ast_node, last_token

    def ParseProcCallArgs(self, lx, out, start_symbol):
        # type: (Lexer, ArgList, int) -> None
        """ json write (x, foo=1) and assert [42 === x] """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)

            if 0:
                self.p_printer.Print(pnode)

            self.tr.ProcCallArgs(pnode, out)
            out.right = last_token

    def ParseYshExpr(self, lx, start_symbol):
        # type: (Lexer, int) -> Tuple[expr_t, Token]
        """For expressions in if (x > 0) { ... }, while (...) { ... }, etc."""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lx, start_symbol)

            if 0:
                self.p_printer.Print(pnode)

            ast_node = self.tr.Expr(pnode)

        return ast_node, last_token

    def ParseYshCasePattern(self, lexer):
        # type: (Lexer) -> Tuple[pat_t, Token, Token]
        """(6) | (7), / dot* '.py' /, (else), etc.

        Alongside the pattern, this returns the first token in the pattern
        and the LBrace token at the start of the case arm body.
        """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_case_pat)

            left_tok = pnode.GetChild(0).tok
            pattern = self.tr.YshCasePattern(pnode)

        return pattern, left_tok, last_token

    def ParseProc(self, lexer, out):
        # type: (Lexer, Proc) -> Token
        """proc f(x, y, @args) {"""
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_proc)

            if 0:
                self.p_printer.Print(pnode)

            out.sig = self.tr.Proc(pnode)

        return last_token

    def ParseFunc(self, lexer, out):
        # type: (Lexer, Func) -> Token
        """ func f(x Int, y Int = 0, ...args; z Int = 3, ...named) => Int """
        e_parser = self._YshParser()
        with ctx_PNodeAllocator(e_parser):
            pnode, last_token = e_parser.Parse(lexer, grammar_nt.ysh_func)

            if 0:
                self.p_printer.Print(pnode)

            self.tr.YshFunc(pnode, out)

        return last_token


# Another parser instantiation:
# - For Array Literal in word_parse.py WordParser:
#   w_parser = WordParser(self.lexer, self.line_reader)
|