1 | from __future__ import print_function
|
2 | """
|
3 | ysh_ify.py: Roughly translate OSH to YSH. Doesn't respect semantics.
|
4 |
|
5 | ESSENTIAL
|
6 |
|
7 | Command:
|
8 |
|
9 | then/fi, do/done -> { }
|
10 |
|
11 | new case statement
|
12 |
|
13 | f() { } -> proc f { } (changes scope)
|
14 |
|
15 | subshell -> forkwait, because () is taken
|
16 | { } to fopen { }?
|
17 |
|
18 | Approximate: var declaration:
|
19 | local a=b -> var a = 'b', I think
|
20 |
|
21 | <<EOF here docs to '''
|
22 |
|
23 | Word:
|
24 | "$@" -> @ARGV
|
25 |
|
26 | Not common: unquoted $x -> @[split(x)]
|
27 |
|
28 | LEGACY that I don't personally use
|
29 |
|
30 | Builtins:
|
31 | [ -> test
|
32 | . -> source
|
33 |
|
34 | Word:
|
35 | backticks -> $() (I don't use this)
|
36 | quote removal "$foo" -> $foo
|
37 | brace removal ${foo} and "${foo}" -> $foo
|
38 |
|
39 | --tool format
|
40 |
|
41 | fix indentation and spacing, like clang-format
|
42 | can "lower" the LST to a rough representation with keywords / "first words",
|
43 | { } ( ), and comments
|
44 | - the "atoms" should not have newlines
|
45 | """
|
46 |
|
47 | from _devbuild.gen.id_kind_asdl import Id, Id_str
|
48 | from _devbuild.gen.runtime_asdl import word_style_e, word_style_t
|
49 | from _devbuild.gen.syntax_asdl import (
|
50 | loc,
|
51 | CompoundWord,
|
52 | Token,
|
53 | SimpleVarSub,
|
54 | BracedVarSub,
|
55 | CommandSub,
|
56 | DoubleQuoted,
|
57 | SingleQuoted,
|
58 | word_e,
|
59 | word_t,
|
60 | word_part,
|
61 | word_part_e,
|
62 | word_part_t,
|
63 | rhs_word_e,
|
64 | rhs_word_t,
|
65 | sh_lhs,
|
66 | sh_lhs_e,
|
67 | command,
|
68 | command_e,
|
69 | BraceGroup,
|
70 | for_iter_e,
|
71 | case_arg_e,
|
72 | case_arg,
|
73 | condition_e,
|
74 | redir_param,
|
75 | redir_param_e,
|
76 | Redir,
|
77 | List_of_command,
|
78 | )
|
79 | from asdl import runtime
|
80 | from core.error import p_die
|
81 | from frontend import lexer
|
82 | from frontend import location
|
83 | from osh import word_
|
84 | from mycpp import mylib
|
85 | from mycpp.mylib import log, print_stderr, tagswitch
|
86 |
|
87 | from typing import Dict, cast, TYPE_CHECKING
|
88 | if TYPE_CHECKING:
|
89 | from _devbuild.gen.syntax_asdl import command_t
|
90 | from core import alloc
|
91 |
|
92 | _ = log
|
93 |
|
94 |
|
95 | class Cursor(object):
|
96 | """
|
97 | API to print/transform a complete source file, stored in a single arena.
|
98 |
|
99 | In, core/alloc.py, SnipCodeBlock() and SnipCodeString work on lines. They
|
100 | don't iterate over tokens.
|
101 |
|
102 | Or add a separate hash table of Token -> span ID? That makes sense because
|
103 | we need that kind of "address hash" for type checking anyway.
|
104 |
|
105 | You use the hash table to go from next_token_id .. TokenId(until_token).
|
106 | """
|
107 |
|
108 | def __init__(self, arena, f):
|
109 | # type: (alloc.Arena, mylib.Writer) -> None
|
110 | self.arena = arena
|
111 | self.f = f
|
112 | self.next_span_id = 0
|
113 |
|
114 | def _PrintUntilSpid(self, until_span_id):
|
115 | # type: (int) -> None
|
116 |
|
117 | # Sometimes we add +1
|
118 | if until_span_id == runtime.NO_SPID:
|
119 | assert 0, 'Missing span ID, got %d' % until_span_id
|
120 |
|
121 | for span_id in xrange(self.next_span_id, until_span_id):
|
122 | span = self.arena.GetToken(span_id)
|
123 |
|
124 | # A span for Eof may not have a line when the file is completely empty.
|
125 | if span.line is None:
|
126 | continue
|
127 |
|
128 | # Special case for recovering stripped leading space!
|
129 | # See osh/word_compile.py
|
130 | start_index = (0 if span.id == Id.Lit_CharsWithoutPrefix else
|
131 | span.col)
|
132 | end_index = span.col + span.length
|
133 |
|
134 | piece = span.line.content[start_index:end_index]
|
135 | self.f.write(piece)
|
136 |
|
137 | self.next_span_id = until_span_id
|
138 |
|
139 | def _SkipUntilSpid(self, next_span_id):
|
140 | # type: (int) -> None
|
141 | """Skip everything before next_span_id.
|
142 |
|
143 | Printing will start at next_span_id
|
144 | """
|
145 | if (next_span_id == runtime.NO_SPID or
|
146 | next_span_id == runtime.NO_SPID + 1):
|
147 | assert 0, 'Missing span ID, got %d' % next_span_id
|
148 | self.next_span_id = next_span_id
|
149 |
|
150 | def SkipUntil(self, tok):
|
151 | # type: (Token) -> None
|
152 | span_id = self.arena.GetSpanId(tok)
|
153 | self._SkipUntilSpid(span_id)
|
154 |
|
155 | def SkipPast(self, tok):
|
156 | # type: (Token) -> None
|
157 | span_id = self.arena.GetSpanId(tok)
|
158 | self._SkipUntilSpid(span_id + 1)
|
159 |
|
160 | def PrintUntil(self, tok):
|
161 | # type: (Token) -> None
|
162 | span_id = self.arena.GetSpanId(tok)
|
163 |
|
164 | # Test invariant
|
165 | if mylib.PYTHON:
|
166 | arena_tok = self.arena.GetToken(span_id)
|
167 | if tok != arena_tok:
|
168 | raise AssertionError(
|
169 | '%s %d %d != %s %d %d' %
|
170 | (tok, span_id, id(tok), arena_tok,
|
171 | self.arena.GetSpanId(arena_tok), id(arena_tok)))
|
172 |
|
173 | self._PrintUntilSpid(span_id)
|
174 |
|
175 | def PrintIncluding(self, tok):
|
176 | # type: (Token) -> None
|
177 | span_id = self.arena.GetSpanId(tok)
|
178 | self._PrintUntilSpid(span_id + 1)
|
179 |
|
180 | def PrintUntilEnd(self):
|
181 | # type: () -> None
|
182 | self._PrintUntilSpid(self.arena.LastSpanId())
|
183 |
|
184 |
|
185 | def LosslessCat(arena):
|
186 | # type: (alloc.Arena) -> None
|
187 | """
|
188 | For testing the lossless invariant: the tokens "add up" to the original
|
189 | doc.
|
190 | """
|
191 | cursor = Cursor(arena, mylib.Stdout())
|
192 | cursor.PrintUntilEnd()
|
193 |
|
194 |
|
195 | def PrintTokens(arena):
|
196 | # type: (alloc.Arena) -> None
|
197 | """Debugging tool to see tokens."""
|
198 |
|
199 | if len(arena.tokens) == 1: # Special case for line_id == -1
|
200 | print('Empty file with EOF token on invalid line:')
|
201 | print('%s' % arena.tokens[0])
|
202 | return
|
203 |
|
204 | # TODO:
|
205 | # - TSV8: end position, token type
|
206 | # - then an option to print token text, as a J8 string
|
207 | # - and then there can be a separate tool to number the columns
|
208 | #
|
209 | # - Do we also have JSON8 / HTM8 / TSV8 tokens?
|
210 | # - And mini-languages like glob, etc.
|
211 | for i, tok in enumerate(arena.tokens):
|
212 | piece = tok.line.content[tok.col:tok.col + tok.length]
|
213 | print('%5d %-20s %r' % (i, Id_str(tok.id, dot=False), piece))
|
214 | print_stderr('(%d tokens)' % len(arena.tokens))
|
215 |
|
216 |
|
217 | def Ysh_ify(arena, node):
|
218 | # type: (alloc.Arena, command_t) -> None
|
219 | cursor = Cursor(arena, mylib.Stdout())
|
220 | fixer = YshPrinter(cursor, arena, mylib.Stdout())
|
221 | fixer.DoCommand(node, None, at_top_level=True) # no local symbols yet
|
222 | fixer.End()
|
223 |
|
224 |
|
225 | # PROBLEM: ~ substitution. That is disabled by "".
|
226 | # You can turn it into $HOME I guess
|
227 | # const foo = "$HOME/src"
|
228 | # const foo = %( ~/src )[0] # does this make sense?
|
229 |
|
230 |
|
231 | def _GetRhsStyle(w):
|
232 | # type: (rhs_word_t) -> word_style_t
|
233 | """Determine what style an assignment should use. '' or "", or an
|
234 | expression.
|
235 |
|
236 | SQ foo= setglobal foo = ''
|
237 | SQ foo='' setglobal foo = ''
|
238 | DQ foo="" setglobal foo = "" # Or we could normalize it if no subs?
|
239 | DQ foo="" setglobal foo = "" # Or we could normalize it if no subs?
|
240 |
|
241 | # Need these too.
|
242 | # Or honestly should C strings be the default? And then raw strings are
|
243 | # optional? Because most usages of \n and \0 can turn into Oil?
|
244 | # Yeah I want the default to be statically parseable, so we subvert the \t
|
245 | # and \n of command line tools?
|
246 | # As long as we are fully analyzing the strings, we might as well go all the
|
247 | # way!
|
248 | # I think I need a PartialStaticEval() to paper over this.
|
249 | #
|
250 | # The main issue is regex and globs, because they use escape for a different
|
251 | # purpose. I think just do
|
252 | # grep r'foo\tbar' or something.
|
253 |
|
254 | C_SQ foo=$'\n' setglobal foo = C'\n'
|
255 | C_DQ foo=$'\n'"$bar" setglobal foo = C"\n$(bar)"
|
256 |
|
257 | Expr path=${1:-} setglobal path = $1 or ''
|
258 | Expr host=${2:-$(hostname)} setglobal host = $2 or $[hostname]
|
259 |
|
260 | What's the difference between Expr and Unquoted? I think they're the same/
|
261 | """
|
262 | # Actually splitting NEVER HAPPENS ON ASSIGNMENT. LEAVE IT OFF.
|
263 |
|
264 | UP_w = w
|
265 | with tagswitch(w) as case:
|
266 | if case(rhs_word_e.Empty):
|
267 | return word_style_e.SQ
|
268 |
|
269 | elif case(rhs_word_e.Compound):
|
270 | w = cast(CompoundWord, UP_w)
|
271 | if len(w.parts) == 0:
|
272 | raise AssertionError(w)
|
273 |
|
274 | elif len(w.parts) == 1:
|
275 | part0 = w.parts[0]
|
276 | UP_part0 = part0
|
277 | with tagswitch(part0) as case:
|
278 | # VAR_SUBS
|
279 | if case(word_part_e.TildeSub):
|
280 | # x=~andy/src
|
281 | # -> setvar x = homedir('andy') + '/src'
|
282 | return word_style_e.Expr
|
283 |
|
284 | elif case(word_part_e.Literal):
|
285 | # local x=y
|
286 | # -> var x = 'y'
|
287 | return word_style_e.SQ
|
288 |
|
289 | elif case(word_part_e.SimpleVarSub):
|
290 | # local x=$myvar
|
291 | # -> var x = "$myvar"
|
292 | # or var x = ${myvar}
|
293 | # or var x = myvar
|
294 | return word_style_e.DQ
|
295 |
|
296 | elif case(word_part_e.BracedVarSub, word_part_e.CommandSub,
|
297 | word_part_e.ArithSub):
|
298 | # x=$(hostname)
|
299 | # -> setvar x = $(hostname)
|
300 | return word_style_e.Unquoted
|
301 |
|
302 | elif case(word_part_e.DoubleQuoted):
|
303 | part0 = cast(DoubleQuoted, UP_part0)
|
304 |
|
305 | # TODO: remove quotes in single part like "$(hostname)" -> $(hostname)
|
306 | return word_style_e.DQ
|
307 |
|
308 | else:
|
309 | # multiple parts use YSTR in general?
|
310 | # Depends if there are subs
|
311 | return word_style_e.DQ
|
312 |
|
313 | # Default
|
314 | return word_style_e.SQ
|
315 |
|
316 |
|
317 | class YshPrinter(object):
|
318 | """Prettify OSH to YSH."""
|
319 |
|
320 | def __init__(self, cursor, arena, f):
|
321 | # type: (Cursor, alloc.Arena, mylib.Writer) -> None
|
322 | self.cursor = cursor
|
323 | self.arena = arena
|
324 | self.f = f
|
325 |
|
326 | def _DebugSpid(self, spid):
|
327 | # type: (int) -> None
|
328 | span = self.arena.GetToken(spid)
|
329 | s = span.line.content[span.col:span.col + span.length]
|
330 | print_stderr('SPID %d = %r' % (spid, s))
|
331 |
|
332 | def End(self):
|
333 | # type: () -> None
|
334 | """Make sure we print until the end of the file."""
|
335 | self.cursor.PrintUntilEnd()
|
336 |
|
337 | def DoRedirect(self, node, local_symbols):
|
338 | # type: (Redir, Dict[str, bool]) -> None
|
339 | """
|
340 | Change here docs to <<< '''
|
341 | """
|
342 | #print(node, file=sys.stderr)
|
343 | op_id = node.op.id
|
344 | self.cursor.PrintUntil(node.op)
|
345 |
|
346 | if node.arg.tag() == redir_param_e.HereDoc:
|
347 | here_doc = cast(redir_param.HereDoc, node.arg)
|
348 |
|
349 | here_begin = here_doc.here_begin
|
350 | ok, delimiter, delim_quoted = word_.StaticEval(here_begin)
|
351 | if not ok:
|
352 | p_die('Invalid here doc delimiter', loc.Word(here_begin))
|
353 |
|
354 | # Turn everything into <<<. We just change the quotes
|
355 | self.f.write('<<<')
|
356 |
|
357 | if delim_quoted:
|
358 | self.f.write(" '''")
|
359 | else:
|
360 | self.f.write(' """')
|
361 |
|
362 | delim_end_tok = location.RightTokenForWord(here_begin)
|
363 | self.cursor.SkipPast(delim_end_tok)
|
364 |
|
365 | # Now print the lines. TODO: Have a flag to indent these to the level of
|
366 | # the owning command, e.g.
|
367 | # cat <<EOF
|
368 | # EOF
|
369 | # Or since most here docs are the top level, you could just have a hack
|
370 | # for a fixed indent? TODO: Look at real use cases.
|
371 | for part in here_doc.stdin_parts:
|
372 | self.DoWordPart(part, local_symbols)
|
373 |
|
374 | self.cursor.SkipPast(here_doc.here_end_tok)
|
375 | if delim_quoted:
|
376 | self.f.write("'''\n")
|
377 | else:
|
378 | self.f.write('"""\n')
|
379 |
|
380 | else:
|
381 | pass
|
382 |
|
383 | # cat << EOF
|
384 | # hello $name
|
385 | # EOF
|
386 | # cat <<< """
|
387 | # hello $name
|
388 | # """
|
389 |
|
390 | # cat << 'EOF'
|
391 | # no expansion
|
392 | # EOF
|
393 |
|
394 | # cat <<< '''
|
395 | # no expansion
|
396 | # '''
|
397 |
|
398 | def DoShAssignment(self, node, at_top_level, local_symbols):
|
399 | # type: (command.ShAssignment, bool, Dict[str, bool]) -> None
|
400 | """
|
401 | local_symbols:
|
402 | - Add every 'local' declaration to it
|
403 | - problem: what if you have local in an "if" ?
|
404 | - we could treat it like nested scope and see what happens? Do any
|
405 | programs have a problem with it?
|
406 | case/if/for/while/BraceGroup all define scopes or what?
|
407 | You don't want inconsistency of variables that could be defined at
|
408 | any point.
|
409 | - or maybe you only need it within "if / case" ? Well I guess
|
410 | for/while can break out of the loop and cause problems. A break is
|
411 | an "if".
|
412 |
|
413 | - for subsequent
|
414 | """
|
415 | # Change RHS to expression language. Bare words not allowed. foo -> 'foo'
|
416 |
|
417 | has_rhs = False # TODO: Should be on a per-variable basis.
|
418 | # local a=b c=d, or just punt on those
|
419 | defined_locally = False # is it a local variable in this function?
|
420 | # can't tell if global
|
421 |
|
422 | if True:
|
423 | self.cursor.PrintUntil(node.pairs[0].left)
|
424 |
|
425 | # For now, just detect whether the FIRST assignment on the line has been
|
426 | # declared locally. We might want to split every line into separate
|
427 | # statements.
|
428 | if local_symbols is not None:
|
429 | lhs0 = node.pairs[0].lhs
|
430 | #if lhs0.tag() == sh_lhs_e.Name and lhs0.name in local_symbols:
|
431 | # defined_locally = True
|
432 |
|
433 | #print("CHECKING NAME", lhs0.name, defined_locally, local_symbols)
|
434 |
|
435 | # TODO: Avoid translating these
|
436 | has_array_index = [
|
437 | pair.lhs.tag() == sh_lhs_e.UnparsedIndex for pair in node.pairs
|
438 | ]
|
439 |
|
440 | # need semantic analysis.
|
441 | # Would be nice to assume that it's a local though.
|
442 | if at_top_level:
|
443 | self.f.write('setvar ')
|
444 | elif defined_locally:
|
445 | self.f.write('set ')
|
446 | #self.f.write('[local mutated]')
|
447 | else:
|
448 | # We're in a function, but it's not defined locally, so we must be
|
449 | # mutating a global.
|
450 | self.f.write('setvar ')
|
451 |
|
452 | # foo=bar spam=eggs -> foo = 'bar', spam = 'eggs'
|
453 | n = len(node.pairs)
|
454 | for i, pair in enumerate(node.pairs):
|
455 | lhs = pair.lhs
|
456 | UP_lhs = lhs
|
457 | with tagswitch(lhs) as case:
|
458 | if case(sh_lhs_e.Name):
|
459 | lhs = cast(sh_lhs.Name, UP_lhs)
|
460 |
|
461 | self.cursor.PrintUntil(pair.left)
|
462 | # Assume skipping over one Lit_VarLike token
|
463 | self.cursor.SkipPast(pair.left)
|
464 |
|
465 | # Replace name. I guess it's Lit_Chars.
|
466 | self.f.write(lhs.name)
|
467 | self.f.write(' = ')
|
468 |
|
469 | # TODO: This should be translated from Empty.
|
470 | if pair.rhs.tag() == rhs_word_e.Empty:
|
471 | self.f.write("''") # local i -> var i = ''
|
472 | else:
|
473 | self.DoRhsWord(pair.rhs, local_symbols)
|
474 |
|
475 | elif case(sh_lhs_e.UnparsedIndex):
|
476 | # --one-pass-parse gives us this node, instead of IndexedName
|
477 | pass
|
478 |
|
479 | else:
|
480 | raise AssertionError(pair.lhs.__class__.__name__)
|
481 |
|
482 | if i != n - 1:
|
483 | self.f.write(',')
|
484 |
|
485 | def _DoSimple(self, node, local_symbols):
|
486 | # type: (command.Simple, Dict[str, bool]) -> None
|
487 |
|
488 | # How to preserve spaces between words? Do you want to do it?
|
489 | # Well you need to test this:
|
490 | #
|
491 | # echo foo \
|
492 | # bar
|
493 |
|
494 | if len(node.more_env):
|
495 | # We only need to transform the right side, not left side.
|
496 | for pair in node.more_env:
|
497 | self.DoRhsWord(pair.val, local_symbols)
|
498 |
|
499 | if len(node.words):
|
500 | first_word = node.words[0]
|
501 | ok, val, quoted = word_.StaticEval(first_word)
|
502 | word0_tok = location.LeftTokenForWord(first_word)
|
503 | if ok and not quoted:
|
504 | if val == '[' and len(node.words) >= 3:
|
505 | word2 = node.words[-2]
|
506 | last_word = node.words[-1]
|
507 |
|
508 | # Check if last word is ]
|
509 | ok, val, quoted = word_.StaticEval(last_word)
|
510 | if ok and not quoted and val == ']':
|
511 | # Replace [ with 'test'
|
512 | self.cursor.PrintUntil(word0_tok)
|
513 | self.cursor.SkipPast(word0_tok)
|
514 | self.f.write('test')
|
515 |
|
516 | for w in node.words[1:-1]:
|
517 | self.DoWordInCommand(w, local_symbols)
|
518 |
|
519 | # Now omit ]
|
520 | tok2 = location.RightTokenForWord(word2)
|
521 | rbrack_tok = location.LeftTokenForWord(last_word)
|
522 |
|
523 | # Skip the space token before ]
|
524 | self.cursor.PrintIncluding(tok2)
|
525 | # ] takes one spid
|
526 | self.cursor.SkipPast(rbrack_tok)
|
527 | return
|
528 | else:
|
529 | raise RuntimeError('Got [ without ]')
|
530 |
|
531 | elif val == '.':
|
532 | self.cursor.PrintUntil(word0_tok)
|
533 | self.cursor.SkipPast(word0_tok)
|
534 | self.f.write('source')
|
535 | return
|
536 |
|
537 | for w in node.words:
|
538 | self.DoWordInCommand(w, local_symbols)
|
539 |
|
540 | # TODO: Print the terminator. Could be \n or ;
|
541 | # Need to print env like PYTHONPATH = 'foo' && ls
|
542 | # Need to print redirects:
|
543 | # < > are the same. << is here string, and >> is assignment.
|
544 | # append is >+
|
545 |
|
546 | # TODO: static_eval of simple command
|
547 | # - [ -> "test". Eliminate trailing ].
|
548 | # - . -> source, etc.
|
549 |
|
550 | def DoCommand(self, node, local_symbols, at_top_level=False):
|
551 | # type: (command_t, Dict[str, bool], bool) -> None
|
552 |
|
553 | UP_node = node
|
554 |
|
555 | with tagswitch(node) as case:
|
556 | if case(command_e.CommandList):
|
557 | node = cast(command.CommandList, UP_node)
|
558 |
|
559 | # TODO: How to distinguish between echo hi; echo bye; and on
|
560 | # separate lines
|
561 | for child in node.children:
|
562 | self.DoCommand(child,
|
563 | local_symbols,
|
564 | at_top_level=at_top_level)
|
565 |
|
566 | elif case(command_e.Redirect):
|
567 | node = cast(command.Redirect, UP_node)
|
568 |
|
569 | self.DoCommand(node.child,
|
570 | local_symbols,
|
571 | at_top_level=at_top_level)
|
572 | for r in node.redirects:
|
573 | self.DoRedirect(r, local_symbols)
|
574 |
|
575 | elif case(command_e.Simple):
|
576 | node = cast(command.Simple, UP_node)
|
577 |
|
578 | self._DoSimple(node, local_symbols)
|
579 |
|
580 | elif case(command_e.ShAssignment):
|
581 | node = cast(command.ShAssignment, UP_node)
|
582 |
|
583 | self.DoShAssignment(node, at_top_level, local_symbols)
|
584 |
|
585 | elif case(command_e.Pipeline):
|
586 | node = cast(command.Pipeline, UP_node)
|
587 |
|
588 | for child in node.children:
|
589 | self.DoCommand(child, local_symbols)
|
590 |
|
591 | elif case(command_e.AndOr):
|
592 | node = cast(command.AndOr, UP_node)
|
593 |
|
594 | for child in node.children:
|
595 | self.DoCommand(child, local_symbols)
|
596 |
|
597 | elif case(command_e.Sentence):
|
598 | node = cast(command.Sentence, UP_node)
|
599 |
|
600 | # 'ls &' to 'fork ls'
|
601 | # Keep ; the same.
|
602 | self.DoCommand(node.child, local_symbols)
|
603 |
|
604 | # This has to be different in the function case.
|
605 | elif case(command_e.BraceGroup):
|
606 | node = cast(BraceGroup, UP_node)
|
607 |
|
608 | # { echo hi; } -> do { echo hi }
|
609 | # For now it might be OK to keep 'do { echo hi; }
|
610 | self.cursor.PrintUntil(node.left)
|
611 | self.cursor.SkipPast(node.left)
|
612 | self.f.write('do {')
|
613 |
|
614 | for child in node.children:
|
615 | self.DoCommand(child, local_symbols)
|
616 |
|
617 | elif case(command_e.Subshell):
|
618 | node = cast(command.Subshell, UP_node)
|
619 |
|
620 | # (echo hi) -> shell echo hi
|
621 | # (echo hi; echo bye) -> shell {echo hi; echo bye}
|
622 |
|
623 | self.cursor.PrintUntil(node.left)
|
624 | self.cursor.SkipPast(node.left)
|
625 | self.f.write('shell {')
|
626 |
|
627 | self.DoCommand(node.child, local_symbols)
|
628 |
|
629 | #self._DebugSpid(right_spid)
|
630 | #self._DebugSpid(right_spid + 1)
|
631 |
|
632 | #print('RIGHT SPID', right_spid)
|
633 | self.cursor.PrintUntil(node.right)
|
634 | self.cursor.SkipPast(node.right)
|
635 | self.f.write('}')
|
636 |
|
637 | elif case(command_e.ShFunction):
|
638 | node = cast(command.ShFunction, UP_node)
|
639 |
|
640 | # TODO: skip name
|
641 | #self.f.write('proc %s' % node.name)
|
642 |
|
643 | # New symbol table for every function.
|
644 | new_local_symbols = {} # type: Dict[str, bool]
|
645 |
|
646 | # Should be the left most span, including 'function'
|
647 | if node.keyword: # function foo { ...
|
648 | self.cursor.PrintUntil(node.keyword)
|
649 | else: # foo() { ...
|
650 | self.cursor.PrintUntil(node.name_tok)
|
651 |
|
652 | self.f.write('proc %s ' % node.name)
|
653 |
|
654 | UP_body = node.body
|
655 | with tagswitch(UP_body) as case:
|
656 | if case(command_e.BraceGroup):
|
657 | body = cast(BraceGroup, UP_body)
|
658 | self.cursor.SkipUntil(body.left)
|
659 |
|
660 | # Don't add "do" like a standalone brace group. Just use {}.
|
661 | for child in body.children:
|
662 | self.DoCommand(child, new_local_symbols)
|
663 | else:
|
664 | # very rare cases like f() ( subshell )
|
665 | pass
|
666 |
|
667 | elif case(command_e.DoGroup):
|
668 | node = cast(command.DoGroup, UP_node)
|
669 |
|
670 | self.cursor.PrintUntil(node.left)
|
671 | self.cursor.SkipPast(node.left)
|
672 | self.f.write('{')
|
673 |
|
674 | for child in node.children:
|
675 | self.DoCommand(child, local_symbols)
|
676 |
|
677 | self.cursor.PrintUntil(node.right)
|
678 | self.cursor.SkipPast(node.right)
|
679 | self.f.write('}')
|
680 |
|
681 | elif case(command_e.ForEach):
|
682 | node = cast(command.ForEach, UP_node)
|
683 |
|
684 | # Need to preserve spaces between words, because there can be line
|
685 | # wrapping.
|
686 | # for x in a b c \
|
687 | # d e f; do
|
688 |
|
689 | UP_iterable = node.iterable
|
690 | with tagswitch(node.iterable) as case:
|
691 | if case(for_iter_e.Args):
|
692 | self.f.write('for %s in @ARGV ' % node.iter_names[0])
|
693 |
|
694 | # note: command_t doesn't have .spids
|
695 | body_tok = location.TokenForCommand(node.body)
|
696 | self.cursor.SkipUntil(body_tok)
|
697 |
|
698 | elif case(for_iter_e.Words):
|
699 | pass
|
700 |
|
701 | elif case(for_iter_e.YshExpr):
|
702 | pass
|
703 |
|
704 | if node.semi_tok is not None:
|
705 | self.cursor.PrintUntil(node.semi_tok)
|
706 | self.cursor.SkipPast(node.semi_tok)
|
707 |
|
708 | self.DoCommand(node.body, local_symbols)
|
709 |
|
710 | elif case(command_e.WhileUntil):
|
711 | node = cast(command.WhileUntil, UP_node)
|
712 |
|
713 | # Skip 'until', and replace it with 'while not'
|
714 | if node.keyword.id == Id.KW_Until:
|
715 | self.cursor.PrintUntil(node.keyword)
|
716 | self.cursor.SkipPast(node.keyword)
|
717 | self.f.write('while !')
|
718 |
|
719 | if node.cond.tag() == condition_e.Shell:
|
720 | commands = cast(List_of_command, node.cond)
|
721 | # Skip the semi-colon in the condition, which is usually a Sentence
|
722 | if (len(commands) == 1 and
|
723 | commands[0].tag() == command_e.Sentence):
|
724 | sentence = cast(command.Sentence, commands[0])
|
725 | self.DoCommand(sentence.child, local_symbols)
|
726 | self.cursor.SkipPast(sentence.terminator)
|
727 |
|
728 | self.DoCommand(node.body, local_symbols)
|
729 |
|
730 | elif case(command_e.If):
|
731 | node = cast(command.If, UP_node)
|
732 |
|
733 | # if foo; then -> if foo {
|
734 | # elif foo; then -> } elif foo {
|
735 | for i, arm in enumerate(node.arms):
|
736 | elif_tok = arm.keyword
|
737 | then_tok = arm.then_tok
|
738 |
|
739 | if i != 0: # 'if' not 'elif' on the first arm
|
740 | self.cursor.PrintUntil(elif_tok)
|
741 | self.f.write('} ')
|
742 |
|
743 | cond = arm.cond
|
744 | if cond.tag() == condition_e.Shell:
|
745 | commands = cast(List_of_command, cond)
|
746 | if (len(commands) == 1 and
|
747 | commands[0].tag() == command_e.Sentence):
|
748 | sentence = cast(command.Sentence, commands[0])
|
749 | self.DoCommand(sentence, local_symbols)
|
750 |
|
751 | # Remove semi-colon
|
752 | self.cursor.PrintUntil(sentence.terminator)
|
753 | self.cursor.SkipPast(sentence.terminator)
|
754 | else:
|
755 | for child in commands:
|
756 | self.DoCommand(child, local_symbols)
|
757 |
|
758 | self.cursor.PrintUntil(then_tok)
|
759 | self.cursor.SkipPast(then_tok)
|
760 | self.f.write('{')
|
761 |
|
762 | for child in arm.action:
|
763 | self.DoCommand(child, local_symbols)
|
764 |
|
765 | # else -> } else {
|
766 | if len(node.else_action):
|
767 | self.cursor.PrintUntil(node.else_kw)
|
768 | self.f.write('} ')
|
769 | self.cursor.PrintIncluding(node.else_kw)
|
770 | self.f.write(' {')
|
771 |
|
772 | for child in node.else_action:
|
773 | self.DoCommand(child, local_symbols)
|
774 |
|
775 | # fi -> }
|
776 | self.cursor.PrintUntil(node.fi_kw)
|
777 | self.cursor.SkipPast(node.fi_kw)
|
778 | self.f.write('}')
|
779 |
|
780 | elif case(command_e.Case):
|
781 | node = cast(command.Case, UP_node)
|
782 |
|
783 | to_match = None # type: word_t
|
784 | with tagswitch(node.to_match) as case:
|
785 | if case(case_arg_e.YshExpr):
|
786 | return
|
787 | elif case(case_arg_e.Word):
|
788 | to_match = cast(case_arg.Word, node.to_match).w
|
789 | else:
|
790 | raise AssertionError()
|
791 |
|
792 | self.cursor.PrintIncluding(node.case_kw)
|
793 |
|
794 | # Figure out the variable name, so we can translate
|
795 | # - $var to (var)
|
796 | # - "$var" to (var)
|
797 | var_part = None # type: SimpleVarSub
|
798 | with tagswitch(to_match) as case:
|
799 | if case(word_e.Compound):
|
800 | w = cast(CompoundWord, to_match)
|
801 | part0 = w.parts[0]
|
802 |
|
803 | with tagswitch(part0) as case2:
|
804 | if case2(word_part_e.SimpleVarSub):
|
805 | var_part = cast(SimpleVarSub, part0)
|
806 |
|
807 | elif case2(word_part_e.DoubleQuoted):
|
808 | dq_part = cast(DoubleQuoted, part0)
|
809 | if len(dq_part.parts) == 1:
|
810 | dq_part0 = dq_part.parts[0]
|
811 |
|
812 | # Nesting is annoying -- it would be nice to use pattern
|
813 | # matching, but mycpp won't like it.
|
814 | # TODO: extract into a common function
|
815 | with tagswitch(dq_part0) as case3:
|
816 | if case3(word_part_e.SimpleVarSub):
|
817 | var_part = cast(
|
818 | SimpleVarSub, dq_part0)
|
819 | #log("VAR PART %s", var_part)
|
820 |
|
821 | if var_part:
|
822 | self.f.write(' (')
|
823 | self.f.write(lexer.LazyStr(var_part.tok))
|
824 | self.f.write(') ')
|
825 |
|
826 | self.cursor.SkipPast(node.arms_start) # Skip past 'in'
|
827 | self.f.write('{')
|
828 |
|
829 | missing_last_dsemi = False
|
830 |
|
831 | for case_arm in node.arms:
|
832 | # Replace ) with {
|
833 | self.cursor.PrintUntil(case_arm.middle)
|
834 | self.f.write(' {')
|
835 | self.cursor.SkipPast(case_arm.middle)
|
836 |
|
837 | for child in case_arm.action:
|
838 | self.DoCommand(child, local_symbols)
|
839 |
|
840 | if case_arm.right:
|
841 | # Change ;; to }
|
842 | self.cursor.PrintUntil(case_arm.right)
|
843 | self.f.write('}')
|
844 | self.cursor.SkipPast(case_arm.right)
|
845 | else:
|
846 | # valid: case $x in pat) echo hi ; esac
|
847 | missing_last_dsemi = True
|
848 |
|
849 | self.cursor.PrintUntil(node.arms_end) # 'esac' or }
|
850 |
|
851 | if missing_last_dsemi: # Print } for missing ;;
|
852 | self.f.write('}\n')
|
853 |
|
854 | self.cursor.SkipPast(node.arms_end) # 'esac' or }
|
855 |
|
856 | self.f.write('}') # in place of 'esac'
|
857 |
|
858 | elif case(command_e.TimeBlock):
|
859 | node = cast(command.TimeBlock, UP_node)
|
860 |
|
861 | self.DoCommand(node.pipeline, local_symbols)
|
862 |
|
863 | elif case(command_e.DParen):
|
864 | node = cast(command.DParen, UP_node)
|
865 | # TODO: arith expressions can words with command subs
|
866 | pass
|
867 |
|
868 | elif case(command_e.DBracket):
|
869 | node = cast(command.DBracket, UP_node)
|
870 |
|
871 | # TODO: bool_expr_t can have words with command subs
|
872 | pass
|
873 |
|
874 | else:
|
875 | pass
|
876 | #log('Command not handled: %s', node)
|
877 | #raise AssertionError(node.__class__.__name__)
|
878 |
|
879 | def DoRhsWord(self, node, local_symbols):
|
880 | # type: (rhs_word_t, Dict[str, bool]) -> None
|
881 | """For the RHS of assignments.
|
882 |
|
883 | TODO: for complex cases of word joining:
|
884 | local a=unquoted'single'"double"'"'
|
885 |
|
886 | We can try to handle it:
|
887 | var a = y"unquotedsingledouble\""
|
888 |
|
889 | Or simply abort and LEAVE IT ALONE. We should only translate things we
|
890 | recognize.
|
891 | """
|
892 | UP_node = node
|
893 | with tagswitch(node) as case:
|
894 | if case(rhs_word_e.Empty):
|
895 | self.f.write("''")
|
896 |
|
897 | elif case(rhs_word_e.Compound):
|
898 | node = cast(CompoundWord, UP_node)
|
899 |
|
900 | # TODO: This is wrong!
|
901 | style = _GetRhsStyle(node)
|
902 | if style == word_style_e.SQ:
|
903 | self.f.write("'")
|
904 | self.DoWordInCommand(node, local_symbols)
|
905 | self.f.write("'")
|
906 | elif style == word_style_e.DQ:
|
907 | self.f.write('"')
|
908 | self.DoWordInCommand(node, local_symbols)
|
909 | self.f.write('"')
|
910 | # TODO: Put these back
|
911 | #elif style == word_style_e.Expr:
|
912 | # pass
|
913 | #elif style == word_style_e.Unquoted:
|
914 | # pass
|
915 | else:
|
916 | # "${foo:-default}" -> foo or 'default'
|
917 | # ${foo:-default} -> @split(foo or 'default')
|
918 | # @(foo or 'default') -- implicit split.
|
919 |
|
920 | if word_.IsVarSub(node): # ${1} or "$1"
|
921 | # Do it in expression mode
|
922 | pass
|
923 | # NOTE: ArithSub with $(1 +2 ) is different than 1 + 2 because of
|
924 | # conversion to string.
|
925 |
|
926 | # For now, just stub it out
|
927 | self.DoWordInCommand(node, local_symbols)
|
928 |
|
929 | def DoWordInCommand(self, node, local_symbols):
|
930 | # type: (word_t, Dict[str, bool]) -> None
|
931 | """E.g. remove unquoted.
|
932 |
|
933 | echo "$x" -> echo $x
|
934 | """
|
935 | UP_node = node
|
936 |
|
937 | with tagswitch(node) as case:
|
938 | if case(word_e.Compound):
|
939 | node = cast(CompoundWord, UP_node)
|
940 |
|
941 | # UNQUOTE simple var subs
|
942 |
|
943 | # Special case for "$@".
|
944 | # TODO:
|
945 | # "$foo" -> $foo
|
946 | # "${foo}" -> $foo
|
947 |
|
948 | if (len(node.parts) == 1 and
|
949 | node.parts[0].tag() == word_part_e.DoubleQuoted):
|
950 | dq_part = cast(DoubleQuoted, node.parts[0])
|
951 |
|
952 | # NOTE: In double quoted case, this is the begin and end quote.
|
953 | # Do we need a HereDoc part?
|
954 |
|
955 | if len(dq_part.parts) == 1:
|
956 | part0 = dq_part.parts[0]
|
957 | if part0.tag() == word_part_e.SimpleVarSub:
|
958 | vsub_part = cast(SimpleVarSub, dq_part.parts[0])
|
959 | if vsub_part.tok.id == Id.VSub_At:
|
960 | self.cursor.PrintUntil(dq_part.left)
|
961 | self.cursor.SkipPast(
|
962 | dq_part.right) # " then $@ then "
|
963 | self.f.write('@ARGV')
|
964 | return # Done replacing
|
965 |
|
966 | # "$1" -> $1, "$foo" -> $foo
|
967 | if vsub_part.tok.id in (Id.VSub_Number,
|
968 | Id.VSub_DollarName):
|
969 | self.cursor.PrintUntil(dq_part.left)
|
970 | self.cursor.SkipPast(dq_part.right)
|
971 | self.f.write(lexer.TokenVal(vsub_part.tok))
|
972 | return
|
973 |
|
974 | # Single arith sub, command sub, etc.
|
975 | # On the other hand, an unquoted one needs to turn into
|
976 | #
|
977 | # $(echo one two) -> @[echo one two]
|
978 | # `echo one two` -> @[echo one two]
|
979 | #
|
980 | # ${var:-'the default'} -> @$(var or 'the default')
|
981 | #
|
982 | # $((1 + 2)) -> $(1 + 2) -- this is OK unquoted
|
983 |
|
984 | elif part0.tag() == word_part_e.BracedVarSub:
|
985 | # Skip over quote
|
986 | self.cursor.PrintUntil(dq_part.left)
|
987 | self.cursor.SkipPast(dq_part.left)
|
988 | self.DoWordPart(part0, local_symbols)
|
989 | self.cursor.SkipPast(dq_part.right)
|
990 | return
|
991 |
|
992 | elif part0.tag() == word_part_e.CommandSub:
|
993 | self.cursor.PrintUntil(dq_part.left)
|
994 | self.cursor.SkipPast(dq_part.left)
|
995 | self.DoWordPart(part0, local_symbols)
|
996 | self.cursor.SkipPast(dq_part.right)
|
997 | return
|
998 |
|
999 | # TODO: 'foo'"bar" should be "foobar", etc.
|
1000 | # If any part is double quoted, you can always double quote the whole
|
1001 | # thing?
|
1002 | for part in node.parts:
|
1003 | self.DoWordPart(part, local_symbols)
|
1004 |
|
1005 | elif case(word_e.BracedTree):
|
1006 | # Not doing anything now
|
1007 | pass
|
1008 |
|
1009 | else:
|
1010 | raise AssertionError(node.__class__.__name__)
|
1011 |
|
1012 | def DoWordPart(self, node, local_symbols, quoted=False):
|
1013 | # type: (word_part_t, Dict[str, bool], bool) -> None
|
1014 |
|
1015 | left_tok = location.LeftTokenForWordPart(node)
|
1016 | if left_tok:
|
1017 | self.cursor.PrintUntil(left_tok)
|
1018 |
|
1019 | UP_node = node
|
1020 |
|
1021 | with tagswitch(node) as case:
|
1022 | if case(word_part_e.ShArrayLiteral, word_part_e.BashAssocLiteral,
|
1023 | word_part_e.TildeSub, word_part_e.ExtGlob):
|
1024 | pass
|
1025 |
|
1026 | elif case(word_part_e.EscapedLiteral):
|
1027 | node = cast(word_part.EscapedLiteral, UP_node)
|
1028 | if quoted:
|
1029 | pass
|
1030 | else:
|
1031 | # If unquoted \e, it should quoted instead. ' ' vs. \<invisible space>
|
1032 | # Hm is this necessary though? I think the only motivation is changing
|
1033 | # \{ and \( for macros. And ' ' to be readable/visible.
|
1034 | t = node.token
|
1035 | val = lexer.TokenSliceLeft(t, 1)
|
1036 | assert len(val) == 1, val
|
1037 | if val != '\n':
|
1038 | self.cursor.PrintUntil(t)
|
1039 | self.cursor.SkipPast(t)
|
1040 | self.f.write("'%s'" % val)
|
1041 |
|
1042 | elif case(word_part_e.Literal):
|
1043 | node = cast(Token, UP_node)
|
1044 | self.cursor.PrintIncluding(node)
|
1045 |
|
1046 | elif case(word_part_e.SingleQuoted):
|
1047 | node = cast(SingleQuoted, UP_node)
|
1048 |
|
1049 | # TODO:
|
1050 | # '\n' is '\\n'
|
1051 | # $'\n' is '\n'
|
1052 | # TODO: Should print until right_spid
|
1053 | # left_spid, right_spid = node.spids
|
1054 | self.cursor.PrintUntil(node.right)
|
1055 |
|
1056 | elif case(word_part_e.DoubleQuoted):
|
1057 | node = cast(DoubleQuoted, UP_node)
|
1058 | for part in node.parts:
|
1059 | self.DoWordPart(part, local_symbols, quoted=True)
|
1060 |
|
1061 | elif case(word_part_e.SimpleVarSub):
|
1062 | node = cast(SimpleVarSub, UP_node)
|
1063 |
|
1064 | op_id = node.tok.id
|
1065 |
|
1066 | if op_id == Id.VSub_DollarName:
|
1067 | self.cursor.PrintIncluding(node.tok)
|
1068 |
|
1069 | elif op_id == Id.VSub_Number:
|
1070 | self.cursor.PrintIncluding(node.tok)
|
1071 |
|
1072 | elif op_id == Id.VSub_At: # $@ -- handled quoted case above
|
1073 | self.f.write('$[join(ARGV)]')
|
1074 | self.cursor.SkipPast(node.tok)
|
1075 |
|
1076 | elif op_id == Id.VSub_Star: # $*
|
1077 | # PEDANTIC: Depends if quoted or unquoted
|
1078 | self.f.write('$[join(ARGV)]')
|
1079 | self.cursor.SkipPast(node.tok)
|
1080 |
|
1081 | elif op_id == Id.VSub_Pound: # $#
|
1082 | # len(ARGV) ?
|
1083 | self.f.write('$Argc')
|
1084 | self.cursor.SkipPast(node.tok)
|
1085 |
|
1086 | else:
|
1087 | pass
|
1088 |
|
1089 | elif case(word_part_e.BracedVarSub):
|
1090 | node = cast(BracedVarSub, UP_node)
|
1091 |
|
1092 | # NOTE: Why do we need this but we don't need it in command sub?
|
1093 | self.cursor.PrintUntil(node.left)
|
1094 |
|
1095 | if node.bracket_op:
|
1096 | # a[1]
|
1097 | # These two change the sigil! ${a[@]} is now @a!
|
1098 | # a[@]
|
1099 | # a[*]
|
1100 | pass
|
1101 |
|
1102 | if node.prefix_op:
|
1103 | # len()
|
1104 | pass
|
1105 | if node.suffix_op:
|
1106 | pass
|
1107 |
|
1108 | op_id = node.name_tok.id
|
1109 | if op_id == Id.VSub_QMark:
|
1110 | self.cursor.PrintIncluding(node.name_tok)
|
1111 |
|
1112 | self.cursor.PrintIncluding(node.right)
|
1113 |
|
1114 | elif case(word_part_e.CommandSub):
|
1115 | node = cast(CommandSub, UP_node)
|
1116 |
|
1117 | if node.left_token.id == Id.Left_Backtick:
|
1118 | self.cursor.PrintUntil(node.left_token)
|
1119 | self.f.write('$(')
|
1120 | self.cursor.SkipPast(node.left_token)
|
1121 |
|
1122 | self.DoCommand(node.child, local_symbols)
|
1123 |
|
1124 | # Skip over right `
|
1125 | self.cursor.SkipPast(node.right)
|
1126 | self.f.write(')')
|
1127 |
|
1128 | else:
|
1129 | self.cursor.PrintIncluding(node.right)
|
1130 |
|
1131 | else:
|
1132 | pass
|