OILS / tools / ysh_ify.py View on Github | oils.pub

1321 lines, 698 significant
1from __future__ import print_function
2"""
3ysh_ify.py: Roughly translate OSH to YSH. Doesn't respect semantics.
4
5ESSENTIAL
6
7Command:
8
9 then/fi, do/done -> { }
10
11 new case statement
12
13 f() { } -> proc f { } (changes scope)
14
15 subshell -> forkwait, because () is taken
16 { } to fopen { }?
17
18 Approximate: var declaration:
19 local a=b -> var a = 'b', I think
20
21 <<EOF here docs to '''
22
23Word:
24 "$@" -> @ARGV
25
26 Not common: unquoted $x -> @[split(x)]
27
28LEGACY that I don't personally use
29
30Builtins:
31 [ -> test
32 . -> source
33
34Word:
35 backticks -> $() (I don't use this)
36 quote removal "$foo" -> $foo
37 brace removal ${foo} and "${foo}" -> $foo
38
39--tool format
40
41 fix indentation and spacing, like clang-format
42 can "lower" the LST to a rough representation with keywords / "first words",
43 { } ( ), and comments
44 - the "atoms" should not have newlines
45"""
46
47from _devbuild.gen.id_kind_asdl import Id, Id_str
48from _devbuild.gen.runtime_asdl import word_style_e, word_style_t
49from _devbuild.gen.syntax_asdl import (
50 loc,
51 CompoundWord,
52 Token,
53 SimpleVarSub,
54 BracedVarSub,
55 CommandSub,
56 DoubleQuoted,
57 SingleQuoted,
58 word_e,
59 word_t,
60 #word_str,
61 word_part,
62 word_part_e,
63 word_part_t,
64 rhs_word_e,
65 rhs_word_t,
66 sh_lhs,
67 sh_lhs_e,
68 command,
69 command_e,
70 #command_str,
71 BraceGroup,
72 for_iter_e,
73 case_arg_e,
74 case_arg,
75 condition_e,
76 redir_param,
77 redir_param_e,
78 Redir,
79 List_of_command,
80 ShFunction,
81)
82from asdl import runtime
83from core.error import p_die
84from display import ui
85from frontend import lexer
86from frontend import location
87from osh import word_
88from mycpp import mylib
89from mycpp.mylib import log, print_stderr, tagswitch
90
91from typing import Dict, cast, TYPE_CHECKING
92if TYPE_CHECKING:
93 from _devbuild.gen.syntax_asdl import command_t
94 from core import alloc
95
96_ = log
97
98
class Cursor(object):
    """Print or transform a complete source file, stored in a single arena.

    The cursor remembers the ID of the next token ("span") that has been
    neither printed nor skipped.  The Print* methods copy the original text of
    tokens to the writer and advance the cursor; the Skip* methods advance the
    cursor without writing anything.

    Design notes:

    In core/alloc.py, SnipCodeBlock() and SnipCodeString() work on lines.
    They don't iterate over tokens.

    Or add a separate hash table of Token -> span ID?  That makes sense
    because we need that kind of "address hash" for type checking anyway.

    You use the hash table to go from next_token_id .. TokenId(until_token).
    """

    def __init__(self, arena, f):
        # type: (alloc.Arena, mylib.Writer) -> None
        self.arena = arena
        self.f = f
        # ID of the first span that has been neither printed nor skipped
        self.next_span_id = 0

    def _PrintUntilSpid(self, until_span_id):
        # type: (int) -> None
        """Write spans [next_span_id, until_span_id), then move the cursor.

        The cursor is set to until_span_id even when the range is empty.

        Raises:
          AssertionError: if until_span_id is runtime.NO_SPID
        """
        # Callers sometimes pass span_id + 1, so only NO_SPID itself is
        # rejected here.  Raise explicitly so the check can't be compiled out.
        if until_span_id == runtime.NO_SPID:
            raise AssertionError('Missing span ID, got %d' % until_span_id)

        for span_id in xrange(self.next_span_id, until_span_id):
            span = self.arena.GetToken(span_id)

            # A span for Eof may not have a line when the file is completely
            # empty.
            if span.line is None:
                continue

            # Special case for recovering stripped leading space!
            # See osh/word_compile.py
            start_index = (0 if span.id == Id.Lit_CharsWithoutPrefix else
                           span.col)
            end_index = span.col + span.length

            piece = span.line.content[start_index:end_index]
            self.f.write(piece)

        self.next_span_id = until_span_id

    def _SkipUntilSpid(self, next_span_id):
        # type: (int) -> None
        """Skip everything before next_span_id.

        Printing will start at next_span_id.

        Raises:
          AssertionError: if next_span_id is runtime.NO_SPID, or NO_SPID + 1
            (which is what SkipPast() computes from a missing ID)
        """
        if (next_span_id == runtime.NO_SPID or
                next_span_id == runtime.NO_SPID + 1):
            raise AssertionError('Missing span ID, got %d' % next_span_id)
        self.next_span_id = next_span_id

    def SkipUntil(self, tok):
        # type: (Token) -> None
        """Drop all text before tok; tok is the next thing to be printed."""
        span_id = self.arena.GetSpanId(tok)
        self._SkipUntilSpid(span_id)

    def SkipPast(self, tok):
        # type: (Token) -> None
        """Drop all text up to and including tok."""
        span_id = self.arena.GetSpanId(tok)
        self._SkipUntilSpid(span_id + 1)

    def PrintUntil(self, tok):
        # type: (Token) -> None
        """Print all pending text before tok, but not tok itself."""
        span_id = self.arena.GetSpanId(tok)

        # Test invariant: the arena must map the span ID back to the same
        # token.  Only checked when running under Python.
        if mylib.PYTHON:
            arena_tok = self.arena.GetToken(span_id)
            if tok != arena_tok:
                raise AssertionError(
                    '%s %d %d != %s %d %d' %
                    (tok, span_id, id(tok), arena_tok,
                     self.arena.GetSpanId(arena_tok), id(arena_tok)))

        self._PrintUntilSpid(span_id)

    def PrintIncluding(self, tok):
        # type: (Token) -> None
        """Print all pending text up to and including tok."""
        span_id = self.arena.GetSpanId(tok)
        self._PrintUntilSpid(span_id + 1)

    def PrintUntilEnd(self):
        # type: () -> None
        """Print all pending text, up to the arena's LastSpanId()."""
        self._PrintUntilSpid(self.arena.LastSpanId())
187
188
def LosslessCat(arena):
    # type: (alloc.Arena) -> None
    """Copy every token in the arena to stdout, untransformed.

    This is for testing the lossless invariant: the tokens "add up" to the
    original doc.
    """
    out = mylib.Stdout()
    whole_file = Cursor(arena, out)
    whole_file.PrintUntilEnd()
197
198
def PrintTokens(arena):
    # type: (alloc.Arena) -> None
    """Debugging tool: print one line per token to stdout.

    Each line has the token index, the token type name, and the repr() of
    the token's text.  The total count goes to stderr.
    """
    tokens = arena.tokens
    num_tokens = len(tokens)

    # Special case for line_id == -1: a lone token can't be sliced out of a
    # line, so print the token object itself.
    if num_tokens == 1:
        print('Empty file with EOF token on invalid line:')
        print('%s' % tokens[0])
        return

    # TODO:
    # - TSV8: end position, token type
    # - then an option to print token text, as a J8 string
    # - and then there can be a separate tool to number the columns
    #
    # - Do we also have JSON8 / HTM8 / TSV8 tokens?
    # - And mini-languages like glob, etc.
    index = 0
    for tok in tokens:
        end = tok.col + tok.length
        text = tok.line.content[tok.col:end]
        print('%5d %-20s %r' % (index, Id_str(tok.id, dot=False), text))
        index += 1

    print_stderr('(%d tokens)' % num_tokens)
219
220
def TreeFind(arena, node, errfmt):
    # type: (alloc.Arena, command_t, ui.ErrorFormatter) -> None
    """Run a Finder over the command tree rooted at node.

    TODO: could this be like a query language?  TreeSitter has a query
    language for CSTs.  But those are untyped, whereas we are strongly typed.
    """
    finder = Finder(arena, errfmt)
    finder.DoCommand(node)
231
232
class Finder(object):
    """Walks an OSH command_t syntax tree, looking for certain constructs.

    The only construct reported right now is a word whose first part is a
    Lit_ArrayLhsOpen token; it's printed through errfmt as 'BAD parse?'.

    Note: it might be nice to have some kind of visitor, so we don't have to
    repeat the traversal logic?

    Or even better would be a homogeneous traversal, or query language.
    """

    def __init__(self, arena, errfmt):
        # type: (alloc.Arena, ui.ErrorFormatter) -> None
        self.errfmt = errfmt
        self.arena = arena

    def DoWordPart(self, p):
        # type: (word_part_t) -> None
        """Report a Lit_ArrayLhsOpen literal; recurse into command subs."""
        UP_p = p
        with tagswitch(p) as case:
            if case(word_part_e.Literal):
                lit_tok = cast(Token, UP_p)
                if lit_tok.id == Id.Lit_ArrayLhsOpen:
                    self.errfmt.Print_('BAD parse?', lit_tok)

            elif case(word_part_e.CommandSub):
                p = cast(CommandSub, UP_p)
                self.DoCommand(p.child)

    def DoWord(self, w):
        # type: (word_t) -> None
        """Check the first part of a compound word; ignore other words."""
        UP_w = w
        with tagswitch(w) as case:
            if case(word_e.Compound):
                w = cast(CompoundWord, UP_w)
                # Only the first part is inspected
                self.DoWordPart(w.parts[0])
            else:
                pass

    def DoRhsWord(self, w):
        # type: (rhs_word_t) -> None
        """Check the right-hand side of an assignment."""
        UP_w = w
        with tagswitch(w) as case:
            if case(rhs_word_e.Empty):
                pass
            elif case(rhs_word_e.Compound):
                w = cast(CompoundWord, UP_w)
                self.DoWord(w)
            else:
                raise AssertionError()

    def DoCommand(self, node):
        # type: (command_t) -> None
        """Recursively visit a command and the commands nested inside it.

        Node types that aren't listed are ignored.
        """
        UP_node = node
        with tagswitch(node) as case:
            if case(command_e.Simple):
                node = cast(command.Simple, UP_node)
                # Only first word has a[
                if len(node.words):
                    self.DoWord(node.words[0])

            elif case(command_e.Sentence):
                node = cast(command.Sentence, UP_node)
                self.DoCommand(node.child)

            elif case(command_e.ShAssignment):
                node = cast(command.ShAssignment, UP_node)
                for pair in node.pairs:
                    # FYI: disabled debugging aid
                    if 0:
                        if pair.left.id == Id.Lit_ArrayLhsOpen:
                            self.errfmt.Print_('OK', pair.left)

                    self.DoRhsWord(pair.rhs)

            elif case(command_e.CommandList):
                node = cast(command.CommandList, UP_node)
                for sub in node.children:
                    self.DoCommand(sub)

            elif case(command_e.Redirect):
                node = cast(command.Redirect, UP_node)
                self.DoCommand(node.child)

            elif case(command_e.Pipeline):
                node = cast(command.Pipeline, UP_node)
                for sub in node.children:
                    self.DoCommand(sub)

            elif case(command_e.AndOr):
                node = cast(command.AndOr, UP_node)
                for sub in node.children:
                    self.DoCommand(sub)

            # This has to be different in the function case.
            elif case(command_e.BraceGroup):
                node = cast(BraceGroup, UP_node)
                for sub in node.children:
                    self.DoCommand(sub)

            elif case(command_e.Subshell):
                node = cast(command.Subshell, UP_node)
                self.DoCommand(node.child)

            elif case(command_e.ShFunction):
                node = cast(ShFunction, UP_node)
                self.DoCommand(node.body)

            elif case(command_e.DoGroup):
                node = cast(command.DoGroup, UP_node)
                for sub in node.children:
                    self.DoCommand(sub)

            elif case(command_e.ForEach):
                node = cast(command.ForEach, UP_node)
                self.DoCommand(node.body)

            elif case(command_e.WhileUntil):
                node = cast(command.WhileUntil, UP_node)
                # TODO: cond
                self.DoCommand(node.body)

            elif case(command_e.If):
                node = cast(command.If, UP_node)

                for arm in node.arms:
                    # TODO: cond
                    for sub in arm.action:
                        self.DoCommand(sub)

                # The 'else' body; nothing happens when it's empty
                for sub in node.else_action:
                    self.DoCommand(sub)

            elif case(command_e.Case):
                node = cast(command.Case, UP_node)

                for case_arm in node.arms:
                    for sub in case_arm.action:
                        self.DoCommand(sub)

            elif case(command_e.TimeBlock):
                node = cast(command.TimeBlock, UP_node)
                self.DoCommand(node.pipeline)

            elif case(command_e.DParen):
                # TODO: arith expressions can have words with command subs
                pass

            elif case(command_e.DBracket):
                # TODO: bool_expr_t can have words with command subs
                pass

            else:
                pass
399
400
def Ysh_ify(arena, node):
    # type: (alloc.Arena, command_t) -> None
    """Translate the program rooted at node and write the result to stdout."""
    printer = YshPrinter(Cursor(arena, mylib.Stdout()), arena, mylib.Stdout())

    # None: there are no local symbols yet at the top level
    printer.DoCommand(node, None, at_top_level=True)

    # Flush whatever source text comes after the last transformed construct
    printer.End()
407
408
409# PROBLEM: ~ substitution. That is disabled by "".
410# You can turn it into $HOME I guess
411# const foo = "$HOME/src"
412# const foo = %( ~/src )[0] # does this make sense?
413
414
def _GetRhsStyle(w):
    # type: (rhs_word_t) -> word_style_t
    """Determine what style an assignment should use: '' or "", or an
    expression.

    SQ      foo=                    setglobal foo = ''
    SQ      foo=''                  setglobal foo = ''
    DQ      foo=""                  setglobal foo = ""  # Or we could
                                                        # normalize it if
                                                        # no subs?

    Need these too.  Or honestly should C strings be the default?  And then
    raw strings are optional?  Because most usages of \\n and \\0 can turn
    into Oil?  Yeah I want the default to be statically parseable, so we
    subvert the \\t and \\n of command line tools?  As long as we are fully
    analyzing the strings, we might as well go all the way!  I think I need a
    PartialStaticEval() to paper over this.

    The main issue is regex and globs, because they use escape for a
    different purpose.  I think just do grep r'foo\\tbar' or something.

    C_SQ    foo=$'\\n'               setglobal foo = C'\\n'
    C_DQ    foo=$'\\n'"$bar"         setglobal foo = C"\\n$(bar)"

    Expr    path=${1:-}             setglobal path = $1 or ''
    Expr    host=${2:-$(hostname)}  setglobal host = $2 or $[hostname]

    What's the difference between Expr and Unquoted?  I think they're the
    same.
    """
    # Actually splitting NEVER HAPPENS ON ASSIGNMENT.  LEAVE IT OFF.

    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            return word_style_e.SQ

        elif case(rhs_word_e.Compound):
            w = cast(CompoundWord, UP_w)
            num_parts = len(w.parts)

            if num_parts == 0:
                raise AssertionError(w)

            if num_parts != 1:
                # multiple parts use YSTR in general?
                # Depends if there are subs
                return word_style_e.DQ

            only_part = w.parts[0]
            with tagswitch(only_part) as case2:
                if case2(word_part_e.TildeSub):
                    # x=~andy/src
                    # -> setvar x = homedir('andy') + '/src'
                    return word_style_e.Expr

                elif case2(word_part_e.Literal):
                    # local x=y
                    # -> var x = 'y'
                    return word_style_e.SQ

                elif case2(word_part_e.SimpleVarSub):
                    # local x=$myvar
                    # -> var x = "$myvar"
                    # or var x = ${myvar}
                    # or var x = myvar
                    return word_style_e.DQ

                elif case2(word_part_e.BracedVarSub, word_part_e.CommandSub,
                           word_part_e.ArithSub):
                    # x=$(hostname)
                    # -> setvar x = $(hostname)
                    return word_style_e.Unquoted

                elif case2(word_part_e.DoubleQuoted):
                    # TODO: remove quotes in single part like "$(hostname)"
                    # -> $(hostname)
                    return word_style_e.DQ

    # Default, including a single part of any kind not listed above
    return word_style_e.SQ
499
500
501class YshPrinter(object):
502 """Prettify OSH to YSH."""
503
def __init__(self, cursor, arena, f):
    # type: (Cursor, alloc.Arena, mylib.Writer) -> None
    """Remember the collaborators used by the Do* methods.

    Args:
      cursor: prints or skips the original source text
      arena: used by _DebugSpid() to look up tokens
      f: where replacement text is written
    """
    self.f = f
    self.arena = arena
    self.cursor = cursor
509
def _DebugSpid(self, spid):
    # type: (int) -> None
    """Debugging aid: print the source text of span `spid` to stderr."""
    tok = self.arena.GetToken(spid)
    end = tok.col + tok.length
    text = tok.line.content[tok.col:end]
    print_stderr('SPID %d = %r' % (spid, text))
515
def End(self):
    # type: () -> None
    """Finish the translation by printing the rest of the file.

    Call this after the last DoCommand(), so that source text after the
    last transformed construct isn't lost.
    """
    remaining = self.cursor
    remaining.PrintUntilEnd()
520
def DoRedirect(self, node, local_symbols):
    # type: (Redir, Dict[str, bool]) -> None
    """Print a redirect, changing here docs to <<< with triple quotes.

    An unquoted delimiter becomes a triple double-quoted string:

        cat << EOF
        hello $name
        EOF

        cat <<< \"\"\"
        hello $name
        \"\"\"

    A quoted delimiter becomes a triple single-quoted string:

        cat << 'EOF'
        no expansion
        EOF

        cat <<< '''
        no expansion
        '''

    Redirects that aren't here docs are only printed up to their operator
    here; the rest is left for later printing.
    """
    self.cursor.PrintUntil(node.op)

    if node.arg.tag() != redir_param_e.HereDoc:
        return

    here_doc = cast(redir_param.HereDoc, node.arg)

    begin_word = here_doc.here_begin
    ok, unused_delim, delim_quoted = word_.StaticEval(begin_word)
    if not ok:
        p_die('Invalid here doc delimiter', loc.Word(begin_word))

    # Turn everything into <<<.  We just change the quotes
    quotes = "'''" if delim_quoted else '"""'
    self.f.write('<<< ')
    self.f.write(quotes)

    # Drop the original operator and delimiter word
    self.cursor.SkipPast(location.RightTokenForWord(begin_word))

    # Now print the lines.  TODO: Have a flag to indent these to the level
    # of the owning command, e.g.
    #   cat <<EOF
    #   EOF
    # Or since most here docs are the top level, you could just have a hack
    # for a fixed indent?  TODO: Look at real use cases.
    for part in here_doc.stdin_parts:
        self.DoWordPart(part, local_symbols)

    # Replace the closing delimiter line with the closing quotes
    self.cursor.SkipPast(here_doc.here_end_tok)
    self.f.write(quotes)
    self.f.write('\n')
581
def DoShAssignment(self, node, at_top_level, local_symbols):
    # type: (command.ShAssignment, bool, Dict[str, bool]) -> None
    """Translate a shell assignment to a YSH 'setvar' statement.

    foo=bar spam=eggs -> setvar foo = 'bar', spam = 'eggs'

    The right-hand sides are changed to the expression language, where bare
    words are not allowed: foo -> 'foo'.

    Args:
      node: the assignment; pairs with an UnparsedIndex left-hand side
        (a[i]=x) are not rewritten
      at_top_level: whether the assignment is outside any function
      local_symbols: passed through to DoRhsWord(); not consulted here yet

    Raises:
      AssertionError: for any other kind of left-hand side

    Design notes on local_symbols:
    - Add every 'local' declaration to it
      - problem: what if you have local in an "if" ?
      - we could treat it like nested scope and see what happens?  Do any
        programs have a problem with it?
        case/if/for/while/BraceGroup all define scopes or what?
        You don't want inconsistency of variables that could be defined at
        any point.
      - or maybe you only need it within "if / case" ?  Well I guess
        for/while can break out of the loop and cause problems.  A break
        is an "if".
    """
    # Is it a local variable in this function?  We can't tell yet, because
    # nothing looks names up in local_symbols.
    # TODO: detect whether the FIRST assignment on the line has been
    # declared locally.  We might want to split every line into separate
    # statements.
    defined_locally = False

    self.cursor.PrintUntil(node.pairs[0].left)

    # Need semantic analysis.  Would be nice to assume that it's a local
    # though.  In a function, a name that isn't defined locally must be a
    # global that's being mutated.
    if at_top_level or not defined_locally:
        self.f.write('setvar ')
    else:
        self.f.write('set ')

    last_index = len(node.pairs) - 1
    for i, pair in enumerate(node.pairs):
        lhs = pair.lhs
        UP_lhs = lhs
        with tagswitch(lhs) as case:
            if case(sh_lhs_e.Name):
                lhs = cast(sh_lhs.Name, UP_lhs)

                self.cursor.PrintUntil(pair.left)
                # Assume skipping over one Lit_VarLike token
                self.cursor.SkipPast(pair.left)

                # Replace name.  I guess it's Lit_Chars.
                self.f.write(lhs.name)
                self.f.write(' = ')

                # An empty RHS is printed as '', e.g. local i -> var i = ''
                self.DoRhsWord(pair.rhs, local_symbols)

            elif case(sh_lhs_e.UnparsedIndex):
                # --one-pass-parse gives us this node, instead of
                # IndexedName.  TODO: Avoid translating these
                pass

            else:
                raise AssertionError(pair.lhs.__class__.__name__)

        if i != last_index:
            self.f.write(',')
668
def _DoSimple(self, node, local_symbols):
    # type: (command.Simple, Dict[str, bool]) -> None
    """Translate a simple command, i.e. env bindings followed by words.

    Two legacy builtins are rewritten when the first word is unquoted:

        [ -f foo ]   ->  test -f foo
        . lib.sh     ->  source lib.sh

    Raises:
      RuntimeError: if a command of 3 or more words starts with [ but
        doesn't end with an unquoted ]
    """
    # How to preserve spaces between words?  Do you want to do it?
    # Well you need to test this:
    #
    # echo foo \
    #   bar

    # We only need to transform the right side of env bindings, not the left
    # side.
    for env_pair in node.more_env:
        self.DoRhsWord(env_pair.val, local_symbols)

    words = node.words
    num_words = len(words)
    if num_words:
        first_word = words[0]
        ok, val, quoted = word_.StaticEval(first_word)
        word0_tok = location.LeftTokenForWord(first_word)
        bare = ok and not quoted

        if bare and val == '[' and num_words >= 3:
            last_word = words[-1]

            # Check if last word is ]
            ok2, val2, quoted2 = word_.StaticEval(last_word)
            if not (ok2 and not quoted2 and val2 == ']'):
                raise RuntimeError('Got [ without ]')

            # Replace [ with 'test'
            self.cursor.PrintUntil(word0_tok)
            self.cursor.SkipPast(word0_tok)
            self.f.write('test')

            for w in words[1:-1]:
                self.DoWordInCommand(w, local_symbols)

            # Now omit ]: print through the end of the last argument, then
            # drop everything up to and including the ] token.
            tok2 = location.RightTokenForWord(words[-2])
            rbrack_tok = location.LeftTokenForWord(last_word)
            self.cursor.PrintIncluding(tok2)
            self.cursor.SkipPast(rbrack_tok)
            return

        if bare and val == '.':
            self.cursor.PrintUntil(word0_tok)
            self.cursor.SkipPast(word0_tok)
            self.f.write('source')
            return

    for w in words:
        self.DoWordInCommand(w, local_symbols)

    # TODO: Print the terminator.  Could be \n or ;
    # Need to print env like PYTHONPATH = 'foo' && ls
    # Need to print redirects:
    # < > are the same.  << is here string, and >> is assignment.
    # append is >+

    # TODO: static_eval of simple command
    # - [ -> "test".  Eliminate trailing ].
    # - . -> source, etc.
733
def DoCommand(self, node, local_symbols, at_top_level=False):
    # type: (command_t, Dict[str, bool], bool) -> None
    """Translate one command, recursing into the commands nested in it.

    Source text is emitted through self.cursor; replacement text is written
    directly to self.f.  Node types that aren't listed are left alone.

    Args:
      node: the command to translate
      local_symbols: symbol table of the enclosing function, or None
      at_top_level: whether the command is outside any function.  This is
        only propagated through CommandList and Redirect nodes.
    """
    UP_node = node

    with tagswitch(node) as case:
        if case(command_e.CommandList):
            node = cast(command.CommandList, UP_node)

            # TODO: How to distinguish between echo hi; echo bye; and on
            # separate lines
            for child in node.children:
                self.DoCommand(child,
                               local_symbols,
                               at_top_level=at_top_level)

        elif case(command_e.Redirect):
            node = cast(command.Redirect, UP_node)

            self.DoCommand(node.child,
                           local_symbols,
                           at_top_level=at_top_level)
            for r in node.redirects:
                self.DoRedirect(r, local_symbols)

        elif case(command_e.Simple):
            node = cast(command.Simple, UP_node)

            self._DoSimple(node, local_symbols)

            if node.redirects is not None:
                for r in node.redirects:
                    self.DoRedirect(r, local_symbols)

        elif case(command_e.ShAssignment):
            node = cast(command.ShAssignment, UP_node)

            self.DoShAssignment(node, at_top_level, local_symbols)

        elif case(command_e.Pipeline):
            node = cast(command.Pipeline, UP_node)

            for child in node.children:
                self.DoCommand(child, local_symbols)

        elif case(command_e.AndOr):
            node = cast(command.AndOr, UP_node)

            for child in node.children:
                self.DoCommand(child, local_symbols)

        elif case(command_e.Sentence):
            node = cast(command.Sentence, UP_node)

            # 'ls &' to 'fork ls'
            # Keep ; the same.
            self.DoCommand(node.child, local_symbols)

        # This has to be different in the function case.
        elif case(command_e.BraceGroup):
            node = cast(BraceGroup, UP_node)

            # { echo hi; } -> do { echo hi }
            # For now it might be OK to keep 'do { echo hi; }
            self.cursor.PrintUntil(node.left)
            self.cursor.SkipPast(node.left)
            self.f.write('do {')

            for child in node.children:
                self.DoCommand(child, local_symbols)

        elif case(command_e.Subshell):
            node = cast(command.Subshell, UP_node)

            # (echo hi) -> shell echo hi
            # (echo hi; echo bye) -> shell {echo hi; echo bye}
            self.cursor.PrintUntil(node.left)
            self.cursor.SkipPast(node.left)
            self.f.write('shell {')

            self.DoCommand(node.child, local_symbols)

            self.cursor.PrintUntil(node.right)
            self.cursor.SkipPast(node.right)
            self.f.write('}')

        elif case(command_e.ShFunction):
            node = cast(ShFunction, UP_node)
            self._DoShFunction(node)

        elif case(command_e.DoGroup):
            node = cast(command.DoGroup, UP_node)

            # do ... done -> { ... }
            self.cursor.PrintUntil(node.left)
            self.cursor.SkipPast(node.left)
            self.f.write('{')

            for child in node.children:
                self.DoCommand(child, local_symbols)

            self.cursor.PrintUntil(node.right)
            self.cursor.SkipPast(node.right)
            self.f.write('}')

        elif case(command_e.ForEach):
            node = cast(command.ForEach, UP_node)
            self._DoForEach(node, local_symbols)

        elif case(command_e.WhileUntil):
            node = cast(command.WhileUntil, UP_node)
            self._DoWhileUntil(node, local_symbols)

        elif case(command_e.If):
            node = cast(command.If, UP_node)
            self._DoIf(node, local_symbols)

        elif case(command_e.Case):
            node = cast(command.Case, UP_node)
            self._DoCase(node, local_symbols)

        elif case(command_e.TimeBlock):
            node = cast(command.TimeBlock, UP_node)

            self.DoCommand(node.pipeline, local_symbols)

        elif case(command_e.DParen):
            # TODO: arith expressions can have words with command subs
            pass

        elif case(command_e.DBracket):
            # TODO: bool_expr_t can have words with command subs
            pass

        else:
            # Command not handled; its text is printed unchanged later
            pass

def _DoShFunction(self, node):
    # type: (ShFunction) -> None
    """f() { ... } or function f { ... } -> proc f { ... }"""
    # New symbol table for every function.
    new_local_symbols = {}  # type: Dict[str, bool]

    # Should be the left most span, including 'function'
    if node.keyword:  # function foo { ...
        self.cursor.PrintUntil(node.keyword)
    else:  # foo() { ...
        self.cursor.PrintUntil(node.name_tok)

    self.f.write('proc %s ' % node.name)

    UP_body = node.body
    if UP_body.tag() == command_e.BraceGroup:
        body = cast(BraceGroup, UP_body)
        # Drop the original header; printing resumes at the body's {
        self.cursor.SkipUntil(body.left)

        # Don't add "do" like a standalone brace group.  Just use {}.
        for child in body.children:
            self.DoCommand(child, new_local_symbols)
    else:
        # very rare cases like f() ( subshell )
        pass

def _DoForEach(self, node, local_symbols):
    # type: (command.ForEach, Dict[str, bool]) -> None
    """Translate a for loop; 'for x; do' becomes 'for x in @ARGV {'."""
    # Need to preserve spaces between words, because there can be line
    # wrapping.
    # for x in a b c \
    #   d e f; do

    if node.iterable.tag() == for_iter_e.Args:
        self.f.write('for %s in @ARGV ' % node.iter_names[0])

        # note: command_t doesn't have .spids
        body_tok = location.TokenForCommand(node.body)
        self.cursor.SkipUntil(body_tok)
    # for_iter_e.Words and for_iter_e.YshExpr: the header is printed as is

    # Drop the ; before 'do'
    if node.semi_tok is not None:
        self.cursor.PrintUntil(node.semi_tok)
        self.cursor.SkipPast(node.semi_tok)

    self.DoCommand(node.body, local_symbols)

def _DoWhileUntil(self, node, local_symbols):
    # type: (command.WhileUntil, Dict[str, bool]) -> None
    """Translate a while or until loop; 'until' becomes 'while !'."""
    # Skip 'until', and replace it with 'while not'
    if node.keyword.id == Id.KW_Until:
        self.cursor.PrintUntil(node.keyword)
        self.cursor.SkipPast(node.keyword)
        self.f.write('while !')

    if node.cond.tag() == condition_e.Shell:
        commands = cast(List_of_command, node.cond)
        # Skip the semi-colon in the condition, which is usually a Sentence
        if (len(commands) == 1 and
                commands[0].tag() == command_e.Sentence):
            sentence = cast(command.Sentence, commands[0])
            self.DoCommand(sentence.child, local_symbols)
            self.cursor.SkipPast(sentence.terminator)

    self.DoCommand(node.body, local_symbols)

def _DoIf(self, node, local_symbols):
    # type: (command.If, Dict[str, bool]) -> None
    """Translate if/elif/else/fi to brace syntax.

    if foo; then    ->  if foo {
    elif foo; then  ->  } elif foo {
    else            ->  } else {
    fi              ->  }
    """
    for i, arm in enumerate(node.arms):
        if i != 0:  # 'if' not 'elif' on the first arm
            # Close the previous arm's block before the 'elif' keyword
            self.cursor.PrintUntil(arm.keyword)
            self.f.write('} ')

        cond = arm.cond
        if cond.tag() == condition_e.Shell:
            commands = cast(List_of_command, cond)
            if (len(commands) == 1 and
                    commands[0].tag() == command_e.Sentence):
                sentence = cast(command.Sentence, commands[0])
                self.DoCommand(sentence, local_symbols)

                # Remove semi-colon
                self.cursor.PrintUntil(sentence.terminator)
                self.cursor.SkipPast(sentence.terminator)
            else:
                for child in commands:
                    self.DoCommand(child, local_symbols)

        # then -> {
        self.cursor.PrintUntil(arm.then_tok)
        self.cursor.SkipPast(arm.then_tok)
        self.f.write('{')

        for child in arm.action:
            self.DoCommand(child, local_symbols)

    # else -> } else {
    if len(node.else_action):
        self.cursor.PrintUntil(node.else_kw)
        self.f.write('} ')
        self.cursor.PrintIncluding(node.else_kw)
        self.f.write(' {')

        for child in node.else_action:
            self.DoCommand(child, local_symbols)

    # fi -> }
    self.cursor.PrintUntil(node.fi_kw)
    self.cursor.SkipPast(node.fi_kw)
    self.f.write('}')

def _DoCase(self, node, local_symbols):
    # type: (command.Case, Dict[str, bool]) -> None
    """Translate a shell case statement to the brace-delimited form.

    The word being matched is printed as (var) when it's $var or "$var".
    Each arm's ) becomes {, each ;; becomes }, and 'esac' becomes }.
    A case statement that already matches on a YSH expression is left alone.

    Raises:
      AssertionError: if the thing being matched is neither a word nor a
        YSH expression
    """
    to_match = None  # type: word_t
    with tagswitch(node.to_match) as case:
        if case(case_arg_e.YshExpr):
            return
        elif case(case_arg_e.Word):
            to_match = cast(case_arg.Word, node.to_match).w
        else:
            raise AssertionError()

    self.cursor.PrintIncluding(node.case_kw)

    # Figure out the variable name, so we can translate
    # - $var to (var)
    # - "$var" to (var)
    var_part = None  # type: SimpleVarSub
    if to_match.tag() == word_e.Compound:
        w = cast(CompoundWord, to_match)
        part0 = w.parts[0]

        with tagswitch(part0) as case:
            if case(word_part_e.SimpleVarSub):
                var_part = cast(SimpleVarSub, part0)

            elif case(word_part_e.DoubleQuoted):
                dq_part = cast(DoubleQuoted, part0)
                if len(dq_part.parts) == 1:
                    dq_part0 = dq_part.parts[0]
                    if dq_part0.tag() == word_part_e.SimpleVarSub:
                        var_part = cast(SimpleVarSub, dq_part0)

    if var_part:
        self.f.write(' (')
        self.f.write(lexer.LazyStr(var_part.tok))
        self.f.write(') ')

    self.cursor.SkipPast(node.arms_start)  # Skip past 'in'
    self.f.write('{')

    missing_last_dsemi = False

    for case_arm in node.arms:
        # Replace ) with {
        self.cursor.PrintUntil(case_arm.middle)
        self.f.write(' {')
        self.cursor.SkipPast(case_arm.middle)

        for child in case_arm.action:
            self.DoCommand(child, local_symbols)

        if case_arm.right:
            # Change ;; to }
            self.cursor.PrintUntil(case_arm.right)
            self.f.write('}')
            self.cursor.SkipPast(case_arm.right)
        else:
            # valid: case $x in pat) echo hi ; esac
            missing_last_dsemi = True

    self.cursor.PrintUntil(node.arms_end)  # 'esac' or }

    if missing_last_dsemi:  # Print } for missing ;;
        self.f.write('}\n')

    self.cursor.SkipPast(node.arms_end)  # 'esac' or }

    self.f.write('}')  # in place of 'esac'
1066
def DoRhsWord(self, node, local_symbols):
    # type: (rhs_word_t, Dict[str, bool]) -> None
    """Print the right-hand side of an assignment as a YSH expression.

    An empty RHS is printed as ''.  Otherwise the word is wrapped in the
    quotes chosen by _GetRhsStyle(), or printed without added quotes for
    the Expr and Unquoted styles.

    TODO: for complex cases of word joining:
        local a=unquoted'single'"double"'"'

    We can try to handle it:
        var a = y"unquotedsingledouble\\""

    Or simply abort and LEAVE IT ALONE.  We should only translate things we
    recognize.
    """
    UP_node = node
    with tagswitch(node) as case:
        if case(rhs_word_e.Empty):
            self.f.write("''")

        elif case(rhs_word_e.Compound):
            node = cast(CompoundWord, UP_node)

            # TODO: This is wrong!
            style = _GetRhsStyle(node)

            quote = None  # type: str
            if style == word_style_e.SQ:
                quote = "'"
            elif style == word_style_e.DQ:
                quote = '"'

            if quote is not None:
                self.f.write(quote)
                self.DoWordInCommand(node, local_symbols)
                self.f.write(quote)
            else:
                # TODO: Put back separate handling of word_style_e.Expr and
                # word_style_e.Unquoted
                #
                # "${foo:-default}" -> foo or 'default'
                # ${foo:-default} -> @split(foo or 'default')
                # @(foo or 'default') -- implicit split.

                if word_.IsVarSub(node):  # ${1} or "$1"
                    # Do it in expression mode
                    pass
                # NOTE: ArithSub with $(1 +2 ) is different than 1 + 2
                # because of conversion to string.

                # For now, just stub it out
                self.DoWordInCommand(node, local_symbols)
1116
def DoWordInCommand(self, node, local_symbols):
    # type: (word_t, Dict[str, bool]) -> None
    """Print a command word, removing double quotes that aren't needed.

    A word that is exactly one double-quoted substitution is rewritten:

        "$@"               ->  @ARGV
        "$1", "$foo"       ->  $1, $foo
        "${...}", "$(...)" ->  the substitution, as printed by DoWordPart()

    Every other compound word is printed part by part.  BracedTree words
    are not handled here.

    Raises:
      AssertionError: for any other kind of word
    """
    UP_node = node

    with tagswitch(node) as case:
        if case(word_e.Compound):
            node = cast(CompoundWord, UP_node)
            parts = node.parts

            # UNQUOTE simple var subs
            if (len(parts) == 1 and
                    parts[0].tag() == word_part_e.DoubleQuoted):
                dq_part = cast(DoubleQuoted, parts[0])

                # NOTE: In double quoted case, dq_part.left and
                # dq_part.right are the begin and end quote.
                # Do we need a HereDoc part?

                if len(dq_part.parts) == 1:
                    inner = dq_part.parts[0]
                    inner_tag = inner.tag()

                    if inner_tag == word_part_e.SimpleVarSub:
                        vsub_part = cast(SimpleVarSub, inner)
                        tok_id = vsub_part.tok.id

                        replacement = None  # type: str
                        if tok_id == Id.VSub_At:
                            # Special case for "$@"
                            replacement = '@ARGV'
                        elif tok_id in (Id.VSub_Number,
                                        Id.VSub_DollarName):
                            # "$1" -> $1, "$foo" -> $foo
                            replacement = lexer.TokenVal(vsub_part.tok)

                        if replacement is not None:
                            # Drop " then the var sub then "
                            self.cursor.PrintUntil(dq_part.left)
                            self.cursor.SkipPast(dq_part.right)
                            self.f.write(replacement)
                            return  # Done replacing

                    # Single arith sub, command sub, etc.
                    # On the other hand, an unquoted one needs to turn into
                    #
                    # $(echo one two) -> @[echo one two]
                    # `echo one two` -> @[echo one two]
                    #
                    # ${var:-'the default'} -> @$(var or 'the default')
                    #
                    # $((1 + 2)) -> $(1 + 2) -- this is OK unquoted
                    elif inner_tag in (word_part_e.BracedVarSub,
                                       word_part_e.CommandSub):
                        # Skip over the opening quote, print the
                        # substitution, then skip the closing quote
                        self.cursor.PrintUntil(dq_part.left)
                        self.cursor.SkipPast(dq_part.left)
                        self.DoWordPart(inner, local_symbols)
                        self.cursor.SkipPast(dq_part.right)
                        return

            # TODO: 'foo'"bar" should be "foobar", etc.
            # If any part is double quoted, you can always double quote the
            # whole thing?
            for part in parts:
                self.DoWordPart(part, local_symbols)

        elif case(word_e.BracedTree):
            # Not doing anything now
            pass

        else:
            raise AssertionError(node.__class__.__name__)
1199
def DoWordPart(self, node, local_symbols, quoted=False):
    # type: (word_part_t, Dict[str, bool], bool) -> None
    """Emit one word part, applying the OSH -> YSH rewrites known so far.

    Rewrites performed here:

        unquoted backslash-escaped char -> the char in single quotes,
            except an escaped newline and an escaped single quote, which
            are left untouched
        $@ and $*  -> $[join(ARGV)]
        $#         -> $Argc
        backtick command sub -> $( ) command sub

    Part types without a rewrite are not written by this method beyond the
    initial flush up to the part's left token.

    Args:
      node: the word part to translate
      local_symbols: forwarded to DoCommand and to recursive calls
      quoted: True when the part is inside double quotes.  It is set by the
        recursion in the DoubleQuoted case and disables the
        escaped-literal rewrite.
    """
    left_tok = location.LeftTokenForWordPart(node)
    if left_tok:
        # Flush the source text that precedes this part.
        self.cursor.PrintUntil(left_tok)

    UP_node = node

    with tagswitch(node) as case:
        if case(word_part_e.YshArrayLiteral,
                word_part_e.InitializerLiteral, word_part_e.TildeSub,
                word_part_e.ExtGlob):
            pass

        elif case(word_part_e.EscapedLiteral):
            node = cast(word_part.EscapedLiteral, UP_node)
            if quoted:
                pass
            else:
                # An unquoted \e should be quoted instead.  ' ' vs.
                # \<invisible space>
                # Hm is this necessary though?  I think the only motivation
                # is changing \{ and \( for macros.  And ' ' to be
                # readable/visible.
                t = node.token
                val = lexer.TokenSliceLeft(t, 1)  # drop the backslash
                assert len(val) == 1, val

                # Leave two escapes exactly as written:
                # - \<newline> is a line continuation
                # - \' would be rewritten to ''' which is not a quoted
                #   apostrophe (it opens a multi-line string in YSH)
                if val != '\n' and val != "'":
                    self.cursor.PrintUntil(t)
                    self.cursor.SkipPast(t)
                    self.f.write("'%s'" % val)

        elif case(word_part_e.Literal):
            node = cast(Token, UP_node)
            self.cursor.PrintIncluding(node)

        elif case(word_part_e.SingleQuoted):
            node = cast(SingleQuoted, UP_node)

            # TODO:
            # '\n' is '\\n'
            # $'\n' is '\n'
            # TODO: Should print through the right quote, not just up to it
            self.cursor.PrintUntil(node.right)

        elif case(word_part_e.DoubleQuoted):
            node = cast(DoubleQuoted, UP_node)
            for part in node.parts:
                self.DoWordPart(part, local_symbols, quoted=True)

        elif case(word_part_e.SimpleVarSub):
            node = cast(SimpleVarSub, UP_node)

            op_id = node.tok.id

            if op_id == Id.VSub_DollarName:
                self.cursor.PrintIncluding(node.tok)

            elif op_id == Id.VSub_Number:
                self.cursor.PrintIncluding(node.tok)

            elif op_id == Id.VSub_At:  # $@ -- "$@" is handled by the caller
                self.f.write('$[join(ARGV)]')
                self.cursor.SkipPast(node.tok)

            elif op_id == Id.VSub_Star:  # $*
                # PEDANTIC: Depends if quoted or unquoted
                self.f.write('$[join(ARGV)]')
                self.cursor.SkipPast(node.tok)

            elif op_id == Id.VSub_Pound:  # $#
                # len(ARGV) ?
                self.f.write('$Argc')
                self.cursor.SkipPast(node.tok)

            else:
                # No rewrite for the remaining special variables; nothing
                # is written here.
                pass

        elif case(word_part_e.BracedVarSub):
            node = cast(BracedVarSub, UP_node)

            # NOTE: Why do we need this but we don't need it in command sub?
            self.cursor.PrintUntil(node.left)

            # The three checks below are placeholders for future rewrites.
            if node.bracket_op:
                # a[1]
                # These two change the sigil!  ${a[@]} is now @a!
                # a[@]
                # a[*]
                pass

            if node.prefix_op:
                # len()
                pass
            if node.suffix_op:
                pass

            op_id = node.name_tok.id
            if op_id == Id.VSub_QMark:
                self.cursor.PrintIncluding(node.name_tok)

            # Emit the rest of the substitution, through the closing brace,
            # unchanged.
            self.cursor.PrintIncluding(node.right)

        elif case(word_part_e.CommandSub):
            node = cast(CommandSub, UP_node)

            if node.left_token.id == Id.Left_Backtick:
                # `cmd` -> $(cmd): swap both delimiters and translate the
                # command in between.
                self.cursor.PrintUntil(node.left_token)
                self.f.write('$(')
                self.cursor.SkipPast(node.left_token)

                self.DoCommand(node.child, local_symbols)

                # Skip over right `
                self.cursor.SkipPast(node.right)
                self.f.write(')')

            else:
                # Other command sub forms are printed through the right
                # token unchanged.
                self.cursor.PrintIncluding(node.right)

        else:
            pass