OILS / tools / ysh_ify.py View on Github | oils.pub

1316 lines, 694 significant
1from __future__ import print_function
2"""
3ysh_ify.py: Roughly translate OSH to YSH. Doesn't respect semantics.
4
5ESSENTIAL
6
7Command:
8
9 then/fi, do/done -> { }
10
11 new case statement
12
13 f() { } -> proc f { } (changes scope)
14
15 subshell -> forkwait, because () is taken
16 { } to fopen { }?
17
18 Approximate: var declaration:
19 local a=b -> var a = 'b', I think
20
21 <<EOF here docs to '''
22
23Word:
24 "$@" -> @ARGV
25
26 Not common: unquoted $x -> @[split(x)]
27
28LEGACY that I don't personally use
29
30Builtins:
31 [ -> test
32 . -> source
33
34Word:
35 backticks -> $() (I don't use this)
36 quote removal "$foo" -> $foo
37 brace removal ${foo} and "${foo}" -> $foo
38
39--tool format
40
41 fix indentation and spacing, like clang-format
42 can "lower" the LST to a rough representation with keywords / "first words",
43 { } ( ), and comments
44 - the "atoms" should not have newlines
45"""
46
47from _devbuild.gen.id_kind_asdl import Id, Id_str
48from _devbuild.gen.runtime_asdl import word_style_e, word_style_t
49from _devbuild.gen.syntax_asdl import (
50 loc,
51 CompoundWord,
52 Token,
53 SimpleVarSub,
54 BracedVarSub,
55 CommandSub,
56 DoubleQuoted,
57 SingleQuoted,
58 word_e,
59 word_t,
60 #word_str,
61 word_part,
62 word_part_e,
63 word_part_t,
64 rhs_word_e,
65 rhs_word_t,
66 sh_lhs,
67 sh_lhs_e,
68 command,
69 command_e,
70 #command_str,
71 BraceGroup,
72 for_iter_e,
73 case_arg_e,
74 case_arg,
75 condition_e,
76 redir_param,
77 redir_param_e,
78 Redir,
79 List_of_command,
80)
81from asdl import runtime
82from core.error import p_die
83from display import ui
84from frontend import lexer
85from frontend import location
86from osh import word_
87from mycpp import mylib
88from mycpp.mylib import log, print_stderr, tagswitch
89
90from typing import Dict, cast, TYPE_CHECKING
91if TYPE_CHECKING:
92 from _devbuild.gen.syntax_asdl import command_t
93 from core import alloc
94
95_ = log
96
97
class Cursor(object):
    """
    API to print/transform a complete source file, stored in a single arena.

    The cursor remembers the id of the next token that has not been emitted
    yet (next_span_id).  Callers alternate between printing the original text
    up to some token and skipping tokens that they replace with their own
    output.

    In core/alloc.py, SnipCodeBlock() and SnipCodeString work on lines.  They
    don't iterate over tokens.

    Or add a separate hash table of Token -> span ID?  That makes sense because
    we need that kind of "address hash" for type checking anyway.

    You use the hash table to go from next_token_id .. TokenId(until_token).
    """

    def __init__(self, arena, f):
        # type: (alloc.Arena, mylib.Writer) -> None
        self.arena = arena
        self.f = f
        self.next_span_id = 0  # first token that hasn't been printed/skipped

    def _PrintUntilSpid(self, until_span_id):
        # type: (int) -> None
        """Write the text of tokens [next_span_id, until_span_id) to self.f.

        Raises:
          AssertionError: if until_span_id is the NO_SPID sentinel.
        """
        # Callers sometimes pass span_id + 1, so only the bare sentinel can be
        # rejected here.  Raise explicitly rather than 'assert', so the check
        # is not stripped under python -O.
        if until_span_id == runtime.NO_SPID:
            raise AssertionError('Missing span ID, got %d' % until_span_id)

        for span_id in xrange(self.next_span_id, until_span_id):
            span = self.arena.GetToken(span_id)

            # A span for Eof may not have a line when the file is completely
            # empty.
            if span.line is None:
                continue

            # Special case for recovering stripped leading space!
            # See osh/word_compile.py
            start_index = (0 if span.id == Id.Lit_CharsWithoutPrefix else
                           span.col)
            end_index = span.col + span.length

            piece = span.line.content[start_index:end_index]
            self.f.write(piece)

        self.next_span_id = until_span_id

    def _SkipUntilSpid(self, next_span_id):
        # type: (int) -> None
        """Skip everything before next_span_id.

        Printing will start at next_span_id.

        Raises:
          AssertionError: if next_span_id is the NO_SPID sentinel.
        """
        # Only the sentinel itself is rejected.  NO_SPID + 1 can be a real
        # token id here; the "+1" case is checked in SkipPast(), before the
        # addition.
        if next_span_id == runtime.NO_SPID:
            raise AssertionError('Missing span ID, got %d' % next_span_id)
        self.next_span_id = next_span_id

    def SkipUntil(self, tok):
        # type: (Token) -> None
        """Drop all pending text before `tok`; printing resumes at `tok`."""
        span_id = self.arena.GetSpanId(tok)
        self._SkipUntilSpid(span_id)

    def SkipPast(self, tok):
        # type: (Token) -> None
        """Drop all pending text up to and including `tok`."""
        span_id = self.arena.GetSpanId(tok)
        # Validate before adding 1, so that a missing id is still detected.
        if span_id == runtime.NO_SPID:
            raise AssertionError('Missing span ID, got %d' % span_id)
        self._SkipUntilSpid(span_id + 1)

    def PrintUntil(self, tok):
        # type: (Token) -> None
        """Print all pending text before `tok` (not including it)."""
        span_id = self.arena.GetSpanId(tok)

        # Test invariant: the arena must map the id back to the same token
        if mylib.PYTHON:
            arena_tok = self.arena.GetToken(span_id)
            if tok != arena_tok:
                raise AssertionError(
                    '%s %d %d != %s %d %d' %
                    (tok, span_id, id(tok), arena_tok,
                     self.arena.GetSpanId(arena_tok), id(arena_tok)))

        self._PrintUntilSpid(span_id)

    def PrintIncluding(self, tok):
        # type: (Token) -> None
        """Print all pending text up to and including `tok`."""
        span_id = self.arena.GetSpanId(tok)
        self._PrintUntilSpid(span_id + 1)

    def PrintUntilEnd(self):
        # type: () -> None
        """Print all pending text before the arena's last span id."""
        self._PrintUntilSpid(self.arena.LastSpanId())
186
187
def LosslessCat(arena):
    # type: (alloc.Arena) -> None
    """Print the arena's tokens back to stdout without changing them.

    Used to test the lossless invariant: the tokens "add up" to the original
    doc.
    """
    Cursor(arena, mylib.Stdout()).PrintUntilEnd()
196
197
def PrintTokens(arena):
    # type: (alloc.Arena) -> None
    """Debugging tool: dump every token in the arena.

    Each token is printed to stdout as its index, Id name and the repr of its
    text.  The token count is printed to stderr.
    """
    tokens = arena.tokens
    num_tokens = len(tokens)

    if num_tokens == 1:  # Special case for line_id == -1
        print('Empty file with EOF token on invalid line:')
        print('%s' % tokens[0])
        return

    # TODO:
    # - TSV8: end position, token type
    # - then an option to print token text, as a J8 string
    # - and then there can be a separate tool to number the columns
    #
    # - Do we also have JSON8 / HTM8 / TSV8 tokens?
    # - And mini-languages like glob, etc.
    for index, tok in enumerate(tokens):
        begin = tok.col
        text = tok.line.content[begin:begin + tok.length]
        id_name = Id_str(tok.id, dot=False)
        print('%5d %-20s %r' % (index, id_name, text))

    print_stderr('(%d tokens)' % num_tokens)
218
219
def TreeFind(arena, node, errfmt):
    # type: (alloc.Arena, command_t, ui.ErrorFormatter) -> None
    """Run a Finder over `node` to find various constructs in the tree.

    TODO: could this be like a query language?  TreeSitter has a query
    language for CSTs.  But those are untyped, whereas we are strongly typed.
    """
    Finder(arena, errfmt).DoCommand(node)
230
231
class Finder(object):
    """
    Walk an OSH command_t syntax tree, looking for certain constructs.

    Right now the only thing reported is a word whose first part is a
    Lit_ArrayLhsOpen literal, which is printed as a possible bad parse.

    Note: it might be nice to have some kind of visitor, so we don't have to
    repeat the traversal logic?

    Or even better would be a homogeneous traversal, or query language.
    """

    def __init__(self, arena, errfmt):
        # type: (alloc.Arena, ui.ErrorFormatter) -> None
        self.arena = arena
        self.errfmt = errfmt

    def DoWordPart(self, p):
        # type: (word_part_t) -> None
        """Report a Lit_ArrayLhsOpen literal; descend into a command sub."""
        UP_p = p
        with tagswitch(p) as case:
            if case(word_part_e.CommandSub):
                p = cast(CommandSub, UP_p)
                self.DoCommand(p.child)

            elif case(word_part_e.Literal):
                tok = cast(Token, UP_p)
                if tok.id == Id.Lit_ArrayLhsOpen:
                    self.errfmt.Print_('BAD parse?', tok)

    def DoWord(self, w):
        # type: (word_t) -> None
        """Inspect the first part of a compound word; ignore other words."""
        UP_w = w
        with tagswitch(w) as case:
            if case(word_e.Compound):
                w = cast(CompoundWord, UP_w)
                self.DoWordPart(w.parts[0])

    def DoRhsWord(self, w):
        # type: (rhs_word_t) -> None
        """Inspect the right-hand side of an assignment, if it isn't empty."""
        UP_w = w
        with tagswitch(w) as case:
            if case(rhs_word_e.Empty):
                pass
            elif case(rhs_word_e.Compound):
                w = cast(CompoundWord, UP_w)
                self.DoWord(w)
            else:
                raise AssertionError()

    def DoCommand(self, node):
        # type: (command_t) -> None
        """Recursively visit `node` and the commands nested inside it."""
        UP_node = node
        with tagswitch(node) as case:
            # Leaves: these are where words get inspected

            if case(command_e.Simple):
                node = cast(command.Simple, UP_node)
                # Only first word has a[
                if len(node.words):
                    self.DoWord(node.words[0])

            elif case(command_e.ShAssignment):
                node = cast(command.ShAssignment, UP_node)
                for pair in node.pairs:
                    self.DoRhsWord(pair.rhs)

            # Nodes that wrap a single command

            elif case(command_e.Sentence):
                node = cast(command.Sentence, UP_node)
                self.DoCommand(node.child)

            elif case(command_e.Redirect):
                node = cast(command.Redirect, UP_node)
                self.DoCommand(node.child)

            elif case(command_e.Subshell):
                node = cast(command.Subshell, UP_node)
                self.DoCommand(node.child)

            elif case(command_e.TimeBlock):
                node = cast(command.TimeBlock, UP_node)
                self.DoCommand(node.pipeline)

            elif case(command_e.ShFunction):
                node = cast(command.ShFunction, UP_node)
                self.DoCommand(node.body)

            elif case(command_e.ForEach):
                node = cast(command.ForEach, UP_node)
                self.DoCommand(node.body)

            elif case(command_e.WhileUntil):
                node = cast(command.WhileUntil, UP_node)
                # TODO: cond
                self.DoCommand(node.body)

            # Nodes that hold a list of commands

            elif case(command_e.CommandList):
                node = cast(command.CommandList, UP_node)
                for child in node.children:
                    self.DoCommand(child)

            elif case(command_e.Pipeline):
                node = cast(command.Pipeline, UP_node)
                for child in node.children:
                    self.DoCommand(child)

            elif case(command_e.AndOr):
                node = cast(command.AndOr, UP_node)
                for child in node.children:
                    self.DoCommand(child)

            # This has to be different in the function case.
            elif case(command_e.BraceGroup):
                node = cast(BraceGroup, UP_node)
                for child in node.children:
                    self.DoCommand(child)

            elif case(command_e.DoGroup):
                node = cast(command.DoGroup, UP_node)
                for child in node.children:
                    self.DoCommand(child)

            # Nodes with several arms

            elif case(command_e.If):
                node = cast(command.If, UP_node)
                for arm in node.arms:
                    # TODO: cond
                    for child in arm.action:
                        self.DoCommand(child)

                # Nothing to visit when there's no else clause
                for child in node.else_action:
                    self.DoCommand(child)

            elif case(command_e.Case):
                node = cast(command.Case, UP_node)
                for case_arm in node.arms:
                    for child in case_arm.action:
                        self.DoCommand(child)

            # Not traversed yet

            elif case(command_e.DParen):
                # TODO: arith expressions can have words with command subs
                pass

            elif case(command_e.DBracket):
                # TODO: bool_expr_t can have words with command subs
                pass

            else:
                # Other command types are ignored
                pass
398
399
def Ysh_ify(arena, node):
    # type: (alloc.Arena, command_t) -> None
    """Translate the OSH program `node` to YSH, writing the result to stdout."""
    printer = YshPrinter(Cursor(arena, mylib.Stdout()), arena, mylib.Stdout())

    # no local symbols yet
    printer.DoCommand(node, None, at_top_level=True)

    # Flush whatever source text follows the last translated construct
    printer.End()
406
407
# PROBLEM: tilde substitution (~) is disabled inside double quotes, so
# quoting the right-hand side of an assignment would turn it off.
# You could turn it into $HOME, I guess:
#   const foo = "$HOME/src"
#   const foo = %( ~/src )[0]  # does this make sense?
412
413
def _GetRhsStyle(w):
    # type: (rhs_word_t) -> word_style_t
    r"""Determine what style an assignment should use: '' or "", or an
    expression.

    What is returned right now:

      SQ        empty RHS (foo=), a word with a single Literal part (x=y),
                and a word with any single part not listed below (foo='')
      DQ        a single SimpleVarSub (x=$myvar), a single DoubleQuoted part,
                and any word with more than one part
      Unquoted  a single BracedVarSub, CommandSub or ArithSub (x=$(hostname))
      Expr      a single TildeSub (x=~andy/src)

    Raises:
      AssertionError: if a compound word has no parts.

    Design notes -- these are ideas, NOT what the code does yet:

      C_SQ  foo=$'\n'               setglobal foo = C'\n'
      C_DQ  foo=$'\n'"$bar"         setglobal foo = C"\n$(bar)"

      Expr  path=${1:-}             setglobal path = $1 or ''
      Expr  host=${2:-$(hostname)}  setglobal host = $2 or $[hostname]

    Should C strings be the default, with raw strings optional?  Most usages
    of \n and \0 can turn into Oil, and the default should be statically
    parseable.  As long as we are fully analyzing the strings, we might as
    well go all the way.  I think I need a PartialStaticEval() to paper over
    this.

    The main issue is regex and globs, because they use escapes for a
    different purpose.  I think just do grep r'foo\tbar' or something.

    What's the difference between Expr and Unquoted?  I think they're the
    same.
    """
    # Actually splitting NEVER HAPPENS ON ASSIGNMENT.  LEAVE IT OFF.

    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            return word_style_e.SQ

        elif case(rhs_word_e.Compound):
            w = cast(CompoundWord, UP_w)
            num_parts = len(w.parts)

            if num_parts == 0:
                raise AssertionError(w)

            if num_parts > 1:
                # multiple parts use YSTR in general?
                # Depends if there are subs
                return word_style_e.DQ

            part0 = w.parts[0]
            with tagswitch(part0) as case2:
                # VAR_SUBS
                if case2(word_part_e.TildeSub):
                    # x=~andy/src
                    # -> setvar x = homedir('andy') + '/src'
                    return word_style_e.Expr

                elif case2(word_part_e.Literal):
                    # local x=y
                    # -> var x = 'y'
                    return word_style_e.SQ

                elif case2(word_part_e.SimpleVarSub):
                    # local x=$myvar
                    # -> var x = "$myvar"
                    # or var x = ${myvar}
                    # or var x = myvar
                    return word_style_e.DQ

                elif case2(word_part_e.BracedVarSub, word_part_e.CommandSub,
                           word_part_e.ArithSub):
                    # x=$(hostname)
                    # -> setvar x = $(hostname)
                    return word_style_e.Unquoted

                elif case2(word_part_e.DoubleQuoted):
                    # TODO: remove quotes in single part like "$(hostname)" ->
                    # $(hostname)
                    return word_style_e.DQ

    # Default, e.g. for a single part that isn't handled above
    return word_style_e.SQ
498
499
500class YshPrinter(object):
501 """Prettify OSH to YSH."""
502
503 def __init__(self, cursor, arena, f):
504 # type: (Cursor, alloc.Arena, mylib.Writer) -> None
505 self.cursor = cursor
506 self.arena = arena
507 self.f = f
508
509 def _DebugSpid(self, spid):
510 # type: (int) -> None
511 span = self.arena.GetToken(spid)
512 s = span.line.content[span.col:span.col + span.length]
513 print_stderr('SPID %d = %r' % (spid, s))
514
515 def End(self):
516 # type: () -> None
517 """Make sure we print until the end of the file."""
518 self.cursor.PrintUntilEnd()
519
520 def DoRedirect(self, node, local_symbols):
521 # type: (Redir, Dict[str, bool]) -> None
522 """
523 Change here docs to <<< '''
524 """
525 #print(node, file=sys.stderr)
526 op_id = node.op.id
527 self.cursor.PrintUntil(node.op)
528
529 if node.arg.tag() == redir_param_e.HereDoc:
530 here_doc = cast(redir_param.HereDoc, node.arg)
531
532 here_begin = here_doc.here_begin
533 ok, delimiter, delim_quoted = word_.StaticEval(here_begin)
534 if not ok:
535 p_die('Invalid here doc delimiter', loc.Word(here_begin))
536
537 # Turn everything into <<<. We just change the quotes
538 self.f.write('<<<')
539
540 if delim_quoted:
541 self.f.write(" '''")
542 else:
543 self.f.write(' """')
544
545 delim_end_tok = location.RightTokenForWord(here_begin)
546 self.cursor.SkipPast(delim_end_tok)
547
548 # Now print the lines. TODO: Have a flag to indent these to the level of
549 # the owning command, e.g.
550 # cat <<EOF
551 # EOF
552 # Or since most here docs are the top level, you could just have a hack
553 # for a fixed indent? TODO: Look at real use cases.
554 for part in here_doc.stdin_parts:
555 self.DoWordPart(part, local_symbols)
556
557 self.cursor.SkipPast(here_doc.here_end_tok)
558 if delim_quoted:
559 self.f.write("'''\n")
560 else:
561 self.f.write('"""\n')
562
563 else:
564 pass
565
566 # cat << EOF
567 # hello $name
568 # EOF
569 # cat <<< """
570 # hello $name
571 # """
572
573 # cat << 'EOF'
574 # no expansion
575 # EOF
576
577 # cat <<< '''
578 # no expansion
579 # '''
580
581 def DoShAssignment(self, node, at_top_level, local_symbols):
582 # type: (command.ShAssignment, bool, Dict[str, bool]) -> None
583 """
584 local_symbols:
585 - Add every 'local' declaration to it
586 - problem: what if you have local in an "if" ?
587 - we could treat it like nested scope and see what happens? Do any
588 programs have a problem with it?
589 case/if/for/while/BraceGroup all define scopes or what?
590 You don't want inconsistency of variables that could be defined at
591 any point.
592 - or maybe you only need it within "if / case" ? Well I guess
593 for/while can break out of the loop and cause problems. A break is
594 an "if".
595
596 - for subsequent
597 """
598 # Change RHS to expression language. Bare words not allowed. foo -> 'foo'
599
600 has_rhs = False # TODO: Should be on a per-variable basis.
601 # local a=b c=d, or just punt on those
602 defined_locally = False # is it a local variable in this function?
603 # can't tell if global
604
605 if True:
606 self.cursor.PrintUntil(node.pairs[0].left)
607
608 # For now, just detect whether the FIRST assignment on the line has been
609 # declared locally. We might want to split every line into separate
610 # statements.
611 if local_symbols is not None:
612 lhs0 = node.pairs[0].lhs
613 #if lhs0.tag() == sh_lhs_e.Name and lhs0.name in local_symbols:
614 # defined_locally = True
615
616 #print("CHECKING NAME", lhs0.name, defined_locally, local_symbols)
617
618 # TODO: Avoid translating these
619 has_array_index = [
620 pair.lhs.tag() == sh_lhs_e.UnparsedIndex for pair in node.pairs
621 ]
622
623 # need semantic analysis.
624 # Would be nice to assume that it's a local though.
625 if at_top_level:
626 self.f.write('setvar ')
627 elif defined_locally:
628 self.f.write('set ')
629 #self.f.write('[local mutated]')
630 else:
631 # We're in a function, but it's not defined locally, so we must be
632 # mutating a global.
633 self.f.write('setvar ')
634
635 # foo=bar spam=eggs -> foo = 'bar', spam = 'eggs'
636 n = len(node.pairs)
637 for i, pair in enumerate(node.pairs):
638 lhs = pair.lhs
639 UP_lhs = lhs
640 with tagswitch(lhs) as case:
641 if case(sh_lhs_e.Name):
642 lhs = cast(sh_lhs.Name, UP_lhs)
643
644 self.cursor.PrintUntil(pair.left)
645 # Assume skipping over one Lit_VarLike token
646 self.cursor.SkipPast(pair.left)
647
648 # Replace name. I guess it's Lit_Chars.
649 self.f.write(lhs.name)
650 self.f.write(' = ')
651
652 # TODO: This should be translated from Empty.
653 if pair.rhs.tag() == rhs_word_e.Empty:
654 self.f.write("''") # local i -> var i = ''
655 else:
656 self.DoRhsWord(pair.rhs, local_symbols)
657
658 elif case(sh_lhs_e.UnparsedIndex):
659 # --one-pass-parse gives us this node, instead of IndexedName
660 pass
661
662 else:
663 raise AssertionError(pair.lhs.__class__.__name__)
664
665 if i != n - 1:
666 self.f.write(',')
667
668 def _DoSimple(self, node, local_symbols):
669 # type: (command.Simple, Dict[str, bool]) -> None
670
671 # How to preserve spaces between words? Do you want to do it?
672 # Well you need to test this:
673 #
674 # echo foo \
675 # bar
676
677 if len(node.more_env):
678 # We only need to transform the right side, not left side.
679 for pair in node.more_env:
680 self.DoRhsWord(pair.val, local_symbols)
681
682 if len(node.words):
683 first_word = node.words[0]
684 ok, val, quoted = word_.StaticEval(first_word)
685 word0_tok = location.LeftTokenForWord(first_word)
686 if ok and not quoted:
687 if val == '[' and len(node.words) >= 3:
688 word2 = node.words[-2]
689 last_word = node.words[-1]
690
691 # Check if last word is ]
692 ok, val, quoted = word_.StaticEval(last_word)
693 if ok and not quoted and val == ']':
694 # Replace [ with 'test'
695 self.cursor.PrintUntil(word0_tok)
696 self.cursor.SkipPast(word0_tok)
697 self.f.write('test')
698
699 for w in node.words[1:-1]:
700 self.DoWordInCommand(w, local_symbols)
701
702 # Now omit ]
703 tok2 = location.RightTokenForWord(word2)
704 rbrack_tok = location.LeftTokenForWord(last_word)
705
706 # Skip the space token before ]
707 self.cursor.PrintIncluding(tok2)
708 # ] takes one spid
709 self.cursor.SkipPast(rbrack_tok)
710 return
711 else:
712 raise RuntimeError('Got [ without ]')
713
714 elif val == '.':
715 self.cursor.PrintUntil(word0_tok)
716 self.cursor.SkipPast(word0_tok)
717 self.f.write('source')
718 return
719
720 for w in node.words:
721 self.DoWordInCommand(w, local_symbols)
722
723 # TODO: Print the terminator. Could be \n or ;
724 # Need to print env like PYTHONPATH = 'foo' && ls
725 # Need to print redirects:
726 # < > are the same. << is here string, and >> is assignment.
727 # append is >+
728
729 # TODO: static_eval of simple command
730 # - [ -> "test". Eliminate trailing ].
731 # - . -> source, etc.
732
733 def DoCommand(self, node, local_symbols, at_top_level=False):
734 # type: (command_t, Dict[str, bool], bool) -> None
735
736 UP_node = node
737
738 with tagswitch(node) as case:
739 if case(command_e.CommandList):
740 node = cast(command.CommandList, UP_node)
741
742 # TODO: How to distinguish between echo hi; echo bye; and on
743 # separate lines
744 for child in node.children:
745 self.DoCommand(child,
746 local_symbols,
747 at_top_level=at_top_level)
748
749 elif case(command_e.Redirect):
750 node = cast(command.Redirect, UP_node)
751
752 self.DoCommand(node.child,
753 local_symbols,
754 at_top_level=at_top_level)
755 for r in node.redirects:
756 self.DoRedirect(r, local_symbols)
757
758 elif case(command_e.Simple):
759 node = cast(command.Simple, UP_node)
760
761 self._DoSimple(node, local_symbols)
762
763 elif case(command_e.ShAssignment):
764 node = cast(command.ShAssignment, UP_node)
765
766 self.DoShAssignment(node, at_top_level, local_symbols)
767
768 elif case(command_e.Pipeline):
769 node = cast(command.Pipeline, UP_node)
770
771 for child in node.children:
772 self.DoCommand(child, local_symbols)
773
774 elif case(command_e.AndOr):
775 node = cast(command.AndOr, UP_node)
776
777 for child in node.children:
778 self.DoCommand(child, local_symbols)
779
780 elif case(command_e.Sentence):
781 node = cast(command.Sentence, UP_node)
782
783 # 'ls &' to 'fork ls'
784 # Keep ; the same.
785 self.DoCommand(node.child, local_symbols)
786
787 # This has to be different in the function case.
788 elif case(command_e.BraceGroup):
789 node = cast(BraceGroup, UP_node)
790
791 # { echo hi; } -> do { echo hi }
792 # For now it might be OK to keep 'do { echo hi; }
793 self.cursor.PrintUntil(node.left)
794 self.cursor.SkipPast(node.left)
795 self.f.write('do {')
796
797 for child in node.children:
798 self.DoCommand(child, local_symbols)
799
800 elif case(command_e.Subshell):
801 node = cast(command.Subshell, UP_node)
802
803 # (echo hi) -> shell echo hi
804 # (echo hi; echo bye) -> shell {echo hi; echo bye}
805
806 self.cursor.PrintUntil(node.left)
807 self.cursor.SkipPast(node.left)
808 self.f.write('shell {')
809
810 self.DoCommand(node.child, local_symbols)
811
812 #self._DebugSpid(right_spid)
813 #self._DebugSpid(right_spid + 1)
814
815 #print('RIGHT SPID', right_spid)
816 self.cursor.PrintUntil(node.right)
817 self.cursor.SkipPast(node.right)
818 self.f.write('}')
819
820 elif case(command_e.ShFunction):
821 node = cast(command.ShFunction, UP_node)
822
823 # TODO: skip name
824 #self.f.write('proc %s' % node.name)
825
826 # New symbol table for every function.
827 new_local_symbols = {} # type: Dict[str, bool]
828
829 # Should be the left most span, including 'function'
830 if node.keyword: # function foo { ...
831 self.cursor.PrintUntil(node.keyword)
832 else: # foo() { ...
833 self.cursor.PrintUntil(node.name_tok)
834
835 self.f.write('proc %s ' % node.name)
836
837 UP_body = node.body
838 with tagswitch(UP_body) as case:
839 if case(command_e.BraceGroup):
840 body = cast(BraceGroup, UP_body)
841 self.cursor.SkipUntil(body.left)
842
843 # Don't add "do" like a standalone brace group. Just use {}.
844 for child in body.children:
845 self.DoCommand(child, new_local_symbols)
846 else:
847 # very rare cases like f() ( subshell )
848 pass
849
850 elif case(command_e.DoGroup):
851 node = cast(command.DoGroup, UP_node)
852
853 self.cursor.PrintUntil(node.left)
854 self.cursor.SkipPast(node.left)
855 self.f.write('{')
856
857 for child in node.children:
858 self.DoCommand(child, local_symbols)
859
860 self.cursor.PrintUntil(node.right)
861 self.cursor.SkipPast(node.right)
862 self.f.write('}')
863
864 elif case(command_e.ForEach):
865 node = cast(command.ForEach, UP_node)
866
867 # Need to preserve spaces between words, because there can be line
868 # wrapping.
869 # for x in a b c \
870 # d e f; do
871
872 UP_iterable = node.iterable
873 with tagswitch(node.iterable) as case:
874 if case(for_iter_e.Args):
875 self.f.write('for %s in @ARGV ' % node.iter_names[0])
876
877 # note: command_t doesn't have .spids
878 body_tok = location.TokenForCommand(node.body)
879 self.cursor.SkipUntil(body_tok)
880
881 elif case(for_iter_e.Words):
882 pass
883
884 elif case(for_iter_e.YshExpr):
885 pass
886
887 if node.semi_tok is not None:
888 self.cursor.PrintUntil(node.semi_tok)
889 self.cursor.SkipPast(node.semi_tok)
890
891 self.DoCommand(node.body, local_symbols)
892
893 elif case(command_e.WhileUntil):
894 node = cast(command.WhileUntil, UP_node)
895
896 # Skip 'until', and replace it with 'while not'
897 if node.keyword.id == Id.KW_Until:
898 self.cursor.PrintUntil(node.keyword)
899 self.cursor.SkipPast(node.keyword)
900 self.f.write('while !')
901
902 if node.cond.tag() == condition_e.Shell:
903 commands = cast(List_of_command, node.cond)
904 # Skip the semi-colon in the condition, which is usually a Sentence
905 if (len(commands) == 1 and
906 commands[0].tag() == command_e.Sentence):
907 sentence = cast(command.Sentence, commands[0])
908 self.DoCommand(sentence.child, local_symbols)
909 self.cursor.SkipPast(sentence.terminator)
910
911 self.DoCommand(node.body, local_symbols)
912
913 elif case(command_e.If):
914 node = cast(command.If, UP_node)
915
916 # if foo; then -> if foo {
917 # elif foo; then -> } elif foo {
918 for i, arm in enumerate(node.arms):
919 elif_tok = arm.keyword
920 then_tok = arm.then_tok
921
922 if i != 0: # 'if' not 'elif' on the first arm
923 self.cursor.PrintUntil(elif_tok)
924 self.f.write('} ')
925
926 cond = arm.cond
927 if cond.tag() == condition_e.Shell:
928 commands = cast(List_of_command, cond)
929 if (len(commands) == 1 and
930 commands[0].tag() == command_e.Sentence):
931 sentence = cast(command.Sentence, commands[0])
932 self.DoCommand(sentence, local_symbols)
933
934 # Remove semi-colon
935 self.cursor.PrintUntil(sentence.terminator)
936 self.cursor.SkipPast(sentence.terminator)
937 else:
938 for child in commands:
939 self.DoCommand(child, local_symbols)
940
941 self.cursor.PrintUntil(then_tok)
942 self.cursor.SkipPast(then_tok)
943 self.f.write('{')
944
945 for child in arm.action:
946 self.DoCommand(child, local_symbols)
947
948 # else -> } else {
949 if len(node.else_action):
950 self.cursor.PrintUntil(node.else_kw)
951 self.f.write('} ')
952 self.cursor.PrintIncluding(node.else_kw)
953 self.f.write(' {')
954
955 for child in node.else_action:
956 self.DoCommand(child, local_symbols)
957
958 # fi -> }
959 self.cursor.PrintUntil(node.fi_kw)
960 self.cursor.SkipPast(node.fi_kw)
961 self.f.write('}')
962
963 elif case(command_e.Case):
964 node = cast(command.Case, UP_node)
965
966 to_match = None # type: word_t
967 with tagswitch(node.to_match) as case:
968 if case(case_arg_e.YshExpr):
969 return
970 elif case(case_arg_e.Word):
971 to_match = cast(case_arg.Word, node.to_match).w
972 else:
973 raise AssertionError()
974
975 self.cursor.PrintIncluding(node.case_kw)
976
977 # Figure out the variable name, so we can translate
978 # - $var to (var)
979 # - "$var" to (var)
980 var_part = None # type: SimpleVarSub
981 with tagswitch(to_match) as case:
982 if case(word_e.Compound):
983 w = cast(CompoundWord, to_match)
984 part0 = w.parts[0]
985
986 with tagswitch(part0) as case2:
987 if case2(word_part_e.SimpleVarSub):
988 var_part = cast(SimpleVarSub, part0)
989
990 elif case2(word_part_e.DoubleQuoted):
991 dq_part = cast(DoubleQuoted, part0)
992 if len(dq_part.parts) == 1:
993 dq_part0 = dq_part.parts[0]
994
995 # Nesting is annoying -- it would be nice to use pattern
996 # matching, but mycpp won't like it.
997 # TODO: extract into a common function
998 with tagswitch(dq_part0) as case3:
999 if case3(word_part_e.SimpleVarSub):
1000 var_part = cast(
1001 SimpleVarSub, dq_part0)
1002 #log("VAR PART %s", var_part)
1003
1004 if var_part:
1005 self.f.write(' (')
1006 self.f.write(lexer.LazyStr(var_part.tok))
1007 self.f.write(') ')
1008
1009 self.cursor.SkipPast(node.arms_start) # Skip past 'in'
1010 self.f.write('{')
1011
1012 missing_last_dsemi = False
1013
1014 for case_arm in node.arms:
1015 # Replace ) with {
1016 self.cursor.PrintUntil(case_arm.middle)
1017 self.f.write(' {')
1018 self.cursor.SkipPast(case_arm.middle)
1019
1020 for child in case_arm.action:
1021 self.DoCommand(child, local_symbols)
1022
1023 if case_arm.right:
1024 # Change ;; to }
1025 self.cursor.PrintUntil(case_arm.right)
1026 self.f.write('}')
1027 self.cursor.SkipPast(case_arm.right)
1028 else:
1029 # valid: case $x in pat) echo hi ; esac
1030 missing_last_dsemi = True
1031
1032 self.cursor.PrintUntil(node.arms_end) # 'esac' or }
1033
1034 if missing_last_dsemi: # Print } for missing ;;
1035 self.f.write('}\n')
1036
1037 self.cursor.SkipPast(node.arms_end) # 'esac' or }
1038
1039 self.f.write('}') # in place of 'esac'
1040
1041 elif case(command_e.TimeBlock):
1042 node = cast(command.TimeBlock, UP_node)
1043
1044 self.DoCommand(node.pipeline, local_symbols)
1045
1046 elif case(command_e.DParen):
1047 node = cast(command.DParen, UP_node)
1048 # TODO: arith expressions can words with command subs
1049 pass
1050
1051 elif case(command_e.DBracket):
1052 node = cast(command.DBracket, UP_node)
1053
1054 # TODO: bool_expr_t can have words with command subs
1055 pass
1056
1057 else:
1058 pass
1059 #log('Command not handled: %s', node)
1060 #raise AssertionError(node.__class__.__name__)
1061
1062 def DoRhsWord(self, node, local_symbols):
1063 # type: (rhs_word_t, Dict[str, bool]) -> None
1064 """For the RHS of assignments.
1065
1066 TODO: for complex cases of word joining:
1067 local a=unquoted'single'"double"'"'
1068
1069 We can try to handle it:
1070 var a = y"unquotedsingledouble\""
1071
1072 Or simply abort and LEAVE IT ALONE. We should only translate things we
1073 recognize.
1074 """
1075 UP_node = node
1076 with tagswitch(node) as case:
1077 if case(rhs_word_e.Empty):
1078 self.f.write("''")
1079
1080 elif case(rhs_word_e.Compound):
1081 node = cast(CompoundWord, UP_node)
1082
1083 # TODO: This is wrong!
1084 style = _GetRhsStyle(node)
1085 if style == word_style_e.SQ:
1086 self.f.write("'")
1087 self.DoWordInCommand(node, local_symbols)
1088 self.f.write("'")
1089 elif style == word_style_e.DQ:
1090 self.f.write('"')
1091 self.DoWordInCommand(node, local_symbols)
1092 self.f.write('"')
1093 # TODO: Put these back
1094 #elif style == word_style_e.Expr:
1095 # pass
1096 #elif style == word_style_e.Unquoted:
1097 # pass
1098 else:
1099 # "${foo:-default}" -> foo or 'default'
1100 # ${foo:-default} -> @split(foo or 'default')
1101 # @(foo or 'default') -- implicit split.
1102
1103 if word_.IsVarSub(node): # ${1} or "$1"
1104 # Do it in expression mode
1105 pass
1106 # NOTE: ArithSub with $(1 +2 ) is different than 1 + 2 because of
1107 # conversion to string.
1108
1109 # For now, just stub it out
1110 self.DoWordInCommand(node, local_symbols)
1111
def DoWordInCommand(self, node, local_symbols):
    # type: (word_t, Dict[str, bool]) -> None
    """Translate a word that appears in a command, unquoting where possible.

    A compound word made of exactly one double-quoted part, which itself
    holds exactly one part, gets special treatment:

        "$@"              -> @ARGV
        "$1", "$foo"      -> the var sub token's text, without the quotes
        "${...}", "$(..)" -> the inner part via DoWordPart(), quotes skipped

    Every other compound word is handed to DoWordPart() one part at a time.
    BracedTree words are left untouched.

    Args:
      node: the word to translate
      local_symbols: passed through unchanged to DoWordPart()

    Raises:
      AssertionError: for any word type other than Compound or BracedTree.
    """
    UP_node = node

    with tagswitch(node) as case:
        if case(word_e.Compound):
            node = cast(CompoundWord, UP_node)
            parts = node.parts

            # TODO (still open): "${foo}" -> $foo

            is_lone_dq = (len(parts) == 1 and
                          parts[0].tag() == word_part_e.DoubleQuoted)
            if is_lone_dq:
                # dq.left and dq.right are the opening and closing quote.
                # Open question from the author: do we need a HereDoc part?
                dq = cast(DoubleQuoted, parts[0])

                if len(dq.parts) == 1:
                    inner = dq.parts[0]
                    inner_tag = inner.tag()

                    if inner_tag == word_part_e.SimpleVarSub:
                        vsub = cast(SimpleVarSub, inner)
                        tok_id = vsub.tok.id

                        if tok_id == Id.VSub_At:
                            # Replace the whole span: " then $@ then "
                            self.cursor.PrintUntil(dq.left)
                            self.cursor.SkipPast(dq.right)
                            self.f.write('@ARGV')
                            return  # Done replacing

                        if (tok_id == Id.VSub_Number or
                                tok_id == Id.VSub_DollarName):
                            # "$1" -> $1, "$foo" -> $foo
                            self.cursor.PrintUntil(dq.left)
                            self.cursor.SkipPast(dq.right)
                            self.f.write(lexer.TokenVal(vsub.tok))
                            return

                        # Any other simple var sub falls through to the
                        # generic part-by-part loop below.

                    elif inner_tag in (word_part_e.BracedVarSub,
                                       word_part_e.CommandSub):
                        # Single braced var sub or command sub inside quotes.
                        # An unquoted one would instead need to turn into
                        #
                        #   $(echo one two) -> @[echo one two]
                        #   `echo one two`  -> @[echo one two]
                        #
                        #   ${var:-'the default'} -> @$(var or 'the default')
                        #
                        #   $((1 + 2)) -> $(1 + 2) -- this is OK unquoted
                        #
                        # Drop the opening quote, translate the part, then
                        # drop the closing quote.
                        self.cursor.PrintUntil(dq.left)
                        self.cursor.SkipPast(dq.left)
                        self.DoWordPart(inner, local_symbols)
                        self.cursor.SkipPast(dq.right)
                        return

            # TODO: 'foo'"bar" should be "foobar", etc.
            # If any part is double quoted, you can always double quote the
            # whole thing?
            for part in parts:
                self.DoWordPart(part, local_symbols)

        elif case(word_e.BracedTree):
            # Not doing anything now
            pass

        else:
            raise AssertionError(node.__class__.__name__)
1194
def DoWordPart(self, node, local_symbols, quoted=False):
    # type: (word_part_t, Dict[str, bool], bool) -> None
    """Translate a single word part, copying or rewriting its source text.

    Output is produced through self.cursor (PrintUntil / PrintIncluding /
    SkipPast) and by writing replacement text directly to self.f.

    Args:
      node: the word part to translate
      local_symbols: passed through to nested DoWordPart() / DoCommand()
      quoted: True when the part is inside double quotes.  DoubleQuoted
        parts recurse with quoted=True.  Only the EscapedLiteral branch
        reads this flag.
    """
    left_tok = location.LeftTokenForWordPart(node)
    if left_tok:
        self.cursor.PrintUntil(left_tok)

    UP_node = node

    with tagswitch(node) as case:
        if case(word_part_e.YshArrayLiteral,
                word_part_e.InitializerLiteral, word_part_e.TildeSub,
                word_part_e.ExtGlob):
            # No rewrite for these part types.
            pass

        elif case(word_part_e.EscapedLiteral):
            node = cast(word_part.EscapedLiteral, UP_node)
            if quoted:
                pass
            else:
                # If unquoted \e, it should quoted instead.  ' ' vs. \<invisible space>
                # Hm is this necessary though?  I think the only motivation is changing
                # \{ and \( for macros.  And ' ' to be readable/visible.
                t = node.token
                val = lexer.TokenSliceLeft(t, 1)
                assert len(val) == 1, val
                # A backslash followed by a newline is not rewritten.
                if val != '\n':
                    self.cursor.PrintUntil(t)
                    self.cursor.SkipPast(t)
                    if val == "'":
                        # \' can't be wrapped in single quotes: that would
                        # emit ''' which is not a quoted single quote.
                        # Double-quote it instead.
                        self.f.write('"\'"')
                    else:
                        self.f.write("'%s'" % val)

        elif case(word_part_e.Literal):
            node = cast(Token, UP_node)
            self.cursor.PrintIncluding(node)

        elif case(word_part_e.SingleQuoted):
            node = cast(SingleQuoted, UP_node)

            # TODO:
            # '\n' is '\\n'
            # $'\n' is '\n'
            # TODO: Should print until right_spid
            # left_spid, right_spid = node.spids
            self.cursor.PrintUntil(node.right)

        elif case(word_part_e.DoubleQuoted):
            node = cast(DoubleQuoted, UP_node)
            for part in node.parts:
                self.DoWordPart(part, local_symbols, quoted=True)

        elif case(word_part_e.SimpleVarSub):
            node = cast(SimpleVarSub, UP_node)

            op_id = node.tok.id

            if op_id == Id.VSub_DollarName:
                self.cursor.PrintIncluding(node.tok)

            elif op_id == Id.VSub_Number:
                self.cursor.PrintIncluding(node.tok)

            elif op_id == Id.VSub_At:  # $@ -- quoted "$@" is handled by the caller
                self.f.write('$[join(ARGV)]')
                self.cursor.SkipPast(node.tok)

            elif op_id == Id.VSub_Star:  # $*
                # PEDANTIC: Depends if quoted or unquoted
                self.f.write('$[join(ARGV)]')
                self.cursor.SkipPast(node.tok)

            elif op_id == Id.VSub_Pound:  # $#
                # len(ARGV) ?
                self.f.write('$Argc')
                self.cursor.SkipPast(node.tok)

            else:
                # Other simple var subs are not rewritten here.
                pass

        elif case(word_part_e.BracedVarSub):
            node = cast(BracedVarSub, UP_node)

            # NOTE: Why do we need this but we don't need it in command sub?
            self.cursor.PrintUntil(node.left)

            # The three checks below are placeholders; no translation is
            # implemented for these operators yet.
            if node.bracket_op:
                # a[1]
                # These two change the sigil!  ${a[@]} is now @a!
                # a[@]
                # a[*]
                pass

            if node.prefix_op:
                # len()
                pass
            if node.suffix_op:
                pass

            op_id = node.name_tok.id
            if op_id == Id.VSub_QMark:
                self.cursor.PrintIncluding(node.name_tok)

            self.cursor.PrintIncluding(node.right)

        elif case(word_part_e.CommandSub):
            node = cast(CommandSub, UP_node)

            if node.left_token.id == Id.Left_Backtick:
                # `cmd` -> $(cmd)
                self.cursor.PrintUntil(node.left_token)
                self.f.write('$(')
                self.cursor.SkipPast(node.left_token)

                self.DoCommand(node.child, local_symbols)

                # Skip over right `
                self.cursor.SkipPast(node.right)
                self.f.write(')')

            else:
                self.cursor.PrintIncluding(node.right)

        else:
            pass