OILS / tools / ysh_ify.py View on Github | oils.pub

1132 lines, 580 significant
1from __future__ import print_function
2"""
3ysh_ify.py: Roughly translate OSH to YSH. Doesn't respect semantics.
4
5ESSENTIAL
6
7Command:
8
9 then/fi, do/done -> { }
10
11 new case statement
12
13 f() { } -> proc f { } (changes scope)
14
15 subshell -> forkwait, because () is taken
16 { } to fopen { }?
17
18 Approximate: var declaration:
19 local a=b -> var a = 'b', I think
20
21 <<EOF here docs to '''
22
23Word:
24 "$@" -> @ARGV
25
26 Not common: unquoted $x -> @[split(x)]
27
28LEGACY that I don't personally use
29
30Builtins:
31 [ -> test
32 . -> source
33
34Word:
35 backticks -> $() (I don't use this)
36 quote removal "$foo" -> $foo
37 brace removal ${foo} and "${foo}" -> $foo
38
39--tool format
40
41 fix indentation and spacing, like clang-format
42 can "lower" the LST to a rough representation with keywords / "first words",
43 { } ( ), and comments
44 - the "atoms" should not have newlines
45"""
46
47from _devbuild.gen.id_kind_asdl import Id, Id_str
48from _devbuild.gen.runtime_asdl import word_style_e, word_style_t
49from _devbuild.gen.syntax_asdl import (
50 loc,
51 CompoundWord,
52 Token,
53 SimpleVarSub,
54 BracedVarSub,
55 CommandSub,
56 DoubleQuoted,
57 SingleQuoted,
58 word_e,
59 word_t,
60 word_part,
61 word_part_e,
62 word_part_t,
63 rhs_word_e,
64 rhs_word_t,
65 sh_lhs,
66 sh_lhs_e,
67 command,
68 command_e,
69 BraceGroup,
70 for_iter_e,
71 case_arg_e,
72 case_arg,
73 condition_e,
74 redir_param,
75 redir_param_e,
76 Redir,
77 List_of_command,
78)
79from asdl import runtime
80from core.error import p_die
81from frontend import lexer
82from frontend import location
83from osh import word_
84from mycpp import mylib
85from mycpp.mylib import log, print_stderr, tagswitch
86
87from typing import Dict, cast, TYPE_CHECKING
88if TYPE_CHECKING:
89 from _devbuild.gen.syntax_asdl import command_t
90 from core import alloc
91
92_ = log
93
94
class Cursor(object):
    """Sequential printer over the tokens ("spans") of a single arena.

    The cursor remembers the ID of the first span that has been neither
    printed nor skipped.  Callers alternate between two operations:

    - Print*: copy the original source text of the pending spans to the writer.
    - Skip*:  move the position forward without writing anything, typically
      because the caller writes replacement text itself.
    """

    def __init__(self, arena, f):
        # type: (alloc.Arena, mylib.Writer) -> None
        self.f = f
        self.arena = arena
        # First span ID that has not been printed or skipped yet
        self.next_span_id = 0

    def _PrintUntilSpid(self, until_span_id):
        # type: (int) -> None
        """Write the text of spans [next_span_id, until_span_id).

        Afterwards the position is set to until_span_id unconditionally, even
        when that is before the old position (in which case nothing is
        written).
        """
        # Callers sometimes pass span_id + 1, so only the exact sentinel is
        # rejected here.
        assert until_span_id != runtime.NO_SPID, (
            'Missing span ID, got %d' % until_span_id)

        for i in xrange(self.next_span_id, until_span_id):
            tok = self.arena.GetToken(i)

            # The Eof span may have no line when the file is completely empty.
            if tok.line is not None:
                # Lit_CharsWithoutPrefix had its leading space stripped; start
                # from column 0 of the line to recover it.
                # See osh/word_compile.py
                if tok.id == Id.Lit_CharsWithoutPrefix:
                    begin = 0
                else:
                    begin = tok.col

                self.f.write(tok.line.content[begin:tok.col + tok.length])

        self.next_span_id = until_span_id

    def _SkipUntilSpid(self, next_span_id):
        # type: (int) -> None
        """Drop everything before next_span_id.

        The next Print* call starts at next_span_id.
        """
        assert (next_span_id != runtime.NO_SPID and
                next_span_id != runtime.NO_SPID + 1), (
                    'Missing span ID, got %d' % next_span_id)
        self.next_span_id = next_span_id

    def SkipUntil(self, tok):
        # type: (Token) -> None
        """Skip pending text; `tok` itself will still be printed later."""
        self._SkipUntilSpid(self.arena.GetSpanId(tok))

    def SkipPast(self, tok):
        # type: (Token) -> None
        """Skip pending text AND `tok` itself."""
        self._SkipUntilSpid(self.arena.GetSpanId(tok) + 1)

    def PrintUntil(self, tok):
        # type: (Token) -> None
        """Print pending text up to, but not including, `tok`."""
        span_id = self.arena.GetSpanId(tok)

        # Test invariant: the arena maps the span ID back to the same token
        if mylib.PYTHON:
            arena_tok = self.arena.GetToken(span_id)
            if tok != arena_tok:
                details = (tok, span_id, id(tok), arena_tok,
                           self.arena.GetSpanId(arena_tok), id(arena_tok))
                raise AssertionError('%s %d %d != %s %d %d' % details)

        self._PrintUntilSpid(span_id)

    def PrintIncluding(self, tok):
        # type: (Token) -> None
        """Print pending text up to and including `tok`."""
        self._PrintUntilSpid(self.arena.GetSpanId(tok) + 1)

    def PrintUntilEnd(self):
        # type: () -> None
        """Print pending text up to the arena's last span ID."""
        self._PrintUntilSpid(self.arena.LastSpanId())
183
184
def LosslessCat(arena):
    # type: (alloc.Arena) -> None
    """Print every token of the arena to stdout, unchanged.

    Used to test the lossless invariant: the tokens "add up" to the original
    document.
    """
    Cursor(arena, mylib.Stdout()).PrintUntilEnd()
193
194
def PrintTokens(arena):
    # type: (alloc.Arena) -> None
    """Debugging tool: print one line per token, then a count on stderr.

    Each line has the token index, the token type name, and the repr() of the
    token's text.
    """
    tokens = arena.tokens
    num_tokens = len(tokens)

    # Special case for line_id == -1: the only token has no usable line
    if num_tokens == 1:
        print('Empty file with EOF token on invalid line:')
        print('%s' % tokens[0])
        return

    # TODO:
    # - TSV8: end position, token type
    # - then an option to print token text, as a J8 string
    # - and then there can be a separate tool to number the columns
    #
    # - Do we also have JSON8 / HTM8 / TSV8 tokens?
    # - And mini-languages like glob, etc.
    for i, tok in enumerate(tokens):
        end = tok.col + tok.length
        text = tok.line.content[tok.col:end]
        print('%5d %-20s %r' % (i, Id_str(tok.id, dot=False), text))

    print_stderr('(%d tokens)' % num_tokens)
215
216
def Ysh_ify(arena, node):
    # type: (alloc.Arena, command_t) -> None
    """Translate the command tree `node` and print the result to stdout.

    The cursor (original text) and the printer (replacement text) both write
    to stdout.
    """
    printer = YshPrinter(Cursor(arena, mylib.Stdout()), arena, mylib.Stdout())

    # None: there are no local symbols at the top level yet
    printer.DoCommand(node, None, at_top_level=True)

    # Flush whatever follows the last translated construct
    printer.End()
223
224
225# PROBLEM: ~ substitution. That is disabled by "".
226# You can turn it into $HOME I guess
227# const foo = "$HOME/src"
228# const foo = %( ~/src )[0] # does this make sense?
229
230
def _GetRhsStyle(w):
    # type: (rhs_word_t) -> word_style_t
    """Pick the quoting style for the right-hand side of an assignment.

    Result by shape of the word:

      empty RHS (foo=)                          SQ
      one part, literal (x=y)                   SQ
      one part, tilde sub (x=~andy/src)         Expr
      one part, $myvar                          DQ
      one part, "double quoted"                 DQ
      one part, ${x} / $(cmd) / $((expr))       Unquoted
      one part, anything else                   SQ
      several parts                             DQ

    Word splitting never happens on assignment, so it isn't considered here.
    """
    UP_w = w
    with tagswitch(w) as case:
        if case(rhs_word_e.Empty):
            return word_style_e.SQ

        elif case(rhs_word_e.Compound):
            w = cast(CompoundWord, UP_w)
            num_parts = len(w.parts)

            if num_parts == 0:
                raise AssertionError(w)

            if num_parts != 1:
                # multiple parts use YSTR in general?
                # Depends if there are subs
                return word_style_e.DQ

            only_part = w.parts[0]
            with tagswitch(only_part) as case2:
                if case2(word_part_e.Literal):
                    # local x=y  ->  var x = 'y'
                    return word_style_e.SQ

                elif case2(word_part_e.TildeSub):
                    # x=~andy/src  ->  setvar x = homedir('andy') + '/src'
                    return word_style_e.Expr

                elif case2(word_part_e.SimpleVarSub,
                           word_part_e.DoubleQuoted):
                    # local x=$myvar  ->  var x = "$myvar"
                    # TODO: remove quotes in single part like
                    # "$(hostname)" -> $(hostname)
                    return word_style_e.DQ

                elif case2(word_part_e.BracedVarSub, word_part_e.CommandSub,
                           word_part_e.ArithSub):
                    # x=$(hostname)  ->  setvar x = $(hostname)
                    return word_style_e.Unquoted

    # Default, including single parts not listed above
    return word_style_e.SQ
315
316
class YshPrinter(object):
    """Prettify OSH to YSH.

    Walks the syntax tree in source order.  Original text is copied through
    `cursor`; replacement text is written directly to `f`.  The two are
    expected to target the same output so that they interleave correctly.
    """

    def __init__(self, cursor, arena, f):
        # type: (Cursor, alloc.Arena, mylib.Writer) -> None
        self.cursor = cursor
        self.arena = arena
        self.f = f

    def _DebugSpid(self, spid):
        # type: (int) -> None
        """Print the text of span `spid` to stderr."""
        span = self.arena.GetToken(spid)
        s = span.line.content[span.col:span.col + span.length]
        print_stderr('SPID %d = %r' % (spid, s))

    def End(self):
        # type: () -> None
        """Make sure we print until the end of the file."""
        self.cursor.PrintUntilEnd()

    def DoRedirect(self, node, local_symbols):
        # type: (Redir, Dict[str, bool]) -> None
        """Change here docs to <<< with a triple-quoted string.

        Other redirects are left for the cursor to copy unchanged.

            cat << EOF               cat <<< \"\"\"
            hello $name       ->     hello $name
            EOF                      \"\"\"

            cat << 'EOF'             cat <<< '''
            no expansion      ->     no expansion
            EOF                      '''

        A quoted delimiter selects ''' and an unquoted one selects \"\"\".
        Calls p_die() when the delimiter can't be statically evaluated.
        """
        self.cursor.PrintUntil(node.op)

        if node.arg.tag() == redir_param_e.HereDoc:
            here_doc = cast(redir_param.HereDoc, node.arg)

            here_begin = here_doc.here_begin
            ok, delimiter, delim_quoted = word_.StaticEval(here_begin)
            if not ok:
                p_die('Invalid here doc delimiter', loc.Word(here_begin))

            # Turn everything into <<<.  We just change the quotes
            self.f.write('<<<')

            if delim_quoted:
                self.f.write(" '''")
            else:
                self.f.write(' """')

            # Drop the original operator and delimiter word
            delim_end_tok = location.RightTokenForWord(here_begin)
            self.cursor.SkipPast(delim_end_tok)

            # Now print the lines.  TODO: Have a flag to indent these to the
            # level of the owning command, e.g.
            #   cat <<EOF
            #   EOF
            # Or since most here docs are the top level, you could just have a
            # hack for a fixed indent?  TODO: Look at real use cases.
            for part in here_doc.stdin_parts:
                self.DoWordPart(part, local_symbols)

            # Replace the closing delimiter line with the closing quotes
            self.cursor.SkipPast(here_doc.here_end_tok)
            if delim_quoted:
                self.f.write("'''\n")
            else:
                self.f.write('"""\n')

    def DoShAssignment(self, node, at_top_level, local_symbols):
        # type: (command.ShAssignment, bool, Dict[str, bool]) -> None
        """Translate 'foo=bar spam=eggs' to 'setvar foo = 'bar', spam = 'eggs''.

        The RHS is changed to the expression language, where bare words are
        not allowed: foo -> 'foo'.

        A keyword is written before the first pair.  It is currently always
        'setvar', because nothing sets `defined_locally` yet.

        Open questions about local_symbols:
        - Add every 'local' declaration to it
          - problem: what if you have local in an "if" ?
          - we could treat it like nested scope and see what happens?  Do any
            programs have a problem with it?
            case/if/for/while/BraceGroup all define scopes or what?
            You don't want inconsistency of variables that could be defined at
            any point.
          - or maybe you only need it within "if / case" ?  Well I guess
            for/while can break out of the loop and cause problems.  A break is
            an "if".
        """
        # Is it a local variable in this function?  Can't tell if global.
        #
        # TODO: Detect whether the FIRST assignment on the line has been
        # declared locally, i.e. its sh_lhs.Name is in local_symbols.  We might
        # want to split every line into separate statements.
        # TODO: has_rhs should be tracked on a per-variable basis, for
        # 'local a=b c=d', or just punt on those.
        # TODO: Avoid translating pairs with sh_lhs_e.UnparsedIndex.
        defined_locally = False

        self.cursor.PrintUntil(node.pairs[0].left)

        # need semantic analysis.
        # Would be nice to assume that it's a local though.
        if at_top_level:
            self.f.write('setvar ')
        elif defined_locally:
            self.f.write('set ')
        else:
            # We're in a function, but it's not defined locally, so we must be
            # mutating a global.
            self.f.write('setvar ')

        # foo=bar spam=eggs -> foo = 'bar', spam = 'eggs'
        n = len(node.pairs)
        for i, pair in enumerate(node.pairs):
            lhs = pair.lhs
            UP_lhs = lhs
            with tagswitch(lhs) as case:
                if case(sh_lhs_e.Name):
                    lhs = cast(sh_lhs.Name, UP_lhs)

                    self.cursor.PrintUntil(pair.left)
                    # Assume skipping over one Lit_VarLike token
                    self.cursor.SkipPast(pair.left)

                    # Replace name.  I guess it's Lit_Chars.
                    self.f.write(lhs.name)
                    self.f.write(' = ')

                    # TODO: This should be translated from Empty.
                    if pair.rhs.tag() == rhs_word_e.Empty:
                        self.f.write("''")  # local i -> var i = ''
                    else:
                        self.DoRhsWord(pair.rhs, local_symbols)

                elif case(sh_lhs_e.UnparsedIndex):
                    # --one-pass-parse gives us this node, instead of
                    # IndexedName
                    pass

                else:
                    raise AssertionError(pair.lhs.__class__.__name__)

            if i != n - 1:
                self.f.write(',')

    def _DoSimple(self, node, local_symbols):
        # type: (command.Simple, Dict[str, bool]) -> None
        """Translate a simple command.

        - [ ... ]  ->  test ...   (the trailing ] is dropped)
        - . file   ->  source file
        - otherwise each word goes through DoWordInCommand()

        Raises RuntimeError for a '[' command of 3 or more words whose last
        word is not a literal ']'.
        """
        # How to preserve spaces between words?  Do you want to do it?
        # Well you need to test this:
        #
        # echo foo \
        #   bar

        if len(node.more_env):
            # We only need to transform the right side, not left side.
            for pair in node.more_env:
                self.DoRhsWord(pair.val, local_symbols)

        if len(node.words):
            first_word = node.words[0]
            ok, val, quoted = word_.StaticEval(first_word)
            word0_tok = location.LeftTokenForWord(first_word)
            if ok and not quoted:
                if val == '[' and len(node.words) >= 3:
                    word2 = node.words[-2]
                    last_word = node.words[-1]

                    # Check if last word is ]
                    ok, val, quoted = word_.StaticEval(last_word)
                    if ok and not quoted and val == ']':
                        # Replace [ with 'test'
                        self.cursor.PrintUntil(word0_tok)
                        self.cursor.SkipPast(word0_tok)
                        self.f.write('test')

                        for w in node.words[1:-1]:
                            self.DoWordInCommand(w, local_symbols)

                        # Now omit ]
                        tok2 = location.RightTokenForWord(word2)
                        rbrack_tok = location.LeftTokenForWord(last_word)

                        # Skip the space token before ]
                        self.cursor.PrintIncluding(tok2)
                        # ] takes one spid
                        self.cursor.SkipPast(rbrack_tok)
                        return
                    else:
                        raise RuntimeError('Got [ without ]')

                elif val == '.':
                    # The remaining words are copied later by the cursor
                    self.cursor.PrintUntil(word0_tok)
                    self.cursor.SkipPast(word0_tok)
                    self.f.write('source')
                    return

        for w in node.words:
            self.DoWordInCommand(w, local_symbols)

        # TODO: Print the terminator.  Could be \n or ;
        # Need to print env like PYTHONPATH = 'foo' && ls
        # Need to print redirects:
        # < > are the same.  << is here string, and >> is assignment.
        # append is >+

        # TODO: static_eval of simple command
        # - [ -> "test".  Eliminate trailing ].
        # - . -> source, etc.

    def DoCommand(self, node, local_symbols, at_top_level=False):
        # type: (command_t, Dict[str, bool], bool) -> None
        """Translate one command node, recursing into its children.

        Args:
          node: the command to translate
          local_symbols: symbol table of the enclosing function; None at the
            top level.  A fresh table is created for each ShFunction.
          at_top_level: only passed through CommandList and Redirect, down to
            DoShAssignment()

        Node types without a case below produce no replacement text; their
        original text is copied whenever the cursor next prints.
        """
        UP_node = node

        with tagswitch(node) as case:
            if case(command_e.CommandList):
                node = cast(command.CommandList, UP_node)

                # TODO: How to distinguish between echo hi; echo bye; and on
                # separate lines
                for child in node.children:
                    self.DoCommand(child,
                                   local_symbols,
                                   at_top_level=at_top_level)

            elif case(command_e.Redirect):
                node = cast(command.Redirect, UP_node)

                self.DoCommand(node.child,
                               local_symbols,
                               at_top_level=at_top_level)
                for r in node.redirects:
                    self.DoRedirect(r, local_symbols)

            elif case(command_e.Simple):
                node = cast(command.Simple, UP_node)

                self._DoSimple(node, local_symbols)

            elif case(command_e.ShAssignment):
                node = cast(command.ShAssignment, UP_node)

                self.DoShAssignment(node, at_top_level, local_symbols)

            elif case(command_e.Pipeline):
                node = cast(command.Pipeline, UP_node)

                for child in node.children:
                    self.DoCommand(child, local_symbols)

            elif case(command_e.AndOr):
                node = cast(command.AndOr, UP_node)

                for child in node.children:
                    self.DoCommand(child, local_symbols)

            elif case(command_e.Sentence):
                node = cast(command.Sentence, UP_node)

                # 'ls &' to 'fork ls'
                # Keep ; the same.
                self.DoCommand(node.child, local_symbols)

            # This has to be different in the function case.
            elif case(command_e.BraceGroup):
                node = cast(BraceGroup, UP_node)

                # { echo hi; } -> do { echo hi }
                # For now it might be OK to keep 'do { echo hi; }
                self.cursor.PrintUntil(node.left)
                self.cursor.SkipPast(node.left)
                self.f.write('do {')

                for child in node.children:
                    self.DoCommand(child, local_symbols)

            elif case(command_e.Subshell):
                node = cast(command.Subshell, UP_node)

                # (echo hi) -> shell echo hi
                # (echo hi; echo bye) -> shell {echo hi; echo bye}

                self.cursor.PrintUntil(node.left)
                self.cursor.SkipPast(node.left)
                self.f.write('shell {')

                self.DoCommand(node.child, local_symbols)

                self.cursor.PrintUntil(node.right)
                self.cursor.SkipPast(node.right)
                self.f.write('}')

            elif case(command_e.ShFunction):
                node = cast(command.ShFunction, UP_node)

                # New symbol table for every function.
                new_local_symbols = {}  # type: Dict[str, bool]

                # Should be the left most span, including 'function'
                if node.keyword:  # function foo { ...
                    self.cursor.PrintUntil(node.keyword)
                else:  # foo() { ...
                    self.cursor.PrintUntil(node.name_tok)

                self.f.write('proc %s ' % node.name)

                UP_body = node.body
                with tagswitch(UP_body) as case:
                    if case(command_e.BraceGroup):
                        body = cast(BraceGroup, UP_body)
                        # Drop the original 'function name ()' text; the {
                        # itself is copied by the cursor
                        self.cursor.SkipUntil(body.left)

                        # Don't add "do" like a standalone brace group.  Just
                        # use {}.
                        for child in body.children:
                            self.DoCommand(child, new_local_symbols)
                    else:
                        # very rare cases like f() ( subshell )
                        pass

            elif case(command_e.DoGroup):
                node = cast(command.DoGroup, UP_node)

                # do -> {
                self.cursor.PrintUntil(node.left)
                self.cursor.SkipPast(node.left)
                self.f.write('{')

                for child in node.children:
                    self.DoCommand(child, local_symbols)

                # done -> }
                self.cursor.PrintUntil(node.right)
                self.cursor.SkipPast(node.right)
                self.f.write('}')

            elif case(command_e.ForEach):
                node = cast(command.ForEach, UP_node)

                # Need to preserve spaces between words, because there can be
                # line wrapping.
                # for x in a b c \
                #    d e f; do

                with tagswitch(node.iterable) as case:
                    if case(for_iter_e.Args):
                        # NOTE(review): nothing is flushed before this write,
                        # so text pending before 'for' looks like it is
                        # dropped by the SkipUntil() below -- confirm.
                        self.f.write('for %s in @ARGV ' % node.iter_names[0])

                        # note: command_t doesn't have .spids
                        body_tok = location.TokenForCommand(node.body)
                        self.cursor.SkipUntil(body_tok)

                    elif case(for_iter_e.Words):
                        pass

                    elif case(for_iter_e.YshExpr):
                        pass

                # Drop the ; before 'do'
                if node.semi_tok is not None:
                    self.cursor.PrintUntil(node.semi_tok)
                    self.cursor.SkipPast(node.semi_tok)

                self.DoCommand(node.body, local_symbols)

            elif case(command_e.WhileUntil):
                node = cast(command.WhileUntil, UP_node)

                # Skip 'until', and replace it with 'while not'
                if node.keyword.id == Id.KW_Until:
                    self.cursor.PrintUntil(node.keyword)
                    self.cursor.SkipPast(node.keyword)
                    self.f.write('while !')

                if node.cond.tag() == condition_e.Shell:
                    commands = cast(List_of_command, node.cond)
                    # Skip the semi-colon in the condition, which is usually a
                    # Sentence
                    if (len(commands) == 1 and
                            commands[0].tag() == command_e.Sentence):
                        sentence = cast(command.Sentence, commands[0])
                        self.DoCommand(sentence.child, local_symbols)

                        # Flush what the child did not print itself before
                        # dropping the terminator, like the 'if' case does.
                        # Otherwise a condition such as [[ ... ]], which
                        # prints nothing, would be skipped along with the
                        # 'while' keyword.
                        self.cursor.PrintUntil(sentence.terminator)
                        self.cursor.SkipPast(sentence.terminator)

                self.DoCommand(node.body, local_symbols)

            elif case(command_e.If):
                node = cast(command.If, UP_node)

                # if foo; then -> if foo {
                # elif foo; then -> } elif foo {
                for i, arm in enumerate(node.arms):
                    elif_tok = arm.keyword
                    then_tok = arm.then_tok

                    if i != 0:  # 'if' not 'elif' on the first arm
                        self.cursor.PrintUntil(elif_tok)
                        self.f.write('} ')

                    cond = arm.cond
                    if cond.tag() == condition_e.Shell:
                        commands = cast(List_of_command, cond)
                        if (len(commands) == 1 and
                                commands[0].tag() == command_e.Sentence):
                            sentence = cast(command.Sentence, commands[0])
                            self.DoCommand(sentence, local_symbols)

                            # Remove semi-colon
                            self.cursor.PrintUntil(sentence.terminator)
                            self.cursor.SkipPast(sentence.terminator)
                        else:
                            for child in commands:
                                self.DoCommand(child, local_symbols)

                    # then -> {
                    self.cursor.PrintUntil(then_tok)
                    self.cursor.SkipPast(then_tok)
                    self.f.write('{')

                    for child in arm.action:
                        self.DoCommand(child, local_symbols)

                # else -> } else {
                if len(node.else_action):
                    self.cursor.PrintUntil(node.else_kw)
                    self.f.write('} ')
                    self.cursor.PrintIncluding(node.else_kw)
                    self.f.write(' {')

                    for child in node.else_action:
                        self.DoCommand(child, local_symbols)

                # fi -> }
                self.cursor.PrintUntil(node.fi_kw)
                self.cursor.SkipPast(node.fi_kw)
                self.f.write('}')

            elif case(command_e.Case):
                node = cast(command.Case, UP_node)

                to_match = None  # type: word_t
                with tagswitch(node.to_match) as case:
                    if case(case_arg_e.YshExpr):
                        # Already YSH syntax; leave it alone
                        return
                    elif case(case_arg_e.Word):
                        to_match = cast(case_arg.Word, node.to_match).w
                    else:
                        raise AssertionError()

                self.cursor.PrintIncluding(node.case_kw)

                # Figure out the variable name, so we can translate
                # - $var to (var)
                # - "$var" to (var)
                # Only a word that consists of exactly that one part is
                # recognized; $var.txt must not be treated as $var.
                var_part = None  # type: SimpleVarSub
                with tagswitch(to_match) as case:
                    if case(word_e.Compound):
                        w = cast(CompoundWord, to_match)
                        if len(w.parts) == 1:
                            part0 = w.parts[0]

                            with tagswitch(part0) as case2:
                                if case2(word_part_e.SimpleVarSub):
                                    var_part = cast(SimpleVarSub, part0)

                                elif case2(word_part_e.DoubleQuoted):
                                    dq_part = cast(DoubleQuoted, part0)
                                    if len(dq_part.parts) == 1:
                                        dq_part0 = dq_part.parts[0]

                                        # Nesting is annoying -- it would be
                                        # nice to use pattern matching, but
                                        # mycpp won't like it.
                                        # TODO: extract into a common function
                                        with tagswitch(dq_part0) as case3:
                                            if case3(word_part_e.SimpleVarSub):
                                                var_part = cast(
                                                    SimpleVarSub, dq_part0)

                if var_part:
                    self.f.write(' (')
                    self.f.write(lexer.LazyStr(var_part.tok))
                    self.f.write(') ')
                else:
                    # Not a shape we recognize: keep the original word rather
                    # than dropping it with the skip below
                    self.cursor.PrintUntil(node.arms_start)

                self.cursor.SkipPast(node.arms_start)  # Skip past 'in'
                self.f.write('{')

                missing_last_dsemi = False

                for case_arm in node.arms:
                    # Replace ) with {
                    self.cursor.PrintUntil(case_arm.middle)
                    self.f.write(' {')
                    self.cursor.SkipPast(case_arm.middle)

                    for child in case_arm.action:
                        self.DoCommand(child, local_symbols)

                    if case_arm.right:
                        # Change ;; to }
                        self.cursor.PrintUntil(case_arm.right)
                        self.f.write('}')
                        self.cursor.SkipPast(case_arm.right)
                    else:
                        # valid: case $x in pat) echo hi ; esac
                        missing_last_dsemi = True

                self.cursor.PrintUntil(node.arms_end)  # 'esac' or }

                if missing_last_dsemi:  # Print } for missing ;;
                    self.f.write('}\n')

                self.cursor.SkipPast(node.arms_end)  # 'esac' or }

                self.f.write('}')  # in place of 'esac'

            elif case(command_e.TimeBlock):
                node = cast(command.TimeBlock, UP_node)

                self.DoCommand(node.pipeline, local_symbols)

            elif case(command_e.DParen):
                node = cast(command.DParen, UP_node)
                # TODO: arith expressions can words with command subs
                pass

            elif case(command_e.DBracket):
                node = cast(command.DBracket, UP_node)

                # TODO: bool_expr_t can have words with command subs
                pass

            else:
                pass
                #log('Command not handled: %s', node)
                #raise AssertionError(node.__class__.__name__)

    def DoRhsWord(self, node, local_symbols):
        # type: (rhs_word_t, Dict[str, bool]) -> None
        """For the RHS of assignments.

        An empty RHS becomes ''.  Otherwise the word is wrapped in the quotes
        chosen by _GetRhsStyle(), or printed without added quotes for the
        Expr and Unquoted styles.

        TODO: for complex cases of word joining:
            local a=unquoted'single'"double"'"'

        We can try to handle it:
            var a = y"unquotedsingledouble\\""

        Or simply abort and LEAVE IT ALONE.  We should only translate things
        we recognize.
        """
        UP_node = node
        with tagswitch(node) as case:
            if case(rhs_word_e.Empty):
                self.f.write("''")

            elif case(rhs_word_e.Compound):
                node = cast(CompoundWord, UP_node)

                # TODO: This is wrong!
                style = _GetRhsStyle(node)
                if style == word_style_e.SQ:
                    self.f.write("'")
                    self.DoWordInCommand(node, local_symbols)
                    self.f.write("'")
                elif style == word_style_e.DQ:
                    self.f.write('"')
                    self.DoWordInCommand(node, local_symbols)
                    self.f.write('"')
                # TODO: Put these back
                #elif style == word_style_e.Expr:
                #  pass
                #elif style == word_style_e.Unquoted:
                #  pass
                else:
                    # "${foo:-default}" -> foo or 'default'
                    # ${foo:-default} -> @split(foo or 'default')
                    #                    @(foo or 'default') -- implicit split.

                    if word_.IsVarSub(node):  # ${1} or "$1"
                        # Do it in expression mode
                        pass
                    # NOTE: ArithSub with $(1 +2 ) is different than 1 + 2
                    # because of conversion to string.

                    # For now, just stub it out
                    self.DoWordInCommand(node, local_symbols)

    def DoWordInCommand(self, node, local_symbols):
        # type: (word_t, Dict[str, bool]) -> None
        """Translate a word in command position, e.g. remove quotes.

            "$@"        ->  @ARGV
            "$1" "$foo" ->  $1 $foo
            "${foo}"    ->  ${foo}     (quotes dropped)
            "$(cmd)"    ->  $(cmd)     (quotes dropped)

        Any other compound word is translated part by part with DoWordPart().
        BracedTree words are left alone.
        """
        UP_node = node

        with tagswitch(node) as case:
            if case(word_e.Compound):
                node = cast(CompoundWord, UP_node)

                # UNQUOTE simple var subs

                # Special case for "$@".
                # TODO:
                # "$foo" -> $foo
                # "${foo}" -> $foo

                if (len(node.parts) == 1 and
                        node.parts[0].tag() == word_part_e.DoubleQuoted):
                    dq_part = cast(DoubleQuoted, node.parts[0])

                    # NOTE: In double quoted case, this is the begin and end
                    # quote.
                    # Do we need a HereDoc part?

                    if len(dq_part.parts) == 1:
                        part0 = dq_part.parts[0]
                        if part0.tag() == word_part_e.SimpleVarSub:
                            vsub_part = cast(SimpleVarSub, dq_part.parts[0])
                            if vsub_part.tok.id == Id.VSub_At:
                                self.cursor.PrintUntil(dq_part.left)
                                self.cursor.SkipPast(
                                    dq_part.right)  # " then $@ then "
                                self.f.write('@ARGV')
                                return  # Done replacing

                            # "$1" -> $1, "$foo" -> $foo
                            if vsub_part.tok.id in (Id.VSub_Number,
                                                    Id.VSub_DollarName):
                                self.cursor.PrintUntil(dq_part.left)
                                self.cursor.SkipPast(dq_part.right)
                                self.f.write(lexer.TokenVal(vsub_part.tok))
                                return

                        # Single arith sub, command sub, etc.
                        # On the other hand, an unquoted one needs to turn into
                        #
                        # $(echo one two) -> @[echo one two]
                        # `echo one two` -> @[echo one two]
                        #
                        # ${var:-'the default'} -> @$(var or 'the default')
                        #
                        # $((1 + 2)) -> $(1 + 2) -- this is OK unquoted

                        elif part0.tag() == word_part_e.BracedVarSub:
                            # Skip over quote
                            self.cursor.PrintUntil(dq_part.left)
                            self.cursor.SkipPast(dq_part.left)
                            self.DoWordPart(part0, local_symbols)
                            self.cursor.SkipPast(dq_part.right)
                            return

                        elif part0.tag() == word_part_e.CommandSub:
                            self.cursor.PrintUntil(dq_part.left)
                            self.cursor.SkipPast(dq_part.left)
                            self.DoWordPart(part0, local_symbols)
                            self.cursor.SkipPast(dq_part.right)
                            return

                # TODO: 'foo'"bar" should be "foobar", etc.
                # If any part is double quoted, you can always double quote the
                # whole thing?
                for part in node.parts:
                    self.DoWordPart(part, local_symbols)

            elif case(word_e.BracedTree):
                # Not doing anything now
                pass

            else:
                raise AssertionError(node.__class__.__name__)

    def DoWordPart(self, node, local_symbols, quoted=False):
        # type: (word_part_t, Dict[str, bool], bool) -> None
        """Translate one word part.

        Args:
          node: the part to translate
          local_symbols: passed through to nested commands
          quoted: True for the parts of a DoubleQuoted part; escaped literals
            are then left unchanged
        """
        # Flush pending text before the part, when it has a left token
        left_tok = location.LeftTokenForWordPart(node)
        if left_tok:
            self.cursor.PrintUntil(left_tok)

        UP_node = node

        with tagswitch(node) as case:
            if case(word_part_e.ShArrayLiteral, word_part_e.BashAssocLiteral,
                    word_part_e.TildeSub, word_part_e.ExtGlob):
                pass

            elif case(word_part_e.EscapedLiteral):
                node = cast(word_part.EscapedLiteral, UP_node)
                if quoted:
                    pass
                else:
                    # If unquoted \e, it should quoted instead.  ' ' vs.
                    # \<invisible space>
                    # Hm is this necessary though?  I think the only motivation
                    # is changing \{ and \( for macros.  And ' ' to be
                    # readable/visible.
                    t = node.token
                    val = lexer.TokenSliceLeft(t, 1)
                    assert len(val) == 1, val
                    # A backslash-newline (line continuation) is kept as is
                    if val != '\n':
                        self.cursor.PrintUntil(t)
                        self.cursor.SkipPast(t)
                        self.f.write("'%s'" % val)

            elif case(word_part_e.Literal):
                node = cast(Token, UP_node)
                self.cursor.PrintIncluding(node)

            elif case(word_part_e.SingleQuoted):
                node = cast(SingleQuoted, UP_node)

                # TODO:
                # '\n' is '\\n'
                # $'\n' is '\n'
                # TODO: Should print until right_spid
                # left_spid, right_spid = node.spids
                self.cursor.PrintUntil(node.right)

            elif case(word_part_e.DoubleQuoted):
                node = cast(DoubleQuoted, UP_node)
                for part in node.parts:
                    self.DoWordPart(part, local_symbols, quoted=True)

            elif case(word_part_e.SimpleVarSub):
                node = cast(SimpleVarSub, UP_node)

                op_id = node.tok.id

                if op_id == Id.VSub_DollarName:
                    self.cursor.PrintIncluding(node.tok)

                elif op_id == Id.VSub_Number:
                    self.cursor.PrintIncluding(node.tok)

                elif op_id == Id.VSub_At:  # $@ -- handled quoted case above
                    self.f.write('$[join(ARGV)]')
                    self.cursor.SkipPast(node.tok)

                elif op_id == Id.VSub_Star:  # $*
                    # PEDANTIC: Depends if quoted or unquoted
                    self.f.write('$[join(ARGV)]')
                    self.cursor.SkipPast(node.tok)

                elif op_id == Id.VSub_Pound:  # $#
                    # len(ARGV) ?
                    self.f.write('$Argc')
                    self.cursor.SkipPast(node.tok)

                else:
                    pass

            elif case(word_part_e.BracedVarSub):
                node = cast(BracedVarSub, UP_node)

                # NOTE: Why do we need this but we don't need it in command
                # sub?
                self.cursor.PrintUntil(node.left)

                if node.bracket_op:
                    # a[1]
                    # These two change the sigil!  ${a[@]} is now @a!
                    # a[@]
                    # a[*]
                    pass

                if node.prefix_op:
                    # len()
                    pass
                if node.suffix_op:
                    pass

                op_id = node.name_tok.id
                if op_id == Id.VSub_QMark:
                    self.cursor.PrintIncluding(node.name_tok)

                self.cursor.PrintIncluding(node.right)

            elif case(word_part_e.CommandSub):
                node = cast(CommandSub, UP_node)

                if node.left_token.id == Id.Left_Backtick:
                    # `cmd` -> $(cmd)
                    self.cursor.PrintUntil(node.left_token)
                    self.f.write('$(')
                    self.cursor.SkipPast(node.left_token)

                    self.DoCommand(node.child, local_symbols)

                    # Skip over right `
                    self.cursor.SkipPast(node.right)
                    self.f.write(')')

                else:
                    # Other command subs are copied unchanged
                    self.cursor.PrintIncluding(node.right)

            else:
                pass