| 1 | """
|
| 2 | word.py - Utility functions for words, e.g. treating them as "tokens".
|
| 3 | """
|
| 4 |
|
| 5 | from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
|
| 6 | from _devbuild.gen.syntax_asdl import (
|
| 7 | Token,
|
| 8 | CompoundWord,
|
| 9 | DoubleQuoted,
|
| 10 | SingleQuoted,
|
| 11 | word,
|
| 12 | word_e,
|
| 13 | word_t,
|
| 14 | word_str,
|
| 15 | word_part,
|
| 16 | word_part_t,
|
| 17 | word_part_e,
|
| 18 | AssocPair,
|
| 19 | )
|
| 20 | from frontend import consts
|
| 21 | from frontend import lexer
|
| 22 | from mycpp import mylib
|
| 23 | from mycpp.mylib import tagswitch, log
|
| 24 |
|
| 25 | from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
|
| 26 | if TYPE_CHECKING:
|
| 27 | from osh.word_parse import WordParser
|
| 28 |
|
| 29 | _ = log
|
| 30 |
|
| 31 |
|
def LiteralId(p):
    # type: (word_part_t) -> Id_t
    """If the WordPart consists of a single literal token, return its Id.

    Used to check for Id.KW_For, Id.Lit_RBrace, etc.
    """
    UP_part = p
    if p.tag() == word_part_e.Literal:
        return cast(Token, UP_part).id
    else:
        return Id.Undefined_Tok  # unequal to any other Id


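# Example for LiteralId (an illustrative sketch, not executed; assumes `w`
# is a parsed CompoundWord):
#
#   if LiteralId(w.parts[0]) == Id.Lit_Tilde:
#       pass  # the word starts with a literal ~ token
#
# Returning Id.Undefined_Tok for non-literal parts lets callers compare
# against a specific Id without first checking the part's tag.

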
def CheckLiteralId(part, tok_id):
    # type: (word_part_t, Id_t) -> Optional[Token]
    """If the WordPart is a Token of a given Id, return the Token."""
    if part.tag() != word_part_e.Literal:
        return None

    tok = cast(Token, part)
    if tok.id == tok_id:
        return tok

    return None


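# Example for CheckLiteralId (a sketch, not executed; assumes `w` is a
# parsed CompoundWord):
#
#   tok = CheckLiteralId(w.parts[0], Id.Lit_Equals)
#   if tok is not None:
#       pass  # the part is a literal = token, and tok carries its location

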
def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. Compiling constant regex words at parse time
    5. a special case for ${a////c}, to see if we got a leading slash in the
       pattern.

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
            evaluated, then we return False.
        value: a string (not Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            # Weird performance issue: if we change this to lexer.LazyStr(),
            # the parser slows down, e.g. on configure-coreutils, from 805 B
            # irefs to ~830 B.  The real issue is that we should avoid
            # calling this from CommandParser - for the Hay node.
            return True, lexer.TokenVal(tok), False
            #return True, lexer.LazyStr(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            return True, part.sval, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.YshArrayLiteral, word_part_e.InitializerLiteral,
                  word_part_e.ZshVarSub, word_part_e.CommandSub,
                  word_part_e.SimpleVarSub, word_part_e.BracedVarSub,
                  word_part_e.TildeSub, word_part_e.ArithSub,
                  word_part_e.ExtGlob, word_part_e.Splice,
                  word_part_e.ExprSub):
            return False, '', False

        else:
            raise AssertionError(part.tag())


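# Example for _EvalWordPart (a sketch, not executed; assumes the two parts
# were parsed from the word foo'bar'):
#
#   _EvalWordPart(literal_part)        # => (True, 'foo', False)
#   _EvalWordPart(single_quoted_part)  # => (True, 'bar', True)
#
# A CommandSub part like $(date) yields (False, '', False), since it can't
# be evaluated at parse time.

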
def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """Detect common word shapes, and evaluate them to a string quickly.

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) a single quoted word, like 'foo'

    Other patterns we could detect:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but
        not YSH)
        - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens to this case, e.g. + is Lit_Other,
                # and it's a Token in 'expr'.  Right now it's Lit_Chars
                # (e.g. ls -l), plus [ and ], because I know those are
                # common.  { and } are not as common.
                return lexer.LazyStr(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have a lazy (str? sval) field.
            # This would only affect multi-line strings though?
            return part0.sval

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such
            return None


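# Example for FastStrEval (a sketch, not executed; word_for() is a
# hypothetical helper standing in for the real parser):
#
#   FastStrEval(word_for('-l'))     # => '-l'   (single Lit_Chars token)
#   FastStrEval(word_for("'foo'"))  # => 'foo'  (single SingleQuoted part)
#   FastStrEval(word_for('"$x"'))   # => None   (needs the full evaluator)

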
def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a CompoundWord at PARSE TIME."""
    quoted = False

    # e.g. the ( in 'for (', as opposed to 'for ((', is a token word
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    strs = []  # type: List[str]
    for part in w.parts:
        ok, s, q = _EvalWordPart(part)
        if not ok:
            return False, '', quoted
        if q:
            quoted = True  # at least one part was quoted
        strs.append(s)
    #log('StaticEval parts %s', w.parts)
    return True, ''.join(strs), quoted


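# Example for StaticEval (a sketch, not executed; assumes `w` was parsed
# from the here doc delimiter in cat <<'EOF'):
#
#   ok, s, quoted = StaticEval(w)  # => (True, 'EOF', True)
#
# quoted=True is how the here doc reader knows not to expand $var and
# $(command) in the body.

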
# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1:  A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if
# there is no slash... If none of the characters in the tilde-prefix are
# quoted, the characters in the tilde-prefix following the tilde shall be
# treated as a possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode?  You respect quotes
# and substitutions.
#
# We only detect ~Lit_Chars and split.  So we might as well just write a
# regex.


def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    The word might begin with a Literal that needs to be turned into a
    TildeSub.  (It depends on whether the second token begins with a slash.)

    If so, return a new word.  Otherwise return None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    # BracedTree can't be tilde expanded
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    return TildeDetect2(w)


def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we
    have a List[CompoundWord].

    Tilde detection:

    YES:
      ~     ~/
      ~bob  ~bob/

    NO:
      ~bob#  ~bob#/
      ~bob$x
      ~$x

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    part0 = w.parts[0]
    id0 = LiteralId(part0)
    if id0 != Id.Lit_Tilde:
        return None  # $x is not TildeSub

    tok0 = cast(Token, part0)

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)


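# Example for TildeDetect2 (a sketch, not executed; assumes `w` was parsed
# from ~bob/src):
#
#   new_w = TildeDetect2(w)
#   # new_w.parts[0] is a word_part.TildeSub for user 'bob'
#   # new_w.parts[1:] are the remaining parts, starting with the / token
#
# For ~$x it returns None, matching the NO cases in the docstring.

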
def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde subs, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there is no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Avoid IndexError, since we have to look ahead up to 2 tokens
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip tilde in middle of word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # Look ahead up to 2 parts; if ~ ends the string, these are the
            # None padding we appended above
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    parts.pop()
    parts.pop()

    # Mutate argument
    w.parts = new_parts


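# Example for TildeDetectAssign (a sketch, not executed; assumes `w` is the
# RHS word of a=~/src:~bob):
#
#   TildeDetectAssign(w)  # mutates w.parts in place
#   # Both ~ tokens become word_part.TildeSub, because each one is at the
#   # start of the word or follows a : token.

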
def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    out = []  # type: List[word_t]
    for w in words:
        t = TildeDetect(w)
        if t:
            out.append(t)
        else:
            out.append(w)
    return out


def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Used in cmd_parse."""
    for part in w.parts:
        if part.tag() == word_part_e.InitializerLiteral:
            return True
    return False


def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

    FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, s, quoted = StaticEval(w)
    # Function names should not have quotes
    if not ok or quoted:
        return ''
    return s


def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

    func() { echo hi; }
    func=(1 2 3)
    """
    if len(w.parts) == 0:
        return False

    return LiteralId(w.parts[0]) == Id.Lit_VarLike


def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will
    be confused between array assignments foo=(1 2) and function calls
    foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    if len(w.parts) != 1:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_ArithVarLike)


def CheckLeadingEquals(w):
    # type: (CompoundWord) -> Optional[Token]
    """Test whether a word looks like =word.

    For shopt --set strict_parse_equals.
    """
    if len(w.parts) == 0:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_Equals)


def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token   # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's not an
                   # assignment
      close_token  # Lit_ArrayLhsClose if it was detected, or None
      part_offset  # where to start the value word; 0 if not an assignment

    Cases:

    s=1
    s+=1
    s[x]=1
    s[x]+=1

    a=()
    a+=()
    a[x]=()
    a[x]+=()  # We parse this (as bash does), but it's never valid because
              # arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    part0 = w.parts[0]
    if part0.tag() != word_part_e.Literal:
        return no_token, no_token, 0

    tok0 = cast(Token, part0)

    if tok0.id == Id.Lit_VarLike:
        return tok0, no_token, 1  # everything after the first token is the value

    if tok0.id == Id.Lit_ArrayLhsOpen:
        # NOTE that a[]=x should be an error.  We don't want to silently
        # decay.
        if n < 2:
            return no_token, no_token, 0
        for i in xrange(1, n):
            part = w.parts[i]
            tok_close = CheckLiteralId(part, Id.Lit_ArrayLhsClose)
            if tok_close:
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar', or a[x+1+2/' without the
    # closing ].
    return no_token, no_token, 0


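# Example for DetectShAssignment (a sketch, not executed):
#
#   left, close, offset = DetectShAssignment(w)
#   # FOO=bar => (the Lit_VarLike token FOO=, None, 1)
#   # a[i]=x  => (the Lit_ArrayLhsOpen token a[, the Lit_ArrayLhsClose
#   #             token ]=, offset of the first value part)
#   # echo    => (None, None, 0), not an assignment

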
def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out the word
    parts.  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k]
    syntax is only used for associative array literals, as opposed to
    indexed array literals.
    """
    parts = w.parts
    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b

            has_plus = lexer.IsPlusEquals(cast(Token, parts[i]))

            # Type-annotated intermediate value for mycpp translation
            return AssocPair(key, value, has_plus)

    return None


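# Example for DetectAssocPair (a sketch, not executed; assumes `w` was
# parsed from ['k']=v inside an A=( ... ) literal):
#
#   pair = DetectAssocPair(w)  # an AssocPair, or None for a plain word
#   # The key word spans the parts between [ and ]=, and the value word
#   # spans the parts after ]=.

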
def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token


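# Example for IsControlFlow (a sketch, not executed; assumes `w` was parsed
# from the word break):
#
#   kind, tok = IsControlFlow(w)  # => (Kind.ControlFlow, the break token)
#
# A regular word like echo gives (Kind.Undefined, None).

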
def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    if part0.tag() == word_part_e.Literal:
        return cast(Token, part0)

    return None


def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Lit_RBrace, return a Token.

    This is a special case for osh/cmd_parse.py.

    The WordParser changes Id.Op_LBrace from ExprParser into Id.Lit_LBrace,
    so we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            return LiteralToken(w)

        else:
            raise AssertionError()


def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """Given a word that IS a CompoundWord containing just a keyword, return
    the single token at the start.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok
    return tok


def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)


#
# Polymorphic between Token and Compound
#


def ArithId(w):
    # type: (word_t) -> Id_t
    """Used by shell arithmetic parsing."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    assert isinstance(w, CompoundWord)
    return Id.Word_Compound


def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())


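# Example for BoolId (a sketch, not executed), for words inside [[ ]];
# word_for() is a hypothetical helper standing in for the real parser:
#
#   BoolId(word_for('-f'))   # => the operator's own Id, in Kind.BoolUnary
#   BoolId(word_for('foo'))  # => Id.Word_Compound, a regular word

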
def CommandId(w):
    # type: (word_t) -> Id_t
    """Used by the CommandParser."""
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Fine-grained categorization of SINGLE literal parts
            if len(w.parts) != 1:
                return Id.Word_Compound  # generic word

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # Not Kind.Lit, generic word

            if token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                              Id.Lit_TDot):
                # - { } are for YSH braces
                # - = is for the = keyword
                # - ... is to start multiline mode
                #
                # TODO: Should we use Op_{LBrace,RBrace} and Kind.Op when
                # parse_brace is on?  Lit_Equals could be KW_Equals?
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type  # Id.KW_Var, etc.

            return Id.Word_Compound  # generic word

        else:
            raise AssertionError(w.tag())


def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser.

    NOTE: This is inconsistent with CommandId(), because we never return
    Kind.KW or Kind.Lit.  But the CommandParser is easier to write this way.

    For example, these are valid redirects to a Kind.Word, and the parser
    checks:

    echo hi > =
    echo hi > {

    Invalid:

    echo hi > (
    echo hi > ;
    """
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        # CommandParser uses Kind.Redir, Kind.Op, Kind.Eof, etc.
        return consts.GetKind(tok.id)

    return Kind.Word


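# Example contrasting CommandId and CommandKind (a sketch, not executed;
# word_for() is a hypothetical helper standing in for the real parser):
#
#   CommandId(word_for('{'))    # => Id.Lit_LBrace, fine-grained
#   CommandKind(word_for('{'))  # => Kind.Word, since { is not an Operator

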
# Stubs for converting RHS of assignment to expression mode.
# For ysh_ify.py
def IsVarSub(w):
    # type: (word_t) -> bool
    """Return whether it's any var sub, or a double quoted one."""
    return False


# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])


def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    UP_w = w
    if w.tag() == word_e.String:
        w = cast(word.String, UP_w)
        if w.id == Id.Eof_Real:
            return 'EOF'
        else:
            return repr(w.s)
    else:
        return word_str(w.tag())  # tag name


class ctx_EmitDocToken(object):
    """For doc comments."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.EmitDocToken(False)


class ctx_Multiline(object):
    """For multiline commands."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.Multiline(False)


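# Usage sketch for the context managers above (not executed; assumes a
# constructed WordParser):
#
#   with ctx_Multiline(w_parser):
#       pass  # parse in multiline mode
#
# Multiline(False) is restored in __exit__, even if parsing raises.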
|