| 1 | """
|
| 2 | word.py - Utility functions for words, e.g. treating them as "tokens".
|
| 3 | """
|
| 4 |
|
| 5 | from _devbuild.gen.id_kind_asdl import Id, Kind, Id_t, Kind_t
|
| 6 | from _devbuild.gen.runtime_asdl import Piece
|
| 7 | from _devbuild.gen.syntax_asdl import (
|
| 8 | Token,
|
| 9 | CompoundWord,
|
| 10 | DoubleQuoted,
|
| 11 | SingleQuoted,
|
| 12 | word,
|
| 13 | word_e,
|
| 14 | word_t,
|
| 15 | word_str,
|
| 16 | word_part,
|
| 17 | word_part_t,
|
| 18 | word_part_e,
|
| 19 | AssocPair,
|
| 20 | )
|
| 21 | from frontend import consts
|
| 22 | from frontend import lexer
|
| 23 | from mycpp import mylib
|
| 24 | from mycpp.mylib import tagswitch, log
|
| 25 |
|
| 26 | from typing import Tuple, Optional, List, Any, cast, TYPE_CHECKING
|
| 27 | if TYPE_CHECKING:
|
| 28 | from osh.word_parse import WordParser
|
| 29 |
|
| 30 | _ = log
|
| 31 |
|
| 32 |
|
| 33 | def MakePiece(s, quoted):
|
| 34 | # type: (str, bool) -> Piece
|
| 35 | """
|
| 36 | For $x versus "$x", etc.
|
| 37 | """
|
| 38 | return Piece(s, quoted, not quoted)
|
| 39 |
|
| 40 |
|
| 41 | def PieceQuoted(s):
|
| 42 | # type: (str) -> Piece
|
| 43 | """
|
| 44 | For 'hi' "$x"
|
| 45 | and $[myexpr] in YSH
|
| 46 | """
|
| 47 | # quoted=True, do_split=False
|
| 48 | return Piece(s, True, False)
|
| 49 |
|
| 50 |
|
| 51 | def PieceOperator(s):
|
| 52 | # type: (str) -> Piece
|
| 53 | """
|
| 54 | For Extended glob @(--verbose|help)
|
| 55 | And BashRegexGroup [[ foo =~ x(a b)y ]
|
| 56 |
|
| 57 | We don't want ( to become \(, so quoted=False
|
| 58 | """
|
| 59 | # quoted=False, do_split=False
|
| 60 | return Piece(s, False, False)
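

# A quick reference for the (quoted, do_split) combinations produced above
# (illustrative; field order assumed from the comments in this file):
#
#   MakePiece('x', False)  -> Piece('x', quoted=False, do_split=True)   # $x
#   MakePiece('x', True)   -> Piece('x', quoted=True,  do_split=False)  # "$x"
#   PieceQuoted('hi')      -> Piece('hi', quoted=True,  do_split=False)
#   PieceOperator('(')     -> Piece('(',  quoted=False, do_split=False)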
|

def LiteralId(part):
    # type: (word_part_t) -> Id_t
    """If the WordPart consists of a single literal token, return its Id.

    Used for Id.KW_For, Id.RBrace, etc.
    """
    if part.tag() != word_part_e.Literal:
        return Id.Undefined_Tok  # unequal to any other Id

    return cast(Token, part).id


def CheckLiteralId(part, tok_id):
    # type: (word_part_t, Id_t) -> Optional[Token]
    """If the WordPart is a Token of a given Id, return the Token."""
    if part.tag() != word_part_e.Literal:
        return None

    tok = cast(Token, part)
    if tok.id == tok_id:
        return tok

    return None


def LiteralToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word consists of a single literal token, return it.

    Otherwise return None.
    """
    # We're casting here because this function is called by the CommandParser
    # for var, setvar, '...', etc.  It's easier to cast in one place.
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    if part0.tag() != word_part_e.Literal:
        return None

    return cast(Token, part0)
|

def _EvalWordPart(part):
    # type: (word_part_t) -> Tuple[bool, str, bool]
    """Evaluate a WordPart at PARSE TIME.

    Used for:

    1. here doc delimiters
    2. function names
    3. for loop variable names
    4. Compiling constant regex words at parse time
    5. a special case for ${a////c}, to see if we got a leading slash in the
       pattern.

    Returns:
      3-tuple of
        ok: bool, success.  If there are parts that can't be statically
          evaluated, then we return False.
        value: a string (not Value)
        quoted: whether any part of the word was quoted
    """
    UP_part = part
    with tagswitch(part) as case:
        if case(word_part_e.Literal):
            tok = cast(Token, UP_part)
            # Weird performance issue: if we change this to lexer.LazyStr(),
            # the parser slows down, e.g. on configure-coreutils from 805 B
            # irefs to ~830 B.  The real issue is that we should avoid
            # calling this from CommandParser - for the Hay node.
            return True, lexer.TokenVal(tok), False
            #return True, lexer.LazyStr(tok), False

        elif case(word_part_e.EscapedLiteral):
            part = cast(word_part.EscapedLiteral, UP_part)
            if mylib.PYTHON:
                val = lexer.TokenVal(part.token)
                assert len(val) == 2, val  # e.g. \*
                assert val[0] == '\\'
            s = lexer.TokenSliceLeft(part.token, 1)
            return True, s, True

        elif case(word_part_e.SingleQuoted):
            part = cast(SingleQuoted, UP_part)
            return True, part.sval, True

        elif case(word_part_e.DoubleQuoted):
            part = cast(DoubleQuoted, UP_part)
            strs = []  # type: List[str]
            for p in part.parts:
                ok, s, _ = _EvalWordPart(p)
                if not ok:
                    return False, '', True
                strs.append(s)

            return True, ''.join(strs), True  # At least one part was quoted!

        elif case(word_part_e.YshArrayLiteral, word_part_e.InitializerLiteral,
                  word_part_e.ZshVarSub, word_part_e.CommandSub,
                  word_part_e.SimpleVarSub, word_part_e.BracedVarSub,
                  word_part_e.TildeSub, word_part_e.ArithSub,
                  word_part_e.ExtGlob, word_part_e.Splice,
                  word_part_e.ExprSub):
            return False, '', False

        else:
            raise AssertionError(part.tag())
|

def FastStrEval(w):
    # type: (CompoundWord) -> Optional[str]
    """Detects common cases:

    (1) CompoundWord([LiteralPart(Id.Lit_Chars)])
        For echo -e, test x -lt 0, etc.
    (2) A single-quoted word like 'foo'

    Other patterns we could detect:
    (1) "foo"
    (2) "$var" and "${var}" - I think these are very common in OSH code (but
        not YSH)
        - I think val_ops.Stringify() can handle all the errors
    """
    if len(w.parts) != 1:
        return None

    part0 = w.parts[0]
    UP_part0 = part0
    with tagswitch(part0) as case:
        if case(word_part_e.Literal):
            part0 = cast(Token, UP_part0)

            if part0.id in (Id.Lit_Chars, Id.Lit_LBracket, Id.Lit_RBracket):
                # Could add more tokens in this case, e.g. + is Lit_Other,
                # and it's a Token in 'expr'.  Right now it's Lit_Chars
                # (e.g. ls -l) and [ and ], because I know those are common.
                # { } are not as common.
                return lexer.LazyStr(part0)

            else:
                # e.g. Id.Lit_Star needs to be glob expanded
                # TODO: Consider moving Id.Lit_Star etc. to Kind.MaybeGlob?
                return None

        elif case(word_part_e.SingleQuoted):
            part0 = cast(SingleQuoted, UP_part0)
            # TODO: SingleQuoted should have a lazy (str? sval) field.
            # This would only affect multi-line strings though?
            return part0.sval

        else:
            # e.g. DoubleQuoted can't be optimized to a string, because it
            # might have "$@" and such
            return None
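

# Illustrative behavior of FastStrEval(), based on the cases above:
#
#   -l       a single Id.Lit_Chars token  -> '-l'  (fast path)
#   'foo'    a single SingleQuoted part   -> 'foo' (part0.sval)
#   *        Id.Lit_Star must be globbed  -> None  (fall back to the evaluator)
#   "$@"     DoubleQuoted                 -> None  (fall back to the evaluator)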
|

def StaticEval(UP_w):
    # type: (word_t) -> Tuple[bool, str, bool]
    """Evaluate a Compound at PARSE TIME."""
    quoted = False

    # e.g. for ( instead of for (( is a token word
    if UP_w.tag() != word_e.Compound:
        return False, '', quoted

    w = cast(CompoundWord, UP_w)

    strs = []  # type: List[str]
    for part in w.parts:
        ok, s, q = _EvalWordPart(part)
        if not ok:
            return False, '', quoted
        if q:
            quoted = True  # at least one part was quoted
        strs.append(s)
    #log('StaticEval parts %s', w.parts)
    return True, ''.join(strs), quoted
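

# Examples of parse-time evaluation (illustrative):
#
#   foo'bar'  -> (True, 'foobar', True)   # quoted, because of the '' part
#   foo       -> (True, 'foo', False)     # no part was quoted
#   foo$x     -> (False, '', False)       # $x isn't known at parse time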
|

# From bash, general.c, unquoted_tilde_word():
# POSIX.2, 3.6.1: A tilde-prefix consists of an unquoted tilde character at
# the beginning of the word, followed by all of the characters preceding the
# first unquoted slash in the word, or all the characters in the word if
# there is no slash... If none of the characters in the tilde-prefix are
# quoted, the characters in the tilde-prefix following the tilde shall be
# treated as a possible login name.
#define TILDE_END(c) ((c) == '\0' || (c) == '/' || (c) == ':')
#
# So an unquoted tilde can ALWAYS start a new lex mode?  You respect quotes
# and substitutions.
#
# We only detect ~Lit_Chars and split.  So we might as well just write a
# regex.
|

def TildeDetect(UP_w):
    # type: (word_t) -> Optional[CompoundWord]
    """Detect tilde expansion in a word.

    It might begin with a Literal that needs to be turned into a TildeSub.
    (It depends on whether the second token begins with a slash.)

    If so, it returns a new word.  Otherwise it returns None.

    NOTE:
    - The regex for Lit_TildeLike could be expanded.  Right now it's
      conservative, like Lit_Chars without the /.
    - It's possible to write this in a mutating style, since only the first
      token is changed.  But note that we CANNOT know this during lexing.
    """
    # BracedTree can't be tilde expanded
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    return TildeDetect2(w)
|

def TildeDetect2(w):
    # type: (CompoundWord) -> Optional[CompoundWord]
    """If tilde sub is detected, returns a new CompoundWord.

    Accepts CompoundWord, not word_t.  After brace expansion, we know we
    have a List[CompoundWord].

    Tilde detection:

    YES:
      ~        ~/
      ~bob     ~bob/

    NO:
      ~bob#    ~bob#/
      ~bob$x
      ~$x

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | %end)
    """
    if len(w.parts) == 0:  # ${a-} has no parts
        return None

    tok0 = CheckLiteralId(w.parts[0], Id.Lit_Tilde)
    if tok0 is None:
        return None

    new_parts = []  # type: List[word_part_t]

    if len(w.parts) == 1:  # ~
        new_parts.append(word_part.TildeSub(tok0, None, None))
        return CompoundWord(new_parts)

    id1 = LiteralId(w.parts[1])
    if id1 == Id.Lit_Slash:  # ~/
        new_parts.append(word_part.TildeSub(tok0, None, None))
        new_parts.extend(w.parts[1:])
        return CompoundWord(new_parts)

    if id1 != Id.Lit_Chars:
        return None  # ~$x is not TildeSub

    tok1 = cast(Token, w.parts[1])

    if len(w.parts) == 2:  # ~foo
        new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
        return CompoundWord(new_parts)

    id2 = LiteralId(w.parts[2])
    if id2 != Id.Lit_Slash:  # ~foo$x is not TildeSub
        return None

    new_parts.append(word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
    new_parts.extend(w.parts[2:])
    return CompoundWord(new_parts)
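

# Worked example (illustrative): a word like ~bob/src has parts roughly like
#
#   [Lit_Tilde '~', Lit_Chars 'bob', Lit_Slash '/', ...]
#
# so TildeDetect2() returns a new CompoundWord
#
#   [TildeSub(tok0, tok1, 'bob'), Lit_Slash '/', ...]
#
# while ~bob$x returns None, because the login name isn't statically known.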
|

def TildeDetectAssign(w):
    # type: (CompoundWord) -> None
    """Detects multiple tilde subs, like a=~:~/src:~bob

    MUTATES its argument.

    Pattern to match (all must be word_part_e.Literal):

      Lit_Tilde Lit_Chars? (Lit_Slash | Lit_Colon | %end)
    """
    parts = w.parts

    # Bail out EARLY if there is no ~ at all
    has_tilde = False
    for part in parts:
        if LiteralId(part) == Id.Lit_Tilde:
            has_tilde = True
            break
    if not has_tilde:
        return  # Avoid further work and allocations

    # Avoid IndexError, since we have to look ahead up to 2 parts
    parts.append(None)
    parts.append(None)

    new_parts = []  # type: List[word_part_t]

    tilde_could_be_next = True  # true at first, and true after :

    i = 0
    n = len(parts)

    while i < n:
        part0 = parts[i]
        if part0 is None:
            break

        #log('i = %d', i)
        #log('part0 %s', part0)

        # Skip a tilde in the middle of a word, like a=foo~bar
        if tilde_could_be_next and LiteralId(part0) == Id.Lit_Tilde:
            # Look ahead up to 2 parts; the None padding means ~ is at the
            # end of the word
            part1 = parts[i + 1]
            part2 = parts[i + 2]

            tok0 = cast(Token, part0)

            if part1 is None:  # x=foo:~
                new_parts.append(word_part.TildeSub(tok0, None, None))
                break  # at end

            id1 = LiteralId(part1)

            if id1 in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~/ or x=foo:~:
                new_parts.append(word_part.TildeSub(tok0, None, None))
                new_parts.append(part1)
                i += 2
                continue

            if id1 != Id.Lit_Chars:
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                i += 2
                continue  # x=foo:~$x is not tilde sub

            tok1 = cast(Token, part1)

            if part2 is None:  # x=foo:~foo
                # consume both
                new_parts.append(
                    word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
                break  # at end

            id2 = LiteralId(part2)
            if id2 not in (Id.Lit_Slash, Id.Lit_Colon):  # x=foo:~foo$x
                new_parts.append(part0)  # unchanged
                new_parts.append(part1)  # ...
                new_parts.append(part2)  # ...
                i += 3
                continue

            new_parts.append(
                word_part.TildeSub(tok0, tok1, lexer.TokenVal(tok1)))
            new_parts.append(part2)
            i += 3

            tilde_could_be_next = (id2 == Id.Lit_Colon)

        else:
            new_parts.append(part0)
            i += 1

            tilde_could_be_next = (LiteralId(part0) == Id.Lit_Colon)

    parts.pop()
    parts.pop()

    # Mutate the argument
    w.parts = new_parts
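

# Worked example (illustrative): for a=~:~/src, the RHS parts
#
#   [Lit_Tilde, Lit_Colon, Lit_Tilde, Lit_Slash, Lit_Chars 'src']
#
# are rewritten in place to
#
#   [TildeSub, Lit_Colon, TildeSub, Lit_Slash, Lit_Chars 'src']
#
# Each tilde qualifies because it starts the word or follows a colon.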
|

def TildeDetectAll(words):
    # type: (List[word_t]) -> List[word_t]
    out = []  # type: List[word_t]
    for w in words:
        t = TildeDetect(w)
        if t:
            out.append(t)
        else:
            out.append(w)
    return out


def HasArrayPart(w):
    # type: (CompoundWord) -> bool
    """Used in cmd_parse."""
    for part in w.parts:
        if part.tag() == word_part_e.InitializerLiteral:
            return True
    return False


def ShFunctionName(w):
    # type: (CompoundWord) -> str
    """Returns a valid shell function name, or the empty string.

    TODO: Maybe use this regex to validate:

      FUNCTION_NAME_RE = r'[^{}\[\]=]*'

    Bash is very lenient, but that would disallow confusing characters, for
    better error messages on a[x]=(), etc.
    """
    ok, s, quoted = StaticEval(w)
    # Function names should not have quotes
    if not ok or quoted:
        return ''
    return s


def IsVarLike(w):
    # type: (CompoundWord) -> bool
    """Tests whether a word looks like FOO=bar.

    This is a quick test for the command parser to distinguish:

      func() { echo hi; }
      func=(1 2 3)
    """
    if len(w.parts) == 0:
        return False

    return LiteralId(w.parts[0]) == Id.Lit_VarLike


def LooksLikeArithVar(UP_w):
    # type: (word_t) -> Optional[Token]
    """Return a token if this word looks like an arith var.

    NOTE: This can't be combined with DetectShAssignment because VarLike and
    ArithVarLike must be different tokens.  Otherwise _ReadCompoundWord will
    be confused between array assignments foo=(1 2) and function calls
    foo(1, 2).
    """
    if UP_w.tag() != word_e.Compound:
        return None

    w = cast(CompoundWord, UP_w)
    if len(w.parts) != 1:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_ArithVarLike)


def CheckLeadingEquals(w):
    # type: (CompoundWord) -> Optional[Token]
    """Tests whether a word looks like =word.

    For shopt --set strict_parse_equals.
    """
    if len(w.parts) == 0:
        return None

    return CheckLiteralId(w.parts[0], Id.Lit_Equals)
|

def DetectShAssignment(w):
    # type: (CompoundWord) -> Tuple[Optional[Token], Optional[Token], int]
    """Detects whether a word looks like FOO=bar or FOO[x]=bar.

    Returns:
      left_token,   # Lit_VarLike, Lit_ArrayLhsOpen, or None if it's not an
                    # assignment
      close_token,  # Lit_ArrayLhsClose if it was detected, or None
      part_offset   # where to start the value word; 0 if not an assignment

    Cases:

      s=1
      s+=1
      s[x]=1
      s[x]+=1

      a=()
      a+=()
      a[x]=()
      a[x]+=()  # We parse this (as bash does), but it's never valid because
                # arrays can't be nested.
    """
    no_token = None  # type: Optional[Token]

    n = len(w.parts)
    if n == 0:
        return no_token, no_token, 0

    part0 = w.parts[0]
    if part0.tag() != word_part_e.Literal:
        return no_token, no_token, 0

    tok0 = cast(Token, part0)

    if tok0.id == Id.Lit_VarLike:
        return tok0, no_token, 1  # everything after the first token is the value

    if tok0.id == Id.Lit_ArrayLhsOpen:
        # NOTE that a[]=x should be an error.  We don't want to silently
        # decay.
        if n < 2:
            return no_token, no_token, 0
        for i in xrange(1, n):
            part = w.parts[i]
            tok_close = CheckLiteralId(part, Id.Lit_ArrayLhsClose)
            if tok_close:
                return tok0, tok_close, i + 1

    # Nothing detected.  Could be 'foobar' or 'a[x+1+2/' without the closing ].
    return no_token, no_token, 0
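

# Illustrative return values, based on the cases above:
#
#   s=1     -> (Lit_VarLike token, None, 1)
#   a[x]=1  -> (Lit_ArrayLhsOpen token, Lit_ArrayLhsClose token, i + 1)
#              where i is the index of the ']=' part
#   echo    -> (None, None, 0)  # not an assignment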
|

def DetectAssocPair(w):
    # type: (CompoundWord) -> Optional[AssocPair]
    """Like DetectShAssignment, but for A=(['k']=v ['k2']=v)

    The key and the value are both strings.  So we just pick out the
    word_parts.  Unlike a[k]=v, A=([k]=v) is NOT ambiguous, because the [k]
    syntax is only used for associative array literals, as opposed to
    indexed array literals.
    """
    parts = w.parts
    if LiteralId(parts[0]) != Id.Lit_LBracket:
        return None

    n = len(parts)
    for i in xrange(n):
        id_ = LiteralId(parts[i])
        if id_ == Id.Lit_ArrayLhsClose:  # ]=
            # e.g. if we have [$x$y]=$a$b
            key = CompoundWord(parts[1:i])  # $x$y
            value = CompoundWord(parts[i + 1:])  # $a$b

            has_plus = lexer.IsPlusEquals(cast(Token, parts[i]))

            # Type-annotated intermediate value for mycpp translation
            return AssocPair(key, value, has_plus)

    return None
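

# Illustrative: for an element like ['k']=v, parts[0] is Lit_LBracket and
# parts[i] is the ']=' token (Lit_ArrayLhsClose), so we slice out
#
#   key   = CompoundWord(parts[1:i])      # 'k'
#   value = CompoundWord(parts[i + 1:])   # v
#
# and has_plus distinguishes ['k']+=v from ['k']=v.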
|

def IsControlFlow(w):
    # type: (CompoundWord) -> Tuple[Kind_t, Optional[Token]]
    """Tests if a word is a control flow word."""
    no_token = None  # type: Optional[Token]

    if len(w.parts) != 1:
        return Kind.Undefined, no_token

    UP_part0 = w.parts[0]
    token_type = LiteralId(UP_part0)
    if token_type == Id.Undefined_Tok:
        return Kind.Undefined, no_token

    token_kind = consts.GetKind(token_type)
    if token_kind == Kind.ControlFlow:
        return token_kind, cast(Token, UP_part0)

    return Kind.Undefined, no_token


def BraceToken(UP_w):
    # type: (word_t) -> Optional[Token]
    """If a word has Id.Lit_LBrace or Id.Lit_RBrace, return the Token.

    This is a special case for osh/cmd_parse.py.

    The WordParser changes Id.Op_LBrace from ExprParser into Id.Lit_LBrace,
    so we may get a token, not a word.
    """
    with tagswitch(UP_w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            assert tok.id in (Id.Lit_LBrace, Id.Lit_RBrace), tok
            return tok

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)
            return LiteralToken(w)

        else:
            raise AssertionError()
|

def AsKeywordToken(UP_w):
    # type: (word_t) -> Token
    """Given a word that IS a CompoundWord containing just a keyword, return
    the single token at the start.
    """
    assert UP_w.tag() == word_e.Compound, UP_w
    w = cast(CompoundWord, UP_w)

    part = w.parts[0]
    assert part.tag() == word_part_e.Literal, part
    tok = cast(Token, part)
    assert consts.GetKind(tok.id) == Kind.KW, tok
    return tok


def AsOperatorToken(word):
    # type: (word_t) -> Token
    """For a word that IS an operator (word.Token), return that token.

    This must only be called on a word which is known to be an operator
    (word.Token).
    """
    assert word.tag() == word_e.Operator, word
    return cast(Token, word)
|

#
# Polymorphic between Token and Compound
#


def ArithId(w):
    # type: (word_t) -> Id_t
    """Used by shell arithmetic parsing."""
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        return tok.id

    assert isinstance(w, CompoundWord)
    return Id.Word_Compound


def BoolId(w):
    # type: (word_t) -> Id_t
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.String):  # for test/[
            w = cast(word.String, UP_w)
            return w.id

        elif case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            if len(w.parts) != 1:
                return Id.Word_Compound

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # It's a regular word

            # This is outside the BoolUnary/BoolBinary namespace, but works
            # the same.
            if token_type in (Id.KW_Bang, Id.Lit_DRightBracket):
                return token_type  # special boolean "tokens"

            token_kind = consts.GetKind(token_type)
            if token_kind in (Kind.BoolUnary, Kind.BoolBinary):
                return token_type  # boolean operators

            return Id.Word_Compound

        else:
            # I think Empty never happens in this context?
            raise AssertionError(w.tag())
|

def CommandId(w):
    # type: (word_t) -> Id_t
    """Used by the CommandParser."""
    UP_w = w
    with tagswitch(w) as case:
        if case(word_e.Operator):
            tok = cast(Token, UP_w)
            return tok.id

        elif case(word_e.Compound):
            w = cast(CompoundWord, UP_w)

            # Fine-grained categorization of SINGLE literal parts
            if len(w.parts) != 1:
                return Id.Word_Compound  # generic word

            token_type = LiteralId(w.parts[0])
            if token_type == Id.Undefined_Tok:
                return Id.Word_Compound  # Not Kind.Lit, generic word

            if token_type in (Id.Lit_LBrace, Id.Lit_RBrace, Id.Lit_Equals,
                              Id.Lit_TDot):
                # - { } are for YSH braces
                # - = is for the = keyword
                # - ... is to start multiline mode
                #
                # TODO: Should we use Op_{LBrace,RBrace} and Kind.Op when
                # parse_brace?  Lit_Equals could be KW_Equals?
                return token_type

            token_kind = consts.GetKind(token_type)
            if token_kind == Kind.KW:
                return token_type  # Id.KW_Var, etc.

            return Id.Word_Compound  # generic word

        elif case(word_e.Redir):
            w = cast(word.Redir, UP_w)
            return w.op.id

        else:
            raise AssertionError(w.tag())


def CommandKind(w):
    # type: (word_t) -> Kind_t
    """The CommandKind is for coarse-grained decisions in the CommandParser.

    NOTE: This is inconsistent with CommandId(), because we never return
    Kind.KW or Kind.Lit.  But the CommandParser is easier to write this way.

    For example, these are valid redirects to a Kind.Word, and the parser
    checks:

      echo hi > =
      echo hi > {

    Invalid:

      echo hi > (
      echo hi > ;
    """
    if w.tag() == word_e.Operator:
        tok = cast(Token, w)
        # CommandParser uses Kind.Op, Kind.Eof, etc.
        return consts.GetKind(tok.id)
    if w.tag() == word_e.Redir:
        return Kind.Redir

    return Kind.Word
|

# Stubs for converting the RHS of assignment to expression mode.
# For ysh_ify.py.
def IsVarSub(w):
    # type: (word_t) -> bool
    """Return whether it's any var sub, or a double quoted one."""
    return False


# Doesn't translate with mycpp because of dynamic %
def ErrorWord(error_str):
    # type: (str) -> CompoundWord
    t = lexer.DummyToken(Id.Lit_Chars, error_str)
    return CompoundWord([t])


def Pretty(w):
    # type: (word_t) -> str
    """Return a string to display to the user."""
    UP_w = w
    if w.tag() == word_e.String:
        w = cast(word.String, UP_w)
        if w.id == Id.Eof_Real:
            return 'EOF'
        else:
            return repr(w.s)
    else:
        return word_str(w.tag())  # tag name


class ctx_EmitDocToken(object):
    """For doc comments."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.EmitDocToken(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.EmitDocToken(False)


class ctx_Multiline(object):
    """For multiline commands."""

    def __init__(self, w_parser):
        # type: (WordParser) -> None
        w_parser.Multiline(True)
        self.w_parser = w_parser

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        self.w_parser.Multiline(False)
|