#!/usr/bin/env python2
# Copyright 2016 Andy Chu. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
"""
id_kind_def.py - Id and Kind definitions, stored in Token

NOTE: If this file changes, rebuild it with build/py.sh all
"""
from __future__ import print_function

from _devbuild.gen.types_asdl import (bool_arg_type_e, bool_arg_type_t)
#from mycpp.mylib import log

from typing import List, Tuple, Dict, Optional, TYPE_CHECKING
if TYPE_CHECKING:  # avoid circular build deps
    from _devbuild.gen.id_kind_asdl import Id_t, Kind_t


class IdSpec(object):
    """Identifiers that form the "spine" of the shell program
    representation."""

    def __init__(self, kind_lookup, bool_ops):
        # type: (Dict[int, int], Dict[int, bool_arg_type_t]) -> None
        self.id_str2int = {}  # type: Dict[str, int]
        self.kind_str2int = {}  # type: Dict[str, int]

        self.kind_lookup = kind_lookup  # Id int -> Kind int
        self.kind_name_list = []  # type: List[str]
        self.kind_sizes = []  # type: List[int]  # optional stats

        self.lexer_pairs = {}  # type: Dict[int, List[Tuple[bool, str, int]]]
        self.bool_ops = bool_ops  # type: Dict[int, bool_arg_type_t]

        # Incremented on each method call
        # IMPORTANT: 1-based indices match what asdl/gen_python.py does!!!
        self.id_index = 1
        self.kind_index = 1

    def LexerPairs(self, kind):
        # type: (Kind_t) -> List[Tuple[bool, str, Id_t]]
        result = []
        for is_regex, pat, id_ in self.lexer_pairs[kind]:
            result.append((is_regex, pat, id_))
        return result

    def _AddId(self, id_name, kind=None):
        # type: (str, Optional[int]) -> int
        """
        Args:
          id_name: e.g. BoolBinary_Equal
          kind: override autoassignment.  For AddBoolBinaryForBuiltin
        """
        t = self.id_index

        self.id_str2int[id_name] = t

        if kind is None:
            kind = self.kind_index
        self.kind_lookup[t] = kind

        self.id_index += 1  # mutate last
        return t  # the index we used
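
    # Example: the first _AddId() call on a fresh spec (e.g.
    # _AddId('Word_Compound') from AddKinds below) returns 1, records the
    # name in id_str2int, and maps that Id to the current kind_index, which
    # is only finalized by a later _AddKind() call.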

    def _AddKind(self, kind_name):
        # type: (str) -> None
        self.kind_str2int[kind_name] = self.kind_index
        #log('%s = %d', kind_name, self.kind_index)
        self.kind_index += 1
        self.kind_name_list.append(kind_name)

    def AddKind(self, kind_name, tokens):
        # type: (str, List[str]) -> None
        assert isinstance(tokens, list), tokens

        for name in tokens:
            id_name = '%s_%s' % (kind_name, name)
            self._AddId(id_name)

        # Must be after adding Id
        self._AddKind(kind_name)
        self.kind_sizes.append(len(tokens))  # debug info
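
    # Example: spec.AddKind('Eof', ['Real', 'RParen', 'Backtick']) (called in
    # AddKinds below) defines the Ids Eof_Real, Eof_RParen, and Eof_Backtick,
    # then registers Kind.Eof; a plain AddKind() call records no lexer pairs.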

    def AddKindPairs(self, kind_name, pairs):
        # type: (str, List[Tuple[str, str]]) -> None
        assert isinstance(pairs, list), pairs

        lexer_pairs = []
        for name, char_pat in pairs:
            id_name = '%s_%s' % (kind_name, name)
            id_int = self._AddId(id_name)
            # After _AddId
            lexer_pairs.append((False, char_pat, id_int))  # Constant

        self.lexer_pairs[self.kind_index] = lexer_pairs

        # Must be after adding Id
        self._AddKind(kind_name)
        self.kind_sizes.append(len(pairs))  # debug info
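
    # Example: spec.AddKindPairs('VOp3', [('At', '@'), ('Star', '*')]) (see
    # AddKinds below) defines VOp3_At and VOp3_Star and stores the constant
    # (non-regex) lexer pairs (False, '@', id) and (False, '*', id) under
    # Kind.VOp3, retrievable via LexerPairs().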

    def AddBoolKind(
            self,
            kind_name,  # type: str
            arg_type_pairs,  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    ):
        # type: (...) -> None
        """
        Args:
          kind_name: e.g. 'BoolUnary' or 'BoolBinary'
          arg_type_pairs: list of (bool_arg_type_t, [(name, pattern), ...])
        """
        lexer_pairs = []
        num_tokens = 0
        for arg_type, pairs in arg_type_pairs:
            #print(arg_type, pairs)

            for name, char_pat in pairs:
                # BoolUnary_f, BoolBinary_eq, BoolBinary_NEqual
                id_name = '%s_%s' % (kind_name, name)
                id_int = self._AddId(id_name)
                self.AddBoolOp(id_int, arg_type)  # register type
                lexer_pairs.append((False, char_pat, id_int))  # constant

            num_tokens += len(pairs)

        self.lexer_pairs[self.kind_index] = lexer_pairs

        # Must do this after _AddId()
        self._AddKind(kind_name)
        self.kind_sizes.append(num_tokens)  # debug info
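
    # Example: AddBoolKinds() below calls
    #   spec.AddBoolKind('BoolUnary', [(bool_arg_type_e.Str, [('z', '-z'), ('n', '-n')]), ...])
    # which defines BoolUnary_z and BoolUnary_n, registers their argument
    # type via AddBoolOp(), and records ('-z', ...) / ('-n', ...) lexer pairs
    # under Kind.BoolUnary.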

    def AddBoolBinaryForBuiltin(self, id_name, kind):
        # type: (str, int) -> int
        """For [ = ] [ == ] and [ != ].

        These operators are NOT added to the lexer.  They are "lexed" as
        word.String.
        """
        id_name = 'BoolBinary_%s' % id_name
        id_int = self._AddId(id_name, kind=kind)
        self.AddBoolOp(id_int, bool_arg_type_e.Str)
        return id_int

    def AddBoolOp(self, id_int, arg_type):
        # type: (int, bool_arg_type_t) -> None
        """Associate an Id integer with a bool_arg_type_e."""
        self.bool_ops[id_int] = arg_type


def AddKinds(spec):
    # type: (IdSpec) -> None

    # A compound word, in arith context, boolean context, or command context.
    # A['foo']  A["foo"]  A[$foo]  A["$foo"]  A[${foo}]  A["${foo}"]
    spec.AddKind('Word', ['Compound'])

    # Token IDs in Kind.Arith are first to make the TDOP precedence table
    # small.
    #
    # NOTE: Could share Op_Pipe, Op_Amp, Op_DAmp, Op_Semi, Op_LParen, etc.
    # Actually all of Arith could be folded into Op, because we are using
    # WordParser._ReadArithWord vs. WordParser._ReadWord.
    spec.AddKindPairs(
        'Arith',
        [
            ('Semi', ';'),  # ternary for loop only
            ('Comma', ','),  # function call and C comma operator
            ('Plus', '+'),
            ('Minus', '-'),
            ('Star', '*'),
            ('Slash', '/'),
            ('Percent', '%'),
            ('DPlus', '++'),
            ('DMinus', '--'),
            ('DStar', '**'),
            ('LParen', '('),
            ('RParen', ')'),  # grouping and function call extension
            ('LBracket', '['),
            ('RBracket', ']'),  # array and assoc array subscript
            ('RBrace', '}'),  # for end of var sub

            # Logical Ops
            ('QMark', '?'),
            ('Colon', ':'),  # Ternary Op: a < b ? 0 : 1
            ('LessEqual', '<='),
            ('Less', '<'),
            ('GreatEqual', '>='),
            ('Great', '>'),
            ('DEqual', '=='),
            ('NEqual', '!='),
            # note: these 3 are not in YSH Expr.  (Could be used in find dialect.)
            ('DAmp', '&&'),
            ('DPipe', '||'),
            ('Bang', '!'),

            # Bitwise ops
            ('DGreat', '>>'),
            ('DLess', '<<'),
            # YSH: ^ is exponent
            ('Amp', '&'),
            ('Pipe', '|'),
            ('Caret', '^'),
            ('Tilde', '~'),
            ('Equal', '='),

            # Augmented Assignment for $(( ))
            # Must match the list in osh/arith_parse.py
            # YSH has **= //= like Python
            ('PlusEqual', '+='),
            ('MinusEqual', '-='),
            ('StarEqual', '*='),
            ('SlashEqual', '/='),
            ('PercentEqual', '%='),
            ('DGreatEqual', '>>='),
            ('DLessEqual', '<<='),
            ('AmpEqual', '&='),
            ('CaretEqual', '^='),
            ('PipeEqual', '|='),
        ])

    spec.AddKind('Eof', ['Real', 'RParen', 'Backtick'])

    spec.AddKind('Undefined', ['Tok'])  # for initial state

    # The Unknown kind is used when we lex something, but it's invalid.
    # Examples:
    #   ${^}
    #   $'\z'  Such bad codes are accepted when parse_backslash is on
    #          (default in OSH), so we have to lex them.
    #   (x == y)  should use === or ~==
    spec.AddKind('Unknown', ['Tok', 'Backslash', 'DEqual', 'DAmp', 'DPipe', 'DDot'])

    spec.AddKind('Eol', ['Tok'])  # no more tokens on line (\0)

    # Ignored_Newline is for J8 lexing to count lines
    spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment', 'Newline'])

    # Id.WS_Space is for lex_mode_e.ShCommand; Id.Ignored_Space is for
    # lex_mode_e.Arith
    spec.AddKind('WS', ['Space'])

    spec.AddKind(
        'Lit',
        [
            'Chars',
            'CharsWithoutPrefix',  # for stripping leading whitespace
            'VarLike',
            'ArrayLhsOpen',
            'ArrayLhsClose',
            'Splice',  # @func(a, b)
            'AtLBracket',  # @[split(x)]
            'AtLBraceDot',  # @{.myproc arg1} should be builtin_sub
            'Other',
            'EscapedChar',  # \* is escaped
            'LBracket',
            'RBracket',  # for assoc array literals, static globs
            'Star',
            'QMark',
            # Either brace expansion or keyword for { and }
            'LBrace',
            'RBrace',
            'Comma',
            'Equals',  # For = f()
            'Dollar',  # detecting 'echo $'
            'DRightBracket',  # the ]] that matches [[, NOT a keyword
            'Tilde',  # tilde expansion
            'Pound',  # for comment or VarOp state
            'TPound',  # for doc comments like ###
            'TDot',  # for multiline commands ...
            'Slash',
            'Percent',  # / # % for patsub, NOT unary op
            'Colon',  # x=foo:~:~root needs tilde expansion
            'Digits',  # for lex_mode_e.Arith
            'At',  # for ${a[@]} in lex_mode_e.Arith, and detecting @[]
            'ArithVarLike',  # for $((var+1)).  Distinct from Lit_VarLike 'var='
            'BadBackslash',  # for "\z", not Id.Unknown_Backslash because it's a
            # syntax error in YSH, but NOT OSH
            'CompDummy',  # A fake Lit_* token to get partial words during
            # completion
        ])

    # For recognizing \` and \" and \\ within backticks.  There's an extra layer
    # of backslash quoting.
    spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

    spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

    spec.AddKind(
        'Op',
        [
            'Newline',  # mostly equivalent to SEMI
            'Amp',  # &
            'Pipe',  # |
            'PipeAmp',  # |& -- bash extension for stderr
            'DAmp',  # &&
            'DPipe',  # ||
            'Semi',  # ;
            'DSemi',  # ;; for case
            'SemiAmp',  # ;& for case
            'DSemiAmp',  # ;;& for case
            'LParen',  # For subshell.  Not Kind.Left because it's NOT a WordPart.
            'RParen',  # Default, will be translated to Id.Right_*
            'DLeftParen',
            'DRightParen',

            # for [[ ]] language
            'Less',  # <
            'Great',  # >
            'Bang',  # !

            # YSH [] {}
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
        ])

    # YSH expressions use Kind.Expr and Kind.Arith (further below)
    spec.AddKind(
        'Expr',
        [
            'Reserved',  # <- means nothing but it's reserved now
            'Symbol',  # %foo
            'Name',
            'DecInt',
            'BinInt',
            'OctInt',
            'HexInt',
            'Float',
            'Bang',  # eggex !digit, ![a-z]
            'Dot',
            'DDotLessThan',
            'DDotEqual',
            'Colon',  # mylist:pop()
            'RArrow',
            'RDArrow',
            'DSlash',  # integer division
            'TEqual',
            'NotDEqual',
            'TildeDEqual',  # === !== ~==
            'At',
            'DoubleAt',  # splice operators
            'Ellipsis',  # for varargs
            'Dollar',  # legacy regex
            'NotTilde',  # !~
            'DTilde',
            'NotDTilde',  # ~~ !~~
            'DStarEqual',  # **=, which bash doesn't have
            'DSlashEqual',  # //=, which bash doesn't have
            'CastedDummy',  # Used for @() $() (words in lex_mode_e.ShCommand)
            # and ${} '' "" (and all other strings)

            # Constants
            'Null',
            'True',
            'False',

            # Keywords are resolved after lexing, but otherwise behave like tokens.
            'And',
            'Or',
            'Not',

            # List comprehensions
            'For',
            'Is',
            'In',
            'If',
            'Else',
            'Func',  # For function literals
            'Capture',
            'As',
        ])

    # For C-escaped strings.
    spec.AddKind(
        'Char',
        [
            'OneChar',
            'Stop',
            'Hex',  # \xff
            'YHex',  # \yff for J8 notation

            # Two variants of Octal: \377, and \0377.
            'Octal3',
            'Octal4',
            'Unicode4',
            'SurrogatePair',  # JSON
            'Unicode8',  # bash
            'UBraced',
            'Pound',  # YSH
            'AsciiControl',  # \x01-\x1f, what's disallowed in JSON
        ])

    # For lex_mode_e.BashRegex
    # Bash treats ( | ) as special, and space is allowed within ()
    # Note Id.Op_RParen -> Id.Right_BashRegex with lexer hint
    spec.AddKind('BashRegex', ['LParen', 'AllowedInParens'])

    spec.AddKind(
        'Eggex',
        [
            'Start',  # ^ or %start
            'End',  # $ or %end
            'Dot',  # . or dot
            # Future: %boundary generates \b in Python/Perl, etc.
        ])

    spec.AddKind(
        'Redir',
        [
            'Less',  # < stdin
            'Great',  # > stdout
            'DLess',  # << here doc redirect
            'TLess',  # <<< bash only here string
            'DGreat',  # >> append stdout
            'GreatAnd',  # >& descriptor redirect
            'LessAnd',  # <& descriptor redirect
            'DLessDash',  # <<- here doc redirect for tabs?
            'LessGreat',  # <>
            'Clobber',  # >|  POSIX?
            'AndGreat',  # bash &> stdout/stderr to file
            'AndDGreat',  # bash &>> stdout/stderr append to file

            #'GreatPlus',  # >+ is append in YSH
            #'DGreatPlus',  # >>+ is append to string in YSH
        ])

    # NOTE: This is for left/right WORDS only.  (( is not a word so it doesn't
    # get that.
    spec.AddKind(
        'Left',
        [
            'DoubleQuote',
            'JDoubleQuote',  # j" for J8 notation
            'SingleQuote',  # ''
            'DollarSingleQuote',  # $'' for \n escapes
            'RSingleQuote',  # r''
            'USingleQuote',  # u''
            'BSingleQuote',  # b''

            # Multiline versions
            'TDoubleQuote',  # """ """
            'DollarTDoubleQuote',  # $""" """
            'TSingleQuote',  # ''' '''
            'RTSingleQuote',  # r''' '''
            'UTSingleQuote',  # u''' '''
            'BTSingleQuote',  # b''' '''
            'Backtick',  # `
            'DollarParen',  # $(
            'DollarBrace',  # ${
            'DollarBraceZsh',  # ${(foo)
            'DollarDParen',  # $((
            'DollarBracket',  # $[ - synonym for $(( in bash and zsh
            'DollarDoubleQuote',  # $" for bash localized strings
            'ProcSubIn',  # <( )
            'ProcSubOut',  # >( )
            'AtParen',  # @( for split command sub
            'CaretParen',  # ^( for Block literal in expression mode
            'CaretBracket',  # ^[ for Expr literal
            'CaretBrace',  # ^{ for Arglist
            'CaretDoubleQuote',  # ^" for Template
            'ColonPipe',  # :| for word arrays
            'PercentParen',  # legacy %( for word arrays
        ])

    spec.AddKind(
        'Right',
        [
            'DoubleQuote',
            'SingleQuote',
            'Backtick',  # `
            'DollarBrace',  # }
            'DollarDParen',  # )) -- really the second one is a PushHint()
            # ArithSub2 is just Id.Arith_RBracket
            'DollarDoubleQuote',  # "
            'DollarSingleQuote',  # '

            # Disambiguated right parens
            'Subshell',  # )
            'ShFunction',  # )
            'CasePat',  # )
            'ShArrayLiteral',  # )
            'ExtGlob',  # )
            'BashRegexGroup',  # )
            'BlockLiteral',  # } that matches &{ echo hi }
        ])

    spec.AddKind('ExtGlob', ['Comma', 'At', 'Star', 'Plus', 'QMark', 'Bang'])

    # First position of var sub ${
    # Id.VOp2_Pound -- however you can't tell the difference at first!  It could
    # be an op or a name.  So it makes sense to base it on the state.
    # Id.VOp2_At
    # But then you have AS_STAR, or Id.Arith_Star maybe

    spec.AddKind(
        'VSub',
        [
            'DollarName',  # $foo
            'Name',  # 'foo' in ${foo}
            'Number',  # $0 .. $9
            'Bang',  # $!
            'At',  # $@  or  [@] for array subscripting
            'Pound',  # $#  or  ${#var} for length
            'Dollar',  # $$
            'Star',  # $*
            'Hyphen',  # $-
            'QMark',  # $?
            'Dot',  # ${.myproc builtin sub}
        ])

    spec.AddKindPairs('VTest', [
        ('ColonHyphen', ':-'),
        ('Hyphen', '-'),
        ('ColonEquals', ':='),
        ('Equals', '='),
        ('ColonQMark', ':?'),
        ('QMark', '?'),
        ('ColonPlus', ':+'),
        ('Plus', '+'),
    ])

    # Statically parse @P, so @x etc. is an error.
    spec.AddKindPairs(
        'VOp0',
        [
            ('Q', '@Q'),  # ${x@Q} for quoting
            ('E', '@E'),
            ('P', '@P'),  # ${PS1@P} for prompt eval
            ('A', '@A'),
            ('a', '@a'),
        ])

    # String removal ops
    spec.AddKindPairs(
        'VOp1',
        [
            ('Percent', '%'),
            ('DPercent', '%%'),
            ('Pound', '#'),
            ('DPound', '##'),
            # Case ops, in bash.  At least parse them.  Execution might require
            # unicode stuff.
            ('Caret', '^'),
            ('DCaret', '^^'),
            ('Comma', ','),
            ('DComma', ',,'),
        ])

    spec.AddKindPairs(
        'VOpYsh',
        [
            ('Pipe', '|'),  # ${x|html}
            ('Space', ' '),  # ${x %.3f}
        ])

    # Not in POSIX, but in Bash
    spec.AddKindPairs(
        'VOp2',
        [
            ('Slash', '/'),  # / for replacement
            ('Colon', ':'),  # : for slicing
            ('LBracket', '['),  # [ for indexing
            ('RBracket', ']'),  # ] for indexing
        ])

    # Can only occur after ${!prefix@}
    spec.AddKindPairs('VOp3', [
        ('At', '@'),
        ('Star', '*'),
    ])

    # This kind is for Node types that are NOT tokens.
    spec.AddKind(
        'Node',
        [
            # Arithmetic nodes
            'PostDPlus',
            'PostDMinus',  # Postfix inc/dec.
            # Prefix inc/dec use Arith_DPlus/Arith_DMinus.
            'UnaryPlus',
            'UnaryMinus',  # +1 and -1, to distinguish from infix.
            # Actually we don't need this because they will be under
            # Expr1/Plus vs Expr2/Plus.
            'NotIn',
            'IsNot',  # For YSH comparisons
        ])

    # NOTE: Not doing AddKindPairs() here because oil will have a different set
    # of keywords.  It will probably have for/in/while/until/case/if/else/elif,
    # and then func/proc.
    spec.AddKind(
        'KW',
        [
            'DLeftBracket',
            'Bang',
            'For',
            'While',
            'Until',
            'Do',
            'Done',
            'In',
            'Case',
            'Esac',
            'If',
            'Fi',
            'Then',
            'Else',
            'Elif',
            'Function',
            'Time',

            # YSH keywords.
            'Const',
            'Var',
            'SetVar',
            'SetGlobal',
            # later: Auto?
            'Call',
            'Proc',
            'Typed',
            'Func',

            # builtins, NOT keywords: use, fork, wait, etc.
            # Things that don't affect parsing shouldn't be keywords.
        ])

    # Unlike bash, we parse control flow statically.  They're not
    # dynamically-resolved builtins.
    spec.AddKind('ControlFlow', ['Break', 'Continue', 'Return', 'Exit'])

    # Special Kind for lookahead in the lexer.  It's never seen by anything else.
    spec.AddKind('LookAhead', ['FuncParens'])

    # For parsing globs and converting them to regexes.
    spec.AddKind('Glob', [
        'LBracket',
        'RBracket',
        'Star',
        'QMark',
        'Bang',
        'Caret',
        'EscapedChar',
        'BadBackslash',
        'CleanLiterals',
        'OtherLiteral',
    ])

    # For C-escaped strings.
    spec.AddKind(
        'Format',
        [
            'EscapedPercent',
            'Percent',  # starts another lexer mode
            'Flag',
            'Num',
            'Dot',
            'Type',
            'Star',
            'Time',
            'Zero',
        ])

    # For parsing prompt strings like PS1.
    spec.AddKind('PS', [
        'Subst',
        'Octal3',
        'LBrace',
        'RBrace',
        'Literals',
        'BadBackslash',
    ])

    spec.AddKind('Range', ['Int', 'Char', 'Dots', 'Other'])

    spec.AddKind(
        'J8',
        [
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
            'Comma',
            'Colon',
            'Null',
            'Bool',
            'Int',  # Number
            'Float',  # Number

            # High level tokens for "" b'' u''
            # We don't distinguish them in the parser, because we recognize
            # strings in the lexer.
            'String',

            # JSON8 and NIL8
            'Identifier',
            'Newline',  # J8 Lines only, similar to Op_Newline
            'Tab',  # Reserved for TSV8

            # NIL8 only
            'LParen',
            'RParen',
            #'Symbol',
            'Operator',
        ])

    spec.AddKind('ShNumber', ['Dec', 'Hex', 'Oct', 'BaseN'])


# Shared between [[ and test/[.
_UNARY_STR_CHARS = 'zn'  # -z -n
_UNARY_OTHER_CHARS = 'otvR'  # -o is overloaded
_UNARY_PATH_CHARS = 'abcdefghkLprsSuwxOGN'  # -a is overloaded

_BINARY_PATH = ['ef', 'nt', 'ot']
_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']


def _Dash(strs):
    # type: (List[str]) -> List[Tuple[str, str]]
    # Gives a pair of (token name, string to match)
    return [(s, '-' + s) for s in strs]
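
# For example, _Dash(['ef', 'nt', 'ot']) returns
# [('ef', '-ef'), ('nt', '-nt'), ('ot', '-ot')], i.e. (token name, string to
# match) pairs suitable for AddKindPairs() and AddBoolKind().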


def AddBoolKinds(spec):
    # type: (IdSpec) -> None
    spec.AddBoolKind('BoolUnary', [
        (bool_arg_type_e.Str, _Dash(list(_UNARY_STR_CHARS))),
        (bool_arg_type_e.Other, _Dash(list(_UNARY_OTHER_CHARS))),
        (bool_arg_type_e.Path, _Dash(list(_UNARY_PATH_CHARS))),
    ])

    Id = spec.id_str2int

    # test --true and test --false have no single letter flags.  They need no
    # lexing.
    for long_flag in ('true', 'false'):
        id_name = 'BoolUnary_%s' % long_flag
        spec._AddId(id_name)
        spec.AddBoolOp(Id[id_name], bool_arg_type_e.Str)

    spec.AddBoolKind('BoolBinary', [
        (bool_arg_type_e.Str, [
            ('GlobEqual', '='),
            ('GlobDEqual', '=='),
            ('GlobNEqual', '!='),
            ('EqualTilde', '=~'),
        ]),
        (bool_arg_type_e.Path, _Dash(_BINARY_PATH)),
        (bool_arg_type_e.Int, _Dash(_BINARY_INT)),
    ])

    # logical, arity, arg_type
    spec.AddBoolOp(Id['Op_DAmp'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['Op_DPipe'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['KW_Bang'], bool_arg_type_e.Undefined)

    spec.AddBoolOp(Id['Op_Less'], bool_arg_type_e.Str)
    spec.AddBoolOp(Id['Op_Great'], bool_arg_type_e.Str)


def SetupTestBuiltin(
        id_spec,  # type: IdSpec
        unary_lookup,  # type: Dict[str, int]
        binary_lookup,  # type: Dict[str, int]
        other_lookup,  # type: Dict[str, int]
):
    # type: (...) -> None
    """Set up tokens for test/[.

    Similar to AddBoolKinds above.  Differences:
    - =~ doesn't exist
    - && -> -a, || -> -o
    - ( ) -> Op_LParen (they don't appear above)
    """
    Id = id_spec.id_str2int
    Kind = id_spec.kind_str2int

    for letter in _UNARY_STR_CHARS + _UNARY_OTHER_CHARS + _UNARY_PATH_CHARS:
        id_name = 'BoolUnary_%s' % letter
        unary_lookup['-' + letter] = Id[id_name]

    for s in _BINARY_PATH + _BINARY_INT:
        id_name = 'BoolBinary_%s' % s
        binary_lookup['-' + s] = Id[id_name]

    # Like the [[ definition above, but without globbing and without =~ .

    for id_name, token_str in [('Equal', '='), ('DEqual', '=='),
                               ('NEqual', '!=')]:
        id_int = id_spec.AddBoolBinaryForBuiltin(id_name, Kind['BoolBinary'])

        binary_lookup[token_str] = id_int

    # Some of these names don't quite match, but it keeps the BoolParser simple.
    binary_lookup['<'] = Id['Op_Less']
    binary_lookup['>'] = Id['Op_Great']

    # NOTE: -a and -o overloaded as unary prefix operators BoolUnary_a and
    # BoolUnary_o.  The parser rather than the tokenizer handles this.
    other_lookup['!'] = Id['KW_Bang']  # like [[ !
    other_lookup['('] = Id['Op_LParen']
    other_lookup[')'] = Id['Op_RParen']

    other_lookup[']'] = Id['Arith_RBracket']  # For closing ]
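
    # Example of the resulting tables (derived from the loops above):
    #   unary_lookup['-z']   == Id['BoolUnary_z']
    #   binary_lookup['-eq'] == Id['BoolBinary_eq']
    #   binary_lookup['==']  == Id['BoolBinary_DEqual']
    #   other_lookup['(']    == Id['Op_LParen']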
|