OILS / frontend / id_kind_def.py View on Github | oilshell.org

810 lines, 550 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9id_kind_def.py - Id and Kind definitions, stored in Token
10
11NOTE: If this file changes, rebuild it with build/py.sh all
12"""
13from __future__ import print_function
14
15from _devbuild.gen.types_asdl import (bool_arg_type_e, bool_arg_type_t)
16#from mycpp.mylib import log
17
18from typing import List, Tuple, Dict, Optional, TYPE_CHECKING
19if TYPE_CHECKING: # avoid circular build deps
20 from _devbuild.gen.id_kind_asdl import Id_t, Kind_t
21
22
class IdSpec(object):
    """Registry that hands out integers for Ids and Kinds as they are declared.

    Ids are the identifiers that form the "spine" of the shell program
    representation.  Every Id belongs to exactly one Kind; both are numbered
    sequentially, starting at 1, in declaration order.
    """

    def __init__(self, kind_lookup, bool_ops):
        # type: (Dict[int, int], Dict[int, bool_arg_type_t]) -> None
        """
        Args:
          kind_lookup: caller-owned dict, filled in with Id int -> Kind int
          bool_ops: caller-owned dict, filled in with Id int -> bool_arg_type_e
        """
        # Tables passed in by the caller; this object only adds entries.
        self.kind_lookup = kind_lookup  # Id int -> Kind int
        self.bool_ops = bool_ops  # type: Dict[int, bool_arg_type_t]

        # Name -> integer maps
        self.id_str2int = {}  # type: Dict[str, int]
        self.kind_str2int = {}  # type: Dict[str, int]

        self.kind_name_list = []  # type: List[str]
        self.kind_sizes = []  # type: List[int]  # optional stats

        # Kind int -> [(is_regex, pattern, Id int)]
        self.lexer_pairs = {}  # type: Dict[int, List[Tuple[bool, str, int]]]

        # Next free numbers.  Bumped by _AddId() and _AddKind().
        # IMPORTANT: 1-based indices match what asdl/gen_python.py does!!!
        self.id_index = 1
        self.kind_index = 1

    def LexerPairs(self, kind):
        # type: (Kind_t) -> List[Tuple[bool, str, Id_t]]
        """Return a fresh list of the (is_regex, pattern, id) triples for kind.

        Raises KeyError if no pairs were registered under that kind.
        """
        return [(is_regex, pat, id_)
                for is_regex, pat, id_ in self.lexer_pairs[kind]]

    def _AddId(self, id_name, kind=None):
        # type: (str, Optional[int]) -> int
        """Give id_name the next Id integer and record which Kind it is in.

        Args:
          id_name: e.g. BoolBinary_Equal
          kind: override autoassignment.  For AddBoolBinaryForBuiltin

        Returns:
          The integer assigned to id_name.
        """
        new_id = self.id_index
        self.id_str2int[id_name] = new_id

        # Without an override, the Id lands in the kind currently being built,
        # i.e. the one the next _AddKind() call will name.
        self.kind_lookup[new_id] = self.kind_index if kind is None else kind

        self.id_index = new_id + 1  # mutate last
        return new_id

    def _AddKind(self, kind_name):
        # type: (str) -> None
        """Name the kind under construction and move on to the next number."""
        kind_int = self.kind_index
        self.kind_str2int[kind_name] = kind_int
        self.kind_name_list.append(kind_name)
        self.kind_index = kind_int + 1

    def AddKind(self, kind_name, tokens):
        # type: (str, List[str]) -> None
        """Declare a kind whose Ids are '<kind_name>_<token>', with no lexer
        patterns."""
        assert isinstance(tokens, list), tokens

        for tok_name in tokens:
            self._AddId('%s_%s' % (kind_name, tok_name))

        # The Ids above were filed under the current kind_index, so the kind
        # itself must be registered only after them.
        self._AddKind(kind_name)
        self.kind_sizes.append(len(tokens))  # debug info

    def AddKindPairs(self, kind_name, pairs):
        # type: (str, List[Tuple[str, str]]) -> None
        """Declare a kind from (name, constant string) pairs.

        Each pair yields an Id '<kind_name>_<name>' plus a non-regex lexer
        entry, retrievable later through LexerPairs().
        """
        assert isinstance(pairs, list), pairs

        kind_int = self.kind_index  # the number this kind is about to get

        constants = []
        for name, char_pat in pairs:
            id_int = self._AddId('%s_%s' % (kind_name, name))
            constants.append((False, char_pat, id_int))  # False: not a regex
        self.lexer_pairs[kind_int] = constants

        # Must be after adding Ids
        self._AddKind(kind_name)
        self.kind_sizes.append(len(pairs))  # debug info

    def AddBoolKind(
            self,
            kind_name,  # type: str
            arg_type_pairs,  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    ):
        # type: (...) -> None
        """Declare a kind of boolean operators, grouped by argument type.

        Args:
          kind_name: string
          arg_type_pairs: list of (bool_arg_type_e, [(name, constant string)])
        """
        kind_int = self.kind_index  # the number this kind is about to get

        constants = []
        total = 0
        for arg_type, pairs in arg_type_pairs:
            for name, char_pat in pairs:
                # BoolUnary_f, BoolBinary_eq, BoolBinary_NEqual
                id_int = self._AddId('%s_%s' % (kind_name, name))
                self.AddBoolOp(id_int, arg_type)  # register type
                constants.append((False, char_pat, id_int))  # constant
            total += len(pairs)
        self.lexer_pairs[kind_int] = constants

        # Must do this after _AddId()
        self._AddKind(kind_name)
        self.kind_sizes.append(total)  # debug info

    def AddBoolBinaryForBuiltin(self, id_name, kind):
        # type: (str, int) -> int
        """For [ = ] [ == ] and [ != ].

        These operators are NOT added to the lexer.  They are "lexed" as
        word.String.

        Args:
          id_name: suffix; the Id is registered as 'BoolBinary_<id_name>'
          kind: Kind integer to file the Id under (no autoassignment)

        Returns:
          The integer assigned to the new Id.
        """
        id_int = self._AddId('BoolBinary_%s' % id_name, kind=kind)
        self.AddBoolOp(id_int, bool_arg_type_e.Str)
        return id_int

    def AddBoolOp(self, id_int, arg_type):
        # type: (int, bool_arg_type_t) -> None
        """Associate an Id integer with a bool_arg_type_e."""
        self.bool_ops[id_int] = arg_type
152
153
def AddKinds(spec):
    # type: (IdSpec) -> None
    """Register every non-boolean Kind, and the Ids inside it, on spec.

    Ids and Kinds are numbered sequentially in the order they are added, so
    reordering or inserting entries here changes their integer values.
    """

    # A compound word, in arith context, boolean context, or command context.
    # A['foo'] A["foo"] A[$foo] A["$foo"] A[${foo}] A["${foo}"]
    spec.AddKind('Word', ['Compound'])

    # Token IDs in Kind.Arith are first to make the TDOP precedence table
    # small.
    #
    # NOTE: Could share Op_Pipe, Op_Amp, Op_DAmp, Op_Semi, Op_LParen, etc.
    # Actually all of Arith could be folded into Op, because we are using
    # WordParser._ReadArithWord vs. WordParser._ReadWord.
    spec.AddKindPairs(
        'Arith',
        [
            ('Semi', ';'),  # ternary for loop only
            ('Comma', ','),  # function call and C comma operator
            ('Plus', '+'),
            ('Minus', '-'),
            ('Star', '*'),
            ('Slash', '/'),
            ('Percent', '%'),
            ('DPlus', '++'),
            ('DMinus', '--'),
            ('DStar', '**'),
            ('LParen', '('),
            ('RParen', ')'),  # grouping and function call extension
            ('LBracket', '['),
            ('RBracket', ']'),  # array and assoc array subscript
            ('RBrace', '}'),  # for end of var sub

            # Logical Ops
            ('QMark', '?'),
            ('Colon', ':'),  # Ternary Op: a < b ? 0 : 1
            ('LessEqual', '<='),
            ('Less', '<'),
            ('GreatEqual', '>='),
            ('Great', '>'),
            ('DEqual', '=='),
            ('NEqual', '!='),
            # note: these 3 are not in YSH Expr. (Could be used in find dialect.)
            ('DAmp', '&&'),
            ('DPipe', '||'),
            ('Bang', '!'),

            # Bitwise ops
            ('DGreat', '>>'),
            ('DLess', '<<'),
            # YSH: ^ is exponent
            ('Amp', '&'),
            ('Pipe', '|'),
            ('Caret', '^'),
            ('Tilde', '~'),
            ('Equal', '='),

            # Augmented Assignment for $(( ))
            # Must match the list in osh/arith_parse.py
            # YSH has **= //= like Python
            ('PlusEqual', '+='),
            ('MinusEqual', '-='),
            ('StarEqual', '*='),
            ('SlashEqual', '/='),
            ('PercentEqual', '%='),
            ('DGreatEqual', '>>='),
            ('DLessEqual', '<<='),
            ('AmpEqual', '&='),
            ('CaretEqual', '^='),
            ('PipeEqual', '|='),
        ])

    spec.AddKind('Eof', ['Real', 'RParen', 'Backtick'])

    spec.AddKind('Undefined', ['Tok'])  # for initial state

    # The Unknown kind is used when we lex something, but it's invalid.
    # Examples:
    #   ${^}
    #   $'\z'  Such bad codes are accepted when parse_backslash is on
    #          (default in OSH), so we have to lex them.
    #   (x == y)  should use === or ~==
    spec.AddKind('Unknown',
                 ['Tok', 'Backslash', 'DEqual', 'DAmp', 'DPipe', 'DDot'])

    spec.AddKind('Eol', ['Tok'])  # no more tokens on line (\0)

    # Ignored_Newline is for J8 lexing to count lines
    spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment', 'Newline'])

    # Id.WS_Space is for lex_mode_e.ShCommand; Id.Ignored_Space is for
    # lex_mode_e.Arith
    spec.AddKind('WS', ['Space'])

    spec.AddKind(
        'Lit',
        [
            'Chars',
            'CharsWithoutPrefix',  # for stripping leading whitespace
            'VarLike',
            'ArrayLhsOpen',
            'ArrayLhsClose',
            'Splice',  # @func(a, b)
            'AtLBracket',  # @[split(x)]
            'AtLBraceDot',  # @{.myproc arg1} should be builtin_sub
            'Other',
            'EscapedChar',  # \* is escaped
            'LBracket',
            'RBracket',  # for assoc array literals, static globs
            'Star',
            'QMark',
            # Either brace expansion or keyword for { and }
            'LBrace',
            'RBrace',
            'Comma',
            'Equals',  # For = f()
            'Dollar',  # detecting 'echo $'
            'DRightBracket',  # the ]] that matches [[, NOT a keyword
            'Tilde',  # tilde expansion
            'Pound',  # for comment or VarOp state
            'TPound',  # for doc comments like ###
            'TDot',  # for multiline commands ...
            'Slash',
            'Percent',  # / # % for patsub, NOT unary op
            'Colon',  # x=foo:~:~root needs tilde expansion
            'Digits',  # for lex_mode_e.Arith
            'At',  # for ${a[@]} in lex_mode_e.Arith, and detecting @[]
            'ArithVarLike',  # for $((var+1)). Distinct from Lit_VarLike 'var='
            'BadBackslash',  # for "\z", not Id.Unknown_Backslash because it's a
            # syntax error in YSH, but NOT OSH
            'CompDummy',  # A fake Lit_* token to get partial words during
            # completion
        ])

    # For recognizing \` and \" and \\ within backticks. There's an extra layer
    # of backslash quoting.
    spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

    spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

    spec.AddKind(
        'Op',
        [
            'Newline',  # mostly equivalent to SEMI
            'Amp',  # &
            'Pipe',  # |
            'PipeAmp',  # |& -- bash extension for stderr
            'DAmp',  # &&
            'DPipe',  # ||
            'Semi',  # ;
            'DSemi',  # ;; for case
            'SemiAmp',  # ;& for case
            'DSemiAmp',  # ;;& for case
            'LParen',  # For subshell. Not Kind.Left because it's NOT a WordPart.
            'RParen',  # Default, will be translated to Id.Right_*
            'DLeftParen',
            'DRightParen',

            # for [[ ]] language
            'Less',  # <
            'Great',  # >
            'Bang',  # !

            # YSH [] {}
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
        ])

    # YSH expressions use Kind.Expr and Kind.Arith (registered above)
    spec.AddKind(
        'Expr',
        [
            'Reserved',  # <- means nothing but it's reserved now
            'Symbol',  # %foo
            'Name',
            'DecInt',
            'BinInt',
            'OctInt',
            'HexInt',
            'Float',
            'Bang',  # eggex !digit, ![a-z]
            'Dot',
            'DDotLessThan',
            'DDotEqual',
            'Colon',  # mylist:pop()
            'RArrow',
            'RDArrow',
            'DSlash',  # integer division
            'TEqual',
            'NotDEqual',
            'TildeDEqual',  # === !== ~==
            'At',
            'DoubleAt',  # splice operators
            'Ellipsis',  # for varargs
            'Dollar',  # legacy regex
            'NotTilde',  # !~
            'DTilde',
            'NotDTilde',  # ~~ !~~
            'DStarEqual',  # **=, which bash doesn't have
            'DSlashEqual',  # //=, which bash doesn't have
            'CastedDummy',  # Used for @() $() (words in lex_mode_e.ShCommand)
            # and ${} '' "" (and all other strings)

            # Constants
            'Null',
            'True',
            'False',

            # Keywords are resolved after lexing, but otherwise behave like tokens.
            'And',
            'Or',
            'Not',

            # List comprehensions
            'For',
            'Is',
            'In',
            'If',
            'Else',
            'Func',  # For function literals
            'Capture',
            'As',
        ])

    # For C-escaped strings.
    spec.AddKind(
        'Char',
        [
            'OneChar',
            'Stop',
            'Hex',  # \xff
            'YHex',  # \yff for J8 notation

            # Two variants of Octal: \377, and \0377.
            'Octal3',
            'Octal4',
            'Unicode4',
            'SurrogatePair',  # JSON
            'Unicode8',  # bash
            'UBraced',
            'Pound',  # YSH
            'AsciiControl',  # \x01-\x1f, what's disallowed in JSON
        ])

    # For lex_mode_e.BashRegex
    # Bash treats ( | ) as special, and space is allowed within ()
    # Note Id.Op_RParen -> Id.Right_BashRegex with lexer hint
    spec.AddKind('BashRegex', ['LParen', 'AllowedInParens'])

    spec.AddKind(
        'Eggex',
        [
            'Start',  # ^ or %start
            'End',  # $ or %end
            'Dot',  # . or dot
            # Future: %boundary generates \b in Python/Perl, etc.
        ])

    spec.AddKind(
        'Redir',
        [
            'Less',  # < stdin
            'Great',  # > stdout
            'DLess',  # << here doc redirect
            'TLess',  # <<< bash only here string
            'DGreat',  # >> append stdout
            'GreatAnd',  # >& descriptor redirect
            'LessAnd',  # <& descriptor redirect
            'DLessDash',  # <<- here doc redirect for tabs?
            'LessGreat',  # <>
            'Clobber',  # >| POSIX?
            'AndGreat',  # bash &> stdout/stderr to file
            'AndDGreat',  # bash &>> stdout/stderr append to file

            #'GreatPlus',  # >+ is append in YSH
            #'DGreatPlus',  # >>+ is append to string in YSH
        ])

    # NOTE: This is for left/right WORDS only. (( is not a word, so it doesn't
    # get that.)
    spec.AddKind(
        'Left',
        [
            'DoubleQuote',
            'JDoubleQuote',  # j" for J8 notation
            'SingleQuote',  # ''
            'DollarSingleQuote',  # $'' for \n escapes
            'RSingleQuote',  # r''
            'USingleQuote',  # u''
            'BSingleQuote',  # b''

            # Multiline versions
            'TDoubleQuote',  # """ """
            'DollarTDoubleQuote',  # $""" """
            'TSingleQuote',  # ''' '''
            'RTSingleQuote',  # r''' '''
            'UTSingleQuote',  # u''' '''
            'BTSingleQuote',  # b''' '''
            'Backtick',  # `
            'DollarParen',  # $(
            'DollarBrace',  # ${
            'DollarBraceZsh',  # ${(foo)
            'DollarDParen',  # $((
            'DollarBracket',  # $[ - synonym for $(( in bash and zsh
            'DollarDoubleQuote',  # $" for bash localized strings
            'ProcSubIn',  # <( )
            'ProcSubOut',  # >( )
            'AtParen',  # @( for split command sub
            'CaretParen',  # ^( for Block literal in expression mode
            'CaretBracket',  # ^[ for Expr literal
            'CaretBrace',  # ^{ for Arglist
            'CaretDoubleQuote',  # ^" for Template
            'ColonPipe',  # :| for word arrays
            'PercentParen',  # legacy %( for word arrays
        ])

    spec.AddKind(
        'Right',
        [
            'DoubleQuote',
            'SingleQuote',
            'Backtick',  # `
            'DollarBrace',  # }
            'DollarDParen',  # )) -- really the second one is a PushHint()
            # ArithSub2 is just Id.Arith_RBracket
            'DollarDoubleQuote',  # "
            'DollarSingleQuote',  # '

            # Disambiguated right parens
            'Subshell',  # )
            'ShFunction',  # )
            'CasePat',  # )
            'ShArrayLiteral',  # )
            'ExtGlob',  # )
            'BashRegexGroup',  # )
            'BlockLiteral',  # } that matches &{ echo hi }
        ])

    spec.AddKind('ExtGlob', ['Comma', 'At', 'Star', 'Plus', 'QMark', 'Bang'])

    # First position of var sub ${
    # Id.VOp2_Pound -- however you can't tell the difference at first! It could
    # be an op or a name. So it makes sense to base it on the state.
    # Id.VOp2_At
    # But then you have AS_STAR, or Id.Arith_Star maybe

    spec.AddKind(
        'VSub',
        [
            'DollarName',  # $foo
            'Name',  # 'foo' in ${foo}
            'Number',  # $0 .. $9
            'Bang',  # $!
            'At',  # $@ or [@] for array subscripting
            'Pound',  # $# or ${#var} for length
            'Dollar',  # $$
            'Star',  # $*
            'Hyphen',  # $-
            'QMark',  # $?
            'Dot',  # ${.myproc builtin sub}
        ])

    spec.AddKindPairs('VTest', [
        ('ColonHyphen', ':-'),
        ('Hyphen', '-'),
        ('ColonEquals', ':='),
        ('Equals', '='),
        ('ColonQMark', ':?'),
        ('QMark', '?'),
        ('ColonPlus', ':+'),
        ('Plus', '+'),
    ])

    # Statically parse @P, so @x etc. is an error.
    spec.AddKindPairs(
        'VOp0',
        [
            ('Q', '@Q'),  # ${x@Q} for quoting
            ('E', '@E'),
            ('P', '@P'),  # ${PS1@P} for prompt eval
            ('A', '@A'),
            ('a', '@a'),
        ])

    # String removal ops
    spec.AddKindPairs(
        'VOp1',
        [
            ('Percent', '%'),
            ('DPercent', '%%'),
            ('Pound', '#'),
            ('DPound', '##'),
            # Case ops, in bash. At least parse them. Execution might require
            # unicode stuff.
            ('Caret', '^'),
            ('DCaret', '^^'),
            ('Comma', ','),
            ('DComma', ',,'),
        ])

    spec.AddKindPairs(
        'VOpYsh',
        [
            ('Pipe', '|'),  # ${x|html}
            ('Space', ' '),  # ${x %.3f}
        ])

    # Not in POSIX, but in Bash
    spec.AddKindPairs(
        'VOp2',
        [
            ('Slash', '/'),  # / for replacement
            ('Colon', ':'),  # : for slicing
            ('LBracket', '['),  # [ for indexing
            ('RBracket', ']'),  # ] for indexing
        ])

    # Can only occur after ${!prefix@}
    spec.AddKindPairs('VOp3', [
        ('At', '@'),
        ('Star', '*'),
    ])

    # This kind is for Node types that are NOT tokens.
    spec.AddKind(
        'Node',
        [
            # Arithmetic nodes
            'PostDPlus',
            'PostDMinus',  # Postfix inc/dec.
            # Prefix inc/dec use Arith_DPlus/Arith_DMinus.
            'UnaryPlus',
            'UnaryMinus',  # +1 and -1, to distinguish from infix.
            # Actually we don't need this because they
            # will be under Expr1/Plus vs Expr2/Plus.
            'NotIn',
            'IsNot',  # For YSH comparisons
        ])

    # NOTE: Not doing AddKindPairs() here because oil will have a different set
    # of keywords. It will probably have for/in/while/until/case/if/else/elif,
    # and then func/proc.
    spec.AddKind(
        'KW',
        [
            'DLeftBracket',
            'Bang',
            'For',
            'While',
            'Until',
            'Do',
            'Done',
            'In',
            'Case',
            'Esac',
            'If',
            'Fi',
            'Then',
            'Else',
            'Elif',
            'Function',
            'Time',

            # YSH keywords.
            'Const',
            'Var',
            'SetVar',
            'SetGlobal',
            # later: Auto?
            'Call',
            'Proc',
            'Typed',
            'Func',

            # builtins, NOT keywords: use, fork, wait, etc.
            # Things that don't affect parsing shouldn't be keywords.
        ])

    # Unlike bash, we parse control flow statically. They're not
    # dynamically-resolved builtins.
    spec.AddKind('ControlFlow', ['Break', 'Continue', 'Return', 'Exit'])

    # Special Kind for lookahead in the lexer. It's never seen by anything else.
    spec.AddKind('LookAhead', ['FuncParens'])

    # For parsing globs and converting them to regexes.
    spec.AddKind('Glob', [
        'LBracket',
        'RBracket',
        'Star',
        'QMark',
        'Bang',
        'Caret',
        'EscapedChar',
        'BadBackslash',
        'CleanLiterals',
        'OtherLiteral',
    ])

    # For C-escaped strings.
    # NOTE(review): this comment duplicates the one on the Char kind above; the
    # Id names here look like printf-style format tokens -- confirm and reword.
    spec.AddKind(
        'Format',
        [
            'EscapedPercent',
            'Percent',  # starts another lexer mode
            'Flag',
            'Num',
            'Dot',
            'Type',
            'Star',
            'Time',
            'Zero',
        ])

    # For parsing prompt strings like PS1.
    spec.AddKind('PS', [
        'Subst',
        'Octal3',
        'LBrace',
        'RBrace',
        'Literals',
        'BadBackslash',
    ])

    spec.AddKind('Range', ['Int', 'Char', 'Dots', 'Other'])

    spec.AddKind(
        'J8',
        [
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
            'Comma',
            'Colon',
            'Null',
            'Bool',
            'Int',  # Number
            'Float',  # Number

            # High level tokens for "" b'' u''
            # We don't distinguish them in the parser, because we recognize
            # strings in the lexer.
            'String',

            # JSON8 and NIL8
            'Identifier',
            'Newline',  # J8 Lines only, similar to Op_Newline
            'Tab',  # Reserved for TSV8

            # NIL8 only
            'LParen',
            'RParen',
            #'Symbol',
            'Operator',
        ])

    spec.AddKind('ShNumber', ['Dec', 'Hex', 'Oct', 'BaseN'])
713
714
# Shared between [[ and test/[.
#
# Each character c below becomes an Id named BoolUnary_<c>, spelled '-<c>'.
# The three strings correspond to the bool_arg_type_e.Str, .Other and .Path
# groups passed to AddBoolKind(); character order determines Id numbering.
_UNARY_STR_CHARS = 'zn'  # -z -n
_UNARY_OTHER_CHARS = 'otvR'  # -o is overloaded
_UNARY_PATH_CHARS = 'abcdefghkLprsSuwxOGN'  # -a is overloaded

# Each string s below becomes an Id named BoolBinary_<s>, spelled '-<s>'.
_BINARY_PATH = ['ef', 'nt', 'ot']
_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']
722
723
724def _Dash(strs):
725 # type: (List[str]) -> List[Tuple[str, str]]
726 # Gives a pair of (token name, string to match)
727 return [(s, '-' + s) for s in strs]
728
729
def AddBoolKinds(spec):
    # type: (IdSpec) -> None
    """Register the BoolUnary and BoolBinary kinds and their argument types.

    Also records argument types for a few Ids from other kinds (Op_DAmp,
    Op_DPipe, KW_Bang, Op_Less, Op_Great), so those Ids must already be
    registered on spec; otherwise the lookups below raise KeyError.
    """
    spec.AddBoolKind('BoolUnary', [
        (bool_arg_type_e.Str, _Dash(list(_UNARY_STR_CHARS))),
        (bool_arg_type_e.Other, _Dash(list(_UNARY_OTHER_CHARS))),
        (bool_arg_type_e.Path, _Dash(list(_UNARY_PATH_CHARS))),
    ])

    Id = spec.id_str2int
    Kind = spec.kind_str2int

    # test --true and test --false have no single letter flags. They need no
    # lexing.
    #
    # The kind is passed explicitly: AddBoolKind() above has already advanced
    # spec.kind_index, so auto-assignment would file these two Ids under the
    # NEXT kind to be registered (BoolBinary) instead of BoolUnary.
    unary_kind = Kind['BoolUnary']
    for long_flag in ('true', 'false'):
        id_name = 'BoolUnary_%s' % long_flag
        id_int = spec._AddId(id_name, kind=unary_kind)
        spec.AddBoolOp(id_int, bool_arg_type_e.Str)

    spec.AddBoolKind('BoolBinary', [
        (bool_arg_type_e.Str, [
            ('GlobEqual', '='),
            ('GlobDEqual', '=='),
            ('GlobNEqual', '!='),
            ('EqualTilde', '=~'),
        ]),
        (bool_arg_type_e.Path, _Dash(_BINARY_PATH)),
        (bool_arg_type_e.Int, _Dash(_BINARY_INT)),
    ])

    # logical, arity, arg_type
    spec.AddBoolOp(Id['Op_DAmp'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['Op_DPipe'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['KW_Bang'], bool_arg_type_e.Undefined)

    spec.AddBoolOp(Id['Op_Less'], bool_arg_type_e.Str)
    spec.AddBoolOp(Id['Op_Great'], bool_arg_type_e.Str)
765
766
def SetupTestBuiltin(
        id_spec,  # type: IdSpec
        unary_lookup,  # type: Dict[str, int]
        binary_lookup,  # type: Dict[str, int]
        other_lookup,  # type: Dict[str, int]
):
    # type: (...) -> None
    """Fill in the string -> Id tables used by test/[.

    Similar to AddBoolKinds above.  Differences:
    - =~ doesn't exist
    - && -> -a, || -> -o
    - ( ) -> Op_LParen (they don't appear above)

    Args:
      id_spec: spec on which the Op, KW, Arith, BoolUnary and BoolBinary ids
        are already registered.  Three new BoolBinary_* ids are added to it.
      unary_lookup: filled with '-x' -> BoolUnary_x
      binary_lookup: filled with '-xx', '=', '==', '!=', '<', '>'
      other_lookup: filled with '!', '(', ')', ']'
    """
    ids = id_spec.id_str2int

    # Single-letter unary operators: -z, -n, -o, -f, ...
    for chars in (_UNARY_STR_CHARS, _UNARY_OTHER_CHARS, _UNARY_PATH_CHARS):
        for letter in chars:
            unary_lookup['-' + letter] = ids['BoolUnary_%s' % letter]

    # Dashed binary operators: -ef, -nt, -eq, ...
    for groups in (_BINARY_PATH, _BINARY_INT):
        for op_name in groups:
            binary_lookup['-' + op_name] = ids['BoolBinary_%s' % op_name]

    # Like the [[ definition above, but without globbing and without =~ .
    # These get fresh ids, explicitly filed under the existing BoolBinary kind.
    bool_binary_kind = id_spec.kind_str2int['BoolBinary']
    for suffix, token_str in (('Equal', '='), ('DEqual', '=='),
                              ('NEqual', '!=')):
        binary_lookup[token_str] = id_spec.AddBoolBinaryForBuiltin(
            suffix, bool_binary_kind)

    # Some of these names don't quite match, but it keeps the BoolParser simple.
    for token_str, existing in (('<', 'Op_Less'), ('>', 'Op_Great')):
        binary_lookup[token_str] = ids[existing]

    # NOTE: -a and -o overloaded as unary prefix operators BoolUnary_a and
    # BoolUnary_o.  The parser rather than the tokenizer handles this.
    for token_str, existing in (
        ('!', 'KW_Bang'),  # like [[ !
        ('(', 'Op_LParen'),
        (')', 'Op_RParen'),
        (']', 'Arith_RBracket'),  # For closing ]
    ):
        other_lookup[token_str] = ids[existing]