OILS / frontend / id_kind_def.py View on Github | oils.pub

817 lines, 554 significant
1#!/usr/bin/env python2
2# Copyright 2016 Andy Chu. All rights reserved.
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8"""
9id_kind_def.py - Id and Kind definitions, stored in Token
10
11NOTE: If this file changes, rebuild it with build/py.sh all
12"""
13from __future__ import print_function
14
15from _devbuild.gen.types_asdl import (bool_arg_type_e, bool_arg_type_t)
16#from mycpp.mylib import log
17
18from typing import List, Tuple, Dict, Optional, TYPE_CHECKING
19if TYPE_CHECKING: # avoid circular build deps
20 from _devbuild.gen.id_kind_asdl import Id_t, Kind_t
21
22
class IdSpec(object):
    """Allocator for the Ids and Kinds that form the "spine" of the shell
    program representation.

    Ids and Kinds are plain integers handed out sequentially, starting at 1,
    in the order the Add*() methods are called.  All Ids of a Kind must be
    added BEFORE the Kind itself is registered, because a new Id is filed
    under the current value of `kind_index`.
    """

    def __init__(self, kind_lookup, bool_ops):
        # type: (Dict[int, int], Dict[int, bool_arg_type_t]) -> None
        # Tables owned by the caller; the Add*() methods fill them in.
        self.kind_lookup = kind_lookup  # Id int -> Kind int
        self.bool_ops = bool_ops  # type: Dict[int, bool_arg_type_t]

        # Name -> integer, for both namespaces
        self.id_str2int = {}  # type: Dict[str, int]
        self.kind_str2int = {}  # type: Dict[str, int]

        self.kind_name_list = []  # type: List[str]
        self.kind_sizes = []  # type: List[int]  # optional stats

        # Kind int -> [(is_regex, pattern, Id int), ...]
        self.lexer_pairs = {}  # type: Dict[int, List[Tuple[bool, str, int]]]

        # Next free integers.  Bumped by _AddId() and _AddKind().
        # IMPORTANT: 1-based indices match what asdl/gen_python.py does!!!
        self.id_index = 1
        self.kind_index = 1

    def LexerPairs(self, kind):
        # type: (Kind_t) -> List[Tuple[bool, str, Id_t]]
        """Return a fresh list of the (is_regex, pattern, Id) triples
        registered for `kind`.

        Raises KeyError if no pairs were registered for that Kind.
        """
        return [(is_regex, pat, id_)
                for is_regex, pat, id_ in self.lexer_pairs[kind]]

    def _AddId(self, id_name, kind=None):
        # type: (str, Optional[int]) -> int
        """Allocate the next Id integer for `id_name` and record its Kind.

        Args:
          id_name: e.g. BoolBinary_Equal
          kind: Kind integer to file the Id under.  When None, the Kind that
                is currently being built (self.kind_index) is used.  The
                override exists for AddBoolBinaryForBuiltin.

        Returns:
          The integer that was assigned to the Id.
        """
        new_id = self.id_index

        self.id_str2int[id_name] = new_id
        self.kind_lookup[new_id] = self.kind_index if kind is None else kind

        self.id_index = new_id + 1  # mutate last
        return new_id

    def _AddKind(self, kind_name):
        # type: (str) -> None
        """Register `kind_name` under the current kind_index, then advance
        the index."""
        self.kind_name_list.append(kind_name)
        self.kind_str2int[kind_name] = self.kind_index
        self.kind_index += 1

    def _AddLexerPair(self, kind_name, name, char_pat):
        # type: (str, str, str) -> Tuple[bool, str, int]
        """Allocate the Id '<kind_name>_<name>' and return its lexer triple.

        The leading False marks `char_pat` as a constant, not a regex.
        """
        return (False, char_pat, self._AddId('%s_%s' % (kind_name, name)))

    def AddKind(self, kind_name, tokens):
        # type: (str, List[str]) -> None
        """Add a Kind whose Ids are named '<kind_name>_<token>'.

        No lexer patterns are registered for these Ids.
        """
        assert isinstance(tokens, list), tokens

        for suffix in tokens:
            self._AddId('%s_%s' % (kind_name, suffix))

        # The Kind is registered only after all of its Ids
        self._AddKind(kind_name)
        self.kind_sizes.append(len(tokens))  # debug info

    def AddKindPairs(self, kind_name, pairs):
        # type: (str, List[Tuple[str, str]]) -> None
        """Add a Kind from (name, constant pattern) pairs.

        Each pair produces the Id '<kind_name>_<name>' plus a constant lexer
        entry, retrievable with LexerPairs().
        """
        assert isinstance(pairs, list), pairs

        entries = []  # type: List[Tuple[bool, str, int]]
        for name, char_pat in pairs:
            entries.append(self._AddLexerPair(kind_name, name, char_pat))

        # kind_index still refers to this Kind until _AddKind() runs
        self.lexer_pairs[self.kind_index] = entries

        self._AddKind(kind_name)
        self.kind_sizes.append(len(pairs))  # debug info

    def AddBoolKind(
            self,
            kind_name,  # type: str
            arg_type_pairs,  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    ):
        # type: (...) -> None
        """Add a Kind of boolean operators, grouped by operand type.

        Args:
          kind_name: string, e.g. BoolUnary
          arg_type_pairs: list of (bool_arg_type_e value, pairs), where pairs
                          is a list of (name, constant pattern) as accepted
                          by AddKindPairs()
        """
        entries = []  # type: List[Tuple[bool, str, int]]
        total = 0
        for arg_type, pairs in arg_type_pairs:
            for name, char_pat in pairs:
                # BoolUnary_f, BoolBinary_eq, BoolBinary_NEqual
                entry = self._AddLexerPair(kind_name, name, char_pat)
                self.AddBoolOp(entry[2], arg_type)  # register type
                entries.append(entry)

            total += len(pairs)

        # kind_index still refers to this Kind until _AddKind() runs
        self.lexer_pairs[self.kind_index] = entries

        self._AddKind(kind_name)
        self.kind_sizes.append(total)  # debug info

    def AddBoolBinaryForBuiltin(self, id_name, kind):
        # type: (str, int) -> int
        """For [ = ] [ == ] and [ != ].

        These operators are NOT added to the lexer.  They are "lexed" as
        word.String.

        Args:
          id_name: suffix of the Id; 'BoolBinary_' is prepended
          kind: Kind integer to file the new Id under

        Returns:
          The integer assigned to the new Id.
        """
        id_int = self._AddId('BoolBinary_%s' % id_name, kind=kind)
        self.AddBoolOp(id_int, bool_arg_type_e.Str)
        return id_int

    def AddBoolOp(self, id_int, arg_type):
        # type: (int, bool_arg_type_t) -> None
        """Associate an Id integer with a bool_arg_type_e."""
        self.bool_ops[id_int] = arg_type
153
def AddKinds(spec):
    # type: (IdSpec) -> None
    """Register every non-boolean Kind, and the Ids within it, on `spec`.

    ORDER MATTERS: IdSpec numbers Ids and Kinds sequentially in the order
    they are added, so moving a call or a list entry changes the integer
    values.

    Kinds added with AddKindPairs() also register a constant lexer pattern
    for each Id; Kinds added with AddKind() register names only.
    """

    # A compound word, in arith context, boolean context, or command context.
    # A['foo'] A["foo"] A[$foo] A["$foo"] A[${foo}] A["${foo}"]
    spec.AddKind('Word', ['Compound'])

    # Token IDs in Kind.Arith are first to make the TDOP precedence table
    # small.
    #
    # NOTE: Could share Op_Pipe, Op_Amp, Op_DAmp, Op_Semi, Op_LParen, etc.
    # Actually all of Arith could be folded into Op, because we are using
    # WordParser._ReadArithWord vs. WordParser._ReadWord.
    spec.AddKindPairs(
        'Arith',
        [
            ('Semi', ';'),  # ternary for loop only
            ('Comma', ','),  # function call and C comma operator
            ('Plus', '+'),
            ('Minus', '-'),
            ('Star', '*'),
            ('Slash', '/'),
            ('Percent', '%'),
            ('DPlus', '++'),
            ('DMinus', '--'),
            ('DStar', '**'),
            ('LParen', '('),
            ('RParen', ')'),  # grouping and function call extension
            ('LBracket', '['),
            ('RBracket', ']'),  # array and assoc array subscript
            ('RBrace', '}'),  # for end of var sub

            # Logical Ops
            ('QMark', '?'),
            ('Colon', ':'),  # Ternary Op: a < b ? 0 : 1
            ('LessEqual', '<='),
            ('Less', '<'),
            ('GreatEqual', '>='),
            ('Great', '>'),
            ('DEqual', '=='),
            ('NEqual', '!='),
            # note: these 3 are not in YSH Expr.  (Could be used in find dialect.)
            ('DAmp', '&&'),
            ('DPipe', '||'),
            ('Bang', '!'),

            # Bitwise ops
            ('DGreat', '>>'),
            ('DLess', '<<'),
            # YSH: ^ is exponent
            ('Amp', '&'),
            ('Pipe', '|'),
            ('Caret', '^'),
            ('Tilde', '~'),
            ('Equal', '='),

            # Augmented Assignment for $(( ))
            # Must match the list in osh/arith_parse.py
            # YSH has **= //= like Python
            ('PlusEqual', '+='),
            ('MinusEqual', '-='),
            ('StarEqual', '*='),
            ('SlashEqual', '/='),
            ('PercentEqual', '%='),
            ('DGreatEqual', '>>='),
            ('DLessEqual', '<<='),
            ('AmpEqual', '&='),
            ('CaretEqual', '^='),
            ('PipeEqual', '|='),
        ])

    spec.AddKind('Eof', ['Real', 'RParen', 'Backtick'])

    spec.AddKind('Undefined', ['Tok'])  # for initial state

    # The Unknown kind is used when we lex something, but it's invalid.
    # Examples:
    #   ${^}
    #   $'\z'  Such bad codes are accepted in OSH, when no_parse_backslash is
    #          off, so we have to lex them.
    #   (x == y) should use === or ~==
    spec.AddKind('Unknown',
                 ['Tok', 'Backslash', 'DEqual', 'DAmp', 'DPipe', 'DDot'])

    spec.AddKind('Eol', ['Tok'])  # no more tokens on line (\0)

    # Ignored_Newline is for J8 lexing to count lines
    spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment', 'Newline'])

    # Id.WS_Space is for lex_mode_e.ShCommand; Id.Ignored_Space is for
    # lex_mode_e.Arith
    spec.AddKind('WS', ['Space'])

    spec.AddKind(
        'Lit',
        [
            'Chars',
            'CharsWithoutPrefix',  # for stripping leading whitespace
            'VarLike',
            'ArrayLhsOpen',
            'ArrayLhsClose',
            'Splice',  # @func(a, b)
            'AtLBracket',  # @[split(x)]
            'AtLBraceDot',  # @{.myproc arg1} should be builtin_sub
            'Other',
            'EscapedChar',  # \* is escaped
            'BackslashDoubleQuote',  # \"
            'LBracket',
            'RBracket',  # for assoc array literals, static globs
            'Star',
            'QMark',
            # Either brace expansion or keyword for { and }
            'LBrace',
            'RBrace',
            'Comma',
            'Equals',  # For = f()
            'Dollar',  # detecting 'echo $'
            'DRightBracket',  # the ]] that matches [[, NOT a keyword
            'Tilde',  # tilde expansion
            'Pound',  # for comment or VarOp state
            'TPound',  # for doc comments like ###
            'TDot',  # for multiline commands ...
            'Slash',
            'Percent',  #  / # % for patsub, NOT unary op
            'Colon',  # x=foo:~:~root needs tilde expansion
            'Digits',  # for lex_mode_e.Arith
            'At',  # for ${a[@]} in lex_mode_e.Arith, and detecting @[]
            'ArithVarLike',  # for $((var+1)).  Distinct from Lit_VarLike 'var='
            'BadBackslash',  # for "\z", not Id.Unknown_Backslash because it's a
            # syntax error in YSH, but NOT OSH
            'CompDummy',  # A fake Lit_* token to get partial words during
            # completion
            'Number',
            'RedirVarName'  # "{myvar}", as in {myvar}>out.txt (and on its own)
        ])

    # For recognizing \` and \" and \\ within backticks.  There's an extra layer
    # of backslash quoting.
    spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

    spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

    spec.AddKind(
        'Op',
        [
            'Newline',  # mostly equivalent to SEMI
            'Amp',  # &
            'Pipe',  # |
            'PipeAmp',  # |& -- bash extension for stderr
            'DAmp',  # &&
            'DPipe',  # ||
            'Semi',  # ;
            'DSemi',  # ;; for case
            'SemiAmp',  # ;& for case
            'DSemiAmp',  # ;;& for case
            'LParen',  # For subshell.  Not Kind.Left because it's NOT a WordPart.
            'RParen',  # Default, will be translated to Id.Right_*
            'DLeftParen',
            'DRightParen',

            # for [[ ]] language
            'Less',  # <
            'Great',  # >
            'Bang',  # !

            # YSH [] {}
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
        ])

    # YSH expressions use Kind.Expr and Kind.Arith (registered above)
    spec.AddKind(
        'Expr',
        [
            'Reserved',  # <- means nothing but it's reserved now
            'Symbol',  # %foo
            'Name',
            'DecInt',
            'BinInt',
            'OctInt',
            'HexInt',
            'Float',
            'Bang',  # eggex !digit, ![a-z]
            'Dot',
            'DDotLessThan',
            'DDotEqual',
            'Colon',  # mylist:pop()
            'RArrow',
            'RDArrow',
            'DSlash',  # integer division
            'TEqual',
            'NotDEqual',
            'TildeDEqual',  # === !== ~==
            'At',
            'DoubleAt',  # splice operators
            'Ellipsis',  # for varargs
            'Dollar',  # legacy regex
            'NotTilde',  # !~
            'DTilde',
            'NotDTilde',  # ~~ !~~
            'DStarEqual',  # **=, which bash doesn't have
            'DSlashEqual',  # //=, which bash doesn't have
            'CastedDummy',  # Used for @()  $() (words in lex_mode_e.ShCommand)
            # and ${}  ''  ""  (and all other strings)

            # Constants
            'Null',
            'True',
            'False',

            # Keywords are resolved after lexing, but otherwise behave like tokens.
            'And',
            'Or',
            'Not',

            # List comprehensions
            'For',
            'Is',
            'In',
            'If',
            'Else',
            'Capture',
            'As',

            # Unused
            'Func',
            'Proc',
        ])

    # For C-escaped strings.
    spec.AddKind(
        'Char',
        [
            'OneChar',
            'Stop',
            'Hex',  # \xff
            'YHex',  # \yff for J8 notation

            # Two variants of Octal: \377, and \0377.
            'Octal3',
            'Octal4',
            'Unicode4',
            'SurrogatePair',  # JSON
            'Unicode8',  # bash
            'UBraced',
            'Pound',  # YSH
            'AsciiControl',  # \x01-\x1f, what's disallowed in JSON
        ])

    # For lex_mode_e.BashRegex
    # Bash treats ( | ) as special, and space is allowed within ()
    # Note Id.Op_RParen -> Id.Right_BashRegex with lexer hint
    spec.AddKind('BashRegex', ['LParen', 'AllowedInParens'])

    spec.AddKind(
        'Eggex',
        [
            'Start',  # ^ or %start
            'End',  # $ or %end
            'Dot',  # . or dot
            # Future: %boundary generates \b in Python/Perl, etc.
        ])

    spec.AddKind(
        'Redir',
        [
            'Less',  # < stdin
            'Great',  # > stdout
            'DLess',  # << here doc redirect
            'TLess',  # <<< bash only here string
            'DGreat',  # >> append stdout
            'GreatAnd',  # >& descriptor redirect
            'LessAnd',  # <& descriptor redirect
            'DLessDash',  # <<- here doc redirect for tabs?
            'LessGreat',  # <>
            'Clobber',  # >| POSIX?
            'AndGreat',  # bash &> stdout/stderr to file
            'AndDGreat',  # bash &>> stdout/stderr append to file

            #'GreatPlus',  # >+ is append in YSH
            #'DGreatPlus', # >>+ is append to string in YSH
        ])

    # NOTE: This is for left/right WORDS only.  (( is not a word so it doesn't
    # get that.
    spec.AddKind(
        'Left',
        [
            'DoubleQuote',
            'JDoubleQuote',  # j" for J8 notation
            'SingleQuote',  # ''
            'DollarSingleQuote',  # $'' for \n escapes
            'RSingleQuote',  # r''
            'USingleQuote',  # u''
            'BSingleQuote',  # b''

            # Multiline versions
            'TDoubleQuote',  # """ """
            'DollarTDoubleQuote',  # $""" """
            'TSingleQuote',  # ''' '''
            'RTSingleQuote',  # r''' '''
            'UTSingleQuote',  # u''' '''
            'BTSingleQuote',  # b''' '''
            'Backtick',  # `
            'DollarParen',  # $(
            'DollarBrace',  # ${
            'DollarBraceZsh',  # ${(foo)
            'DollarDParen',  # $((
            'DollarBracket',  # $[ - synonym for $(( in bash and zsh
            'AtBracket',  # @[expr] array splice in expression mode
            'DollarDoubleQuote',  # $" for bash localized strings
            'ProcSubIn',  # <( )
            'ProcSubOut',  # >( )
            'AtParen',  # @( for split command sub
            'CaretParen',  # ^( for Block literal in expression mode
            'CaretBracket',  # ^[ for Expr literal
            'CaretBrace',  # ^{ for Arglist
            'CaretDoubleQuote',  # ^" for Template
            'ColonPipe',  # :| for word arrays
            'PercentParen',  # legacy %( for word arrays
        ])

    spec.AddKind(
        'Right',
        [
            'DoubleQuote',
            'SingleQuote',
            'Backtick',  # `
            'DollarBrace',  # }
            'DollarDParen',  # )) -- really the second one is a PushHint()
            # ArithSub2 is just Id.Arith_RBracket
            'DollarDoubleQuote',  # "
            'DollarSingleQuote',  # '

            # Disambiguated right parens
            'Subshell',  # )
            'ShFunction',  # )
            'CasePat',  # )
            'Initializer',  # )
            'ExtGlob',  # )
            'BashRegexGroup',  # )
            'BlockLiteral',  # } that matches &{ echo hi }
        ])

    spec.AddKind('ExtGlob', ['Comma', 'At', 'Star', 'Plus', 'QMark', 'Bang'])

    # First position of var sub ${
    # Id.VOp2_Pound -- however you can't tell the difference at first!  It could
    # be an op or a name.  So it makes sense to base it on the state.
    # Id.VOp2_At
    # But then you have AS_STAR, or Id.Arith_Star maybe

    spec.AddKind(
        'VSub',
        [
            'DollarName',  # $foo
            'Name',  # 'foo' in ${foo}
            'Number',  # $0 .. $9
            'Bang',  # $!
            'At',  # $@  or  [@] for array subscripting
            'Pound',  # $#  or  ${#var} for length
            'Dollar',  # $$
            'Star',  # $*
            'Hyphen',  # $-
            'QMark',  # $?
            'Dot',  # ${.myproc builtin sub}
        ])

    spec.AddKindPairs('VTest', [
        ('ColonHyphen', ':-'),
        ('Hyphen', '-'),
        ('ColonEquals', ':='),
        ('Equals', '='),
        ('ColonQMark', ':?'),
        ('QMark', '?'),
        ('ColonPlus', ':+'),
        ('Plus', '+'),
    ])

    # Statically parse @P, so @x etc. is an error.
    spec.AddKindPairs(
        'VOp0',
        [
            ('Q', '@Q'),  # ${x@Q} for quoting
            ('E', '@E'),
            ('P', '@P'),  # ${PS1@P} for prompt eval
            ('A', '@A'),
            ('a', '@a'),
        ])

    # String removal ops
    spec.AddKindPairs(
        'VOp1',
        [
            ('Percent', '%'),
            ('DPercent', '%%'),
            ('Pound', '#'),
            ('DPound', '##'),
            # Case ops, in bash.  At least parse them.  Execution might require
            # unicode stuff.
            ('Caret', '^'),
            ('DCaret', '^^'),
            ('Comma', ','),
            ('DComma', ',,'),
        ])

    spec.AddKindPairs(
        'VOpYsh',
        [
            ('Pipe', '|'),  # ${x|html}
            ('Space', ' '),  # ${x %.3f}
        ])

    # Not in POSIX, but in Bash
    spec.AddKindPairs(
        'VOp2',
        [
            ('Slash', '/'),  #  / for replacement
            ('Colon', ':'),  #  : for slicing
            ('LBracket', '['),  #  [ for indexing
            ('RBracket', ']'),  #  ] for indexing
        ])

    # Can only occur after ${!prefix@}
    spec.AddKindPairs('VOp3', [
        ('At', '@'),
        ('Star', '*'),
    ])

    # This kind is for Node types that are NOT tokens.
    spec.AddKind(
        'Node',
        [
            # Arithmetic nodes
            'PostDPlus',
            'PostDMinus',  # Postfix inc/dec.
            # Prefix inc/dec use Arith_DPlus/Arith_DMinus.
            'UnaryPlus',
            'UnaryMinus',  # +1 and -1, to distinguish from infix.
            # Actually we don't need this because they
            # will be under Expr1/Plus vs Expr2/Plus.
            'NotIn',
            'IsNot',  # For YSH comparisons
        ])

    # NOTE: Not doing AddKindPairs() here because oil will have a different set
    # of keywords.  It will probably have for/in/while/until/case/if/else/elif,
    # and then func/proc.
    spec.AddKind(
        'KW',
        [
            'DLeftBracket',
            'Bang',
            'For',
            'While',
            'Until',
            'Do',
            'Done',
            'In',
            'Case',
            'Esac',
            'If',
            'Fi',
            'Then',
            'Else',
            'Elif',
            'Function',
            'Time',

            # YSH keywords.
            'Const',
            'Var',
            'SetVar',
            'SetGlobal',
            # later: Auto?
            'Call',
            'Proc',
            'Typed',
            'Func',

            # builtins, NOT keywords: use, fork, wait, etc.
            # Things that don't affect parsing shouldn't be keywords.
        ])

    # Unlike bash, we parse control flow statically.  They're not
    # dynamically-resolved builtins.
    spec.AddKind('ControlFlow', ['Break', 'Continue', 'Return', 'Exit'])

    # Special Kind for lookahead in the lexer.  It's never seen by anything else.
    spec.AddKind('LookAhead', ['FuncParens'])

    # For parsing globs and converting them to regexes.
    spec.AddKind('Glob', [
        'LBracket',
        'RBracket',
        'Star',
        'QMark',
        'Bang',
        'Caret',
        'EscapedChar',
        'BadBackslash',
        'CleanLiterals',
        'OtherLiteral',
    ])

    # For C-escaped strings.
    # NOTE(review): this comment duplicates the one on Kind 'Char'; the Id
    # names here look like they describe format specifiers -- confirm.
    spec.AddKind(
        'Format',
        [
            'EscapedPercent',
            'Percent',  # starts another lexer mode
            'Flag',
            'Num',
            'Dot',
            'Type',
            'Star',
            'Time',
            'Zero',
        ])

    # For parsing prompt strings like PS1.
    spec.AddKind('PS', [
        'Subst',
        'Octal3',
        'LBrace',
        'RBrace',
        'Literals',
        'BadBackslash',
    ])

    spec.AddKind('Range', ['Int', 'Char', 'Dots', 'Other'])

    spec.AddKind(
        'J8',
        [
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
            'Comma',
            'Colon',
            'Null',
            'Bool',
            'Int',  # Number
            'Float',  # Number

            # High level tokens for "" b'' u''
            # We don't distinguish them in the parser, because we recognize
            # strings in the lexer.
            'String',

            # JSON8 and NIL8
            'Identifier',
            'Newline',  # J8 Lines only, similar to Op_Newline
            'Tab',  # Reserved for TSV8

            # NIL8 only
            'LParen',
            'RParen',
            #'Symbol',
            'Operator',
        ])

    spec.AddKind('ShNumber', ['Dec', 'Hex', 'Oct', 'BaseN'])
720
721
# Operator tables shared between [[ and test/[.
#
# Each character of a _UNARY_* string is one single-letter unary operator:
# AddBoolKinds() registers the Id BoolUnary_<c>, matched as '-<c>'.
_UNARY_STR_CHARS = 'zn'  # -z -n
_UNARY_OTHER_CHARS = 'otvR'  # -o is overloaded
_UNARY_PATH_CHARS = 'abcdefghkLprsSuwxOGN'  # -a is overloaded

# Each entry is one binary operator: AddBoolKinds() registers the Id
# BoolBinary_<s>, matched as '-<s>'.
_BINARY_PATH = ['ef', 'nt', 'ot']
_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']
729
730
731def _Dash(strs):
732 # type: (List[str]) -> List[Tuple[str, str]]
733 # Gives a pair of (token name, string to match)
734 return [(s, '-' + s) for s in strs]
735
736
def AddBoolKinds(spec):
    # type: (IdSpec) -> None
    """Register the boolean operator Kinds (BoolUnary, BoolBinary) on `spec`.

    Also records an operand type (bool_arg_type_e) for every boolean
    operator Id, including a few Ids that belong to other Kinds (Op_DAmp,
    Op_DPipe, KW_Bang, Op_Less, Op_Great).  Those must already be registered,
    so this has to run after AddKinds().
    """
    spec.AddBoolKind('BoolUnary', [
        (bool_arg_type_e.Str, _Dash(list(_UNARY_STR_CHARS))),
        (bool_arg_type_e.Other, _Dash(list(_UNARY_OTHER_CHARS))),
        (bool_arg_type_e.Path, _Dash(list(_UNARY_PATH_CHARS))),
    ])

    Id = spec.id_str2int
    Kind = spec.kind_str2int

    # test --true and test --false have no single letter flags.  They need no
    # lexing.
    #
    # The Kind is passed explicitly: AddBoolKind() above has already advanced
    # spec.kind_index, so the default would file these two Ids under the NEXT
    # Kind to be registered (BoolBinary) rather than BoolUnary.
    bool_unary_kind = Kind['BoolUnary']
    for long_flag in ('true', 'false'):
        id_name = 'BoolUnary_%s' % long_flag
        id_int = spec._AddId(id_name, kind=bool_unary_kind)
        spec.AddBoolOp(id_int, bool_arg_type_e.Str)

    spec.AddBoolKind('BoolBinary', [
        (bool_arg_type_e.Str, [
            ('GlobEqual', '='),
            ('GlobDEqual', '=='),
            ('GlobNEqual', '!='),
            ('EqualTilde', '=~'),
        ]),
        (bool_arg_type_e.Path, _Dash(_BINARY_PATH)),
        (bool_arg_type_e.Int, _Dash(_BINARY_INT)),
    ])

    # Logical operators take no typed operand
    spec.AddBoolOp(Id['Op_DAmp'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['Op_DPipe'], bool_arg_type_e.Undefined)
    spec.AddBoolOp(Id['KW_Bang'], bool_arg_type_e.Undefined)

    # < and > are registered with string operands
    spec.AddBoolOp(Id['Op_Less'], bool_arg_type_e.Str)
    spec.AddBoolOp(Id['Op_Great'], bool_arg_type_e.Str)
772
773
def SetupTestBuiltin(
        id_spec,  # type: IdSpec
        unary_lookup,  # type: Dict[str, int]
        binary_lookup,  # type: Dict[str, int]
        other_lookup,  # type: Dict[str, int]
):
    # type: (...) -> None
    """Fill the string -> Id lookup tables used by test/[.

    Similar to AddBoolKinds above.  Differences:
    - =~ doesn't exist
    - && -> -a, || -> -o
    - ( ) -> Op_LParen (they don't appear above)

    Besides filling the three dicts, this registers three new Ids on
    id_spec: BoolBinary_Equal, BoolBinary_DEqual and BoolBinary_NEqual.
    """
    Id = id_spec.id_str2int

    # Single-letter unary operators: -z -n -o ... -f ...
    for flag_chars in (_UNARY_STR_CHARS, _UNARY_OTHER_CHARS,
                       _UNARY_PATH_CHARS):
        for letter in flag_chars:
            unary_lookup['-' + letter] = Id['BoolUnary_' + letter]

    # Dashed binary operators: -ef -nt -ot, -eq -ne ...
    for op_names in (_BINARY_PATH, _BINARY_INT):
        for op_name in op_names:
            binary_lookup['-' + op_name] = Id['BoolBinary_' + op_name]

    # Like the [[ definition above, but without globbing and without =~ .
    bool_binary_kind = id_spec.kind_str2int['BoolBinary']
    builtin_ops = (('Equal', '='), ('DEqual', '=='), ('NEqual', '!='))
    for suffix, token_str in builtin_ops:
        binary_lookup[token_str] = id_spec.AddBoolBinaryForBuiltin(
            suffix, bool_binary_kind)

    # Some of these names don't quite match, but it keeps the BoolParser simple.
    binary_lookup['<'] = Id['Op_Less']
    binary_lookup['>'] = Id['Op_Great']

    # NOTE: -a and -o overloaded as unary prefix operators BoolUnary_a and
    # BoolUnary_o.  The parser rather than the tokenizer handles this.
    other_lookup['!'] = Id['KW_Bang']  # like [[ !
    other_lookup['('] = Id['Op_LParen']
    other_lookup[')'] = Id['Op_RParen']

    other_lookup[']'] = Id['Arith_RBracket']  # For closing ]