frontend/id_kind

OILS / frontend / id_kind_def.py View on Github | oilshell.org

809 lines, 549 significant

1	#!/usr/bin/env python2
2	# Copyright 2016 Andy Chu. All rights reserved.
3	# Licensed under the Apache License, Version 2.0 (the "License");
4	# you may not use this file except in compliance with the License.
5	# You may obtain a copy of the License at
6	#
7	# http://www.apache.org/licenses/LICENSE-2.0
8	"""
9	id_kind_def.py - Id and Kind definitions, stored in Token
10
11	NOTE: If this file changes, rebuild it with build/py.sh all
12	"""
13	from __future__ import print_function
14
15	from _devbuild.gen.types_asdl import (bool_arg_type_e, bool_arg_type_t)
16	#from mycpp.mylib import log
17
18	from typing import List, Tuple, Dict, Optional, TYPE_CHECKING
19	if TYPE_CHECKING: # avoid circular build deps
20	from _devbuild.gen.id_kind_asdl import Id_t, Kind_t
21
22
class IdSpec(object):
    """Allocator for the integer Ids and Kinds that form the "spine" of the
    shell program representation.

    Ids and Kinds are numbered sequentially, starting at 1, in the order the
    Add*() methods are called.  Each Id is filed under the Kind that is "open"
    at the time it is added, i.e. the current value of kind_index.
    """

    def __init__(self, kind_lookup, bool_ops):
        # type: (Dict[int, int], Dict[int, bool_arg_type_t]) -> None
        """
        Args:
          kind_lookup: caller-owned dict, filled in with Id int -> Kind int
          bool_ops: caller-owned dict, filled in with Id int -> bool_arg_type_e
        """
        self.kind_lookup = kind_lookup  # Id int -> Kind int
        self.bool_ops = bool_ops  # type: Dict[int, bool_arg_type_t]

        self.id_str2int = {}  # type: Dict[str, int]
        self.kind_str2int = {}  # type: Dict[str, int]

        self.kind_name_list = []  # type: List[str]
        self.kind_sizes = []  # type: List[int]  # optional stats

        # Kind int -> [(is_regex, pattern, Id int), ...]
        self.lexer_pairs = {}  # type: Dict[int, List[Tuple[bool, str, int]]]

        # Both counters advance on each registration.
        # IMPORTANT: 1-based indices match what asdl/gen_python.py does!!!
        self.id_index = 1
        self.kind_index = 1

    def LexerPairs(self, kind):
        # type: (Kind_t) -> List[Tuple[bool, str, Id_t]]
        """Return a fresh list of the (is_regex, pattern, Id) triples
        registered for the given kind.

        Raises KeyError if no pairs were registered for that kind.
        """
        return [(is_regex, pat, id_)
                for is_regex, pat, id_ in self.lexer_pairs[kind]]

    def _AddId(self, id_name, kind=None):
        # type: (str, Optional[int]) -> int
        """Allocate the next Id integer for id_name and record its Kind.

        Args:
          id_name: e.g. BoolBinary_Equal
          kind: override autoassignment.  For AddBoolBinaryForBuiltin

        Returns:
          The Id integer that was assigned.
        """
        new_id = self.id_index

        self.id_str2int[id_name] = new_id
        # Without an override, the Id belongs to the kind currently being built.
        self.kind_lookup[new_id] = self.kind_index if kind is None else kind

        self.id_index = new_id + 1  # mutate last
        return new_id

    def _AddKind(self, kind_name):
        # type: (str) -> None
        """Assign the current kind index to kind_name, then advance it."""
        self.kind_str2int[kind_name] = self.kind_index
        self.kind_name_list.append(kind_name)
        self.kind_index += 1

    def AddKind(self, kind_name, tokens):
        # type: (str, List[str]) -> None
        """Register a Kind whose Ids are named <kind_name>_<token>.

        No lexer patterns are recorded for these Ids.
        """
        assert isinstance(tokens, list), tokens

        for token_name in tokens:
            self._AddId('%s_%s' % (kind_name, token_name))

        # The kind index may only advance once all of its Ids are added.
        self._AddKind(kind_name)
        self.kind_sizes.append(len(tokens))  # debug info

    def AddKindPairs(self, kind_name, pairs):
        # type: (str, List[Tuple[str, str]]) -> None
        """Register a Kind from (token name, constant string) pairs.

        Like AddKind(), but each Id is also recorded as a constant
        (non-regex) lexer pattern under this kind.
        """
        assert isinstance(pairs, list), pairs

        constants = []
        for token_name, char_pat in pairs:
            id_int = self._AddId('%s_%s' % (kind_name, token_name))
            constants.append((False, char_pat, id_int))  # False: not a regex

        self.lexer_pairs[self.kind_index] = constants

        # The kind index may only advance once all of its Ids are added.
        self._AddKind(kind_name)
        self.kind_sizes.append(len(pairs))  # debug info

    def AddBoolKind(
            self,
            kind_name,  # type: str
            arg_type_pairs,  # type: List[Tuple[bool_arg_type_t, List[Tuple[str, str]]]]
    ):
        # type: (...) -> None
        """Register a Kind of boolean operators, grouped by argument type.

        Args:
          kind_name: string, e.g. BoolUnary
          arg_type_pairs: list of (bool_arg_type_e, [(token name, constant
            string), ...]).  Every Id in a group is associated with that
            group's arg type and recorded as a constant lexer pattern.
        """
        constants = []
        total = 0
        for arg_type, pairs in arg_type_pairs:
            total += len(pairs)
            for token_name, char_pat in pairs:
                # BoolUnary_f, BoolBinary_eq, BoolBinary_NEqual
                id_int = self._AddId('%s_%s' % (kind_name, token_name))
                self.AddBoolOp(id_int, arg_type)  # register type
                constants.append((False, char_pat, id_int))  # constant

        self.lexer_pairs[self.kind_index] = constants

        # Must do this after _AddId()
        self._AddKind(kind_name)
        self.kind_sizes.append(total)  # debug info

    def AddBoolBinaryForBuiltin(self, id_name, kind):
        # type: (str, int) -> int
        """For [ = ] [ == ] and [ != ].

        These operators are NOT added to the lexer.  They are "lexed" as
        word.String.

        Args:
          id_name: suffix of the Id; the full name is BoolBinary_<id_name>
          kind: Kind integer to file the new Id under

        Returns:
          The Id integer that was assigned.
        """
        id_int = self._AddId('BoolBinary_%s' % id_name, kind=kind)
        self.AddBoolOp(id_int, bool_arg_type_e.Str)
        return id_int

    def AddBoolOp(self, id_int, arg_type):
        # type: (int, bool_arg_type_t) -> None
        """Associate an Id integer with a bool_arg_type_e."""
        self.bool_ops[id_int] = arg_type
152
153
def AddKinds(spec):
    # type: (IdSpec) -> None
    """Register every non-boolean Kind, and the Ids within it, on spec.

    Kinds are registered through spec.AddKind() (names only) or
    spec.AddKindPairs() (names plus the constant string the lexer matches).
    The order of the calls and of the entries within each list is
    significant, because Ids and Kinds are numbered sequentially as they are
    added.

    The strings passed to AddKindPairs() are constants, not regexes, so
    operators such as | and || must be written without backslash escapes.
    """

    # A compound word, in arith context, boolean context, or command context.
    # A['foo'] A["foo"] A[$foo] A["$foo"] A[${foo}] A["${foo}"]
    spec.AddKind('Word', ['Compound'])

    # Token IDs in Kind.Arith are first to make the TDOP precedence table
    # small.
    #
    # NOTE: Could share Op_Pipe, Op_Amp, Op_DAmp, Op_Semi, Op_LParen, etc.
    # Actually all of Arith could be folded into Op, because we are using
    # WordParser._ReadArithWord vs. WordParser._ReadWord.
    spec.AddKindPairs(
        'Arith',
        [
            ('Semi', ';'),  # ternary for loop only
            ('Comma', ','),  # function call and C comma operator
            ('Plus', '+'),
            ('Minus', '-'),
            ('Star', '*'),
            ('Slash', '/'),
            ('Percent', '%'),
            ('DPlus', '++'),
            ('DMinus', '--'),
            ('DStar', '**'),
            ('LParen', '('),
            ('RParen', ')'),  # grouping and function call extension
            ('LBracket', '['),
            ('RBracket', ']'),  # array and assoc array subscript
            ('RBrace', '}'),  # for end of var sub

            # Logical Ops
            ('QMark', '?'),
            ('Colon', ':'),  # Ternary Op: a < b ? 0 : 1
            ('LessEqual', '<='),
            ('Less', '<'),
            ('GreatEqual', '>='),
            ('Great', '>'),
            ('DEqual', '=='),
            ('NEqual', '!='),
            # note: these 3 are not in YSH Expr.  (Could be used in find dialect.)
            ('DAmp', '&&'),
            ('DPipe', '||'),
            ('Bang', '!'),

            # Bitwise ops
            ('DGreat', '>>'),
            ('DLess', '<<'),
            # YSH: ^ is exponent
            ('Amp', '&'),
            ('Pipe', '|'),
            ('Caret', '^'),
            ('Tilde', '~'),
            ('Equal', '='),

            # Augmented Assignment for $(( ))
            # Must match the list in osh/arith_parse.py
            # YSH has **= //= like Python
            ('PlusEqual', '+='),
            ('MinusEqual', '-='),
            ('StarEqual', '*='),
            ('SlashEqual', '/='),
            ('PercentEqual', '%='),
            ('DGreatEqual', '>>='),
            ('DLessEqual', '<<='),
            ('AmpEqual', '&='),
            ('CaretEqual', '^='),
            ('PipeEqual', '|='),
        ])

    spec.AddKind('Eof', ['Real', 'RParen', 'Backtick'])

    spec.AddKind('Undefined', ['Tok'])  # for initial state

    # The Unknown kind is used when we lex something, but it's invalid.
    # Examples:
    #   ${^}
    #   $'\z'  Such bad codes are accepted when parse_backslash is on
    #          (default in OSH), so we have to lex them.
    #   (x == y) should use === or ~==
    spec.AddKind('Unknown',
                 ['Tok', 'Backslash', 'DEqual', 'DAmp', 'DPipe', 'DDot'])

    spec.AddKind('Eol', ['Tok'])  # no more tokens on line (\0)

    # Ignored_Newline is for J8 lexing to count lines
    spec.AddKind('Ignored', ['LineCont', 'Space', 'Comment', 'Newline'])

    # Id.WS_Space is for lex_mode_e.ShCommand; Id.Ignored_Space is for
    # lex_mode_e.Arith
    spec.AddKind('WS', ['Space'])

    spec.AddKind(
        'Lit',
        [
            'Chars',
            'CharsWithoutPrefix',  # for stripping leading whitespace
            'VarLike',
            'ArrayLhsOpen',
            'ArrayLhsClose',
            'Splice',  # @func(a, b)
            'AtLBracket',  # @[split(x)]
            'AtLBraceDot',  # @{.myproc arg1} should be builtin_sub
            'Other',
            'EscapedChar',  # \* is escaped
            'LBracket',
            'RBracket',  # for assoc array literals, static globs
            'Star',
            'QMark',
            # Either brace expansion or keyword for { and }
            'LBrace',
            'RBrace',
            'Comma',
            'Equals',  # For = f()
            'Dollar',  # detecting 'echo $'
            'DRightBracket',  # the ]] that matches [[, NOT a keyword
            'Tilde',  # tilde expansion
            'Pound',  # for comment or VarOp state
            'TPound',  # for doc comments like ###
            'TDot',  # for multiline commands ...
            'Slash',
            'Percent',  # / # % for patsub, NOT unary op
            'Colon',  # x=foo:~:~root needs tilde expansion
            'Digits',  # for lex_mode_e.Arith
            'At',  # for ${a[@]} in lex_mode_e.Arith, and detecting @[]
            'ArithVarLike',  # for $((var+1)).  Distinct from Lit_VarLike 'var='
            'BadBackslash',  # for "\z", not Id.Unknown_Backslash because it's a
            # syntax error in YSH, but NOT OSH
            'CompDummy',  # A fake Lit_* token to get partial words during
            # completion
        ])

    # For recognizing \` and \" and \\ within backticks.  There's an extra layer
    # of backslash quoting.
    spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

    spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

    spec.AddKind(
        'Op',
        [
            'Newline',  # mostly equivalent to SEMI
            'Amp',  # &
            'Pipe',  # |
            'PipeAmp',  # |& -- bash extension for stderr
            'DAmp',  # &&
            'DPipe',  # ||
            'Semi',  # ;
            'DSemi',  # ;; for case
            'SemiAmp',  # ;& for case
            'DSemiAmp',  # ;;& for case
            'LParen',  # For subshell.  Not Kind.Left because it's NOT a WordPart.
            'RParen',  # Default, will be translated to Id.Right_*
            'DLeftParen',
            'DRightParen',

            # for [[ ]] language
            'Less',  # <
            'Great',  # >
            'Bang',  # !

            # YSH [] {}
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
        ])

    # YSH expressions use Kind.Expr and Kind.Arith (further below)
    spec.AddKind(
        'Expr',
        [
            'Reserved',  # <- means nothing but it's reserved now
            'Symbol',  # %foo
            'Name',
            'DecInt',
            'BinInt',
            'OctInt',
            'HexInt',
            'Float',
            'Bang',  # eggex !digit, ![a-z]
            'Dot',
            'DDotLessThan',
            'DDotEqual',
            'Colon',  # mylist:pop()
            'RArrow',
            'RDArrow',
            'DSlash',  # integer division
            'TEqual',
            'NotDEqual',
            'TildeDEqual',  # === !== ~==
            'At',
            'DoubleAt',  # splice operators
            'Ellipsis',  # for varargs
            'Dollar',  # legacy regex
            'NotTilde',  # !~
            'DTilde',
            'NotDTilde',  # ~~ !~~
            'DStarEqual',  # **=, which bash doesn't have
            'DSlashEqual',  # //=, which bash doesn't have
            'CastedDummy',  # Used for @() $() (words in lex_mode_e.ShCommand)
            # and ${} '' "" (and all other strings)

            # Constants
            'Null',
            'True',
            'False',

            # Keywords are resolved after lexing, but otherwise behave like tokens.
            'And',
            'Or',
            'Not',

            # List comprehensions
            'For',
            'Is',
            'In',
            'If',
            'Else',
            'Func',  # For function literals
            'Capture',
            'As',
        ])

    # For C-escaped strings.
    spec.AddKind(
        'Char',
        [
            'OneChar',
            'Stop',
            'Hex',  # \xff
            'YHex',  # \yff for J8 notation

            # Two variants of Octal: \377, and \0377.
            'Octal3',
            'Octal4',
            'Unicode4',
            'SurrogatePair',  # JSON
            'Unicode8',  # bash
            'UBraced',
            'Pound',  # YSH
            'AsciiControl',  # \x01-\x1f, what's disallowed in JSON
        ])

    # For lex_mode_e.BashRegex
    # Bash treats ( | ) as special, and space is allowed within ()
    # Note Id.Op_RParen -> Id.Right_BashRegex with lexer hint
    spec.AddKind('BashRegex', ['LParen', 'AllowedInParens'])

    spec.AddKind(
        'Eggex',
        [
            'Start',  # ^ or %start
            'End',  # $ or %end
            'Dot',  # . or dot
            # Future: %boundary generates \b in Python/Perl, etc.
        ])

    spec.AddKind(
        'Redir',
        [
            'Less',  # < stdin
            'Great',  # > stdout
            'DLess',  # << here doc redirect
            'TLess',  # <<< bash only here string
            'DGreat',  # >> append stdout
            'GreatAnd',  # >& descriptor redirect
            'LessAnd',  # <& descriptor redirect
            'DLessDash',  # <<- here doc redirect for tabs?
            'LessGreat',  # <>
            'Clobber',  # >| POSIX?
            'AndGreat',  # bash &> stdout/stderr to file
            'AndDGreat',  # bash &>> stdout/stderr append to file

            #'GreatPlus',  # >+ is append in YSH
            #'DGreatPlus', # >>+ is append to string in YSH
        ])

    # NOTE: This is for left/right WORDS only.  (( is not a word so it doesn't
    # get that.
    spec.AddKind(
        'Left',
        [
            'DoubleQuote',
            'JDoubleQuote',  # j" for J8 notation
            'SingleQuote',  # ''
            'DollarSingleQuote',  # $'' for \n escapes
            'RSingleQuote',  # r''
            'USingleQuote',  # u''
            'BSingleQuote',  # b''

            # Multiline versions
            'TDoubleQuote',  # """ """
            'DollarTDoubleQuote',  # $""" """
            'TSingleQuote',  # ''' '''
            'RTSingleQuote',  # r''' '''
            'UTSingleQuote',  # u''' '''
            'BTSingleQuote',  # b''' '''
            'Backtick',  # `
            'DollarParen',  # $(
            'DollarBrace',  # ${
            'DollarBraceZsh',  # ${(foo)
            'DollarDParen',  # $((
            'DollarBracket',  # $[ - synonym for $(( in bash and zsh
            'DollarDoubleQuote',  # $" for bash localized strings
            'ProcSubIn',  # <( )
            'ProcSubOut',  # >( )
            'AtParen',  # @( for split command sub
            'CaretParen',  # ^( for Block literal in expression mode
            'CaretBracket',  # ^[ for Expr literal
            'CaretBrace',  # ^{ for Arglist
            'CaretDoubleQuote',  # ^" for Template
            'ColonPipe',  # :| for word arrays
            'PercentParen',  # legacy %( for word arrays
        ])

    spec.AddKind(
        'Right',
        [
            'DoubleQuote',
            'SingleQuote',
            'Backtick',  # `
            'DollarBrace',  # }
            'DollarDParen',  # )) -- really the second one is a PushHint()
            # ArithSub2 is just Id.Arith_RBracket
            'DollarDoubleQuote',  # "
            'DollarSingleQuote',  # '

            # Disambiguated right parens
            'Subshell',  # )
            'ShFunction',  # )
            'CasePat',  # )
            'ShArrayLiteral',  # )
            'ExtGlob',  # )
            'BashRegexGroup',  # )
            'BlockLiteral',  # } that matches &{ echo hi }
        ])

    spec.AddKind('ExtGlob', ['Comma', 'At', 'Star', 'Plus', 'QMark', 'Bang'])

    # First position of var sub ${
    # Id.VOp2_Pound -- however you can't tell the difference at first!  It could
    # be an op or a name.  So it makes sense to base it on the state.
    # Id.VOp2_At
    # But then you have AS_STAR, or Id.Arith_Star maybe

    spec.AddKind(
        'VSub',
        [
            'DollarName',  # $foo
            'Name',  # 'foo' in ${foo}
            'Number',  # $0 .. $9
            'Bang',  # $!
            'At',  # $@ or [@] for array subscripting
            'Pound',  # $# or ${#var} for length
            'Dollar',  # $$
            'Star',  # $*
            'Hyphen',  # $-
            'QMark',  # $?
            'Dot',  # ${.myproc builtin sub}
        ])

    spec.AddKindPairs('VTest', [
        ('ColonHyphen', ':-'),
        ('Hyphen', '-'),
        ('ColonEquals', ':='),
        ('Equals', '='),
        ('ColonQMark', ':?'),
        ('QMark', '?'),
        ('ColonPlus', ':+'),
        ('Plus', '+'),
    ])

    # Statically parse @P, so @x etc. is an error.
    spec.AddKindPairs(
        'VOp0',
        [
            ('Q', '@Q'),  # ${x@Q} for quoting
            ('E', '@E'),
            ('P', '@P'),  # ${PS1@P} for prompt eval
            ('A', '@A'),
            ('a', '@a'),
        ])

    # String removal ops
    spec.AddKindPairs(
        'VOp1',
        [
            ('Percent', '%'),
            ('DPercent', '%%'),
            ('Pound', '#'),
            ('DPound', '##'),
            # Case ops, in bash.  At least parse them.  Execution might require
            # unicode stuff.
            ('Caret', '^'),
            ('DCaret', '^^'),
            ('Comma', ','),
            ('DComma', ',,'),
        ])

    spec.AddKindPairs(
        'VOpYsh',
        [
            ('Pipe', '|'),  # ${x|html}
            ('Space', ' '),  # ${x %.3f}
        ])

    # Not in POSIX, but in Bash
    spec.AddKindPairs(
        'VOp2',
        [
            ('Slash', '/'),  # / for replacement
            ('Colon', ':'),  # : for slicing
            ('LBracket', '['),  # [ for indexing
            ('RBracket', ']'),  # ] for indexing
        ])

    # Can only occur after ${!prefix@}
    spec.AddKindPairs('VOp3', [
        ('At', '@'),
        ('Star', '*'),
    ])

    # This kind is for Node types that are NOT tokens.
    spec.AddKind(
        'Node',
        [
            # Arithmetic nodes
            'PostDPlus',
            'PostDMinus',  # Postfix inc/dec.
            # Prefix inc/dec use Arith_DPlus/Arith_DMinus.
            'UnaryPlus',
            'UnaryMinus',  # +1 and -1, to distinguish from infix.
            # Actually we don't need this because they
            # will be under Expr1/Plus vs Expr2/Plus.
            'NotIn',
            'IsNot',  # For YSH comparisons
        ])

    # NOTE: Not doing AddKindPairs() here because oil will have a different set
    # of keywords.  It will probably have for/in/while/until/case/if/else/elif,
    # and then func/proc.
    spec.AddKind(
        'KW',
        [
            'DLeftBracket',
            'Bang',
            'For',
            'While',
            'Until',
            'Do',
            'Done',
            'In',
            'Case',
            'Esac',
            'If',
            'Fi',
            'Then',
            'Else',
            'Elif',
            'Function',
            'Time',

            # YSH keywords.
            'Const',
            'Var',
            'SetVar',
            'SetGlobal',
            # later: Auto?
            'Call',
            'Proc',
            'Typed',
            'Func',

            # builtins, NOT keywords: use, fork, wait, etc.
            # Things that don't affect parsing shouldn't be keywords.
        ])

    # Unlike bash, we parse control flow statically.  They're not
    # dynamically-resolved builtins.
    spec.AddKind('ControlFlow', ['Break', 'Continue', 'Return', 'Exit'])

    # Special Kind for lookahead in the lexer.  It's never seen by anything else.
    spec.AddKind('LookAhead', ['FuncParens'])

    # For parsing globs and converting them to regexes.
    spec.AddKind('Glob', [
        'LBracket',
        'RBracket',
        'Star',
        'QMark',
        'Bang',
        'Caret',
        'EscapedChar',
        'BadBackslash',
        'CleanLiterals',
        'OtherLiteral',
    ])

    # For C-escaped strings.
    spec.AddKind(
        'Format',
        [
            'EscapedPercent',
            'Percent',  # starts another lexer mode
            'Flag',
            'Num',
            'Dot',
            'Type',
            'Star',
            'Time',
            'Zero',
        ])

    # For parsing prompt strings like PS1.
    spec.AddKind('PS', [
        'Subst',
        'Octal3',
        'LBrace',
        'RBrace',
        'Literals',
        'BadBackslash',
    ])

    spec.AddKind('Range', ['Int', 'Char', 'Dots', 'Other'])

    spec.AddKind(
        'J8',
        [
            'LBracket',
            'RBracket',
            'LBrace',
            'RBrace',
            'Comma',
            'Colon',
            'Null',
            'Bool',
            'Int',  # Number
            'Float',  # Number

            # High level tokens for "" b'' u''
            # We don't distinguish them in the parser, because we recognize
            # strings in the lexer.
            'String',

            # JSON8 and NIL8
            'Identifier',
            'Newline',  # J8 Lines only, similar to Op_Newline
            'Tab',  # Reserved for TSV8

            # NIL8 only
            'LParen',
            'RParen',
            #'Symbol',
            'Operator',
        ])

    spec.AddKind('ShNumber', ['Dec', 'Hex', 'Oct', 'BaseN'])
712
713
714	# Shared between [[ and test/[.
715	_UNARY_STR_CHARS = 'zn' # -z -n
716	_UNARY_OTHER_CHARS = 'otvR' # -o is overloaded
717	_UNARY_PATH_CHARS = 'abcdefghkLprsSuwxOGN' # -a is overloaded
718
719	_BINARY_PATH = ['ef', 'nt', 'ot']
720	_BINARY_INT = ['eq', 'ne', 'gt', 'ge', 'lt', 'le']
721
722
723	def _Dash(strs):
724	# type: (List[str]) -> List[Tuple[str, str]]
725	# Gives a pair of (token name, string to match)
726	return [(s, '-' + s) for s in strs]
727
728
def AddBoolKinds(spec):
    # type: (IdSpec) -> None
    """Register the BoolUnary and BoolBinary kinds, and give arg types to the
    existing Ids that also act as boolean operators.

    Looks up Op_DAmp, Op_DPipe, KW_Bang, Op_Less and Op_Great by name, so
    those Ids must already be registered on spec.
    """
    unary_groups = [
        (bool_arg_type_e.Str, _UNARY_STR_CHARS),
        (bool_arg_type_e.Other, _UNARY_OTHER_CHARS),
        (bool_arg_type_e.Path, _UNARY_PATH_CHARS),
    ]
    spec.AddBoolKind(
        'BoolUnary',
        [(arg_type, _Dash(list(chars))) for arg_type, chars in unary_groups])

    # test --true and test --false have no single letter flags.  They need no
    # lexing.
    #
    # NOTE(review): these Ids are added after AddBoolKind('BoolUnary') has
    # already advanced spec.kind_index, so _AddId() files them under the next
    # kind index (the one 'BoolBinary' receives below) rather than under
    # BoolUnary.  Confirm that this is intended.
    for long_flag in ('true', 'false'):
        flag_id = spec._AddId('BoolUnary_%s' % long_flag)
        spec.AddBoolOp(flag_id, bool_arg_type_e.Str)

    str_ops = [
        ('GlobEqual', '='),
        ('GlobDEqual', '=='),
        ('GlobNEqual', '!='),
        ('EqualTilde', '=~'),
    ]
    spec.AddBoolKind('BoolBinary', [
        (bool_arg_type_e.Str, str_ops),
        (bool_arg_type_e.Path, _Dash(_BINARY_PATH)),
        (bool_arg_type_e.Int, _Dash(_BINARY_INT)),
    ])

    ids = spec.id_str2int

    # logical, arity, arg_type
    for logical_name in ('Op_DAmp', 'Op_DPipe', 'KW_Bang'):
        spec.AddBoolOp(ids[logical_name], bool_arg_type_e.Undefined)

    # < and > compare strings
    for compare_name in ('Op_Less', 'Op_Great'):
        spec.AddBoolOp(ids[compare_name], bool_arg_type_e.Str)
764
765
def SetupTestBuiltin(
        id_spec,  # type: IdSpec
        unary_lookup,  # type: Dict[str, int]
        binary_lookup,  # type: Dict[str, int]
        other_lookup,  # type: Dict[str, int]
):
    # type: (...) -> None
    """Fill in the string -> Id tables used for test/[.

    Similar to AddBoolKinds above.  Differences:
    - =~ doesn't exist
    - && -> -a, || -> -o
    - ( ) -> Op_LParen (they don't appear above)

    Args:
      id_spec: spec to look Ids up in; three BoolBinary_* Ids are also added
      unary_lookup: filled with '-z' etc. -> BoolUnary_* Id
      binary_lookup: filled with '-eq', '=', '<' etc. -> Id
      other_lookup: filled with '!', '(', ')' and ']' -> Id
    """
    ids = id_spec.id_str2int
    kinds = id_spec.kind_str2int

    for chars in (_UNARY_STR_CHARS, _UNARY_OTHER_CHARS, _UNARY_PATH_CHARS):
        for letter in chars:
            unary_lookup['-' + letter] = ids['BoolUnary_%s' % letter]

    for op_name in _BINARY_PATH + _BINARY_INT:
        binary_lookup['-' + op_name] = ids['BoolBinary_%s' % op_name]

    # Like the [[ definition above, but without globbing and without =~ .
    bool_binary_kind = kinds['BoolBinary']
    builtin_ops = [('Equal', '='), ('DEqual', '=='), ('NEqual', '!=')]
    for id_name, token_str in builtin_ops:
        binary_lookup[token_str] = id_spec.AddBoolBinaryForBuiltin(
            id_name, bool_binary_kind)

    # Some of these names don't quite match, but it keeps the BoolParser simple.
    binary_lookup['<'] = ids['Op_Less']
    binary_lookup['>'] = ids['Op_Great']

    # NOTE: -a and -o overloaded as unary prefix operators BoolUnary_a and
    # BoolUnary_o.  The parser rather than the tokenizer handles this.
    other_lookup['!'] = ids['KW_Bang']  # like [[ !
    other_lookup['('] = ids['Op_LParen']
    other_lookup[')'] = ids['Op_RParen']

    other_lookup[']'] = ids['Arith_RBracket']  # For closing ]