frontend/consts.py

OILS / frontend / consts.py View on Github | oilshell.org

389 lines, 184 significant

1	#!/usr/bin/env python2
2	"""Consts.py."""
3	from __future__ import print_function
4
5	from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
6	bool_arg_type_t, opt_group_i)
7	from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
8	from frontend import builtin_def
9	from frontend import lexer_def
10	from frontend import option_def
11
12	from typing import Tuple, Optional, TYPE_CHECKING
13	if TYPE_CHECKING:
14	from _devbuild.gen.option_asdl import option_t, builtin_t
15
NO_INDEX = 0  # for Resolve; also returned by the lookups in this file on a miss

# Re-exported so callers can spell these consts::STRICT_ALL, etc.  Each name
# is bound explicitly (no star-import) to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS

# Split option_def._SORTED by the builtin recorded on each option ('set' or
# 'shopt').  The *_NUMS and *_NAMES lists are parallel and keep the order of
# option_def._SORTED.
SET_OPTION_NUMS = []
SET_OPTION_NAMES = []
SHOPT_OPTION_NUMS = []
SHOPT_OPTION_NAMES = []
for opt in option_def._SORTED:
    if opt.builtin == 'set':
        SET_OPTION_NUMS.append(opt.index)
        SET_OPTION_NAMES.append(opt.name)
    elif opt.builtin == 'shopt':
        SHOPT_OPTION_NUMS.append(opt.index)
        SHOPT_OPTION_NAMES.append(opt.name)

VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS  # used to print

BUILTIN_NAMES = builtin_def.BUILTIN_NAMES  # Used by builtin_comp.py
43
# Keyword names exposed for introspection via bash 'compgen' and 'type'.
# Start from the lexer's keyword table, then add the names it doesn't cover.
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]
OSH_KEYWORD_NAMES.extend([
    '{',  # not handled by our lexer
    '=',  # YSH keyword not handled by our lexer
    # bash considers these closing delimiters keywords
    '}',
    ']]',
])
52
53
def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """Return the Kind for a token Id, for coarse-grained parsing decisions.

    Raises whatever ID_TO_KIND raises for an Id it does not contain.
    """
    # Imported at call time rather than module load to break a circular dep.
    from _devbuild.gen.id_kind import ID_TO_KIND
    kind = ID_TO_KIND[id_]
    return kind
60
61
def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t
    """Return the bool_arg_type_t registered for the given Id.

    Raises whatever BOOL_ARG_TYPES raises for an Id it does not contain.
    """
    # Imported at call time rather than module load to break a circular dep.
    from _devbuild.gen.id_kind import BOOL_ARG_TYPES
    arg_type = BOOL_ARG_TYPES[id_]
    return arg_type
67
68
69	#
70	# Redirect Tables associated with IDs
71	#
72
# Redirect operator Id -> file descriptor number to use when the operator is
# written without an explicit descriptor.  Read through RedirDefaultFd().
REDIR_DEFAULT_FD = {
    # filename
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # descriptor
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think
    Id.Redir_TLess: 0,  # here word (not a descriptor; it just lives here)

    # here docs included (unlike REDIR_ARG_TYPES)
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}
93
# Redirect operator Id -> kind of argument the operator takes: a path
# (redir_arg_type_e.Path) or a descriptor (redir_arg_type_e.Desc).
# Read through RedirArgType(), which raises KeyError for Ids not listed here.
REDIR_ARG_TYPES = {
    # filename
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # descriptor
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,

    # Note: here docs aren't included (neither is the here word, Redir_TLess)
}
111
112
def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    """Return the argument type (Path or Desc) for a redirect operator Id.

    Raises KeyError for Ids that are not in REDIR_ARG_TYPES.
    """
    arg_type = REDIR_ARG_TYPES[id_]
    return arg_type
116
117
def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    """Return the default file descriptor for a redirect operator Id.

    Raises KeyError for Ids that are not in REDIR_DEFAULT_FD.
    """
    default_fd = REDIR_DEFAULT_FD[id_]
    return default_fd
121
122
123	#
124	# Builtins
125	#
126
# Builtin name -> builtin spec, built once at import time.  The Lookup*Builtin
# functions read .kind ('special', 'assign' or 'normal') and .index from the
# values.
_BUILTIN_DICT = builtin_def.BuiltinDict()
128
129
def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the special builtin named argv0.

    Returns NO_INDEX when argv0 is not a builtin, or is a builtin of another
    kind.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'special':
        return NO_INDEX
    return spec.index
138
139
def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the assignment builtin named argv0.

    Returns NO_INDEX when argv0 is not a builtin, or is a builtin of another
    kind.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'assign':
        return NO_INDEX
    return spec.index
148
149
def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the normal (non-special, non-assign) builtin argv0.

    Returns NO_INDEX when argv0 is not a builtin, or is a builtin of another
    kind.
    """
    spec = _BUILTIN_DICT.get(argv0)
    if not spec or spec.kind != 'normal':
        return NO_INDEX
    return spec.index
158
159
def OptionName(opt_num):
    # type: (option_t) -> str
    """Return the option name stored at index opt_num in OPTION_NAMES."""
    names = option_def.OPTION_NAMES
    return names[opt_num]
164
165
# Option group name (as written by the user) -> opt_group_i value.
# Read through OptionGroupNum(), which returns NO_INDEX for unknown names.
OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,

    # Aliases to deprecate: the old 'oil:' spellings map to the same groups
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
}
175
176
def OptionGroupNum(s):
    # type: (str) -> int
    """Return the group number for an option group name, or NO_INDEX (0)."""
    if s in OPTION_GROUPS:
        return OPTION_GROUPS[s]
    return NO_INDEX  # 0 for not found
180
181
# Option name -> option number, built once at import time.  OptionNum() and
# UnimplOptionNum() use 0 as the "not found" result, so 0 is presumably never
# a real option number -- verify against option_def.
_OPTION_DICT = option_def.OptionDict()
_UNIMPL_OPTION_DICT = option_def.UnimplOptionDict()
184
185
def OptionNum(s):
    # type: (str) -> int
    """Return the number of the option named s, or 0 if there is none."""
    opt_num = _OPTION_DICT.get(s, 0)  # 0 means not found
    return opt_num
189
190
def UnimplOptionNum(s):
    # type: (str) -> int
    """Return the number of the unimplemented option named s, or 0 if none."""
    opt_num = _UNIMPL_OPTION_DICT.get(s, 0)  # 0 means not found
    return opt_num
194
195
# Built in one pass over lexer_def.CONTROL_FLOW:
#   _CONTROL_FLOW_NAMES  - the names, in table order (for IsControlFlow)
#   _CONTROL_FLOW_LOOKUP - Id -> name (for ControlFlowName)
_CONTROL_FLOW_NAMES = []
_CONTROL_FLOW_LOOKUP = {}
for _, name, id_ in lexer_def.CONTROL_FLOW:
    _CONTROL_FLOW_NAMES.append(name)
    _CONTROL_FLOW_LOOKUP[id_] = name
200
201
def ControlFlowName(id_):
    # type: (int) -> str
    """Return the name for a control flow Id, for tracing.

    Raises KeyError if id_ is not in _CONTROL_FLOW_LOOKUP.
    """
    name = _CONTROL_FLOW_LOOKUP[id_]
    return name
206
207
def IsControlFlow(name):
    # type: (str) -> bool
    """Return True if name is one of the lexer's control flow names."""
    if name in _CONTROL_FLOW_NAMES:
        return True
    return False
211
212
def IsKeyword(name):
    # type: (str) -> bool
    """Return True if name is listed in OSH_KEYWORD_NAMES."""
    if name in OSH_KEYWORD_NAMES:
        return True
    return False
216
217
218	#
219	# osh/prompt.py and osh/word_compile.py
220	#
221
# Character following a backslash -> the character the escape stands for.
_ONE_CHAR_C = {
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'e': '\x1b',
    'E': '\x1b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Translate a one-character C-style escape to the character it denotes.

    Fatal if not present: an unknown character raises KeyError.
    """
    unescaped = _ONE_CHAR_C[c]
    return unescaped
244
245
# NOTE: Prompt chars and printf are inconsistent, e.g. \E is \e in printf,
# but not in PS1.  So this table is a separate, smaller one than _ONE_CHAR_C.
_ONE_CHAR_PROMPT = {
    'a': '\a',
    'e': '\x1b',
    'r': '\r',
    'n': '\n',
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Translate a one-character prompt escape to the character it denotes.

    Returns None if not present.
    """
    return _ONE_CHAR_PROMPT.get(c, None)
261
262
263	#
264	# Constants used by osh/split.py
265	#
266
267	# IFS splitting is complicated in general. We handle it with three concepts:
268	#
269	# - CH.* - Kinds of characters (edge labels)
270	# - ST.* - States (node labels)
271	# - EMIT.* Actions
272	#
# The Split() loop (in osh/split.py, which uses these constants) classifies
# characters, follows state transitions, and emits spans.  A span is an
# (ignored Bool, end_index Int) pair.
275
276	# As an example, consider this string:
277	# 'a _ b'
278	#
279	# The character classes are:
280	#
281	# a ' ' _ ' ' b
282	# Black DE_White DE_Gray DE_White Black
283	#
284	# The states are:
285	#
286	# a ' ' _ ' ' b
287	# Black DE_White1 DE_Gray DE_White2 Black
288	#
289	# DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
290	#
291	# The spans emitted are:
292	#
293	# (part 'a', ignored ' _ ', part 'b')
294
295	# SplitForRead() will check if the last two spans are a \ and \\n. Easy.
296
297	# Shorter names for state machine enums
298	from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
299	from _devbuild.gen.runtime_asdl import emit_i as EMIT
300	from _devbuild.gen.runtime_asdl import char_kind_i as CH
301	from _devbuild.gen.runtime_asdl import state_i as ST
302
# Transition table of the IFS splitting state machine:
#
#   (current state, kind of the next character) -> (next state, emit action)
#
# The trailing comment on each edge shows an example input that takes it.
# Read through IfsEdge(), which raises KeyError for a pair with no edge.
_IFS_EDGES = {
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # ' '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    # NOTE(review): every other non-Backslash state moves to ST.Backslash on
    # CH.Backslash; this edge moves to ST.Black.  Confirm that is intended.
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}
346
347
def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow one edge of the IFS state machine.

    Looks up (state, ch) in _IFS_EDGES and returns the (next state, emit
    action) pair stored there.  Raises KeyError if the pair has no edge.
    """
    edge_key = (state, ch)
    return _IFS_EDGES[edge_key]
352
353
# Pattern to use libc regexec() to parse NAME, NAME=value, and NAME+=value.
#
# We want submatch extraction, which would need a new type of binding, and
# doing it with libc seems easy enough.
#
# Submatches (assuming VAR_NAME_RE has no capturing groups of its own):
#   1: NAME    3: the operator, '=' or '+='    4: the value
#
# The alternation must be a bare '|'.  In a POSIX extended regex '\|' is a
# literal pipe character, which would make the operator group match only the
# five-character text '=|+=' and never a real assignment.

ASSIGN_ARG_RE = '^(' + lexer_def.VAR_NAME_RE + r')((=|\+=)(.*))?$'

# Eggex equivalent:
#
# VarName = /
#   [a-z A-Z _ ]
#   [a-z A-Z 0-9 _ ]*
# /
#
# SplitArg = /
#   %begin
#   <capture VarName>
#   (
#     <capture '=' | '+='> <capture dot*>
#   )?
#   %end
# /
375
# Pattern for NAME or NAME[index], e.g. the argument of 'test -v'.
#
# Submatches (assuming VAR_NAME_RE has no capturing groups of its own):
#   1: NAME    3: the text between the brackets
#
# Weird rules for brackets: put ] first
NOT_BRACKETS = '[^][]*'
# Every fragment containing a backslash is a raw string, so that '\[' and
# '\]' reach the regex engine as written instead of relying on Python passing
# unknown escapes through (which warns on Python 3).
TEST_V_RE = ('^(' + lexer_def.VAR_NAME_RE + r')(\[(' + NOT_BRACKETS +
             r')\])?$')

# Eggex equivalent:
#
# NotBrackets = / ![ ']' '[' ]* /
#
# TestV = /
#   %begin
#   <capture VarName>
#   (
#     '[' <capture NotBrackets> ']'
#   )?
#   %end
# /