OILS / frontend / consts.py View on Github | oilshell.org

399 lines, 191 significant
1#!/usr/bin/env python2
2"""Consts.py."""
3from __future__ import print_function
4
5from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
6 bool_arg_type_t, opt_group_i)
7from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
8from frontend import builtin_def
9from frontend import lexer_def
10from frontend import option_def
11
12from typing import Tuple, Optional, TYPE_CHECKING
13if TYPE_CHECKING:
14 from _devbuild.gen.option_asdl import option_t, builtin_t
15
# Sentinel meaning "not found".  The Lookup*Builtin() functions and
# OptionGroupNum() in this module return it when a name doesn't match.
NO_INDEX = 0  # for Resolve

# Used as consts::STRICT_ALL, etc.  Do it explicitly to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS

# Indices and names of the options in option_def._SORTED whose 'builtin'
# attribute is 'set'.  The two lists are parallel (same filter, same order).
SET_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'set'
]
SET_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'set'
]

# Same as above, for the options whose 'builtin' attribute is 'shopt'.
SHOPT_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'shopt'
]
SHOPT_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'shopt'
]

VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS  # used to print

BUILTIN_NAMES = builtin_def.BUILTIN_NAMES  # Used by builtin_comp.py
43
# Keyword names exposed for introspection by bash 'compgen' and 'type'.
# Starts from the names in lexer_def.KEYWORDS, then adds the ones listed below.
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]
OSH_KEYWORD_NAMES.extend([
    '{',  # not handled by our lexer
    '=',  # YSH keyword not handled by our lexer
    # bash considers these closing delimiters keywords
    '}',
    ']]',
])
52
53
def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """Return the Kind for an Id, for coarse-grained parsing decisions."""
    # Imported inside the function to break a circular dependency.
    from _devbuild.gen.id_kind import ID_TO_KIND

    kind = ID_TO_KIND[id_]
    return kind
60
61
def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t
    """Return the BOOL_ARG_TYPES entry for the given Id."""
    # Imported inside the function to break a circular dependency.
    from _devbuild.gen.id_kind import BOOL_ARG_TYPES

    arg_type = BOOL_ARG_TYPES[id_]
    return arg_type
67
68
69#
70# Redirect Tables associated with IDs
71#
72
# Maps each redirect operator Id to the file descriptor it applies to when
# none is written explicitly (0 or 1 for every entry).  Read through
# RedirDefaultFd().
REDIR_DEFAULT_FD = {
    # filename
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # descriptor
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think
    Id.Redir_TLess: 0,  # here word

    # here docs included
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}
93
# Maps each redirect operator Id to the kind of argument it takes: a path
# (filename) or a descriptor.  Read through RedirArgType().
#
# This table has fewer keys than REDIR_DEFAULT_FD: Redir_TLess, Redir_DLess and
# Redir_DLessDash are absent.
REDIR_ARG_TYPES = {
    # filename
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # descriptor
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,

    # Note: here docs aren't included
}
111
112
def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    """Return the argument type (path or descriptor) of a redirect operator.

    Indexes REDIR_ARG_TYPES directly, so an Id that isn't a key of that table
    raises KeyError.
    """
    arg_type = REDIR_ARG_TYPES[id_]
    return arg_type
116
117
def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    """Return the descriptor a redirect operator uses when none is given.

    Indexes REDIR_DEFAULT_FD directly, so an Id that isn't a key of that table
    raises KeyError.
    """
    default_fd = REDIR_DEFAULT_FD[id_]
    return default_fd
121
122
123#
124# Builtins
125#
126
# Lookup table from builtin_def, queried by name in the Lookup*Builtin()
# functions below.
_BUILTIN_DICT = builtin_def.BuiltinDict()


def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the special builtin named argv0.

    Returns NO_INDEX if argv0 isn't a builtin, or is a builtin of another kind.
    """
    entry = _BUILTIN_DICT.get(argv0)
    if not entry or entry.kind != 'special':
        return NO_INDEX
    return entry.index
138
139
def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the assignment builtin named argv0.

    Returns NO_INDEX if argv0 isn't a builtin, or is a builtin of another kind.
    """
    entry = _BUILTIN_DICT.get(argv0)
    if not entry or entry.kind != 'assign':
        return NO_INDEX
    return entry.index
148
149
def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Return the index of the normal builtin named argv0.

    Returns NO_INDEX if argv0 isn't a builtin, or is a builtin of another kind.
    """
    entry = _BUILTIN_DICT.get(argv0)
    if not entry or entry.kind != 'normal':
        return NO_INDEX
    return entry.index
158
159
def OptionName(opt_num):
    # type: (option_t) -> str
    """Return the option name stored at opt_num in option_def.OPTION_NAMES."""
    opt_name = option_def.OPTION_NAMES[opt_num]
    return opt_name
164
165
# Maps an option group name to its opt_group_i value.  Queried by
# OptionGroupNum().  The 'oil:' names map to the same values as their 'ysh:'
# counterparts.
OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,

    # Aliases to deprecate
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
}
175
176
def OptionGroupNum(s):
    # type: (str) -> int
    """Return the group number for option group name s.

    Returns NO_INDEX (0) when s is not a key of OPTION_GROUPS.
    """
    group_num = OPTION_GROUPS.get(s, NO_INDEX)
    return group_num
180
181
# Lookup table from option_def.  OptionNum() and UnimplOptionNum() unpack each
# value as a (number, implemented) pair.
_OPTION_DICT = option_def.OptionDict()


def OptionNum(s):
    # type: (str) -> int
    """Return the number of the option named s, or 0 if not found.

    Only considers implemented options: an option that is known but not
    implemented also yields 0.
    """
    entry = _OPTION_DICT.get(s)
    if entry is None:
        return 0

    opt_num, is_implemented = entry
    if is_implemented:
        return opt_num
    return 0  # known, but not implemented
195
196
def UnimplOptionNum(s):
    # type: (str) -> int
    """Return the number of the unimplemented option named s, or 0.

    The complement of OptionNum(): an unknown name and an implemented option
    both yield 0.
    """
    entry = _OPTION_DICT.get(s)
    if entry is None:
        return 0

    opt_num, is_implemented = entry
    if is_implemented:
        return 0
    return opt_num
204
205
# Names taken from the entries of lexer_def.CONTROL_FLOW; membership is tested
# by IsControlFlow().
_CONTROL_FLOW_NAMES = [name for _, name, _ in lexer_def.CONTROL_FLOW]
# Maps the third element of each CONTROL_FLOW entry (bound to id_ below) back
# to its name; read by ControlFlowName().
_CONTROL_FLOW_LOOKUP = {}
for _, name, id_ in lexer_def.CONTROL_FLOW:
    _CONTROL_FLOW_LOOKUP[id_] = name
210
211
def ControlFlowName(id_):
    # type: (int) -> str
    """Return the name registered for id_ in _CONTROL_FLOW_LOOKUP.

    Used for tracing.  Raises KeyError if id_ is not in the table.
    """
    flow_name = _CONTROL_FLOW_LOOKUP[id_]
    return flow_name
216
217
def IsControlFlow(name):
    # type: (str) -> bool
    """Return True if name is one of the names in _CONTROL_FLOW_NAMES."""
    found = name in _CONTROL_FLOW_NAMES
    return found
221
222
def IsKeyword(name):
    # type: (str) -> bool
    """Return True if name is one of the names in OSH_KEYWORD_NAMES."""
    found = name in OSH_KEYWORD_NAMES
    return found
226
227
228#
229# osh/prompt.py and osh/word_compile.py
230#
231
# Maps the character that follows a backslash in a C-style escape to the
# character the escape stands for.
_ONE_CHAR_C = {
    '0': '\0',  # NUL
    'a': '\a',  # bell
    'b': '\b',  # backspace
    'e': '\x1b',  # escape
    'E': '\x1b',  # escape (same as \e)
    'f': '\f',  # form feed
    'n': '\n',  # newline
    'r': '\r',  # carriage return
    't': '\t',  # tab
    'v': '\v',  # vertical tab
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Return the character that the one-char C escape c stands for.

    Fatal if not present: an unknown c raises KeyError.
    """
    unescaped = _ONE_CHAR_C[c]
    return unescaped
254
255
# NOTE: Prompt chars and printf are inconsistent, e.g. \E is \e in printf, but
# not in PS1.  That is why this table is separate from _ONE_CHAR_C and has no
# 'E' entry.
_ONE_CHAR_PROMPT = {
    'a': '\a',  # bell
    'e': '\x1b',  # escape
    'r': '\r',  # carriage return
    'n': '\n',  # newline
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Return the character that the one-char prompt escape c stands for.

    Unlike LookupCharC(), an unknown c is not an error: it returns None.
    """
    return _ONE_CHAR_PROMPT.get(c, None)
271
272
273#
274# Constants used by osh/split.py
275#
276
277# IFS splitting is complicated in general. We handle it with three concepts:
278#
279# - CH.* - Kinds of characters (edge labels)
280# - ST.* - States (node labels)
281# - EMIT.* Actions
282#
# The Split() loop (in osh/split.py, not in this file) classifies characters,
# follows state transitions, and emits spans.  A span is a
# (ignored Bool, end_index Int) pair.
285
286# As an example, consider this string:
287# 'a _ b'
288#
289# The character classes are:
290#
#   a       ' '        _         ' '       b
# Black  DE_White   DE_Gray   DE_White   Black
#
# The states are:
#
#   a       ' '        _         ' '       b
# Black  DE_White1  DE_Gray   DE_White2  Black
298#
299# DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
300#
301# The spans emitted are:
302#
303# (part 'a', ignored ' _ ', part 'b')
304
305# SplitForRead() will check if the last two spans are a \ and \\n. Easy.
306
307# Shorter names for state machine enums
308from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
309from _devbuild.gen.runtime_asdl import emit_i as EMIT
310from _devbuild.gen.runtime_asdl import char_kind_i as CH
311from _devbuild.gen.runtime_asdl import state_i as ST
312
# Transition table of the IFS splitting state machine, looked up by IfsEdge().
#
#   key:   (current state, kind of the next character)
#   value: (next state, action to emit)
#
# Every state with outgoing edges (Start, DE_White1, DE_Gray, DE_White2, Black,
# Backslash) has one entry for each of the five character kinds.  ST.Invalid
# and ST.Done never appear as the first element of a key.
#
# The trailing comment on each row is an example input that takes that edge.
_IFS_EDGES = {
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # '  '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    # NOTE(review): the next state here is ST.Black, whereas the corresponding
    # DE_White1 and DE_White2 rows go to ST.Backslash -- confirm this is
    # intended.
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_  '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}
356
357
def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow one edge of the IFS state machine.

    Returns the (next state, action) pair stored in _IFS_EDGES for the current
    state and the kind of the next character.  Raises KeyError if the table has
    no entry for that pair.
    """
    edge_key = (state, ch)
    return _IFS_EDGES[edge_key]
362
363
# Pattern to use libc regexec() to parse NAME, NAME=value, and NAME+=value.
#
# We want submatch extraction, which would need a new type of binding, and
# doing it with libc seems easy enough.
#
# Capture groups written here, in order: the name, the whole optional
# assignment, the operator ('=' or '+='), and the value.  (This numbering
# assumes lexer_def.VAR_NAME_RE adds no capture groups of its own -- TODO
# confirm.)

ASSIGN_ARG_RE = '^(' + lexer_def.VAR_NAME_RE + r')((=|\+=)(.*))?$'

# Eggex equivalent:
#
# VarName = /
#   [a-z A-Z _ ]
#   [a-z A-Z 0-9 _ ]*
# /
#
# SplitArg = /
#   %begin
#   <capture VarName>
#   (
#     <capture '=' | '+='> <capture dot*>
#   )?
#   %end
# /

# Pattern for NAME or NAME[index], where the index contains no brackets.
#
# Weird rules for brackets: put ] first
NOT_BRACKETS = '[^][]*'
# Every fragment containing a backslash is a raw string, so that \[ and \]
# reach the regex engine unchanged and aren't treated as (invalid) Python
# string escapes.
TEST_V_RE = '^(' + lexer_def.VAR_NAME_RE + r')(\[(' + NOT_BRACKETS + r')\])?$'

# Eggex equivalent:
#
# NotBrackets = / ![ ']' '[' ]* /
#
# TestV = /
#   %begin
#   <capture VarName>
#   (
#     '[' <capture NotBrackets> ']'
#   )?
#   %end
# /