OILS / frontend / consts.py View on Github | oilshell.org

389 lines, 184 significant
#!/usr/bin/env python2
"""Consts.py.

Lookup tables, and small accessor functions over them, built from the
generated Id/option/builtin definitions imported below.
"""
from __future__ import print_function

from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
                                      bool_arg_type_t, opt_group_i)
from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
from frontend import builtin_def
from frontend import lexer_def
from frontend import option_def

from typing import Tuple, Optional, TYPE_CHECKING
if TYPE_CHECKING:
    # Only needed for the '# type:' comments, so not imported at runtime.
    from _devbuild.gen.option_asdl import option_t, builtin_t

NO_INDEX = 0  # for Resolve

# Used as consts::STRICT_ALL, etc.  Do it explicitly to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS

# Indices and names of the options whose 'builtin' field is 'set', in the
# iteration order of option_def._SORTED.
SET_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'set'
]
SET_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'set'
]

# Same, for the options whose 'builtin' field is 'shopt'.
SHOPT_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'shopt'
]
SHOPT_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'shopt'
]

VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS  # used to print

BUILTIN_NAMES = builtin_def.BUILTIN_NAMES  # Used by builtin_comp.py

# Keywords for introspection with bash 'compgen' and 'type'
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]
OSH_KEYWORD_NAMES.append('{')  # not handled by our lexer
OSH_KEYWORD_NAMES.append('=')  # YSH keyword not handled by our lexer

# bash considers these closing delimiters keywords
OSH_KEYWORD_NAMES.append('}')
OSH_KEYWORD_NAMES.append(']]')


def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """To make coarse-grained parsing decisions.

    Returns the ID_TO_KIND entry for id_.
    """
    # Imported inside the function rather than at module level.
    from _devbuild.gen.id_kind import ID_TO_KIND  # break circular dep
    return ID_TO_KIND[id_]


def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t
    """Return the BOOL_ARG_TYPES entry for id_."""
    # Imported inside the function rather than at module level.
    from _devbuild.gen.id_kind import BOOL_ARG_TYPES  # break circular dep
    return BOOL_ARG_TYPES[id_]


#
# Redirect Tables associated with IDs
#

# Redirect operator Id -> file descriptor used when the operator is written
# without an explicit descriptor number.
REDIR_DEFAULT_FD = {
    # filename
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # descriptor
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think
    Id.Redir_TLess: 0,  # here word

    # here docs included
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}

# Redirect operator Id -> whether its argument is a path or a descriptor.
REDIR_ARG_TYPES = {
    # filename
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # descriptor
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,

    # Note: here docs aren't included
}


def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    """Return the REDIR_ARG_TYPES entry for a redirect operator Id.

    Raises KeyError for an Id with no entry in the table.
    """
    return REDIR_ARG_TYPES[id_]


def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    """Return the REDIR_DEFAULT_FD entry for a redirect operator Id.

    Raises KeyError for an Id with no entry in the table.
    """
    return REDIR_DEFAULT_FD[id_]


#
# Builtins
#

# Looked up by command name (argv0) in the Lookup*Builtin() functions of
# this module, which read the '.kind' and '.index' attributes of each value.
_BUILTIN_DICT = builtin_def.BuiltinDict()


def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it a special builtin?

    Returns the builtin's index if argv0 names a builtin whose kind is
    'special', otherwise NO_INDEX.
    """
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'special':
        return b.index
    else:
        return NO_INDEX


def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it an assignment builtin?

    Returns the builtin's index if argv0 names a builtin whose kind is
    'assign', otherwise NO_INDEX.
    """
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'assign':
        return b.index
    else:
        return NO_INDEX


def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it any other builtin?

    Returns the builtin's index if argv0 names a builtin whose kind is
    'normal', otherwise NO_INDEX.
    """
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'normal':
        return b.index
    else:
        return NO_INDEX


def OptionName(opt_num):
    # type: (option_t) -> str
    """Get the name from an index (indexes option_def.OPTION_NAMES)."""
    return option_def.OPTION_NAMES[opt_num]


# Option group name -> opt_group_i value.
OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,

    # Aliases to deprecate
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
}


def OptionGroupNum(s):
    # type: (str) -> int
    """Return the OPTION_GROUPS value for s, or NO_INDEX if s isn't a key."""
    return OPTION_GROUPS.get(s, NO_INDEX)  # 0 for not found


# Looked up by option name in OptionNum() and UnimplOptionNum().
_OPTION_DICT = option_def.OptionDict()
_UNIMPL_OPTION_DICT = option_def.UnimplOptionDict()


def OptionNum(s):
    # type: (str) -> int
    """Return the _OPTION_DICT value for s, or NO_INDEX if s isn't a key."""
    # NO_INDEX is 0; named here to match OptionGroupNum().
    return _OPTION_DICT.get(s, NO_INDEX)  # 0 means not found


def UnimplOptionNum(s):
    # type: (str) -> int
    """Return the _UNIMPL_OPTION_DICT value for s, or NO_INDEX if absent."""
    # NO_INDEX is 0; named here to match OptionGroupNum().
    return _UNIMPL_OPTION_DICT.get(s, NO_INDEX)  # 0 means not found


# Each lexer_def.CONTROL_FLOW entry is unpacked as a 3-tuple whose second
# item is the name and whose third item is the id.
_CONTROL_FLOW_NAMES = [name for _, name, _ in lexer_def.CONTROL_FLOW]
_CONTROL_FLOW_LOOKUP = {}  # id -> name
for _, name, id_ in lexer_def.CONTROL_FLOW:
    _CONTROL_FLOW_LOOKUP[id_] = name


def ControlFlowName(id_):
    # type: (int) -> str
    """For tracing.

    Returns the name recorded for id_ in _CONTROL_FLOW_LOOKUP; raises
    KeyError if id_ has no entry.
    """
    return _CONTROL_FLOW_LOOKUP[id_]


def IsControlFlow(name):
    # type: (str) -> bool
    """Return True if name is one of the _CONTROL_FLOW_NAMES."""
    return name in _CONTROL_FLOW_NAMES


def IsKeyword(name):
    # type: (str) -> bool
    """Return True if name is one of the OSH_KEYWORD_NAMES."""
    return name in OSH_KEYWORD_NAMES


#
# osh/prompt.py and osh/word_compile.py
#

# Character following a backslash -> the character the escape stands for.
_ONE_CHAR_C = {
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'e': '\x1b',
    'E': '\x1b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Fatal if not present.

    Returns the _ONE_CHAR_C entry for c; raises KeyError if c has no entry.
    """
    return _ONE_CHAR_C[c]


# NOTE: Prompts chars and printf are inconsistent, e.g. \E is \e in printf, but
# not in PS1.
_ONE_CHAR_PROMPT = {
    'a': '\a',
    'e': '\x1b',
    'r': '\r',
    'n': '\n',
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Returns None if not present.

    Otherwise returns the _ONE_CHAR_PROMPT entry for c.
    """
    return _ONE_CHAR_PROMPT.get(c)


#
# Constants used by osh/split.py
#

# IFS splitting is complicated in general.  We handle it with three concepts:
#
# - CH.*   - Kinds of characters (edge labels)
# - ST.*   - States (node labels)
# - EMIT.* - Actions
#
# The Split() loop (not in this file; see osh/split.py) classifies characters,
# follows state transitions, and emits spans.  A span is a
# (ignored Bool, end_index Int) pair.

# As an example, consider this string:
# 'a _ b'
#
# The character classes are:
#
#   a      ' '        _        ' '        b
#   Black  DE_White   DE_Gray  DE_White   Black
#
# The states are:
#
#   a      ' '        _        ' '        b
#   Black  DE_White1  DE_Gray  DE_White2  Black
#
# DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
#
# The spans emitted are:
#
# (part 'a', ignored ' _ ', part 'b')

# SplitForRead() will check if the last two spans are a \ and \\n.  Easy.

# Shorter names for state machine enums
from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
from _devbuild.gen.runtime_asdl import emit_i as EMIT
from _devbuild.gen.runtime_asdl import char_kind_i as CH
from _devbuild.gen.runtime_asdl import state_i as ST

# (current state, character kind) -> (next state, action to emit)
_IFS_EDGES = {
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # '  '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    # NOTE(review): every other CH.Backslash edge out of a non-Backslash
    # state enters ST.Backslash; this one enters ST.Black.  Confirm that is
    # intentional before changing it.
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_  '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}


def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow edges of the IFS state machine.

    Returns the (next state, action) pair stored in _IFS_EDGES for
    (state, ch); raises KeyError if that pair has no entry.
    """
    return _IFS_EDGES[state, ch]


# Pattern to use libc regexec() to parse NAME, NAME=value, and NAME+=value.
#
# We want submatch extraction, which would need a new type of binding, and
# doing it with libc seems easy enough.
#
# Groups, assuming VAR_NAME_RE has no capturing groups of its own (TODO
# confirm): 1 = name, 2 = optional operator plus value, 3 = '=' or '+=',
# 4 = value.

ASSIGN_ARG_RE = '^(' + lexer_def.VAR_NAME_RE + r')((=|\+=)(.*))?$'

# Eggex equivalent:
#
# VarName = /
#   [a-z A-Z _ ]
#   [a-z A-Z 0-9 _ ]*
# /
#
# SplitArg = /
#   %begin
#   <capture VarName>
#   (
#     <capture '=' | '+='> <capture dot*>
#   )?
#   %end
# /

# Weird rules for brackets: put ] first
NOT_BRACKETS = '[^][]*'
# Matches NAME or NAME[...], where the bracketed part contains no brackets.
# Every fragment containing a backslash is a raw string, so the regex escapes
# \[ and \] are not read as (invalid) Python string escapes.
TEST_V_RE = ('^(' + lexer_def.VAR_NAME_RE + r')(\[(' + NOT_BRACKETS +
             r')\])?$')

# NotBrackets = / ![ ']' '[' ]* /
#
# TestV = /
#   %begin
#   <capture VarName>
#   (
#     '[' <capture NotBrackets> ']'
#   )?
#   %end
# /