OILS / frontend / consts.py View on Github | oilshell.org

383 lines, 181 significant
1#!/usr/bin/env python2
2"""Consts.py."""
3from __future__ import print_function
4
5from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
6 bool_arg_type_t, opt_group_i)
7from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
8from frontend import builtin_def
9from frontend import lexer_def
10from frontend import option_def
11
12from typing import Tuple, Optional, TYPE_CHECKING
13if TYPE_CHECKING:
14 from _devbuild.gen.option_asdl import option_t, builtin_t
15
# Value returned by the lookup helpers in this module when nothing matches.
NO_INDEX = 0  # for Resolve

# Used as consts::STRICT_ALL, etc.  Do it explicitly to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS

# Indices and names of the options whose 'builtin' field is 'set'.
SET_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'set'
]
SET_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'set'
]

# Indices and names of the options whose 'builtin' field is 'shopt'.
SHOPT_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'shopt'
]
SHOPT_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'shopt'
]

VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS  # used to print

BUILTIN_NAMES = builtin_def.BUILTIN_NAMES  # Used by builtin_comp.py
43
# Keywords for introspection with bash 'compgen' and 'type'
# Each lexer_def.KEYWORDS entry is a 3-tuple; the name is its middle element.
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]
OSH_KEYWORD_NAMES.append('{')  # not handled by our lexer
OSH_KEYWORD_NAMES.append('=')  # YSH keyword not handled by our lexer

# bash considers these closing delimiters keywords
OSH_KEYWORD_NAMES.append('}')
OSH_KEYWORD_NAMES.append(']]')
52
53
def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """To make coarse-grained parsing decisions.

    Returns the ID_TO_KIND entry for id_.
    """
    # Imported inside the function, not at module level, to break a circular
    # dependency.
    from _devbuild.gen.id_kind import ID_TO_KIND
    return ID_TO_KIND[id_]
60
61
def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t
    """Returns the BOOL_ARG_TYPES entry for id_."""
    # Imported inside the function, not at module level, to break a circular
    # dependency.
    from _devbuild.gen.id_kind import BOOL_ARG_TYPES
    return BOOL_ARG_TYPES[id_]
67
68
#
# Redirect Tables associated with IDs
#

# Redirect operator Id -> the descriptor number used when the operator is
# written without one (see the per-entry examples).
REDIR_DEFAULT_FD = {
    # filename
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # descriptor
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think

    # here word
    Id.Redir_TLess: 0,

    # here docs included
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}
93
# Redirect operator Id -> whether its argument is a path or a descriptor.
REDIR_ARG_TYPES = {
    # filename
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # descriptor
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,

    # Note: here docs (Redir_DLess, Redir_DLessDash) and the here word
    # (Redir_TLess) aren't included
}
111
112
def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    """Returns the REDIR_ARG_TYPES entry for a redirect operator.

    Raises KeyError for an id that has no entry in that table.
    """
    return REDIR_ARG_TYPES[id_]
116
117
def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    """Returns the REDIR_DEFAULT_FD entry for a redirect operator.

    Raises KeyError for an id that has no entry in that table.
    """
    return REDIR_DEFAULT_FD[id_]
121
122
#
# Builtins
#

# Result of builtin_def.BuiltinDict().  The lookup functions in this module
# call .get(name) on it and read .kind and .index from the entry they get.
_BUILTIN_DICT = builtin_def.BuiltinDict()
128
129
def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it a special builtin?

    Returns the entry's index when argv0 is found in _BUILTIN_DICT with kind
    'special'; otherwise returns NO_INDEX.
    """
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'special':
        return b.index
    else:
        return NO_INDEX
138
139
def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it an assignment builtin?

    Returns the entry's index when argv0 is found in _BUILTIN_DICT with kind
    'assign'; otherwise returns NO_INDEX.
    """
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'assign':
        return b.index
    else:
        return NO_INDEX
148
149
def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it any other builtin?

    Returns the entry's index when argv0 is found in _BUILTIN_DICT with kind
    'normal'; otherwise returns NO_INDEX.
    """
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'normal':
        return b.index
    else:
        return NO_INDEX
158
159
def OptionName(opt_num):
    # type: (option_t) -> str
    """Get the name from an index.

    Indexes option_def.OPTION_NAMES directly, so an opt_num with no entry
    there raises instead of returning a default.
    """
    return option_def.OPTION_NAMES[opt_num]
164
165
# Option group name -> opt_group_i value.
OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,

    # Aliases to deprecate
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
}
175
176
def OptionGroupNum(s):
    # type: (str) -> int
    """Returns the OPTION_GROUPS value for s, or NO_INDEX if s isn't a key."""
    return OPTION_GROUPS.get(s, NO_INDEX)  # 0 for not found
180
181
# Result of option_def.OptionDict(); OptionNum() looks option names up in it.
_OPTION_DICT = option_def.OptionDict()
183
184
def OptionNum(s):
    # type: (str) -> int
    """Returns the _OPTION_DICT value for s, or NO_INDEX if s isn't a key."""
    # NO_INDEX is 0, the same "not found" value OptionGroupNum() returns.
    return _OPTION_DICT.get(s, NO_INDEX)  # 0 means not found
188
189
# Each lexer_def.CONTROL_FLOW entry is a 3-tuple whose middle element is the
# name and whose last element is the id.
_CONTROL_FLOW_NAMES = [name for _, name, _ in lexer_def.CONTROL_FLOW]
# id -> name, for ControlFlowName()
_CONTROL_FLOW_LOOKUP = {}
for _, name, id_ in lexer_def.CONTROL_FLOW:
    _CONTROL_FLOW_LOOKUP[id_] = name
194
195
def ControlFlowName(id_):
    # type: (int) -> str
    """For tracing

    Returns the name stored for id_ in _CONTROL_FLOW_LOOKUP; raises KeyError
    if id_ has no entry there.
    """
    return _CONTROL_FLOW_LOOKUP[id_]
200
201
def IsControlFlow(name):
    # type: (str) -> bool
    """True if name is one of the names in _CONTROL_FLOW_NAMES."""
    return name in _CONTROL_FLOW_NAMES
205
206
def IsKeyword(name):
    # type: (str) -> bool
    """True if name is one of the names in OSH_KEYWORD_NAMES."""
    return name in OSH_KEYWORD_NAMES
210
211
#
# osh/prompt.py and osh/word_compile.py
#

# Character that follows a backslash -> the character the escape stands for.
_ONE_CHAR_C = {
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'e': '\x1b',
    'E': '\x1b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Fatal if not present.

    Returns the _ONE_CHAR_C entry for c; raises KeyError when c isn't a key.
    """
    return _ONE_CHAR_C[c]
238
239
# NOTE: Prompt chars and printf are inconsistent, e.g. \E is \e in printf, but
# not in PS1.
_ONE_CHAR_PROMPT = {
    'a': '\a',
    'e': '\x1b',
    'r': '\r',
    'n': '\n',
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Returns None if not present.

    Otherwise returns the _ONE_CHAR_PROMPT entry for c.
    """
    return _ONE_CHAR_PROMPT.get(c)
255
256
#
# Constants used by osh/split.py
#

# IFS splitting is complicated in general.  We handle it with three concepts:
#
# - CH.*   - Kinds of characters (edge labels)
# - ST.*   - States (node labels)
# - EMIT.* - Actions
#
# The Split() loop (it is not in this file; these tables are used by
# osh/split.py) classifies characters, follows state transitions, and emits
# spans.  A span is an (ignored Bool, end_index Int) pair.

# As an example, consider this string:
# 'a _ b'
#
# The character classes are:
#
# a      ' '        _        ' '        b
# Black  DE_White   DE_Gray  DE_White   Black
#
# The states are:
#
# a      ' '        _        ' '        b
# Black  DE_White1  DE_Gray  DE_White2  Black
#
# DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
#
# The spans emitted are:
#
# (part 'a', ignored ' _ ', part 'b')

# SplitForRead() will check if the last two spans are a \ and \\n.  Easy.
290
291# Shorter names for state machine enums
292from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
293from _devbuild.gen.runtime_asdl import emit_i as EMIT
294from _devbuild.gen.runtime_asdl import char_kind_i as CH
295from _devbuild.gen.runtime_asdl import state_i as ST
296
# (current state, kind of next character) -> (next state, action to emit).
# There are no entries whose current state is ST.Invalid or ST.Done.
_IFS_EDGES = {
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''

    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # '  '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too.  This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '

    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    # NOTE(review): this edge goes to ST.Black, while the DE_White1 and
    # DE_White2 rows go to ST.Backslash on a backslash.  Left unchanged here;
    # confirm whether the difference is intentional.
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '

    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_  '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '

    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character.  That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}
340
341
def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow edges of the IFS state machine.

    Returns the (next state, action) pair stored in _IFS_EDGES for
    (state, ch); raises KeyError when the table has no such edge.
    """
    return _IFS_EDGES[state, ch]
346
347
# Pattern to use libc regexec() to parse NAME, NAME=value, and NAME+=value.
#
# We want submatch extraction, which would need a new type of binding, and
# doing it with libc seems easy enough.
#
# Groups, assuming VAR_NAME_RE contains no capturing groups of its own (TODO
# confirm): 1 is NAME, 2 is the whole optional '=value' or '+=value' suffix,
# 3 is the operator, 4 is the value.

ASSIGN_ARG_RE = '^(' + lexer_def.VAR_NAME_RE + r')((=|\+=)(.*))?$'
354
# Eggex equivalent:
#
# VarName = /
#   [a-z A-Z _ ]
#   [a-z A-Z 0-9 _ ]*
# /
#
# SplitArg = /
#   %begin
#   <capture VarName>
#   (
#     <capture '=' | '+='> <capture dot*>
#   )?
#   %end
# /
369
# Weird rules for brackets: put ] first
NOT_BRACKETS = '[^][]*'
# Matches NAME or NAME[...], where the bracketed part contains no brackets.
# Every literal that contains a backslash is a raw string, so '\[' and '\]'
# reach the regex engine unchanged.
TEST_V_RE = '^(' + lexer_def.VAR_NAME_RE + r')(\[(' + NOT_BRACKETS + r')\])?$'
373
# NotBrackets = / ![ ']' '[' ]* /
#
# TestV = /
#   %begin
#   <capture VarName>
#   (
#     '[' <capture NotBrackets> ']'
#   )?
#   %end
# /