#!/usr/bin/env python2
"""Consts.py."""
from __future__ import print_function

from _devbuild.gen.types_asdl import (redir_arg_type_e, redir_arg_type_t,
                                      bool_arg_type_t, opt_group_i)
from _devbuild.gen.id_kind_asdl import Id, Id_t, Kind_t
from frontend import builtin_def
from frontend import lexer_def
from frontend import option_def

from typing import Tuple, Optional, TYPE_CHECKING
if TYPE_CHECKING:
    from _devbuild.gen.option_asdl import option_t, builtin_t

NO_INDEX = 0  # for Resolve

# Used as consts::STRICT_ALL, etc. Do it explicitly to satisfy MyPy.
STRICT_ALL = option_def.STRICT_ALL
YSH_UPGRADE = option_def.YSH_UPGRADE
YSH_ALL = option_def.YSH_ALL
DEFAULT_TRUE = option_def.DEFAULT_TRUE

PARSE_OPTION_NUMS = option_def.PARSE_OPTION_NUMS

SET_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'set'
]
SET_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'set'
]

SHOPT_OPTION_NUMS = [
    opt.index for opt in option_def._SORTED if opt.builtin == 'shopt'
]
SHOPT_OPTION_NAMES = [
    opt.name for opt in option_def._SORTED if opt.builtin == 'shopt'
]

VISIBLE_SHOPT_NUMS = option_def.VISIBLE_SHOPT_NUMS  # used to print

BUILTIN_NAMES = builtin_def.BUILTIN_NAMES  # Used by builtin_comp.py

# Keywords for introspection with bash 'compgen' and 'type'
OSH_KEYWORD_NAMES = [name for _, name, _ in lexer_def.KEYWORDS]
OSH_KEYWORD_NAMES.append('{')  # not handled by our lexer
OSH_KEYWORD_NAMES.append('=')  # YSH keyword not handled by our lexer

# bash considers these closing delimiters keywords
OSH_KEYWORD_NAMES.append('}')
OSH_KEYWORD_NAMES.append(']]')


def GetKind(id_):
    # type: (Id_t) -> Kind_t
    """To make coarse-grained parsing decisions."""

    from _devbuild.gen.id_kind import ID_TO_KIND  # break circular dep
    return ID_TO_KIND[id_]


def BoolArgType(id_):
    # type: (Id_t) -> bool_arg_type_t

    from _devbuild.gen.id_kind import BOOL_ARG_TYPES  # break circular dep
    return BOOL_ARG_TYPES[id_]


#
# Redirect Tables associated with IDs
#

REDIR_DEFAULT_FD = {
    # filename
    Id.Redir_Less: 0,  # cat <input.txt means cat 0<input.txt
    Id.Redir_Great: 1,
    Id.Redir_DGreat: 1,
    Id.Redir_Clobber: 1,
    Id.Redir_LessGreat: 0,  # 'exec <> foo' opens a file with read/write
    # bash &> and &>>
    Id.Redir_AndGreat: 1,
    Id.Redir_AndDGreat: 1,

    # descriptor
    Id.Redir_GreatAnd: 1,  # echo >&2 means echo 1>&2
    Id.Redir_LessAnd: 0,  # echo <&3 means echo 0<&3, I think
    Id.Redir_TLess: 0,  # here word

    # here docs included
    Id.Redir_DLess: 0,
    Id.Redir_DLessDash: 0,
}

REDIR_ARG_TYPES = {
    # filename
    Id.Redir_Less: redir_arg_type_e.Path,
    Id.Redir_Great: redir_arg_type_e.Path,
    Id.Redir_DGreat: redir_arg_type_e.Path,
    Id.Redir_Clobber: redir_arg_type_e.Path,
    Id.Redir_LessGreat: redir_arg_type_e.Path,
    # bash &> and &>>
    Id.Redir_AndGreat: redir_arg_type_e.Path,
    Id.Redir_AndDGreat: redir_arg_type_e.Path,

    # descriptor
    Id.Redir_GreatAnd: redir_arg_type_e.Desc,
    Id.Redir_LessAnd: redir_arg_type_e.Desc,

    # Note: here docs aren't included
}


def RedirArgType(id_):
    # type: (Id_t) -> redir_arg_type_t
    return REDIR_ARG_TYPES[id_]


def RedirDefaultFd(id_):
    # type: (Id_t) -> int
    return REDIR_DEFAULT_FD[id_]
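
# Illustrative lookups, read straight off the tables above (a sketch, not
# executed at import time; callers pass real Id values from parsed redirects):
#
#   RedirDefaultFd(Id.Redir_Great) == 1                       # echo >out.txt writes fd 1
#   RedirDefaultFd(Id.Redir_Less) == 0                        # cat <in.txt reads fd 0
#   RedirArgType(Id.Redir_GreatAnd) == redir_arg_type_e.Desc  # 1>&2 takes a descriptor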


#
# Builtins
#

_BUILTIN_DICT = builtin_def.BuiltinDict()


def LookupSpecialBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it a special builtin?"""
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'special':
        return b.index
    else:
        return NO_INDEX


def LookupAssignBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it an assignment builtin?"""
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'assign':
        return b.index
    else:
        return NO_INDEX


def LookupNormalBuiltin(argv0):
    # type: (str) -> builtin_t
    """Is it any other builtin?"""
    b = _BUILTIN_DICT.get(argv0)
    if b and b.kind == 'normal':
        return b.index
    else:
        return NO_INDEX
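
# Sketch of how a command dispatcher might consult these lookups (hypothetical
# caller; which names count as 'special', 'assign', or 'normal' is defined in
# frontend/builtin_def.py):
#
#   b = LookupSpecialBuiltin(argv0)
#   if b == NO_INDEX:
#       b = LookupAssignBuiltin(argv0)
#   if b == NO_INDEX:
#       b = LookupNormalBuiltin(argv0)
#   # b is NO_INDEX (0) if argv0 isn't a builtin at all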


def OptionName(opt_num):
    # type: (option_t) -> str
    """Get the name from an index."""
    return option_def.OPTION_NAMES[opt_num]


OPTION_GROUPS = {
    'strict:all': opt_group_i.StrictAll,
    'ysh:upgrade': opt_group_i.YshUpgrade,
    'ysh:all': opt_group_i.YshAll,

    # Aliases to deprecate
    'oil:upgrade': opt_group_i.YshUpgrade,
    'oil:all': opt_group_i.YshAll,
}


def OptionGroupNum(s):
    # type: (str) -> int
    return OPTION_GROUPS.get(s, NO_INDEX)  # 0 for not found


_OPTION_DICT = option_def.OptionDict()


def OptionNum(s):
    # type: (str) -> int
    """
    Only considers implemented options.
    """
    pair = _OPTION_DICT.get(s)
    if pair is None:
        return 0
    num, impl = pair
    return num if impl else 0  # 0 means not found


def UnimplOptionNum(s):
    # type: (str) -> int
    pair = _OPTION_DICT.get(s)
    if pair is None:
        return 0
    num, impl = pair
    return 0 if impl else num  # 0 means not found
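
# The two lookups split _OPTION_DICT's (index, implemented) pairs: for a known
# option name, exactly one of them returns a nonzero index. A rough sketch of
# a caller (hypothetical):
#
#   num = OptionNum(name)             # nonzero only if implemented
#   if num == 0 and UnimplOptionNum(name) != 0:
#       ...the option exists but isn't implemented yet...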


_CONTROL_FLOW_NAMES = [name for _, name, _ in lexer_def.CONTROL_FLOW]
_CONTROL_FLOW_LOOKUP = {}
for _, name, id_ in lexer_def.CONTROL_FLOW:
    _CONTROL_FLOW_LOOKUP[id_] = name


def ControlFlowName(id_):
    # type: (int) -> str
    """For tracing"""
    return _CONTROL_FLOW_LOOKUP[id_]


def IsControlFlow(name):
    # type: (str) -> bool
    return name in _CONTROL_FLOW_NAMES
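
# Illustrative usage (sketch; the name/Id pairs come from
# lexer_def.CONTROL_FLOW, e.g. 'break', 'continue', 'return', 'exit'):
#
#   IsControlFlow('break')   # True for any name in lexer_def.CONTROL_FLOW
#   ControlFlowName(id_)     # the inverse direction, used when tracing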


def IsKeyword(name):
    # type: (str) -> bool
    return name in OSH_KEYWORD_NAMES


#
# osh/prompt.py and osh/word_compile.py
#

_ONE_CHAR_C = {
    '0': '\0',
    'a': '\a',
    'b': '\b',
    'e': '\x1b',
    'E': '\x1b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
    'v': '\v',
    '\\': '\\',
    "'": "'",  # for $'' only, not echo -e
    '"': '"',  # not sure why this is escaped within $''
    '/': '/',  # for JSON \/ only
}


def LookupCharC(c):
    # type: (str) -> str
    """Fatal if not present."""
    return _ONE_CHAR_C[c]
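
# Illustrative lookups, straight from the table above (sketch, not executed
# at import time):
#
#   LookupCharC('n') == '\n'
#   LookupCharC('e') == '\x1b'    # ANSI escape, as in $'\e'
#   LookupCharC('z')              # raises KeyError -- "fatal if not present"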


# NOTE: Prompt chars and printf are inconsistent, e.g. \E is \e in printf, but
# not in PS1.
_ONE_CHAR_PROMPT = {
    'a': '\a',
    'e': '\x1b',
    'r': '\r',
    'n': '\n',
    '\\': '\\',
}


def LookupCharPrompt(c):
    # type: (str) -> Optional[str]
    """Returns None if not present."""
    return _ONE_CHAR_PROMPT.get(c)


#
# Constants used by osh/split.py
#

# IFS splitting is complicated in general. We handle it with three concepts:
#
# - CH.* - Kinds of characters (edge labels)
# - ST.* - States (node labels)
# - EMIT.* - Actions
#
# The Split() loop in osh/split.py classifies characters, follows state
# transitions, and emits spans. A span is an (ignored Bool, end_index Int)
# pair.

# As an example, consider this string:
# 'a _ b'
#
# The character classes are:
#
#   a      ' '       _        ' '       b
#   Black  DE_White  DE_Gray  DE_White  Black
#
# The states are:
#
#   a      ' '        _        ' '        b
#   Black  DE_White1  DE_Gray  DE_White2  Black
#
# DE_White2 is whitespace that follows a "gray" non-whitespace IFS character.
#
# The spans emitted are:
#
#   (part 'a', ignored ' _ ', part 'b')

# SplitForRead() will check if the last two spans are a \ and \\n. Easy.

# Shorter names for state machine enums
from _devbuild.gen.runtime_asdl import state_t, emit_t, char_kind_t
from _devbuild.gen.runtime_asdl import emit_i as EMIT
from _devbuild.gen.runtime_asdl import char_kind_i as CH
from _devbuild.gen.runtime_asdl import state_i as ST

_IFS_EDGES = {
    # Whitespace should have been stripped
    (ST.Start, CH.DE_White): (ST.Invalid, EMIT.Nothing),  # ' '
    (ST.Start, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_'
    (ST.Start, CH.Black): (ST.Black, EMIT.Nothing),  # 'a'
    (ST.Start, CH.Backslash): (ST.Backslash, EMIT.Nothing),  # '\'
    (ST.Start, CH.Sentinel): (ST.Done, EMIT.Nothing),  # ''
    (ST.DE_White1, CH.DE_White): (ST.DE_White1, EMIT.Nothing),  # ' '
    (ST.DE_White1, CH.DE_Gray): (ST.DE_Gray, EMIT.Nothing),  # ' _'
    (ST.DE_White1, CH.Black): (ST.Black, EMIT.Delim),  # ' a'
    (ST.DE_White1, CH.Backslash): (ST.Backslash, EMIT.Delim),  # ' \'
    # Ignore trailing IFS whitespace too. This is necessary for the case:
    # IFS=':' ; read x y z <<< 'a : b : c :'.
    (ST.DE_White1, CH.Sentinel): (ST.Done, EMIT.Nothing),  # 'zz '
    (ST.DE_Gray, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_Gray, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '__'
    (ST.DE_Gray, CH.Black): (ST.Black, EMIT.Delim),  # '_a'
    (ST.DE_Gray, CH.Backslash): (ST.Black, EMIT.Delim),  # '_\'
    (ST.DE_Gray, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz:' IFS=': '
    (ST.DE_White2, CH.DE_White): (ST.DE_White2, EMIT.Nothing),  # '_ '
    (ST.DE_White2, CH.DE_Gray): (ST.DE_Gray, EMIT.Empty),  # '_ _'
    (ST.DE_White2, CH.Black): (ST.Black, EMIT.Delim),  # '_ a'
    (ST.DE_White2, CH.Backslash): (ST.Backslash, EMIT.Delim),  # '_ \'
    (ST.DE_White2, CH.Sentinel): (ST.Done, EMIT.Delim),  # 'zz: ' IFS=': '
    (ST.Black, CH.DE_White): (ST.DE_White1, EMIT.Part),  # 'a '
    (ST.Black, CH.DE_Gray): (ST.DE_Gray, EMIT.Part),  # 'a_'
    (ST.Black, CH.Black): (ST.Black, EMIT.Nothing),  # 'aa'
    (ST.Black, CH.Backslash): (ST.Backslash, EMIT.Part),  # 'a\'
    (ST.Black, CH.Sentinel): (ST.Done, EMIT.Part),  # 'zz' IFS=': '

    # Here we emit an ignored \ and the second character as well.
    # We're emitting TWO spans here; we don't wait until the subsequent
    # character. That is OK.
    #
    # Problem: if '\ ' is the last one, we don't want to emit a trailing span?
    # In all other cases we do.
    (ST.Backslash, CH.DE_White): (ST.Black, EMIT.Escape),  # '\ '
    (ST.Backslash, CH.DE_Gray): (ST.Black, EMIT.Escape),  # '\_'
    (ST.Backslash, CH.Black): (ST.Black, EMIT.Escape),  # '\a'
    # NOTE: second character is a backslash, but new state is ST.Black!
    (ST.Backslash, CH.Backslash): (ST.Black, EMIT.Escape),  # '\\'
    (ST.Backslash, CH.Sentinel): (ST.Done, EMIT.Escape),  # 'zz\'
}


def IfsEdge(state, ch):
    # type: (state_t, char_kind_t) -> Tuple[state_t, emit_t]
    """Follow edges of the IFS state machine."""
    return _IFS_EDGES[state, ch]
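
# Rough sketch of a driver loop for this table (hypothetical; the real Split()
# loop lives in osh/split.py and also tracks span boundaries):
#
#   state = ST.Start
#   while state != ST.Done:
#       ch, end_index = NextCharKind()        # hypothetical classifier
#       state, action = IfsEdge(state, ch)
#       if action == EMIT.Part:
#           spans.append((False, end_index))  # a piece to keep
#       elif action == EMIT.Delim:
#           spans.append((True, end_index))   # an ignored delimiter
#       # EMIT.Empty, EMIT.Escape, and EMIT.Nothing are handled similarly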


# Pattern to use libc regexec() to parse NAME, NAME=value, and NAME+=value.
#
# We want submatch extraction, which would need a new type of binding, and
# doing it with libc seems easy enough.

ASSIGN_ARG_RE = '^(' + lexer_def.VAR_NAME_RE + r')((=|\+=)(.*))?$'

# Eggex equivalent:
#
# VarName = /
#   [a-z A-Z _ ]
#   [a-z A-Z 0-9 _ ]*
# /
#
# SplitArg = /
#   %begin
#   <capture VarName>
#   (
#     <capture '=' | '+='> <capture dot*>
#   )?
#   %end
# /
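
# Illustrative match, using Python's re module only for the sketch (the
# interpreter matches this with libc regexec(); assumes VAR_NAME_RE adds no
# capture groups of its own):
#
#   import re
#   m = re.match(ASSIGN_ARG_RE, 'PATH+=/usr/local/bin')
#   m.group(1), m.group(3), m.group(4)  # ('PATH', '+=', '/usr/local/bin')
#   re.match(ASSIGN_ARG_RE, 'PATH')     # also matches; groups 2-4 are None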

# Weird rules for brackets: put ] first
NOT_BRACKETS = '[^][]*'
TEST_V_RE = '^(' + lexer_def.VAR_NAME_RE + r')(\[(' + NOT_BRACKETS + r')\])?$'

# NotBracket = / ![ ']' '[' ] /
#
# TestV = /
#   %begin
#   <capture VarName>
#   (
#     '[' <capture NotBracket*> ']'
#   )?
#   %end
# /
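
# Illustrative match for TEST_V_RE, with the same caveats as the sketch above:
#
#   m = re.match(TEST_V_RE, 'a[1+2]')
#   m.group(1), m.group(3)           # ('a', '1+2')
#   re.match(TEST_V_RE, 'a[b[0]]')   # None: nested brackets are rejected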
|