OILS / core / executor.py View on Github | oils.pub

1064 lines, 546 significant
1"""executor.py - used by the shell evaluator"""
2from __future__ import print_function
3
4from errno import EINTR
5
6from _devbuild.gen.id_kind_asdl import Id
7from _devbuild.gen.option_asdl import builtin_i, builtin_t
8from _devbuild.gen.runtime_asdl import RedirValue, trace
9from _devbuild.gen.syntax_asdl import (
10 command,
11 command_e,
12 CommandSub,
13 CompoundWord,
14 loc,
15 loc_t,
16 word_t,
17)
18from builtin import hay_ysh
19from core import dev
20from core import error
21from core.error import e_die, e_die_status
22from core import process
23from core import pyos
24from core import state
25from core import vm
26from display import ui
27from frontend import consts
28from frontend import lexer
29from mycpp import mylib
30from mycpp.mylib import str_switch, log, print_stderr
31from pylib import os_path
32from pylib import path_stat
33
34import posix_ as posix
35from posix_ import X_OK # translated directly to C macro
36
37from typing import cast, Dict, List, Tuple, Optional, TYPE_CHECKING
38if TYPE_CHECKING:
39 from _devbuild.gen.runtime_asdl import (cmd_value, CommandStatus,
40 StatusArray)
41 from _devbuild.gen.syntax_asdl import command_t
42 from builtin import trap_osh
43 from core import optview
44
45_ = log
46
47
def _IsPathExecutable(full_path):
    # type: (str) -> bool
    """True if full_path is an executable regular file."""
    if not posix.access(full_path, X_OK):
        return False
    return path_stat.isfile(full_path)
51
52
def LookupExecutable(name, path_dirs, exec_required=True):
    # type: (str, List[str], bool) -> Optional[str]
    """Resolve a command name against a list of directories.

    Returns either
    - the name if it's a relative path that exists
    - the executable name resolved against path_dirs
    - None if not found
    """
    if len(name) == 0:  # special case for "$(true)"
        return None

    # A name containing a slash is used as-is; it's only checked for existence.
    if '/' in name:
        if path_stat.exists(name):
            return name
        return None

    for d in path_dirs:
        candidate = os_path.join(d, name)
        if exec_required:
            ok = _IsPathExecutable(candidate)
        else:
            # The 'source' builtin only needs a regular file, not +x
            ok = path_stat.isfile(candidate)

        if ok:
            return candidate

    return None
78
79
def _RewriteExternToBuiltin(argv):
    # type: (List[str]) -> builtin_t
    """Decide whether to rewrite a command as a builtin.

    Returns the builtin id to run instead, or consts.NO_INDEX to mean
    "run the external command as usual".

    This function can have false negatives, but NOT false positives.

    False negative:
      We could have used the builtin, but used the extern instead
    False positive like 'cat -v'
      We tried to use the builtin for a feature it doesn't support!  This is a
      BUG

    SOUND optimizations:
      cat
      rm

    TODO sound:
      mkdir - common in Oils
      mv - used in autoconf
        - rename() only
        - if the files live on different devices, then fall back to extern
          (requires different logic)
      ln -s -f -v - no reason to shell out

      # Path operations
      - readlink -f - yeah actually we should do this, it's a transparent
        optimization
        - it just calls realpath
      - dirname
      - basename

    Quoting: shopt --set rewrite_name_regex
      - ls - without args?
        - Is quoting compatible?  May not matter
      - find
        - Also need a better quoting mode
      - wc displays filenames

    YSH: fs can be a new design to take the place of ls and find

    - Starting processes
      - xargs -P

    Regex:
      grep egrep fgrep -v -o '^ *+'
        builtin grep *.py (/d+/)  # eggex pattern
      sed ?  Because --regexp-extended is GNU?  Maybe implement that
        builtin sed s (/<capture d+ as month>/, ^"$month")

      Why hidden in OSH?  Because regex engines can have MINOR syntax
      differences, like []] for char classes.  But it could be ON in YSH,
      specifically so you can AVOID those differences!

      Meh: explicit builtin grep / builtin sed is better.  Make a note about
      performance in doc/ref.

    Field selection:
      awk / cut

      Though be careful of being slower than awk to execute.

    Maybe an alias:
      b grep
      b sed
      b wc
      b ls

    For anything with possible unsoundness.  cat, rm,
    """
    assert len(argv) >= 1, argv  # enforced in the executor

    arg0 = argv[0]
    i = 1
    n = len(argv)
    # str_switch is a mycpp idiom that translates to efficient C++ dispatch.
    with str_switch(arg0) as case:
        if case('cat'):
            # Rewrite only when every arg is a plain operand; any flag other
            # than the ones explicitly allowed means we fall back to extern.
            while i < n:
                arg = argv[i]

                # allowed: cat -
                # allowed: cat -- foo
                if arg in ('-', '--'):
                    pass

                # commands with flags aren't rewritten
                elif arg.startswith('-'):
                    return consts.NO_INDEX

                i += 1

            # Every arg was OK
            return builtin_i.cat

        elif case('rm'):
            while i < n:
                arg = argv[i]

                # allowed: rm -- foo
                # allowed: rm -f foo
                if arg in ('--', '-f'):
                    pass

                # commands with flags aren't rewritten
                elif arg.startswith('-'):
                    return consts.NO_INDEX

                i += 1

            return builtin_i.rm

        else:
            # Not a command we know how to rewrite soundly.
            return consts.NO_INDEX
192
193
class SearchPath(object):
    """For looking up files in $PATH or ENV.PATH"""

    def __init__(self, mem, exec_opts):
        # type: (state.Mem, optview.Exec) -> None
        # NOTE(review): exec_opts is accepted but never stored or read in this
        # class -- presumably kept for constructor-signature parity; confirm
        # before removing.
        self.mem = mem
        # Cache of name -> resolved path, like bash's command hash table.
        self.cache = {}  # type: Dict[str, str]

    def _GetPath(self):
        # type: () -> List[str]
        """Return the current search path, split into a list of directories."""

        # In YSH, we read from ENV.PATH
        s = self.mem.env_config.Get('PATH')
        if s is None:
            return []  # treat as empty path

        # TODO: Could cache this to avoid split() allocating all the time.
        return s.split(':')

    def LookupOne(self, name, exec_required=True):
        # type: (str, bool) -> Optional[str]
        """
        Returns the path itself (if relative path), the resolved path, or None.
        """
        return LookupExecutable(name,
                                self._GetPath(),
                                exec_required=exec_required)

    def LookupReflect(self, name, do_all):
        # type: (str, bool) -> List[str]
        """
        Like LookupOne(), with an option for 'type -a' to return all paths.
        """
        if len(name) == 0:  # special case for "$(true)"
            return []

        if '/' in name:
            if _IsPathExecutable(name):
                return [name]
            else:
                return []

        results = []  # type: List[str]
        for path_dir in self._GetPath():
            full_path = os_path.join(path_dir, name)
            if _IsPathExecutable(full_path):
                results.append(full_path)
                if not do_all:
                    # First hit is enough unless 'type -a' asked for all.
                    return results

        return results

    def CachedLookup(self, name):
        # type: (str) -> Optional[str]
        """LookupOne() that caches successful resolutions."""
        #log('name %r', name)
        if name in self.cache:
            return self.cache[name]

        full_path = self.LookupOne(name)
        if full_path is not None:
            # Only successful lookups are cached; misses are retried.
            self.cache[name] = full_path
        return full_path

    def MaybeRemoveEntry(self, name):
        # type: (str) -> None
        """When the file system changes."""
        mylib.dict_erase(self.cache, name)

    def ClearCache(self):
        # type: () -> None
        """For hash -r."""
        self.cache.clear()

    def CachedCommands(self):
        # type: () -> List[str]
        """Return every cached command path, e.g. for displaying 'hash'."""
        return self.cache.values()
270
271
class _ProcessSubFrame(object):
    """To keep track of diff <(cat 1) <(cat 2) > >(tac)"""

    def __init__(self):
        # type: () -> None

        # These objects appear unconditionally in the main loop, and aren't
        # commonly used, so we manually optimize [] into None.
        # NOTE(review): the lists below are in fact allocated eagerly; the
        # optimization this refers to appears to be the clean-frame pooling
        # done by the caller -- confirm and update this comment.

        self._to_wait = []  # type: List[process.Process]
        self._to_close = []  # type: List[int]  # file descriptors
        self._locs = []  # type: List[loc_t]
        # Set by Append(); lets the caller skip waiting on untouched frames.
        self._modified = False

    def WasModified(self):
        # type: () -> bool
        """True if at least one process sub was registered on this frame."""
        return self._modified

    def Append(self, p, fd, status_loc):
        # type: (process.Process, int, loc_t) -> None
        """Register a started process sub: the process to wait on, the pipe FD
        to close later, and the location to blame for its exit status."""
        self._modified = True

        self._to_wait.append(p)
        self._to_close.append(fd)
        self._locs.append(status_loc)

    def MaybeWaitOnProcessSubs(self, waiter, status_array):
        # type: (process.Waiter, StatusArray) -> None
        """Close our ends of the pipes, then wait on every registered process,
        filling status_array with exit codes and blame locations."""

        # Wait in the same order that they were evaluated. That seems fine.
        for fd in self._to_close:
            posix.close(fd)

        codes = []  # type: List[int]
        locs = []  # type: List[loc_t]
        for i, p in enumerate(self._to_wait):
            #log('waiting for %s', p)
            st = p.Wait(waiter)
            codes.append(st)
            locs.append(self._locs[i])

        status_array.codes = codes
        status_array.locs = locs
315
316
# Big flags for _RunSimpleCommand
# NOTE(review): bit 0 is unused -- flags start at 1 << 1; presumably
# intentional, confirm before packing another flag into bit 0.
IS_LAST_CMD = 1 << 1
NO_CALL_PROCS = 1 << 2  # command ls suppresses function lookup
USE_DEFAULT_PATH = 1 << 3  # for command -p ls changes the path

# Fallback $PATH for 'command -p'.  Copied from var.c in dash
DEFAULT_PATH = [
    '/usr/local/sbin', '/usr/local/bin', '/usr/sbin', '/usr/bin', '/sbin',
    '/bin'
]

# Exit status carried by error.Structured when pure mode rejects an operation.
_PURITY_STATUS = 5
329
330
class PureExecutor(vm._Executor):
    """Executor for pure mode: runs procs, hay, and builtins only.

    Any operation that would fork a process or do I/O raises
    error.Structured with status _PURITY_STATUS; redirect/process-sub
    bookkeeping methods are no-ops.
    """

    # mycpp needs this duplicate constructor
    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            procs,  # type: state.Procs
            hay_state,  # type: hay_ysh.HayState
            builtins,  # type: Dict[int, vm._Builtin]
            tracer,  # type: dev.Tracer
            errfmt  # type: ui.ErrorFormatter
    ):
        vm._Executor.__init__(self, mem, exec_opts, mutable_opts, procs,
                              hay_state, builtins, tracer, errfmt)

    def _RunSimpleCommand(self, arg0, arg0_loc, cmd_val, cmd_st, run_flags):
        # type: (str, loc_t, cmd_value.Argv, CommandStatus, int) -> int
        """Run a simple command: only procs and hay nodes can resolve here."""

        call_procs = not (run_flags & NO_CALL_PROCS)
        if call_procs:
            proc_val, self_obj = self.procs.GetInvokable(arg0)
            if proc_val is not None:
                return self._RunInvokable(proc_val, self_obj, arg0_loc,
                                          cmd_val)

        if self.hay_state.Resolve(arg0):
            return self.RunBuiltin(builtin_i.haynode, cmd_val)

        # No external command fallback in pure mode.
        self.errfmt.Print_(
            'Command %r not found in pure mode (OILS-ERR-102)' % arg0,
            arg0_loc)
        return 127

    def RunBackgroundJob(self, node):
        # type: (command_t) -> int
        """Disallowed in pure mode; always raises."""
        raise error.Structured(
            _PURITY_STATUS,
            "Background jobs aren't allowed in pure mode (OILS-ERR-204)",
            loc.Command(node))

    def RunPipeline(self, node, status_out):
        # type: (command.Pipeline, CommandStatus) -> None
        """Disallowed in pure mode; always raises."""
        raise error.Structured(
            _PURITY_STATUS,
            "Pipelines aren't allowed in pure mode (OILS-ERR-204)",
            loc.Command(node))

    def RunSubshell(self, node):
        # type: (command_t) -> int
        """Disallowed in pure mode; always raises."""
        raise error.Structured(
            _PURITY_STATUS,
            "Subshells aren't allowed in pure mode (OILS-ERR-204)",
            loc.Command(node))

    def CaptureStdout(self, node):
        # type: (command_t) -> Tuple[int, str]
        """
        Used by io->captureStdout() method, and called by command sub

        Stub: returns status 0 and empty output in pure mode.
        """
        return 0, ''

    def Capture3(self, node):
        # type: (command_t) -> Tuple[int, str, str]
        """
        Used by io->captureAll() method, and called by command sub

        Stub: returns status 0 and empty stdout/stderr in pure mode.
        """
        return 0, '', ''

    def RunCommandSub(self, cs_part):
        # type: (CommandSub) -> str
        """Disallowed in pure mode; always raises."""
        raise error.Structured(
            _PURITY_STATUS,
            "Command subs aren't allowed in pure mode (OILS-ERR-204)",
            loc.WordPart(cs_part))

    def RunProcessSub(self, cs_part):
        # type: (CommandSub) -> str
        """Disallowed in pure mode; always raises."""
        raise error.Structured(
            _PURITY_STATUS,
            "Process subs aren't allowed in pure mode (OILS-ERR-204)",
            loc.WordPart(cs_part))

    def PushRedirects(self, redirects, err_out):
        # type: (List[RedirValue], List[int]) -> None
        # No-op: pure mode has no fd_state to push onto.
        pass

    def PopRedirects(self, num_redirects, err_out):
        # type: (int, List[int]) -> None
        # No-op: nothing was pushed.
        pass

    def PushProcessSub(self):
        # type: () -> None
        # No-op: process subs can't start in pure mode.
        pass

    def PopProcessSub(self, compound_st):
        # type: (StatusArray) -> None
        # No-op: nothing to wait on.
        pass
430
431
class ShellExecutor(vm._Executor):
    """An executor combined with the OSH language evaluators in osh/ to create
    a shell interpreter."""

    def __init__(
            self,
            mem,  # type: state.Mem
            exec_opts,  # type: optview.Exec
            mutable_opts,  # type: state.MutableOpts
            procs,  # type: state.Procs
            hay_state,  # type: hay_ysh.HayState
            builtins,  # type: Dict[int, vm._Builtin]
            tracer,  # type: dev.Tracer
            errfmt,  # type: ui.ErrorFormatter
            search_path,  # type: SearchPath
            ext_prog,  # type: process.ExternalProgram
            waiter,  # type: process.Waiter
            job_control,  # type: process.JobControl
            job_list,  # type: process.JobList
            fd_state,  # type: process.FdState
            trap_state,  # type: trap_osh.TrapState
    ):
        # type: (...) -> None
        vm._Executor.__init__(self, mem, exec_opts, mutable_opts, procs,
                              hay_state, builtins, tracer, errfmt)
        self.search_path = search_path
        self.ext_prog = ext_prog
        self.waiter = waiter
        self.multi_trace = tracer.multi_trace
        self.job_control = job_control
        # sleep 5 & puts a (PID, job#) entry here. And then "jobs" displays it.
        self.job_list = job_list
        self.fd_state = fd_state
        self.trap_state = trap_state
        # Stack of frames tracking in-flight process subs; pool of reusable
        # empty frames to avoid allocations in the main loop.
        self.process_sub_stack = []  # type: List[_ProcessSubFrame]
        self.clean_frame_pool = []  # type: List[_ProcessSubFrame]

        # When starting a pipeline in the foreground, we need to pass a handle to it
        # through the evaluation of the last node back to ourselves for execution.
        # We use this handle to make sure any processes forked for the last part of
        # the pipeline are placed into the same process group as the rest of the
        # pipeline. Since there is, by design, only ever one foreground pipeline and
        # any pipelines started within subshells run in their parent's process
        # group, we only need one pointer here, not some collection.
        self.fg_pipeline = None  # type: Optional[process.Pipeline]

        # Pre-built word list for rewriting $(<file) as $(builtin cat <file).
        tok1 = lexer.DummyToken(Id.Lit_Chars, 'builtin')
        tok2 = lexer.DummyToken(Id.Lit_Chars, 'cat')
        self.builtin_cat_words = [CompoundWord([tok1]),
                                  CompoundWord([tok2])]  # type: List[word_t]

    def _MakeProcess(self, node, inherit_errexit, inherit_errtrace):
        # type: (command_t, bool, bool) -> process.Process
        """Assume we will run the node in another process.

        Return a process.
        """
        UP_node = node
        if node.tag() == command_e.ControlFlow:
            node = cast(command.ControlFlow, UP_node)
            # Pipeline or subshells with control flow are invalid, e.g.:
            # - break | less
            # - continue | less
            # - ( return )
            # NOTE: This could be done at parse time too.
            if node.keyword.id != Id.ControlFlow_Exit:
                e_die(
                    'Invalid control flow %r in pipeline / subshell / background'
                    % lexer.TokenVal(node.keyword), node.keyword)

        # NOTE: If ErrExit(), we could be verbose about subprogram errors? This
        # only really matters when executing 'exit 42', because the child shell
        # inherits errexit and will be verbose. Other notes:
        #
        # - We might want errors to fit on a single line so they don't get
        # interleaved.
        # - We could turn the `exit` builtin into a error.FatalRuntime exception
        # and get this check for "free".
        thunk = process.SubProgramThunk(self.cmd_ev, node, self.trap_state,
                                        self.multi_trace, inherit_errexit,
                                        inherit_errtrace)
        p = process.Process(thunk, self.job_control, self.job_list,
                            self.tracer)
        return p

    def _RunSimpleCommand(self, arg0, arg0_loc, cmd_val, cmd_st, run_flags):
        # type: (str, loc_t, cmd_value.Argv, CommandStatus, int) -> int
        """Run builtins, functions, external commands.

        Resolution order: assignment builtins (rejected), special builtins,
        procs, hay nodes, normal builtins, then external commands (possibly
        rewritten back to builtins).

        Possible variations:
        - YSH might have different, simpler rules. No special builtins, etc.
        - YSH might have OILS_PATH = :| /bin /usr/bin | or something.
        - Interpreters might want to define all their own builtins.
        """

        builtin_id = consts.LookupAssignBuiltin(arg0)
        if builtin_id != consts.NO_INDEX:
            # command readonly is disallowed, for technical reasons. Could relax it
            # later.
            self.errfmt.Print_("Simple command can't run assignment builtin",
                               arg0_loc)
            return 1

        builtin_id = consts.LookupSpecialBuiltin(arg0)
        if builtin_id != consts.NO_INDEX:
            cmd_st.show_code = True  # this is a "leaf" for errors
            status = self.RunBuiltin(builtin_id, cmd_val)
            # TODO: Enable this and fix spec test failures.
            # Also update _SPECIAL_BUILTINS in osh/builtin.py.
            #if status != 0:
            #    e_die_status(status, 'special builtin failed')
            return status

        # Call procs first. Builtins like 'true' can be redefined.
        call_procs = not (run_flags & NO_CALL_PROCS)
        if call_procs:
            proc_val, self_obj = self.procs.GetInvokable(arg0)
            if proc_val is not None:
                return self._RunInvokable(proc_val, self_obj, arg0_loc,
                                          cmd_val)

        # Notes:
        # - procs shadow hay names
        # - hay names shadow normal builtins? Should we limit to CAPS or no?
        if self.hay_state.Resolve(arg0):
            return self.RunBuiltin(builtin_i.haynode, cmd_val)

        builtin_id = consts.LookupNormalBuiltin(arg0)

        if self.exec_opts._running_hay():
            # Hay: limit the builtins that can be run
            # - declare 'use dialect'
            # - echo and write for debugging
            # - no JSON?
            if builtin_id in (builtin_i.haynode, builtin_i.use, builtin_i.echo,
                              builtin_i.write):
                cmd_st.show_code = True  # this is a "leaf" for errors
                return self.RunBuiltin(builtin_id, cmd_val)

            self.errfmt.Print_('Unknown command %r while running hay' % arg0,
                               arg0_loc)
            return 127

        if builtin_id != consts.NO_INDEX:
            cmd_st.show_code = True  # this is a "leaf" for errors
            return self.RunBuiltin(builtin_id, cmd_val)

        # Maybe rewrite 'cat' as 'builtin cat' !
        # Don't do it interactively, since that can mess up job control.
        if (self.exec_opts.rewrite_extern() and
                not self.exec_opts.interactive()):
            builtin_id = _RewriteExternToBuiltin(cmd_val.argv)
            if builtin_id != consts.NO_INDEX:
                # Run cat in a separate process (#2530) in OSH for compatibility
                if builtin_id == builtin_i.cat and not self.exec_opts.ysh_rewrite_extern():
                    thunk = process.BuiltinThunk(self, builtin_id, cmd_val)
                    p = process.Process(thunk, self.job_control, self.job_list,
                                        self.tracer)
                    status = p.RunProcess(self.waiter, trace.Fork)
                    return status
                else:
                    return self.RunBuiltin(builtin_id, cmd_val)

        return self.RunExternal(arg0, arg0_loc, cmd_val, cmd_st, run_flags)

    def RunExternal(self, arg0, arg0_loc, cmd_val, cmd_st, run_flags):
        # type: (str, loc_t, cmd_value.Argv, Optional[CommandStatus], int) -> int
        """Resolve arg0 against the search path and run it, forking unless
        this is the last command of the process and no traps are set."""
        environ = self.mem.GetEnv()  # Include temporary variables

        if cmd_val.proc_args:
            e_die(
                '%r appears to be external. External commands don\'t accept typed args (OILS-ERR-200)'
                % arg0, cmd_val.proc_args.typed_args.left)

        # Resolve argv[0] BEFORE forking.
        if run_flags & USE_DEFAULT_PATH:
            argv0_path = LookupExecutable(arg0, DEFAULT_PATH)
        else:
            argv0_path = self.search_path.CachedLookup(arg0)
        if argv0_path is None:
            self.errfmt.Print_('Command %r not found (OILS-ERR-100)' % arg0,
                               arg0_loc)
            return 127

        # With traps, we can't exec() in place of fork() -- the trap handlers
        # still have to run in this process.
        if self.trap_state.ThisProcessHasTraps():
            do_fork = True
        else:
            do_fork = not cmd_val.is_last_cmd

        # Normal case: ls /
        if do_fork:
            thunk = process.ExternalThunk(self.ext_prog, argv0_path, cmd_val,
                                          environ)
            p = process.Process(thunk, self.job_control, self.job_list,
                                self.tracer)

            if self.job_control.Enabled():
                if self.fg_pipeline is not None:
                    pgid = self.fg_pipeline.ProcessGroupId()
                    # If job control is enabled, this should be true
                    assert pgid != process.INVALID_PGID

                    change = process.SetPgid(pgid, self.tracer)
                    self.fg_pipeline = None  # clear to avoid confusion in subshells
                else:
                    change = process.SetPgid(process.OWN_LEADER, self.tracer)
                p.AddStateChange(change)

            status = p.RunProcess(self.waiter, trace.External(cmd_val.argv))

            # this is close to a "leaf" for errors
            # problem: permission denied EACCESS prints duplicate messages
            # TODO: add message command 'ls' failed
            if cmd_st is not None:
                cmd_st.show_code = True

            return status

        self.tracer.OnExec(cmd_val.argv)

        # Already forked for pipeline: ls / | wc -l
        self.ext_prog.Exec(argv0_path, cmd_val, environ)  # NEVER RETURNS

        raise AssertionError('for -Wreturn-type in C++')

    def RunBackgroundJob(self, node):
        # type: (command_t) -> int
        """For & etc."""
        # Special case for pipeline. There is some evidence here:
        # https://www.gnu.org/software/libc/manual/html_node/Launching-Jobs.html#Launching-Jobs
        #
        # "You can either make all the processes in the process group be children
        # of the shell process, or you can make one process in group be the
        # ancestor of all the other processes in that group. The sample shell
        # program presented in this chapter uses the first approach because it
        # makes bookkeeping somewhat simpler."
        UP_node = node

        if UP_node.tag() == command_e.Pipeline:
            node = cast(command.Pipeline, UP_node)
            pi = process.Pipeline(self.exec_opts.sigpipe_status_ok(),
                                  self.job_control, self.job_list, self.tracer)
            for child in node.children:
                p = self._MakeProcess(child, True, self.exec_opts.errtrace())
                p.Init_ParentPipeline(pi)
                pi.Add(p)

            pi.StartPipeline(self.waiter)
            pi.SetBackground()
            self.mem.last_bg_pid = pi.PidForWait()  # for $!
            job_id = self.job_list.RegisterJob(pi)  # show in 'jobs' list

        else:
            # Problem: to get the 'set -b' behavior of immediate notifications, we
            # have to register SIGCHLD. But then that introduces race conditions.
            # If we haven't called Register yet, then we won't know who to notify.

            p = self._MakeProcess(node, True, self.exec_opts.errtrace())
            if self.job_control.Enabled():
                p.AddStateChange(
                    process.SetPgid(process.OWN_LEADER, self.tracer))

            p.SetBackground()
            # NOTE: the direct return value is unused; PidForWait() is what
            # feeds $! below.
            pid = p.StartProcess(trace.Fork)
            self.mem.last_bg_pid = p.PidForWait()  # for $!
            job_id = self.job_list.RegisterJob(p)  # show in 'jobs' list

        if self.exec_opts.interactive():
            # Print it like %1 to show it's a job
            print_stderr('[%%%d] PID %d Started' %
                         (job_id, self.mem.last_bg_pid))

        return 0

    def RunPipeline(self, node, status_out):
        # type: (command.Pipeline, CommandStatus) -> None
        """Run a foreground pipeline.  The first n-1 parts fork; the last part
        runs in THIS process.  Statuses and blame locations are written into
        status_out."""

        pi = process.Pipeline(self.exec_opts.sigpipe_status_ok(),
                              self.job_control, self.job_list, self.tracer)

        # initialized with CommandStatus.CreateNull()
        pipe_locs = []  # type: List[loc_t]

        # First n-1 processes (which is empty when n == 1)
        n = len(node.children)
        for i in xrange(n - 1):
            child = node.children[i]

            # TODO: determine these locations at parse time?
            pipe_locs.append(loc.Command(child))

            p = self._MakeProcess(child, True, self.exec_opts.errtrace())
            p.Init_ParentPipeline(pi)
            pi.Add(p)

        last_child = node.children[n - 1]
        # Last piece of code is in THIS PROCESS. 'echo foo | read line; echo $line'
        pi.AddLast((self.cmd_ev, last_child))
        pipe_locs.append(loc.Command(last_child))

        with dev.ctx_Tracer(self.tracer, 'pipeline', None):
            pi.StartPipeline(self.waiter)
            # Expose the pipeline so RunExternal can put a forked last part
            # into the same process group.
            self.fg_pipeline = pi
            status_out.pipe_status = pi.RunLastPart(self.waiter, self.fd_state)
            self.fg_pipeline = None  # clear in case we didn't end up forking

        status_out.pipe_locs = pipe_locs

    def RunSubshell(self, node):
        # type: (command_t) -> int
        """Fork a child to run node, wait for it, and return its status."""
        p = self._MakeProcess(node, True, self.exec_opts.errtrace())
        if self.job_control.Enabled():
            p.AddStateChange(process.SetPgid(process.OWN_LEADER, self.tracer))

        return p.RunProcess(self.waiter, trace.ForkWait)

    def CaptureStdout(self, node):
        # type: (command_t) -> Tuple[int, str]
        """Fork node with stdout connected to a pipe, read it all, and return
        (status, stdout) -- used by command sub and io->captureStdout()."""

        p = self._MakeProcess(node, self.exec_opts.inherit_errexit(),
                              self.exec_opts.errtrace())
        # Shell quirk: Command subs remain part of the shell's process group, so we
        # don't use p.AddStateChange(process.SetPgid(...))

        r, w = posix.pipe()
        p.AddStateChange(process.StdoutToPipe(r, w))

        p.StartProcess(trace.CommandSub)
        #log('Command sub started %d', pid)

        chunks = []  # type: List[str]
        posix.close(w)  # not going to write
        while True:
            n, err_num = pyos.Read(r, 4096, chunks)

            if n == 0:  # EOF
                break

            elif n > 0:
                # common shell behavior: remove NUL from stdout
                chunks[-1] = chunks[-1].replace('\0', '')

            else:  # n < 0
                if err_num == EINTR:
                    pass  # retry
                else:
                    # Like the top level IOError handler
                    e_die_status(
                        2,
                        'Oils I/O error (read): %s' % posix.strerror(err_num))

        posix.close(r)

        status = p.Wait(self.waiter)
        # Shells strip trailing newlines from command sub output.
        stdout_str = ''.join(chunks).rstrip('\n')

        return status, stdout_str

    def Capture3(self, node):
        # type: (command_t) -> Tuple[int, str, str]
        """Like CaptureStdout(), but capture both stdout and stderr, returning
        (status, stdout, stderr) -- used by io->captureAll()."""

        p = self._MakeProcess(node, self.exec_opts.inherit_errexit(),
                              self.exec_opts.errtrace())
        # Shell quirk: Command subs remain part of the shell's process group, so we
        # don't use p.AddStateChange(process.SetPgid(...))

        stdout_fd, w = posix.pipe()
        stderr_fd, w2 = posix.pipe()
        p.AddStateChange(process.StdoutToPipe(stdout_fd, w))
        p.AddStateChange(process.StderrToPipe(stderr_fd, w2))

        p.StartProcess(trace.CommandSub)
        #log('Command sub started %d', pid)

        stdout_chunks = []  # type: List[str]
        stderr_chunks = []  # type: List[str]
        posix.close(w)  # not going to write
        posix.close(w2)  # not going to write
        # Multiplex reads from both pipes until both hit EOF, to avoid
        # deadlocking when the child fills one pipe while we read the other.
        open_fds = [stdout_fd, stderr_fd]
        while True:
            fds = pyos.WaitForReading(open_fds)

            # zero outputs mean something went wrong
            if len(fds) == 0:
                break

            for fd in fds:
                if fd == stdout_fd:
                    n, err_num = pyos.Read(fd, 4096, stdout_chunks)
                else:
                    n, err_num = pyos.Read(fd, 4096, stderr_chunks)
                if n < 0:
                    if err_num == EINTR:
                        pass  # retry
                    else:
                        # Like the top level IOError handler
                        e_die_status(
                            2, 'Oils I/O error (read): %s' %
                            posix.strerror(err_num))
                elif n == 0:  # EOF
                    open_fds.remove(fd)

            if len(open_fds) == 0:
                break

        posix.close(stdout_fd)
        posix.close(stderr_fd)

        status = p.Wait(self.waiter)
        # Note: unlike CaptureStdout(), trailing newlines are NOT stripped.
        stdout_str = ''.join(stdout_chunks)
        stderr_str = ''.join(stderr_chunks)

        return status, stdout_str, stderr_str

    def RunCommandSub(self, cs_part):
        # type: (CommandSub) -> str
        """Evaluate $(...): run the child, return its stdout, and apply
        errexit / POSIX last-status rules."""

        if not self.exec_opts._allow_command_sub():
            # _allow_command_sub is used in two places. Only one of them turns
            # off _allow_process_sub
            if not self.exec_opts._allow_process_sub():
                why = "status wouldn't be checked (strict_errexit)"
            else:
                why = 'eval_unsafe_arith is off'

            e_die("Command subs not allowed here because %s" % why,
                  loc.WordPart(cs_part))

        node = cs_part.child

        # Hack for weird $(<file) construct.
        # TODO: This should be detected at PARSE time, and turned into
        # word_part.Slurp.
        # - All shells that implement it do it as a special case.
        # - Then document it under chap-word-lang.md
        # - In YSH, it could be $[io.slurp('myfile')]

        if node.tag() == command_e.Redirect:
            redir_node = cast(command.Redirect, node)
            # Detect '< file'
            if (len(redir_node.redirects) == 1 and
                    redir_node.redirects[0].op.id == Id.Redir_Less and
                    redir_node.child.tag() == command_e.NoOp):

                # Change it to builtin cat < file.
                # Blame < because 'builtin cat' has no location
                blame_tok = redir_node.redirects[0].op
                node = command.Simple(blame_tok, [], self.builtin_cat_words,
                                      None, None, False, redir_node.redirects)

        status, stdout_str = self.CaptureStdout(node)

        # OSH has the concept of aborting in the middle of a WORD. We're not
        # waiting until the command is over!
        if self.exec_opts.command_sub_errexit():
            if status != 0:
                msg = 'Command Sub exited with status %d' % status
                raise error.ErrExit(status, msg, loc.WordPart(cs_part))

        else:
            # POSIX 2.9.1.3: "If there is no command name but the command
            # contains a command substitution, the command shall complete with
            # the exit status of the command substitution whose exit status was
            # the last to be obtained"
            #
            # This affects
            #     a=$(false)
            #     $(false) $(exit 42)

            self.cmd_ev.check_command_sub_status = True
            self.mem.SetLastStatus(status)

        # Runtime errors test case: # $("echo foo > $@")
        # Why rstrip()?
        # https://unix.stackexchange.com/questions/17747/why-does-shell-command-substitution-gobble-up-a-trailing-newline-char
        return stdout_str

    def RunProcessSub(self, cs_part):
        # type: (CommandSub) -> str
        """Process sub forks a process connected to a pipe.

        The pipe is typically passed to another process via a /dev/fd/$FD path.

        Life cycle of a process substitution:

        1. Start with this code

           diff <(seq 3) <(seq 4)

        2. To evaluate the command line, we evaluate every word. The
           NormalWordEvaluator calls this method, RunProcessSub(), which does:

           a. Create a pipe(), getting r and w
           b. Starts the seq process, which inherits r and w
              It has a StdoutToPipe() redirect, which means that it dup2(w, 1)
              and close(r)
           c. Close the w FD, because neither the shell or 'diff' will write to it.
              However we must retain 'r', because 'diff' hasn't opened /dev/fd yet!
           d. We evaluate <(seq 3) to /dev/fd/$r, so "diff" can read from it

        3. Now we're done evaluating every word, so we know the command line of
           diff, which looks like

           diff /dev/fd/64 /dev/fd/65

           Those are the FDs for the read ends of the pipes we created.

        4. diff inherits a copy of the read end of both pipes. But it actually
           calls open() on both files passed as argv. (I think this is fine.)

        5. wait() for the diff process.

        6. The shell closes both the read ends of both pipes. Neither we nor
           'diff' will read again.

        7. The shell waits for both 'seq' processes.

        Related:
          shopt -s process_sub_fail
          _process_sub_status
        """
        cs_loc = loc.WordPart(cs_part)

        if not self.exec_opts._allow_process_sub():
            e_die(
                "Process subs not allowed here because status wouldn't be checked (strict_errexit)",
                cs_loc)

        p = self._MakeProcess(cs_part.child, True, self.exec_opts.errtrace())

        r, w = posix.pipe()
        #log('pipe = %d, %d', r, w)

        op_id = cs_part.left_token.id
        if op_id == Id.Left_ProcSubIn:
            # Example: cat < <(head foo.txt)
            #
            # The head process should write its stdout to a pipe.
            redir = process.StdoutToPipe(r,
                                         w)  # type: process.ChildStateChange

        elif op_id == Id.Left_ProcSubOut:
            # Example: head foo.txt > >(tac)
            #
            # The tac process should read its stdin from a pipe.

            # Note: this example sometimes requires you to hit "enter" in bash and
            # zsh. Why?
            redir = process.StdinFromPipe(r, w)

        else:
            raise AssertionError()

        p.AddStateChange(redir)

        if self.job_control.Enabled():
            p.AddStateChange(process.SetPgid(process.OWN_LEADER, self.tracer))

        # Fork, letting the child inherit the pipe file descriptors.
        p.StartProcess(trace.ProcessSub)

        ps_frame = self.process_sub_stack[-1]

        # Note: bash never waits() on the process, but zsh does. The calling
        # program needs to read() before we can wait, e.g.
        #     diff <(sort left.txt) <(sort right.txt)

        # After forking, close the end of the pipe we're not using.
        if op_id == Id.Left_ProcSubIn:
            posix.close(w)  # cat < <(head foo.txt)
            ps_frame.Append(p, r, cs_loc)  # close later
        elif op_id == Id.Left_ProcSubOut:
            posix.close(r)
            #log('Left_ProcSubOut closed %d', r)
            ps_frame.Append(p, w, cs_loc)  # close later
        else:
            raise AssertionError()

        # Is /dev Linux-specific?
        if op_id == Id.Left_ProcSubIn:
            return '/dev/fd/%d' % r

        elif op_id == Id.Left_ProcSubOut:
            return '/dev/fd/%d' % w

        else:
            raise AssertionError()

    def PushRedirects(self, redirects, err_out):
        # type: (List[RedirValue], List[int]) -> None
        """Apply redirects via fd_state; errors are reported through err_out."""
        if len(redirects) == 0:  # Optimized to avoid allocs
            return
        self.fd_state.Push(redirects, err_out)

    def PopRedirects(self, num_redirects, err_out):
        # type: (int, List[int]) -> None
        """Undo a matching PushRedirects()."""
        if num_redirects == 0:  # Optimized to avoid allocs
            return
        self.fd_state.Pop(err_out)

    def PushProcessSub(self):
        # type: () -> None
        """Open a new frame to collect process subs started by the next command."""
        if len(self.clean_frame_pool):
            # Optimized to avoid allocs
            new_frame = self.clean_frame_pool.pop()
        else:
            new_frame = _ProcessSubFrame()
        self.process_sub_stack.append(new_frame)

    def PopProcessSub(self, compound_st):
        # type: (StatusArray) -> None
        """This method is called by a context manager, which means we always
        wait() on the way out, which I think is the right thing.

        We don't always set _process_sub_status, e.g. if some fatal
        error occurs first, but we always wait.
        """
        frame = self.process_sub_stack.pop()
        if frame.WasModified():
            frame.MaybeWaitOnProcessSubs(self.waiter, compound_st)
        else:
            # Optimized to avoid allocs
            self.clean_frame_pool.append(frame)

        # Note: the 3 lists in _ProcessSubFrame are hot in our profiles. It would
        # be nice to somehow "destroy" them here, rather than letting them become
        # garbage that needs to be traced.

        # The CommandEvaluator could have a ProcessSubStack, which supports Push(),
        # Pop(), and Top() of VALUES rather than GC objects?
1059
1060 # The CommandEvaluator could have a ProcessSubStack, which supports Push(),
1061 # Pop(), and Top() of VALUES rather than GC objects?
1062
1063
1064# vim: sw=4