OILS / builtin / read_osh.py View on Github | oils.pub

530 lines, 311 significant
1from __future__ import print_function
2
3from errno import EINTR
4
5from _devbuild.gen import arg_types
6from _devbuild.gen.runtime_asdl import (span_e, cmd_value)
7from _devbuild.gen.syntax_asdl import source, loc_t
8from _devbuild.gen.value_asdl import value, LeftName
9from core import alloc
10from core import error
11from core.error import e_die
12from core import pyos
13from core import pyutil
14from core import state
15from display import ui
16from core import vm
17from frontend import flag_util
18from frontend import reader
19from frontend import typed_args
20from mycpp import mops
21from mycpp import mylib
22from mycpp.mylib import log, STDIN_FILENO
23
24import posix_ as posix
25
26from typing import Tuple, List, Any, TYPE_CHECKING
27if TYPE_CHECKING:
28 from _devbuild.gen.runtime_asdl import span_t
29 from frontend.parse_lib import ParseContext
30 from frontend import args
31 from osh.cmd_eval import CommandEvaluator
32 from osh.split import SplitContext
33
34_ = log
35
36# The read builtin splits using IFS.
37#
38# Summary:
39# - Split with IFS, except \ can escape them! This is different than the
40# algorithm for splitting words (at least the way I've represented it.)
41
42# Bash manual:
43# - If there are more words than names, the remaining words and their
44# intervening delimiters are assigned to the last name.
45# - If there are fewer words read from the input stream than names, the
46# remaining names are assigned empty values.
47# - The characters in the value of the IFS variable are used to split the line
48# into words using the same rules the shell uses for expansion (described
49# above in Word Splitting).
50# - The backslash character '\' may be used to remove any special meaning for
51# the next character read and for line continuation.
52
53
54def _AppendParts(
55 s, # type: str
56 spans, # type: List[Tuple[span_t, int]]
57 max_results, # type: int
58 join_next, # type: bool
59 parts, # type: List[mylib.BufWriter]
60):
61 # type: (...) -> Tuple[bool, bool]
62 """Append to 'parts', for the 'read' builtin.
63
64 Similar to _SpansToParts in osh/split.py
65
66 Args:
67 s: The original string
68 spans: List of (span, end_index)
69 max_results: the maximum number of parts we want
70 join_next: Whether to join the next span to the previous part. This
71 happens in two cases:
72 - when we have '\ '
73 - and when we have more spans # than max_results.
74 """
75 start_index = 0
76 # If the last span was black, and we get a backslash, set join_next to merge
77 # two black spans.
78 last_span_was_black = False
79
80 for span_type, end_index in spans:
81 if span_type == span_e.Black:
82 if join_next and len(parts):
83 parts[-1].write(s[start_index:end_index])
84 join_next = False
85 else:
86 buf = mylib.BufWriter()
87 buf.write(s[start_index:end_index])
88 parts.append(buf)
89 last_span_was_black = True
90
91 elif span_type == span_e.Delim:
92 if join_next:
93 parts[-1].write(s[start_index:end_index])
94 join_next = False
95 last_span_was_black = False
96
97 elif span_type == span_e.Backslash:
98 if last_span_was_black:
99 join_next = True
100 last_span_was_black = False
101
102 if max_results and len(parts) >= max_results:
103 join_next = True
104
105 start_index = end_index
106
107 done = True
108 if len(spans):
109 #log('%s %s', s, spans)
110 #log('%s', spans[-1])
111 last_span_type, _ = spans[-1]
112 if last_span_type == span_e.Backslash:
113 done = False
114
115 #log('PARTS %s', parts)
116 return done, join_next
117
118
119#
120# Three read() wrappers for 'read' builtin that RunPendingTraps: _ReadN,
121# _ReadPortion, and ReadLineSlowly
122#
123
124
125def _ReadN(fd, num_bytes, cmd_ev):
126 # type: (int, int, CommandEvaluator) -> str
127 chunks = [] # type: List[str]
128 bytes_left = num_bytes
129 while bytes_left > 0:
130 n, err_num = pyos.Read(fd, bytes_left, chunks)
131
132 if n < 0:
133 if err_num == EINTR:
134 cmd_ev.RunPendingTraps()
135 # retry after running traps
136 else:
137 raise pyos.ReadError(err_num)
138
139 elif n == 0: # EOF
140 break
141
142 else:
143 bytes_left -= n
144
145 return ''.join(chunks)
146
147
def _ReadPortion(fd, delim_byte, max_chars, allow_escape, cmd_ev):
    # type: (int, int, int, bool, CommandEvaluator) -> Tuple[str, bool]
    """Read a portion of file descriptor fd, one byte at a time.

    Reads until the delimiter, EOF, or max_chars, whichever comes first.
    The delimiter is not included in the result.

    Args:
      fd: File descriptor to read from.
      delim_byte: Byte value that terminates the portion.
      max_chars: Maximum number of bytes to count before stopping.  Ignored
        when negative (callers pass -1 for "unset").
      allow_escape: If true, a backslash escapes the next byte.
        Backslash-newline is dropped entirely (line continuation).  For any
        other byte, both the backslash and the byte are kept in the result,
        and the byte is not treated as a delimiter.
      cmd_ev: Used to run pending traps when a read is interrupted.

    Returns:
      (portion, eof), where eof is True if the end-of-file sentinel was seen.

    Raises:
      pyos.ReadError: on any read error other than EINTR.
    """
    ch_array = []  # type: List[int]
    eof = False

    chars_read = 0
    backslash = False
    while True:
        if max_chars >= 0 and chars_read >= max_chars:
            break
        ch, err_num = pyos.ReadByte(fd)
        if ch < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            cmd_ev.RunPendingTraps()
            # Retry after running traps.  No byte was consumed, so the
            # interrupted call must not count against max_chars.
            continue

        if ch == pyos.EOF_SENTINEL:
            eof = True
            break

        if backslash:
            backslash = False
            if ch == pyos.NEWLINE_CH:
                # Line continuation: drop both the backslash and the newline
                continue
            # Keep the backslash so that later splitting can see the escape
            ch_array.append(pyos.BACKSLASH_CH)
            ch_array.append(ch)

        elif allow_escape and ch == pyos.BACKSLASH_CH:
            # Decide what to do once we see the next byte
            backslash = True
            continue

        elif ch == delim_byte:
            break

        elif ch == 0:
            # Quirk of most shells except zsh: they ignore NUL bytes!
            # The byte is dropped but still counted below.
            pass

        else:
            ch_array.append(ch)

        chars_read += 1

    return pyutil.ChArrayToString(ch_array), eof
200
201
def ReadLineSlowly(cmd_ev, with_eol=True):
    # type: (CommandEvaluator, bool) -> Tuple[str, bool]
    """Read a line from stdin, unbuffered.

    Used by mapfile and read --raw-line.

    sys.stdin.readline() in Python has its own buffering which is incompatible
    with shell semantics.  dash, mksh, and zsh all read a single byte at a time
    with read(0, 1).

    Args:
      cmd_ev: Used to run pending traps when a read is interrupted.
      with_eol: Whether to keep the trailing newline in the result.

    Returns:
      (line, eof).  eof is True only when end-of-file was hit before any byte
      was read.  EOF after a partial line returns that partial line with
      eof False.

    Raises:
      pyos.ReadError: on any read error other than EINTR.
    """
    ch_array = []  # type: List[int]
    eof = False
    is_first_byte = True
    while True:
        ch, err_num = pyos.ReadByte(0)
        #log('   ch %d', ch)

        if ch < 0:
            if err_num != EINTR:
                raise pyos.ReadError(err_num)
            cmd_ev.RunPendingTraps()
            # Retry after running traps.  Nothing was read, so the next byte
            # is still the first one; otherwise EOF right after an
            # interrupted read would not be reported as EOF.
            continue

        if ch == pyos.EOF_SENTINEL:
            if is_first_byte:
                eof = True
            break

        if ch == pyos.NEWLINE_CH:
            if with_eol:
                ch_array.append(ch)
            break

        ch_array.append(ch)
        is_first_byte = False

    return pyutil.ChArrayToString(ch_array), eof
242
243
def ReadAll():
    # type: () -> str
    """Slurp stdin (fd 0) until EOF and return everything as one string.

    Similar to command sub in core/executor.py.

    Raises pyos.ReadError on any read error other than EINTR.
    """
    pieces = []  # type: List[str]
    at_eof = False
    while not at_eof:
        n, err_num = pyos.Read(0, 4096, pieces)

        if n == 0:
            at_eof = True
        elif n < 0 and err_num != EINTR:
            raise pyos.ReadError(err_num)
        # Otherwise we either got data, or were interrupted (EINTR).  On
        # EINTR we only retry: like read --line (and command sub), read --all
        # doesn't run traps.  It would be a bit weird to run them every 4096
        # bytes.

    return ''.join(pieces)
267
268
class ctx_TermAttrs(object):
    """Context manager that changes terminal attributes, then restores them.

    The attributes are changed when the object is constructed, via
    pyos.PushTermAttrs(), and restored on exit via pyos.PopTermAttrs().
    """

    def __init__(self, fd, local_modes):
        # type: (int, int) -> None
        """Apply local_modes to the terminal on fd and save the old state.

        local_modes is forwarded unchanged to pyos.PushTermAttrs().  The
        caller in this file passes the complement of the flags it wants
        turned off -- presumably it is used as a mask; confirm in pyos.
        """
        self.fd = fd

        # We change term_attrs[3] in Python, which is lflag "local modes"
        saved_modes, saved_attrs = pyos.PushTermAttrs(fd, local_modes)
        self.orig_local_modes = saved_modes
        self.term_attrs = saved_attrs

    def __enter__(self):
        # type: () -> None
        # Nothing to do: the attributes were already changed in __init__
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Put back what __init__ saved, whether or not an exception occurred
        pyos.PopTermAttrs(self.fd, self.orig_local_modes, self.term_attrs)
286
287
class Read(vm._Builtin):
    """The 'read' builtin.

    Handles both the OSH forms, which assign to named variables, REPLY, or an
    array (-a), and the YSH forms (--all, --raw-line, --num-bytes), which
    assign to a place or to _reply.
    """

    def __init__(
            self,
            splitter,  # type: SplitContext
            mem,  # type: state.Mem
            parse_ctx,  # type: ParseContext
            cmd_ev,  # type: CommandEvaluator
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.splitter = splitter
        self.mem = mem
        self.parse_ctx = parse_ctx
        self.cmd_ev = cmd_ev
        self.errfmt = errfmt
        self.stdin_ = mylib.Stdin()

    # Was --qsn, might be restored as --j8-word or --j8-line
    if 0:
        #from data_lang import qsn_native
        def _MaybeDecodeLine(self, line):
            # type: (str) -> str
            """Raises error.Parse if line isn't valid."""

            # Lines that don't start with a single quote aren't QSN.  They may
            # contain a single quote internally, like:
            #
            # Fool's Gold
            if not line.startswith("'"):
                return line

            arena = self.parse_ctx.arena
            line_reader = reader.StringLineReader(line, arena)
            lexer = self.parse_ctx.MakeLexer(line_reader)

            # The parser only yields valid tokens:
            #     Char_OneChar, Char_Hex, Char_UBraced
            # So we can use word_compile.EvalCStringToken, which is also used for
            # $''.
            # Important: we don't generate Id.Unknown_Backslash because that is valid
            # in echo -e.  We just make it Id.Unknown_Tok?

            # TODO: read location info should know about stdin, and redirects, and
            # pipelines?
            with alloc.ctx_SourceCode(arena, source.Stdin('')):
                #tokens = qsn_native.Parse(lexer)
                pass
            #tmp = [word_compile.EvalCStringToken(t) for t in tokens]
            #return ''.join(tmp)
            return ''

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Run the builtin, turning read failures into status 1.

        Both pyos.ReadError and IOError/OSError are reported through errfmt
        rather than propagated.
        """
        try:
            status = self._Run(cmd_val)
        except pyos.ReadError as e:  # different paths for read -d, etc.
            # don't quote code since YSH errexit will likely quote
            self.errfmt.PrintMessage("Oils read error: %s" %
                                     posix.strerror(e.err_num))
            status = 1
        except (IOError, OSError) as e:  # different paths for read -d, etc.
            self.errfmt.PrintMessage("Oils read I/O error: %s" %
                                     pyutil.strerror(e))
            status = 1
        return status

    def _ReadYsh(self, arg, arg_r, cmd_val):
        # type: (arg_types.read, args.Reader, cmd_value.Argv) -> int
        """
        Usage:

          read --all           # sets _reply
          read --all (&x)      # sets x

        Invalid for now:

          read (&x)            # YSH doesn't have token splitting
                               # we probably want read --row too

        Returns 0, except that --raw-line returns 1 when EOF is hit before
        any byte was read.  Raises error.Usage on an extra word argument.
        """
        place = None  # type: value.Place

        if cmd_val.proc_args:  # read --flag (&x)
            rd = typed_args.ReaderForProc(cmd_val)
            place = rd.PosPlace()
            rd.Done()

            blame_loc = cmd_val.proc_args.typed_args.left  # type: loc_t

        else:  # read --flag
            var_name = '_reply'

            #log('VAR %s', var_name)
            blame_loc = cmd_val.arg_locs[0]
            place = value.Place(LeftName(var_name, blame_loc),
                                self.mem.CurrentFrame())

        next_arg, next_loc = arg_r.Peek2()
        if next_arg is not None:
            raise error.Usage('got extra argument', next_loc)

        # The three flags are checked in priority order; exactly one must be
        # set, which the caller guarantees before dispatching here.
        num_bytes = mops.BigTruncate(arg.num_bytes)
        if num_bytes != -1:  # read --num-bytes
            contents = _ReadN(STDIN_FILENO, num_bytes, self.cmd_ev)
            status = 0

        elif arg.raw_line:  # read --raw-line is unbuffered
            contents, eof = ReadLineSlowly(self.cmd_ev, with_eol=arg.with_eol)
            #log('EOF %s', eof)
            status = 1 if eof else 0

        elif arg.all:  # read --all
            contents = ReadAll()
            status = 0

        else:
            raise AssertionError()

        self.mem.SetPlace(place, value.Str(contents), blame_loc)
        return status

    def _Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """Parse flags and dispatch to the YSH or OSH implementation.

        For the OSH path, this also handles -t 0 (poll for input), the -p
        prompt, and the terminal mode changes needed for -d / -n / -s when
        the input descriptor is a terminal.
        """
        attrs, arg_r = flag_util.ParseCmdVal('read',
                                             cmd_val,
                                             accept_typed_args=True)
        arg = arg_types.read(attrs.attrs)
        names = arg_r.Rest()

        # -u FD selects the descriptor to read from; default is stdin
        input_fd = STDIN_FILENO
        if arg.u != mops.MINUS_ONE:
            input_fd = mops.BigTruncate(arg.u)

        if arg.raw_line or arg.all or mops.BigTruncate(arg.num_bytes) != -1:
            return self._ReadYsh(arg, arg_r, cmd_val)

        if cmd_val.proc_args:
            raise error.Usage(
                "doesn't accept typed args without --all, or --num-bytes",
                cmd_val.proc_args.typed_args.left)

        if arg.t >= 0.0:
            if arg.t != 0.0:
                e_die("read -t isn't implemented (except t=0)")
            else:
                # Poll the descriptor we would actually read from, so that
                # 'read -t 0 -u N' reports on fd N rather than on stdin.
                return 0 if pyos.InputAvailable(input_fd) else 1

        # Terminal flags to turn off while reading
        bits = 0
        if posix.isatty(input_fd):
            # -d and -n should be unbuffered
            if arg.d is not None or mops.BigTruncate(arg.n) >= 0:
                bits |= pyos.TERM_ICANON
            if arg.s:  # silent
                bits |= pyos.TERM_ECHO

            if arg.p is not None:  # only if tty
                mylib.Stderr().write(arg.p)

        if bits == 0:
            status = self._Read(arg, names, input_fd)
        else:
            # Terminal attributes are changed on the same descriptor we read
            # from, and restored when the block exits.
            with ctx_TermAttrs(input_fd, ~bits):
                status = self._Read(arg, names, input_fd)
        return status

    def _Read(self, arg, names, input_fd):
        # type: (arg_types.read, List[str], int) -> int
        """Read from input_fd and assign the result to shell variables.

        Args:
          arg: Parsed flags.
          names: Variable names given on the command line.  'REPLY' is
            appended to this list when it is empty.
          input_fd: Descriptor to read from.

        Returns:
          With -N: 0 if exactly that many bytes were read, else 1.
          Otherwise: 1 if EOF was hit, else 0.  Variables are set either way.
        """
        # read a certain number of bytes, NOT respecting delimiter (-1 means
        # unset)
        arg_N = mops.BigTruncate(arg.N)
        if arg_N >= 0:
            s = _ReadN(input_fd, arg_N, self.cmd_ev)

            if len(names):
                name = names[0]  # ignore other names

                # Clear extra names, as bash does
                for i in xrange(1, len(names)):
                    state.BuiltinSetString(self.mem, names[i], '')
            else:
                name = 'REPLY'  # default variable name

            state.BuiltinSetString(self.mem, name, s)

            # Did we read all the bytes we wanted?
            return 0 if len(s) == arg_N else 1

        do_split = False

        if len(names):
            do_split = True  # read myvar does word splitting
        else:
            # read without args does NOT split, and fills in $REPLY
            names.append('REPLY')

        if arg.a is not None:
            max_results = 0  # array can hold all parts
            do_split = True
        else:
            # Assign one part to each variable name; leftovers are assigned to
            # the last name
            max_results = len(names)

        if arg.Z:  # -0 is synonym for IFS= read -r -d ''
            do_split = False
            raw = True
            delim_byte = 0
        else:
            raw = arg.r
            if arg.d is not None:
                if len(arg.d):
                    # Only the first character of the -d argument is used
                    delim_byte = ord(arg.d[0])
                else:
                    delim_byte = 0  # -d '' delimits by NUL
            else:
                delim_byte = pyos.NEWLINE_CH  # read a line

        chunk, eof = _ReadPortion(input_fd, delim_byte,
                                  mops.BigTruncate(arg.n), not raw,
                                  self.cmd_ev)

        # status 1 to terminate loop.  (This is true even though we set
        # variables).
        status = 1 if eof else 0

        #log('LINE %r', chunk)
        entries = self.splitter.SplitForRead(chunk, not raw, do_split,
                                             max_results)

        num_parts = len(entries)
        if arg.a is not None:
            state.BuiltinSetArray(self.mem, arg.a, entries)
        else:
            for i in xrange(max_results):
                if i < num_parts:
                    s = entries[i]
                else:
                    s = ''  # if there are too many variables
                var_name = names[i]
                #log('read: %s = %s', var_name, s)
                state.BuiltinSetString(self.mem, var_name, s)

        return status