1 | #!/usr/bin/env python2
|
2 | from __future__ import print_function
|
3 |
|
4 | import time as time_ # avoid name conflict
|
5 |
|
6 | from _devbuild.gen import arg_types
|
7 | from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str, Kind, Kind_t
|
8 | from _devbuild.gen.runtime_asdl import cmd_value
|
9 | from _devbuild.gen.syntax_asdl import (
|
10 | loc,
|
11 | loc_e,
|
12 | loc_t,
|
13 | source,
|
14 | Token,
|
15 | CompoundWord,
|
16 | printf_part,
|
17 | printf_part_e,
|
18 | printf_part_t,
|
19 | )
|
20 | from _devbuild.gen.types_asdl import lex_mode_e, lex_mode_t
|
21 | from _devbuild.gen.value_asdl import (value, value_e)
|
22 |
|
23 | from core import alloc
|
24 | from core import error
|
25 | from core.error import p_die
|
26 | from core import state
|
27 | from core import vm
|
28 | from frontend import flag_util
|
29 | from frontend import consts
|
30 | from frontend import lexer
|
31 | from frontend import match
|
32 | from frontend import reader
|
33 | from mycpp import mops
|
34 | from mycpp import mylib
|
35 | from mycpp.mylib import log
|
36 | from osh import sh_expr_eval
|
37 | from osh import string_ops
|
38 | from osh import word_compile
|
39 | from data_lang import j8_lite
|
40 |
|
41 | import posix_ as posix
|
42 |
|
43 | from typing import Dict, List, Optional, TYPE_CHECKING, cast
|
44 |
|
45 | if TYPE_CHECKING:
|
46 | from display import ui
|
47 | from frontend import parse_lib
|
48 |
|
49 | _ = log
|
50 |
|
51 |
|
class _FormatStringParser(object):
    """Parser that turns a printf format string into a list of parts.

    Grammar:

      width         = Num | Star
      precision     = Dot (Num | Star | Zero)?
      fmt           = Percent (Flag | Zero)* width? precision? (Type | Time)
      part          = Char_* | Format_EscapedPercent | fmt
      printf_format = part* Eof_Real   # we're using the main lexer

    'Time' is the bash-style %(strftime)T conversion.
    """

    def __init__(self, lexer):
        # type: (lexer.Lexer) -> None
        self.lexer = lexer

        # Placeholders; real values are filled in by the first _Next() call.
        self.cur_token = None  # type: Token
        self.token_type = Id.Undefined_Tok  # type: Id_t
        self.token_kind = Kind.Undefined  # type: Kind_t

    def _Next(self, lex_mode):
        # type: (lex_mode_t) -> None
        """Read the next token in lex_mode and cache its id and kind."""
        tok = self.lexer.Read(lex_mode)
        self.cur_token = tok
        self.token_type = tok.id
        self.token_kind = consts.GetKind(tok.id)

    def _ParseFormatStr(self):
        # type: () -> printf_part_t
        """Parse a single 'fmt' production.

        On entry the current token is the leading '%'.  On return the current
        token is the conversion type; the caller advances past it.

        Dies with a parse error (p_die) on unsupported flags, unsupported
        conversion types, or a missing/invalid conversion character.
        """
        percent_mode = lex_mode_e.PrintfPercent
        self._Next(percent_mode)  # consume the %

        fmt = printf_part.Percent.CreateNull(alloc_lists=True)

        # (Flag | Zero)*
        while self.token_type in (Id.Format_Flag, Id.Format_Zero):
            flag_tok = self.cur_token
            # space and + could be implemented
            flag_str = lexer.TokenVal(flag_tok)  # allocation will be cached
            if flag_str in '# +':
                p_die("osh printf doesn't support the %r flag" % flag_str,
                      flag_tok)

            fmt.flags.append(flag_tok)
            self._Next(percent_mode)

        # width?
        if self.token_type in (Id.Format_Num, Id.Format_Star):
            fmt.width = self.cur_token
            self._Next(percent_mode)

        # precision?
        if self.token_type == Id.Format_Dot:
            # A bare '.' is recorded as the Dot token itself; it is replaced
            # below when a number, star or zero follows.
            fmt.precision = self.cur_token
            self._Next(percent_mode)  # past dot
            if self.token_type in (Id.Format_Num, Id.Format_Star,
                                   Id.Format_Zero):
                fmt.precision = self.cur_token
                self._Next(percent_mode)

        # (Type | Time)
        if self.token_type == Id.Unknown_Tok:
            p_die('Invalid printf format character', self.cur_token)

        elif self.token_type not in (Id.Format_Type, Id.Format_Time):
            p_die('Expected a printf format character', self.cur_token)

        else:
            fmt.type = self.cur_token

            # ADDITIONAL VALIDATION outside the "grammar".
            conv = lexer.TokenVal(fmt.type)  # allocation will be cached
            if conv in 'eEfFgG':
                p_die("osh printf doesn't support floating point", fmt.type)
            # This one could be implemented.  %c needs utf-8 decoding.
            if conv == 'c':
                p_die("osh printf doesn't support single characters (bytes)",
                      fmt.type)

        return fmt

    def Parse(self):
        # type: () -> List[printf_part_t]
        """Parse the whole format string and return its parts in order."""
        result = []  # type: List[printf_part_t]

        self._Next(lex_mode_e.PrintfOuter)
        # Id.Eol_Tok: special case for format string of '\x00'.
        while self.token_type not in (Id.Eof_Real, Id.Eol_Tok):
            is_literal = (self.token_kind in (Kind.Lit, Kind.Char) or
                          self.token_type in (Id.Format_EscapedPercent,
                                              Id.Unknown_Backslash))
            if is_literal:
                # Note: like in echo -e, we don't fail with Unknown_Backslash
                # here when shopt -u parse_backslash because it's at runtime
                # rather than parse time.
                # Users should use $'' or the future static printf ${x %.3f}.
                result.append(self.cur_token)

            elif self.token_type == Id.Format_Percent:
                result.append(self._ParseFormatStr())

            else:
                raise AssertionError(Id_str(self.token_type))

            self._Next(lex_mode_e.PrintfOuter)

        return result
|
157 |
|
158 |
|
class _PrintfState(object):
    """Mutable bookkeeping for one formatting run of the printf builtin."""

    def __init__(self):
        # type: () -> None

        # Exit status of the run; set to 1 before returning on an error
        self.status = 0
        # Becomes True once a %b argument contains \c
        self.backslash_c = False
        # Index of the next argument to consume
        self.arg_index = 0
|
166 |
|
167 |
|
class Printf(vm._Builtin):
    """The printf builtin: printf [-v var] format [argument ...]

    Format strings are parsed once and cached per distinct string in
    self.parse_cache.
    """

    def __init__(
            self,
            mem,  # type: state.Mem
            parse_ctx,  # type: parse_lib.ParseContext
            unsafe_arith,  # type: sh_expr_eval.UnsafeArith
            errfmt,  # type: ui.ErrorFormatter
    ):
        # type: (...) -> None
        self.mem = mem
        self.parse_ctx = parse_ctx
        self.unsafe_arith = unsafe_arith
        self.errfmt = errfmt
        # Maps a format string to its parsed parts
        self.parse_cache = {}  # type: Dict[str, List[printf_part_t]]

        # this object initialized in main()
        # Used for %(...)T with the special argument -2
        self.shell_start_time = time_.time()

    def _Percent(
            self,
            pr,  # type: _PrintfState
            part,  # type: printf_part.Percent
            varargs,  # type: List[str]
            locs,  # type: List[CompoundWord]
    ):
        # type: (...) -> Optional[str]
        """Format a single % conversion.

        Consumes arguments from varargs starting at pr.arg_index: one each for
        a '*' width and a '*' precision, then one for the value itself (a
        missing value is treated as '').

        Args:
          pr: run state; arg_index is advanced, backslash_c is set when a %b
              argument contains \\c, status is set to 1 on error
          part: the parsed conversion
          varargs: the argument strings
          locs: locations parallel to varargs, used for error messages

        Returns:
          The formatted string, or None after printing an error (in which case
          pr.status is 1).
        """
        num_args = len(varargs)

        # TODO: Cache this?
        flags = []  # type: List[str]
        if len(part.flags) > 0:
            for flag_token in part.flags:
                flags.append(lexer.TokenVal(flag_token))

        width = -1  # nonexistent
        if part.width:
            if part.width.id in (Id.Format_Num, Id.Format_Zero):
                width_str = lexer.TokenVal(part.width)
                width_loc = part.width  # type: loc_t
            elif part.width.id == Id.Format_Star:  # depends on data
                if pr.arg_index < num_args:
                    width_str = varargs[pr.arg_index]
                    width_loc = locs[pr.arg_index]
                    pr.arg_index += 1
                else:
                    width_str = ''  # invalid
                    width_loc = loc.Missing
            else:
                raise AssertionError()

            try:
                width = int(width_str)
            except ValueError:
                # No argument to blame, so blame the '*' in the format string
                if width_loc.tag() == loc_e.Missing:
                    width_loc = part.width
                self.errfmt.Print_("printf got invalid width %r" % width_str,
                                   blame_loc=width_loc)
                pr.status = 1
                return None

        precision = -1  # nonexistent
        if part.precision:
            if part.precision.id == Id.Format_Dot:
                # A bare '.' means precision 0
                precision_str = '0'
                precision_loc = part.precision  # type: loc_t
            elif part.precision.id in (Id.Format_Num, Id.Format_Zero):
                precision_str = lexer.TokenVal(part.precision)
                precision_loc = part.precision
            elif part.precision.id == Id.Format_Star:
                if pr.arg_index < num_args:
                    precision_str = varargs[pr.arg_index]
                    precision_loc = locs[pr.arg_index]
                    pr.arg_index += 1
                else:
                    precision_str = ''
                    precision_loc = loc.Missing
            else:
                raise AssertionError()

            try:
                precision = int(precision_str)
            except ValueError:
                if precision_loc.tag() == loc_e.Missing:
                    precision_loc = part.precision
                self.errfmt.Print_('printf got invalid precision %r' %
                                   precision_str,
                                   blame_loc=precision_loc)
                pr.status = 1
                return None

        if pr.arg_index < num_args:
            s = varargs[pr.arg_index]
            word_loc = locs[pr.arg_index]  # type: loc_t
            pr.arg_index += 1
            has_arg = True
        else:
            s = ''
            word_loc = loc.Missing
            has_arg = False

        # Note: %s could be lexed into Id.Percent_S.  Although small string
        # optimization would remove the allocation as well.
        typ = lexer.TokenVal(part.type)
        if typ == 's':
            if precision >= 0:
                s = s[:precision]  # truncate

        elif typ == 'q':
            # Most shells give \' for single quote, while OSH gives
            # $'\''  this could matter when SSH'ing.
            # Ditto for $'\\' vs. '\'

            s = j8_lite.MaybeShellEncode(s)

        elif typ == 'b':
            # Process just like echo -e, except \c handling is simpler.

            c_parts = []  # type: List[str]
            lex = match.EchoLexer(s)
            while True:
                id_, tok_val = lex.Next()
                if id_ == Id.Eol_Tok:  # Note: This is really a NUL terminator
                    break

                p = word_compile.EvalCStringToken(id_, tok_val)

                # Unusual behavior: '\c' aborts processing!
                if p is None:
                    pr.backslash_c = True
                    break

                c_parts.append(p)
            s = ''.join(c_parts)

        elif part.type.id == Id.Format_Time or typ in 'diouxX':
            # %(...)T and %d share this complex integer conversion logic

            if match.LooksLikeInteger(s):
                # Note: spaces like ' -42 ' accepted and normalized
                ok, d = mops.FromStr2(s)
                if not ok:
                    self.errfmt.Print_("Integer too big: %s" % s, word_loc)
                    pr.status = 1
                    return None

            else:
                # Check for 'a and "a
                # These are interpreted as the numeric ASCII value of 'a'
                num_bytes = len(s)
                if num_bytes > 0 and s[0] in '\'"':
                    if num_bytes == 1:
                        # NUL after quote
                        d = mops.ZERO
                    elif num_bytes == 2:
                        # Allow invalid UTF-8, because all shells do
                        d = mops.IntWiden(ord(s[1]))
                    else:
                        try:
                            small_i = string_ops.DecodeUtf8Char(s, 1)
                        except error.Expr as e:
                            # Take the numeric value of first char, ignoring
                            # the rest of the bytes.
                            # Something like strict_arith or strict_printf
                            # could throw an error in this case.
                            self.errfmt.Print_(
                                'Warning: %s' % e.UserErrorString(), word_loc)
                            small_i = ord(s[1])

                        d = mops.IntWiden(small_i)

                # No argument means -1 for %(...)T as in Bash Reference Manual
                # 4.2 - "If no argument is specified, conversion behaves as if
                # -1 had been given."
                elif not has_arg and part.type.id == Id.Format_Time:
                    d = mops.MINUS_ONE

                else:
                    if has_arg:
                        blame_loc = word_loc  # type: loc_t
                    else:
                        blame_loc = part.type
                    self.errfmt.Print_(
                        'printf expected an integer, got %r' % s, blame_loc)
                    pr.status = 1
                    return None

            if part.type.id == Id.Format_Time:
                # Initialize timezone:
                #   `localtime' uses the current timezone information
                #   initialized by `tzset'.  The function `tzset' refers to the
                #   environment variable `TZ'.  When the exported variable `TZ'
                #   is present, its value should be reflected in the real
                #   environment variable `TZ' before call of `tzset'.
                #
                # Note: unlike LANG, TZ doesn't seem to change behavior if it's
                # not exported.
                #
                # TODO: In YSH, provide an API that doesn't rely on libc's
                # global state.

                tzcell = self.mem.GetCell('TZ')
                if (tzcell and tzcell.exported and
                        tzcell.val.tag() == value_e.Str):
                    tzval = cast(value.Str, tzcell.val)
                    posix.putenv('TZ', tzval.s)

                time_.tzset()

                # Handle special values:
                #   User can specify two special values -1 and -2 as in Bash
                #   Reference Manual 4.2: "Two special argument values may be
                #   used: -1 represents the current time, and -2 represents the
                #   time the shell was invoked." from
                #   https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-printf
                if mops.Equal(d, mops.MINUS_ONE):  # -1 is current time
                    # TODO: 2038 problem
                    ts = time_.time()
                elif mops.Equal(d, mops.MINUS_TWO):  # -2 is shell start time
                    ts = self.shell_start_time
                else:
                    ts = mops.BigTruncate(d)

                # typ looks like '(%Y-%m-%d)T'; strip the '(' and ')T'
                s = time_.strftime(typ[1:-2], time_.localtime(ts))
                if precision >= 0:
                    s = s[:precision]  # truncate

            else:  # typ in 'diouxX'
                # Disallowed because it depends on 32- or 64- bit
                if mops.Greater(mops.ZERO, d) and typ in 'ouxX':
                    # TODO: Don't truncate it
                    self.errfmt.Print_(
                        "Can't format negative number with %%%s: %d" %
                        (typ, mops.BigTruncate(d)), part.type)
                    pr.status = 1
                    return None

                if typ == 'o':
                    s = mops.ToOctal(d)
                elif typ == 'x':
                    s = mops.ToHexLower(d)
                elif typ == 'X':
                    s = mops.ToHexUpper(d)
                else:  # diu
                    s = mops.ToStr(d)  # without spaces like ' -42 '

                # Split off the sign so padding decisions are made on the
                # digits alone.
                negative = s.startswith('-')
                if negative:
                    sign = '-'
                    digits = s[1:]
                else:
                    sign = ''
                    digits = s

                # There are TWO different ways to ZERO PAD, and they differ on
                # the negative sign!  See spec/builtin-printf

                zero_pad = 0  # no zero padding
                if width >= 0 and '0' in flags:
                    zero_pad = 1  # style 1
                elif precision > 0 and len(digits) < precision:
                    # Precision is a minimum number of DIGITS, so the sign
                    # must not count: [%.3d] -42 becomes [-042]
                    zero_pad = 2  # style 2

                if zero_pad == 1:
                    if negative:
                        # [%06d] -42 becomes [-00042] (6 TOTAL)
                        n = width - 1
                    else:
                        n = width
                    s = sign + digits.rjust(n, '0')
                elif zero_pad == 2:
                    # [%6.6d] -42 becomes [-000042] (1 for '-' + 6)
                    s = sign + digits.rjust(precision, '0')

        else:
            raise AssertionError()

        if width >= 0:
            if '-' in flags:
                s = s.ljust(width, ' ')
            else:
                s = s.rjust(width, ' ')
        return s

    def _Format(self, parts, varargs, locs, out):
        # type: (List[printf_part_t], List[str], List[CompoundWord], List[str]) -> int
        """Hairy printf formatting logic.

        Walks the parsed format string, appending output pieces to 'out'.  The
        format is reused ("arg recycling") until all arguments are consumed.

        Args:
          parts: parsed format string
          varargs: the argument strings
          locs: locations parallel to varargs
          out: list that receives the formatted pieces

        Returns:
          0 on success, or the nonzero status set by a failed conversion.
        """
        pr = _PrintfState()
        num_args = len(varargs)

        while True:  # loop over arguments
            for part in parts:  # loop over parsed format string
                UP_part = part
                if part.tag() == printf_part_e.Literal:
                    part = cast(Token, UP_part)
                    if part.id == Id.Format_EscapedPercent:
                        s = '%'
                    else:
                        s = word_compile.EvalCStringToken(
                            part.id, lexer.LazyStr(part))

                elif part.tag() == printf_part_e.Percent:
                    part = cast(printf_part.Percent, UP_part)

                    s = self._Percent(pr, part, varargs, locs)
                    if pr.status != 0:
                        return pr.status

                else:
                    raise AssertionError()

                out.append(s)

                if pr.backslash_c:  # 'printf %b a\cb xx' - \c terminates processing!
                    break

            if pr.backslash_c:
                # \c also discards the remaining arguments, so don't recycle
                # the format string for them.
                break
            if pr.arg_index == 0:
                # We went through ALL parts and didn't consume ANY arg.
                # Example: print x y
                break
            if pr.arg_index >= num_args:
                # We printed all args
                break
            # If there are more args, keep going.  This implements 'arg
            # recycling' behavior
            #   printf '%s ' 1 2 3 => 1 2 3

        return 0

    def Run(self, cmd_val):
        # type: (cmd_value.Argv) -> int
        """
        printf: printf [-v var] format [argument ...]

        Returns 0 on success, 2 when the format string fails to parse, or the
        nonzero status of a failed conversion.
        """
        attrs, arg_r = flag_util.ParseCmdVal('printf', cmd_val)
        arg = arg_types.printf(attrs.attrs)

        fmt, fmt_loc = arg_r.ReadRequired2('requires a format string')
        varargs, locs = arg_r.Rest2()

        if fmt in self.parse_cache:
            parts = self.parse_cache[fmt]
        else:
            arena = self.parse_ctx.arena
            line_reader = reader.StringLineReader(fmt, arena)
            # TODO: Make public
            # Named fmt_lexer so the imported 'lexer' module isn't shadowed
            fmt_lexer = self.parse_ctx.MakeLexer(line_reader)
            parser = _FormatStringParser(fmt_lexer)

            with alloc.ctx_SourceCode(arena,
                                      source.Dynamic('printf arg', fmt_loc)):
                try:
                    parts = parser.Parse()
                except error.Parse as e:
                    self.errfmt.PrettyPrintError(e)
                    return 2  # parse error

            self.parse_cache[fmt] = parts

        if 0:  # debugging aid
            print()
            for part in parts:
                part.PrettyPrint()
                print()

        out = []  # type: List[str]
        status = self._Format(parts, varargs, locs, out)
        if status != 0:
            return status  # failure

        result = ''.join(out)
        if arg.v is not None:
            # TODO: get the location for arg.v!
            v_loc = loc.Missing
            lval = self.unsafe_arith.ParseLValue(arg.v, v_loc)
            state.BuiltinSetValue(self.mem, lval, value.Str(result))
        else:
            mylib.Stdout().write(result)
        return 0
|