OILS / test / syscall.sh View on Github | oils.pub

508 lines, 210 significant
#!/usr/bin/env bash
#
# Measure the number of syscalls that shells use.
#
# Usage:
#   test/syscall.sh <function name>

# Directory the helper libraries are sourced from; the caller may override it.
: "${LIB_OSH=stdlib/osh}"
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"

source build/dev-shell.sh

# Names of the Oils shells to measure; the caller may override them.
OSH=${OSH:-osh}
YSH=${YSH:-ysh}

#readonly -a SHELLS=(dash bash-4.4 bash $OSH)

# Compare bash 4 vs. bash 5
SHELLS=(dash bash-4.4 bash-5.2.21 mksh zsh ash "$OSH" "$YSH")

SHELLS_MORE=( "${SHELLS[@]}" yash )

# yash does something fundamentally different in by-code.wrapped - it
# understands functions
#SHELLS+=(yash)

readonly BASE_DIR='_tmp/syscall'  # What we'll publish
readonly RAW_DIR='_tmp/syscall-raw'  # Raw data

# Run it against the dev version of OSH
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
33
count-procs() {
  ### Run a shell under strace, writing the traces under a file prefix
  #
  # Args:
  #   $1     out_prefix: passed to strace -o (used together with -ff)
  #   $2     sh: shell to run; osh, ysh, osh-cpp and ysh-cpp are mapped below
  #   $3...  remaining arguments are passed to the shell unchanged
  #
  # Always returns 0: a failing shell is not an error for counting purposes.
  local out_prefix=$1
  local sh=$2
  shift 2

  # Build the command as an array rather than a string that gets word-split,
  # so that a REPO_ROOT containing spaces is passed through intact.
  local -a argv
  case $sh in
    # avoid the extra processes that bin/osh starts!
    #(X)  # to compare against osh 0.8.pre3 installed
    osh | ysh)
      argv=(
        env "PYTHONPATH=$REPO_ROOT:$REPO_ROOT/vendor"
        "$REPO_ROOT/bin/oils_for_unix.py" "$sh"
      )
      ;;
    osh-cpp)
      argv=( _bin/cxx-dbg/osh )
      ;;
    ysh-cpp)
      argv=( _bin/cxx-dbg/ysh )
      ;;
    *)
      argv=( "$sh" )
      ;;
  esac

  # Ignore failure, because we are just counting
  strace -ff -o "$out_prefix" -- "${argv[@]}" "$@" || true
}
60
run-case() {
  ### Run a test case with many shells
  #
  # Args:
  #   $1  num: case number; becomes part of the trace file prefix
  #   $2  code_str: code handed to each shell with -c
  #   $3  func_wrap: optional; when non-empty the code is wrapped in a shell
  #       function and only SHELLS (rather than SHELLS_MORE) are run

  local num=$1
  local code_str=$2
  local func_wrap=${3:-}

  local -a shells
  if test -n "$func_wrap"; then
    code_str="wrapper() { $code_str; }; wrapper"
    shells=( "${SHELLS[@]}" )
  else
    shells=( "${SHELLS_MORE[@]}" )
  fi

  # Declared local so the loop does not clobber a caller's variables
  local sh out_prefix
  for sh in "${shells[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" -c "$code_str"
  done
}
82
run-case-file() {
  ### Like run-case, but each shell reads the code from a file
  #
  # Args:
  #   $1  num: case number; names both the script and the trace file prefix
  #   $2  code_str: written byte-for-byte to _tmp/$num.sh (no newline is added)

  local num=$1
  local code_str=$2

  local script=_tmp/$num.sh
  mkdir -p _tmp
  # printf, not echo -n: echo would treat a code string like '-n' as an option
  printf '%s' "$code_str" > "$script"

  local sh out_prefix
  for sh in "${SHELLS_MORE[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    count-procs "$out_prefix" "$sh" "$script"
  done
}
97
run-case-stdin() {
  ### Like run-case, but each shell reads the code from a pipe on stdin
  #
  # Args:
  #   $1  num: case number; becomes part of the trace file prefix
  #   $2  code_str: piped byte-for-byte to the shell (no newline is added)

  local num=$1
  local code_str=$2

  local sh out_prefix
  for sh in "${SHELLS_MORE[@]}"; do
    out_prefix=$RAW_DIR/${sh}__${num}
    echo "--- $sh"
    # printf, not echo -n: echo would treat a code string like '-n' as an option
    printf '%s' "$code_str" | count-procs "$out_prefix" "$sh"
  done
}
110
print-cases() {
  ### Print the code snippets to measure, one per line
  #
  # Output format: one arbitrary code string per line.  Blank lines and
  # comment lines of the here doc are filtered out.  No numbers are printed
  # here; number-cases adds them.
  #
  # The here doc delimiter is quoted, so the text is taken literally and $( )
  # needs no escaping.
  grep -E -v '^[[:space:]]*(#|$)' <<'EOF'

# builtin
echo hi

# external command
date

# OSH calls this "sentence"
date ;

# trap - bash has special logic for this
trap 'echo mytrap' EXIT; date

# external then builtin
date; echo hi

# builtin then external
echo hi; date

# two external commands
date; date

# does a brace group make a difference?
{ date; date; }

# singleton brace group
date; { date; }

# does it behave differently if sourced?
. _tmp/sourced.sh

# dash and zsh somehow optimize this to 1
(echo hi)

(date)

( ( date ) )

( ( date ) ); echo hi

echo hi; (date)

echo hi; (date;)

echo hi; (echo hi;)

echo hi; (echo hi; date)

( echo hi ); echo hi

date > /tmp/redir.txt

(date;) > /tmp/sentence.txt

date 2> /tmp/stderr.txt | wc -l

echo hi > /tmp/redir.txt

(echo hi;) > /tmp/sentence.txt

echo hi 2> /tmp/stderr.txt | wc -l

(date; echo hi)

# command sub
echo $(date)

# command sub with builtin
echo $(echo hi)

# command sub with useless subshell (some scripts use this)
echo $( ( date ) )

# command sub with other subshell
echo $( ( date ); echo hi )

# 2 processes for all shells
( echo hi ); echo done

# simple pipeline
date | wc -l

# negated
! date | wc -l

# every shell does 3
echo a | wc -l

# every shell does 3
command echo a | wc -l

# bash does 4 here!
command date | wc -l

# negated
! command date | wc -l

# 3 processes for all?
# osh gives FIVE??? But others give 3. That's bad.
( date ) | wc -l

# 3 processes for all shells except zsh and osh, which have shopt -s lastpipe!
date | read x

# osh has 3, but should be 2 like zsh?
# hm how can zsh do 2 here? That seems impossible.
# oh it's lastpipe turns the shell process into wc -l ??? wow.
{ echo a; echo b; } | wc -l

# zsh behaves normally here. That is a crazy optimization. I guess it's
# nice when you have SH -c 'mypipeline | wc-l'
{ echo a; echo b; } | wc -l; echo done

# this is all over the map too. 3 4 4 2.
{ echo a; date; } | wc -l

# osh does 4 when others do 3. So every shell optimizes this extra pipeline.
( echo a; echo b ) | wc -l

( echo a; echo b ) | ( wc -l )

{ echo prefix; ( echo a; echo b ); } | ( wc -l )

echo hi & wait

date & wait

echo hi | wc -l & wait

date | wc -l & wait

trap 'echo mytrap' EXIT; date & wait

trap 'echo mytrap' EXIT; date | wc -l & wait

# trap in SubProgramThunk
{ trap 'echo mytrap' EXIT; date; } & wait
EOF

# Discarded because they're identical
# pipeline with redirect last
#date | wc -l > /tmp/out.txt

# pipeline with redirect first
#date 2>&1 | wc -l

}
261
number-cases() {
  ### Print the cases from print-cases, each prefixed with a number
  #
  # Right justified, leading zeros, with width 2
  # Wish this was %02d
  #
  # The POSIX short options are used instead of the GNU-only long spellings
  # (--number-format rz --number-width 2).
  print-cases | nl -n rz -w 2
}
267
by-input() {
  ### Run cases that vary by input reader
  #
  # The same inputs are run three ways: with -c (cases 50-55), from a file
  # (60-65) and from stdin (70-75).  The case table written at the end
  # labels each number, and the run-case* calls must stay in sync with it.
  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  local suite='by-input'

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  # Wow this newline makes a difference in shells!

  # This means that Id.Eof_Real is different than Id.Op_Newline?
  # Should we create a Sentence for it too then?
  # That is possible in _ParseCommandLine

  local zero=$'date; date'
  local one=$'date; date\n'
  local two=$'date; date\n#comment\n'
  local comment=$'# comment\ndate;date'
  local newline=$'date\n\ndate'
  local newline2=$'date\n\ndate\n#comment'

  # zsh is the only shell to optimize all 6 cases! 2 processes instead of 3.
  run-case 50 "$zero"
  run-case 51 "$one"
  run-case 52 "$two"
  run-case 53 "$comment"
  run-case 54 "$newline"
  run-case 55 "$newline2"

  run-case-file 60 "$zero"
  run-case-file 61 "$one"
  run-case-file 62 "$two"
  run-case-file 63 "$comment"
  run-case-file 64 "$newline"  # labelled "newline" in the table below
  run-case-file 65 "$newline2"

  # yash is the only shell to optimize the stdin case at all!
  # it looks for a lack of trailing newline.
  run-case-stdin 70 "$zero"
  run-case-stdin 71 "$one"
  run-case-stdin 72 "$two"
  run-case-stdin 73 "$comment"
  run-case-stdin 74 "$newline"  # labelled "newline" in the table below
  run-case-stdin 75 "$newline2"

  # This is identical for all shells
  #run-case 32 $'date; date\n#comment\n'

  cat > "$BASE_DIR/cases.${suite}.txt" <<EOF
50 -c: zero lines
51 -c: one line
52 -c: one line and comment
53 -c: comment first
54 -c: newline
55 -c: newline2
60 file: zero lines
61 file: one line
62 file: one line and comment
63 file: comment first
64 file: newline
65 file: newline2
70 stdin: zero lines
71 stdin: one line
72 stdin: one line and comment
73 stdin: comment first
74 stdin: newline
75 stdin: newline2
EOF

  count-lines "$suite"
  summarize "$suite" 3 0
}
344
# Quick hack for measuring $(< file).
#
# NOTE: an earlier observation here was "every shell uses 2 processes for
# this... doesn't illuminate much".  It was made while the command subs were
# expanded by this script itself, so the shells under test never saw them.
weird-command-sub() {
  mkdir -p _tmp "$RAW_DIR" "$BASE_DIR"

  # nullglob makes the glob vanish when RAW_DIR is empty; the subshell keeps
  # the option from leaking into the rest of the script.
  ( shopt -s nullglob; rm -r -f -v "$RAW_DIR"/* )

  local tmp=_tmp/cs
  echo FOO > "$tmp"

  # \$ defers the command sub to the shell being traced.  Only $tmp is
  # expanded here.
  run-case 60 "echo \$(< $tmp)"
  run-case 61 "echo \$(< $tmp; echo hi)"

  local suite=weird-command-sub

  cat > "$BASE_DIR/cases.${suite}.txt" <<EOF
60 \$(< file)
61 \$(< file; echo hi)
EOF

  count-lines "$suite"
  summarize "$suite" 0 0
}
365
readonly MAX_CASES=100
#readonly MAX_CASES=3

by-code() {
  ### Run cases that vary by code snippet
  #
  # Args:
  #   $1  func_wrap: optional; when non-empty, each snippet is wrapped in a
  #       function and the suite is named 'by-code-wrapped'
  #   $2  max_cases: optional; only the first N cases are run (default
  #       MAX_CASES).  This used to be read from $1 as well, which turned
  #       'by-code T' into 'head -n T'.
  local func_wrap=${1:-}
  local max_cases=${2:-$MAX_CASES}

  if ! strace true; then
    echo "Aborting because we couldn't run strace"
    return
  fi

  rm -r -f -v "$RAW_DIR"
  mkdir -p "$RAW_DIR" "$BASE_DIR"

  write-sourced

  local suite
  if test -n "$func_wrap"; then
    suite='by-code-wrapped'
  else
    suite='by-code'
  fi

  local cases=$BASE_DIR/cases.${suite}.txt

  # The cases file always lists every case; max_cases only limits what is run.
  number-cases > "$cases"

  local num code_str
  head -n "$max_cases" "$cases" | while read -r num code_str; do
    echo
    echo '==='
    echo "$num $code_str"
    echo

    run-case "$num" "$code_str" "$func_wrap"
  done

  # omit total line
  count-lines "$suite"
  summarize "$suite" 3 0
}
408
by-code-cpp() {
  ### Build the C++ debug binaries, then run by-code against them
  #
  # Re-invokes this script with OSH and YSH set to osh-cpp and ysh-cpp;
  # count-procs maps those names to the _bin/cxx-dbg binaries.  Arguments are
  # forwarded to by-code.
  ninja _bin/cxx-dbg/{osh,ysh}
  OSH=osh-cpp YSH=ysh-cpp "$0" by-code "$@"
}
413
by-input-cpp() {
  ### Build the C++ debug binaries, then run by-input against them
  #
  # Re-invokes this script with OSH and YSH set to osh-cpp and ysh-cpp;
  # count-procs maps those names to the _bin/cxx-dbg binaries.  Arguments are
  # forwarded to by-input.
  ninja _bin/cxx-dbg/{osh,ysh}
  OSH=osh-cpp YSH=ysh-cpp "$0" by-input "$@"
}
418
syscall-py() {
  ### Run test/syscall.py with the current directory on PYTHONPATH
  #
  # All arguments and stdin are passed through to the Python script.
  PYTHONPATH=. test/syscall.py "$@"
}
422
write-sourced() {
  ### Write the file used by the '. _tmp/sourced.sh' case in print-cases
  #
  # The content deliberately has no trailing newline.
  mkdir -p _tmp
  printf '%s' 'date; date' > _tmp/sourced.sh
}
426
count-lines() {
  ### Write 'wc -l' counts for every file in RAW_DIR to BASE_DIR/wc.SUITE.txt
  #
  # Args:
  #   $1  suite: optional suite name, default by-code
  #
  # The trailing 'total' line of wc is omitted.  wc only prints that line when
  # given more than one file, so the single-file case must not drop a line.
  # Fails if RAW_DIR cannot be entered or contains no files.
  local suite=${1:-by-code}

  # The redirect is opened before the subshell changes directory, so a
  # relative BASE_DIR still refers to the original working directory.
  (
    cd "$RAW_DIR" || exit

    shopt -s nullglob
    files=( * )

    case ${#files[@]} in
      0)
        echo "count-lines: no trace files in $RAW_DIR" >&2
        exit 1
        ;;
      1)
        wc -l -- "${files[@]}"
        ;;
      *)
        # sed '$d' drops the last line, i.e. the total
        wc -l -- "${files[@]}" | sed '$d'
        ;;
    esac
  ) > "$BASE_DIR/wc.${suite}.txt"
}
431
summarize() {
  ### Feed the wc counts of a suite to syscall-py and print OK or FAIL
  #
  # Args:
  #   $1  suite: optional suite name, default by-code
  #   $2  not_minimum: optional value for --not-minimum, default 0
  #   $3  more_than_bash: optional value for --more-than-bash, default 0
  #
  # A failure of syscall-py is only reported as 'FAIL' on stdout; this
  # function itself still returns 0.
  local suite=${1:-by-code}
  local not_minimum=${2:-0}
  local more_than_bash=${3:-0}

  # '|| status=$?' captures the failure without toggling errexit, so the
  # caller's shell options are left as they were.
  local status=0
  syscall-py \
    --not-minimum "$not_minimum" \
    --more-than-bash "$more_than_bash" \
    --suite "$suite" \
    "$BASE_DIR/cases.${suite}.txt" \
    "$BASE_DIR" \
    < "$BASE_DIR/wc.${suite}.txt" || status=$?

  if test "$status" -eq 0; then
    echo 'OK'
  else
    echo 'FAIL'
  fi
}
454
soil-run() {
  ### Run by-code, by-code wrapped in a function, and by-input
  #
  # Invoked as one of the "other" tests.  Soil runs by-code and by-input
  # separately.

  # Note: Only $BASE_DIR/*.txt is included in the release/$VERSION/other.wwz
  by-code

  # wrapped: any non-empty first argument turns on function wrapping
  by-code T

  by-input

  echo 'OK'
}
469
run-for-release() {
  ### Run the syscall suites for a release; same as soil-run

  soil-run
}
475
476#
477# Real World
478#
479# $ ls|grep dash|wc -l
480# 6098
481# $ ls|grep bash|wc -l
482# 6102
483# $ ls|grep osh|wc -l
484# 6098
485#
486# So Oil is already at dash level for CPython's configure, and bash isn't
487# far off. So autoconf-generated scripts probably already use constructs
488# that are already "optimal" in most shells.
489
readonly PY27_DIR=$PWD/Python-2.7.13

cpython-configure() {
  ### Trace ./configure in PY27_DIR under bash, dash and osh
  #
  # Traces are written with the prefix $PWD/$RAW_DIR/real/cpython-$sh, where
  # $PWD is the directory this function was called from.
  local raw_dir=$PWD/$RAW_DIR/real
  mkdir -p "$raw_dir"

  # Don't run ./configure from the wrong directory if the source tree is
  # missing
  pushd "$PY27_DIR" || return

  #for sh in "${SHELLS[@]}"; do
  local sh out_prefix
  for sh in bash dash osh; do
    out_prefix=$raw_dir/cpython-$sh
    echo "--- $sh"

    # TODO: Use a different dir
    count-procs "$out_prefix" "$sh" -c './configure'
  done
  popd
}
507
# Hand the command line to task-five (from the sourced task-five.sh).  Per the
# usage note at the top of this file, the first argument names the function to
# run.
task-five "$@"