OILS / benchmarks / osh-runtime.sh View on Github | oils.pub

581 lines, 341 significant
#!/usr/bin/env bash
#
# Test scripts found in the wild for both correctness and performance.
#
# Usage:
#   benchmarks/osh-runtime.sh <function name>

set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the parent of the directory that holds this script.
# "$0" is quoted so a checkout path containing spaces still works, and '&&'
# stops 'pwd' from reporting an unrelated directory when 'cd' fails.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)

# These paths are relative, so they are resolved against the current
# directory of the caller.
source benchmarks/common.sh  # tsv-concat
source benchmarks/id.sh  # print-job-id
source test/common.sh
source test/tsv-lib.sh  # tsv-row

# Default location for everything this script writes.
readonly BASE_DIR=_tmp/osh-runtime

# TODO: Move to ../oil_DEPS
readonly TAR_DIR=$PWD/_deps/osh-runtime  # Make it absolute

#
# Dependencies
#

# Source tree used by the configure.cpython workload.
readonly PY27_DIR=$PWD/Python-2.7.13
29
# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  ### Print the name of each testdata tarball, one per line

  printf '%s\n' \
    tcc-0.9.26.tar.bz2 \
    yash-2.46.tar.xz \
    ocaml-4.06.0.tar.xz \
    util-linux-2.40.tar.xz
}
39
download() {
  ### Fetch every tarball named by 'tarballs' into $TAR_DIR

  mkdir -p "$TAR_DIR"

  # -I {} already runs one wget per input line.  It must not be combined with
  # -n, which GNU xargs treats as mutually exclusive with -I.
  # --no-clobber skips files that already exist in the destination.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
      'https://www.oilshell.org/blob/testdata/{}'
}
45
extract() {
  ### Unpack every .bz2 and .xz tarball found in $TAR_DIR, into $TAR_DIR

  local tarball  # keep the loop variable out of the global scope

  set -x
  # Brace expansion happens before the glob, so this is two patterns.  A
  # pattern that matches nothing is passed to tar literally and fails.
  time for tarball in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$tarball"
  done
  set +x

  ls -l "$TAR_DIR"
}
55
56#
57# Computation
58#
59
run-tasks() {
  ### Run every task read from stdin; record timings, stdout and GC stats
  #
  # Args:
  #   $1: output directory.  A relative path is resolved against $PWD up
  #       front, because some workloads run from another directory.
  # Stdin:
  #   One task per line: host_name, sh_path, workload (split by 'read').
  # Output, all under $1:
  #   times.tsv         appended to by time-tsv, one row per task
  #   files-$task_id/   STDOUT.txt of the task; also the working directory
  #                     of the cpython and util-linux configure workloads
  #   gc-$task_id.txt   written only when sh_path matches *_bin/*/osh

  local raw_out_dir=$1
  case $raw_out_dir in
    /*)  # Already absolute; prefixing $PWD would yield a bogus path
      ;;
    *)   # because we change dirs
      raw_out_dir="$PWD/$raw_out_dir"
      ;;
  esac

  # Bug fix for dynamic scoping!
  local host_name sh_path workload

  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Set to a non-empty value to copy files created by the task into
    # $files_out_dir.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      bin-true)
        argv=( testdata/osh-runtime/bin_true.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.
    #
    # stdin comes from /dev/null: otherwise the workload inherits the task
    # list this loop is reading, and any workload that reads stdin would
    # silently swallow the remaining tasks.
    case $sh_path in
      *_bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" \
          < /dev/null > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" < /dev/null > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      # NUL-delimited so file names with spaces or newlines survive
      find . -type f -newer __TIMESTAMP -print0 \
        | xargs -0 -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
190
# Sorted by priority for test-oils.sh osh-runtime --num-shells 3
#
# Every name here must have a matching arm in the workload 'case' of
# run-tasks.

declare -a ALL_WORKLOADS=(
  hello-world bin-true

  configure.cpython configure.util-linux configure.ocaml configure.tcc
  configure.yash

  abuild-print-help
)
readonly ALL_WORKLOADS
205
print-workloads() {
  ### for help: print each name in ALL_WORKLOADS on its own indented line

  local w  # keep the loop variable out of the global scope
  for w in "${ALL_WORKLOADS[@]}"; do
    echo " $w"
  done
}
213
print-tasks() {
  ### Print one task row per (shell, workload) pair, via tsv-row
  #
  # Args:
  #   $1:   host name, emitted as the first field of every row
  #   $2..: extra shell paths, run after bash, dash and bin/osh
  # Env:
  #   QUICKLY: when non-empty, only the two trivial workloads are listed

  local host_name=$1
  shift 1
  local -a osh_native=( "$@" )

  # Local, so callers' variables of the same name are not clobbered
  local -a workloads
  local sh_path workload

  if test -n "${QUICKLY:-}"; then
    workloads=(
      hello-world
      bin-true
      #configure.util-linux
      #abuild-print-help
    )
  else
    workloads=( "${ALL_WORKLOADS[@]}" )
  fi

  for sh_path in bash dash bin/osh "${osh_native[@]}"; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
236
print-tasks-xshar() {
  ### Print task rows for test-oils.sh, cut off by shell and workload counts
  #
  # Args:
  #   $1: host name, emitted as the first field of every row
  #   $2: path of the native osh; it is listed before bash and dash
  #   $3: number of iterations (default 1); the whole list repeats this often
  #   $4: number of shells to include (default 1)
  #   $5: number of workloads from ALL_WORKLOADS to include (default 1)
  #
  # A count that is never reached (e.g. 0, or more than are available) means
  # no cut-off.

  local host_name=$1
  local osh_native=$2

  local num_iters=${3:-1}
  local num_shells=${4:-1}
  local num_workloads=${5:-1}

  # Local, so callers' variables of the same name are not clobbered
  local i sh_path workload
  local s w

  for i in $(seq "$num_iters"); do

    s=0
    for sh_path in "$osh_native" bash dash; do

      w=0
      for workload in "${ALL_WORKLOADS[@]}"; do
        tsv-row "$host_name" "$sh_path" "$workload"

        w=$(( w + 1 ))  # cut off at specified workloads
        if test "$w" -eq "$num_workloads"; then
          break
        fi
      done

      s=$(( s + 1 ))  # cut off as specified shells
      if test "$s" -eq "$num_shells"; then
        break
      fi

    done
  done
}
268
test-print-tasks-xshar() {
  ### Print task lists for a few (num_shells, num_workloads) cut-offs
  #
  # Each list is followed by a blank line.  Output is for visual inspection;
  # nothing is asserted.

  # Look the host name up once, and quote it when it is passed on
  local host
  host=$(hostname)

  # Parallel arrays: cut-offs for call k are shells[k] and workloads[k]
  local -a shells=( 1 2 2 2 )
  local -a workloads=( 1 1 2 3 )

  local k
  for k in 0 1 2 3; do
    print-tasks-xshar "$host" osh 1 "${shells[k]}" "${workloads[k]}"
    echo
  done
}
279
run-tasks-wrapper() {
  ### reads tasks from stdin
  #
  # Args:
  #   $1: host name, added as a constant column to gc_stats.tsv
  #   $2: output directory; created if missing
  # Writes into $2: times.tsv, gc_stats.tsv, a copy of _tmp/provenance.tsv,
  # plus whatever run-tasks writes there.

  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # reads tasks from stdin
  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
310
measure() {
  ### For release and CI
  #
  # Generates the task list with print-tasks and pipes it straight into
  # run-tasks-wrapper.
  #
  #   $1:   host name: 'no-host' or 'lenny'
  #   $2:   raw output dir: _tmp/osh-runtime/$X or
  #         ../../benchmark-data/osh-runtime/$X
  #   $3..: native osh binaries: $OSH_CPP_NINJA_BUILD or
  #         $OSH_CPP_BENCHMARK_DATA, etc...

  local host_name=$1 raw_out_dir=$2
  shift 2  # what remains in "$@" is the list of native osh binaries

  print-tasks "$host_name" "$@" | run-tasks-wrapper "$host_name" "$raw_out_dir"
}
321
stage1() {
  ### Concatenate the latest raw results per machine into $BASE_DIR/stage1
  #
  # Args:
  #   $1: directory holding the raw.$machine.* dirs (default $BASE_DIR):
  #       _tmp/osh-runtime or ../benchmark-data/osh-runtime
  #   $2: optional machine name.  When given, only that machine is used;
  #       otherwise $MACHINE1 and $MACHINE2 are used, in that order.
  #
  # Output always goes to $BASE_DIR/stage1, whatever $1 is.

  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p "$out_dir"

  local -a machines
  if test -n "$single_machine"; then
    machines=( "$single_machine" )
  else
    machines=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local machine latest
  local -a dirs
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the most recent run.
    dirs=( "$base_dir"/raw."$machine".* )
    latest=${dirs[-1]}

    raw_times+=( "$latest/times.tsv" )
    raw_gc_stats+=( "$latest/gc_stats.tsv" )
    raw_provenance+=( "$latest/provenance.tsv" )
  done

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"

  tsv-concat "${raw_gc_stats[@]}" > "$out_dir/gc_stats.tsv"

  tsv-concat "${raw_provenance[@]}" > "$out_dir/provenance.tsv"
}
358
print-report() {
  ### Print the HTML report for the tables in directory $1 to stdout
  #
  # Headings go through cmark; each table is rendered by tsv2html from a
  # .tsv file in $1.  All heredoc delimiters are quoted: none of the text
  # needs expansion.

  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  cat <<'EOF'
 <body class="width60">
 <p id="home-link">
 <a href="/">oils.pub</a>
 </p>
EOF

  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

[Raw files](-wwz-index)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html "$in_dir/elapsed.tsv"

  cmark <<'EOF'
<a name="page-faults" />

### Minor Page Faults
EOF

  tsv2html "$in_dir/page_faults.tsv"

  cmark <<'EOF'
<a name="memory-usage" />

### Memory Usage (Max Resident Set Size in MB)

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).
EOF
  tsv2html "$in_dir/max_rss.tsv"

  cmark <<'EOF'
<a name="gc-stats" />

### GC Stats
EOF
  tsv2html "$in_dir/gc_stats.tsv"

  cmark <<'EOF'
<a name="rusage-details" />

### rusage Details
EOF
  tsv2html "$in_dir/details.tsv"

  cmark <<'EOF'
<a name="more-details" />

### More Details
EOF
  tsv2html "$in_dir/details_io.tsv"

  cmark <<'EOF'
<a name="shell-and-host" />

### Shell and Host
EOF
  tsv2html "$in_dir/shells.tsv"
  tsv2html "$in_dir/hosts.tsv"

  cmark <<'EOF'

 </body>
</html>
EOF
}
447
test-oils-run() {
  ### Smoke-test $1, then run the xshar task list and collect raw results
  #
  # Args:
  #   $1: path of the osh binary to benchmark
  #   $2: job ID, passed to shell-provenance-2
  #   $3: host name
  #   $4, $5, $6: iteration, shell and workload counts handed to
  #       print-tasks-xshar (each defaults to 1)
  #
  # Removes $BASE_DIR and _tmp/{shell,host}-id first, then writes
  # $BASE_DIR/tasks.txt and $BASE_DIR/raw/.

  local osh=$1
  local job_id=$2
  local host_name=$3

  # flags passed by caller
  local num_iters=${4:-1}
  local num_shells=${5:-1}
  local num_workloads=${6:-1}

  local time_py=${XSHAR_DIR:-$REPO_ROOT}/benchmarks/time_.py
  "$time_py" --tsv --rusage -- \
    "$osh" -c 'echo "smoke test: hi from benchmarks/osh-runtime.sh"'

  # Fresh build
  rm -r -f -v "$BASE_DIR" _tmp/{shell,host}-id

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$host_name" "$job_id" _tmp \
    bash dash "$osh"

  # A fixed directory name, with no job ID or host suffix
  local raw_out_dir="$BASE_DIR/raw"
  mkdir -p "$raw_out_dir"

  # Similar to 'measure', for soil-run and release.  The task list is shown
  # on stdout and saved, then fed to run-tasks-wrapper from the saved file.
  print-tasks-xshar "$host_name" "$osh" \
    "$num_iters" "$num_shells" "$num_workloads" \
    | tee "$BASE_DIR/tasks.txt"

  run-tasks-wrapper "$host_name" "$raw_out_dir" < "$BASE_DIR/tasks.txt"
  echo

  # Note: 'stage1' in soil-run is a trivial concatenation, so we can create input for
  # benchmarks/report.R. We don't need that here
}
485
soil-run() {
  ### Run it on just this machine, and make a report
  #
  # Starts from an empty $BASE_DIR, fetches and unpacks the testdata, builds
  # the native osh binaries with ninja, measures all shells, then runs the
  # report stages on $BASE_DIR.

  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( "$OSH_CPP_NINJA_BUILD" "$OSH_SOUFFLE_CPP_NINJA_BUILD" )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  # Measure exactly the binaries that were built and recorded above
  measure "$single_machine" "$raw_out_dir" "${osh_bin[@]}"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
523
524#
525# Debugging
526#
527
compare-cpython() {
  ### Debugging aid: diff the configure.cpython output dirs across shells
  #
  # Picks the last ../benchmark-data/osh-runtime/*.hoover.2024* directory,
  # prints its configure.cpython timing rows, then writes recursive diffs of
  # bash's files against each other shell's to _tmp/cpython-configure/ and
  # summarizes each with diffstat.
  #
  # Note: turns errexit off and does not restore it.

  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Number of each shell's files-N directory
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  # diff exits non-zero whenever the inputs differ
  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  # Each entry is name:id of the shell compared against bash
  local pair name other_id
  for pair in "dash:$dash_id" "osh-py:$osh_py_id" "osh-cpp:$osh_cpp_id"; do
    name=${pair%%:*}
    other_id=${pair#*:}

    echo "bash vs. $name"
    diff -u --recursive "$dir/files-$bash_id" "$dir/files-$other_id" \
      > "$out_dir/bash-vs-$name.txt"
    diffstat "$out_dir/bash-vs-$name.txt"
    echo
  done

  return

  # Unreachable: finer-grained bash vs. osh-cpp diffs, parked for manual use

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
580
# Dispatch: run the function named by the first argument, passing it the
# remaining arguments.  With no arguments this is a no-op.
"$@"