benchmarks/osh-runtime.sh

OILS / benchmarks / osh-runtime.sh View on Github | oilshell.org

581 lines, 341 significant

1	#!/usr/bin/env bash
2	#
3	# Test scripts found in the wild for both correctness and performance.
4	#
5	# Usage:
6	# benchmarks/osh-runtime.sh <function name>
7
set -o nounset
set -o pipefail
set -o errexit

# Absolute path of the repo root, i.e. the parent of this script's directory.
# $0 is quoted so a checkout path containing spaces works, and '&&' makes a
# failed 'cd' fail the assignment (and trip errexit) instead of silently
# recording whatever directory we happened to be in.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
13
14	source benchmarks/common.sh # tsv-concat
15	source benchmarks/id.sh # print-job-id
16	source test/common.sh
17	source test/tsv-lib.sh # tsv-row
18
19	readonly BASE_DIR=_tmp/osh-runtime
20
21	# TODO: Move to ../oil_DEPS
22	readonly TAR_DIR=$PWD/_deps/osh-runtime # Make it absolute
23
24	#
25	# Dependencies
26	#
27
28	readonly PY27_DIR=$PWD/Python-2.7.13
29
30	# NOTE: Same list in oilshell.org/blob/run.sh.
tarballs() {
  ### Print the file names of the test-data tarballs, one per line
  printf '%s\n' \
    tcc-0.9.26.tar.bz2 \
    yash-2.46.tar.xz \
    ocaml-4.06.0.tar.xz \
    util-linux-2.40.tar.xz
}
39
download() {
  ### Fetch every tarball listed by 'tarballs' into $TAR_DIR
  #
  # Files that already exist are skipped (wget --no-clobber).

  mkdir -p "$TAR_DIR"

  # -I {} already runs one command per input line.  Combining it with -n 1
  # only makes GNU xargs warn that the two options are mutually exclusive.
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
      'https://www.oilshell.org/blob/testdata/{}'
}
45
extract() {
  ### Unpack every .bz2 and .xz archive found in $TAR_DIR, in place
  #
  # Commands are traced with 'set -x' and the whole loop is timed.  Afterwards
  # the directory is listed.

  local f  # keep the loop variable out of the caller's scope

  set -x
  # Quoted so that a $TAR_DIR containing whitespace still works
  time for f in "$TAR_DIR"/*.{bz2,xz}; do
    tar -x --directory "$TAR_DIR" --file "$f"
  done
  set +x

  ls -l "$TAR_DIR"
}
55
56	#
57	# Computation
58	#
59
run-tasks() {
  ### Run every task read from stdin under time-tsv
  #
  # Args:
  #   $1  output dir, relative to $PWD.  It receives times.tsv (appended to),
  #       files-<task_id>/STDOUT.txt, and gc-<task_id>.txt for native OSH.
  # Stdin:
  #   one task per line, with fields: host_name sh_path workload
  #
  # Dies on an unknown workload name.

  local raw_out_dir=$1
  raw_out_dir="$PWD/$raw_out_dir"  # because we change dirs

  # Bug fix for dynamic scoping!
  local host_name sh_path workload

  local task_id=0
  while read -r host_name sh_path workload; do

    log "*** $host_name $sh_path $workload $task_id"

    # Some workloads run in another directory, so a relative shell path like
    # bin/osh must be resolved against the current directory first.
    local sh_run_path
    case $sh_path in
      /*)  # Already absolute
        sh_run_path=$sh_path
        ;;
      */*)  # It's relative, so make it absolute
        sh_run_path=$PWD/$sh_path
        ;;
      *)  # 'dash' should remain 'dash'
        sh_run_path=$sh_path
        ;;
    esac

    local working_dir=''
    local files_out_dir="$raw_out_dir/files-$task_id"
    mkdir -v -p "$files_out_dir"

    # Never set to a non-empty value here, so the file-copying steps below are
    # currently disabled.
    local save_new_files=''

    local -a argv
    case $workload in
      hello-world)
        argv=( testdata/osh-runtime/hello_world.sh )
        ;;

      bin-true)
        argv=( testdata/osh-runtime/bin_true.sh )
        ;;

      abuild-print-help)
        argv=( testdata/osh-runtime/abuild -h )
        ;;

      configure.cpython)
        argv=( "$PY27_DIR/configure" )
        working_dir=$files_out_dir
        ;;

      configure.util-linux)
        # flag needed to avoid sqlite3 dep error message
        argv=( "$TAR_DIR/util-linux-2.40/configure" --disable-liblastlog2 )
        working_dir=$files_out_dir
        ;;

      configure.*)
        argv=( ./configure )

        local conf_dir
        case $workload in
          *.ocaml)
            conf_dir='ocaml-4.06.0'
            ;;
          *.tcc)
            conf_dir='tcc-0.9.26'
            ;;
          *.yash)
            conf_dir='yash-2.46'
            ;;
          *)
            die "Invalid workload $workload"
            ;;
        esac

        # These are run in-tree?
        working_dir=$TAR_DIR/$conf_dir
        ;;

      *)
        die "Invalid workload $workload"
        ;;
    esac

    # The --field values must line up with the header that
    # run-tasks-wrapper writes.
    local -a time_argv=(
      time-tsv
        --output "$raw_out_dir/times.tsv" --append
        --rusage
        --rusage-2
        --field "$task_id"
        --field "$host_name" --field "$sh_path"
        --field "$workload"
        -- "$sh_run_path" "${argv[@]}"
    )

    local stdout_file="$files_out_dir/STDOUT.txt"
    local gc_stats_file="$raw_out_dir/gc-$task_id.txt"

    # Maybe change dirs
    if test -n "$working_dir"; then
      pushd "$working_dir"
    fi

    if test -n "$save_new_files"; then
      touch __TIMESTAMP
    fi

    # Run it, possibly with GC stats.  A case pattern's '*' also matches '/',
    # so this covers both _bin/cxx-opt/osh and deeper build directories.
    case $sh_path in
      _bin/*/osh)
        OILS_GC_STATS_FD=99 "${time_argv[@]}" > "$stdout_file" 99> "$gc_stats_file"
        ;;
      *)
        "${time_argv[@]}" > "$stdout_file"
        ;;
    esac

    if test -n "$save_new_files"; then
      echo "COPYING to $files_out_dir"
      find . -type f -newer __TIMESTAMP \
        | xargs -I {} -- cp --verbose {} "$files_out_dir"
    fi

    # Restore dir
    if test -n "$working_dir"; then
      popd
    fi

    task_id=$((task_id + 1))
  done
}
190
191	# Sorted by priority for test-oils.sh osh-runtime --num-shells 3
192
# Every workload name that run-tasks accepts.  print-tasks-xshar takes
# workloads from the front of this list, so order matters.
readonly -a ALL_WORKLOADS=(
  hello-world bin-true
  configure.cpython configure.util-linux
  configure.ocaml configure.tcc configure.yash
  abuild-print-help
)
205
print-workloads() {
  ### for help

  # Prints each name in ALL_WORKLOADS on its own line, with a leading space.
  local w  # without this the loop variable leaks into the caller's scope
  for w in "${ALL_WORKLOADS[@]}"; do
    echo " $w"
  done
}
213
print-tasks() {
  ### Print one TSV row (host_name, sh_path, workload) per task
  #
  # Args:
  #   $1     host name to put in every row
  #   $2...  paths of native OSH binaries, run in addition to bash, dash and
  #          bin/osh
  # Env:
  #   QUICKLY  if non-empty, only hello-world and bin-true are emitted

  local host_name=$1
  shift 1
  local -a osh_native=( "$@" )

  # Declared local so these don't clobber the caller's variables; run-tasks
  # uses the same names.
  local -a workloads
  local sh_path workload

  if test -n "${QUICKLY:-}"; then
    workloads=(
      hello-world
      bin-true
      #configure.util-linux
      #abuild-print-help
    )
  else
    workloads=( "${ALL_WORKLOADS[@]}" )
  fi

  for sh_path in bash dash bin/osh "${osh_native[@]}"; do
    for workload in "${workloads[@]}"; do
      tsv-row "$host_name" "$sh_path" "$workload"
    done
  done
}
236
print-tasks-xshar() {
  ### Print TSV task rows, limited to the first N shells and workloads
  #
  # Args:
  #   $1  host name to put in every row
  #   $2  native OSH binary; it is the first shell, followed by bash and dash
  #   $3  number of times to repeat the whole task list (default 1)
  #   $4  number of shells to include (default 1)
  #   $5  number of workloads from ALL_WORKLOADS to include (default 1)

  local host_name=$1
  local osh_native=$2

  local num_iters=${3:-1}
  local num_shells=${4:-1}
  local num_workloads=${5:-1}

  # Loop variables are local so they don't clobber the caller's variables
  local i sh_path workload

  for (( i = 0; i < num_iters; i++ )); do

    local s=0
    # $osh_native is intentionally unquoted, as before: an empty value
    # contributes no shell at all
    for sh_path in $osh_native bash dash; do

      local w=0
      for workload in "${ALL_WORKLOADS[@]}"; do
        tsv-row "$host_name" "$sh_path" "$workload"

        w=$(( w + 1 ))  # cut off at specified workloads
        if test "$w" -eq "$num_workloads"; then
          break
        fi
      done

      s=$(( s + 1 ))  # cut off as specified shells
      if test "$s" -eq "$num_shells"; then
        break
      fi

    done
  done
}
268
test-print-tasks-xshar() {
  ### Print the task list for a few (num_shells, num_workloads) combinations

  local limits
  for limits in '1 1' '2 1' '2 2' '2 3'; do
    # $limits is left unquoted on purpose: it supplies two arguments
    print-tasks-xshar $(hostname) osh 1 $limits
    echo
  done
}
279
run-tasks-wrapper() {
  ### reads tasks from stdin
  #
  # Args:
  #   $1  host name, added as a column to gc_stats.tsv
  #   $2  output dir; created if needed
  #
  # Writes times.tsv, gc_stats.tsv and a copy of _tmp/provenance.tsv into the
  # output dir, plus whatever run-tasks writes there.

  local host_name=$1  # 'no-host' or 'lenny'
  local raw_out_dir=$2

  mkdir -v -p "$raw_out_dir"

  local tsv_out="$raw_out_dir/times.tsv"

  # Write header of the TSV file that is appended to.
  time-tsv -o "$tsv_out" --print-header \
    --rusage \
    --rusage-2 \
    --field task_id \
    --field host_name --field sh_path \
    --field workload

  # reads tasks from stdin
  # run-tasks outputs 3 things: raw times.tsv, per-task STDOUT and files, and
  # per-task GC stats
  run-tasks "$raw_out_dir"

  # Turn individual files into a TSV, adding host.  Only the directory part
  # is quoted, so the gc-*.txt glob still expands.
  benchmarks/gc_stats_to_tsv.py "$raw_out_dir"/gc-*.txt \
    | tsv-add-const-column host_name "$host_name" \
    > "$raw_out_dir/gc_stats.tsv"

  cp -v _tmp/provenance.tsv "$raw_out_dir"
}
310
measure() {
  ### For release and CI

  # $1: 'no-host' or 'lenny'
  # $2: _tmp/osh-runtime/$X or ../../benchmark-data/osh-runtime/$X
  local host_name=$1 raw_out_dir=$2
  shift 2

  # What remains in "$@" is the list of native OSH binaries:
  # $OSH_CPP_NINJA_BUILD or $OSH_CPP_BENCHMARK_DATA, etc...
  print-tasks "$host_name" "$@" | run-tasks-wrapper "$host_name" "$raw_out_dir"
}
321
stage1() {
  ### Concatenate the newest raw results per machine into $BASE_DIR/stage1
  #
  # Args:
  #   $1  dir holding raw.<machine>.* dirs (default $BASE_DIR), e.g.
  #       _tmp/osh-runtime or ../benchmark-data/osh-runtime
  #   $2  if given, use only this machine; otherwise $MACHINE1 and $MACHINE2

  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1  # _tmp/osh-runtime
  mkdir -p $out_dir

  local -a machines
  if test -n "$single_machine"; then
    machines=( "$single_machine" )
  else
    machines=( "$MACHINE1" "$MACHINE2" )
  fi

  local -a raw_times=()
  local -a raw_gc_stats=()
  local -a raw_provenance=()

  local machine
  for machine in "${machines[@]}"; do
    # Globs are in lexicographical order, which works for our dates, so the
    # last match is the most recent run.
    local -a candidates=( $base_dir/raw.$machine.* )

    raw_times+=( ${candidates[-1]}/times.tsv )
    raw_gc_stats+=( ${candidates[-1]}/gc_stats.tsv )
    raw_provenance+=( ${candidates[-1]}/provenance.tsv )
  done

  tsv-concat "${raw_times[@]}" > $out_dir/times.tsv

  tsv-concat "${raw_gc_stats[@]}" > $out_dir/gc_stats.tsv

  tsv-concat "${raw_provenance[@]}" > $out_dir/provenance.tsv
}
358
print-report() {
  ### Write the HTML report to stdout
  #
  # Args:
  #   $1  dir containing the .tsv tables to render

  local in_dir=$1

  benchmark-html-head 'OSH Runtime Performance'

  printf '%s\n' \
    '<body class="width60">' \
    '<p id="home-link">' \
    '<a href="/">oilshell.org</a>' \
    '</p>'

  # Title, table of contents, and the heading of the first section
  cmark <<'EOF'
## OSH Runtime Performance

Source code: [benchmarks/osh-runtime.sh](https://github.com/oilshell/oil/tree/master/benchmarks/osh-runtime.sh)

- [Elapsed Time](#elapsed-time)
- [Minor Page Faults](#page-faults)
- [Memory Usage](#memory-usage)
- [GC Stats](#gc-stats)
- [rusage Details](#rusage-details)
- [More Details](#more-details)
- [Shell and Host](#shell-and-host)

[Raw files](-wwz-index)

<a name="elapsed-time" />

### Elapsed Time by Shell (milliseconds)

Some benchmarks call many external tools, while some exercise the shell
interpreter itself.
EOF
  tsv2html $in_dir/elapsed.tsv

  printf '%s\n' \
    '<a name="page-faults" />' \
    '' \
    '### Minor Page Faults' \
    | cmark
  tsv2html $in_dir/page_faults.tsv

  printf '%s\n' \
    '<a name="memory-usage" />' \
    '' \
    '### Memory Usage (Max Resident Set Size in MB)' \
    '' \
    'Memory usage is measured in MB (powers of 10), not MiB (powers of 2).' \
    | cmark
  tsv2html $in_dir/max_rss.tsv

  # These sections share one shape: anchor, heading, a single table
  local section anchor heading table
  for section in \
    'gc-stats|GC Stats|gc_stats.tsv' \
    'rusage-details|rusage Details|details.tsv' \
    'more-details|More Details|details_io.tsv'
  do
    IFS='|' read -r anchor heading table <<< "$section"
    printf '%s\n' "<a name=\"$anchor\" />" '' "### $heading" | cmark
    tsv2html $in_dir/$table
  done

  printf '%s\n' \
    '<a name="shell-and-host" />' \
    '' \
    '### Shell and Host' \
    | cmark
  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  printf '%s\n' '' '</body>' '</html>' | cmark
}
447
test-oils-run() {
  ### Smoke-test an OSH binary, then measure it against bash and dash
  #
  # Args:
  #   $1  path of the OSH binary to measure
  #   $2  job ID, passed to shell-provenance-2
  #   $3  host name to put in the results
  #   $4  number of iterations (default 1)
  #   $5  number of shells (default 1)
  #   $6  number of workloads (default 1)
  #
  # Raw results are written under $BASE_DIR/raw.

  local osh=$1
  local job_id=$2
  local host_name=$3

  # flags passed by caller
  local num_iters=${4:-1}
  local num_shells=${5:-1}
  local num_workloads=${6:-1}

  local time_py=${XSHAR_DIR:-$REPO_ROOT}/benchmarks/time_.py
  "$time_py" --tsv --rusage -- \
    "$osh" -c 'echo "smoke test: hi from benchmarks/osh-runtime.sh"'

  # Fresh build
  rm -r -f -v "$BASE_DIR" _tmp/{shell,host}-id

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$host_name" "$job_id" _tmp \
    bash dash "$osh"

  # Unlike soil-run, the raw dir name carries no host or job ID suffix
  local raw_out_dir="$BASE_DIR/raw"
  mkdir -p "$raw_out_dir"

  # Similar to 'measure', for soil-run and release
  print-tasks-xshar "$host_name" "$osh" \
    "$num_iters" "$num_shells" "$num_workloads" \
    | tee "$BASE_DIR/tasks.txt"

  run-tasks-wrapper "$host_name" "$raw_out_dir" < "$BASE_DIR/tasks.txt"
  echo

  # Note: 'stage1' in soil-run is a trivial concatenation, so we can create input for
  # benchmarks/report.R. We don't need that here
}
485
soil-run() {
  ### Run it on just this machine, and make a report

  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  # TODO: This testdata should be baked into Docker image, or mounted
  download
  extract

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD $OSH_SOUFFLE_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  # Write _tmp/provenance.* and _tmp/{host,shell}-id
  shell-provenance-2 \
    "$single_machine" "$job_id" _tmp \
    bash dash bin/osh "${osh_bin[@]}"

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  # Measure exactly the binaries that were built and recorded above
  measure "$single_machine" "$raw_out_dir" "${osh_bin[@]}"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
523
524	#
525	# Debugging
526	#
527
compare-cpython() {
  ### Diff the files each shell produced for the configure.cpython workload
  #
  # Reads the newest matching result dir under ../benchmark-data/osh-runtime
  # and writes the diffs to _tmp/cpython-configure/.

  # The array must hold a glob for ${a[-1]} to select the newest result dir.
  # Globs sort lexicographically.
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2024* )
  local -a a=( ../benchmark-data/osh-runtime/*.hoover.2024* )

  # More of a diff here?
  #local -a a=( ../benchmark-data/osh-runtime/*.broome.2023* )
  # less diff here
  #local -a a=( ../benchmark-data/osh-runtime/*.lenny.2023* )

  local dir=${a[-1]}

  echo "$dir"

  head -n 1 "$dir/times.tsv"
  grep -F 'configure.cpython' "$dir/times.tsv"

  # Task IDs of the configure.cpython runs, which name the files-<id> dirs.
  # NOTE(review): check these against the times.tsv rows printed above.
  local bash_id=2
  local dash_id=8
  local osh_py_id=14
  local osh_cpp_id=20

  # diff exits 1 when the inputs differ, which is the expected case here
  set +o errexit

  local out_dir=_tmp/cpython-configure
  mkdir -p "$out_dir"

  echo 'bash vs. dash'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$dash_id" \
    > "$out_dir/bash-vs-dash.txt"
  diffstat "$out_dir/bash-vs-dash.txt"
  echo

  echo 'bash vs. osh-py'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_py_id" \
    > "$out_dir/bash-vs-osh-py.txt"
  diffstat "$out_dir/bash-vs-osh-py.txt"
  echo

  echo 'bash vs. osh-cpp'
  diff -u --recursive "$dir/files-$bash_id" "$dir/files-$osh_cpp_id" \
    > "$out_dir/bash-vs-osh-cpp.txt"
  diffstat "$out_dir/bash-vs-osh-cpp.txt"
  echo

  return

  # Not reached: more detailed diffs, kept for manual debugging

  diff -u "$dir/files-$bash_id/STDOUT.txt" "$dir/files-$osh_cpp_id/STDOUT.txt"
  echo

  diff -u "$dir/files-$bash_id/pyconfig.h" "$dir/files-$osh_cpp_id/pyconfig.h"
  echo

  cdiff -u "$dir/files-$bash_id/config.log" "$dir/files-$osh_cpp_id/config.log"
  echo
}
580
581	"$@"