#!/usr/bin/env bash
#
# Compare operations on data structures, with little I/O: strings, arrays,
# associative arrays, integers.
#
# Usage:
#   benchmarks/compute.sh <function name>
#
# List of benchmarks:
#
# - fib: integer, loop, assignment (shells don't have real integers)
# - for_loop: 2025 update, taken from benchmarks/ysh-for.sh
# - word_freq: hash table / assoc array (OSH uses a vector<pair<>> now!)
#   also an integer counter
# - bubble_sort: indexed array (bash uses a linked list?)
# - palindrome: string, slicing, unicode
# - parse_help: realistic shell-only string processing, which I didn't write.
#
# TODO:
# - Fix the BUGS
#   - palindrome doesn't work?  A Unicode issue?  Does it depend on UTF-8
#     decoding?
#   - bubble_sort depends on the locale too - there is an LC_ALL setting here
#
# - This file is annoying to TEST
#   - to add to it, you also have to change benchmarks/report.R
#   - and there is a loop in 'stage1' as well
#   - why can't it behave like other benchmarks?
#     - they are using this "big table" pattern
#
# - vary problem size, which is different from iters
#   - bubble_sort: array length, to test the complexity of array indexing
#   - palindrome: longer lines, to test the complexity of unicode/byte slicing
#   - word_freq: more unique words, to test the complexity of assoc arrays
#   - for_loop and fib are kinda similar
#     - maybe for_loop can introduce some conditionals
#
# - other languages
#   - awk, mawk, etc.
#     - we are going for Awk speed!
#
# Questions to answer:
# - Can a fast bytecode runtime be as fast as Python?
#   - measurement issue: Python does very well on fib/for_loop - it's mostly
#     process startup time
#   - bubble_sort and word_freq are a bit more work
#     - we can add YSH versions of those
#   - I wonder if they are testing data structures or the actual interpreter
#     loop, though
#     - it's possible that speeding up the interpreter loop doesn't help much
#   - the real motivations behind bytecode:
#     - to fix 'break' and 'continue'
#     - add coroutine support too - that suspends and resumes a frame, which
#       we can't do

set -o nounset
set -o pipefail
set -o errexit

REPO_ROOT=$(cd $(dirname $0)/.. && pwd)
readonly REPO_ROOT

source benchmarks/common.sh  # filter-provenance
source test/tsv-lib.sh  # tsv2html

readonly BASE_DIR=_tmp/compute

# Stabilize 'sort' output across machines (ugh locales!)
export LC_ALL=C

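# '%U' makes the 'time' builtin print only user CPU seconds; the manual 'time'
# calls in run-control-flow and word-split below rely on this.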
TIMEFORMAT='%U'

readonly OSH_YSH_OPT_REGEX='_bin/cxx-opt(-sh)?/(mycpp-nosouffle/)?(osh|ysh)'

# task_name,iter,args
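# Each -tasks function below appends 3 such fields to every provenance line;
# the 'xargs -n 3 -- echo "$fields"' pipeline is just a trick for doing that
# appending.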
hello-tasks() {
  local provenance=$1

  # Append the 3 task fields to each provenance line.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'hello _ _' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
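# 'fib 200 44' means 200 repetitions of fib(44) -- see the argument
# descriptions in print-report below.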
fib-tasks() {
  local provenance=$1

  # Append the 3 task fields to each provenance line.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" |
  while read fields; do
    echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
for_loop-tasks() {
  local provenance=$1

  # bumpleak segfaults on for_loop!  Probably because it runs out of memory.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" |
  while read fields; do
    echo 'for_loop 50000 _' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
control_flow-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'control_flow do_return 200' | xargs -n 3 -- echo "$fields"
  done
}

word_freq-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    # BUG: oils-for-unix differs on these two.  Looks like it's related to
    # backslashes!
    #echo 'word_freq 10 benchmarks/testdata/abuild' | xargs -n 3 -- echo "$fields"
    #echo 'word_freq 2 benchmarks/testdata/ltmain.sh' | xargs -n 3 -- echo "$fields"
    echo 'word_freq 10 configure' | xargs -n 3 -- echo "$fields"
  done
}

assoc_array-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    for n in 1000 2000 3000; do
      echo "word_freq 10 $n" | xargs -n 3 -- echo "$fields"
    done
  done
}

bubble_sort-tasks() {
  # Note: this is quadratic, but bubble sort itself is quadratic!
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'bubble_sort int 200' | xargs -n 3 -- echo "$fields"
    echo 'bubble_sort bytes 200' | xargs -n 3 -- echo "$fields"
  done
}

# Arrays are doubly linked lists in bash!  With a LASTREF hack to avoid being
# quadratic.
#
# See array_reference() in array.c in bash.  It searches both backward and
# forward.  Every cell has its index, a value, a forward pointer, and a back
# pointer.
#
# You need a pretty high N to see the quadratic behavior though!

# NOTE: osh is also slower with linear access, but not superlinear!

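# To see the difference by hand (illustrative; this is the same invocation that
# array_ref-one uses below):
#
#   seq 40000 | shuf > /tmp/array-ref-input.txt
#   time bash benchmarks/compute/array_ref.sh seq < /tmp/array-ref-input.txt
#   time bash benchmarks/compute/array_ref.sh random < /tmp/array-ref-input.txt
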
array_ref-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash |
  while read fields; do
    for mode in seq random; do
      for n in 10000 20000 30000 40000; do
        echo "array_ref $mode $n" | xargs -n 3 -- echo "$fields"
      done
    done
  done

  #array_ref $OSH_CC seq 5000
  #array_ref $OSH_CC seq 10000
  #array_ref $OSH_CC random 5000
  #array_ref $OSH_CC random 10000
  #EOF
}

palindrome-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'palindrome unicode _' | xargs -n 3 -- echo "$fields"
    echo 'palindrome bytes _' | xargs -n 3 -- echo "$fields"
  done
}

parse_help-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'parse_help ls-short _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help ls _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help mypy _' | xargs -n 3 -- echo "$fields"
  done
}

ext() {
  ### Print the file extension for the given runtime

  local runtime=$1

  case $runtime in
    python2)
      echo 'py'
      ;;
    awk)
      echo 'awk'
      ;;
    *ysh*)
      echo 'ysh'
      ;;
    *sh | *osh*)
      echo 'sh'
      ;;
  esac
}
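
# Example: 'ext python2' => py, and 'ext _bin/cxx-opt/osh' => sh, so task-all
# selects e.g. benchmarks/compute/fib.py vs. benchmarks/compute/fib.sh.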

word_freq-one() {
  ### Run one word_freq task (hash tables)

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local in=${4:-configure}  # input

  $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters < $in | sort -n
}

assoc_array-one() {
  ### Run word_freq with input from seq/shuf

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local n=${4:-10}

  # Shuffle so bash's LASTREF sequential-access optimization doesn't kick in.
  seq $n | shuf |
    $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters | sort -n
}

bubble_sort-one() {
  ### Run one bubble_sort task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-int}
  local n=${4:-100}

  $runtime benchmarks/compute/bubble_sort.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata-$n.txt
}

# OSH is like 10x faster here!
array_ref-one() {
  ### Run one array_ref task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-seq}
  local n=${4:-100}

  seq $n | shuf | $runtime benchmarks/compute/array_ref.$(ext $runtime) $mode
}

palindrome-one() {
  ### Run one palindrome task (strings)

  local name=${1:-palindrome}
  local runtime=$2
  local mode=${3:-unicode}

  $runtime benchmarks/compute/palindrome.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata.txt
}

parse_help-one() {
  ### Run one parse_help task (strings, real code)

  local name=${1:-parse_help}
  local runtime=$2
  local workload=${3:-}

  $runtime benchmarks/parse-help/pure-excerpt.sh _parse_help - \
    < benchmarks/parse-help/$workload.txt
}

#
# Helpers
#

hello-all() { task-all hello "$@"; }
fib-all() { task-all fib "$@"; }
for_loop-all() { task-all for_loop "$@"; }
control_flow-all() { task-all control_flow "$@"; }
word_freq-all() { task-all word_freq "$@"; }
assoc_array-all() { task-all assoc_array "$@"; }

# TODO: Fix the OSH comparison operator!  It gives the wrong answer and
# completes quickly.
bubble_sort-all() { task-all bubble_sort "$@"; }

# Array that is not quadratic
array_ref-all() { task-all array_ref "$@"; }

# Hm osh is a little slower here
palindrome-all() { task-all palindrome "$@"; }

parse_help-all() { task-all parse_help "$@"; }

task-all() {
  local task_name=$1
  local provenance=$2
  local host_job_id=$3
  local out_dir=$4  # put files to save in benchmarks-data repo here

  local tmp_dir=$BASE_DIR/tmp/$task_name

  local times_tsv=$out_dir/$task_name/$host_job_id.times.tsv
  rm -f $times_tsv

  mkdir -p $tmp_dir $out_dir/$task_name

  banner "*** $task_name ***"

  # header
  tsv-row \
    status elapsed_secs user_secs sys_secs max_rss_KiB \
    stdout_md5sum \
    host_name host_hash \
    runtime_name runtime_hash \
    task_name arg1 arg2 stdout_filename > $times_tsv

  local task_id=0

  ${task_name}-tasks $provenance > $tmp_dir/tasks.txt

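  # Each row is the provenance line (the first field is presumably the job ID)
  # followed by task_name arg1 arg2; the 'read' below skips task_name since we
  # already have it.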
  cat $tmp_dir/tasks.txt |
  while read _ host host_hash runtime runtime_hash _ arg1 arg2; do
    local file
    case $runtime in
      python2)
        file='py'
        ;;
      *sh | *osh*)
        file=$(basename $runtime)
        ;;
    esac

    local stdout_filename="stdout-$file-$arg1-$(basename $arg2).txt"

    local -a cmd
    case $task_name in
      hello|fib|for_loop|control_flow)
        # Run the interpreter DIRECTLY, without going through $0.  Forking bash
        # (this script) and then dash/python2/etc. would inflate the memory
        # measurement, because bash uses more memory.
        args=( benchmarks/compute/$task_name.$(ext $runtime) "$arg1" "$arg2" )

        case $runtime in
          # add the -f flag for awk
          awk) cmd=($runtime -f "${args[@]}") ;;
          *)   cmd=($runtime "${args[@]}") ;;
        esac
        ;;
      *)
        cmd=($0 ${task_name}-one "$task_name" "$runtime" "$arg1" "$arg2")
        ;;
    esac

    # join args into a single field
    time-tsv -o $times_tsv --append \
      --stdout $tmp_dir/$stdout_filename \
      --rusage \
      --field "$host" --field "$host_hash" \
      --field $runtime --field $runtime_hash \
      --field "$task_name" --field "$arg1" --field "$arg2" \
      --field "$stdout_filename" -- \
      "${cmd[@]}"

    task_id=$((task_id + 1))
  done

  #wc -l _tmp/compute/word_freq/*
  maybe-tree $tmp_dir
  cat $times_tsv
}

#
# Testdata
#

bubble_sort-testdata() {
  local out=$BASE_DIR/tmp/bubble_sort
  mkdir -p $out

  # TODO: Make these deterministic for more stable benchmarks?
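  # One option: GNU shuf accepts --random-source=FILE, which would make the
  # shuffle reproducible given a fixed byte stream.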
  for n in 100 200 300 400; do
    seq $n | shuf > $out/testdata-$n.txt
  done

  wc -l $out/testdata-*.txt
}

palindrome-testdata() {
  local out=$BASE_DIR/tmp/palindrome
  mkdir -p $out

  # TODO: Use iters?

  for i in $(seq 500); do
    cat <<EOF
foo
a
tat
cat

noon
amanaplanacanalpanama

μ
-μ-
EOF

  done > $out/testdata.txt

  wc -l $out/testdata.txt
}

measure() {
  local provenance=$1
  local host_job_id=$2
  local out_dir=${3:-$BASE_DIR/raw}  # ../benchmark-data/compute

  mkdir -p $BASE_DIR/{tmp,raw,stage1} $out_dir

  # set -x
  hello-all $provenance $host_job_id $out_dir
  fib-all $provenance $host_job_id $out_dir

  for_loop-all $provenance $host_job_id $out_dir
  control_flow-all $provenance $host_job_id $out_dir

  #return

  # TODO: doesn't work because we would need duplicate logic in stage1
  #if test -n "${QUICKLY:-}"; then
  #  return
  #fi

  word_freq-all $provenance $host_job_id $out_dir
  parse_help-all $provenance $host_job_id $out_dir

  bubble_sort-testdata
  palindrome-testdata

  bubble_sort-all $provenance $host_job_id $out_dir

  # INCORRECT, but still run it
  palindrome-all $provenance $host_job_id $out_dir

  # array_ref takes too long to show quadratic behavior, and that's only
  # necessary on 1 machine.  I think I will make a separate blog post,
  # if anything.

  maybe-tree $out_dir
}

soil-run() {
  ### Run it on just this machine, and make a report

  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  # Test the binaries that are IN TREE, NOT the ones in ../benchmark-data
  local -a oils_bin=(
    _bin/cxx-opt/osh _bin/cxx-opt+bumpleak/osh _bin/cxx-opt/mycpp-nosouffle/osh
    _bin/cxx-opt/ysh _bin/cxx-opt+bumpleak/ysh _bin/cxx-opt/mycpp-nosouffle/ysh
  )
  ninja "${oils_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(benchmarks/id.sh print-job-id)

  # Only measure what's in the soil-benchmarks OCI image -- it doesn't have
  # mksh or zsh
  benchmarks/id.sh shell-provenance-2 \
    $single_machine $job_id _tmp \
    bash dash python2 awk "${oils_bin[@]}"

  local provenance=_tmp/provenance.txt
  local host_job_id="$single_machine.$job_id"

  measure $provenance $host_job_id

  # Make it run on one machine
  stage1 '' $single_machine

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}


test-report() {
  # Make it run on one machine
  stage1 '' no-host

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}

stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}

  # This report works even if we only have one machine
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p $out_dir

  local times_tsv=$out_dir/times.tsv

  local -a raw=()

  # TODO: We should respect QUICKLY=1
  for metric in hello fib for_loop control_flow word_freq parse_help bubble_sort palindrome; do
    local dir=$raw_dir/$metric

    if test -n "$single_machine"; then
      local -a a=($dir/$single_machine.*.times.tsv)
      raw+=(${a[-1]})
    else
      # Globs are in lexicographical order, which works for our dates.
      local -a a=($dir/$MACHINE1.*.times.tsv)
      local -a b=($dir/$MACHINE2.*.times.tsv)  # HACK for now

      # take the latest file
      raw+=(${a[-1]} ${b[-1]})
    fi

  done
  csv-concat ${raw[@]} > $times_tsv
  wc -l $times_tsv
}

print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Compute Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oils.pub</a>
    </p>
EOF
  cmark <<EOF

## OSH Compute Performance

Running time and memory usage of programs that test data structures (as opposed
to I/O).

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).

Source code: [oils/benchmarks/compute](https://github.com/oils-for-unix/oils/tree/master/benchmarks/compute)

EOF

  cmark <<EOF
### hello (minimal startup)

EOF

  tsv2html $in_dir/hello.tsv

  cmark <<EOF
### fibonacci (integers)

- arg1: number of repetitions
- arg2: the N in fib(N)
EOF

  tsv2html $in_dir/fib.tsv

  cmark <<EOF
### for loop

- arg1: the N to sum
EOF

  tsv2html $in_dir/for_loop.tsv

  cmark <<EOF
### control flow

- arg1: the N to sum
EOF

  tsv2html $in_dir/control_flow.tsv

  cmark <<EOF
### word_freq (associative arrays / hash tables)

- arg1: number of repetitions
- arg2: the file (varies the size of the hash table)
EOF

  tsv2html $in_dir/word_freq.tsv

  cmark <<EOF
### parse_help (strings, real code)

- arg1: file to parse
EOF

  tsv2html $in_dir/parse_help.tsv

  cmark <<EOF
### bubble_sort (array of integers, array of strings)

- arg1: type of array
- arg2: length of array
EOF

  tsv2html $in_dir/bubble_sort.tsv

  # Commented out until the checksum is fixed

if false; then
  cmark <<EOF
### palindrome (byte strings, unicode strings)

- arg1: type of string
- arg2: TODO: length of string
EOF

  tsv2html $in_dir/palindrome.tsv

fi

  cmark <<EOF
### Interpreter and Host Details
EOF

  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  cmark <<EOF
### Details
EOF

  tsv2html $in_dir/details.tsv

  cmark <<EOF
### Stdout Files
EOF

  tsv2html $in_dir/stdout_files.tsv


  cat <<EOF
  </body>
</html>
EOF
}

#
# Run by hand
#

run-control-flow() {
  ### Reproduce OSH perf bug because of C++ exceptions

  # do_neither:  0.288 dash, 0.872 bash, 0.865 OSH
  # do_continue: 0.310 dash, 1.065 bash, 2.313 OSH
  # do_break:    0.222 dash, 0.712 bash, 1.430 OSH

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  for func in do_none do_continue do_break do_return; do
  #for func in do_return; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/control_flow.sh $func 200
      echo
    done
  done
}

# TODO: could make this a proper benchmark
word-split() {
  ### Test word splitting perf
  export OILS_GC_STATS=${1:-}

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  #local filename=README.md

  # Hm our word splitting actually isn't that slow?
  # TODO: measure allocs too?

  # Hm allocating over a million objects, but it's faster than bash
  # Most are in the pools

  local filename=benchmarks/testdata/configure-coreutils

  for func in default_ifs other_ifs; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/word_split.sh $func $filename
      echo
    done
  done
}

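# Illustrative sketch, not used by the harness: compare bash and the in-tree
# OSH binary on the word_freq workload by hand, reusing word_freq-one above.
# Assumes the 'configure' script at the repo root as input, like
# word_freq-tasks does.
word-freq-by-hand() {
  local osh=_bin/cxx-opt/osh

  ninja $osh

  for sh in bash $osh; do
    echo "--- $sh"
    # TIMEFORMAT above
    time word_freq-one word_freq $sh 10 configure > /dev/null
    echo
  done
}
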
735"$@"