#!/usr/bin/env bash
#
# Compare operations on data structures, with little I/O: strings, arrays,
# associative arrays, integers.
#
# Usage:
#   benchmarks/compute.sh <function name>
#
# List of benchmarks:
#
# - fib: integer, loop, assignment (shells don't have real integers)
# - for_loop: 2025 update, taken from benchmarks/ysh-for.sh
# - word_freq: hash table / assoc array (OSH uses a vector<pair<>> now!)
#   also an integer counter
# - bubble_sort: indexed array (bash uses a linked list?)
# - palindrome: string, slicing, unicode
# - parse_help: realistic shell-only string processing, which I didn't write.
#
# TODO:
# - Fix the BUGS
#   - palindrome doesn't work?  A Unicode issue?  Does it depend on UTF-8
#     decoding?
#   - bubble_sort depends on the locale too - there is an LC_ALL setting here
#
# - This file is annoying to TEST
#   - to add to it, you also have to change benchmarks/report.R
#   - and there is a loop in 'stage1' as well
#   - why can't it behave like other benchmarks?
#     - they are using this "big table" pattern
#
# - vary problem size, which is different from iters
#   - bubble_sort: array length, to test the complexity of array indexing
#   - palindrome: longer lines, to test the complexity of unicode/byte slicing
#   - word_freq: more unique words, to test the complexity of assoc arrays
#   - for_loop and fib are kinda similar
#     - maybe for_loop can introduce some conditionals
#
# - other languages
#   - awk, mawk, etc.
#     - we are going for Awk speed!
#
# Questions to answer:
# - Can a fast bytecode runtime be as fast as Python?
#   - measurement issue: Python does very well on fib/for_loop - it's mostly
#     process startup time
#   - bubble_sort and word_freq are a bit more work
#     - we can add YSH versions of those
#   - I wonder if they are testing data structures or the actual interpreter
#     loop, though
#     - it's possible that speeding up the interpreter loop doesn't help much
#   - the real motivations behind bytecode:
#     - to fix 'break' and 'continue'
#     - add coroutine support too - that suspends and resumes a frame, which
#       we can't do

set -o nounset
set -o pipefail
set -o errexit

REPO_ROOT=$(cd $(dirname $0)/.. && pwd)
readonly REPO_ROOT

source benchmarks/common.sh  # filter-provenance
source test/tsv-lib.sh  # tsv2html

readonly BASE_DIR=_tmp/compute

# Stabilize 'sort' output across machines (ugh locales!)
export LC_ALL=C

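# '%U' makes the 'time' builtin print only user CPU seconds; the manual 'time'
# calls in run-control-flow and word-split below rely on this.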
TIMEFORMAT='%U'

readonly OSH_YSH_OPT_REGEX='_bin/cxx-opt(-sh)?/(mycpp-nosouffle/)?(osh|ysh)'

# task_name,iter,args
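# Each -tasks function below appends 3 such fields to every provenance line;
# the 'xargs -n 3 -- echo "$fields"' pipeline is just a trick for doing that
# appending.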
hello-tasks() {
  local provenance=$1

  # Append the 3 task fields to each provenance line.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'hello _ _' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
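# 'fib 200 44' means 200 repetitions of fib(44) -- see the argument
# descriptions in print-report below.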
fib-tasks() {
  local provenance=$1

  # Append the 3 task fields to each provenance line.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" |
  while read fields; do
    echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
for_loop-tasks() {
  local provenance=$1

  # bumpleak segfaults on for_loop!  Probably because it runs out of memory.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" |
  while read fields; do
    echo 'for_loop 50000 _' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
control_flow-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'control_flow do_return 200' | xargs -n 3 -- echo "$fields"
  done
}

word_freq-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    # BUG: oils-for-unix differs on these two.  Looks like it's related to
    # backslashes!
    #echo 'word_freq 10 benchmarks/testdata/abuild' | xargs -n 3 -- echo "$fields"
    #echo 'word_freq 2 benchmarks/testdata/ltmain.sh' | xargs -n 3 -- echo "$fields"
    echo 'word_freq 10 configure' | xargs -n 3 -- echo "$fields"
  done
}

assoc_array-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    for n in 1000 2000 3000; do
      echo "word_freq 10 $n" | xargs -n 3 -- echo "$fields"
    done
  done
}

bubble_sort-tasks() {
  # Note: this is quadratic, but bubble sort itself is quadratic!
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'bubble_sort int 200' | xargs -n 3 -- echo "$fields"
    echo 'bubble_sort bytes 200' | xargs -n 3 -- echo "$fields"
  done
}

# Arrays are doubly linked lists in bash!  With a LASTREF hack to avoid being
# quadratic.
#
# See array_reference() in array.c in bash.  It searches both backward and
# forward.  Every cell has its index, a value, a forward pointer, and a back
# pointer.
#
# You need a pretty high N to see the quadratic behavior though!

# NOTE: osh is also slower with linear access, but not superlinear!

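# To see the difference by hand (illustrative; this is the same invocation that
# array_ref-one uses below):
#
#   seq 40000 | shuf > /tmp/array-ref-input.txt
#   time bash benchmarks/compute/array_ref.sh seq < /tmp/array-ref-input.txt
#   time bash benchmarks/compute/array_ref.sh random < /tmp/array-ref-input.txt
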
array_ref-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash |
  while read fields; do
    for mode in seq random; do
      for n in 10000 20000 30000 40000; do
        echo "array_ref $mode $n" | xargs -n 3 -- echo "$fields"
      done
    done
  done

  #array_ref $OSH_CC seq 5000
  #array_ref $OSH_CC seq 10000
  #array_ref $OSH_CC random 5000
  #array_ref $OSH_CC random 10000
  #EOF
}

palindrome-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'palindrome unicode _' | xargs -n 3 -- echo "$fields"
    echo 'palindrome bytes _' | xargs -n 3 -- echo "$fields"
  done
}

parse_help-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'parse_help ls-short _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help ls _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help mypy _' | xargs -n 3 -- echo "$fields"
  done
}

ext() {
  ### Print the file extension for the given runtime

  local runtime=$1

  case $runtime in
    python2)
      echo 'py'
      ;;
    awk)
      echo 'awk'
      ;;
    *ysh*)
      echo 'ysh'
      ;;
    *sh | *osh*)
      echo 'sh'
      ;;
  esac
}
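
# Example: 'ext python2' => py, and 'ext _bin/cxx-opt/osh' => sh, so task-all
# selects e.g. benchmarks/compute/fib.py vs. benchmarks/compute/fib.sh.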

word_freq-one() {
  ### Run one word_freq task (hash tables)

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local in=${4:-configure}  # input

  $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters < $in | sort -n
}

assoc_array-one() {
  ### Run word_freq with input from seq/shuf

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local n=${4:-10}

  # Shuffle so bash's LASTREF sequential-access optimization doesn't kick in.
  seq $n | shuf |
    $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters | sort -n
}

bubble_sort-one() {
  ### Run one bubble_sort task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-int}
  local n=${4:-100}

  $runtime benchmarks/compute/bubble_sort.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata-$n.txt
}

# OSH is like 10x faster here!
array_ref-one() {
  ### Run one array_ref task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-seq}
  local n=${4:-100}

  seq $n | shuf | $runtime benchmarks/compute/array_ref.$(ext $runtime) $mode
}

palindrome-one() {
  ### Run one palindrome task (strings)

  local name=${1:-palindrome}
  local runtime=$2
  local mode=${3:-unicode}

  $runtime benchmarks/compute/palindrome.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata.txt
}

parse_help-one() {
  ### Run one parse_help task (strings, real code)

  local name=${1:-parse_help}
  local runtime=$2
  local workload=${3:-}

  $runtime benchmarks/parse-help/pure-excerpt.sh _parse_help - \
    < benchmarks/parse-help/$workload.txt
}

#
# Helpers
#

hello-all() { task-all hello "$@"; }
fib-all() { task-all fib "$@"; }
for_loop-all() { task-all for_loop "$@"; }
control_flow-all() { task-all control_flow "$@"; }
word_freq-all() { task-all word_freq "$@"; }
assoc_array-all() { task-all assoc_array "$@"; }

# TODO: Fix the OSH comparison operator!  It gives the wrong answer and
# completes quickly.
bubble_sort-all() { task-all bubble_sort "$@"; }

# Array that is not quadratic
array_ref-all() { task-all array_ref "$@"; }

# Hm osh is a little slower here
palindrome-all() { task-all palindrome "$@"; }

parse_help-all() { task-all parse_help "$@"; }

task-all() {
  local task_name=$1
  local provenance=$2
  local host_job_id=$3
  local out_dir=$4  # put files to save in benchmarks-data repo here

  local tmp_dir=$BASE_DIR/tmp/$task_name

  local times_tsv=$out_dir/$task_name/$host_job_id.times.tsv
  rm -f $times_tsv

  mkdir -p $tmp_dir $out_dir/$task_name

  banner "*** $task_name ***"

  # header
  tsv-row \
    status elapsed_secs user_secs sys_secs max_rss_KiB \
    stdout_md5sum \
    host_name host_hash \
    runtime_name runtime_hash \
    task_name arg1 arg2 stdout_filename > $times_tsv

  local task_id=0

  ${task_name}-tasks $provenance > $tmp_dir/tasks.txt

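  # Each row is the provenance line (the first field is presumably the job ID)
  # followed by task_name arg1 arg2; the 'read' below skips task_name since we
  # already have it.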
  cat $tmp_dir/tasks.txt |
  while read _ host host_hash runtime runtime_hash _ arg1 arg2; do
    local file
    case $runtime in
      python2)
        file='py'
        ;;
      *sh | *osh*)
        file=$(basename $runtime)
        ;;
    esac

    local stdout_filename="stdout-$file-$arg1-$(basename $arg2).txt"

    local -a cmd
    case $task_name in
      hello|fib|for_loop|control_flow)
        # Run the interpreter DIRECTLY, without going through $0.  Forking bash
        # (this script) and then dash/python2/etc. would inflate the memory
        # measurement, because bash uses more memory.
        args=( benchmarks/compute/$task_name.$(ext $runtime) "$arg1" "$arg2" )

        case $runtime in
          # add the -f flag for awk
          awk) cmd=($runtime -f "${args[@]}") ;;
          *)   cmd=($runtime "${args[@]}") ;;
        esac
        ;;
      *)
        cmd=($0 ${task_name}-one "$task_name" "$runtime" "$arg1" "$arg2")
        ;;
    esac

    # join args into a single field
    time-tsv -o $times_tsv --append \
      --stdout $tmp_dir/$stdout_filename \
      --rusage \
      --field "$host" --field "$host_hash" \
      --field $runtime --field $runtime_hash \
      --field "$task_name" --field "$arg1" --field "$arg2" \
      --field "$stdout_filename" -- \
      "${cmd[@]}"

    task_id=$((task_id + 1))
  done

  #wc -l _tmp/compute/word_freq/*
  maybe-tree $tmp_dir
  cat $times_tsv
}

#
# Testdata
#

bubble_sort-testdata() {
  local out=$BASE_DIR/tmp/bubble_sort
  mkdir -p $out

  # TODO: Make these deterministic for more stable benchmarks?
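  # One option: GNU shuf accepts --random-source=FILE, which would make the
  # shuffle reproducible given a fixed byte stream.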
  for n in 100 200 300 400; do
    seq $n | shuf > $out/testdata-$n.txt
  done

  wc -l $out/testdata-*.txt
}

palindrome-testdata() {
  local out=$BASE_DIR/tmp/palindrome
  mkdir -p $out

  # TODO: Use iters?

  for i in $(seq 500); do
    cat <<EOF
foo
a
tat
cat

noon
amanaplanacanalpanama

μ
-μ-
EOF

  done > $out/testdata.txt

  wc -l $out/testdata.txt
}

measure() {
  local provenance=$1
  local host_job_id=$2
  local out_dir=${3:-$BASE_DIR/raw}  # ../benchmark-data/compute

  mkdir -p $BASE_DIR/{tmp,raw,stage1} $out_dir

  # set -x
  hello-all $provenance $host_job_id $out_dir
  fib-all $provenance $host_job_id $out_dir

  for_loop-all $provenance $host_job_id $out_dir
  control_flow-all $provenance $host_job_id $out_dir

  #return

  # TODO: doesn't work because we would need duplicate logic in stage1
  #if test -n "${QUICKLY:-}"; then
  #  return
  #fi

  word_freq-all $provenance $host_job_id $out_dir
  parse_help-all $provenance $host_job_id $out_dir

  bubble_sort-testdata
  palindrome-testdata

  bubble_sort-all $provenance $host_job_id $out_dir

  # INCORRECT, but still run it
  palindrome-all $provenance $host_job_id $out_dir

  # array_ref takes too long to show quadratic behavior, and that's only
  # necessary on 1 machine.  I think I will make a separate blog post,
  # if anything.

  maybe-tree $out_dir
}

soil-run() {
  ### Run it on just this machine, and make a report

  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  # Test the binaries that are IN TREE, NOT the ones in ../benchmark-data
  local -a oils_bin=(
    _bin/cxx-opt/osh _bin/cxx-opt+bumpleak/osh _bin/cxx-opt/mycpp-nosouffle/osh
    _bin/cxx-opt/ysh _bin/cxx-opt+bumpleak/ysh _bin/cxx-opt/mycpp-nosouffle/ysh
  )
  ninja "${oils_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(benchmarks/id.sh print-job-id)

  # Only measure what's in the soil-benchmarks OCI image -- it doesn't have
  # mksh or zsh
  benchmarks/id.sh shell-provenance-2 \
    $single_machine $job_id _tmp \
    bash dash python2 awk "${oils_bin[@]}"

  local provenance=_tmp/provenance.txt
  local host_job_id="$single_machine.$job_id"

  measure $provenance $host_job_id

  # Make it run on one machine
  stage1 '' $single_machine

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}


test-report() {
  # Make it run on one machine
  stage1 '' no-host

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}

stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}

  # This report works even if we only have one machine
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p $out_dir

  local times_tsv=$out_dir/times.tsv

  local -a raw=()

  # TODO: We should respect QUICKLY=1
  for metric in hello fib for_loop control_flow word_freq parse_help bubble_sort palindrome; do
    local dir=$raw_dir/$metric

    if test -n "$single_machine"; then
      local -a a=($dir/$single_machine.*.times.tsv)
      raw+=(${a[-1]})
    else
      # Globs are in lexicographical order, which works for our dates.
      local -a a=($dir/$MACHINE1.*.times.tsv)
      local -a b=($dir/$MACHINE2.*.times.tsv)  # HACK for now

      # take the latest file
      raw+=(${a[-1]} ${b[-1]})
    fi

  done
  csv-concat ${raw[@]} > $times_tsv
  wc -l $times_tsv
}

print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Compute Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oils.pub</a>
    </p>
EOF
  cmark <<EOF

## OSH Compute Performance

Running time and memory usage of programs that test data structures (as opposed
to I/O).

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).

Source code: [oils/benchmarks/compute](https://github.com/oils-for-unix/oils/tree/master/benchmarks/compute)

EOF

  cmark <<EOF
### hello (minimal startup)

EOF

  tsv2html $in_dir/hello.tsv

  cmark <<EOF
### fibonacci (integers)

- arg1: number of repetitions
- arg2: the N in fib(N)
EOF

  tsv2html $in_dir/fib.tsv

  cmark <<EOF
### for loop

- arg1: the N to sum
EOF

  tsv2html $in_dir/for_loop.tsv

  cmark <<EOF
### control flow

- arg1: the N to sum
EOF

  tsv2html $in_dir/control_flow.tsv

  cmark <<EOF
### word_freq (associative arrays / hash tables)

- arg1: number of repetitions
- arg2: the file (varies the size of the hash table)
EOF

  tsv2html $in_dir/word_freq.tsv

  cmark <<EOF
### parse_help (strings, real code)

- arg1: file to parse
EOF

  tsv2html $in_dir/parse_help.tsv

  cmark <<EOF
### bubble_sort (array of integers, array of strings)

- arg1: type of array
- arg2: length of array
EOF

  tsv2html $in_dir/bubble_sort.tsv

  # Commented out until the checksum is fixed

if false; then
  cmark <<EOF
### palindrome (byte strings, unicode strings)

- arg1: type of string
- arg2: TODO: length of string
EOF

  tsv2html $in_dir/palindrome.tsv

fi

  cmark <<EOF
### Interpreter and Host Details
EOF

  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  cmark <<EOF
### Details
EOF

  tsv2html $in_dir/details.tsv

  cmark <<EOF
### Stdout Files
EOF

  tsv2html $in_dir/stdout_files.tsv


  cat <<EOF
  </body>
</html>
EOF
}

#
# Run by hand
#

run-control-flow() {
  ### Reproduce OSH perf bug because of C++ exceptions

  # do_neither:  0.288 dash, 0.872 bash, 0.865 OSH
  # do_continue: 0.310 dash, 1.065 bash, 2.313 OSH
  # do_break:    0.222 dash, 0.712 bash, 1.430 OSH

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  for func in do_none do_continue do_break do_return; do
  #for func in do_return; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/control_flow.sh $func 200
      echo
    done
  done
}

# TODO: could make this a proper benchmark
word-split() {
  ### Test word splitting perf
  export OILS_GC_STATS=${1:-}

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  #local filename=README.md

  # Hm our word splitting actually isn't that slow?
  # TODO: measure allocs too?

  # Hm allocating over a million objects, but it's faster than bash
  # Most are in the pools

  local filename=benchmarks/testdata/configure-coreutils

  for func in default_ifs other_ifs; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/word_split.sh $func $filename
      echo
    done
  done
}

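# Illustrative sketch, not used by the harness: compare bash and the in-tree
# OSH binary on the word_freq workload by hand, reusing word_freq-one above.
# Assumes the 'configure' script at the repo root as input, like
# word_freq-tasks does.
word-freq-by-hand() {
  local osh=_bin/cxx-opt/osh

  ninja $osh

  for sh in bash $osh; do
    echo "--- $sh"
    # TIMEFORMAT above
    time word_freq-one word_freq $sh 10 configure > /dev/null
    echo
  done
}
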
735"$@"