#!/usr/bin/env bash
#
# Compare operations on data structures, with little I/O: strings, array,
# associative arrays, integers.
#
# Usage:
#   benchmarks/compute.sh <function name>
#
# List of benchmarks:
#
# - fib: integer, loop, assignment (shells don't have real integers)
# - for_loop: 2025 update, taken from benchmarks/ysh-for.sh
# - word_freq: hash table / assoc array (OSH uses a vector<pair<>> now!)
#   also integer counter
# - bubble_sort: indexed array (bash uses a linked list?)
# - palindrome: string, slicing, unicode
# - parse_help: realistic shell-only string processing, which I didn't write.
#
# TODO:
# - Fix the BUGS
#   - palindrome doesn't work?  Unicode?  Does UTF-8 decoding work?
#   - bubble_sort depends on the locale too - there is an LC_ALL export below
#
# - This file is annoying to TEST
#   - to add to it, you also have to change benchmarks/report.R
#   - and there is a loop in 'stage1' as well
#   - why can't it behave like other benchmarks?
#     - they are using this "big table" pattern
#
# - vary problem size, which is different from iters
#   - bubble sort: array length, to test complexity of array indexing
#   - palindrome: longer lines, to test complexity of unicode/byte slicing
#   - word_freq: more unique words, to test complexity of assoc array
# - for_loop and fib are kinda similar
#   - maybe for_loop can introduce some conditionals
#
# - other languages
#   - awk, mawk, etc.
#   - we are going for Awk speed!
#
# Questions to answer:
# - Can a fast bytecode runtime be as fast as Python?
#   - measurement issue: Python kills at fib/for_loop - it's mostly process
#     startup time
#   - bubble_sort and word_freq are a bit more work
#     - can add YSH versions of those
#   - I wonder if they are testing data structures or the actual interpreter
#     loop though
#     - it's possible that speeding up the interpreter loop doesn't help much
#   - the real motivations behind bytecode:
#     - to fix 'break continue'
#     - add coroutine support too - that suspends and resumes a frame, which we
#       can't do

set -o nounset
set -o pipefail
set -o errexit

REPO_ROOT=$(cd $(dirname $0)/.. && pwd)
readonly REPO_ROOT

source benchmarks/common.sh  # filter-provenance
source build/dev-shell.sh  # python2
source test/tsv-lib.sh  # tsv2html

readonly BASE_DIR=_tmp/compute

# Stabilize 'sort' output across machines (ugh locales!)
export LC_ALL=C

# Make the bash 'time' builtin print only user CPU seconds
TIMEFORMAT='%U'

# task_name,iter,args
hello-tasks() {
  local provenance=$1

  # Append 3 task fields (task_name, arg1, arg2) to each provenance row.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'hello _ _' | xargs -n 3 -- echo "$fields"
  done
}
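
# For illustration, with hypothetical provenance values: a row like
#
#   no-host.2025-01-01 no-host HOSTHASH bash SHELLHASH -
#
# becomes this task row, with task_name/arg1/arg2 appended:
#
#   no-host.2025-01-01 no-host HOSTHASH bash SHELLHASH - hello _ _
#
# task-all below reads the rows back with:
#   read _ host host_hash runtime runtime_hash _ arg1 arg2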

# task_name,iter,args
fib-tasks() {
  local provenance=$1

  local runtime_regex='_bin/cxx-opt/(osh|ysh)'

  # Append 3 task fields (task_name, arg1, arg2) to each provenance row.
  cat $provenance | filter-provenance python2 awk bash dash "$runtime_regex" |
  while read fields; do
    echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
for_loop-tasks() {
  local provenance=$1

  # bumpleak segfaults on for_loop!  Probably because it runs out of memory
  local runtime_regex='_bin/cxx-opt/(osh|ysh)'

  cat $provenance | filter-provenance python2 awk bash dash "$runtime_regex" |
  while read fields; do
    echo 'for_loop 50000 _' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
control_flow-tasks() {
  local provenance=$1

  local runtime_regex='_bin/cxx-opt/osh'

  cat $provenance | filter-provenance bash dash "$runtime_regex" |
  while read fields; do
    echo 'control_flow do_return 200' | xargs -n 3 -- echo "$fields"
  done
}

word_freq-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    # BUG: oils-for-unix differs on these two.  Looks like it's related to
    # backslashes!
    #echo 'word_freq 10 benchmarks/testdata/abuild' | xargs -n 3 -- echo "$fields"
    #echo 'word_freq 2 benchmarks/testdata/ltmain.sh' | xargs -n 3 -- echo "$fields"
    echo 'word_freq 10 configure' | xargs -n 3 -- echo "$fields"
  done
}

assoc_array-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    for n in 1000 2000 3000; do
      echo "word_freq 10 $n" | xargs -n 3 -- echo "$fields"
    done
  done
}

bubble_sort-tasks() {
  # Note: this is quadratic, but bubble sort itself is quadratic!
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'bubble_sort int 200' | xargs -n 3 -- echo "$fields"
    echo 'bubble_sort bytes 200' | xargs -n 3 -- echo "$fields"
  done
}

# Arrays are doubly linked lists in bash!  With a LASTREF hack to avoid being
# quadratic.
#
# See array_reference() in array.c in bash.  It searches both back and
# forward.  Every cell has its index, a value, a forward pointer, and a back
# pointer.
#
# You need pretty high N to see the quadratic behavior though!

# NOTE: osh is also slower with linear access, but not superlinear!
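
# A rough way to observe this yourself (a sketch, not part of the harness;
# the invocation mirrors array_ref-one below):
#
#   for n in 10000 20000 40000; do
#     echo "n=$n"
#     seq $n | shuf > /tmp/nums.txt
#     time bash benchmarks/compute/array_ref.sh random < /tmp/nums.txt
#   done
#
# With 'random' access, doubling N should roughly quadruple bash's time;
# with 'seq' access the LASTREF cache keeps it close to linear.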

array_ref-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash |
  while read fields; do
    for mode in seq random; do
      for n in 10000 20000 30000 40000; do
        echo "array_ref $mode $n" | xargs -n 3 -- echo "$fields"
      done
    done
  done

#array_ref $OSH_CC seq 5000
#array_ref $OSH_CC seq 10000
#array_ref $OSH_CC random 5000
#array_ref $OSH_CC random 10000
#EOF
}

palindrome-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'palindrome unicode _' | xargs -n 3 -- echo "$fields"
    echo 'palindrome bytes _' | xargs -n 3 -- echo "$fields"
  done
}

parse_help-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'parse_help ls-short _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help ls _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help mypy _' | xargs -n 3 -- echo "$fields"
  done
}

ext() {
  ### Map a runtime name to the extension of its benchmark program

  local runtime=$1  # callers always pass the runtime as the first argument

  case $runtime in
    python2)
      echo 'py'
      ;;
    awk)
      echo 'awk'
      ;;
    *ysh*)
      echo 'ysh'
      ;;
    *sh | *osh*)
      echo 'sh'
      ;;
  esac
}
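
# Usage sketch (hypothetical invocations; results follow the case patterns above):
#
#   ext python2            # -> py
#   ext awk                # -> awk
#   ext _bin/cxx-opt/ysh   # -> ysh  (matches *ysh*)
#   ext _bin/cxx-opt/osh   # -> sh   (matches *osh*)
#   ext bash               # -> sh   (matches *sh)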

word_freq-one() {
  ### Run one word_freq task (hash tables)

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local in=${4:-configure}  # input

  $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters < $in | sort -n
}
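
# Example invocation (a sketch; run from the repo root, where the 'configure'
# script used by word_freq-tasks lives):
#
#   word_freq-one word_freq bash 10 configure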

assoc_array-one() {
  ### Run word_freq with seq

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local n=${4:-10}

  # shuf so we don't get the bash optimization
  seq $n | shuf |
    $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters | sort -n
}

bubble_sort-one() {
  ### Run one bubble_sort task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-int}
  local n=${4:-100}

  $runtime benchmarks/compute/bubble_sort.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata-$n.txt
}
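
# Example (a sketch): generate the input files first, then run one task.
#
#   bubble_sort-testdata   # writes _tmp/compute/tmp/bubble_sort/testdata-{100,200,300,400}.txt
#   bubble_sort-one bubble_sort bash int 200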

# OSH is like 10x faster here!
array_ref-one() {
  ### Run one array_ref task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-seq}
  local n=${4:-100}

  seq $n | shuf | $runtime benchmarks/compute/array_ref.$(ext $runtime) $mode
}

palindrome-one() {
  ### Run one palindrome task (strings)

  local name=${1:-palindrome}
  local runtime=$2
  local mode=${3:-unicode}

  $runtime benchmarks/compute/palindrome.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata.txt
}

parse_help-one() {
  ### Run one parse_help task (strings, real code)

  local name=${1:-parse_help}
  local runtime=$2
  local workload=${3:-}

  $runtime benchmarks/parse-help/pure-excerpt.sh _parse_help - \
    < benchmarks/parse-help/$workload.txt
}
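
# Example (a sketch; the workload files live in benchmarks/parse-help/):
#
#   parse_help-one parse_help bash ls-short   # reads benchmarks/parse-help/ls-short.txt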

#
# Helpers
#

hello-all() { task-all hello "$@"; }
fib-all() { task-all fib "$@"; }
for_loop-all() { task-all for_loop "$@"; }
control_flow-all() { task-all control_flow "$@"; }
word_freq-all() { task-all word_freq "$@"; }
assoc_array-all() { task-all assoc_array "$@"; }

# TODO: Fix the OSH comparison operator!  It gives the wrong answer and
# completes quickly.
bubble_sort-all() { task-all bubble_sort "$@"; }

# Array that is not quadratic
array_ref-all() { task-all array_ref "$@"; }

# Hm osh is a little slower here
palindrome-all() { task-all palindrome "$@"; }

parse_help-all() { task-all parse_help "$@"; }

task-all() {
  local task_name=$1
  local provenance=$2
  local host_job_id=$3
  local out_dir=$4  # put files to save in benchmarks-data repo here

  local tmp_dir=$BASE_DIR/tmp/$task_name

  local times_tsv=$out_dir/$task_name/$host_job_id.times.tsv
  rm -f $times_tsv

  mkdir -p $tmp_dir $out_dir/$task_name

  banner "*** $task_name ***"

  # header
  tsv-row \
    status elapsed_secs user_secs sys_secs max_rss_KiB \
    stdout_md5sum \
    host_name host_hash \
    runtime_name runtime_hash \
    task_name arg1 arg2 stdout_filename > $times_tsv

  local task_id=0

  ${task_name}-tasks $provenance > $tmp_dir/tasks.txt

  cat $tmp_dir/tasks.txt |
  while read _ host host_hash runtime runtime_hash _ arg1 arg2; do
    local file
    case $runtime in
      (python2)
        file='py'
        ;;
      (*sh | *osh*)
        file=$(basename $runtime)
        ;;
    esac

    #log "runtime=$runtime args=$args"

    local stdout_filename="stdout-$file-$arg1-$(basename $arg2).txt"

    # Measurement BUG!  Going through $0 re-runs this script under bash, so
    # dash would show bash's memory usage.  It's better to get argv into the
    # target shell directly.

    local -a cmd
    case $task_name in
      hello|fib|for_loop|control_flow)
        # Run it DIRECTLY, do not run $0.  Because we do NOT want to fork bash
        # then dash, because bash uses more memory.
        args=(benchmarks/compute/$task_name.$(ext $runtime) "$arg1" "$arg2")

        case $runtime in
          # add -f flag
          awk) cmd=($runtime -f "${args[@]}") ;;
          *) cmd=($runtime "${args[@]}") ;;
        esac
        ;;
      *)
        cmd=($0 ${task_name}-one "$task_name" "$runtime" "$arg1" "$arg2")
        ;;
    esac
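
    # For example, with hypothetical task-row values: task_name=fib,
    # runtime=dash, arg1=200, arg2=44 runs the direct form
    #
    #   dash benchmarks/compute/fib.sh 200 44
    #
    # while a wrapped task like word_freq with runtime=bash, arg1=10,
    # arg2=configure goes through this script again (via $0):
    #
    #   benchmarks/compute.sh word_freq-one word_freq bash 10 configure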

    # join args into a single field
    time-tsv -o $times_tsv --append \
      --stdout $tmp_dir/$stdout_filename \
      --rusage \
      --field "$host" --field "$host_hash" \
      --field $runtime --field $runtime_hash \
      --field "$task_name" --field "$arg1" --field "$arg2" \
      --field "$stdout_filename" -- \
      "${cmd[@]}"

    task_id=$((task_id + 1))
  done

  #wc -l _tmp/compute/word_freq/*
  maybe-tree $tmp_dir
  cat $times_tsv
}

#
# Testdata
#

bubble_sort-testdata() {
  local out=$BASE_DIR/tmp/bubble_sort
  mkdir -p $out

  # TODO: Make these deterministic for more stable benchmarks?
  for n in 100 200 300 400; do
    seq $n | shuf > $out/testdata-$n.txt
  done

  wc -l $out/testdata-*.txt
}
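
# One way to make the data deterministic (a sketch, not wired in): feed shuf
# a fixed byte stream as its randomness source, so every run produces the
# same permutation:
#
#   seq $n | shuf --random-source=<(yes 42) > $out/testdata-$n.txt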

palindrome-testdata() {
  local out=$BASE_DIR/tmp/palindrome
  mkdir -p $out

  # TODO: Use iters?

  for i in $(seq 500); do
    cat <<EOF
foo
a
tat
cat

noon
amanaplanacanalpanama

μ
-μ-
EOF

  done > $out/testdata.txt

  wc -l $out/testdata.txt
}
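
# Why the 'bytes' and 'unicode' modes can disagree on this data: '-μ-' is a
# palindrome by code points, but not by bytes.  μ is U+03BC, encoded in UTF-8
# as the two bytes 0xCE 0xBC, so the byte string is 2D CE BC 2D and its
# reversal 2D BC CE 2D differs (and isn't even valid UTF-8).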

measure() {
  local provenance=$1
  local host_job_id=$2
  local out_dir=${3:-$BASE_DIR/raw}  # ../benchmark-data/compute

  mkdir -p $BASE_DIR/{tmp,raw,stage1} $out_dir

  # set -x
  hello-all $provenance $host_job_id $out_dir
  fib-all $provenance $host_job_id $out_dir

  for_loop-all $provenance $host_job_id $out_dir
  control_flow-all $provenance $host_job_id $out_dir

  # TODO: doesn't work because we would need duplicate logic in stage1
  #if test -n "${QUICKLY:-}"; then
  #  return
  #fi

  word_freq-all $provenance $host_job_id $out_dir
  parse_help-all $provenance $host_job_id $out_dir

  bubble_sort-testdata
  palindrome-testdata

  bubble_sort-all $provenance $host_job_id $out_dir

  # INCORRECT, but still run it
  palindrome-all $provenance $host_job_id $out_dir

  # array_ref takes too long to show quadratic behavior, and that's only
  # necessary on 1 machine.  I think I will make a separate blog post,
  # if anything.

  maybe-tree $out_dir
}

soil-run() {
  ### Run it on just this machine, and make a report

  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  # Test the binaries that are IN TREE, NOT in ../benchmark-data
  local -a oils_bin=(
    _bin/cxx-opt/osh _bin/cxx-opt+bumpleak/osh _bin/cxx-opt/mycpp-souffle/osh
    _bin/cxx-opt/ysh _bin/cxx-opt+bumpleak/ysh _bin/cxx-opt/mycpp-souffle/ysh
  )
  ninja "${oils_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(benchmarks/id.sh print-job-id)

  # Only measure what's in the Docker image
  # - The Soil 'benchmarks' job uses the 'cpp' Docker image, which doesn't have
  #   layer-cpython, ../oil_DEPS/cpython-full
  # - It also doesn't have mksh or zsh

  benchmarks/id.sh shell-provenance-2 \
    $single_machine $job_id _tmp \
    bash dash python2 awk "${oils_bin[@]}"

  local provenance=_tmp/provenance.txt
  local host_job_id="$single_machine.$job_id"

  measure $provenance $host_job_id

  # Make it run on one machine
  stage1 '' $single_machine

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}


test-report() {
  # Make it run on one machine
  stage1 '' no-host

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}

stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}

  # This report works even if we only have one machine
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p $out_dir

  local times_tsv=$out_dir/times.tsv

  local -a raw=()

  # TODO: We should respect QUICKLY=1
  for metric in hello fib for_loop control_flow word_freq parse_help bubble_sort palindrome; do
    local dir=$raw_dir/$metric

    if test -n "$single_machine"; then
      local -a a=($dir/$single_machine.*.times.tsv)
      raw+=(${a[-1]})
    else
      # Globs are in lexicographical order, which works for our dates.
      local -a a=($dir/$MACHINE1.*.times.tsv)
      local -a b=($dir/$MACHINE2.*.times.tsv)  # HACK for now

      # take the latest file
      raw+=(${a[-1]} ${b[-1]})
    fi
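
    # For example (hypothetical file names): if the glob expands to
    #   raw/fib/$MACHINE1.2025-01-01.times.tsv raw/fib/$MACHINE1.2025-02-01.times.tsv
    # then ${a[-1]} picks the 2025-02-01 file, i.e. the most recent run.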

  done
  csv-concat ${raw[@]} > $times_tsv
  wc -l $times_tsv
}

print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Compute Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oils.pub</a>
    </p>
EOF
  cmark <<EOF

## OSH Compute Performance

Running time and memory usage of programs that test data structures (as opposed
to I/O).

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).

Source code: [oil/benchmarks/compute](https://github.com/oilshell/oil/tree/master/benchmarks/compute)

EOF

  cmark <<EOF
### hello (minimal startup)

EOF

  tsv2html $in_dir/hello.tsv

  cmark <<EOF
### fibonacci (integers)

- arg1: number of repetitions
- arg2: the N in fib(N)
EOF

  tsv2html $in_dir/fib.tsv

  cmark <<EOF
### for loop

- arg1: the N to sum
EOF

  tsv2html $in_dir/for_loop.tsv

  cmark <<EOF
### control flow

- arg1: the control flow construct being tested (e.g. do_return)
- arg2: number of iterations
EOF

  tsv2html $in_dir/control_flow.tsv

  cmark <<EOF
### word_freq (associative arrays / hash tables)

- arg1: number of repetitions
- arg2: the file (varies size of hash table)
EOF

  tsv2html $in_dir/word_freq.tsv

  cmark <<EOF
### parse_help (strings, real code)

- arg1: file to parse
EOF

  tsv2html $in_dir/parse_help.tsv

  cmark <<EOF
### bubble_sort (array of integers, arrays of strings)

- arg1: type of array
- arg2: length of array
EOF

  tsv2html $in_dir/bubble_sort.tsv

  # Comment out until checksum is fixed

if false; then
  cmark <<EOF
### palindrome (byte strings, unicode strings)

- arg1: type of string
- arg2: TODO: length of string
EOF

  tsv2html $in_dir/palindrome.tsv

fi

  cmark <<EOF
### Interpreter and Host Details
EOF

  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  cmark <<EOF
### Details
EOF

  tsv2html $in_dir/details.tsv

  cmark <<EOF
### Stdout Files
EOF

  tsv2html $in_dir/stdout_files.tsv


  cat <<EOF
  </body>
</html>
EOF
}

run-control-flow() {
  ### Reproduce OSH perf bug because of C++ exceptions

  # do_neither: 0.288 dash, 0.872 bash, 0.865 OSH
  # do_continue: 0.310 dash, 1.065 bash, 2.313 OSH
  # do_break: 0.222 dash, 0.712 bash, 1.430 OSH

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  for func in do_none do_continue do_break do_return; do
  #for func in do_return; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/control_flow.sh $func 200
      echo
    done
  done
}

word-split() {
  ### Test word splitting perf
  export OILS_GC_STATS=${1:-}

  # do_neither: 0.288 dash, 0.872 bash, 0.865 OSH
  # do_continue: 0.310 dash, 1.065 bash, 2.313 OSH
  # do_break: 0.222 dash, 0.712 bash, 1.430 OSH

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  #local filename=README.md

  # Hm our word splitting actually isn't that slow?
  # TODO: measure allocs too?

  # Hm allocating over a million objects, but it's faster than bash
  # Most are in the pools

  local filename=benchmarks/testdata/configure-coreutils

  for func in default_ifs other_ifs; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/word_split.sh $func $filename
      echo
    done
  done
}

"$@"