OILS / benchmarks / compute.sh View on Github | oils.pub

741 lines, 359 significant
1#!/usr/bin/env bash
2#
3# Compare operations on data structures, with little I/O: strings, array,
4# associative arrays, integers.
5#
6# Usage:
7# benchmarks/compute.sh <function name>
8#
9# List of benchmarks:
10#
11# - fib: integer, loop, assignment (shells don't have real integers
12# - for_loop: 2025 update, taken from benchmarks/ysh-for.sh
13# - word_freq: hash table / assoc array (OSH uses a vector<pair<>> now!)
14# also integer counter
15# - bubble_sort: indexed array (bash uses a linked list?)
16# - palindrome: string, slicing, unicode
17# - parse_help: realistic shell-only string processing, which I didn't write.
18#
19# TODO:
20# - Fix the BUGS
21# - palindrome doesn't work? Unicode? Does UTF-8 decode
22# - bubble sort depend on locale too - there is an LC_ALL here
23#
24# - This file is annoying to TEST
25# - to add to it, you also have to change benchmarks/report.R
26# - and there is a loop in 'stage1' as well
27# - why can't it behave like other benchmarks?
28# - they are using this "big table" pattern
29
30# - vary problem size, which is different than iters
31# - bubble sort: array length, to test complexity of array indexing
32# - palindrome: longer lines, to test complexity of unicode/byte slicing
33# - word_freq: more unique words, to test complexity of assoc array
34# - for_loop and fib are kinda similar
35# - maybe for_loop can introduce some conditionals
36#
37# - other languages
38# - awk, mawk, etc.
39# - we are going for Awk speed!
40#
41# Questions to answer
42# - Can fast bytecode runtime be as fast as Python?
43# - measurement issue: Python kills at fib/for_loop - it's mostly process
44# startup time
45# - bubble_sort and word_freq are a bit more work
46# - can add YSH versions of those
47# - I wonder if they are testing data structures or the actual interpreter
48# loop though
49# - it's possible that speeding up the interpreter loop doesn't help much
50# - the real motivations behind bytecode:
51# - to fix 'break continue'
52# - add coroutine support too - that suspends and resumes a frame, which we
53# can't do
54
set -o nounset
set -o pipefail
set -o errexit

REPO_ROOT=$(cd $(dirname $0)/.. && pwd)
readonly REPO_ROOT

source benchmarks/common.sh  # filter-provenance
source build/dev-shell.sh  # python2
source test/tsv-lib.sh  # tsv2html

readonly BASE_DIR=_tmp/compute

# Stabilize 'sort' output across machines (ugh locales!)
export LC_ALL=C

# Make the 'time' builtin print only user CPU seconds (used by
# run-control-flow and word-split below).
TIMEFORMAT='%U'

readonly OSH_YSH_OPT_REGEX='_bin/cxx-opt(-sh)?/(osh|ysh)'
74
# task_name,iter,args
hello-tasks() {
  ### Emit one 'hello' task row per matching runtime in the provenance file.
  local provenance=$1

  # Append the 3 task fields (task name, iters, arg) to each provenance row.
  filter-provenance python2 awk bash dash "$OSH_CPP_REGEX" < "$provenance" |
  while read fields; do
    echo "$fields" hello _ _
  done
}
85
# task_name,iter,args
fib-tasks() {
  ### Emit one 'fib' task row per matching runtime in the provenance file.
  local provenance=$1

  # Append the 3 task fields (task name, iters, N) to each provenance row.
  filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" < "$provenance" |
  while read fields; do
    echo "$fields" fib 200 44
  done
}
96
# task_name,iter,args
for_loop-tasks() {
  ### Emit one 'for_loop' task row per matching runtime.
  local provenance=$1

  # bumpleak segfaults on for_loop!  Probably because it runs out of memory
  filter-provenance python2 awk bash dash "$OSH_YSH_OPT_REGEX" < "$provenance" |
  while read fields; do
    echo "$fields" for_loop 50000 _
  done
}
107
# task_name,iter,args
control_flow-tasks() {
  ### Emit 'control_flow' task rows -- bash, dash, and opt osh builds only.
  local provenance=$1

  # Note: osh only, not ysh, for this benchmark.
  local runtime_regex='_bin/cxx-opt(-sh)?/osh'

  filter-provenance bash dash "$runtime_regex" < "$provenance" |
  while read fields; do
    echo "$fields" control_flow do_return 200
  done
}
119
word_freq-tasks() {
  ### Emit 'word_freq' task rows (assoc arrays / hash tables).
  local provenance=$1

  filter-provenance python2 bash "$OSH_CPP_REGEX" < "$provenance" |
  while read fields; do
    # BUG: oils-for-unix differs on these two inputs.  Looks like it's
    # related to backslashes!
    #echo "$fields" word_freq 10 benchmarks/testdata/abuild
    #echo "$fields" word_freq 2 benchmarks/testdata/ltmain.sh
    echo "$fields" word_freq 10 configure
  done
}
132
assoc_array-tasks() {
  ### Emit 'word_freq' task rows over seq input, at several problem sizes.
  local provenance=$1

  filter-provenance python2 bash "$OSH_CPP_REGEX" < "$provenance" |
  while read fields; do
    local n
    for n in 1000 2000 3000; do
      echo "$fields" word_freq 10 "$n"
    done
  done
}
143
bubble_sort-tasks() {
  ### Emit 'bubble_sort' task rows, one per element type.
  # Note: this is quadratic, but bubble sort itself is quadratic!
  local provenance=$1

  filter-provenance python2 bash "$OSH_CPP_REGEX" < "$provenance" |
  while read fields; do
    local mode
    for mode in int bytes; do
      echo "$fields" bubble_sort "$mode" 200
    done
  done
}
154
155# Arrays are doubly linked lists in bash! With a LASTREF hack to avoid being
156# quadratic.
157#
158# See array_reference() in array.c in bash. It searches both back and
159# forward. Every cell has its index, a value, a forward pointer, and a back
160# pointer.
161#
162# You need pretty high N to see the quadratic behavior though!
163
164# NOTE: osh is also slower with linear access, but not superlinear!
165
array_ref-tasks() {
  ### Emit 'array_ref' task rows: bash only, seq vs. random access, varied N.
  #
  # Only bash is measured here; see the notes above about bash's
  # doubly-linked-list array representation and its LASTREF hack.
  local provenance=$1

  cat $provenance | filter-provenance bash |
  while read fields; do
    for mode in seq random; do
      for n in 10000 20000 30000 40000; do
        echo "array_ref $mode $n" | xargs -n 3 -- echo "$fields"
      done
    done
  done

# Dead code kept for reference: problem sizes once run by hand.
#array_ref $OSH_CC seq 5000
#array_ref $OSH_CC seq 10000
#array_ref $OSH_CC random 5000
#array_ref $OSH_CC random 10000
#EOF
}
184
palindrome-tasks() {
  ### Emit 'palindrome' task rows, one per string mode.
  local provenance=$1

  filter-provenance python2 bash "$OSH_CPP_REGEX" < "$provenance" |
  while read fields; do
    local mode
    for mode in unicode bytes; do
      echo "$fields" palindrome "$mode" _
    done
  done
}
194
parse_help-tasks() {
  ### Emit 'parse_help' task rows, one per workload file.
  local provenance=$1

  filter-provenance bash "$OSH_CPP_REGEX" < "$provenance" |
  while read fields; do
    local workload
    for workload in ls-short ls mypy; do
      echo "$fields" parse_help "$workload" _
    done
  done
}
205
# Map a runtime (interpreter path/name) to the file extension of the
# benchmark implementation to run: python2 -> py, awk -> awk, ysh -> ysh,
# dash/bash/osh -> sh.
#
# Fix: the old version declared an unused 'local ext' and ignored its
# argument, relying on the dynamically-scoped $runtime from the caller.
# Every call site already passes the runtime as $1, so use it directly.
ext() {
  local runtime=$1
  case $runtime in
    python2)
      echo 'py'
      ;;
    awk)
      echo 'awk'
      ;;
    *ysh*)
      echo 'ysh'
      ;;
    *sh | *osh*)
      echo 'sh'
      ;;
  esac
}
223
word_freq-one() {
  ### Run one word_freq task (hash tables)
  #
  # $1 name: label only (default word_freq); not otherwise used
  # $2 runtime: interpreter to benchmark.  NOTE: ext() historically reads the
  #    caller's $runtime via dynamic scoping, so keep this local named
  #    'runtime'.
  # $3 iters: number of repetitions (default 10)
  # $4 in: input file whose words are counted (default: 'configure' in cwd)

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local in=${4:-configure}  # input

  # sort -n makes the output order deterministic for checksumming
  $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters < $in | sort -n
}
235
assoc_array-one() {
  ### Run word_freq with seq
  #
  # $4 n: how many distinct integers to generate as input "words" (varies the
  #    hash table size)

  local name=${1:-word_freq}  # label only
  local runtime=$2  # read by ext() via dynamic scope; keep this name

  local iters=${3:-10}
  local n=${4:-10}

  # shuf so we don't get the bash optimization
  seq $n | shuf |
  $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters | sort -n
}
249
bubble_sort-one() {
  ### Run one bubble_sort task (arrays)
  #
  # Reads testdata written by bubble_sort-testdata, which must run first.

  local name=${1:-bubble_sort}
  local runtime=$2  # read by ext() via dynamic scope; keep this name
  local mode=${3:-int}  # 'int' or 'bytes': element type to sort
  local n=${4:-100}  # selects testdata-$n.txt, i.e. the array length

  $runtime benchmarks/compute/bubble_sort.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata-$n.txt
}
261
# OSH is like 10x faster here!
array_ref-one() {
  ### Run one array_ref task (arrays)

  # NOTE(review): the 'bubble_sort' default looks like a copy-paste from
  # bubble_sort-one; it's unused here beyond being a label.
  local name=${1:-bubble_sort}
  local runtime=$2  # read by ext() via dynamic scope; keep this name
  local mode=${3:-seq}  # 'seq' (linear) or 'random' access pattern
  local n=${4:-100}  # array length

  seq $n | shuf | $runtime benchmarks/compute/array_ref.$(ext $runtime) $mode
}
273
palindrome-one() {
  ### Run one palindrome task (strings)
  #
  # Reads testdata written by palindrome-testdata, which must run first.

  local name=${1:-palindrome}
  local runtime=$2  # read by ext() via dynamic scope; keep this name
  local mode=${3:-unicode}  # 'unicode' or 'bytes' slicing

  $runtime benchmarks/compute/palindrome.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata.txt
}
284
parse_help-one() {
  ### Run one parse_help task (strings, real code)
  # (was mislabeled "palindrome task" -- copy-paste)

  local name=${1:-parse_help}  # label only; not otherwise used
  local runtime=$2
  local workload=${3:-}  # e.g. ls-short, ls, mypy

  $runtime benchmarks/parse-help/pure-excerpt.sh _parse_help - \
    < benchmarks/parse-help/$workload.txt
}
295
296#
297# Helpers
298#
299
300hello-all() { task-all hello "$@"; }
301fib-all() { task-all fib "$@"; }
302for_loop-all() { task-all for_loop "$@"; }
303control_flow-all() { task-all control_flow "$@"; }
304word_freq-all() { task-all word_freq "$@"; }
305assoc_array-all() { task-all assoc_array "$@"; }
306
307# TODO: Fix the OSH comparison operator! It gives the wrong answer and
308# completes quickly.
309bubble_sort-all() { task-all bubble_sort "$@"; }
310
311# Array that is not quadratic
312array_ref-all() { task-all array_ref "$@"; }
313
314# Hm osh is a little slower here
315palindrome-all() { task-all palindrome "$@"; }
316
317parse_help-all() { task-all parse_help "$@"; }
318
task-all() {
  ### Run every task emitted by ${task_name}-tasks; append timings to a TSV.
  #
  # $1 task_name: selects the ${task_name}-tasks generator and output paths
  # $2 provenance: provenance file from benchmarks/id.sh
  # $3 host_job_id: host/job id used in the output TSV filename
  # $4 out_dir: put files to save in benchmarks-data repo here

  local task_name=$1
  local provenance=$2
  local host_job_id=$3
  local out_dir=$4  # put files to save in benchmarks-data repo here

  local tmp_dir=$BASE_DIR/tmp/$task_name

  local times_tsv=$out_dir/$task_name/$host_job_id.times.tsv
  rm -f $times_tsv

  mkdir -p $tmp_dir $out_dir/$task_name

  banner "*** $task_name ***"

  # header
  tsv-row \
    status elapsed_secs user_secs sys_secs max_rss_KiB \
    stdout_md5sum \
    host_name host_hash \
    runtime_name runtime_hash \
    task_name arg1 arg2 stdout_filename > $times_tsv

  # NOTE(review): task_id is incremented below but otherwise unused.
  local task_id=0

  ${task_name}-tasks $provenance > $tmp_dir/tasks.txt

  cat $tmp_dir/tasks.txt |
  while read _ host host_hash runtime runtime_hash _ arg1 arg2; do

    # Short runtime label used in the per-task stdout filename.
    local file
    case $runtime in
      python2)
        file='py'
        ;;
      *sh | *osh*)
        file=$(basename $runtime)
        ;;
    esac

    local stdout_filename="stdout-$file-$arg1-$(basename $arg2).txt"

    local -a cmd
    case $task_name in
      hello|fib|for_loop|control_flow)
        # Run it DIRECTLY, do not run $0.  Because we do NOT want to fork bash
        # then dash, because bash uses more memory.
        args=( benchmarks/compute/$task_name.$(ext $runtime) "$arg1" "$arg2" )

        case $runtime in
          # add -f flag
          awk) cmd=($runtime -f "${args[@]}") ;;
          *) cmd=($runtime "${args[@]}") ;;
        esac
        ;;
      *)
        # Other tasks need shell-level setup (pipes, testdata), so re-invoke
        # this script's <task>-one function.
        cmd=($0 ${task_name}-one "$task_name" "$runtime" "$arg1" "$arg2")
        ;;
    esac

    # join args into a single field
    time-tsv -o $times_tsv --append \
      --stdout $tmp_dir/$stdout_filename \
      --rusage \
      --field "$host" --field "$host_hash" \
      --field $runtime --field $runtime_hash \
      --field "$task_name" --field "$arg1" --field "$arg2" \
      --field "$stdout_filename" -- \
      "${cmd[@]}"

    task_id=$((task_id + 1))
  done

  #wc -l _tmp/compute/word_freq/*
  maybe-tree $tmp_dir
  cat $times_tsv
}
395
396#
397# Testdata
398#
399
bubble_sort-testdata() {
  ### Generate shuffled integer files for the bubble_sort benchmark.
  local out_dir=$BASE_DIR/tmp/bubble_sort
  mkdir -p "$out_dir"

  # TODO: Make these deterministic for more stable benchmarks?
  local n
  for n in 100 200 300 400; do
    shuf > "$out_dir/testdata-$n.txt" < <(seq "$n")
  done

  wc -l "$out_dir"/testdata-*.txt
}
411
palindrome-testdata() {
  ### Generate repeated palindrome/non-palindrome lines (incl. unicode).
  local out_dir=$BASE_DIR/tmp/palindrome
  mkdir -p "$out_dir"

  # TODO: Use iters?

  # 500 copies of a fixed 10-line block.
  local i
  for ((i = 0; i < 500; i++)); do
    cat <<EOF
foo
a
tat
cat

noon
amanaplanacanalpanama

μ
-μ-
EOF

  done > "$out_dir/testdata.txt"

  wc -l "$out_dir/testdata.txt"
}
436
measure() {
  ### Run all benchmark suites; write raw per-task TSVs under $out_dir.
  #
  # $1 provenance: provenance file from benchmarks/id.sh
  # $2 host_job_id: used in output filenames
  # $3 out_dir: defaults to $BASE_DIR/raw (../benchmark-data/compute on real
  #    multi-machine runs)

  local provenance=$1
  local host_job_id=$2
  local out_dir=${3:-$BASE_DIR/raw}  # ../benchmark-data/compute

  mkdir -p $BASE_DIR/{tmp,raw,stage1} $out_dir

  # set -x
  hello-all $provenance $host_job_id $out_dir
  fib-all $provenance $host_job_id $out_dir

  for_loop-all $provenance $host_job_id $out_dir
  control_flow-all $provenance $host_job_id $out_dir

  #return

  # TODO: doesn't work because we would need duplicate logic in stage1
  #if test -n "${QUICKLY:-}"; then
  #  return
  #fi

  word_freq-all $provenance $host_job_id $out_dir
  parse_help-all $provenance $host_job_id $out_dir

  # Generate inputs BEFORE the suites that read them.
  bubble_sort-testdata
  palindrome-testdata

  bubble_sort-all $provenance $host_job_id $out_dir

  # INCORRECT, but still run it
  palindrome-all $provenance $host_job_id $out_dir

  # array_ref takes too long to show quadratic behavior, and that's only
  # necessary on 1 machine.  I think I will make a separate blog post,
  # if anything.

  maybe-tree $out_dir
}
475
soil-run() {
  ### Run it on just this machine, and make a report
  #
  # CI entry point: build the in-tree binaries, record provenance, measure,
  # then run stage1-3 to produce the HTML report.

  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  # Test the ones that are IN TREE, NOT in ../benchmark-data
  local -a oils_bin=(
    _bin/cxx-opt/osh _bin/cxx-opt+bumpleak/osh _bin/cxx-opt/mycpp-souffle/osh
    _bin/cxx-opt/ysh _bin/cxx-opt+bumpleak/ysh _bin/cxx-opt/mycpp-souffle/ysh
  )
  ninja "${oils_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(benchmarks/id.sh print-job-id)

  # Only measure what's in the Docker image
  # - The Soil 'benchmarks' job uses the 'cpp' Docker image, which doesn't have
  #   layer-cpython, ../oil_DEPS/cpython-full
  # - It also doesn't have mksh or zsh

  benchmarks/id.sh shell-provenance-2 \
    $single_machine $job_id _tmp \
    bash dash python2 awk "${oils_bin[@]}"

  local provenance=_tmp/provenance.txt
  local host_job_id="$single_machine.$job_id"

  measure $provenance $host_job_id

  # Make it run on one machine
  stage1 '' $single_machine

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}
514
515
test-report() {
  ### Rebuild the report from existing raw results, without re-measuring.

  # Make it run on one machine
  stage1 '' no-host

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}
523
stage1() {
  ### Concatenate the latest raw times TSV of each benchmark into one file.
  #
  # $1 raw_dir: where the per-benchmark raw TSVs live (default $BASE_DIR/raw)
  # $2 single_machine: non-empty means take one host's files instead of
  #    $MACHINE1/$MACHINE2
  #
  # Fix: quote array expansions ("${a[-1]}", "${raw[@]}") so filenames are
  # not re-split or glob-expanded when passed along.

  local raw_dir=${1:-$BASE_DIR/raw}

  # This report works even if we only have one machine
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p "$out_dir"

  local times_tsv=$out_dir/times.tsv

  local -a raw=()

  # TODO: We should respect QUICKLY=1
  for metric in hello fib for_loop control_flow word_freq parse_help bubble_sort palindrome; do
    local dir=$raw_dir/$metric

    if test -n "$single_machine"; then
      local -a a=($dir/$single_machine.*.times.tsv)
      raw+=( "${a[-1]}" )
    else
      # Globs are in lexicographical order, which works for our dates.
      local -a a=($dir/$MACHINE1.*.times.tsv)
      local -a b=($dir/$MACHINE2.*.times.tsv)  # HACK for now

      # take the latest file
      raw+=( "${a[-1]}" "${b[-1]}" )
    fi

  done
  csv-concat "${raw[@]}" > "$times_tsv"
  wc -l "$times_tsv"
}
557
print-report() {
  ### Print the HTML benchmark report to stdout.
  #
  # $1 in_dir: stage2 output dir containing the .tsv files rendered below.

  local in_dir=$1

  benchmark-html-head 'OSH Compute Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oils.pub</a>
    </p>
EOF
  cmark <<EOF

## OSH Compute Performance

Running time and memory usage of programs that test data structures (as opposed
to I/O).

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).

Source code: [oil/benchmarks/compute](https://github.com/oilshell/oil/tree/master/benchmarks/compute)

EOF

  cmark <<EOF
### hello (minimal startup)

EOF

  tsv2html $in_dir/hello.tsv

  cmark <<EOF
### fibonacci (integers)

- arg1: number of repetitions
- arg2: the N in fib(N)
EOF

  tsv2html $in_dir/fib.tsv

  cmark <<EOF
### for loop

- arg1: the N to sum
EOF

  tsv2html $in_dir/for_loop.tsv

  cmark <<EOF
### control flow

- arg1: the N to sum
EOF

  tsv2html $in_dir/control_flow.tsv

  cmark <<EOF
### word_freq (associative arrays / hash tables)

- arg1: number of repetitions
- arg2: the file (varies size of hash table)
EOF

  tsv2html $in_dir/word_freq.tsv

  cmark <<EOF
### parse_help (strings, real code)

- arg1: file to parse
EOF

  tsv2html $in_dir/parse_help.tsv

  cmark <<EOF
### bubble_sort (array of integers, arrays of strings)

- arg1: type of array
- arg2: length of array
EOF

  tsv2html $in_dir/bubble_sort.tsv

  # Comment out until checksum is fixed

if false; then
  cmark <<EOF
### palindrome (byte strings, unicode strings)

- arg1: type of string
- arg2: TODO: length of string
EOF

  tsv2html $in_dir/palindrome.tsv

fi

  cmark <<EOF
### Interpreter and Host Details
EOF

  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  cmark <<EOF
### Details
EOF

  tsv2html $in_dir/details.tsv

  cmark <<EOF
### Stdout Files
EOF

  tsv2html $in_dir/stdout_files.tsv


  cat <<EOF
  </body>
</html>
EOF
}
679
680#
681# Run by hand
682#
683
run-control-flow() {
  ### Reproduce OSH perf bug because of C++ exceptions
  #
  # Sample timings (user seconds):
  #   do_neither:  0.288 dash, 0.872 bash, 0.865 OSH
  #   do_continue: 0.310 dash, 1.065 bash, 2.313 OSH
  #   do_break:    0.222 dash, 0.712 bash, 1.430 OSH

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja "$osh"

  local func sh
  for func in do_none do_continue do_break do_return; do
    echo "=== $func"
    echo
    for sh in dash bash "$osh"; do
      echo "--- $sh"
      # TIMEFORMAT (top of file) makes 'time' print user seconds only
      time $sh benchmarks/compute/control_flow.sh $func 200
      echo
    done
  done
}
708
# TODO: could make this a proper benchmark
word-split() {
  ### Test word splitting perf
  export OILS_GC_STATS=${1:-}

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja "$osh"

  # Notes from earlier runs:
  # - our word splitting actually isn't that slow?  TODO: measure allocs too?
  # - allocates over a million objects, but it's faster than bash; most are
  #   in the pools
  # - a smaller input to try: README.md

  local filename=benchmarks/testdata/configure-coreutils

  local func sh
  for func in default_ifs other_ifs; do
    echo "=== $func"
    echo
    for sh in dash bash "$osh"; do
      echo "--- $sh"
      # TIMEFORMAT (top of file) makes 'time' print user seconds only
      time $sh benchmarks/compute/word_split.sh $func $filename
      echo
    done
  done
}
740
741"$@"