#!/usr/bin/env bash
#
# Compare operations on data structures, with little I/O: strings, arrays,
# associative arrays, integers.
#
# Usage:
#   benchmarks/compute.sh <function name>
#
# List of benchmarks:
#
# - fib: integer, loop, assignment (shells don't have real integers)
# - for_loop: 2025 update, taken from benchmarks/ysh-for.sh
# - word_freq: hash table / assoc array (OSH uses a vector<pair<>> now!),
#   also an integer counter
# - bubble_sort: indexed array (bash uses a linked list?)
# - palindrome: string, slicing, unicode
# - parse_help: realistic shell-only string processing, which I didn't write.
#
# TODO:
# - Fix the BUGS
#   - palindrome doesn't work?  Unicode?  Does UTF-8 decoding differ?
#   - bubble sort depends on the locale too - there is an LC_ALL setting below
#
# - This file is annoying to TEST
#   - to add to it, you also have to change benchmarks/report.R
#     - and there is a loop in 'stage1' as well
#   - why can't it behave like other benchmarks?
#     - they are using this "big table" pattern

# - vary problem size, which is different from iters
#   - bubble sort: array length, to test complexity of array indexing
#   - palindrome: longer lines, to test complexity of unicode/byte slicing
#   - word_freq: more unique words, to test complexity of assoc array
#   - for_loop and fib are kinda similar
#     - maybe for_loop can introduce some conditionals
#
# - other languages
#   - awk, mawk, etc.
#   - we are going for Awk speed!
#
# Questions to answer:
# - Can a fast bytecode runtime be as fast as Python?
#   - measurement issue: Python kills at fib/for_loop - it's mostly process
#     startup time
#   - bubble_sort and word_freq are a bit more work
#     - can add YSH versions of those
#   - I wonder if they are testing data structures or the actual interpreter
#     loop though
#   - it's possible that speeding up the interpreter loop doesn't help much
# - the real motivations behind bytecode:
#   - to fix 'break continue'
#   - add coroutine support too - that suspends and resumes a frame, which we
#     can't do

set -o nounset
set -o pipefail
set -o errexit

REPO_ROOT=$(cd $(dirname $0)/.. && pwd)
readonly REPO_ROOT

source benchmarks/common.sh  # filter-provenance
source test/tsv-lib.sh  # tsv2html

readonly BASE_DIR=_tmp/compute

# Stabilize 'sort' output across machines (ugh locales!)
export LC_ALL=C

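# With TIMEFORMAT='%U', the 'time' keyword prints only user CPU seconds
# (run-control-flow and word-split below rely on this).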
TIMEFORMAT='%U'

# task_name,iter,args
hello-tasks() {
  local provenance=$1

  # Append 3 fields (task_name, arg1, arg2) to each 5-field provenance row.
  cat $provenance | filter-provenance python2 awk bash dash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'hello _ _' | xargs -n 3 -- echo "$fields"
  done
}
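
# Shape of one generated task row, as task-all reads it back below
# (the first 5 fields are the provenance row; the last 3 come from the echo):
#
#   <ignored> <host> <host_hash> <runtime> <runtime_hash> hello _ _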

# task_name,iter,args
fib-tasks() {
  local provenance=$1

  local runtime_regex='_bin/cxx-opt/(osh|ysh)'

  # Append 3 fields (task_name, arg1, arg2) to each 5-field provenance row.
  cat $provenance | filter-provenance python2 awk bash dash "$runtime_regex" |
  while read fields; do
    echo 'fib 200 44' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
for_loop-tasks() {
  local provenance=$1

  # bumpleak segfaults on for_loop!  Probably because it runs out of memory
  local runtime_regex='_bin/cxx-opt/(osh|ysh)'

  cat $provenance | filter-provenance python2 awk bash dash "$runtime_regex" |
  while read fields; do
    echo 'for_loop 50000 _' | xargs -n 3 -- echo "$fields"
  done
}

# task_name,iter,args
control_flow-tasks() {
  local provenance=$1

  local runtime_regex='_bin/cxx-opt/osh'

  cat $provenance | filter-provenance bash dash "$runtime_regex" |
  while read fields; do
    echo 'control_flow do_return 200' | xargs -n 3 -- echo "$fields"
  done
}

word_freq-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    # BUG: oils-for-unix differs on these two.  Looks like it's related to
    # backslashes!
    #echo 'word_freq 10 benchmarks/testdata/abuild' | xargs -n 3 -- echo "$fields"
    #echo 'word_freq 2 benchmarks/testdata/ltmain.sh' | xargs -n 3 -- echo "$fields"
    echo 'word_freq 10 configure' | xargs -n 3 -- echo "$fields"
  done
}

assoc_array-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    for n in 1000 2000 3000; do
      echo "word_freq 10 $n" | xargs -n 3 -- echo "$fields"
    done
  done
}

bubble_sort-tasks() {
  # Note: this is quadratic, but bubble sort itself is quadratic!
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'bubble_sort int 200' | xargs -n 3 -- echo "$fields"
    echo 'bubble_sort bytes 200' | xargs -n 3 -- echo "$fields"
  done
}

# Arrays are doubly linked lists in bash!  With a LASTREF hack to avoid being
# quadratic.
#
# See array_reference() in array.c in bash.  It searches both back and
# forward.  Every cell has its index, a value, a forward pointer, and a back
# pointer.
#
# You need pretty high N to see the quadratic behavior though!

# NOTE: osh is also slower with linear access, but not superlinear!

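# A rough sketch for observing the superlinear growth by hand (a hypothetical
# helper for illustration only; it is not part of the measured benchmarks, and
# just reuses array_ref-one, defined below, with the TIMEFORMAT setting above):
demo-array-ref-scaling() {
  for n in 10000 20000 40000; do
    echo "n=$n"
    # prints user CPU seconds per TIMEFORMAT
    time array_ref-one array_ref bash random $n
  done
}
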
array_ref-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash |
  while read fields; do
    for mode in seq random; do
      for n in 10000 20000 30000 40000; do
        echo "array_ref $mode $n" | xargs -n 3 -- echo "$fields"
      done
    done
  done

#array_ref $OSH_CC seq 5000
#array_ref $OSH_CC seq 10000
#array_ref $OSH_CC random 5000
#array_ref $OSH_CC random 10000
#EOF
}

palindrome-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance python2 bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'palindrome unicode _' | xargs -n 3 -- echo "$fields"
    echo 'palindrome bytes _' | xargs -n 3 -- echo "$fields"
  done
}

parse_help-tasks() {
  local provenance=$1

  cat $provenance | filter-provenance bash "$OSH_CPP_REGEX" |
  while read fields; do
    echo 'parse_help ls-short _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help ls _' | xargs -n 3 -- echo "$fields"
    echo 'parse_help mypy _' | xargs -n 3 -- echo "$fields"
  done
}

ext() {
  ### Map a runtime name to the file extension of its benchmark script
  local runtime=$1
  case $runtime in
    python2)
      echo 'py'
      ;;
    awk)
      echo 'awk'
      ;;
    *ysh*)
      echo 'ysh'
      ;;
    *sh | *osh*)
      echo 'sh'
      ;;
  esac
}
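
# For example: 'ext python2' prints py, 'ext awk' prints awk, 'ext bash' and
# 'ext _bin/cxx-opt/osh' print sh, and 'ext _bin/cxx-opt/ysh' prints ysh.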

word_freq-one() {
  ### Run one word_freq task (hash tables)

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local in=${4:-configure}  # input

  $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters < $in | sort -n
}
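
# To run a single task outside the harness, call a *-one function through this
# script's "$@" dispatch at the bottom, e.g. (from the repo root):
#
#   benchmarks/compute.sh word_freq-one word_freq bash 10 configure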

assoc_array-one() {
  ### Run word_freq with seq

  local name=${1:-word_freq}
  local runtime=$2

  local iters=${3:-10}
  local n=${4:-10}

  # shuf so we don't get the bash optimization
  seq $n | shuf |
    $runtime benchmarks/compute/word_freq.$(ext $runtime) $iters | sort -n
}

bubble_sort-one() {
  ### Run one bubble_sort task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-int}
  local n=${4:-100}

  $runtime benchmarks/compute/bubble_sort.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata-$n.txt
}

# OSH is like 10x faster here!
array_ref-one() {
  ### Run one array_ref task (arrays)

  local name=${1:-bubble_sort}
  local runtime=$2
  local mode=${3:-seq}
  local n=${4:-100}

  seq $n | shuf | $runtime benchmarks/compute/array_ref.$(ext $runtime) $mode
}

palindrome-one() {
  ### Run one palindrome task (strings)

  local name=${1:-palindrome}
  local runtime=$2
  local mode=${3:-unicode}

  $runtime benchmarks/compute/palindrome.$(ext $runtime) $mode \
    < $BASE_DIR/tmp/$name/testdata.txt
}

parse_help-one() {
  ### Run one parse_help task (strings, real code)

  local name=${1:-parse_help}
  local runtime=$2
  local workload=${3:-}

  $runtime benchmarks/parse-help/pure-excerpt.sh _parse_help - \
    < benchmarks/parse-help/$workload.txt
}

#
# Helpers
#

hello-all() { task-all hello "$@"; }
fib-all() { task-all fib "$@"; }
for_loop-all() { task-all for_loop "$@"; }
control_flow-all() { task-all control_flow "$@"; }
word_freq-all() { task-all word_freq "$@"; }
assoc_array-all() { task-all assoc_array "$@"; }

# TODO: Fix the OSH comparison operator!  It gives the wrong answer and
# completes quickly.
bubble_sort-all() { task-all bubble_sort "$@"; }

# Array that is not quadratic
array_ref-all() { task-all array_ref "$@"; }

# Hm osh is a little slower here
palindrome-all() { task-all palindrome "$@"; }

parse_help-all() { task-all parse_help "$@"; }

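# Data flow in task-all: ${task_name}-tasks expands each provenance row into
# one line of $BASE_DIR/tmp/$task_name/tasks.txt, and each of those lines
# becomes one time-tsv invocation, appended as a row of
# $out_dir/$task_name/$host_job_id.times.tsv.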
task-all() {
  local task_name=$1
  local provenance=$2
  local host_job_id=$3
  local out_dir=$4  # put files to save in benchmarks-data repo here

  local tmp_dir=$BASE_DIR/tmp/$task_name

  local times_tsv=$out_dir/$task_name/$host_job_id.times.tsv
  rm -f $times_tsv

  mkdir -p $tmp_dir $out_dir/$task_name

  banner "*** $task_name ***"

  # header
  tsv-row \
    status elapsed_secs user_secs sys_secs max_rss_KiB \
    stdout_md5sum \
    host_name host_hash \
    runtime_name runtime_hash \
    task_name arg1 arg2 stdout_filename > $times_tsv

  local task_id=0

  ${task_name}-tasks $provenance > $tmp_dir/tasks.txt

  cat $tmp_dir/tasks.txt |
  while read _ host host_hash runtime runtime_hash _ arg1 arg2; do
    local file
    case $runtime in
      (python2)
        file='py'
        ;;
      (*sh | *osh*)
        file=$(basename $runtime)
        ;;
    esac

    #log "runtime=$runtime args=$args"

    local stdout_filename="stdout-$file-$arg1-$(basename $arg2).txt"

    # Measurement BUG: running a task through '$0 task-one' forks this bash
    # script first, which makes dash appear to have the memory usage of bash!
    # It's better to get argv directly into the target shell.

    local -a cmd
    case $task_name in
      hello|fib|for_loop|control_flow)
        # Run it DIRECTLY; do not run $0.  We do NOT want to fork bash and
        # then dash, because bash uses more memory.
        args=(benchmarks/compute/$task_name.$(ext $runtime) "$arg1" "$arg2")

        case $runtime in
          # add -f flag
          awk) cmd=($runtime -f "${args[@]}") ;;
          *) cmd=($runtime "${args[@]}") ;;
        esac
        ;;
      *)
        cmd=($0 ${task_name}-one "$task_name" "$runtime" "$arg1" "$arg2")
        ;;
    esac

    # join args into a single field
    time-tsv -o $times_tsv --append \
      --stdout $tmp_dir/$stdout_filename \
      --rusage \
      --field "$host" --field "$host_hash" \
      --field $runtime --field $runtime_hash \
      --field "$task_name" --field "$arg1" --field "$arg2" \
      --field "$stdout_filename" -- \
      "${cmd[@]}"

    task_id=$((task_id + 1))
  done

  #wc -l _tmp/compute/word_freq/*
  maybe-tree $tmp_dir
  cat $times_tsv
}

#
# Testdata
#

bubble_sort-testdata() {
  local out=$BASE_DIR/tmp/bubble_sort
  mkdir -p $out

  # TODO: Make these deterministic for more stable benchmarks?
  for n in 100 200 300 400; do
    seq $n | shuf > $out/testdata-$n.txt
  done

  wc -l $out/testdata-*.txt
}

palindrome-testdata() {
  local out=$BASE_DIR/tmp/palindrome
  mkdir -p $out

  # TODO: Use iters?

  for i in $(seq 500); do
    cat <<EOF
foo
a
tat
cat

noon
amanaplanacanalpanama

μ
-μ-
EOF

  done > $out/testdata.txt

  wc -l $out/testdata.txt
}

measure() {
  local provenance=$1
  local host_job_id=$2
  local out_dir=${3:-$BASE_DIR/raw}  # ../benchmark-data/compute

  mkdir -p $BASE_DIR/{tmp,raw,stage1} $out_dir

  # set -x
  hello-all $provenance $host_job_id $out_dir
  fib-all $provenance $host_job_id $out_dir

  for_loop-all $provenance $host_job_id $out_dir
  control_flow-all $provenance $host_job_id $out_dir

  # TODO: doesn't work because we would need duplicate logic in stage1
  #if test -n "${QUICKLY:-}"; then
  #  return
  #fi

  word_freq-all $provenance $host_job_id $out_dir
  parse_help-all $provenance $host_job_id $out_dir

  bubble_sort-testdata
  palindrome-testdata

  bubble_sort-all $provenance $host_job_id $out_dir

  # INCORRECT, but still run it
  palindrome-all $provenance $host_job_id $out_dir

  # array_ref takes too long to show quadratic behavior, and that's only
  # necessary on 1 machine.  I think I will make a separate blog post,
  # if anything.

  maybe-tree $out_dir
}

soil-run() {
  ### Run it on just this machine, and make a report

  rm -r -f $BASE_DIR
  mkdir -p $BASE_DIR

  # Test the one that's IN TREE, NOT in ../benchmark-data
  local -a oils_bin=(
    _bin/cxx-opt/osh _bin/cxx-opt+bumpleak/osh _bin/cxx-opt/mycpp-souffle/osh
    _bin/cxx-opt/ysh _bin/cxx-opt+bumpleak/ysh _bin/cxx-opt/mycpp-souffle/ysh
  )
  ninja "${oils_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(benchmarks/id.sh print-job-id)

  # Only measure what's in the Docker image
  # - The Soil 'benchmarks' job uses the 'cpp' Docker image, which doesn't have
  #   layer-cpython, ../oil_DEPS/cpython-full
  # - It also doesn't have mksh or zsh

  benchmarks/id.sh shell-provenance-2 \
    $single_machine $job_id _tmp \
    bash dash python2 awk "${oils_bin[@]}"

  local provenance=_tmp/provenance.txt
  local host_job_id="$single_machine.$job_id"

  measure $provenance $host_job_id

  # Make it run on one machine
  stage1 '' $single_machine

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}
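
# Typical local invocation, via the "$@" dispatch at the bottom of this file
# (assumes the Ninja targets above build on your machine):
#
#   benchmarks/compute.sh soil-run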


test-report() {
  # Make it run on one machine
  stage1 '' no-host

  benchmarks/report.sh stage2 $BASE_DIR
  benchmarks/report.sh stage3 $BASE_DIR
}

stage1() {
  local raw_dir=${1:-$BASE_DIR/raw}

  # This report works even if we only have one machine
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p $out_dir

  local times_tsv=$out_dir/times.tsv

  local -a raw=()

  # TODO: We should respect QUICKLY=1
  for metric in hello fib for_loop control_flow word_freq parse_help bubble_sort palindrome; do
    local dir=$raw_dir/$metric

    if test -n "$single_machine"; then
      local -a a=($dir/$single_machine.*.times.tsv)
      raw+=(${a[-1]})
    else
      # Globs are in lexicographical order, which works for our dates.
      local -a a=($dir/$MACHINE1.*.times.tsv)
      local -a b=($dir/$MACHINE2.*.times.tsv)  # HACK for now

      # take the latest file
      raw+=(${a[-1]} ${b[-1]})
    fi

  done
  csv-concat "${raw[@]}" > $times_tsv
  wc -l $times_tsv
}

print-report() {
  local in_dir=$1

  benchmark-html-head 'OSH Compute Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oils.pub</a>
    </p>
EOF
  cmark <<EOF

## OSH Compute Performance

Running time and memory usage of programs that test data structures (as opposed
to I/O).

Memory usage is measured in MB (powers of 10), not MiB (powers of 2).

Source code: [oil/benchmarks/compute](https://github.com/oilshell/oil/tree/master/benchmarks/compute)

EOF

  cmark <<EOF
### hello (minimal startup)

EOF

  tsv2html $in_dir/hello.tsv

  cmark <<EOF
### fibonacci (integers)

- arg1: number of repetitions
- arg2: the N in fib(N)
EOF

  tsv2html $in_dir/fib.tsv

  cmark <<EOF
### for loop

- arg1: the N to sum
EOF

  tsv2html $in_dir/for_loop.tsv

  cmark <<EOF
### control flow

- arg1: the N to sum
EOF

  tsv2html $in_dir/control_flow.tsv

  cmark <<EOF
### word_freq (associative arrays / hash tables)

- arg1: number of repetitions
- arg2: the file (varies size of hash table)
EOF

  tsv2html $in_dir/word_freq.tsv

  cmark <<EOF
### parse_help (strings, real code)

- arg1: file to parse
EOF

  tsv2html $in_dir/parse_help.tsv

  cmark <<EOF
### bubble_sort (array of integers, arrays of strings)

- arg1: type of array
- arg2: length of array
EOF

  tsv2html $in_dir/bubble_sort.tsv

  # Comment out until checksum is fixed

if false; then
  cmark <<EOF
### palindrome (byte strings, unicode strings)

- arg1: type of string
- arg2: TODO: length of string
EOF

  tsv2html $in_dir/palindrome.tsv

fi

  cmark <<EOF
### Interpreter and Host Details
EOF

  tsv2html $in_dir/shells.tsv
  tsv2html $in_dir/hosts.tsv

  cmark <<EOF
### Details
EOF

  tsv2html $in_dir/details.tsv

  cmark <<EOF
### Stdout Files
EOF

  tsv2html $in_dir/stdout_files.tsv


  cat <<EOF
  </body>
</html>
EOF
}

run-control-flow() {
  ### Reproduce OSH perf bug because of C++ exceptions

  # do_neither: 0.288 dash, 0.872 bash, 0.865 OSH
  # do_continue: 0.310 dash, 1.065 bash, 2.313 OSH
  # do_break: 0.222 dash, 0.712 bash, 1.430 OSH

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  for func in do_none do_continue do_break do_return; do
  #for func in do_return; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/control_flow.sh $func 200
      echo
    done
  done
}

word-split() {
  ### Test word splitting perf
  export OILS_GC_STATS=${1:-}

  # do_neither: 0.288 dash, 0.872 bash, 0.865 OSH
  # do_continue: 0.310 dash, 1.065 bash, 2.313 OSH
  # do_break: 0.222 dash, 0.712 bash, 1.430 OSH

  local osh=_bin/cxx-opt/osh
  #set -x

  ninja $osh

  #local filename=README.md

  # Hm our word splitting actually isn't that slow?
  # TODO: measure allocs too?

  # Hm allocating over a million objects, but it's faster than bash
  # Most are in the pools

  local filename=benchmarks/testdata/configure-coreutils

  for func in default_ifs other_ifs; do
    echo "=== $func"
    echo
    for sh in dash bash $osh; do
      echo "--- $sh"
      # TIMEFORMAT above
      time $sh benchmarks/compute/word_split.sh $func $filename
      echo
    done
  done
}
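
# Example: 'benchmarks/compute.sh word-split' runs the comparison above;
# passing a first arg, e.g. 'benchmarks/compute.sh word-split 1', exports
# OILS_GC_STATS=1 to the shells under test (only OSH pays attention to it).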

"$@"