OILS / benchmarks / ovm-build.sh View on Github | oilshell.org

491 lines, 256 significant
1#!/usr/bin/env bash
2#
3# Measure the time it takes to build a binary with different compilers on
4# different machines, and measure the binary size.
5#
6# Usage:
7# benchmarks/ovm-build.sh <function name>
8#
9# Run on its own:
10# 1. Follow common instructions in benchmarks/osh-parser.sh
11# 2. benchmarks/auto.sh measure-builds
12# 3. benchmarks/report.sh ovm-build
13
14# Directories used:
15#
16# oilshell.org/blob/
17# ovm-build/
18#
19# ~/git/oilshell/
20# oil/
21# _deps/
22# ovm-build # tarballs and extracted source
23# _tmp/
24# ovm-build/
25# raw/ # output CSV
26# stage1
27# benchmark-data/
28# ovm-build/
29# raw/
30# compiler-id/
31# host-id/
32
# Strict mode: unset variables, failing pipeline stages, and failing commands
# all abort the script.
set -o nounset
set -o pipefail
set -o errexit

# These paths contain a slash, so they are resolved relative to the current
# directory.
source benchmarks/common.sh  # for log, etc.
source benchmarks/id.sh  # print-job-id
source build/common.sh  # for $CLANG

# Absolute path of the parent of this script's directory.  The expansions are
# quoted so paths with spaces work, and 'cd' is chained with && so that a
# failed 'cd' aborts (via errexit) instead of silently yielding the current
# directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
source test/tsv-lib.sh  # uses REPO_ROOT

readonly BASE_DIR=_tmp/ovm-build
readonly TAR_DIR=$PWD/_deps/ovm-build  # Make it absolute
46
47#
48# Dependencies
49#
50
# Source directories (relative to $TAR_DIR) of the shells built for comparison.
TAR_SUBDIRS=(dash-0.5.9.1 bash-4.4)
readonly -a TAR_SUBDIRS
55
56# NOTE: Same list in oilshell.org/blob/run.sh.
# Print the tarball file names to fetch, one per line.
tarballs() {
  printf '%s\n' \
    bash-4.4.tar.gz \
    dash-0.5.9.1.tar.gz
}
63
# Download every tarball listed by 'tarballs' into $TAR_DIR.
#
# Globals: TAR_DIR (read)
# Files that already exist are left alone (wget --no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I already runs one command per input line.  It is mutually exclusive with
  # -n, so -n is not passed (GNU xargs warns and ignores it otherwise).
  tarballs | xargs -I {} --verbose -- \
    wget --no-clobber --directory-prefix "$TAR_DIR" \
    'https://www.oilshell.org/blob/ovm-build/{}'
}
69
70# Done MANUALLY.
# Extract every *gz archive found in $TAR_DIR into $TAR_DIR, timing the whole
# loop.
#
# Globals: TAR_DIR (read)
extract-other() {
  # Keep the loop variable out of the global namespace.
  local tarball

  # Quoted so a $TAR_DIR containing spaces (it is derived from $PWD) works.
  time for tarball in "$TAR_DIR"/*gz; do
    tar -x --directory "$TAR_DIR" --file "$tarball"
  done
}
76
77# Done automatically by 'measure' function.
78
79# TODO: CI should download this from previous
# Copy the oils-for-unix source tree for $OIL_VERSION from the sibling
# benchmark-data checkout into $TAR_DIR.
#
# Globals: OIL_VERSION, TAR_DIR (read)
extract-oils() {
  local release=oils-for-unix-$OIL_VERSION

  # -T (--no-target-directory): copy onto the destination directory itself,
  # rather than into it.
  cp -r -T ../benchmark-data/src/$release/ $TAR_DIR/$release/
}
86
87#
88# Measure Size of Binaries.
89#
90
91# Other tools:
92# - bloaty to look inside elf file
93# - nm? Just a flat list of symbols? Counting them would be nice.
94# - zipfile.py to look inside bytecode.zip
95
# Print a TSV table (host_label, num_bytes, path) with one row per path given.
#
# Arguments: paths to measure
# Outputs:   header row, then one row per path, on stdout
sizes-tsv() {
  # host_label matches the times.tsv file output by report.R
  tsv-row host_label num_bytes path

  # Declared separately so a failing 'hostname' isn't masked by 'local'.
  local host
  host=$(hostname)

  # The host name is spliced into a find -printf format, so escape the two
  # characters that are special there: backslash and percent.
  host=${host//\\/\\\\}
  host=${host//%/%%}

  # -maxdepth 0: report on the arguments themselves, without descending.
  find "$@" -maxdepth 0 -printf "$host\t%s\t%p\n"
}
102
103# NOTE: This should be the same on all x64 machines. But I want to run it on
104# x64 machines.
# Write size tables for the binaries under $BASE_DIR/bin into the given
# directory: native-sizes.tsv and other-shell-sizes.tsv.
#
# Arguments: $1 - output directory for the .tsv files
# Globals:   BASE_DIR (read)
measure-sizes() {
  local raw_out_dir=$1

  # PROBLEM: Do I need provenance for gcc/clang here?  I can just join it later
  # in R.

  # One subdirectory per compiler, e.g.
  #   clang/oils-for-unix  clang/oils-for-unix.stripped
  #   gcc/oils-for-unix    gcc/oils-for-unix.stripped
  local -a native_bins=( $BASE_DIR/bin/*/{oils-for-unix,oils-for-unix.stripped} )
  sizes-tsv "${native_bins[@]}" > $raw_out_dir/native-sizes.tsv

  # Not used - we're not stripping these, etc.
  local -a shell_bins=( $BASE_DIR/bin/*/*sh )
  sizes-tsv "${shell_bins[@]}" > $raw_out_dir/other-shell-sizes.tsv

  log "Wrote $raw_out_dir/*.tsv"
}
124
125#
126# Unused Demos
127#
128
# Demo: show the summary line of the bytecode archive's listing, then the
# size of the archive file itself.
bytecode-size() {
  local archive=_build/oil/bytecode.zip

  # Only the last line of the listing is shown.
  # 242 files, 1.85 MB
  unzip -l $archive | tail -n 1

  # 1.88 MB, so there's 30K of header overhead.
  ls -l $archive
}
138
139# 6.8 seconds for debug build, instead of 8 seconds.
# Demo: clean, then build the debug target with $CLANG as the C compiler.
#
# Globals: CLANG (read)
clang-oil-dbg() {
  local target=_build/oil/ovm-dbg

  make clean
  CC=$CLANG make $target
}
144
145#
146# Measure Elapsed Time
147#
148
149# Add --target-size? Add that functionality to benchmarks/time.py?
150#
151# Should we add explicit targets?
152# - ovm-clang, ovm-clang-dbg
153# - ovm-gcc, ovm-gcc-dbg
154#
155# It would be possible, but it complicates the makefile.
156
# Run one timed build step inside a source tree and copy the resulting binary
# to $BASE_DIR/bin/<compiler basename>/.
#
# Arguments:
#   $1 raw_out_dir    directory (relative to $PWD) holding times.tsv
#   $2 job_id         accepted for the task-line layout; not used here
#   $3 host           recorded as a --field in times.tsv
#   $4 host_hash      recorded as a --field in times.tsv
#   $5 compiler_path  C compiler path; its basename names the bin subdirectory
#   $6 compiler_hash  recorded as a --field in times.tsv
#   $7 src_dir        source tree to build in
#   $8 action         'configure', 'make', 'oils-for-unix',
#                     'oils-for-unix.stripped', or a make target
# Globals: BASE_DIR (read)
# Returns: 0 if every command of the step succeeded, otherwise the status of
#          the first one that failed.  The same status is written to the log.
build-task() {
  local raw_out_dir=$1
  local job_id=$2
  local host=$3
  local host_hash=$4
  local compiler_path=$5
  local compiler_hash=$6
  local src_dir=$7
  local action=$8

  # Definitions that depend on $PWD, so they are computed before changing
  # directory.
  local times_out="$PWD/$raw_out_dir/times.tsv"

  local -a TIME_PREFIX=(
    time-tsv
    --append
    --output "$times_out"
    --field "$host" --field "$host_hash"
    --field "$compiler_path" --field "$compiler_hash"
    --field "$src_dir" --field "$action"
  )

  local bin_dir
  bin_dir="$PWD/$BASE_DIR/bin/$(basename "$compiler_path")"
  mkdir -p "$bin_dir"

  # Run the step in a subshell with errexit on, so the first failing command
  # ends the step and its status can be captured.  errexit is switched off
  # around the subshell so that a failure does not abort this function before
  # the status is logged; the caller's setting is restored afterwards.
  local had_errexit=''
  case $- in
    (*e*) had_errexit=yes ;;
  esac

  local status=0
  set +o errexit
  (
    set -o errexit
    cd "$src_dir" >/dev/null

    # NOTE: We're not saving the output anywhere.  We save the status, which
    # protects against basic errors.

    case $action in
      (configure)
        "${TIME_PREFIX[@]}" -- ./configure

        # Cleaning here relies on the ORDER of tasks.txt.  configure happens
        # before build.  The Clang build shouldn't reuse GCC objects!
        # It has to be done after configure, because the Makefile must exist!
        make clean
        ;;

      (make)
        "${TIME_PREFIX[@]}" -- make CC="$compiler_path"

        local target
        case $src_dir in
          (*/bash*)
            target=bash
            ;;
          (*/dash*)
            target=src/dash
            ;;
          (*)
            # Without this branch $target would be unset below.
            die "Don't know which binary 'make' produces in $src_dir"
            ;;
        esac

        strip "$target"
        cp -v "$target" "$bin_dir"
        ;;

      (oils-for-unix*)
        local variant
        case $action in
          (oils-for-unix)
            variant='dbg'
            ;;
          (oils-for-unix.stripped)
            variant='opt'
            ;;
          (*)
            die "Invalid target"
            ;;
        esac

        # Change the C compiler into the corresponding C++ compiler
        local compiler
        case $compiler_path in
          (*gcc)
            # note: we take provenance of /usr/bin/gcc, but the shell script
            # runs 'c++'
            compiler='cxx'
            ;;
          (*clang)
            # Note on slight mess: benchmarks/id.sh takes the provenance of
            # $CLANG.  We translate that to 'clang' here, and
            # _build/oils.sh uses $CLANGXX.
            compiler='clang'
            ;;
          (*)
            die "Invalid compiler"
            ;;
        esac

        "${TIME_PREFIX[@]}" -- _build/oils.sh "$compiler" "$variant"

        # e.g. cp _bin/clang-opt-sh/oils-for-unix.stripped _tmp/ovm-build/bin/clang/
        cp -v "_bin/$compiler-$variant-sh/$action" "$bin_dir"
        ;;

      (*)
        # Assume the action is a make target like _bin/oil.ovm
        "${TIME_PREFIX[@]}" -- make CC="$compiler_path" "$action"

        cp -v "$action" "$bin_dir"
        ;;
    esac
  )
  status=$?
  if test -n "$had_errexit"; then
    set -o errexit
  fi

  log "DONE BUILD TASK $action $src_dir __ status=$status"
  return "$status"
}
266
# Print the oils-for-unix build tasks: for each provenance line, one task for
# the 'oils-for-unix' binary and one for 'oils-for-unix.stripped'.
#
# Arguments: $1 - provenance file, one line per compiler
# Globals:   TAR_DIR, OIL_VERSION (read)
# Outputs:   task lines on stdout: the provenance line with two fields
#            appended (source directory, action)
oils-tasks() {
  local provenance=$1

  local ofu_dir="$TAR_DIR/oils-for-unix-$OIL_VERSION"

  # -r keeps backslashes literal; the '|| [[ -n ... ]]' clause also handles a
  # final line that lacks a trailing newline.
  local line
  while read -r line || [[ -n $line ]]; do
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix.stripped
  done < "$provenance"
}
278
# Print the build tasks for the comparison shells: for each provenance line
# that doesn't mention clang, a 'configure' task and a 'make' task per entry
# of TAR_SUBDIRS.
#
# Arguments: $1 - provenance file, one line per compiler
# Globals:   TAR_DIR, TAR_SUBDIRS (read)
# Outputs:   task lines on stdout: the provenance line with two fields
#            appended (source directory, action)
other-shell-tasks() {
  local provenance=$1

  # -r keeps backslashes literal; the '|| [[ -n ... ]]' clause also handles a
  # final line that lacks a trailing newline.
  local line dir
  while read -r line || [[ -n $line ]]; do
    case $line in
      # Skip clang for now.
      (*clang*)
        continue
        ;;
    esac

    # configure must come before make for each directory; build-task relies
    # on that order.
    for dir in "${TAR_SUBDIRS[@]}"; do
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" configure
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" make
    done
  done < "$provenance"
}
297
298# 5 releases: 0.0.0 to 0.4.0. For now, just do the 0.5.alpha1 release, and
299# show the drop.
# Placeholder: currently prints a single empty line and no tasks.
oil-historical-tasks() {
  printf '\n'
}
303
# Number of fields in each task line: 5 from the provenance file, then the
# source directory and the action ('configure', a target name, etc.).
NUM_COLUMNS=7
readonly NUM_COLUMNS
306
# Print every build task, oils-for-unix tasks first, then the other shells.
# The two lists are also saved under $BASE_DIR.
#
# Arguments: $1 - provenance file
# Globals:   BASE_DIR (read); QUICKLY (optional) - when non-empty, only the
#            first two tasks of each list are printed
print-tasks() {
  local build_prov=$1

  local oils_list=$BASE_DIR/oils-tasks.txt
  local shells_list=$BASE_DIR/other-shell-tasks.txt

  oils-tasks $build_prov > $oils_list
  other-shell-tasks $build_prov > $shells_list

  if [[ -z ${QUICKLY:-} ]]; then
    cat $oils_list $shells_list
    return
  fi

  head -n 2 $oils_list  # debug and opt binary
  head -n 2 $shells_list  # do dash configure make
}
323
# Run every build task, recording timings in $raw_out_dir/times.tsv, then
# record binary sizes.
#
# Arguments:
#   $1 - provenance file (from benchmarks/id.sh compiler-provenance)
#   $2 - raw output dir: _tmp/ovm-build/$X or ../../benchmark-data/ovm-build/$X
# Globals: BASE_DIR, NUM_COLUMNS (read)
measure() {
  local build_prov=$1
  local raw_out_dir=$2

  extract-oils

  # NOTE: Do we need two raw dirs?
  mkdir -p $BASE_DIR/stage1 $BASE_DIR/bin $raw_out_dir

  # TODO: the times.tsv path calculation is duplicated in build-task()
  local tsv_path="$raw_out_dir/times.tsv"

  # Header of the TSV file; each build task appends a row to it.
  tsv-row \
    status elapsed_secs \
    host_name host_hash compiler_path compiler_hash \
    src_dir action > $tsv_path

  # TODO: remove xargs
  # - print-tasks | run-tasks with a loop
  # - exit code is more reliable, and we're not running in parallel anyway

  # Each task line becomes one invocation of this script's build-task.
  # errexit is off so that the pipeline status can be inspected below.
  set +o errexit
  time print-tasks $build_prov \
    | xargs --verbose -n $NUM_COLUMNS -- $0 build-task $raw_out_dir
  local xargs_status=$?
  set -o errexit

  if (( xargs_status != 0 )); then
    die "*** Some tasks failed. (xargs status=$xargs_status) ***"
  fi

  measure-sizes $raw_out_dir
}
358
359#
360# Data Preparation and Analysis
361#
362
# Concatenate the most recent raw results into $BASE_DIR/stage1/times.tsv and
# $BASE_DIR/stage1/native-sizes.tsv.
#
# Arguments:
#   $1 - directory holding raw.<machine>.<job> dirs; defaults to $BASE_DIR
#        when empty or omitted (_tmp/ovm-build or ../benchmark-data/ovm-build)
#   $2 - optional machine name; when given, only that machine's latest raw
#        dir is used, otherwise the latest dirs of $MACHINE1 and $MACHINE2
# Globals: BASE_DIR (read); MACHINE1, MACHINE2 (read when $2 is empty)
stage1() {
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p "$out_dir"

  local -a raw_times=()
  local -a raw_sizes=()

  # Globs are in lexicographical order, which works for our dates, so the
  # last element of each array is the most recent raw dir.
  local -a a b

  if test -n "$single_machine"; then
    # find dir in _tmp/ovm-build
    a=( "$base_dir"/raw."$single_machine".* )

    raw_times+=( "${a[-1]}/times.tsv" )
    raw_sizes+=( "${a[-1]}/native-sizes.tsv" )

  else
    # find last dirs in ../benchmark-data/ovm-build
    a=( "$base_dir"/raw."$MACHINE1".* )
    b=( "$base_dir"/raw."$MACHINE2".* )

    raw_times+=( "${a[-1]}/times.tsv" "${b[-1]}/times.tsv" )
    raw_sizes+=( "${a[-1]}/native-sizes.tsv" "${b[-1]}/native-sizes.tsv" )
  fi

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"
  tsv-concat "${raw_sizes[@]}" > "$out_dir/native-sizes.tsv"

  # An unused raw-data.tsv writer used to follow an unconditional 'return'
  # here.  It was unreachable and referenced an undefined variable, so it was
  # removed.
}
406
# Print the HTML report for the build benchmark on stdout.
#
# Arguments: $1 - directory containing times.tsv, native-sizes.tsv, hosts.tsv
#                 and compilers.tsv
print-report() {
  local in_dir=$1

  benchmark-html-head 'OVM Build Performance'

  cat <<EOF
  <body class="width60">
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
EOF

  # The "Source code" link names this script; it previously pointed at
  # osh-parser.sh.
  cmark << 'EOF'
## OVM Build Performance

Source code: [oil/benchmarks/ovm-build.sh](https://github.com/oilshell/oil/tree/master/benchmarks/ovm-build.sh)

### Time in Seconds by Host and Compiler

We measure the build speed of `bash` and `dash` for comparison.
EOF

  # Highlighting one compiler makes this table easier to read.
  # NOTE(review): the pattern names gcc, although this comment used to say
  # clang is highlighted - confirm which is intended.
  tsv2html \
    --css-class-pattern 'special ^gcc' \
    "$in_dir/times.tsv"

  cmark << 'EOF'
### Native Binary Size

EOF
  tsv2html --css-class-pattern 'special ^gcc' "$in_dir/native-sizes.tsv"

  cmark << 'EOF'

### Host and Compiler Details
EOF
  tsv2html "$in_dir/hosts.tsv"
  tsv2html "$in_dir/compilers.tsv"

  cat <<EOF
  </body>
</html>
EOF
}
453
# Run the whole benchmark on a single machine, starting from a fresh
# $BASE_DIR: download and extract sources, build with ninja, measure, then
# run the report stages.
#
# Globals: BASE_DIR, OSH_CPP_NINJA_BUILD (read)
soil-run() {
  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  download
  extract-other

  # Copied from benchmarks/osh-runtime.sh soil-run

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # Left unquoted so the value is split into separate ninja targets.
  # NOTE(review): presumably it may hold more than one target - confirm.
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  # Host label used in the raw directory name.
  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  compiler-provenance-2 \
    "$single_machine" "$job_id" _tmp

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure _tmp/compiler-provenance.txt "$raw_out_dir"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
490
491"$@"