benchmarks/ovm-build.sh

OILS / benchmarks / ovm-build.sh View on Github | oilshell.org

491 lines, 256 significant

1	#!/usr/bin/env bash
2	#
3	# Measure the time it takes to build a binary with different compilers on
4	# different machines, and measure the binary size.
5	#
6	# Usage:
7	# benchmarks/ovm-build.sh <function name>
8	#
9	# Run on its own:
10	# 1. Follow common instructions in benchmarks/osh-parser.sh
11	# 2. benchmarks/auto.sh measure-builds
12	# 3. benchmarks/report.sh ovm-build
13
14	# Directories used:
15	#
16	# oilshell.org/blob/
17	# ovm-build/
18	#
19	# ~/git/oilshell/
20	# oil/
21	# _deps/
22	# ovm-build # tarballs and extracted source
23	# _tmp/
24	# ovm-build/
25	# raw/ # output CSV
26	# stage1
27	# benchmark-data/
28	# ovm-build/
29	# raw/
30	# compiler-id/
31	# host-id/
32
# Strict mode: fail on unset variables, on a failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

source benchmarks/common.sh # for log, etc.
source benchmarks/id.sh # print-job-id
source build/common.sh # for $CLANG

# Absolute path of the directory above the one holding this script.  The
# expansions are quoted so a checkout path with spaces works, and '&&' makes a
# failed 'cd' abort (via errexit) instead of silently recording the wrong
# directory.
REPO_ROOT=$(cd "$(dirname "$0")/.." && pwd)
source test/tsv-lib.sh # uses REPO_ROOT

readonly BASE_DIR=_tmp/ovm-build
readonly TAR_DIR=$PWD/_deps/ovm-build # Make it absolute
46
47	#
48	# Dependencies
49	#
50
# Source directories under $TAR_DIR that the other-shell tasks configure and
# build.
declare -r -a TAR_SUBDIRS=(dash-0.5.9.1 bash-4.4)
55
56	# NOTE: Same list in oilshell.org/blob/run.sh.
# Print the names of the tarballs to download, one per line.
tarballs() {
  printf '%s\n' \
    bash-4.4.tar.gz \
    dash-0.5.9.1.tar.gz
}
63
# Download every tarball listed by 'tarballs' into $TAR_DIR.  Files that are
# already present are not fetched again (wget --no-clobber).
download() {
  mkdir -p "$TAR_DIR"

  # -I already runs one command per input line, so the conflicting '-n 1'
  # (which GNU xargs warns about and ignores) is not passed.
  tarballs \
    | xargs -I {} --verbose -- \
        wget --no-clobber --directory-prefix "$TAR_DIR" \
          'https://www.oilshell.org/blob/ovm-build/{}'
}
69
70	# Done MANUALLY.
# Unpack every *gz archive found in $TAR_DIR into $TAR_DIR itself, timing the
# whole loop.
extract-other() {
  # Declared local so the loop variable does not leak into the caller.
  local tarball
  time for tarball in "$TAR_DIR"/*gz; do
    tar -x --directory "$TAR_DIR" --file "$tarball"
  done
}
76
77	# Done automatically by 'measure' function.
78
79	# TODO: CI should download this from previous
# Copy the oils-for-unix-$OIL_VERSION source tree from ../benchmark-data/src/
# into $TAR_DIR.  With --no-target-directory the destination is the copy
# itself, rather than a directory to copy into.
extract-oils() {
  # To run on multiple machines, use the one in the benchmarks-data repo.
  local name="oils-for-unix-$OIL_VERSION"

  cp --recursive --no-target-directory \
    "../benchmark-data/src/$name/" \
    "$TAR_DIR/$name/"
}
86
87	#
88	# Measure Size of Binaries.
89	#
90
91	# Other tools:
92	# - bloaty to look inside elf file
93	# - nm? Just a flat list of symbols? Counting them would be nice.
94	# - zipfile.py to look inside bytecode.zip
95
# Print a TSV table with the columns host_label, num_bytes and path: a header
# row, then one row for each path passed as an argument.
sizes-tsv() {
  # host_label matches the times.tsv file output by report.R
  tsv-row host_label num_bytes path

  # Assigned separately from 'local' so a failing hostname is not masked.
  local host
  host=$(hostname)

  # -maxdepth 0 makes find look only at the arguments themselves; %s is the
  # size in bytes and %p the path.
  find "$@" -maxdepth 0 -printf "$host\t%s\t%p\n"
}
102
103	# NOTE: This should be the same on all x64 machines. But I want to run it on
104	# x64 machines.
# Record the sizes of the binaries collected under $BASE_DIR/bin/<compiler>/.
#
# Args:
#   $1: directory that receives native-sizes.tsv and other-shell-sizes.tsv
measure-sizes() {
  local raw_out_dir=$1

  # PROBLEM: Do I need provenance for gcc/clang here? I can just join it later
  # in R.

  # clang/oils-for-unix
  # clang/oils-for-unix.stripped
  # gcc/oils-for-unix
  # gcc/oils-for-unix.stripped
  sizes-tsv "$BASE_DIR"/bin/*/{oils-for-unix,oils-for-unix.stripped} \
    > "$raw_out_dir/native-sizes.tsv"

  # Not used - we're not stripping these, etc.
  # The glob picks up the binaries whose names end in 'sh' (bash, dash) in
  # each compiler directory.
  sizes-tsv "$BASE_DIR"/bin/*/*sh \
    > "$raw_out_dir/other-shell-sizes.tsv"

  log "Wrote ${raw_out_dir}/*.tsv"
}
124
125	#
126	# Unused Demos
127	#
128
# Show the size of the bytecode archive two ways: the summary line of its
# listing, and the size of the zip file on disk.
bytecode-size() {
  local zip=_build/oil/bytecode.zip

  # 242 files, 1.85 MB
  unzip -l "$zip" | tail -n 1

  # 1.88 MB, so there's 30K of header overhead.
  ls -l "$zip"
}
138
139	# 6.8 seconds for debug build, instead of 8 seconds.
# Clean the tree, then build the debug OVM target with $CLANG as the C
# compiler.
clang-oil-dbg() {
  local target=_build/oil/ovm-dbg

  make clean
  CC=$CLANG make "$target"
}
144
145	#
146	# Measure Elapsed Time
147	#
148
149	# Add --target-size? Add that functionality to benchmarks/time.py?
150	#
151	# Should we add explicit targets?
152	# - ovm-clang, ovm-clang-dbg
153	# - ovm-gcc, ovm-gcc-dbg
154	#
155	# It would be possible, but it complicates the makefile.
156
# Run one build step inside a source tree, timing it with time-tsv (which
# appends to times.tsv), and copy any binary the step produces into
# $BASE_DIR/bin/<basename of the compiler>/.
#
# Args:
#   $1 raw_out_dir    directory holding times.tsv, relative to $PWD
#   $2 job_id         not used by this function
#   $3 host           recorded as a field of the output row
#   $4 host_hash      recorded as a field of the output row
#   $5 compiler_path  recorded, and passed to make as CC
#   $6 compiler_hash  recorded as a field of the output row
#   $7 src_dir        source tree in which the step runs
#   $8 action         'configure', 'make', 'oils-for-unix',
#                     'oils-for-unix.stripped', or any other make target
build-task() {
  local raw_out_dir=$1
  local job_id=$2  # unused; kept so the positional arguments stay documented
  local host=$3
  local host_hash=$4
  local compiler_path=$5
  local compiler_hash=$6
  local src_dir=$7
  local action=$8

  local times_out="$PWD/$raw_out_dir/times.tsv"

  # Definitions that depend on $PWD.
  local -a TIME_PREFIX=(
    time-tsv
    --append
    --output "$times_out"
    --field "$host" --field "$host_hash"
    --field "$compiler_path" --field "$compiler_hash"
    --field "$src_dir" --field "$action"
  )
  local bin_base_dir=$PWD/$BASE_DIR/bin

  local bin_dir
  bin_dir="$bin_base_dir/$(basename "$compiler_path")"
  mkdir -p "$bin_dir"

  pushd "$src_dir" >/dev/null

  # NOTE: We're not saving the output anywhere. We save the status, which
  # protects against basic errors.

  case $action in
    (configure)
      "${TIME_PREFIX[@]}" -- ./configure

      # Cleaning here relies on the ORDER of tasks.txt. configure happens
      # before build. The Clang build shouldn't reuse GCC objects!
      # It has to be done after configure, because the Makefile must exist!
      make clean
      ;;

    (make)
      "${TIME_PREFIX[@]}" -- make CC="$compiler_path"

      # Pick the built binary from the name of the source tree (for example
      # bash-4.4 or dash-0.5.9.1).
      local target
      case $(basename "$src_dir") in
        (bash*)
          target=bash
          ;;
        (dash*)
          target=src/dash
          ;;
        (*)
          die "Don't know which binary 'make' produces in $src_dir"
          ;;
      esac

      strip "$target"
      cp -v "$target" "$bin_dir"
      ;;

    (oils-for-unix*)
      local variant
      case $action in
        (oils-for-unix)
          variant='dbg'
          ;;
        (oils-for-unix.stripped)
          variant='opt'
          ;;
        (*)
          die "Invalid target"
          ;;
      esac

      # Change the C compiler into the corresponding C++ compiler
      local compiler
      case $compiler_path in
        (*gcc)
          # note: we take provenance of /usr/bin/gcc, but the shell script
          # runs 'c++'
          compiler='cxx'
          ;;
        (*clang)
          # Note on slight mess: benchmarks/id.sh takes the provenance of
          # $CLANG. We translate that to 'clang' here, and
          # _build/oils.sh uses $CLANGXX.
          compiler='clang'
          ;;
        (*)
          die "Invalid compiler"
          ;;
      esac

      "${TIME_PREFIX[@]}" -- \
        _build/oils.sh --cxx "$compiler" --variant "$variant"

      # e.g. cp _bin/clang-opt-sh/oils-for-unix.stripped _tmp/ovm-build/bin/clang/
      local filename=$action
      cp -v "_bin/$compiler-$variant-sh/$filename" "$bin_dir"
      ;;

    (*)
      local target=$action # Assume it's a target like _bin/oil.ovm

      "${TIME_PREFIX[@]}" -- make CC="$compiler_path" "$target"

      cp -v "$target" "$bin_dir"
      ;;
  esac

  popd >/dev/null

  # NOTE: $? below is the exit status of the popd just above.
  log "DONE BUILD TASK $action $src_dir __ status=$?"
}
266
# Print the oils-for-unix task lines: each line of the provenance file is
# emitted twice, followed by the oils source dir and one of the two actions.
#
# Args:
#   $1: path of the compiler provenance file
oils-tasks() {
  local provenance=$1

  local ofu_dir="$TAR_DIR/oils-for-unix-$OIL_VERSION"

  # Append 2 fields (source dir, action) to each provenance line.
  local line
  while read -r line; do
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix
    printf '%s %s %s\n' "$line" "$ofu_dir" oils-for-unix.stripped
  done < "$provenance"
}
278
# Print the task lines for the comparison shells: for each provenance line
# that does not mention clang, and for each directory in TAR_SUBDIRS, emit a
# 'configure' task followed by a 'make' task.
#
# Args:
#   $1: path of the compiler provenance file
other-shell-tasks() {
  local provenance=$1

  # Append 2 fields (source dir, action) to each provenance line.
  local line dir
  while read -r line; do
    case $line in
      # Skip clang for now.  The pattern has to match anywhere in the line,
      # because the compiler path is only one of its fields.
      (*clang*)
        continue
        ;;
    esac

    for dir in "${TAR_SUBDIRS[@]}"; do
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" configure
      printf '%s %s %s\n' "$line" "$TAR_DIR/$dir" make
    done
  done < "$provenance"
}
297
298	# 5 releases: 0.0.0 to 0.4.0. For now, just do the 0.5.alpha1 release, and
299	# show the drop.
# Placeholder: prints a single empty line and nothing else.
oil-historical-tasks() {
  printf '\n'
}
303
# Number of fields in one task line: 5 from provenance, then the
# tarball/source dir and the action ('configure', a target name, etc.).
declare -r NUM_COLUMNS=7
306
# Write the two task lists to files under $BASE_DIR, then print them to
# stdout.  When $QUICKLY is set and non-empty, only the first two lines of
# each list are printed.
#
# Args:
#   $1: path of the compiler provenance file
print-tasks() {
  local build_prov=$1

  local t1=$BASE_DIR/oils-tasks.txt
  local t2=$BASE_DIR/other-shell-tasks.txt

  oils-tasks "$build_prov" > "$t1"
  other-shell-tasks "$build_prov" > "$t2"

  if [[ -n ${QUICKLY:-} ]]; then
    head -n 2 "$t1"  # debug and opt binary
    head -n 2 "$t2"  # do dash configure make
  else
    cat "$t1" "$t2"
  fi
}
323
# Run every build task and record the timings and binary sizes.
#
# Writes the times.tsv header, feeds the lines of 'print-tasks' to
# '$0 build-task' through xargs ($NUM_COLUMNS fields per invocation), dies if
# xargs reports a failure, and finally calls measure-sizes.
#
# Args:
#   $1 build_prov   from benchmarks/id.sh compiler-provenance
#   $2 raw_out_dir  _tmp/ovm-build/$X or ../../benchmark-data/ovm-build/$X
measure() {
  local build_prov=$1
  local raw_out_dir=$2

  extract-oils

  local times_out="$raw_out_dir/times.tsv"
  # NOTE: Do we need two raw dirs?
  mkdir -p "$BASE_DIR"/{stage1,bin} "$raw_out_dir"

  # TODO: the $times_out calculation is duplicated in build-task()

  # Write header of the TSV file that is appended to.
  tsv-row \
    status elapsed_secs \
    host_name host_hash compiler_path compiler_hash \
    src_dir action > "$times_out"

  # TODO: remove xargs
  # - print-tasks | run-tasks with a loop
  # - exit code is more reliable, and we're not running in parallel anyway

  # errexit is switched off around the pipeline so that its status can be
  # inspected and reported below.
  set +o errexit
  time print-tasks "$build_prov" \
    | xargs --verbose -n "$NUM_COLUMNS" -- "$0" build-task "$raw_out_dir"
  local status=$?
  set -o errexit

  if test $status -ne 0; then
    die "*** Some tasks failed. (xargs status=$status) ***"
  fi

  measure-sizes "$raw_out_dir"
}
358
359	#
360	# Data Preparation and Analysis
361	#
362
# Concatenate the raw TSV files of the most recent run(s) into
# $BASE_DIR/stage1/times.tsv and $BASE_DIR/stage1/native-sizes.tsv.
#
# Args:
#   $1 base_dir        directory containing raw.<machine>.<suffix>/ dirs;
#                      defaults to $BASE_DIR when empty or omitted
#                      (_tmp/ovm-build or ../benchmark-data/ovm-build)
#   $2 single_machine  if non-empty, use only that machine's last raw dir;
#                      otherwise use the last dirs of $MACHINE1 and $MACHINE2
stage1() {
  local base_dir=${1:-$BASE_DIR}
  local single_machine=${2:-}

  local out_dir=$BASE_DIR/stage1
  mkdir -p "$out_dir"

  local -a raw_times=()
  local -a raw_sizes=()

  if [[ -n $single_machine ]]; then
    # find dir in _tmp/ovm-build
    local -a a=( "$base_dir/raw.$single_machine."* )

    raw_times+=( "${a[-1]}/times.tsv" )
    raw_sizes+=( "${a[-1]}/native-sizes.tsv" )

  else
    # find last dirs in ../benchmark-data/ovm-build
    # Globs are in lexicographical order, which works for our dates.
    local -a a=( "$base_dir/raw.$MACHINE1."* )
    local -a b=( "$base_dir/raw.$MACHINE2."* )

    raw_times+=( "${a[-1]}/times.tsv" "${b[-1]}/times.tsv" )
    raw_sizes+=( "${a[-1]}/native-sizes.tsv" "${b[-1]}/native-sizes.tsv" )
  fi

  tsv-concat "${raw_times[@]}" > "$out_dir/times.tsv"
  tsv-concat "${raw_sizes[@]}" > "$out_dir/native-sizes.tsv"

  # The unreachable raw-data.tsv demo that used to follow an early 'return'
  # here was removed: it referenced an undefined variable ($out).
}
406
# Print the HTML report for the build benchmark on stdout.
#
# Args:
#   $1: directory containing times.tsv, native-sizes.tsv, hosts.tsv and
#       compilers.tsv
print-report() {
  local in_dir=$1

  benchmark-html-head 'OVM Build Performance'

  cat <<EOF
<body class="width60">
<p id="home-link">
<a href="/">oilshell.org</a>
</p>
EOF

  cmark << 'EOF'
## OVM Build Performance

Source code: [oil/benchmarks/ovm-build.sh](https://github.com/oilshell/oil/tree/master/benchmarks/ovm-build.sh)

### Time in Seconds by Host and Compiler

We measure the build speed of `bash` and `dash` for comparison.
EOF

  # Highlighting one compiler makes this table easier to read.
  # NOTE(review): the pattern names gcc, although this comment used to say
  # clang -- confirm against tsv2html which rows get the 'special' class.
  tsv2html \
    --css-class-pattern 'special ^gcc' \
    "$in_dir/times.tsv"

  cmark << 'EOF'
### Native Binary Size

EOF
  tsv2html --css-class-pattern 'special ^gcc' "$in_dir/native-sizes.tsv"

  cmark << 'EOF'

### Host and Compiler Details
EOF
  tsv2html "$in_dir/hosts.tsv"
  tsv2html "$in_dir/compilers.tsv"

  cat <<EOF
</body>
</html>
EOF
}
453
# Run the whole benchmark on a single machine: start from an empty $BASE_DIR,
# fetch and unpack the comparison shells, build with ninja, collect compiler
# provenance, measure, and run the report stages.
soil-run() {
  rm -r -f "$BASE_DIR"
  mkdir -p "$BASE_DIR"

  download
  extract-other

  # Copied from benchmarks/osh-runtime.sh soil-run

  # could add _bin/cxx-bumpleak/oils-for-unix, although sometimes it's slower
  # Left unquoted on purpose so the value is split into words -- presumably
  # it can name more than one ninja target; confirm where it is defined.
  local -a osh_bin=( $OSH_CPP_NINJA_BUILD )
  ninja "${osh_bin[@]}"

  local single_machine='no-host'

  local job_id
  job_id=$(print-job-id)

  compiler-provenance-2 \
    "$single_machine" "$job_id" _tmp

  local host_job_id="$single_machine.$job_id"
  local raw_out_dir="$BASE_DIR/raw.$host_job_id"
  mkdir -p "$raw_out_dir" "$BASE_DIR/stage1"

  measure _tmp/compiler-provenance.txt "$raw_out_dir"

  # Trivial concatenation for 1 machine
  stage1 '' "$single_machine"

  benchmarks/report.sh stage2 "$BASE_DIR"

  benchmarks/report.sh stage3 "$BASE_DIR"
}
490
491	"$@"