#!/usr/bin/env Rscript
#
# benchmarks/report.R -- Analyze data collected by shell scripts.
#
# Usage:
#   benchmarks/report.R ACTION IN_DIR OUT_DIR

# Suppress warnings about functions masked from 'package:stats' and 'package:base'
#   filter, lag
#   intersect, setdiff, setequal, union
library(dplyr, warn.conflicts = FALSE)
library(tidyr)  # spread()
library(stringr)

source('benchmarks/common.R')

options(stringsAsFactors = F)

# For pretty printing
commas = function(x) {
  format(x, big.mark=',')
}

sourceUrl = function(path) {
  sprintf('https://github.com/oilshell/oil/blob/master/%s', path)
}

# Takes a filename, not a path.
sourceUrl2 = function(filename) {
  sprintf(
    'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s',
    filename)
}

mycppUrl = function(name) {
  sprintf('https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py', name)
}

genUrl = function(name) {
  sprintf('../../_gen/mycpp/examples/%s.mycpp.cc', name)
}


# TODO: Set up cgit because Github links are slow.
benchmarkDataLink = function(subdir, name, suffix) {
  #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
  sprintf('https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s',
          subdir, name, suffix)
}

provenanceLink = function(subdir, name, suffix) {
  sprintf('../%s/%s%s', subdir, name, suffix)
}


GetOshLabel = function(shell_hash, prov_dir) {
  ### Given a shell hash, return a readable label, by reading the
  ### recorded sh-path.txt under prov_dir.

  path = sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)

  if (file.exists(path)) {
    Log('Reading %s', path)
    lines = readLines(path)
    if (length(grep('_bin/osh', lines)) > 0) {
      label = 'osh-ovm'
    } else if (length(grep('bin/osh', lines)) > 0) {
      label = 'osh-cpython'
    } else if (length(grep('_bin/.*/mycpp-souffle/osh', lines)) > 0) {
      label = 'osh-native-souffle'
    } else if (length(grep('_bin/.*/osh', lines)) > 0) {
      label = 'osh-native'
    } else {
      stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
    }
  } else {
    stop(sprintf("%s doesn't exist", path))
  }
  return(label)
}

opt_suffix1 = '_bin/cxx-opt/osh'
opt_suffix2 = '_bin/cxx-opt-sh/osh'
opt_suffix3 = '_bin/cxx-opt/mycpp-souffle/osh'
opt_suffix4 = '_bin/cxx-opt-sh/mycpp-souffle/osh'

ShellLabels = function(shell_name, shell_hash, num_hosts) {
  ### Given 2 vectors, return a vector of readable labels.

  # TODO: Clean up callers.  Some metrics call this function with a
  # shell/runtime BASENAME, and others a PATH
  # - e.g. ComputeReport calls this with runtime_name, which is actually a PATH

  #Log('name %s', shell_name)
  #Log('hash %s', shell_hash)

  if (num_hosts == 1) {
    prov_dir = '_tmp'
  } else {
    prov_dir = '../benchmark-data/'
  }

  labels = c()
  for (i in 1:length(shell_name)) {
    sh = shell_name[i]
    if (sh == 'osh') {
      label = GetOshLabel(shell_hash[i], prov_dir)

    } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      label = 'opt/osh'

    } else if (endsWith(sh, opt_suffix3) || endsWith(sh, opt_suffix4)) {
      label = 'opt/osh-souffle'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label = 'bumpleak/osh'

    } else {
      label = sh
    }

    Log('[%s] [%s]', shell_name[i], label)
    labels = c(labels, label)
  }

  return(labels)
}

# Simple version of the above, used by benchmarks/gc
ShellLabelFromPath = function(sh_path) {
  labels = c()
  for (i in 1:length(sh_path)) {
    sh = sh_path[i]

    if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
      # the opt binary is osh-native
      label = 'osh-native'

    } else if (endsWith(sh, opt_suffix3) || endsWith(sh, opt_suffix4)) {
      # the opt binary built with mycpp-souffle
      label = 'osh-native-souffle'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label = 'bumpleak/osh'

    } else if (endsWith(sh, '_bin/osh')) {  # the app bundle
      label = 'osh-ovm'

    } else if (endsWith(sh, 'bin/osh')) {
      label = 'osh-cpython'

    } else {
      label = sh
    }
    labels = c(labels, label)
  }
  return(labels)
}
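
# A minimal sketch (not part of the report pipeline) of how the mapping above
# behaves.  The example paths are hypothetical and only exercise the
# endsWith() branches.
if (FALSE) {
  example_paths = c('/usr/bin/dash',
                    '_bin/cxx-opt/osh',
                    '_bin/cxx-opt-sh/mycpp-souffle/osh',
                    '/usr/local/bin/osh')
  print(ShellLabelFromPath(example_paths))
  # Expected labels: '/usr/bin/dash' (unchanged), 'osh-native',
  # 'osh-native-souffle', 'osh-cpython'
}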

DistinctHosts = function(t) {
  t %>% distinct(host_name, host_hash) -> distinct_hosts
  # The label is just the name
  distinct_hosts$host_label = distinct_hosts$host_name
  return(distinct_hosts)
}

DistinctShells = function(t, num_hosts = -1) {
  t %>% distinct(shell_name, shell_hash) -> distinct_shells

  Log('')
  Log('Labeling shells')

  # Calculate it if not passed
  if (num_hosts == -1) {
    num_hosts = nrow(DistinctHosts(t))
  }

  distinct_shells$shell_label = ShellLabels(distinct_shells$shell_name,
                                            distinct_shells$shell_hash,
                                            num_hosts)
  return(distinct_shells)
}
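
# Hypothetical illustration of the distinct/label steps: a toy data frame with
# duplicate (shell_name, shell_hash) rows collapses to one labeled row per
# shell.  Only a non-'osh' name is used, so no provenance files are read.
if (FALSE) {
  toy = tibble(host_name = c('lenny', 'lenny'), host_hash = c('aa', 'aa'),
               shell_name = c('bash', 'bash'), shell_hash = c('b1', 'b1'),
               elapsed_secs = c(1.0, 2.0))
  print(DistinctHosts(toy))   # 1 row; host_label == host_name
  print(DistinctShells(toy))  # 1 row; shell_label == 'bash'
}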

ParserReport = function(in_dir, out_dir) {
  times = read.csv(file.path(in_dir, 'times.csv'))
  lines = read.csv(file.path(in_dir, 'lines.csv'))
  raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))

  cachegrind = readTsv(file.path(in_dir, 'cachegrind.tsv'))

  # For joining by filename
  lines_by_filename = tibble(
    num_lines = lines$num_lines,
    filename = basename(lines$path)
  )

  # Remove failures
  times %>% filter(status == 0) %>% select(-c(status)) -> times
  cachegrind %>% filter(status == 0) %>% select(-c(status)) -> cachegrind

  # Add the number of lines, joining on path, and compute lines/ms
  times %>%
    left_join(lines, by = c('path')) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path),
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           lines_per_ms = num_lines / elapsed_ms) %>%
    select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs)) ->
    joined_times

  #print(head(times))
  #print(head(lines))
  #print(head(vm))
  #print(head(joined_times))

  print(summary(joined_times))

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  distinct_hosts = DistinctHosts(joined_times)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells = DistinctShells(joined_times)
  Log('')
  Log('Distinct shells')
  print(distinct_shells)

  # Replace name/hash combinations with labels.
  joined_times %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
    select(-c(host_name, host_hash, shell_name, shell_hash)) ->
    joined_times

  # Like 'times', but do shell_label as one step
  # Hack: we know benchmarks/auto.sh runs this on one machine
  distinct_shells_2 = DistinctShells(cachegrind, num_hosts = nrow(distinct_hosts))
  cachegrind %>%
    left_join(lines, by = c('path')) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
    select(-c(shell_name, shell_hash)) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
    select(-c(path)) ->
    joined_cachegrind

  Log('summary(joined_times):')
  print(summary(joined_times))
  Log('head(joined_times):')
  print(head(joined_times))

  # Summarize rates by platform/shell
  joined_times %>%
    mutate(host_label = paste("host", host_label)) %>%
    group_by(host_label, shell_label) %>%
    summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
    mutate(lines_per_ms = total_lines / total_ms) %>%
    select(-c(total_ms)) %>%
    spread(key = host_label, value = lines_per_ms) ->
    times_summary

  # Sort by parsing rate on machine 1
  if ("host hoover" %in% colnames(times_summary)) {
    times_summary %>% arrange(desc(`host hoover`)) -> times_summary
  } else {
    times_summary %>% arrange(desc(`host no-host`)) -> times_summary
  }

  Log('times_summary:')
  print(times_summary)

  # Summarize cachegrind by platform/shell
  # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
  joined_cachegrind %>%
    group_by(shell_label) %>%
    summarize(total_lines = sum(num_lines), total_irefs = sum(as.numeric(irefs))) %>%
    mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
    select(-c(total_irefs)) ->
    cachegrind_summary

  if ("no-host" %in% distinct_hosts$host_label) {

    # We don't have all the shells
    elapsed = NULL
    rate = NULL
    max_rss = NULL
    instructions = NULL

    joined_times %>%
      select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
               num_lines, filename, filename_HREF)) %>%
      arrange(filename, elapsed_ms) ->
      times_flat

    joined_cachegrind %>%
      select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
      arrange(filename, irefs) ->
      cachegrind_flat

  } else {

    times_flat = NULL
    cachegrind_flat = NULL

    # Hack for release. TODO: unify with Soil
    if (Sys.getenv("OILS_NO_SOUFFLE") == "") {
      souffle_col = c('osh-native-souffle')
    } else {
      souffle_col = c()
    }

    cols1 = c('host_label', 'bash', 'dash', 'mksh', 'zsh',
              'osh-ovm', 'osh-cpython', 'osh-native', souffle_col,
              'osh_to_bash_ratio', 'num_lines', 'filename', 'filename_HREF')

    # Elapsed seconds for each shell by platform and file
    joined_times %>%
      select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = elapsed_ms) %>%
      arrange(host_label, num_lines) %>%
      mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
      select(all_of(cols1)) ->
      elapsed

    Log('\n')
    Log('ELAPSED')
    print(elapsed)

    cols2 = c('host_label', 'bash', 'dash', 'mksh', 'zsh',
              'osh-ovm', 'osh-cpython', 'osh-native', souffle_col,
              'num_lines', 'filename', 'filename_HREF')
    # Rates by file and shell
    joined_times %>%
      select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = lines_per_ms) %>%
      arrange(host_label, num_lines) %>%
      select(all_of(cols2)) ->
      rate

    Log('\n')
    Log('RATE')
    print(rate)

    # Memory usage by file
    joined_times %>%
      select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
      spread(key = shell_label, value = max_rss_MB) %>%
      arrange(host_label, num_lines) %>%
      select(all_of(cols2)) ->
      max_rss

    Log('\n')
    Log('MAX RSS')
    print(max_rss)

    Log('\n')
    Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
    print(joined_cachegrind)
    #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))

    cols3 = c('bash', 'dash', 'mksh', 'osh-native', souffle_col,
              'num_lines', 'filename', 'filename_HREF')

    # Cachegrind instructions by file
    joined_cachegrind %>%
      mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
      select(-c(irefs)) %>%
      spread(key = shell_label, value = thousand_irefs_per_line) %>%
      arrange(num_lines) %>%
      select(all_of(cols3)) ->
      instructions

    Log('\n')
    Log('instructions has %d rows', nrow(instructions))
    print(instructions)
  }

  WriteProvenance(distinct_hosts, distinct_shells, out_dir)

  raw_data_table = tibble(
    filename = basename(as.character(raw_data$path)),
    filename_HREF = benchmarkDataLink('osh-parser', filename, '')
  )
  #print(raw_data_table)

  writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))

  precision = SamePrecision(0)  # lines per ms
  writeCsv(times_summary, file.path(out_dir, 'summary'), precision)

  precision = ColumnPrecision(list(), default = 1)
  writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'), precision)

  if (!is.null(times_flat)) {
    precision = SamePrecision(0)
    writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
  }

  if (!is.null(cachegrind_flat)) {
    precision = SamePrecision(0)
    writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
  }

  if (!is.null(elapsed)) {  # i.e. not the 'no-host' case above
    # Round to nearest millisecond, but the ratio has a decimal point.
    precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
    writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)

    precision = SamePrecision(0)
    writeCsv(rate, file.path(out_dir, 'rate'), precision)

    writeCsv(max_rss, file.path(out_dir, 'max_rss'))

    precision = SamePrecision(1)
    writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
  }

  Log('Wrote %s', out_dir)
}
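
# The wide tables above all follow the same tidyr::spread() pattern: one row
# per (host, file), one column per shell label.  A self-contained toy version
# with made-up numbers, for reference:
if (FALSE) {
  toy = tibble(host_label = 'host A',
               filename = c('x.sh', 'x.sh'),
               shell_label = c('bash', 'osh-native'),
               elapsed_ms = c(100, 200))
  toy %>%
    spread(key = shell_label, value = elapsed_ms) %>%
    mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
    print()
  # 1 row: bash = 100, osh-native = 200, osh_to_bash_ratio = 2
}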

WriteProvenance = function(distinct_hosts, distinct_shells, out_dir, tsv = F) {

  num_hosts = nrow(distinct_hosts)
  if (num_hosts == 1) {
    linkify = provenanceLink
  } else {
    linkify = benchmarkDataLink
  }

  Log('distinct_hosts')
  print(distinct_hosts)
  Log('')

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  # Should be:
  # host_id_url
  # And then csv_to_html will be smart enough? It should take --url flag?
  host_table = tibble(
    host_label = distinct_hosts$host_label,
    host_id = paste(distinct_hosts$host_name,
                    distinct_hosts$host_hash, sep='-'),
    host_id_HREF = linkify('host-id', host_id, '/')
  )
  Log('host_table')
  print(host_table)
  Log('')

  shell_table = tibble(
    shell_label = distinct_shells$shell_label,
    shell_id = paste(distinct_shells$shell_name,
                     distinct_shells$shell_hash, sep='-'),
    shell_id_HREF = linkify('shell-id', shell_id, '/')
  )

  Log('shell_table')
  print(shell_table)
  Log('')

  if (tsv) {
    writeTsv(host_table, file.path(out_dir, 'hosts'))
    writeTsv(shell_table, file.path(out_dir, 'shells'))
  } else {
    writeCsv(host_table, file.path(out_dir, 'hosts'))
    writeCsv(shell_table, file.path(out_dir, 'shells'))
  }
}

WriteSimpleProvenance = function(provenance, out_dir) {
  Log('provenance')
  print(provenance)
  Log('')

  # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
  # benchmarks/id.sh publish-shell-id uses
  provenance %>%
    mutate(shell_name = basename(sh_path)) %>%
    distinct(shell_label, shell_name, shell_hash) ->
    distinct_shells

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  provenance %>% distinct(host_label, host_name, host_hash) -> distinct_hosts

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
}

RuntimeReport = function(in_dir, out_dir) {
  times = readTsv(file.path(in_dir, 'times.tsv'))

  gc_stats = readTsv(file.path(in_dir, 'gc_stats.tsv'))
  provenance = readTsv(file.path(in_dir, 'provenance.tsv'))

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some osh-runtime tasks failed')
  }

  # Joins:
  #   times <= sh_path => provenance
  #   times <= join_id, host_name => gc_stats

  # TODO: provenance may have rows from 2 machines.  Could validate them and
  # deduplicate.

  # It should have (host_label, host_name, host_hash)
  #                (shell_label, sh_path, shell_hash)
  provenance %>%
    mutate(host_label = host_name, shell_label = ShellLabelFromPath(sh_path)) ->
    provenance

  provenance %>% distinct(sh_path, shell_label) -> label_lookup

  Log('label_lookup')
  print(label_lookup)

  # Join with provenance for host label and shell label
  times %>%
    select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
             host_name, sh_path, workload)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload) ->
    details

  times %>%
    select(c(task_id, host_name, sh_path, workload, minor_faults, major_faults,
             swaps, in_block, out_block, signals, voluntary_ctx, involuntary_ctx)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload) ->
    details_io

  Log('details')
  print(details)

  # Hack for release. TODO: unify with Soil
  if (Sys.getenv("OILS_NO_SOUFFLE") == "") {
    souffle_col = c('osh-native-souffle')
  } else {
    souffle_col = c()
  }

  cols2 = c('workload', 'host_name',
            'bash', 'dash', 'osh-cpython', 'osh-native', souffle_col,
            'py_bash_ratio', 'native_bash_ratio')

  # Elapsed time comparison
  details %>%
    select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
    spread(key = shell_label, value = elapsed_ms) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(all_of(cols2)) ->
    elapsed

  Log('elapsed')
  print(elapsed)

  # Minor Page Faults Comparison
  details_io %>%
    select(c(host_name, shell_label, workload, minor_faults)) %>%
    spread(key = shell_label, value = minor_faults) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(all_of(cols2)) ->
    page_faults

  Log('page_faults')
  print(page_faults)

  # Max RSS comparison
  details %>%
    select(c(host_name, shell_label, workload, max_rss_MB)) %>%
    spread(key = shell_label, value = max_rss_MB) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(all_of(cols2)) ->
    max_rss

  Log('max rss')
  print(max_rss)

  details %>%
    select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
    mutate(join_id = sprintf("gc-%d", task_id)) %>%
    select(-c(task_id)) ->
    gc_details

  Log('GC details')
  print(gc_details)
  Log('')

  Log('GC stats')
  print(gc_stats)
  Log('')

  gc_stats %>%
    left_join(gc_details, by = c('join_id', 'host_name')) %>%
    select(-c(join_id, roots_capacity, objs_capacity)) %>%
    # Do same transformations as GcReport()
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    select(-c(bytes_allocated)) %>%
    rename(num_gc_done = num_collections) %>%
    # Put these columns first
    relocate(workload, host_name,
             elapsed_ms, max_gc_millis, total_gc_millis,
             allocated_MB, max_rss_MB, num_allocated) ->
    gc_stats

  Log('After GC stats')
  print(gc_stats)
  Log('')

  WriteSimpleProvenance(provenance, out_dir)

  # milliseconds don't need a decimal digit
  precision = ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
                                   `osh-native` = 0, `osh-native-souffle` = 0,
                                   py_bash_ratio = 2, native_bash_ratio = 2))
  writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
  writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)

  precision2 = ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  precision3 = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                               default = 0)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision3)

  writeTsv(details, file.path(out_dir, 'details'), precision3)
  writeTsv(details_io, file.path(out_dir, 'details_io'))

  Log('Wrote %s', out_dir)
}
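
# The gc_stats join above relies on the "gc-<task_id>" join_id convention.
# A hypothetical two-row example of the left_join by (join_id, host_name):
if (FALSE) {
  gc = tibble(join_id = c('gc-1', 'gc-2'), host_name = 'h1',
              num_collections = c(3, 5))
  d = tibble(join_id = c('gc-1', 'gc-2'), host_name = 'h1',
             workload = c('parse.abuild', 'ex.compute-fib'),
             elapsed_ms = c(10, 20))
  # Each gc row picks up the workload and elapsed_ms of its task
  gc %>% left_join(d, by = c('join_id', 'host_name')) %>% print()
}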

VmBaselineReport = function(in_dir, out_dir) {
  vm = readTsv(file.path(in_dir, 'vm-baseline.tsv'))
  #print(vm)

  # Not using DistinctHosts() because field host_hash isn't collected
  num_hosts = nrow(vm %>% distinct(host))

  vm %>%
    rename(kib = metric_value) %>%
    mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
           megabytes = kib * 1024 / 1e6) %>%
    select(-c(shell_name, kib)) %>%
    spread(key = c(metric_name), value = megabytes) %>%
    rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
    select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
    arrange(shell_label, shell_hash, host, VmPeak_MB) ->
    vm

  print(vm)

  writeTsv(vm, file.path(out_dir, 'vm-baseline'))
}

WriteOvmBuildDetails = function(distinct_hosts, distinct_compilers, out_dir) {
  host_table = tibble(
    host_label = distinct_hosts$host_label,
    host_id = paste(distinct_hosts$host_name,
                    distinct_hosts$host_hash, sep='-'),
    host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
  )
  print(host_table)

  dc = distinct_compilers
  compiler_table = tibble(
    compiler_label = dc$compiler_label,
    compiler_id = paste(dc$compiler_label, dc$compiler_hash, sep='-'),
    compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
  )
  print(compiler_table)

  writeTsv(host_table, file.path(out_dir, 'hosts'))
  writeTsv(compiler_table, file.path(out_dir, 'compilers'))
}

OvmBuildReport = function(in_dir, out_dir) {
  times = readTsv(file.path(in_dir, 'times.tsv'))
  native_sizes = readTsv(file.path(in_dir, 'native-sizes.tsv'))
  #raw_data = readTsv(file.path(in_dir, 'raw-data.tsv'))

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some ovm-build tasks failed')
  }

  times %>% distinct(host_name, host_hash) -> distinct_hosts
  distinct_hosts$host_label = distinct_hosts$host_name

  times %>% distinct(compiler_path, compiler_hash) -> distinct_compilers
  distinct_compilers$compiler_label = basename(distinct_compilers$compiler_path)

  #print(distinct_hosts)
  #print(distinct_compilers)

  WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)

  times %>%
    select(-c(status)) %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_compilers, by = c('compiler_path', 'compiler_hash')) %>%
    select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
    mutate(src_dir = basename(src_dir),
           host_label = paste("host ", host_label),
           is_conf = str_detect(action, 'configure'),
           is_ovm = str_detect(action, 'oil.ovm'),
           is_dbg = str_detect(action, 'dbg'),
           ) %>%
    select(host_label, src_dir, compiler_label, action, is_conf, is_ovm, is_dbg,
           elapsed_secs) %>%
    spread(key = c(host_label), value = elapsed_secs) %>%
    arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
    select(-c(is_conf, is_ovm, is_dbg)) ->
    times

  #print(times)

  # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
  native_sizes %>%
    select(c(host_label, path, num_bytes)) %>%
    mutate(host_label = paste("host ", host_label),
           binary = basename(path),
           compiler = basename(dirname(path)),
           ) %>%
    select(-c(path)) %>%
    spread(key = c(host_label), value = num_bytes) %>%
    arrange(compiler, binary) ->
    native_sizes

  # NOTE: These don't have the host and compiler.
  writeTsv(times, file.path(out_dir, 'times'))
  writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))

  # TODO: I want a size report too
  #writeCsv(sizes, file.path(out_dir, 'sizes'))
}

unique_stdout_md5sum = function(t, num_expected) {
  u = n_distinct(t$stdout_md5sum)
  if (u != num_expected) {
    t %>% select(c(host_name, task_name, arg1, arg2, runtime_name, stdout_md5sum)) %>%
      print()
    stop(sprintf('Expected %d unique md5sums, got %d', num_expected, u))
  }
}
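
# unique_stdout_md5sum() is a correctness check: every runtime should produce
# byte-identical stdout for a task.  A hypothetical passing and failing case:
if (FALSE) {
  ok = tibble(host_name = 'h1', task_name = 'hello', arg1 = NA, arg2 = NA,
              runtime_name = c('bash', 'dash'),
              stdout_md5sum = c('abc', 'abc'))
  unique_stdout_md5sum(ok, 1)     # silent: exactly 1 distinct md5sum

  bad = ok
  bad$stdout_md5sum = c('abc', 'xyz')
  # unique_stdout_md5sum(bad, 1)  # would print the rows and stop()
}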

ComputeReport = function(in_dir, out_dir) {
  # TSV file, not CSV
  times = read.table(file.path(in_dir, 'times.tsv'), header=T)
  print(times)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some compute tasks failed')
  }

  #
  # Check correctness
  #

  times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
  # 3 different inputs
  times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)

  times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)

  # TODO:
  # - oils_cpp doesn't implement unicode LANG=C
  # - bash behaves differently on your desktop vs. in the container
  # - might need layer-locales in the image?

  #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
  # Ditto here
  #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  # Runtimes are called shells, as a hack for code reuse
  times %>%
    mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
    select(c(host_name, host_hash, shell_name, shell_hash)) ->
    tmp

  distinct_hosts = DistinctHosts(tmp)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells = DistinctShells(tmp)
  Log('')
  Log('Distinct runtimes')
  print(distinct_shells)

  num_hosts = nrow(distinct_hosts)

  times %>%
    select(-c(status, stdout_md5sum, stdout_filename, host_hash, runtime_hash)) %>%
    mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(runtime_name, elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    arrange(host_name, task_name, arg1, arg2, user_ms) ->
    details

  times %>%
    mutate(
      runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
      stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
    select(c(host_name, task_name, arg1, arg2, runtime_label,
             stdout_md5sum, stdout_md5sum_HREF)) ->
    stdout_files

  details %>% filter(task_name == 'hello') %>% select(-c(task_name)) -> hello
  details %>% filter(task_name == 'fib') %>% select(-c(task_name)) -> fib
  details %>% filter(task_name == 'word_freq') %>% select(-c(task_name)) -> word_freq
  # There's no arg2
  details %>% filter(task_name == 'parse_help') %>% select(-c(task_name, arg2)) -> parse_help

  details %>% filter(task_name == 'bubble_sort') %>% select(-c(task_name)) -> bubble_sort
  details %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>%
    select(-c(task_name)) -> palindrome

  precision = ColumnPrecision(list(max_rss_MB = 1), default = 0)
  writeTsv(details, file.path(out_dir, 'details'), precision)

  writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)

  writeTsv(hello, file.path(out_dir, 'hello'), precision)
  writeTsv(fib, file.path(out_dir, 'fib'), precision)
  writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
  writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)

  writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
  writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
}

WriteOneTask = function(times, out_dir, task_name, precision) {
  times %>%
    filter(task == task_name) %>%
    select(-c(task)) -> subset

  writeTsv(subset, file.path(out_dir, task_name), precision)
}

SHELL_ORDER = c('dash',
                'bash',
                'zsh',
                '_bin/cxx-opt+bumpleak/osh',
                '_bin/cxx-opt+bumproot/osh',
                '_bin/cxx-opt+bumpsmall/osh',
                '_bin/cxx-opt/osh',
                '_bin/cxx-opt+nopool/osh')
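
# GcReport() and GcCachegrindReport() below use factor(sh_path, levels =
# SHELL_ORDER) only to sort rows into this canonical shell order.  A toy
# illustration with a subset of the paths:
if (FALSE) {
  toy = tibble(sh_path = c('_bin/cxx-opt/osh', 'dash', 'bash'), secs = 1:3)
  toy %>% arrange(factor(sh_path, levels = SHELL_ORDER)) %>% print()
  # rows come out as: dash, bash, _bin/cxx-opt/osh
}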

GcReport = function(in_dir, out_dir) {
  times = read.table(file.path(in_dir, 'raw/times.tsv'), header=T)
  gc_stats = read.table(file.path(in_dir, 'stage1/gc_stats.tsv'), header=T)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  # Change units and order columns
  times %>%
    arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           shell_label = ShellLabelFromPath(sh_path)
           ) %>%
    select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB, shell_label,
             shell_runtime_opts)) ->
    times

  # Join and order columns
  gc_stats %>% left_join(times, by = c('join_id')) %>%
    arrange(desc(task)) %>%
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    # try to make the table skinnier
    rename(num_gc_done = num_collections) %>%
    select(task, elapsed_ms, max_gc_millis, total_gc_millis,
           allocated_MB, max_rss_MB, num_allocated,
           num_gc_points, num_gc_done, gc_threshold, num_growths, max_survived,
           shell_label) ->
    gc_stats

  times %>% select(-c(join_id)) -> times

  precision = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                              default = 0)

  writeTsv(times, file.path(out_dir, 'times'), precision)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)

  tasks = c('parse.configure-coreutils',
            'parse.configure-cpython',
            'parse.abuild',
            'ex.compute-fib',
            'ex.bashcomp-parse-help',
            'ex.abuild-print-help')
  # Write out separate rows
  for (task in tasks) {
    WriteOneTask(times, out_dir, task, precision)
  }
}

GcCachegrindReport = function(in_dir, out_dir) {
  times = readTsv(file.path(in_dir, 'raw/times.tsv'))
  counts = readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  print(times)
  print(counts)

  counts %>% left_join(times, by = c('join_id')) %>%
    mutate(million_irefs = irefs / 1e6) %>%
    select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
    arrange(factor(sh_path, levels = SHELL_ORDER)) ->
    counts

  precision = NULL
  tasks = c('parse.abuild', 'ex.compute-fib')
  for (task in tasks) {
    WriteOneTask(counts, out_dir, task, precision)
  }
}

MyCppReport = function(in_dir, out_dir) {
  times = readTsv(file.path(in_dir, 'benchmark-table.tsv'))
  print(times)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some mycpp tasks failed')
  }

  # Don't care about elapsed and system
  times %>% select(-c(status, elapsed_secs, bin, task_out)) %>%
    mutate(example_name_HREF = mycppUrl(example_name),
           gen = c('gen'),
           gen_HREF = genUrl(example_name),
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(user_secs, sys_secs, max_rss_KiB)) ->
    details

  details %>% select(-c(sys_ms, max_rss_MB)) %>%
    spread(key = impl, value = user_ms) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`) ->
    user_time

  details %>% select(-c(user_ms, max_rss_MB)) %>%
    spread(key = impl, value = sys_ms) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`) ->
    sys_time

  details %>% select(-c(user_ms, sys_ms)) %>%
    spread(key = impl, value = max_rss_MB) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`) ->
    max_rss

  # Sometimes it speeds up by more than 10x
  precision1 = ColumnPrecision(list(`C++ : Python` = 3), default = 0)
  writeTsv(user_time, file.path(out_dir, 'user_time'), precision1)
  writeTsv(sys_time, file.path(out_dir, 'sys_time'), precision1)

  precision2 = ColumnPrecision(list(`C++ : Python` = 2), default = 1)
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  writeTsv(details, file.path(out_dir, 'details'))
}

UftraceTaskReport = function(env, task_name, summaries) {
  # Need this again after redirect
  MaybeDisableColor(stdout())

  task_env = env[[task_name]]

  untyped = task_env$untyped
  typed = task_env$typed
  strings = task_env$strings
  slabs = task_env$slabs
  reserve = task_env$reserve

  string_overhead = 17  # GC header (8) + len (4) + hash value (4) + NUL (1)
  strings %>% mutate(obj_len = str_len + string_overhead) -> strings

  # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
  # big/small
  #
  # And then zoom in on distributions as well

  num_allocs = nrow(untyped)
  total_bytes = sum(untyped$obj_len)

  untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
  #print(untyped_hist)

  untyped_hist %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_allocs) ->
    alloc_sizes

  a24 = untyped_hist %>% filter(obj_len <= 24)
  a48 = untyped_hist %>% filter(obj_len <= 48)
  a96 = untyped_hist %>% filter(obj_len <= 96)

  allocs_24_bytes_or_less = sum(a24$n) * 100.0 / num_allocs
  allocs_48_bytes_or_less = sum(a48$n) * 100.0 / num_allocs
  allocs_96_bytes_or_less = sum(a96$n) * 100.0 / num_allocs

  Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)

  options(tibble.print_min=25)

  Log('')
  Log('All allocations')
  print(alloc_sizes %>% head(22))
  print(alloc_sizes %>% tail(5))

  Log('')
  Log('Common Sizes')
  print(untyped_hist %>% arrange(desc(n)) %>% head(8))

  Log('')
  Log(' %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
  Log('')

  Log('Typed allocations')

  num_typed = nrow(typed)

  typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
    mutate(percent = n * 100.0 / num_typed) %>%
    arrange(desc(n)) -> most_common_types

  print(most_common_types %>% head(20))
  print(most_common_types %>% tail(5))

  lists = typed %>% filter(str_starts(func_name, ('List<')))
  #print(lists)

  num_lists = nrow(lists)
  total_list_bytes = num_lists * 24  # sizeof List<T> head is hard-coded

  Log('')
  Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
  Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
  Log('')

  #
  # Strings
  #

  num_strings = nrow(strings)
  total_string_bytes = sum(strings$obj_len)

  strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_strings) ->
    string_lengths

  strs_6_bytes_or_less = string_lengths %>% filter(str_len == 6) %>% select(percent)
  strs_14_bytes_or_less = string_lengths %>% filter(str_len == 14) %>% select(percent)

  # Parse workload
  # 62% of strings <= 6 bytes
  # 84% of strings <= 14 bytes
  Log('Str - NewStr() and OverAllocatedStr()')
  print(string_lengths %>% head(16))
  print(string_lengths %>% tail(5))
  Log('')

  Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
      commas(sum(strings$str_len)), commas(total_string_bytes))
  Log('')
  Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
  Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
  Log('')

  #
  # Slabs
  #

  Log('NewSlab()')

  num_slabs = nrow(slabs)
  slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_slabs) ->
    slab_lengths

  slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
    arrange(desc(n)) -> slab_types

  Log(' Lengths')
  print(slab_lengths %>% head())
  print(slab_lengths %>% tail(5))
  Log('')

  Log(' Slab Types')
  print(slab_types %>% head())
  print(slab_types %>% tail(5))
  Log('')

  total_slab_items = sum(slabs$slab_len)

  Log('%s slabs, total items = %s', commas(num_slabs),
      commas(sum(slabs$slab_len)))
  Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
  Log('')

  #
  # reserve() calls
  #

  # There should be strictly more List::reserve() calls than NewSlab

  Log('::reserve(int n)')
  Log('')

  num_reserve = nrow(reserve)
  reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_reserve) ->
    reserve_args

  Log(' Num Items')
  print(reserve_args %>% head(15))
  print(reserve_args %>% tail(5))
  Log('')

  Log('%s reserve() calls, total items = %s', commas(num_reserve),
      commas(sum(reserve$num_items)))
  Log('')

  # Accounting for all allocations!
  Log('Untyped: %s', commas(num_allocs))
  Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
  Log('')

  num_other_typed = num_typed - num_lists

  # Summary table
  stats = tibble(task = task_name,
                 total_bytes_ = commas(total_bytes),
                 num_allocs_ = commas(num_allocs),
                 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
                 num_reserve_calls = commas(num_reserve),

                 percent_list_allocs = Percent(num_lists, num_allocs),
                 percent_slab_allocs = Percent(num_slabs, num_allocs),
                 percent_string_allocs = Percent(num_strings, num_allocs),
                 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),

                 percent_list_bytes = Percent(total_list_bytes, total_bytes),
                 percent_string_bytes = Percent(total_string_bytes, total_bytes),

                 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
                 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
                 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),

                 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
                 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less),
                 )
  summaries$stats[[task_name]] = stats

  summaries$most_common_types[[task_name]] = most_common_types
}
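
# The histograms above (alloc_sizes, string_lengths, slab_lengths,
# reserve_args) all use the same cumsum() idiom: count per size, then a
# running "percent of events at or below this size".  Toy version with
# made-up sizes:
if (FALSE) {
  sizes = tibble(obj_len = c(16, 16, 24, 48))
  sizes %>% group_by(obj_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / nrow(sizes)) %>%
    print()
  # obj_len 16 -> 50%, 24 -> 75%, 48 -> 100%
}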

LoadUftraceTsv = function(in_dir, env) {
  for (task in list.files(in_dir)) {
    Log('Loading data for task %s', task)
    base_dir = file.path(in_dir, task)

    task_env = new.env()
    env[[task]] = task_env

    # TSV file, not CSV
    task_env$untyped = readTsv(file.path(base_dir, 'all-untyped.tsv'))
    task_env$typed = readTsv(file.path(base_dir, 'typed.tsv'))
    task_env$strings = readTsv(file.path(base_dir, 'strings.tsv'))
    task_env$slabs = readTsv(file.path(base_dir, 'slabs.tsv'))
    task_env$reserve = readTsv(file.path(base_dir, 'reserve.tsv'))

    # median string length is 4, mean is 9.5!
    Log('UNTYPED')
    print(summary(task_env$untyped))
    Log('')

    Log('TYPED')
    print(summary(task_env$typed))
    Log('')

    Log('STRINGS')
    print(summary(task_env$strings))
    Log('')

    Log('SLABS')
    print(summary(task_env$slabs))
    Log('')

    Log('RESERVE')
    print(summary(task_env$reserve))
    Log('')
  }
}

Percent = function(n, total) {
  sprintf('%.1f%%', n * 100.0 / total)
}

PrettyPrintLong = function(d) {
  tr = t(d)  # transpose

  row_names = rownames(tr)

  for (i in 1:nrow(tr)) {
    row_name = row_names[i]
    cat(sprintf('%26s', row_name))  # calculated min width manually
    cat(sprintf('%20s', tr[i,]))
    cat('\n')

    # Extra spacing
    if (row_name %in% c('num_reserve_calls',
                        'percent_string_bytes',
                        'percent_other_typed_allocs',
                        'allocs_96_bytes_or_less')) {
      cat('\n')
    }
  }
}
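
# PrettyPrintLong() prints a one-row-per-task summary sideways, one metric per
# line (name right-aligned, value right-aligned next to it).  A hypothetical
# example:
if (FALSE) {
  PrettyPrintLong(tibble(task = 'parse.abuild', num_allocs_ = '1,234'))
  # prints two lines: 'task  parse.abuild' and 'num_allocs_  1,234',
  # each padded to the fixed column widths above
}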


UftraceReport = function(env, out_dir) {
  # summaries$stats should be a list of 1-row data frames
  # summaries$most_common_types should be a list of types
  summaries = new.env()

  for (task_name in names(env)) {
    report_out = file.path(out_dir, paste0(task_name, '.txt'))

    Log('Making report for task %s -> %s', task_name, report_out)

    sink(file = report_out)
    UftraceTaskReport(env, task_name, summaries)
    sink()  # reset
  }
  Log('')

  # Concatenate all the data frames added to the summary
  stats = bind_rows(as.list(summaries$stats))

  sink(file = file.path(out_dir, 'summary.txt'))
  #print(stats)
  #Log('')

  PrettyPrintLong(stats)
  Log('')

  mct = summaries$most_common_types
  for (task_name in names(mct)) {
    Log('Common types in workload %s', task_name)
    Log('')

    print(mct[[task_name]] %>% head(5))
    Log('')
  }
  sink()

  # For the REPL
  return(list(stats = stats))
}

main = function(argv) {
  action = argv[[1]]
  in_dir = argv[[2]]
  out_dir = argv[[3]]

  if (action == 'osh-parser') {
    ParserReport(in_dir, out_dir)

  } else if (action == 'osh-runtime') {
    RuntimeReport(in_dir, out_dir)

  } else if (action == 'vm-baseline') {
    VmBaselineReport(in_dir, out_dir)

  } else if (action == 'ovm-build') {
    OvmBuildReport(in_dir, out_dir)

  } else if (action == 'compute') {
    ComputeReport(in_dir, out_dir)

  } else if (action == 'gc') {
    GcReport(in_dir, out_dir)

  } else if (action == 'gc-cachegrind') {
    GcCachegrindReport(in_dir, out_dir)

  } else if (action == 'mycpp') {
    MyCppReport(in_dir, out_dir)

  } else if (action == 'uftrace') {
    d = new.env()
    LoadUftraceTsv(in_dir, d)
    UftraceReport(d, out_dir)

  } else {
    Log("Invalid action '%s'", action)
    quit(status = 1)
  }
  Log('PID %d done', Sys.getpid())
}
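
# Example invocation (the IN_DIR and OUT_DIR names here are only illustrative;
# the benchmarks/*.sh drivers pass the real directories):
#
#   Rscript benchmarks/report.R osh-parser _tmp/osh-parser/stage1 _tmp/osh-parser/stage2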

if (length(sys.frames()) == 0) {
  # increase ggplot font size globally
  #theme_set(theme_grey(base_size = 20))

  main(commandArgs(TRUE))
}