OILS / benchmarks / report.R View on Github | oilshell.org

1352 lines, 935 significant
1#!/usr/bin/env Rscript
2#
3# benchmarks/report.R -- Analyze data collected by shell scripts.
4#
5# Usage:
6# benchmarks/report.R OUT_DIR [TIMES_CSV...]
7
8# Suppress warnings about functions masked from 'package:stats' and 'package:base'
9# filter, lag
10# intersect, setdiff, setequal, union
11library(dplyr, warn.conflicts = FALSE)
12library(tidyr) # spread()
13library(stringr)
14
15source('benchmarks/common.R')
16
17options(stringsAsFactors = F)
18
19# For pretty printing
20commas = function(x) {
21 format(x, big.mark=',')
22}
23
24sourceUrl = function(path) {
25 sprintf('https://github.com/oilshell/oil/blob/master/%s', path)
26}
27
28# Takes a filename, not a path.
29sourceUrl2 = function(filename) {
30 sprintf(
31 'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s',
32 filename)
33}
34
35mycppUrl = function(name) {
36 sprintf('https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py', name)
37}
38
39genUrl = function(name) {
40 sprintf('../../_gen/mycpp/examples/%s.mycpp.cc', name)
41}
42
43
44# TODO: Set up cgit because Github links are slow.
45benchmarkDataLink = function(subdir, name, suffix) {
46 #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
47 sprintf('https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s',
48 subdir, name, suffix)
49}
50
51provenanceLink = function(subdir, name, suffix) {
52 sprintf('../%s/%s%s', subdir, name, suffix)
53}
54
55
56GetOshLabel = function(shell_hash, prov_dir) {
57 ### Given a string, return another string.
58
59 path = sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)
60
61 if (file.exists(path)) {
62 Log('Reading %s', path)
63 lines = readLines(path)
64 if (length(grep('_bin/osh', lines)) > 0) {
65 label = 'osh-ovm'
66 } else if (length(grep('bin/osh', lines)) > 0) {
67 label = 'osh-cpython'
68 } else if (length(grep('_bin/.*/mycpp-souffle/osh', lines)) > 0) {
69 label = 'osh-native-souffle'
70 } else if (length(grep('_bin/.*/osh', lines)) > 0) {
71 label = 'osh-native'
72 } else {
73 stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
74 }
75 } else {
76 stop(sprintf("%s doesn't exist", path))
77 }
78 return(label)
79}
80
81opt_suffix1 = '_bin/cxx-opt/osh'
82opt_suffix2 = '_bin/cxx-opt-sh/osh'
83opt_suffix3 = '_bin/cxx-opt/mycpp-souffle/osh'
84opt_suffix4 = '_bin/cxx-opt-sh/mycpp-souffle/osh'
85
86ShellLabels = function(shell_name, shell_hash, num_hosts) {
87 ### Given 2 vectors, return a vector of readable labels.
88
89 # TODO: Clean up callers. Some metrics all this function with a
90 # shell/runtime BASENAME, and others a PATH
91 # - e.g. ComputeReport calls this with runtime_name which is actually a PATH
92
93 #Log('name %s', shell_name)
94 #Log('hash %s', shell_hash)
95
96 if (num_hosts == 1) {
97 prov_dir = '_tmp'
98 } else {
99 prov_dir = '../benchmark-data/'
100 }
101
102 labels = c()
103 for (i in 1:length(shell_name)) {
104 sh = shell_name[i]
105 if (sh == 'osh') {
106 label = GetOshLabel(shell_hash[i], prov_dir)
107
108 } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
109 label = 'opt/osh'
110
111 } else if (endsWith(sh, opt_suffix3) || endsWith(sh, opt_suffix4)) {
112 label = 'opt/osh-souffle'
113
114 } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
115 label = 'bumpleak/osh'
116
117 } else {
118 label = sh
119 }
120
121 Log('[%s] [%s]', shell_name[i], label)
122 labels = c(labels, label)
123 }
124
125 return(labels)
126}
127
128# Simple version of the above, used by benchmarks/gc
129ShellLabelFromPath = function(sh_path) {
130 labels = c()
131 for (i in 1:length(sh_path)) {
132 sh = sh_path[i]
133
134 if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
135 # the opt binary is osh-native
136 label = 'osh-native'
137
138 } else if (endsWith(sh, opt_suffix3) || endsWith(sh, opt_suffix4)) {
139 # the opt binary is osh-native
140 label = 'osh-native-souffle'
141
142 } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
143 label = 'bumpleak/osh'
144
145 } else if (endsWith(sh, '_bin/osh')) { # the app bundle
146 label = 'osh-ovm'
147
148 } else if (endsWith(sh, 'bin/osh')) {
149 label = 'osh-cpython'
150
151 } else {
152 label = sh
153 }
154 labels = c(labels, label)
155 }
156 return(labels)
157}
158
159DistinctHosts = function(t) {
160 t %>% distinct(host_name, host_hash) -> distinct_hosts
161 # The label is just the name
162 distinct_hosts$host_label = distinct_hosts$host_name
163 return(distinct_hosts)
164}
165
166DistinctShells = function(t, num_hosts = -1) {
167 t %>% distinct(shell_name, shell_hash) -> distinct_shells
168
169 Log('')
170 Log('Labeling shells')
171
172 # Calculate it if not passed
173 if (num_hosts == -1) {
174 num_hosts = nrow(DistinctHosts(t))
175 }
176
177 distinct_shells$shell_label = ShellLabels(distinct_shells$shell_name,
178 distinct_shells$shell_hash,
179 num_hosts)
180 return(distinct_shells)
181}
182
183ParserReport = function(in_dir, out_dir) {
184 times = read.csv(file.path(in_dir, 'times.csv'))
185 lines = read.csv(file.path(in_dir, 'lines.csv'))
186 raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))
187
188 cachegrind = readTsv(file.path(in_dir, 'cachegrind.tsv'))
189
190 # For joining by filename
191 lines_by_filename = tibble(
192 num_lines = lines$num_lines,
193 filename = basename(lines$path)
194 )
195
196 # Remove failures
197 times %>% filter(status == 0) %>% select(-c(status)) -> times
198 cachegrind %>% filter(status == 0) %>% select(-c(status)) -> cachegrind
199
200 # Add the number of lines, joining on path, and compute lines/ms
201 times %>%
202 left_join(lines, by = c('path')) %>%
203 mutate(filename = basename(path), filename_HREF = sourceUrl(path),
204 max_rss_MB = max_rss_KiB * 1024 / 1e6,
205 elapsed_ms = elapsed_secs * 1000,
206 user_ms = user_secs * 1000,
207 sys_ms = sys_secs * 1000,
208 lines_per_ms = num_lines / elapsed_ms) %>%
209 select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs)) ->
210 joined_times
211
212 #print(head(times))
213 #print(head(lines))
214 #print(head(vm))
215 #print(head(joined_times))
216
217 print(summary(joined_times))
218
219 #
220 # Find distinct shells and hosts, and label them for readability.
221 #
222
223 distinct_hosts = DistinctHosts(joined_times)
224 Log('')
225 Log('Distinct hosts')
226 print(distinct_hosts)
227
228 distinct_shells = DistinctShells(joined_times)
229 Log('')
230 Log('Distinct shells')
231 print(distinct_shells)
232
233 # Replace name/hash combinations with labels.
234 joined_times %>%
235 left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
236 left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
237 select(-c(host_name, host_hash, shell_name, shell_hash)) ->
238 joined_times
239
240 # Like 'times', but do shell_label as one step
241 # Hack: we know benchmarks/auto.sh runs this on one machine
242 distinct_shells_2 = DistinctShells(cachegrind, num_hosts = nrow(distinct_hosts))
243 cachegrind %>%
244 left_join(lines, by = c('path')) %>%
245 select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
246 left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
247 select(-c(shell_name, shell_hash)) %>%
248 mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
249 select(-c(path)) ->
250 joined_cachegrind
251
252 Log('summary(joined_times):')
253 print(summary(joined_times))
254 Log('head(joined_times):')
255 print(head(joined_times))
256
257 # Summarize rates by platform/shell
258 joined_times %>%
259 mutate(host_label = paste("host", host_label)) %>%
260 group_by(host_label, shell_label) %>%
261 summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
262 mutate(lines_per_ms = total_lines / total_ms) %>%
263 select(-c(total_ms)) %>%
264 spread(key = host_label, value = lines_per_ms) ->
265 times_summary
266
267 # Sort by parsing rate on machine 1
268 if ("host hoover" %in% colnames(times_summary)) {
269 times_summary %>% arrange(desc(`host hoover`)) -> times_summary
270 } else {
271 times_summary %>% arrange(desc(`host no-host`)) -> times_summary
272 }
273
274 Log('times_summary:')
275 print(times_summary)
276
277 # Summarize cachegrind by platform/shell
278 # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
279 joined_cachegrind %>%
280 group_by(shell_label) %>%
281 summarize(total_lines = sum(num_lines), total_irefs = sum(as.numeric(irefs))) %>%
282 mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
283 select(-c(total_irefs)) ->
284 cachegrind_summary
285
286 if ("no-host" %in% distinct_hosts$host_label) {
287
288 # We don't have all the shells
289 elapsed = NULL
290 rate = NULL
291 max_rss = NULL
292 instructions = NULL
293
294 joined_times %>%
295 select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
296 num_lines, filename, filename_HREF)) %>%
297 arrange(filename, elapsed_ms) ->
298 times_flat
299
300 joined_cachegrind %>%
301 select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
302 arrange(filename, irefs) ->
303 cachegrind_flat
304
305 } else {
306
307 times_flat = NULL
308 cachegrind_flat = NULL
309
310 # Elapsed seconds for each shell by platform and file
311 joined_times %>%
312 select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
313 spread(key = shell_label, value = elapsed_ms) %>%
314 arrange(host_label, num_lines) %>%
315 mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
316 select(c(host_label, bash, dash, mksh, zsh,
317 `osh-ovm`, `osh-cpython`, `osh-native`, `osh-native-souffle`,
318 osh_to_bash_ratio, num_lines, filename, filename_HREF)) ->
319 elapsed
320
321 Log('\n')
322 Log('ELAPSED')
323 print(elapsed)
324
325 # Rates by file and shell
326 joined_times %>%
327 select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
328 spread(key = shell_label, value = lines_per_ms) %>%
329 arrange(host_label, num_lines) %>%
330 select(c(host_label, bash, dash, mksh, zsh,
331 `osh-ovm`, `osh-cpython`, `osh-native`, `osh-native-souffle`,
332 num_lines, filename, filename_HREF)) ->
333 rate
334
335 Log('\n')
336 Log('RATE')
337 print(rate)
338
339 # Memory usage by file
340 joined_times %>%
341 select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
342 spread(key = shell_label, value = max_rss_MB) %>%
343 arrange(host_label, num_lines) %>%
344 select(c(host_label, bash, dash, mksh, zsh,
345 `osh-ovm`, `osh-cpython`, `osh-native`, `osh-native-souffle`,
346 num_lines, filename, filename_HREF)) ->
347 max_rss
348
349 Log('\n')
350 Log('MAX RSS')
351 print(max_rss)
352
353 Log('\n')
354 Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
355 print(joined_cachegrind)
356 #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))
357
358 # Cachegrind instructions by file
359 joined_cachegrind %>%
360 mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
361 select(-c(irefs)) %>%
362 spread(key = shell_label, value = thousand_irefs_per_line) %>%
363 arrange(num_lines) %>%
364 select(c(bash, dash, mksh, `osh-native`, `osh-native-souffle`,
365 num_lines, filename, filename_HREF)) ->
366 instructions
367
368 Log('\n')
369 Log('instructions has %d rows', nrow(instructions))
370 print(instructions)
371 }
372
373 WriteProvenance(distinct_hosts, distinct_shells, out_dir)
374
375 raw_data_table = tibble(
376 filename = basename(as.character(raw_data$path)),
377 filename_HREF = benchmarkDataLink('osh-parser', filename, '')
378 )
379 #print(raw_data_table)
380
381 writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))
382
383 precision = SamePrecision(0) # lines per ms
384 writeCsv(times_summary, file.path(out_dir, 'summary'), precision)
385
386 precision = ColumnPrecision(list(), default = 1)
387 writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'), precision)
388
389 if (!is.null(times_flat)) {
390 precision = SamePrecision(0)
391 writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
392 }
393
394 if (!is.null(cachegrind_flat)) {
395 precision = SamePrecision(0)
396 writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
397 }
398
399 if (!is.null(elapsed)) { # equivalent to no-host
400 # Round to nearest millisecond, but the ratio has a decimal point.
401 precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
402 writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)
403
404 precision = SamePrecision(0)
405 writeCsv(rate, file.path(out_dir, 'rate'), precision)
406
407 writeCsv(max_rss, file.path(out_dir, 'max_rss'))
408
409 precision = SamePrecision(1)
410 writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
411 }
412
413 Log('Wrote %s', out_dir)
414}
415
416WriteProvenance = function(distinct_hosts, distinct_shells, out_dir, tsv = F) {
417
418 num_hosts = nrow(distinct_hosts)
419 if (num_hosts == 1) {
420 linkify = provenanceLink
421 } else {
422 linkify = benchmarkDataLink
423 }
424
425 Log('distinct_hosts')
426 print(distinct_hosts)
427 Log('')
428
429 Log('distinct_shells')
430 print(distinct_shells)
431 Log('')
432
433 # Should be:
434 # host_id_url
435 # And then csv_to_html will be smart enough? It should take --url flag?
436 host_table = tibble(
437 host_label = distinct_hosts$host_label,
438 host_id = paste(distinct_hosts$host_name,
439 distinct_hosts$host_hash, sep='-'),
440 host_id_HREF = linkify('host-id', host_id, '/')
441 )
442 Log('host_table')
443 print(host_table)
444 Log('')
445
446 shell_table = tibble(
447 shell_label = distinct_shells$shell_label,
448 shell_id = paste(distinct_shells$shell_name,
449 distinct_shells$shell_hash, sep='-'),
450 shell_id_HREF = linkify('shell-id', shell_id, '/')
451 )
452
453 Log('shell_table')
454 print(shell_table)
455 Log('')
456
457 if (tsv) {
458 writeTsv(host_table, file.path(out_dir, 'hosts'))
459 writeTsv(shell_table, file.path(out_dir, 'shells'))
460 } else {
461 writeCsv(host_table, file.path(out_dir, 'hosts'))
462 writeCsv(shell_table, file.path(out_dir, 'shells'))
463 }
464}
465
466WriteSimpleProvenance = function(provenance, out_dir) {
467 Log('provenance')
468 print(provenance)
469 Log('')
470
471 # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
472 # benchmarks/id.sh publish-shell-id uses
473 provenance %>%
474 mutate(shell_name = basename(sh_path)) %>%
475 distinct(shell_label, shell_name, shell_hash) ->
476 distinct_shells
477
478 Log('distinct_shells')
479 print(distinct_shells)
480 Log('')
481
482 provenance %>% distinct(host_label, host_name, host_hash) -> distinct_hosts
483
484 WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
485}
486
487RuntimeReport = function(in_dir, out_dir) {
488 times = readTsv(file.path(in_dir, 'times.tsv'))
489
490 gc_stats = readTsv(file.path(in_dir, 'gc_stats.tsv'))
491 provenance = readTsv(file.path(in_dir, 'provenance.tsv'))
492
493 times %>% filter(status != 0) -> failed
494 if (nrow(failed) != 0) {
495 print(failed)
496 stop('Some osh-runtime tasks failed')
497 }
498
499 # Joins:
500 # times <= sh_path => provenance
501 # times <= join_id, host_name => gc_stats
502
503 # TODO: provenance may have rows from 2 machines. Could validate them and
504 # deduplicate.
505
506 # It should have (host_label, host_name, host_hash)
507 # (shell_label, sh_path, shell_hash)
508 provenance %>%
509 mutate(host_label = host_name, shell_label = ShellLabelFromPath(sh_path)) ->
510 provenance
511
512 provenance %>% distinct(sh_path, shell_label) -> label_lookup
513
514 Log('label_lookup')
515 print(label_lookup)
516
517 # Join with provenance for host label and shell label
518 times %>%
519 select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
520 host_name, sh_path, workload)) %>%
521 mutate(elapsed_ms = elapsed_secs * 1000,
522 user_ms = user_secs * 1000,
523 sys_ms = sys_secs * 1000,
524 max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
525 select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
526 left_join(label_lookup, by = c('sh_path')) %>%
527 select(-c(sh_path)) %>%
528 # we want to compare workloads on adjacent rows
529 arrange(workload) ->
530 details
531
532 times %>%
533 select(c(task_id, host_name, sh_path, workload, minor_faults, major_faults, swaps, in_block, out_block, signals, voluntary_ctx, involuntary_ctx)) %>%
534 left_join(label_lookup, by = c('sh_path')) %>%
535 select(-c(sh_path)) %>%
536 # we want to compare workloads on adjacent rows
537 arrange(workload) ->
538 details_io
539
540 Log('details')
541 print(details)
542
543 # Elapsed time comparison
544 details %>%
545 select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
546 spread(key = shell_label, value = elapsed_ms) %>%
547 mutate(py_bash_ratio = `osh-cpython` / bash) %>%
548 mutate(native_bash_ratio = `osh-native` / bash) %>%
549 arrange(workload, host_name) %>%
550 select(c(workload, host_name,
551 bash, dash, `osh-cpython`, `osh-native`, `osh-native-souffle`,
552 py_bash_ratio, native_bash_ratio)) ->
553
554 elapsed
555
556 Log('elapsed')
557 print(elapsed)
558
559 # Minor Page Faults Comparison
560 details_io %>%
561 select(c(host_name, shell_label, workload, minor_faults)) %>%
562 spread(key = shell_label, value = minor_faults) %>%
563 mutate(py_bash_ratio = `osh-cpython` / bash) %>%
564 mutate(native_bash_ratio = `osh-native` / bash) %>%
565 arrange(workload, host_name) %>%
566 select(c(workload, host_name,
567 bash, dash, `osh-cpython`, `osh-native`, `osh-native-souffle`,
568 py_bash_ratio, native_bash_ratio)) ->
569 page_faults
570
571 Log('page_faults')
572 print(page_faults)
573
574 # Max RSS comparison
575 details %>%
576 select(c(host_name, shell_label, workload, max_rss_MB)) %>%
577 spread(key = shell_label, value = max_rss_MB) %>%
578 mutate(py_bash_ratio = `osh-cpython` / bash) %>%
579 mutate(native_bash_ratio = `osh-native` / bash) %>%
580 arrange(workload, host_name) %>%
581 select(c(workload, host_name,
582 bash, dash, `osh-cpython`, `osh-native`, `osh-native-souffle`,
583 py_bash_ratio, native_bash_ratio)) ->
584 max_rss
585
586 Log('max rss')
587 print(max_rss)
588
589 details %>%
590 select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
591 mutate(join_id = sprintf("gc-%d", task_id)) %>%
592 select(-c(task_id)) ->
593 gc_details
594
595 Log('GC details')
596 print(gc_details)
597 Log('')
598
599 Log('GC stats')
600 print(gc_stats)
601 Log('')
602
603 gc_stats %>%
604 left_join(gc_details, by = c('join_id', 'host_name')) %>%
605 select(-c(join_id, roots_capacity, objs_capacity)) %>%
606 # Do same transformations as GcReport()
607 mutate(allocated_MB = bytes_allocated / 1e6) %>%
608 select(-c(bytes_allocated)) %>%
609 rename(num_gc_done = num_collections) %>%
610 # Put these columns first
611 relocate(workload, host_name,
612 elapsed_ms, max_gc_millis, total_gc_millis,
613 allocated_MB, max_rss_MB, num_allocated) ->
614 gc_stats
615
616 Log('After GC stats')
617 print(gc_stats)
618 Log('')
619
620 WriteSimpleProvenance(provenance, out_dir)
621
622 # milliseconds don't need decimal digit
623 precision = ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
624 `osh-native` = 0, `osh-native-souffle` = 0, py_bash_ratio = 2,
625 native_bash_ratio = 2))
626 writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
627 writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)
628
629 precision2 = ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
630 writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)
631
632 precision3 = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
633 default = 0)
634 writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision3)
635
636 writeTsv(details, file.path(out_dir, 'details'), precision3)
637 writeTsv(details_io, file.path(out_dir, 'details_io'))
638
639 Log('Wrote %s', out_dir)
640}
641
642VmBaselineReport = function(in_dir, out_dir) {
643 vm = readTsv(file.path(in_dir, 'vm-baseline.tsv'))
644 #print(vm)
645
646 # Not using DistinctHosts() because field host_hash isn't collected
647 num_hosts = nrow(vm %>% distinct(host))
648
649 vm %>%
650 rename(kib = metric_value) %>%
651 mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
652 megabytes = kib * 1024 / 1e6) %>%
653 select(-c(shell_name, kib)) %>%
654 spread(key = c(metric_name), value = megabytes) %>%
655 rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
656 select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
657 arrange(shell_label, shell_hash, host, VmPeak_MB) ->
658 vm
659
660 print(vm)
661
662 writeTsv(vm, file.path(out_dir, 'vm-baseline'))
663}
664
665WriteOvmBuildDetails = function(distinct_hosts, distinct_compilers, out_dir) {
666 host_table = tibble(
667 host_label = distinct_hosts$host_label,
668 host_id = paste(distinct_hosts$host_name,
669 distinct_hosts$host_hash, sep='-'),
670 host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
671 )
672 print(host_table)
673
674 dc = distinct_compilers
675 compiler_table = tibble(
676 compiler_label = dc$compiler_label,
677 compiler_id = paste(dc$compiler_label, dc$compiler_hash, sep='-'),
678 compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
679 )
680 print(compiler_table)
681
682 writeTsv(host_table, file.path(out_dir, 'hosts'))
683 writeTsv(compiler_table, file.path(out_dir, 'compilers'))
684}
685
686OvmBuildReport = function(in_dir, out_dir) {
687 times = readTsv(file.path(in_dir, 'times.tsv'))
688 native_sizes = readTsv(file.path(in_dir, 'native-sizes.tsv'))
689 #raw_data = readTsv(file.path(in_dir, 'raw-data.tsv'))
690
691 times %>% filter(status != 0) -> failed
692 if (nrow(failed) != 0) {
693 print(failed)
694 stop('Some ovm-build tasks failed')
695 }
696
697 times %>% distinct(host_name, host_hash) -> distinct_hosts
698 distinct_hosts$host_label = distinct_hosts$host_name
699
700 times %>% distinct(compiler_path, compiler_hash) -> distinct_compilers
701 distinct_compilers$compiler_label = basename(distinct_compilers$compiler_path)
702
703 #print(distinct_hosts)
704 #print(distinct_compilers)
705
706 WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)
707
708 times %>%
709 select(-c(status)) %>%
710 left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
711 left_join(distinct_compilers, by = c('compiler_path', 'compiler_hash')) %>%
712 select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
713 mutate(src_dir = basename(src_dir),
714 host_label = paste("host ", host_label),
715 is_conf = str_detect(action, 'configure'),
716 is_ovm = str_detect(action, 'oil.ovm'),
717 is_dbg = str_detect(action, 'dbg'),
718 ) %>%
719 select(host_label, src_dir, compiler_label, action, is_conf, is_ovm, is_dbg,
720 elapsed_secs) %>%
721 spread(key = c(host_label), value = elapsed_secs) %>%
722 arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
723 select(-c(is_conf, is_ovm, is_dbg)) ->
724 times
725
726 #print(times)
727
728 # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
729 native_sizes %>%
730 select(c(host_label, path, num_bytes)) %>%
731 mutate(host_label = paste("host ", host_label),
732 binary = basename(path),
733 compiler = basename(dirname(path)),
734 ) %>%
735 select(-c(path)) %>%
736 spread(key = c(host_label), value = num_bytes) %>%
737 arrange(compiler, binary) ->
738 native_sizes
739
740 # NOTE: These don't have the host and compiler.
741 writeTsv(times, file.path(out_dir, 'times'))
742 writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))
743
744 # TODO: I want a size report too
745 #writeCsv(sizes, file.path(out_dir, 'sizes'))
746}
747
748unique_stdout_md5sum = function(t, num_expected) {
749 u = n_distinct(t$stdout_md5sum)
750 if (u != num_expected) {
751 t %>% select(c(host_name, task_name, arg1, arg2, runtime_name, stdout_md5sum)) %>% print()
752 stop(sprintf('Expected %d unique md5sums, got %d', num_expected, u))
753 }
754}
755
756ComputeReport = function(in_dir, out_dir) {
757 # TSV file, not CSV
758 times = read.table(file.path(in_dir, 'times.tsv'), header=T)
759 print(times)
760
761 times %>% filter(status != 0) -> failed
762 if (nrow(failed) != 0) {
763 print(failed)
764 stop('Some compute tasks failed')
765 }
766
767 #
768 # Check correctness
769 #
770
771 times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
772 times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
773 times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
774 # 3 different inputs
775 times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)
776
777 times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)
778
779 # TODO:
780 # - oils_cpp doesn't implement unicode LANG=C
781 # - bash behaves differently on your desktop vs. in the container
782 # - might need layer-locales in the image?
783
784 #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
785 # Ditto here
786 #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)
787
788 #
789 # Find distinct shells and hosts, and label them for readability.
790 #
791
792 # Runtimes are called shells, as a hack for code reuse
793 times %>%
794 mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
795 select(c(host_name, host_hash, shell_name, shell_hash)) ->
796 tmp
797
798 distinct_hosts = DistinctHosts(tmp)
799 Log('')
800 Log('Distinct hosts')
801 print(distinct_hosts)
802
803 distinct_shells = DistinctShells(tmp)
804 Log('')
805 Log('Distinct runtimes')
806 print(distinct_shells)
807
808 num_hosts = nrow(distinct_hosts)
809
810 times %>%
811 select(-c(status, stdout_md5sum, stdout_filename, host_hash, runtime_hash)) %>%
812 mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
813 elapsed_ms = elapsed_secs * 1000,
814 user_ms = user_secs * 1000,
815 sys_ms = sys_secs * 1000,
816 max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
817 select(-c(runtime_name, elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
818 arrange(host_name, task_name, arg1, arg2, user_ms) ->
819 details
820
821 times %>%
822 mutate(
823 runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
824 stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
825 select(c(host_name, task_name, arg1, arg2, runtime_label,
826 stdout_md5sum, stdout_md5sum_HREF)) ->
827 stdout_files
828
829 details %>% filter(task_name == 'hello') %>% select(-c(task_name)) -> hello
830 details %>% filter(task_name == 'fib') %>% select(-c(task_name)) -> fib
831 details %>% filter(task_name == 'word_freq') %>% select(-c(task_name)) -> word_freq
832 # There's no arg2
833 details %>% filter(task_name == 'parse_help') %>% select(-c(task_name, arg2)) -> parse_help
834
835 details %>% filter(task_name == 'bubble_sort') %>% select(-c(task_name)) -> bubble_sort
836 details %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% select(-c(task_name)) -> palindrome
837
838 precision = ColumnPrecision(list(max_rss_MB = 1), default = 0)
839 writeTsv(details, file.path(out_dir, 'details'), precision)
840
841 writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)
842
843 writeTsv(hello, file.path(out_dir, 'hello'), precision)
844 writeTsv(fib, file.path(out_dir, 'fib'), precision)
845 writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
846 writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)
847
848 writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
849 writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)
850
851 WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
852}
853
854WriteOneTask = function(times, out_dir, task_name, precision) {
855 times %>%
856 filter(task == task_name) %>%
857 select(-c(task)) -> subset
858
859 writeTsv(subset, file.path(out_dir, task_name), precision)
860}
861
862SHELL_ORDER = c('dash',
863 'bash',
864 'zsh',
865 '_bin/cxx-opt+bumpleak/osh',
866 '_bin/cxx-opt+bumproot/osh',
867 '_bin/cxx-opt+bumpsmall/osh',
868 '_bin/cxx-opt/osh',
869 '_bin/cxx-opt+nopool/osh')
870
871GcReport = function(in_dir, out_dir) {
872 times = read.table(file.path(in_dir, 'raw/times.tsv'), header=T)
873 gc_stats = read.table(file.path(in_dir, 'stage1/gc_stats.tsv'), header=T)
874
875 times %>% filter(status != 0) -> failed
876 if (nrow(failed) != 0) {
877 print(failed)
878 stop('Some gc tasks failed')
879 }
880
881 # Change units and order columns
882 times %>%
883 arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
884 mutate(elapsed_ms = elapsed_secs * 1000,
885 user_ms = user_secs * 1000,
886 sys_ms = sys_secs * 1000,
887 max_rss_MB = max_rss_KiB * 1024 / 1e6,
888 shell_label = ShellLabelFromPath(sh_path)
889 ) %>%
890 select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB, shell_label,
891 shell_runtime_opts)) ->
892 times
893
894 # Join and order columns
895 gc_stats %>% left_join(times, by = c('join_id')) %>%
896 arrange(desc(task)) %>%
897 mutate(allocated_MB = bytes_allocated / 1e6) %>%
898 # try to make the table skinnier
899 rename(num_gc_done = num_collections) %>%
900 select(task, elapsed_ms, max_gc_millis, total_gc_millis,
901 allocated_MB, max_rss_MB, num_allocated,
902 num_gc_points, num_gc_done, gc_threshold, num_growths, max_survived,
903 shell_label) ->
904 gc_stats
905
906 times %>% select(-c(join_id)) -> times
907
908
909 precision = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
910 default = 0)
911
912 writeTsv(times, file.path(out_dir, 'times'), precision)
913 writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)
914
915 tasks = c('parse.configure-coreutils',
916 'parse.configure-cpython',
917 'parse.abuild',
918 'ex.compute-fib',
919 'ex.bashcomp-parse-help',
920 'ex.abuild-print-help')
921 # Write out separate rows
922 for (task in tasks) {
923 WriteOneTask(times, out_dir, task, precision)
924 }
925}
926
927GcCachegrindReport = function(in_dir, out_dir) {
928 times = readTsv(file.path(in_dir, 'raw/times.tsv'))
929 counts = readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))
930
931 times %>% filter(status != 0) -> failed
932 if (nrow(failed) != 0) {
933 print(failed)
934 stop('Some gc tasks failed')
935 }
936
937 print(times)
938 print(counts)
939
940 counts %>% left_join(times, by = c('join_id')) %>%
941 mutate(million_irefs = irefs / 1e6) %>%
942 select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
943 arrange(factor(sh_path, levels = SHELL_ORDER)) ->
944 counts
945
946 precision = NULL
947 tasks = c('parse.abuild', 'ex.compute-fib')
948 for (task in tasks) {
949 WriteOneTask(counts, out_dir, task, precision)
950 }
951}
952
953MyCppReport = function(in_dir, out_dir) {
954 times = readTsv(file.path(in_dir, 'benchmark-table.tsv'))
955 print(times)
956
957 times %>% filter(status != 0) -> failed
958 if (nrow(failed) != 0) {
959 print(failed)
960 stop('Some mycpp tasks failed')
961 }
962
963 # Don't care about elapsed and system
964 times %>% select(-c(status, elapsed_secs, bin, task_out)) %>%
965 mutate(example_name_HREF = mycppUrl(example_name),
966 gen = c('gen'),
967 gen_HREF = genUrl(example_name),
968 user_ms = user_secs * 1000,
969 sys_ms = sys_secs * 1000,
970 max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
971 select(-c(user_secs, sys_secs, max_rss_KiB)) ->
972 details
973
974 details %>% select(-c(sys_ms, max_rss_MB)) %>%
975 spread(key = impl, value = user_ms) %>%
976 mutate(`C++ : Python` = `C++` / Python) %>%
977 arrange(`C++ : Python`) ->
978 user_time
979
980 details %>% select(-c(user_ms, max_rss_MB)) %>%
981 spread(key = impl, value = sys_ms) %>%
982 mutate(`C++ : Python` = `C++` / Python) %>%
983 arrange(`C++ : Python`) ->
984 sys_time
985
986 details %>% select(-c(user_ms, sys_ms)) %>%
987 spread(key = impl, value = max_rss_MB) %>%
988 mutate(`C++ : Python` = `C++` / Python) %>%
989 arrange(`C++ : Python`) ->
990 max_rss
991
992 # Sometimes it speeds up by more than 10x
993 precision1 = ColumnPrecision(list(`C++ : Python` = 3), default = 0)
994 writeTsv(user_time, file.path(out_dir, 'user_time'), precision1)
995 writeTsv(sys_time, file.path(out_dir, 'sys_time'), precision1)
996
997 precision2 = ColumnPrecision(list(`C++ : Python` = 2), default = 1)
998 writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)
999
1000 writeTsv(details, file.path(out_dir, 'details'))
1001}
1002
1003UftraceTaskReport = function(env, task_name, summaries) {
1004 # Need this again after redirect
1005 MaybeDisableColor(stdout())
1006
1007 task_env = env[[task_name]]
1008
1009 untyped = task_env$untyped
1010 typed = task_env$typed
1011 strings = task_env$strings
1012 slabs = task_env$slabs
1013 reserve = task_env$reserve
1014
1015 string_overhead = 17 # GC header (8) + len (4) + hash value (4) + NUL (1)
1016 strings %>% mutate(obj_len = str_len + string_overhead) -> strings
1017
1018 # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
1019 # big/small
1020 #
1021 # And then zoom in on distributions as well
1022
1023 num_allocs = nrow(untyped)
1024 total_bytes = sum(untyped$obj_len)
1025
1026 untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
1027 #print(untyped_hist)
1028
1029 untyped_hist %>%
1030 mutate(n_less_than = cumsum(n),
1031 percent = n_less_than * 100.0 / num_allocs) ->
1032 alloc_sizes
1033
1034 a24 = untyped_hist %>% filter(obj_len <= 24)
1035 a48 = untyped_hist %>% filter(obj_len <= 48)
1036 a96 = untyped_hist %>% filter(obj_len <= 96)
1037
1038 allocs_24_bytes_or_less = sum(a24$n) * 100.0 / num_allocs
1039 allocs_48_bytes_or_less = sum(a48$n) * 100.0 / num_allocs
1040 allocs_96_bytes_or_less = sum(a96$n) * 100.0 / num_allocs
1041
1042 Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)
1043
1044 options(tibble.print_min=25)
1045
1046 Log('')
1047 Log('All allocations')
1048 print(alloc_sizes %>% head(22))
1049 print(alloc_sizes %>% tail(5))
1050
1051 Log('')
1052 Log('Common Sizes')
1053 print(untyped_hist %>% arrange(desc(n)) %>% head(8))
1054
1055 Log('')
1056 Log(' %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
1057 Log('')
1058
1059 Log('Typed allocations')
1060
1061 num_typed = nrow(typed)
1062
1063 typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
1064 mutate(percent = n * 100.0 / num_typed) %>%
1065 arrange(desc(n)) -> most_common_types
1066
1067 print(most_common_types %>% head(20))
1068 print(most_common_types %>% tail(5))
1069
1070 lists = typed %>% filter(str_starts(func_name, ('List<')))
1071 #print(lists)
1072
1073 num_lists = nrow(lists)
1074 total_list_bytes = num_lists * 24 # sizeof List<T> head is hard-coded
1075
1076 Log('')
1077 Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
1078 Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
1079 Log('')
1080
1081 #
1082 # Strings
1083 #
1084
1085 num_strings = nrow(strings)
1086 total_string_bytes = sum(strings$obj_len)
1087
1088 strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
1089 mutate(n_less_than = cumsum(n),
1090 percent = n_less_than * 100.0 / num_strings) ->
1091 string_lengths
1092
1093 strs_6_bytes_or_less = string_lengths %>% filter(str_len == 6) %>% select(percent)
1094 strs_14_bytes_or_less = string_lengths %>% filter(str_len == 14) %>% select(percent)
1095
1096 # Parse workload
1097 # 62% of strings <= 6 bytes
1098 # 84% of strings <= 14 bytes
1099
1100 Log('Str - NewStr() and OverAllocatedStr()')
1101 print(string_lengths %>% head(16))
1102 print(string_lengths %>% tail(5))
1103 Log('')
1104
1105 Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
1106 commas(sum(strings$str_len)), commas(total_string_bytes))
1107 Log('')
1108 Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
1109 Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
1110 Log('')
1111
1112 #
1113 # Slabs
1114 #
1115
1116 Log('NewSlab()')
1117
1118 num_slabs = nrow(slabs)
1119 slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
1120 mutate(n_less_than = cumsum(n),
1121 percent = n_less_than * 100.0 / num_slabs) ->
1122 slab_lengths
1123
1124 slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
1125 arrange(desc(n)) -> slab_types
1126
1127 Log(' Lengths')
1128 print(slab_lengths %>% head())
1129 print(slab_lengths %>% tail(5))
1130 Log('')
1131
1132 Log(' Slab Types')
1133 print(slab_types %>% head())
1134 print(slab_types %>% tail(5))
1135 Log('')
1136
1137 total_slab_items = sum(slabs$slab_len)
1138
1139 Log('%s slabs, total items = %s', commas(num_slabs),
1140 commas(sum(slabs$slab_len)))
1141 Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
1142 Log('')
1143
1144 #
1145 # reserve() calls
1146 #
1147
1148 # There should be strictly more List::reserve() calls than NewSlab
1149
1150 Log('::reserve(int n)')
1151 Log('')
1152
1153 num_reserve = nrow(reserve)
1154 reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
1155 mutate(n_less_than = cumsum(n),
1156 percent = n_less_than * 100.0 / num_reserve) ->
1157 reserve_args
1158
1159 Log(' Num Items')
1160 print(reserve_args %>% head(15))
1161 print(reserve_args %>% tail(5))
1162 Log('')
1163
1164 Log('%s reserve() calls, total items = %s', commas(num_reserve),
1165 commas(sum(reserve$num_items)))
1166 Log('')
1167
1168 # Accounting for all allocations!
1169 Log('Untyped: %s', commas(num_allocs))
1170 Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
1171 Log('')
1172
1173 num_other_typed = num_typed - num_lists
1174
1175 # Summary table
1176 stats = tibble(task = task_name,
1177 total_bytes_ = commas(total_bytes),
1178 num_allocs_ = commas(num_allocs),
1179 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
1180 num_reserve_calls = commas(num_reserve),
1181
1182 percent_list_allocs = Percent(num_lists, num_allocs),
1183 percent_slab_allocs = Percent(num_slabs, num_allocs),
1184 percent_string_allocs = Percent(num_strings, num_allocs),
1185 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),
1186
1187 percent_list_bytes = Percent(total_list_bytes, total_bytes),
1188 percent_string_bytes = Percent(total_string_bytes, total_bytes),
1189
1190 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
1191 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
1192 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),
1193
1194 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
1195 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less),
1196 )
1197 summaries$stats[[task_name]] = stats
1198
1199 summaries$most_common_types[[task_name]] = most_common_types
1200}
1201
1202LoadUftraceTsv = function(in_dir, env) {
1203 for (task in list.files(in_dir)) {
1204 Log('Loading data for task %s', task)
1205 base_dir = file.path(in_dir, task)
1206
1207 task_env = new.env()
1208 env[[task]] = task_env
1209
1210 # TSV file, not CSV
1211 task_env$untyped = readTsv(file.path(base_dir, 'all-untyped.tsv'))
1212 task_env$typed = readTsv(file.path(base_dir, 'typed.tsv'))
1213 task_env$strings = readTsv(file.path(base_dir, 'strings.tsv'))
1214 task_env$slabs = readTsv(file.path(base_dir, 'slabs.tsv'))
1215 task_env$reserve = readTsv(file.path(base_dir, 'reserve.tsv'))
1216
1217 # median string length is 4, mean is 9.5!
1218 Log('UNTYPED')
1219 print(summary(task_env$untyped))
1220 Log('')
1221
1222 Log('TYPED')
1223 print(summary(task_env$typed))
1224 Log('')
1225
1226 Log('STRINGS')
1227 print(summary(task_env$strings))
1228 Log('')
1229
1230 Log('SLABS')
1231 print(summary(task_env$slabs))
1232 Log('')
1233
1234 Log('RESERVE')
1235 print(summary(task_env$reserve))
1236 Log('')
1237 }
1238}
1239
1240Percent = function(n, total) {
1241 sprintf('%.1f%%', n * 100.0 / total)
1242}
1243
1244PrettyPrintLong = function(d) {
1245 tr = t(d) # transpose
1246
1247 row_names = rownames(tr)
1248
1249 for (i in 1:nrow(tr)) {
1250 row_name = row_names[i]
1251 cat(sprintf('%26s', row_name)) # calculated min width manually
1252 cat(sprintf('%20s', tr[i,]))
1253 cat('\n')
1254
1255 # Extra spacing
1256 if (row_name %in% c('num_reserve_calls',
1257 'percent_string_bytes',
1258 'percent_other_typed_allocs',
1259 'allocs_96_bytes_or_less')) {
1260 cat('\n')
1261 }
1262 }
1263}
1264
1265
1266UftraceReport = function(env, out_dir) {
1267 # summaries$stats should be a list of 1-row data frames
1268 # summaries$top_types should be a list of types
1269 summaries = new.env()
1270
1271 for (task_name in names(env)) {
1272 report_out = file.path(out_dir, paste0(task_name, '.txt'))
1273
1274 Log('Making report for task %s -> %s', task_name, report_out)
1275
1276 sink(file = report_out)
1277 UftraceTaskReport(env, task_name, summaries)
1278 sink() # reset
1279 }
1280 Log('')
1281
1282 # Concate all the data frames added to summary
1283 stats = bind_rows(as.list(summaries$stats))
1284
1285 sink(file = file.path(out_dir, 'summary.txt'))
1286 #print(stats)
1287 #Log('')
1288
1289 PrettyPrintLong(stats)
1290 Log('')
1291
1292 mct = summaries$most_common_types
1293 for (task_name in names(mct)) {
1294 Log('Common types in workload %s', task_name)
1295 Log('')
1296
1297 print(mct[[task_name]] %>% head(5))
1298 Log('')
1299 }
1300 sink()
1301
1302 # For the REPL
1303 return(list(stats = stats))
1304}
1305
1306main = function(argv) {
1307 action = argv[[1]]
1308 in_dir = argv[[2]]
1309 out_dir = argv[[3]]
1310
1311 if (action == 'osh-parser') {
1312 ParserReport(in_dir, out_dir)
1313
1314 } else if (action == 'osh-runtime') {
1315 RuntimeReport(in_dir, out_dir)
1316
1317 } else if (action == 'vm-baseline') {
1318 VmBaselineReport(in_dir, out_dir)
1319
1320 } else if (action == 'ovm-build') {
1321 OvmBuildReport(in_dir, out_dir)
1322
1323 } else if (action == 'compute') {
1324 ComputeReport(in_dir, out_dir)
1325
1326 } else if (action == 'gc') {
1327 GcReport(in_dir, out_dir)
1328
1329 } else if (action == 'gc-cachegrind') {
1330 GcCachegrindReport(in_dir, out_dir)
1331
1332 } else if (action == 'mycpp') {
1333 MyCppReport(in_dir, out_dir)
1334
1335 } else if (action == 'uftrace') {
1336 d = new.env()
1337 LoadUftraceTsv(in_dir, d)
1338 UftraceReport(d, out_dir)
1339
1340 } else {
1341 Log("Invalid action '%s'", action)
1342 quit(status = 1)
1343 }
1344 Log('PID %d done', Sys.getpid())
1345}
1346
1347if (length(sys.frames()) == 0) {
1348 # increase ggplot font size globally
1349 #theme_set(theme_grey(base_size = 20))
1350
1351 main(commandArgs(TRUE))
1352}