OILS / benchmarks / report.R View on Github | oils.pub

1366 lines, 943 significant
1#!/usr/bin/env Rscript
2#
3# benchmarks/report.R -- Analyze data collected by shell scripts.
4#
5# Usage:
6# benchmarks/report.R OUT_DIR [TIMES_CSV...]
7
8# Suppress warnings about functions masked from 'package:stats' and 'package:base'
9# filter, lag
10# intersect, setdiff, setequal, union
11library(dplyr, warn.conflicts = FALSE)
12library(tidyr) # spread()
13library(stringr)
14
15source('benchmarks/common.R')
16
17options(stringsAsFactors = F)
18
19# For pretty printing
20commas = function(x) {
21 format(x, big.mark=',')
22}
23
24sourceUrl = function(path) {
25 sprintf('https://github.com/oilshell/oil/blob/master/%s', path)
26}
27
28# Takes a filename, not a path.
29sourceUrl2 = function(filename) {
30 sprintf(
31 'https://github.com/oilshell/oil/blob/master/benchmarks/testdata/%s',
32 filename)
33}
34
35mycppUrl = function(name) {
36 sprintf('https://github.com/oilshell/oil/blob/master/mycpp/examples/%s.py', name)
37}
38
39genUrl = function(name) {
40 sprintf('../../_gen/mycpp/examples/%s.mycpp.cc', name)
41}
42
43
44# TODO: Set up cgit because Github links are slow.
45benchmarkDataLink = function(subdir, name, suffix) {
46 #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
47 sprintf('https://github.com/oilshell/benchmark-data/blob/master/%s/%s%s',
48 subdir, name, suffix)
49}
50
51provenanceLink = function(subdir, name, suffix) {
52 sprintf('../%s/%s%s', subdir, name, suffix)
53}
54
55
56GetOshLabel = function(shell_hash, prov_dir) {
57 ### Given a string, return another string.
58
59 path = sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)
60
61 if (file.exists(path)) {
62 Log('Reading %s', path)
63 lines = readLines(path)
64 if (length(grep('_bin/osh', lines)) > 0) {
65 label = 'osh-ovm'
66 } else if (length(grep('bin/osh', lines)) > 0) {
67 label = 'osh-cpython'
68 } else if (length(grep('_bin/.*/mycpp-souffle/osh', lines)) > 0) {
69 label = 'osh-native-souffle'
70 } else if (length(grep('_bin/.*/osh', lines)) > 0) {
71 label = 'osh-native'
72 } else {
73 stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
74 }
75 } else {
76 stop(sprintf("%s doesn't exist", path))
77 }
78 return(label)
79}
80
81opt_suffix1 = '_bin/cxx-opt/osh'
82opt_suffix2 = '_bin/cxx-opt-sh/osh'
83opt_suffix3 = '_bin/cxx-opt/mycpp-souffle/osh'
84opt_suffix4 = '_bin/cxx-opt-sh/mycpp-souffle/osh'
85
86ShellLabels = function(shell_name, shell_hash, num_hosts) {
87 ### Given 2 vectors, return a vector of readable labels.
88
89 # TODO: Clean up callers. Some metrics all this function with a
90 # shell/runtime BASENAME, and others a PATH
91 # - e.g. ComputeReport calls this with runtime_name which is actually a PATH
92
93 #Log('name %s', shell_name)
94 #Log('hash %s', shell_hash)
95
96 if (num_hosts == 1) {
97 prov_dir = '_tmp'
98 } else {
99 prov_dir = '../benchmark-data/'
100 }
101
102 labels = c()
103 for (i in 1:length(shell_name)) {
104 sh = shell_name[i]
105 if (sh == 'osh') {
106 label = GetOshLabel(shell_hash[i], prov_dir)
107
108 } else if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
109 label = 'opt/osh'
110
111 } else if (endsWith(sh, opt_suffix3) || endsWith(sh, opt_suffix4)) {
112 label = 'opt/osh-souffle'
113
114 } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
115 label = 'bumpleak/osh'
116
117 } else {
118 label = sh
119 }
120
121 Log('[%s] [%s]', shell_name[i], label)
122 labels = c(labels, label)
123 }
124
125 return(labels)
126}
127
128# Simple version of the above, used by benchmarks/gc
129ShellLabelFromPath = function(sh_path) {
130 labels = c()
131 for (i in 1:length(sh_path)) {
132 sh = sh_path[i]
133
134 if (endsWith(sh, opt_suffix1) || endsWith(sh, opt_suffix2)) {
135 # the opt binary is osh-native
136 label = 'osh-native'
137
138 } else if (endsWith(sh, opt_suffix3) || endsWith(sh, opt_suffix4)) {
139 # the opt binary is osh-native
140 label = 'osh-native-souffle'
141
142 } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
143 label = 'bumpleak/osh'
144
145 } else if (endsWith(sh, '_bin/osh')) { # the app bundle
146 label = 'osh-ovm'
147
148 } else if (endsWith(sh, 'bin/osh')) {
149 label = 'osh-cpython'
150
151 } else {
152 label = sh
153 }
154 labels = c(labels, label)
155 }
156 return(labels)
157}
158
159DistinctHosts = function(t) {
160 t %>% distinct(host_name, host_hash) -> distinct_hosts
161 # The label is just the name
162 distinct_hosts$host_label = distinct_hosts$host_name
163 return(distinct_hosts)
164}
165
166DistinctShells = function(t, num_hosts = -1) {
167 t %>% distinct(shell_name, shell_hash) -> distinct_shells
168
169 Log('')
170 Log('Labeling shells')
171
172 # Calculate it if not passed
173 if (num_hosts == -1) {
174 num_hosts = nrow(DistinctHosts(t))
175 }
176
177 distinct_shells$shell_label = ShellLabels(distinct_shells$shell_name,
178 distinct_shells$shell_hash,
179 num_hosts)
180 return(distinct_shells)
181}
182
183ParserReport = function(in_dir, out_dir) {
184 times = read.csv(file.path(in_dir, 'times.csv'))
185 lines = read.csv(file.path(in_dir, 'lines.csv'))
186 raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))
187
188 cachegrind = readTsv(file.path(in_dir, 'cachegrind.tsv'))
189
190 # For joining by filename
191 lines_by_filename = tibble(
192 num_lines = lines$num_lines,
193 filename = basename(lines$path)
194 )
195
196 # Remove failures
197 times %>% filter(status == 0) %>% select(-c(status)) -> times
198 cachegrind %>% filter(status == 0) %>% select(-c(status)) -> cachegrind
199
200 # Add the number of lines, joining on path, and compute lines/ms
201 times %>%
202 left_join(lines, by = c('path')) %>%
203 mutate(filename = basename(path), filename_HREF = sourceUrl(path),
204 max_rss_MB = max_rss_KiB * 1024 / 1e6,
205 elapsed_ms = elapsed_secs * 1000,
206 user_ms = user_secs * 1000,
207 sys_ms = sys_secs * 1000,
208 lines_per_ms = num_lines / elapsed_ms) %>%
209 select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs)) ->
210 joined_times
211
212 #print(head(times))
213 #print(head(lines))
214 #print(head(vm))
215 #print(head(joined_times))
216
217 print(summary(joined_times))
218
219 #
220 # Find distinct shells and hosts, and label them for readability.
221 #
222
223 distinct_hosts = DistinctHosts(joined_times)
224 Log('')
225 Log('Distinct hosts')
226 print(distinct_hosts)
227
228 distinct_shells = DistinctShells(joined_times)
229 Log('')
230 Log('Distinct shells')
231 print(distinct_shells)
232
233 # Replace name/hash combinations with labels.
234 joined_times %>%
235 left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
236 left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
237 select(-c(host_name, host_hash, shell_name, shell_hash)) ->
238 joined_times
239
240 # Like 'times', but do shell_label as one step
241 # Hack: we know benchmarks/auto.sh runs this on one machine
242 distinct_shells_2 = DistinctShells(cachegrind, num_hosts = nrow(distinct_hosts))
243 cachegrind %>%
244 left_join(lines, by = c('path')) %>%
245 select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
246 left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
247 select(-c(shell_name, shell_hash)) %>%
248 mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
249 select(-c(path)) ->
250 joined_cachegrind
251
252 Log('summary(joined_times):')
253 print(summary(joined_times))
254 Log('head(joined_times):')
255 print(head(joined_times))
256
257 # Summarize rates by platform/shell
258 joined_times %>%
259 mutate(host_label = paste("host", host_label)) %>%
260 group_by(host_label, shell_label) %>%
261 summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
262 mutate(lines_per_ms = total_lines / total_ms) %>%
263 select(-c(total_ms)) %>%
264 spread(key = host_label, value = lines_per_ms) ->
265 times_summary
266
267 # Sort by parsing rate on machine 1
268 if ("host hoover" %in% colnames(times_summary)) {
269 times_summary %>% arrange(desc(`host hoover`)) -> times_summary
270 } else {
271 times_summary %>% arrange(desc(`host no-host`)) -> times_summary
272 }
273
274 Log('times_summary:')
275 print(times_summary)
276
277 # Summarize cachegrind by platform/shell
278 # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
279 joined_cachegrind %>%
280 group_by(shell_label) %>%
281 summarize(total_lines = sum(num_lines), total_irefs = sum(as.numeric(irefs))) %>%
282 mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
283 select(-c(total_irefs)) ->
284 cachegrind_summary
285
286 if ("no-host" %in% distinct_hosts$host_label) {
287
288 # We don't have all the shells
289 elapsed = NULL
290 rate = NULL
291 max_rss = NULL
292 instructions = NULL
293
294 joined_times %>%
295 select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
296 num_lines, filename, filename_HREF)) %>%
297 arrange(filename, elapsed_ms) ->
298 times_flat
299
300 joined_cachegrind %>%
301 select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
302 arrange(filename, irefs) ->
303 cachegrind_flat
304
305 } else {
306
307 times_flat = NULL
308 cachegrind_flat = NULL
309
310 # Hack for release. TODO: unify with Soil
311 if (Sys.getenv("OILS_NO_SOUFFLE") == "") {
312 souffle_col = c('osh-native-souffle')
313 } else {
314 souffle_col = c()
315 }
316
317 cols1 = c('host_label', 'bash', 'dash', 'mksh', 'zsh',
318 'osh-ovm', 'osh-cpython', 'osh-native', souffle_col,
319 'osh_to_bash_ratio', 'num_lines', 'filename', 'filename_HREF')
320
321 # Elapsed seconds for each shell by platform and file
322 joined_times %>%
323 select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
324 spread(key = shell_label, value = elapsed_ms) %>%
325 arrange(host_label, num_lines) %>%
326 mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
327 select(all_of(cols1)) ->
328 elapsed
329
330 Log('\n')
331 Log('ELAPSED')
332 print(elapsed)
333
334 cols2 = c('host_label', 'bash', 'dash', 'mksh', 'zsh',
335 'osh-ovm', 'osh-cpython', 'osh-native', souffle_col,
336 'num_lines', 'filename', 'filename_HREF')
337 # Rates by file and shell
338 joined_times %>%
339 select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
340 spread(key = shell_label, value = lines_per_ms) %>%
341 arrange(host_label, num_lines) %>%
342 select(all_of(cols2)) ->
343 rate
344
345 Log('\n')
346 Log('RATE')
347 print(rate)
348
349 # Memory usage by file
350 joined_times %>%
351 select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
352 spread(key = shell_label, value = max_rss_MB) %>%
353 arrange(host_label, num_lines) %>%
354 select(all_of(cols2)) ->
355 max_rss
356
357 Log('\n')
358 Log('MAX RSS')
359 print(max_rss)
360
361 Log('\n')
362 Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
363 print(joined_cachegrind)
364 #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))
365
366 cols3 = c('bash', 'dash', 'mksh', 'osh-native', souffle_col,
367 'num_lines', 'filename', 'filename_HREF')
368
369 # Cachegrind instructions by file
370 joined_cachegrind %>%
371 mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
372 select(-c(irefs)) %>%
373 spread(key = shell_label, value = thousand_irefs_per_line) %>%
374 arrange(num_lines) %>%
375 select(all_of(cols3)) ->
376 instructions
377
378 Log('\n')
379 Log('instructions has %d rows', nrow(instructions))
380 print(instructions)
381 }
382
383 WriteProvenance(distinct_hosts, distinct_shells, out_dir)
384
385 raw_data_table = tibble(
386 filename = basename(as.character(raw_data$path)),
387 filename_HREF = benchmarkDataLink('osh-parser', filename, '')
388 )
389 #print(raw_data_table)
390
391 writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))
392
393 precision = SamePrecision(0) # lines per ms
394 writeCsv(times_summary, file.path(out_dir, 'summary'), precision)
395
396 precision = ColumnPrecision(list(), default = 1)
397 writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'), precision)
398
399 if (!is.null(times_flat)) {
400 precision = SamePrecision(0)
401 writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
402 }
403
404 if (!is.null(cachegrind_flat)) {
405 precision = SamePrecision(0)
406 writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
407 }
408
409 if (!is.null(elapsed)) { # equivalent to no-host
410 # Round to nearest millisecond, but the ratio has a decimal point.
411 precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
412 writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)
413
414 precision = SamePrecision(0)
415 writeCsv(rate, file.path(out_dir, 'rate'), precision)
416
417 writeCsv(max_rss, file.path(out_dir, 'max_rss'))
418
419 precision = SamePrecision(1)
420 writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
421 }
422
423 Log('Wrote %s', out_dir)
424}
425
426WriteProvenance = function(distinct_hosts, distinct_shells, out_dir, tsv = F) {
427
428 num_hosts = nrow(distinct_hosts)
429 if (num_hosts == 1) {
430 linkify = provenanceLink
431 } else {
432 linkify = benchmarkDataLink
433 }
434
435 Log('distinct_hosts')
436 print(distinct_hosts)
437 Log('')
438
439 Log('distinct_shells')
440 print(distinct_shells)
441 Log('')
442
443 # Should be:
444 # host_id_url
445 # And then csv_to_html will be smart enough? It should take --url flag?
446 host_table = tibble(
447 host_label = distinct_hosts$host_label,
448 host_id = paste(distinct_hosts$host_name,
449 distinct_hosts$host_hash, sep='-'),
450 host_id_HREF = linkify('host-id', host_id, '/')
451 )
452 Log('host_table')
453 print(host_table)
454 Log('')
455
456 shell_table = tibble(
457 shell_label = distinct_shells$shell_label,
458 shell_id = paste(distinct_shells$shell_name,
459 distinct_shells$shell_hash, sep='-'),
460 shell_id_HREF = linkify('shell-id', shell_id, '/')
461 )
462
463 Log('shell_table')
464 print(shell_table)
465 Log('')
466
467 if (tsv) {
468 writeTsv(host_table, file.path(out_dir, 'hosts'))
469 writeTsv(shell_table, file.path(out_dir, 'shells'))
470 } else {
471 writeCsv(host_table, file.path(out_dir, 'hosts'))
472 writeCsv(shell_table, file.path(out_dir, 'shells'))
473 }
474}
475
476WriteSimpleProvenance = function(provenance, out_dir) {
477 Log('provenance')
478 print(provenance)
479 Log('')
480
481 # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
482 # benchmarks/id.sh publish-shell-id uses
483 provenance %>%
484 mutate(shell_name = basename(sh_path)) %>%
485 distinct(shell_label, shell_name, shell_hash) ->
486 distinct_shells
487
488 Log('distinct_shells')
489 print(distinct_shells)
490 Log('')
491
492 provenance %>% distinct(host_label, host_name, host_hash) -> distinct_hosts
493
494 WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
495}
496
497RuntimeReport = function(in_dir, out_dir) {
498 times = readTsv(file.path(in_dir, 'times.tsv'))
499
500 gc_stats = readTsv(file.path(in_dir, 'gc_stats.tsv'))
501 provenance = readTsv(file.path(in_dir, 'provenance.tsv'))
502
503 times %>% filter(status != 0) -> failed
504 if (nrow(failed) != 0) {
505 print(failed)
506 stop('Some osh-runtime tasks failed')
507 }
508
509 # Joins:
510 # times <= sh_path => provenance
511 # times <= join_id, host_name => gc_stats
512
513 # TODO: provenance may have rows from 2 machines. Could validate them and
514 # deduplicate.
515
516 # It should have (host_label, host_name, host_hash)
517 # (shell_label, sh_path, shell_hash)
518 provenance %>%
519 mutate(host_label = host_name, shell_label = ShellLabelFromPath(sh_path)) ->
520 provenance
521
522 provenance %>% distinct(sh_path, shell_label) -> label_lookup
523
524 Log('label_lookup')
525 print(label_lookup)
526
527 # Join with provenance for host label and shell label
528 times %>%
529 select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
530 host_name, sh_path, workload)) %>%
531 mutate(elapsed_ms = elapsed_secs * 1000,
532 user_ms = user_secs * 1000,
533 sys_ms = sys_secs * 1000,
534 max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
535 select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
536 left_join(label_lookup, by = c('sh_path')) %>%
537 select(-c(sh_path)) %>%
538 # we want to compare workloads on adjacent rows
539 arrange(workload) ->
540 details
541
542 times %>%
543 select(c(task_id, host_name, sh_path, workload, minor_faults, major_faults, swaps, in_block, out_block, signals, voluntary_ctx, involuntary_ctx)) %>%
544 left_join(label_lookup, by = c('sh_path')) %>%
545 select(-c(sh_path)) %>%
546 # we want to compare workloads on adjacent rows
547 arrange(workload) ->
548 details_io
549
550 Log('details')
551 print(details)
552
553 # Hack for release. TODO: unify with Soil
554 if (Sys.getenv("OILS_NO_SOUFFLE") == "") {
555 souffle_col = c('osh-native-souffle')
556 } else {
557 souffle_col = c()
558 }
559
560 cols2 = c('workload', 'host_name',
561 'bash', 'dash', 'osh-cpython', 'osh-native', souffle_col,
562 'py_bash_ratio', 'native_bash_ratio')
563
564 # Elapsed time comparison
565 details %>%
566 select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
567 spread(key = shell_label, value = elapsed_ms) %>%
568 mutate(py_bash_ratio = `osh-cpython` / bash) %>%
569 mutate(native_bash_ratio = `osh-native` / bash) %>%
570 arrange(workload, host_name) %>%
571 select(all_of(cols2)) ->
572 elapsed
573
574 Log('elapsed')
575 print(elapsed)
576
577 # Minor Page Faults Comparison
578 details_io %>%
579 select(c(host_name, shell_label, workload, minor_faults)) %>%
580 spread(key = shell_label, value = minor_faults) %>%
581 mutate(py_bash_ratio = `osh-cpython` / bash) %>%
582 mutate(native_bash_ratio = `osh-native` / bash) %>%
583 arrange(workload, host_name) %>%
584 select(all_of(cols2)) ->
585 page_faults
586
587 Log('page_faults')
588 print(page_faults)
589
590 # Max RSS comparison
591 details %>%
592 select(c(host_name, shell_label, workload, max_rss_MB)) %>%
593 spread(key = shell_label, value = max_rss_MB) %>%
594 mutate(py_bash_ratio = `osh-cpython` / bash) %>%
595 mutate(native_bash_ratio = `osh-native` / bash) %>%
596 arrange(workload, host_name) %>%
597 select(all_of(cols2)) ->
598 max_rss
599
600 Log('max rss')
601 print(max_rss)
602
603 details %>%
604 select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
605 mutate(join_id = sprintf("gc-%d", task_id)) %>%
606 select(-c(task_id)) ->
607 gc_details
608
609 Log('GC details')
610 print(gc_details)
611 Log('')
612
613 Log('GC stats')
614 print(gc_stats)
615 Log('')
616
617 gc_stats %>%
618 left_join(gc_details, by = c('join_id', 'host_name')) %>%
619 select(-c(join_id, roots_capacity, objs_capacity)) %>%
620 # Do same transformations as GcReport()
621 mutate(allocated_MB = bytes_allocated / 1e6) %>%
622 select(-c(bytes_allocated)) %>%
623 rename(num_gc_done = num_collections) %>%
624 # Put these columns first
625 relocate(workload, host_name,
626 elapsed_ms, max_gc_millis, total_gc_millis,
627 allocated_MB, max_rss_MB, num_allocated) ->
628 gc_stats
629
630 Log('After GC stats')
631 print(gc_stats)
632 Log('')
633
634 WriteSimpleProvenance(provenance, out_dir)
635
636 # milliseconds don't need decimal digit
637 precision = ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
638 `osh-native` = 0, `osh-native-souffle` = 0, py_bash_ratio = 2,
639 native_bash_ratio = 2))
640 writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
641 writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)
642
643 precision2 = ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
644 writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)
645
646 precision3 = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
647 default = 0)
648 writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision3)
649
650 writeTsv(details, file.path(out_dir, 'details'), precision3)
651 writeTsv(details_io, file.path(out_dir, 'details_io'))
652
653 Log('Wrote %s', out_dir)
654}
655
656VmBaselineReport = function(in_dir, out_dir) {
657 vm = readTsv(file.path(in_dir, 'vm-baseline.tsv'))
658 #print(vm)
659
660 # Not using DistinctHosts() because field host_hash isn't collected
661 num_hosts = nrow(vm %>% distinct(host))
662
663 vm %>%
664 rename(kib = metric_value) %>%
665 mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
666 megabytes = kib * 1024 / 1e6) %>%
667 select(-c(shell_name, kib)) %>%
668 spread(key = c(metric_name), value = megabytes) %>%
669 rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
670 select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
671 arrange(shell_label, shell_hash, host, VmPeak_MB) ->
672 vm
673
674 print(vm)
675
676 writeTsv(vm, file.path(out_dir, 'vm-baseline'))
677}
678
679WriteOvmBuildDetails = function(distinct_hosts, distinct_compilers, out_dir) {
680 host_table = tibble(
681 host_label = distinct_hosts$host_label,
682 host_id = paste(distinct_hosts$host_name,
683 distinct_hosts$host_hash, sep='-'),
684 host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
685 )
686 print(host_table)
687
688 dc = distinct_compilers
689 compiler_table = tibble(
690 compiler_label = dc$compiler_label,
691 compiler_id = paste(dc$compiler_label, dc$compiler_hash, sep='-'),
692 compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
693 )
694 print(compiler_table)
695
696 writeTsv(host_table, file.path(out_dir, 'hosts'))
697 writeTsv(compiler_table, file.path(out_dir, 'compilers'))
698}
699
700OvmBuildReport = function(in_dir, out_dir) {
701 times = readTsv(file.path(in_dir, 'times.tsv'))
702 native_sizes = readTsv(file.path(in_dir, 'native-sizes.tsv'))
703 #raw_data = readTsv(file.path(in_dir, 'raw-data.tsv'))
704
705 times %>% filter(status != 0) -> failed
706 if (nrow(failed) != 0) {
707 print(failed)
708 stop('Some ovm-build tasks failed')
709 }
710
711 times %>% distinct(host_name, host_hash) -> distinct_hosts
712 distinct_hosts$host_label = distinct_hosts$host_name
713
714 times %>% distinct(compiler_path, compiler_hash) -> distinct_compilers
715 distinct_compilers$compiler_label = basename(distinct_compilers$compiler_path)
716
717 #print(distinct_hosts)
718 #print(distinct_compilers)
719
720 WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)
721
722 times %>%
723 select(-c(status)) %>%
724 left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
725 left_join(distinct_compilers, by = c('compiler_path', 'compiler_hash')) %>%
726 select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
727 mutate(src_dir = basename(src_dir),
728 host_label = paste("host ", host_label),
729 is_conf = str_detect(action, 'configure'),
730 is_ovm = str_detect(action, 'oil.ovm'),
731 is_dbg = str_detect(action, 'dbg'),
732 ) %>%
733 select(host_label, src_dir, compiler_label, action, is_conf, is_ovm, is_dbg,
734 elapsed_secs) %>%
735 spread(key = c(host_label), value = elapsed_secs) %>%
736 arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
737 select(-c(is_conf, is_ovm, is_dbg)) ->
738 times
739
740 #print(times)
741
742 # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
743 native_sizes %>%
744 select(c(host_label, path, num_bytes)) %>%
745 mutate(host_label = paste("host ", host_label),
746 binary = basename(path),
747 compiler = basename(dirname(path)),
748 ) %>%
749 select(-c(path)) %>%
750 spread(key = c(host_label), value = num_bytes) %>%
751 arrange(compiler, binary) ->
752 native_sizes
753
754 # NOTE: These don't have the host and compiler.
755 writeTsv(times, file.path(out_dir, 'times'))
756 writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))
757
758 # TODO: I want a size report too
759 #writeCsv(sizes, file.path(out_dir, 'sizes'))
760}
761
762unique_stdout_md5sum = function(t, num_expected) {
763 u = n_distinct(t$stdout_md5sum)
764 if (u != num_expected) {
765 t %>% select(c(host_name, task_name, arg1, arg2, runtime_name, stdout_md5sum)) %>% print()
766 stop(sprintf('Expected %d unique md5sums, got %d', num_expected, u))
767 }
768}
769
770ComputeReport = function(in_dir, out_dir) {
771 # TSV file, not CSV
772 times = read.table(file.path(in_dir, 'times.tsv'), header=T)
773 print(times)
774
775 times %>% filter(status != 0) -> failed
776 if (nrow(failed) != 0) {
777 print(failed)
778 stop('Some compute tasks failed')
779 }
780
781 #
782 # Check correctness
783 #
784
785 times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
786 times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
787 times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
788 # 3 different inputs
789 times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)
790
791 times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)
792
793 # TODO:
794 # - oils_cpp doesn't implement unicode LANG=C
795 # - bash behaves differently on your desktop vs. in the container
796 # - might need layer-locales in the image?
797
798 #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
799 # Ditto here
800 #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)
801
802 #
803 # Find distinct shells and hosts, and label them for readability.
804 #
805
806 # Runtimes are called shells, as a hack for code reuse
807 times %>%
808 mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
809 select(c(host_name, host_hash, shell_name, shell_hash)) ->
810 tmp
811
812 distinct_hosts = DistinctHosts(tmp)
813 Log('')
814 Log('Distinct hosts')
815 print(distinct_hosts)
816
817 distinct_shells = DistinctShells(tmp)
818 Log('')
819 Log('Distinct runtimes')
820 print(distinct_shells)
821
822 num_hosts = nrow(distinct_hosts)
823
824 times %>%
825 select(-c(status, stdout_md5sum, stdout_filename, host_hash, runtime_hash)) %>%
826 mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
827 elapsed_ms = elapsed_secs * 1000,
828 user_ms = user_secs * 1000,
829 sys_ms = sys_secs * 1000,
830 max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
831 select(-c(runtime_name, elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
832 arrange(host_name, task_name, arg1, arg2, user_ms) ->
833 details
834
835 times %>%
836 mutate(
837 runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
838 stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
839 select(c(host_name, task_name, arg1, arg2, runtime_label,
840 stdout_md5sum, stdout_md5sum_HREF)) ->
841 stdout_files
842
843 details %>% filter(task_name == 'hello') %>% select(-c(task_name)) -> hello
844 details %>% filter(task_name == 'fib') %>% select(-c(task_name)) -> fib
845 details %>% filter(task_name == 'word_freq') %>% select(-c(task_name)) -> word_freq
846 # There's no arg2
847 details %>% filter(task_name == 'parse_help') %>% select(-c(task_name, arg2)) -> parse_help
848
849 details %>% filter(task_name == 'bubble_sort') %>% select(-c(task_name)) -> bubble_sort
850 details %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% select(-c(task_name)) -> palindrome
851
852 precision = ColumnPrecision(list(max_rss_MB = 1), default = 0)
853 writeTsv(details, file.path(out_dir, 'details'), precision)
854
855 writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)
856
857 writeTsv(hello, file.path(out_dir, 'hello'), precision)
858 writeTsv(fib, file.path(out_dir, 'fib'), precision)
859 writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
860 writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)
861
862 writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
863 writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)
864
865 WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
866}
867
868WriteOneTask = function(times, out_dir, task_name, precision) {
869 times %>%
870 filter(task == task_name) %>%
871 select(-c(task)) -> subset
872
873 writeTsv(subset, file.path(out_dir, task_name), precision)
874}
875
876SHELL_ORDER = c('dash',
877 'bash',
878 'zsh',
879 '_bin/cxx-opt+bumpleak/osh',
880 '_bin/cxx-opt+bumproot/osh',
881 '_bin/cxx-opt+bumpsmall/osh',
882 '_bin/cxx-opt/osh',
883 '_bin/cxx-opt+nopool/osh')
884
885GcReport = function(in_dir, out_dir) {
886 times = read.table(file.path(in_dir, 'raw/times.tsv'), header=T)
887 gc_stats = read.table(file.path(in_dir, 'stage1/gc_stats.tsv'), header=T)
888
889 times %>% filter(status != 0) -> failed
890 if (nrow(failed) != 0) {
891 print(failed)
892 stop('Some gc tasks failed')
893 }
894
895 # Change units and order columns
896 times %>%
897 arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
898 mutate(elapsed_ms = elapsed_secs * 1000,
899 user_ms = user_secs * 1000,
900 sys_ms = sys_secs * 1000,
901 max_rss_MB = max_rss_KiB * 1024 / 1e6,
902 shell_label = ShellLabelFromPath(sh_path)
903 ) %>%
904 select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB, shell_label,
905 shell_runtime_opts)) ->
906 times
907
908 # Join and order columns
909 gc_stats %>% left_join(times, by = c('join_id')) %>%
910 arrange(desc(task)) %>%
911 mutate(allocated_MB = bytes_allocated / 1e6) %>%
912 # try to make the table skinnier
913 rename(num_gc_done = num_collections) %>%
914 select(task, elapsed_ms, max_gc_millis, total_gc_millis,
915 allocated_MB, max_rss_MB, num_allocated,
916 num_gc_points, num_gc_done, gc_threshold, num_growths, max_survived,
917 shell_label) ->
918 gc_stats
919
920 times %>% select(-c(join_id)) -> times
921
922
923 precision = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
924 default = 0)
925
926 writeTsv(times, file.path(out_dir, 'times'), precision)
927 writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)
928
929 tasks = c('parse.configure-coreutils',
930 'parse.configure-cpython',
931 'parse.abuild',
932 'ex.compute-fib',
933 'ex.bashcomp-parse-help',
934 'ex.abuild-print-help')
935 # Write out separate rows
936 for (task in tasks) {
937 WriteOneTask(times, out_dir, task, precision)
938 }
939}
940
941GcCachegrindReport = function(in_dir, out_dir) {
942 times = readTsv(file.path(in_dir, 'raw/times.tsv'))
943 counts = readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))
944
945 times %>% filter(status != 0) -> failed
946 if (nrow(failed) != 0) {
947 print(failed)
948 stop('Some gc tasks failed')
949 }
950
951 print(times)
952 print(counts)
953
954 counts %>% left_join(times, by = c('join_id')) %>%
955 mutate(million_irefs = irefs / 1e6) %>%
956 select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
957 arrange(factor(sh_path, levels = SHELL_ORDER)) ->
958 counts
959
960 precision = NULL
961 tasks = c('parse.abuild', 'ex.compute-fib')
962 for (task in tasks) {
963 WriteOneTask(counts, out_dir, task, precision)
964 }
965}
966
967MyCppReport = function(in_dir, out_dir) {
968 times = readTsv(file.path(in_dir, 'benchmark-table.tsv'))
969 print(times)
970
971 times %>% filter(status != 0) -> failed
972 if (nrow(failed) != 0) {
973 print(failed)
974 stop('Some mycpp tasks failed')
975 }
976
977 # Don't care about elapsed and system
978 times %>% select(-c(status, elapsed_secs, bin, task_out)) %>%
979 mutate(example_name_HREF = mycppUrl(example_name),
980 gen = c('gen'),
981 gen_HREF = genUrl(example_name),
982 user_ms = user_secs * 1000,
983 sys_ms = sys_secs * 1000,
984 max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
985 select(-c(user_secs, sys_secs, max_rss_KiB)) ->
986 details
987
988 details %>% select(-c(sys_ms, max_rss_MB)) %>%
989 spread(key = impl, value = user_ms) %>%
990 mutate(`C++ : Python` = `C++` / Python) %>%
991 arrange(`C++ : Python`) ->
992 user_time
993
994 details %>% select(-c(user_ms, max_rss_MB)) %>%
995 spread(key = impl, value = sys_ms) %>%
996 mutate(`C++ : Python` = `C++` / Python) %>%
997 arrange(`C++ : Python`) ->
998 sys_time
999
1000 details %>% select(-c(user_ms, sys_ms)) %>%
1001 spread(key = impl, value = max_rss_MB) %>%
1002 mutate(`C++ : Python` = `C++` / Python) %>%
1003 arrange(`C++ : Python`) ->
1004 max_rss
1005
1006 # Sometimes it speeds up by more than 10x
1007 precision1 = ColumnPrecision(list(`C++ : Python` = 3), default = 0)
1008 writeTsv(user_time, file.path(out_dir, 'user_time'), precision1)
1009 writeTsv(sys_time, file.path(out_dir, 'sys_time'), precision1)
1010
1011 precision2 = ColumnPrecision(list(`C++ : Python` = 2), default = 1)
1012 writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)
1013
1014 writeTsv(details, file.path(out_dir, 'details'))
1015}
1016
1017UftraceTaskReport = function(env, task_name, summaries) {
1018 # Need this again after redirect
1019 MaybeDisableColor(stdout())
1020
1021 task_env = env[[task_name]]
1022
1023 untyped = task_env$untyped
1024 typed = task_env$typed
1025 strings = task_env$strings
1026 slabs = task_env$slabs
1027 reserve = task_env$reserve
1028
1029 string_overhead = 17 # GC header (8) + len (4) + hash value (4) + NUL (1)
1030 strings %>% mutate(obj_len = str_len + string_overhead) -> strings
1031
1032 # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
1033 # big/small
1034 #
1035 # And then zoom in on distributions as well
1036
1037 num_allocs = nrow(untyped)
1038 total_bytes = sum(untyped$obj_len)
1039
1040 untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
1041 #print(untyped_hist)
1042
1043 untyped_hist %>%
1044 mutate(n_less_than = cumsum(n),
1045 percent = n_less_than * 100.0 / num_allocs) ->
1046 alloc_sizes
1047
1048 a24 = untyped_hist %>% filter(obj_len <= 24)
1049 a48 = untyped_hist %>% filter(obj_len <= 48)
1050 a96 = untyped_hist %>% filter(obj_len <= 96)
1051
1052 allocs_24_bytes_or_less = sum(a24$n) * 100.0 / num_allocs
1053 allocs_48_bytes_or_less = sum(a48$n) * 100.0 / num_allocs
1054 allocs_96_bytes_or_less = sum(a96$n) * 100.0 / num_allocs
1055
1056 Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)
1057
1058 options(tibble.print_min=25)
1059
1060 Log('')
1061 Log('All allocations')
1062 print(alloc_sizes %>% head(22))
1063 print(alloc_sizes %>% tail(5))
1064
1065 Log('')
1066 Log('Common Sizes')
1067 print(untyped_hist %>% arrange(desc(n)) %>% head(8))
1068
1069 Log('')
1070 Log(' %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
1071 Log('')
1072
1073 Log('Typed allocations')
1074
1075 num_typed = nrow(typed)
1076
1077 typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
1078 mutate(percent = n * 100.0 / num_typed) %>%
1079 arrange(desc(n)) -> most_common_types
1080
1081 print(most_common_types %>% head(20))
1082 print(most_common_types %>% tail(5))
1083
1084 lists = typed %>% filter(str_starts(func_name, ('List<')))
1085 #print(lists)
1086
1087 num_lists = nrow(lists)
1088 total_list_bytes = num_lists * 24 # sizeof List<T> head is hard-coded
1089
1090 Log('')
1091 Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
1092 Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
1093 Log('')
1094
1095 #
1096 # Strings
1097 #
1098
1099 num_strings = nrow(strings)
1100 total_string_bytes = sum(strings$obj_len)
1101
1102 strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
1103 mutate(n_less_than = cumsum(n),
1104 percent = n_less_than * 100.0 / num_strings) ->
1105 string_lengths
1106
1107 strs_6_bytes_or_less = string_lengths %>% filter(str_len == 6) %>% select(percent)
1108 strs_14_bytes_or_less = string_lengths %>% filter(str_len == 14) %>% select(percent)
1109
1110 # Parse workload
1111 # 62% of strings <= 6 bytes
1112 # 84% of strings <= 14 bytes
1113
1114 Log('Str - NewStr() and OverAllocatedStr()')
1115 print(string_lengths %>% head(16))
1116 print(string_lengths %>% tail(5))
1117 Log('')
1118
1119 Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
1120 commas(sum(strings$str_len)), commas(total_string_bytes))
1121 Log('')
1122 Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
1123 Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
1124 Log('')
1125
1126 #
1127 # Slabs
1128 #
1129
1130 Log('NewSlab()')
1131
1132 num_slabs = nrow(slabs)
1133 slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
1134 mutate(n_less_than = cumsum(n),
1135 percent = n_less_than * 100.0 / num_slabs) ->
1136 slab_lengths
1137
1138 slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
1139 arrange(desc(n)) -> slab_types
1140
1141 Log(' Lengths')
1142 print(slab_lengths %>% head())
1143 print(slab_lengths %>% tail(5))
1144 Log('')
1145
1146 Log(' Slab Types')
1147 print(slab_types %>% head())
1148 print(slab_types %>% tail(5))
1149 Log('')
1150
1151 total_slab_items = sum(slabs$slab_len)
1152
1153 Log('%s slabs, total items = %s', commas(num_slabs),
1154 commas(sum(slabs$slab_len)))
1155 Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
1156 Log('')
1157
1158 #
1159 # reserve() calls
1160 #
1161
1162 # There should be strictly more List::reserve() calls than NewSlab
1163
1164 Log('::reserve(int n)')
1165 Log('')
1166
1167 num_reserve = nrow(reserve)
1168 reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
1169 mutate(n_less_than = cumsum(n),
1170 percent = n_less_than * 100.0 / num_reserve) ->
1171 reserve_args
1172
1173 Log(' Num Items')
1174 print(reserve_args %>% head(15))
1175 print(reserve_args %>% tail(5))
1176 Log('')
1177
1178 Log('%s reserve() calls, total items = %s', commas(num_reserve),
1179 commas(sum(reserve$num_items)))
1180 Log('')
1181
1182 # Accounting for all allocations!
1183 Log('Untyped: %s', commas(num_allocs))
1184 Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
1185 Log('')
1186
1187 num_other_typed = num_typed - num_lists
1188
1189 # Summary table
1190 stats = tibble(task = task_name,
1191 total_bytes_ = commas(total_bytes),
1192 num_allocs_ = commas(num_allocs),
1193 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
1194 num_reserve_calls = commas(num_reserve),
1195
1196 percent_list_allocs = Percent(num_lists, num_allocs),
1197 percent_slab_allocs = Percent(num_slabs, num_allocs),
1198 percent_string_allocs = Percent(num_strings, num_allocs),
1199 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),
1200
1201 percent_list_bytes = Percent(total_list_bytes, total_bytes),
1202 percent_string_bytes = Percent(total_string_bytes, total_bytes),
1203
1204 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
1205 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
1206 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),
1207
1208 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
1209 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less),
1210 )
1211 summaries$stats[[task_name]] = stats
1212
1213 summaries$most_common_types[[task_name]] = most_common_types
1214}
1215
1216LoadUftraceTsv = function(in_dir, env) {
1217 for (task in list.files(in_dir)) {
1218 Log('Loading data for task %s', task)
1219 base_dir = file.path(in_dir, task)
1220
1221 task_env = new.env()
1222 env[[task]] = task_env
1223
1224 # TSV file, not CSV
1225 task_env$untyped = readTsv(file.path(base_dir, 'all-untyped.tsv'))
1226 task_env$typed = readTsv(file.path(base_dir, 'typed.tsv'))
1227 task_env$strings = readTsv(file.path(base_dir, 'strings.tsv'))
1228 task_env$slabs = readTsv(file.path(base_dir, 'slabs.tsv'))
1229 task_env$reserve = readTsv(file.path(base_dir, 'reserve.tsv'))
1230
1231 # median string length is 4, mean is 9.5!
1232 Log('UNTYPED')
1233 print(summary(task_env$untyped))
1234 Log('')
1235
1236 Log('TYPED')
1237 print(summary(task_env$typed))
1238 Log('')
1239
1240 Log('STRINGS')
1241 print(summary(task_env$strings))
1242 Log('')
1243
1244 Log('SLABS')
1245 print(summary(task_env$slabs))
1246 Log('')
1247
1248 Log('RESERVE')
1249 print(summary(task_env$reserve))
1250 Log('')
1251 }
1252}
1253
1254Percent = function(n, total) {
1255 sprintf('%.1f%%', n * 100.0 / total)
1256}
1257
1258PrettyPrintLong = function(d) {
1259 tr = t(d) # transpose
1260
1261 row_names = rownames(tr)
1262
1263 for (i in 1:nrow(tr)) {
1264 row_name = row_names[i]
1265 cat(sprintf('%26s', row_name)) # calculated min width manually
1266 cat(sprintf('%20s', tr[i,]))
1267 cat('\n')
1268
1269 # Extra spacing
1270 if (row_name %in% c('num_reserve_calls',
1271 'percent_string_bytes',
1272 'percent_other_typed_allocs',
1273 'allocs_96_bytes_or_less')) {
1274 cat('\n')
1275 }
1276 }
1277}
1278
1279
1280UftraceReport = function(env, out_dir) {
1281 # summaries$stats should be a list of 1-row data frames
1282 # summaries$top_types should be a list of types
1283 summaries = new.env()
1284
1285 for (task_name in names(env)) {
1286 report_out = file.path(out_dir, paste0(task_name, '.txt'))
1287
1288 Log('Making report for task %s -> %s', task_name, report_out)
1289
1290 sink(file = report_out)
1291 UftraceTaskReport(env, task_name, summaries)
1292 sink() # reset
1293 }
1294 Log('')
1295
1296 # Concate all the data frames added to summary
1297 stats = bind_rows(as.list(summaries$stats))
1298
1299 sink(file = file.path(out_dir, 'summary.txt'))
1300 #print(stats)
1301 #Log('')
1302
1303 PrettyPrintLong(stats)
1304 Log('')
1305
1306 mct = summaries$most_common_types
1307 for (task_name in names(mct)) {
1308 Log('Common types in workload %s', task_name)
1309 Log('')
1310
1311 print(mct[[task_name]] %>% head(5))
1312 Log('')
1313 }
1314 sink()
1315
1316 # For the REPL
1317 return(list(stats = stats))
1318}
1319
1320main = function(argv) {
1321 action = argv[[1]]
1322 in_dir = argv[[2]]
1323 out_dir = argv[[3]]
1324
1325 if (action == 'osh-parser') {
1326 ParserReport(in_dir, out_dir)
1327
1328 } else if (action == 'osh-runtime') {
1329 RuntimeReport(in_dir, out_dir)
1330
1331 } else if (action == 'vm-baseline') {
1332 VmBaselineReport(in_dir, out_dir)
1333
1334 } else if (action == 'ovm-build') {
1335 OvmBuildReport(in_dir, out_dir)
1336
1337 } else if (action == 'compute') {
1338 ComputeReport(in_dir, out_dir)
1339
1340 } else if (action == 'gc') {
1341 GcReport(in_dir, out_dir)
1342
1343 } else if (action == 'gc-cachegrind') {
1344 GcCachegrindReport(in_dir, out_dir)
1345
1346 } else if (action == 'mycpp') {
1347 MyCppReport(in_dir, out_dir)
1348
1349 } else if (action == 'uftrace') {
1350 d = new.env()
1351 LoadUftraceTsv(in_dir, d)
1352 UftraceReport(d, out_dir)
1353
1354 } else {
1355 Log("Invalid action '%s'", action)
1356 quit(status = 1)
1357 }
1358 Log('PID %d done', Sys.getpid())
1359}
1360
1361if (length(sys.frames()) == 0) {
1362 # increase ggplot font size globally
1363 #theme_set(theme_grey(base_size = 20))
1364
1365 main(commandArgs(TRUE))
1366}