# OILS / benchmarks/report.R -- View on GitHub | oils.pub
#
# (1381 lines, 955 significant)
1#!/usr/bin/env Rscript
2#
3# benchmarks/report.R -- Analyze data collected by shell scripts.
4#
5# Usage:
6# benchmarks/report.R OUT_DIR [TIMES_CSV...]
7
8# Suppress warnings about functions masked from 'package:stats' and 'package:base'
9# filter, lag
10# intersect, setdiff, setequal, union
11library(dplyr, warn.conflicts = FALSE)
12library(tidyr) # spread()
13library(stringr)
14
source('benchmarks/common.R')

# Don't silently convert character columns to factors when reading CSVs.
# Use FALSE, not the reassignable shorthand F.  (This is the default in R 4.0+,
# but setting it explicitly keeps older R versions behaving the same.)
options(stringsAsFactors = FALSE)
# For pretty printing: render numbers with thousands separators,
# e.g. 1234 -> "1,234".  format() vectorizes, so whole columns work too.
commas = function(x) {
  format(x, big.mark = ',')
}
23
# Link a repo-relative path to its file on GitHub.
sourceUrl = function(path) {
  prefix = 'https://github.com/oilshell/oil/blob/master/'
  paste0(prefix, path)
}
27
# Takes a filename, not a path.  Links into benchmarks/testdata/ on GitHub.
sourceUrl2 = function(filename) {
  paste0('https://github.com/oilshell/oil/blob/master/benchmarks/testdata/',
         filename)
}
34
# Link an mycpp example name to its Python source on GitHub.
mycppUrl = function(name) {
  paste0('https://github.com/oilshell/oil/blob/master/mycpp/examples/',
         name, '.py')
}
38
# Relative link to the generated C++ translation of an mycpp example.
genUrl = function(name) {
  paste0('../../_gen/mycpp/examples/', name, '.mycpp.cc')
}
42
43
# TODO: Set up cgit because Github links are slow.
# Link into the published oilshell/benchmark-data repo.
benchmarkDataLink = function(subdir, name, suffix) {
  #sprintf('../../../../benchmark-data/shell-id/%s', shell_id)
  base = 'https://github.com/oilshell/benchmark-data/blob/master/'
  paste0(base, subdir, '/', name, suffix)
}
50
# Relative link within the report tree (single-host case).
provenanceLink = function(subdir, name, suffix) {
  paste0('../', subdir, '/', name, suffix)
}
54
55
GetOshLabel = function(shell_hash, prov_dir) {
  ### Given a string, return another string.
  #
  # Reads the recorded sh-path.txt for this shell ID and classifies the
  # binary path into a human-readable label.

  path = sprintf('%s/shell-id/osh-%s/sh-path.txt', prov_dir, shell_hash)

  # Guard clause: the provenance file must exist.
  if (!file.exists(path)) {
    stop(sprintf("%s doesn't exist", path))
  }

  Log('Reading %s', path)
  lines = readLines(path)

  # Branch order matters: the literal patterns are tried before the
  # wildcard ones, and wildcard patterns go most-specific first.
  if (any(grepl('_bin/osh', lines))) {
    label = 'osh-ovm'
  } else if (any(grepl('bin/osh', lines))) {
    label = 'osh-cpython'
  } else if (any(grepl('_bin/.*/mycpp-souffle/osh', lines))) {
    label = 'osh-native-souffle'
  } else if (any(grepl('_bin/.*/osh', lines))) {
    label = 'osh-native'
  } else {
    stop("Expected _bin/osh, bin/osh, or _bin/.*/osh")
  }
  return(label)
}
80
# Path suffixes of the optimized native binaries, used to classify a shell
# path into a label.  The plain and -sh variants are presumably two build
# configurations of the same binary -- TODO confirm against build scripts.
osh_opt_suffix1 = '_bin/cxx-opt/osh'
osh_opt_suffix2 = '_bin/cxx-opt-sh/osh'

osh_souffle_suffix1 = '_bin/cxx-opt/mycpp-souffle/osh'
osh_souffle_suffix2 = '_bin/cxx-opt-sh/mycpp-souffle/osh'

ysh_opt_suffix1 = '_bin/cxx-opt/ysh'
ysh_opt_suffix2 = '_bin/cxx-opt-sh/ysh'
89
ShellLabels = function(shell_name, shell_hash, num_hosts) {
  ### Given 2 parallel vectors (names and hashes), return a vector of
  ### readable labels, logging each mapping.
  #
  # TODO: Clean up callers.  Some metrics call this function with a
  # shell/runtime BASENAME, and others a PATH
  # - e.g. ComputeReport calls this with runtime_name which is actually a PATH

  #Log('name %s', shell_name)
  #Log('hash %s', shell_hash)

  # Single-host runs record provenance under _tmp; multi-host runs read it
  # from the published benchmark-data checkout.
  if (num_hosts == 1) {
    prov_dir = '_tmp'
  } else {
    prov_dir = '../benchmark-data/'
  }

  # seq_along() + vapply instead of 1:length() + c() growth: this is O(n)
  # and handles a zero-length input correctly (1:0 iterates twice).
  labels = vapply(seq_along(shell_name), function(i) {
    sh = shell_name[i]
    if (sh == 'osh') {
      # Bare 'osh' is ambiguous; look up the recorded binary path.
      label = GetOshLabel(shell_hash[i], prov_dir)

    } else if (endsWith(sh, osh_opt_suffix1) || endsWith(sh, osh_opt_suffix2)) {
      label = 'opt/osh'

    } else if (endsWith(sh, ysh_opt_suffix1) || endsWith(sh, ysh_opt_suffix2)) {
      label = 'opt/ysh'

    } else if (endsWith(sh, osh_souffle_suffix1) ||
               endsWith(sh, osh_souffle_suffix2)) {
      label = 'opt/osh-souffle'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      label = 'bumpleak/osh'

    } else {
      # e.g. bash, dash: the name is already readable
      label = sh
    }

    Log('[%s] [%s]', sh, label)
    label
  }, character(1))

  return(labels)
}
134
# Simple version of the above, used by benchmarks/{gc,osh-runtime}.
# Maps each shell binary path to a readable label; vectorized over sh_path.
ShellLabelFromPath = function(sh_path) {
  # Classify a single path.  Branch order matters: specific suffixes first,
  # then the generic _bin/osh (app bundle) and bin/osh (CPython) fallbacks.
  label_one = function(sh) {
    if (endsWith(sh, osh_opt_suffix1) || endsWith(sh, osh_opt_suffix2)) {
      # the opt binary is called osh-native - the osh-runtime report relies on this
      'osh-native'

    } else if (endsWith(sh, ysh_opt_suffix1) || endsWith(sh, ysh_opt_suffix2)) {
      'opt/ysh'

    } else if (endsWith(sh, osh_souffle_suffix1) ||
               endsWith(sh, osh_souffle_suffix2)) {
      'osh-native-souffle'

    } else if (endsWith(sh, '_bin/cxx-opt+bumpleak/osh')) {
      'bumpleak/osh'

    } else if (endsWith(sh, '_bin/osh')) {  # the app bundle
      'osh-ovm'

    } else if (endsWith(sh, 'bin/osh')) {
      'osh-cpython'

    } else {
      # e.g. /bin/bash: the path itself is the label
      sh
    }
  }

  # vapply instead of a 1:length() loop: type-stable and correct for
  # zero-length input (the old loop errored on character(0)).
  vapply(sh_path, label_one, character(1), USE.NAMES = FALSE)
}
167
DistinctHosts = function(t) {
  # One row per (host_name, host_hash) pair.
  distinct_hosts = distinct(t, host_name, host_hash)
  # The label is just the name
  distinct_hosts$host_label = distinct_hosts$host_name
  distinct_hosts
}
174
DistinctShells = function(t, num_hosts = -1) {
  # One row per (shell_name, shell_hash), plus a readable shell_label column.
  distinct_shells = distinct(t, shell_name, shell_hash)

  Log('')
  Log('Labeling shells')

  # Calculate it if not passed
  if (num_hosts == -1) {
    num_hosts = nrow(DistinctHosts(t))
  }

  distinct_shells$shell_label = ShellLabels(
    distinct_shells$shell_name, distinct_shells$shell_hash, num_hosts)
  distinct_shells
}
191
ParserReport = function(in_dir, out_dir) {
  ### Report for the osh-parser benchmark: parse times, rates (lines/ms),
  ### memory, and cachegrind instruction counts per shell / host / file.
  times = read.csv(file.path(in_dir, 'times.csv'))
  lines = read.csv(file.path(in_dir, 'lines.csv'))
  raw_data = read.csv(file.path(in_dir, 'raw-data.csv'))

  cachegrind = readTsv(file.path(in_dir, 'cachegrind.tsv'))

  # For joining by filename
  # NOTE(review): lines_by_filename is never referenced below; the joins use
  # 'lines' directly on path.  Candidate for removal.
  lines_by_filename = tibble(
    num_lines = lines$num_lines,
    filename = basename(lines$path)
  )

  # Remove failures
  times %>% filter(status == 0) %>% select(-c(status)) -> times
  cachegrind %>% filter(status == 0) %>% select(-c(status)) -> cachegrind

  # Add the number of lines, joining on path, and compute lines/ms
  times %>%
    left_join(lines, by = c('path')) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path),
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           lines_per_ms = num_lines / elapsed_ms) %>%
    select(-c(path, max_rss_KiB, elapsed_secs, user_secs, sys_secs)) ->
    joined_times

  #print(head(times))
  #print(head(lines))
  #print(head(vm))
  #print(head(joined_times))

  print(summary(joined_times))

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  distinct_hosts = DistinctHosts(joined_times)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells = DistinctShells(joined_times)
  Log('')
  Log('Distinct shells')
  print(distinct_shells)

  # Replace name/hash combinations with labels.
  joined_times %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_shells, by = c('shell_name', 'shell_hash')) %>%
    select(-c(host_name, host_hash, shell_name, shell_hash)) ->
    joined_times

  # Like 'times', but do shell_label as one step
  # Hack: we know benchmarks/auto.sh runs this on one machine
  distinct_shells_2 = DistinctShells(cachegrind, num_hosts = nrow(distinct_hosts))
  cachegrind %>%
    left_join(lines, by = c('path')) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(distinct_shells_2, by = c('shell_name', 'shell_hash')) %>%
    select(-c(shell_name, shell_hash)) %>%
    mutate(filename = basename(path), filename_HREF = sourceUrl(path)) %>%
    select(-c(path)) ->
    joined_cachegrind

  Log('summary(joined_times):')
  print(summary(joined_times))
  Log('head(joined_times):')
  print(head(joined_times))

  # Summarize rates by platform/shell
  joined_times %>%
    mutate(host_label = paste("host", host_label)) %>%
    group_by(host_label, shell_label) %>%
    summarize(total_lines = sum(num_lines), total_ms = sum(elapsed_ms)) %>%
    mutate(lines_per_ms = total_lines / total_ms) %>%
    select(-c(total_ms)) %>%
    # One column per host, cells are aggregate lines/ms
    spread(key = host_label, value = lines_per_ms) ->
    times_summary

  # Sort by parsing rate on machine 1
  if ("host hoover" %in% colnames(times_summary)) {
    times_summary %>% arrange(desc(`host hoover`)) -> times_summary
  } else {
    times_summary %>% arrange(desc(`host no-host`)) -> times_summary
  }

  Log('times_summary:')
  print(times_summary)

  # Summarize cachegrind by platform/shell
  # Bug fix: as.numeric(irefs) avoids 32-bit integer overflow!
  joined_cachegrind %>%
    group_by(shell_label) %>%
    summarize(total_lines = sum(num_lines), total_irefs = sum(as.numeric(irefs))) %>%
    mutate(thousand_irefs_per_line = total_irefs / total_lines / 1000) %>%
    select(-c(total_irefs)) ->
    cachegrind_summary

  # 'no-host' means a CI/container run where not all shells are present, so
  # the flat (unpivoted) tables are written instead of the wide ones.
  if ("no-host" %in% distinct_hosts$host_label) {

    # We don't have all the shells
    elapsed = NULL
    rate = NULL
    max_rss = NULL
    instructions = NULL

    joined_times %>%
      select(c(shell_label, elapsed_ms, user_ms, sys_ms, max_rss_MB,
               num_lines, filename, filename_HREF)) %>%
      arrange(filename, elapsed_ms) ->
      times_flat

    joined_cachegrind %>%
      select(c(shell_label, irefs, num_lines, filename, filename_HREF)) %>%
      arrange(filename, irefs) ->
      cachegrind_flat

  } else {

    times_flat = NULL
    cachegrind_flat = NULL

    # Hack for release. TODO: unify with Soil
    if (Sys.getenv("OILS_NO_SOUFFLE") == "") {
      souffle_col = c('osh-native-souffle')
    } else {
      souffle_col = c()
    }

    cols1 = c('host_label', 'bash', 'dash', 'mksh', 'zsh',
              'osh-ovm', 'osh-cpython', 'osh-native', souffle_col,
              'osh_to_bash_ratio', 'num_lines', 'filename', 'filename_HREF')

    # Elapsed seconds for each shell by platform and file
    joined_times %>%
      select(-c(lines_per_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = elapsed_ms) %>%
      arrange(host_label, num_lines) %>%
      mutate(osh_to_bash_ratio = `osh-native` / bash) %>%
      select(all_of(cols1)) ->
      elapsed

    Log('\n')
    Log('ELAPSED')
    print(elapsed)

    cols2 = c('host_label', 'bash', 'dash', 'mksh', 'zsh',
              'osh-ovm', 'osh-cpython', 'osh-native', souffle_col,
              'num_lines', 'filename', 'filename_HREF')
    # Rates by file and shell
    joined_times %>%
      select(-c(elapsed_ms, user_ms, sys_ms, max_rss_MB)) %>%
      spread(key = shell_label, value = lines_per_ms) %>%
      arrange(host_label, num_lines) %>%
      select(all_of(cols2)) ->
      rate

    Log('\n')
    Log('RATE')
    print(rate)

    # Memory usage by file
    joined_times %>%
      select(-c(elapsed_ms, lines_per_ms, user_ms, sys_ms)) %>%
      spread(key = shell_label, value = max_rss_MB) %>%
      arrange(host_label, num_lines) %>%
      select(all_of(cols2)) ->
      max_rss

    Log('\n')
    Log('MAX RSS')
    print(max_rss)

    Log('\n')
    Log('joined_cachegrind has %d rows', nrow(joined_cachegrind))
    print(joined_cachegrind)
    #print(joined_cachegrind %>% filter(path == 'benchmarks/testdata/configure-helper.sh'))

    cols3 = c('bash', 'dash', 'mksh', 'osh-native', souffle_col,
              'num_lines', 'filename', 'filename_HREF')

    # Cachegrind instructions by file
    joined_cachegrind %>%
      mutate(thousand_irefs_per_line = irefs / num_lines / 1000) %>%
      select(-c(irefs)) %>%
      spread(key = shell_label, value = thousand_irefs_per_line) %>%
      arrange(num_lines) %>%
      select(all_of(cols3)) ->
      instructions

    Log('\n')
    Log('instructions has %d rows', nrow(instructions))
    print(instructions)
  }

  WriteProvenance(distinct_hosts, distinct_shells, out_dir)

  # filename is usable in filename_HREF because tibble() evaluates columns
  # sequentially.
  raw_data_table = tibble(
    filename = basename(as.character(raw_data$path)),
    filename_HREF = benchmarkDataLink('osh-parser', filename, '')
  )
  #print(raw_data_table)

  writeCsv(raw_data_table, file.path(out_dir, 'raw-data'))

  precision = SamePrecision(0) # lines per ms
  writeCsv(times_summary, file.path(out_dir, 'summary'), precision)

  precision = ColumnPrecision(list(), default = 1)
  writeTsv(cachegrind_summary, file.path(out_dir, 'cachegrind_summary'), precision)

  # The flat tables only exist in the no-host case, the wide ones otherwise.
  if (!is.null(times_flat)) {
    precision = SamePrecision(0)
    writeTsv(times_flat, file.path(out_dir, 'times_flat'), precision)
  }

  if (!is.null(cachegrind_flat)) {
    precision = SamePrecision(0)
    writeTsv(cachegrind_flat, file.path(out_dir, 'cachegrind_flat'), precision)
  }

  if (!is.null(elapsed)) { # equivalent to no-host
    # Round to nearest millisecond, but the ratio has a decimal point.
    precision = ColumnPrecision(list(osh_to_bash_ratio = 1), default = 0)
    writeCsv(elapsed, file.path(out_dir, 'elapsed'), precision)

    precision = SamePrecision(0)
    writeCsv(rate, file.path(out_dir, 'rate'), precision)

    writeCsv(max_rss, file.path(out_dir, 'max_rss'))

    precision = SamePrecision(1)
    writeTsv(instructions, file.path(out_dir, 'instructions'), precision)
  }

  Log('Wrote %s', out_dir)
}
434
WriteProvenance = function(distinct_hosts, distinct_shells, out_dir, tsv = FALSE) {
  ### Write host and shell ID tables (with links) to out_dir.
  #
  # tsv = FALSE (was the reassignable shorthand F): write CSV by default,
  # TSV when requested.  Same default value, spelled safely.

  # With a single host, link within the report tree; otherwise link to the
  # published benchmark-data repo.
  if (nrow(distinct_hosts) == 1) {
    linkify = provenanceLink
  } else {
    linkify = benchmarkDataLink
  }

  Log('distinct_hosts')
  print(distinct_hosts)
  Log('')

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  # Should be:
  # host_id_url
  # And then csv_to_html will be smart enough? It should take --url flag?
  # NOTE: host_id is usable in host_id_HREF because tibble() evaluates
  # columns sequentially.
  host_table = tibble(
    host_label = distinct_hosts$host_label,
    host_id = paste(distinct_hosts$host_name,
                    distinct_hosts$host_hash, sep='-'),
    host_id_HREF = linkify('host-id', host_id, '/')
  )
  Log('host_table')
  print(host_table)
  Log('')

  shell_table = tibble(
    shell_label = distinct_shells$shell_label,
    shell_id = paste(distinct_shells$shell_name,
                     distinct_shells$shell_hash, sep='-'),
    shell_id_HREF = linkify('shell-id', shell_id, '/')
  )

  Log('shell_table')
  print(shell_table)
  Log('')

  # Pick the writer once instead of duplicating both calls.
  writer = if (tsv) writeTsv else writeCsv
  writer(host_table, file.path(out_dir, 'hosts'))
  writer(shell_table, file.path(out_dir, 'shells'))
}
484
WriteSimpleProvenance = function(provenance, out_dir) {
  ### Derive distinct host/shell tables from a flat provenance table and
  ### write them out as TSV via WriteProvenance().
  Log('provenance')
  print(provenance)
  Log('')

  # Legacy: add $shell_name, because "$shell_basename-$shell_hash" is what
  # benchmarks/id.sh publish-shell-id uses
  distinct_shells = provenance %>%
    mutate(shell_name = basename(sh_path)) %>%
    distinct(shell_label, shell_name, shell_hash)

  Log('distinct_shells')
  print(distinct_shells)
  Log('')

  distinct_hosts = distinct(provenance, host_label, host_name, host_hash)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
}
505
RuntimeReport = function(in_dir, out_dir) {
  ### Report for benchmarks/osh-runtime: elapsed time, page faults, and
  ### max RSS per workload/shell, plus GC stats joined onto the task rows.
  times = readTsv(file.path(in_dir, 'times.tsv'))

  gc_stats = readTsv(file.path(in_dir, 'gc_stats.tsv'))
  provenance = readTsv(file.path(in_dir, 'provenance.tsv'))

  # Fail fast if any task exited non-zero.
  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some osh-runtime tasks failed')
  }

  # Joins:
  # times <= sh_path => provenance
  # times <= join_id, host_name => gc_stats

  # TODO: provenance may have rows from 2 machines. Could validate them and
  # deduplicate.

  # It should have (host_label, host_name, host_hash)
  # (shell_label, sh_path, shell_hash)
  provenance %>%
    mutate(host_label = host_name, shell_label = ShellLabelFromPath(sh_path)) ->
    provenance

  provenance %>% distinct(sh_path, shell_label) -> label_lookup

  Log('label_lookup')
  print(label_lookup)

  # Join with provenance for host label and shell label
  times %>%
    select(c(elapsed_secs, user_secs, sys_secs, max_rss_KiB, task_id,
             host_name, sh_path, workload)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload) ->
    details

  # Same shape as 'details' but with the I/O and scheduling counters.
  times %>%
    select(c(task_id, host_name, sh_path, workload, minor_faults, major_faults, swaps, in_block, out_block, signals, voluntary_ctx, involuntary_ctx)) %>%
    left_join(label_lookup, by = c('sh_path')) %>%
    select(-c(sh_path)) %>%
    # we want to compare workloads on adjacent rows
    arrange(workload) ->
    details_io

  Log('details')
  print(details)

  # Hack for release. TODO: unify with Soil
  if (Sys.getenv("OILS_NO_SOUFFLE") == "") {
    souffle_col = c('osh-native-souffle')
  } else {
    souffle_col = c()
  }

  cols2 = c('workload', 'host_name',
            'bash', 'dash', 'osh-cpython', 'osh-native', souffle_col,
            'py_bash_ratio', 'native_bash_ratio')

  # Elapsed time comparison
  details %>%
    select(-c(task_id, user_ms, sys_ms, max_rss_MB)) %>%
    spread(key = shell_label, value = elapsed_ms) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(all_of(cols2)) ->
    elapsed

  Log('elapsed')
  print(elapsed)

  # Minor Page Faults Comparison
  details_io %>%
    select(c(host_name, shell_label, workload, minor_faults)) %>%
    spread(key = shell_label, value = minor_faults) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(all_of(cols2)) ->
    page_faults

  Log('page_faults')
  print(page_faults)

  # Max RSS comparison
  details %>%
    select(c(host_name, shell_label, workload, max_rss_MB)) %>%
    spread(key = shell_label, value = max_rss_MB) %>%
    mutate(py_bash_ratio = `osh-cpython` / bash) %>%
    mutate(native_bash_ratio = `osh-native` / bash) %>%
    arrange(workload, host_name) %>%
    select(all_of(cols2)) ->
    max_rss

  Log('max rss')
  print(max_rss)

  # Build the join key 'gc-<task_id>' matching the join_id in gc_stats.tsv.
  details %>%
    select(c(task_id, host_name, workload, elapsed_ms, max_rss_MB)) %>%
    mutate(join_id = sprintf("gc-%d", task_id)) %>%
    select(-c(task_id)) ->
    gc_details

  Log('GC details')
  print(gc_details)
  Log('')

  Log('GC stats')
  print(gc_stats)
  Log('')

  gc_stats %>%
    left_join(gc_details, by = c('join_id', 'host_name')) %>%
    select(-c(join_id, roots_capacity, objs_capacity)) %>%
    # Do same transformations as GcReport()
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    select(-c(bytes_allocated)) %>%
    rename(num_gc_done = num_collections) %>%
    # Put these columns first
    relocate(workload, host_name,
             elapsed_ms, max_gc_millis, total_gc_millis,
             allocated_MB, max_rss_MB, num_allocated) ->
    gc_stats

  Log('After GC stats')
  print(gc_stats)
  Log('')

  WriteSimpleProvenance(provenance, out_dir)

  # milliseconds don't need decimal digit
  precision = ColumnPrecision(list(bash = 0, dash = 0, `osh-cpython` = 0,
                                   `osh-native` = 0, `osh-native-souffle` = 0, py_bash_ratio = 2,
                                   native_bash_ratio = 2))
  writeTsv(elapsed, file.path(out_dir, 'elapsed'), precision)
  writeTsv(page_faults, file.path(out_dir, 'page_faults'), precision)

  precision2 = ColumnPrecision(list(py_bash_ratio = 2, native_bash_ratio = 2))
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  precision3 = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                               default = 0)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision3)

  writeTsv(details, file.path(out_dir, 'details'), precision3)
  writeTsv(details_io, file.path(out_dir, 'details_io'))

  Log('Wrote %s', out_dir)
}
664
VmBaselineReport = function(in_dir, out_dir) {
  ### Report for benchmarks/vm-baseline: VmRSS / VmPeak of each shell,
  ### converted from KiB to MB, one row per shell/host.
  vm = readTsv(file.path(in_dir, 'vm-baseline.tsv'))
  #print(vm)

  # Not using DistinctHosts() because field host_hash isn't collected
  num_hosts = nrow(vm %>% distinct(host))

  vm %>%
    rename(kib = metric_value) %>%
    mutate(shell_label = ShellLabels(shell_name, shell_hash, num_hosts),
           megabytes = kib * 1024 / 1e6) %>%
    select(-c(shell_name, kib)) %>%
    # One column per metric name (VmPeak, VmRSS)
    spread(key = c(metric_name), value = megabytes) %>%
    rename(VmPeak_MB = VmPeak, VmRSS_MB = VmRSS) %>%
    select(c(shell_label, shell_hash, host, VmRSS_MB, VmPeak_MB)) %>%
    arrange(shell_label, shell_hash, host, VmPeak_MB) ->
    vm

  print(vm)

  writeTsv(vm, file.path(out_dir, 'vm-baseline'))
}
687
WriteOvmBuildDetails = function(distinct_hosts, distinct_compilers, out_dir) {
  ### Write host and compiler ID tables for the ovm-build report.
  # host_id is usable in host_id_HREF because tibble() evaluates columns
  # sequentially.
  host_table = tibble(
    host_label = distinct_hosts$host_label,
    host_id = paste(distinct_hosts$host_name,
                    distinct_hosts$host_hash, sep='-'),
    host_id_HREF = benchmarkDataLink('host-id', host_id, '/')
  )
  print(host_table)

  compiler_table = tibble(
    compiler_label = distinct_compilers$compiler_label,
    compiler_id = paste(distinct_compilers$compiler_label,
                        distinct_compilers$compiler_hash, sep='-'),
    compiler_id_HREF = benchmarkDataLink('compiler-id', compiler_id, '/')
  )
  print(compiler_table)

  writeTsv(host_table, file.path(out_dir, 'hosts'))
  writeTsv(compiler_table, file.path(out_dir, 'compilers'))
}
708
OvmBuildReport = function(in_dir, out_dir) {
  ### Report for benchmarks/ovm-build: build times per host/compiler/action,
  ### and sizes of the produced native binaries.
  times = readTsv(file.path(in_dir, 'times.tsv'))
  native_sizes = readTsv(file.path(in_dir, 'native-sizes.tsv'))
  #raw_data = readTsv(file.path(in_dir, 'raw-data.tsv'))

  # Fail fast if any build task exited non-zero.
  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some ovm-build tasks failed')
  }

  # Label hosts by name and compilers by binary basename.
  times %>% distinct(host_name, host_hash) -> distinct_hosts
  distinct_hosts$host_label = distinct_hosts$host_name

  times %>% distinct(compiler_path, compiler_hash) -> distinct_compilers
  distinct_compilers$compiler_label = basename(distinct_compilers$compiler_path)

  #print(distinct_hosts)
  #print(distinct_compilers)

  WriteOvmBuildDetails(distinct_hosts, distinct_compilers, out_dir)

  times %>%
    select(-c(status)) %>%
    left_join(distinct_hosts, by = c('host_name', 'host_hash')) %>%
    left_join(distinct_compilers, by = c('compiler_path', 'compiler_hash')) %>%
    select(-c(host_name, host_hash, compiler_path, compiler_hash)) %>%
    mutate(src_dir = basename(src_dir),
           host_label = paste("host ", host_label),
           is_conf = str_detect(action, 'configure'),
           is_ovm = str_detect(action, 'oil.ovm'),
           is_dbg = str_detect(action, 'dbg'),
           ) %>%
    select(host_label, src_dir, compiler_label, action, is_conf, is_ovm, is_dbg,
           elapsed_secs) %>%
    # One column per host; the is_* flags exist only to drive the sort order
    # below and are dropped afterwards.
    spread(key = c(host_label), value = elapsed_secs) %>%
    arrange(src_dir, compiler_label, desc(is_conf), is_ovm, desc(is_dbg)) %>%
    select(-c(is_conf, is_ovm, is_dbg)) ->
    times

  #print(times)

  # paths look like _tmp/ovm-build/bin/clang/oils_cpp.stripped
  native_sizes %>%
    select(c(host_label, path, num_bytes)) %>%
    mutate(host_label = paste("host ", host_label),
           binary = basename(path),
           compiler = basename(dirname(path)),
           ) %>%
    select(-c(path)) %>%
    spread(key = c(host_label), value = num_bytes) %>%
    arrange(compiler, binary) ->
    native_sizes

  # NOTE: These don't have the host and compiler.
  writeTsv(times, file.path(out_dir, 'times'))
  writeTsv(native_sizes, file.path(out_dir, 'native-sizes'))

  # TODO: I want a size report too
  #writeCsv(sizes, file.path(out_dir, 'sizes'))
}
770
unique_stdout_md5sum = function(t, num_expected) {
  ### Sanity check: every runtime should produce identical stdout per task,
  ### so the number of distinct md5sums must equal num_expected.
  u = n_distinct(t$stdout_md5sum)
  if (u != num_expected) {
    # Show which rows disagree before failing.
    print(select(t, c(host_name, task_name, arg1, arg2, runtime_name,
                      stdout_md5sum)))
    stop(sprintf('Expected %d unique md5sums, got %d', num_expected, u))
  }
}
778
ComputeReport = function(in_dir, out_dir) {
  ### Report for benchmarks/compute: verify stdout md5sums agree across
  ### runtimes, then compare time/memory per task.

  # TSV file, not CSV
  times = read.table(file.path(in_dir, 'times.tsv'), header=T)
  print(times)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some compute tasks failed')
  }

  #
  # Check correctness
  #

  times %>% filter(task_name == 'hello') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'fib') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'for_loop') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'control_flow') %>% unique_stdout_md5sum(1)
  times %>% filter(task_name == 'word_freq') %>% unique_stdout_md5sum(1)
  # 3 different inputs
  times %>% filter(task_name == 'parse_help') %>% unique_stdout_md5sum(3)

  times %>% filter(task_name == 'bubble_sort') %>% unique_stdout_md5sum(2)

  # TODO:
  # - oils_cpp doesn't implement unicode LANG=C
  # - bash behaves differently on your desktop vs. in the container
  # - might need layer-locales in the image?

  #times %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% unique_stdout_md5sum(1)
  # Ditto here
  #times %>% filter(task_name == 'palindrome' & arg1 == 'bytes') %>% unique_stdout_md5sum(1)

  #
  # Find distinct shells and hosts, and label them for readability.
  #

  # Runtimes are called shells, as a hack for code reuse
  times %>%
    mutate(shell_name = runtime_name, shell_hash = runtime_hash) %>%
    select(c(host_name, host_hash, shell_name, shell_hash)) ->
    tmp

  distinct_hosts = DistinctHosts(tmp)
  Log('')
  Log('Distinct hosts')
  print(distinct_hosts)

  distinct_shells = DistinctShells(tmp)
  Log('')
  Log('Distinct runtimes')
  print(distinct_shells)

  num_hosts = nrow(distinct_hosts)

  # NOTE(review): runtime_hash is dropped by the select() here but referenced
  # in the mutate() below; dplyr's data mask would then fall back to the
  # enclosing environment, where no runtime_hash exists -- verify this
  # pipeline actually runs (moving the mutate before the select would fix it).
  times %>%
    select(-c(status, stdout_md5sum, stdout_filename, host_hash, runtime_hash)) %>%
    mutate(runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
           elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(runtime_name, elapsed_secs, user_secs, sys_secs, max_rss_KiB)) %>%
    arrange(host_name, task_name, arg1, arg2, user_ms) ->
    details

  # Table linking each row to the saved stdout file for inspection.
  times %>%
    mutate(
      runtime_label = ShellLabels(runtime_name, runtime_hash, num_hosts),
      stdout_md5sum_HREF = file.path('tmp', task_name, stdout_filename)) %>%
    select(c(host_name, task_name, arg1, arg2, runtime_label,
             stdout_md5sum, stdout_md5sum_HREF)) ->
    stdout_files

  # One table per task.
  details %>% filter(task_name == 'hello') %>% select(-c(task_name)) -> hello
  details %>% filter(task_name == 'fib') %>% select(-c(task_name)) -> fib
  details %>% filter(task_name == 'for_loop') %>% select(-c(task_name)) -> for_loop
  details %>% filter(task_name == 'control_flow') %>% select(-c(task_name)) -> control_flow
  details %>% filter(task_name == 'word_freq') %>% select(-c(task_name)) -> word_freq
  # There's no arg2
  details %>% filter(task_name == 'parse_help') %>% select(-c(task_name, arg2)) -> parse_help

  details %>% filter(task_name == 'bubble_sort') %>% select(-c(task_name)) -> bubble_sort
  details %>% filter(task_name == 'palindrome' & arg1 == 'unicode') %>% select(-c(task_name)) -> palindrome

  precision = ColumnPrecision(list(max_rss_MB = 1), default = 0)
  writeTsv(details, file.path(out_dir, 'details'), precision)

  writeTsv(stdout_files, file.path(out_dir, 'stdout_files'), precision)

  writeTsv(hello, file.path(out_dir, 'hello'), precision)
  writeTsv(fib, file.path(out_dir, 'fib'), precision)
  writeTsv(word_freq, file.path(out_dir, 'word_freq'), precision)
  writeTsv(for_loop, file.path(out_dir, 'for_loop'), precision)
  writeTsv(control_flow, file.path(out_dir, 'control_flow'), precision)
  writeTsv(parse_help, file.path(out_dir, 'parse_help'), precision)

  writeTsv(bubble_sort, file.path(out_dir, 'bubble_sort'), precision)
  writeTsv(palindrome, file.path(out_dir, 'palindrome'), precision)

  WriteProvenance(distinct_hosts, distinct_shells, out_dir, tsv = T)
}
882
WriteOneTask = function(times, out_dir, task_name, precision) {
  ### Write the rows for a single task to its own TSV file.
  subset = times %>% filter(task == task_name) %>% select(-c(task))
  writeTsv(subset, file.path(out_dir, task_name), precision)
}
890
# Display order for shells in the GC benchmark tables: system shells first,
# then OSH variants built with different allocator configurations.
SHELL_ORDER = c('dash',
                'bash',
                'zsh',
                '_bin/cxx-opt+bumpleak/osh',
                '_bin/cxx-opt+bumproot/osh',
                '_bin/cxx-opt+bumpsmall/osh',
                '_bin/cxx-opt/osh',
                '_bin/cxx-opt+nopool/osh')
899
GcReport = function(in_dir, out_dir) {
  ### Report for benchmarks/gc: per-task times and GC statistics, joined
  ### on join_id, plus one TSV per task.
  times = read.table(file.path(in_dir, 'raw/times.tsv'), header=T)
  gc_stats = read.table(file.path(in_dir, 'stage1/gc_stats.tsv'), header=T)

  # Fail fast if any task exited non-zero.
  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  # Change units and order columns
  times %>%
    arrange(task, factor(sh_path, levels = SHELL_ORDER)) %>%
    mutate(elapsed_ms = elapsed_secs * 1000,
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6,
           shell_label = ShellLabelFromPath(sh_path)
           ) %>%
    select(c(join_id, task, elapsed_ms, user_ms, sys_ms, max_rss_MB, shell_label,
             shell_runtime_opts)) ->
    times

  # Join and order columns
  gc_stats %>% left_join(times, by = c('join_id')) %>%
    arrange(desc(task)) %>%
    mutate(allocated_MB = bytes_allocated / 1e6) %>%
    # try to make the table skinnier
    rename(num_gc_done = num_collections) %>%
    select(task, elapsed_ms, max_gc_millis, total_gc_millis,
           allocated_MB, max_rss_MB, num_allocated,
           num_gc_points, num_gc_done, gc_threshold, num_growths, max_survived,
           shell_label) ->
    gc_stats

  # join_id was only needed for the join above.
  times %>% select(-c(join_id)) -> times


  precision = ColumnPrecision(list(max_rss_MB = 1, allocated_MB = 1),
                              default = 0)

  writeTsv(times, file.path(out_dir, 'times'), precision)
  writeTsv(gc_stats, file.path(out_dir, 'gc_stats'), precision)

  tasks = c('parse.configure-coreutils',
            'parse.configure-cpython',
            'parse.abuild',
            'ex.compute-fib',
            'ex.bashcomp-parse-help',
            'ex.abuild-print-help')
  # Write out separate rows
  for (task in tasks) {
    WriteOneTask(times, out_dir, task, precision)
  }
}
955
GcCachegrindReport = function(in_dir, out_dir) {
  ### Report cachegrind instruction counts for the GC benchmark tasks,
  ### one TSV per task.
  times = readTsv(file.path(in_dir, 'raw/times.tsv'))
  counts = readTsv(file.path(in_dir, 'stage1/cachegrind.tsv'))

  # Fail fast if any task exited non-zero.
  failed = times %>% filter(status != 0)
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some gc tasks failed')
  }

  print(times)
  print(counts)

  counts = counts %>%
    left_join(times, by = c('join_id')) %>%
    mutate(million_irefs = irefs / 1e6) %>%
    select(c(million_irefs, task, sh_path, shell_runtime_opts)) %>%
    arrange(factor(sh_path, levels = SHELL_ORDER))

  precision = NULL
  for (task in c('parse.abuild', 'ex.compute-fib')) {
    WriteOneTask(counts, out_dir, task, precision)
  }
}
981
MyCppReport = function(in_dir, out_dir) {
  ### Report for mycpp benchmarks: compare user/sys time and max RSS of the
  ### C++ translation vs. the Python original for each example.
  times = readTsv(file.path(in_dir, 'benchmark-table.tsv'))
  print(times)

  times %>% filter(status != 0) -> failed
  if (nrow(failed) != 0) {
    print(failed)
    stop('Some mycpp tasks failed')
  }

  # Don't care about elapsed and system
  times %>% select(-c(status, elapsed_secs, bin, task_out)) %>%
    mutate(example_name_HREF = mycppUrl(example_name),
           gen = c('gen'),
           gen_HREF = genUrl(example_name),
           user_ms = user_secs * 1000,
           sys_ms = sys_secs * 1000,
           max_rss_MB = max_rss_KiB * 1024 / 1e6) %>%
    select(-c(user_secs, sys_secs, max_rss_KiB)) ->
    details

  # Pivot to one column per impl (C++, Python), sorted by the ratio.
  details %>% select(-c(sys_ms, max_rss_MB)) %>%
    spread(key = impl, value = user_ms) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`) ->
    user_time

  details %>% select(-c(user_ms, max_rss_MB)) %>%
    spread(key = impl, value = sys_ms) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`) ->
    sys_time

  details %>% select(-c(user_ms, sys_ms)) %>%
    spread(key = impl, value = max_rss_MB) %>%
    mutate(`C++ : Python` = `C++` / Python) %>%
    arrange(`C++ : Python`) ->
    max_rss

  # Sometimes it speeds up by more than 10x
  precision1 = ColumnPrecision(list(`C++ : Python` = 3), default = 0)
  writeTsv(user_time, file.path(out_dir, 'user_time'), precision1)
  writeTsv(sys_time, file.path(out_dir, 'sys_time'), precision1)

  precision2 = ColumnPrecision(list(`C++ : Python` = 2), default = 1)
  writeTsv(max_rss, file.path(out_dir, 'max_rss'), precision2)

  writeTsv(details, file.path(out_dir, 'details'))
}
1031
UftraceTaskReport = function(env, task_name, summaries) {
  ### Print an allocation-profile report for one uftrace task.
  ###
  ### Reads the task's data frames (untyped, typed, strings, slabs, reserve)
  ### from env[[task_name]], prints histograms and totals, and appends a
  ### 1-row stats tibble plus a type-count table to 'summaries' for the
  ### cross-task summary.
  ###
  ### Caller redirects stdout with sink(), so everything printed here lands
  ### in the task's .txt report file.

  # Need this again after redirect
  MaybeDisableColor(stdout())

  task_env = env[[task_name]]

  untyped = task_env$untyped
  typed = task_env$typed
  strings = task_env$strings
  slabs = task_env$slabs
  reserve = task_env$reserve

  # obj_len = payload length plus the fixed per-string overhead below.
  string_overhead = 17 # GC header (8) + len (4) + hash value (4) + NUL (1)
  strings %>% mutate(obj_len = str_len + string_overhead) -> strings

  # TODO: Output these totals PER WORKLOAD, e.g. parsing big/small, executing
  # big/small
  #
  # And then zoom in on distributions as well

  num_allocs = nrow(untyped)
  total_bytes = sum(untyped$obj_len)

  # Histogram: one row per distinct allocation size.
  untyped %>% group_by(obj_len) %>% count() %>% ungroup() -> untyped_hist
  #print(untyped_hist)

  # Cumulative distribution: percent of allocs at or below each size.
  untyped_hist %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_allocs) ->
    alloc_sizes

  a24 = untyped_hist %>% filter(obj_len <= 24)
  a48 = untyped_hist %>% filter(obj_len <= 48)
  a96 = untyped_hist %>% filter(obj_len <= 96)

  allocs_24_bytes_or_less = sum(a24$n) * 100.0 / num_allocs
  allocs_48_bytes_or_less = sum(a48$n) * 100.0 / num_allocs
  allocs_96_bytes_or_less = sum(a96$n) * 100.0 / num_allocs

  Log('Percentage of allocs less than 48 bytes: %.1f', allocs_48_bytes_or_less)

  # Show more tibble rows than the default in the printed report.
  options(tibble.print_min=25)

  Log('')
  Log('All allocations')
  print(alloc_sizes %>% head(22))
  print(alloc_sizes %>% tail(5))

  Log('')
  Log('Common Sizes')
  print(untyped_hist %>% arrange(desc(n)) %>% head(8))

  Log('')
  Log(' %s total allocations, total bytes = %s', commas(num_allocs), commas(total_bytes))
  Log('')

  Log('Typed allocations')

  num_typed = nrow(typed)

  # Count typed allocations per type (func_name), most common first.
  typed %>% group_by(func_name) %>% count() %>% ungroup() %>%
    mutate(percent = n * 100.0 / num_typed) %>%
    arrange(desc(n)) -> most_common_types

  print(most_common_types %>% head(20))
  print(most_common_types %>% tail(5))

  # All List<T> instantiations, regardless of element type.
  lists = typed %>% filter(str_starts(func_name, ('List<')))
  #print(lists)

  num_lists = nrow(lists)
  total_list_bytes = num_lists * 24 # sizeof List<T> head is hard-coded

  Log('')
  Log('%s typed allocs, including %s List<T>', commas(num_typed), commas(num_lists))
  Log('%.2f%% of allocs are typed', num_typed * 100 / num_allocs)
  Log('')

  #
  # Strings
  #

  num_strings = nrow(strings)
  total_string_bytes = sum(strings$obj_len)

  # Cumulative distribution of string payload lengths.
  strings %>% group_by(str_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_strings) ->
    string_lengths

  # NOTE(review): these assume a row with str_len exactly 6 / 14 exists in
  # the workload; if not, the filter yields an empty tibble and the
  # sprintf() in the summary table below gets character(0) — confirm inputs.
  strs_6_bytes_or_less = string_lengths %>% filter(str_len == 6) %>% select(percent)
  strs_14_bytes_or_less = string_lengths %>% filter(str_len == 14) %>% select(percent)

  # Parse workload
  # 62% of strings <= 6 bytes
  # 84% of strings <= 14 bytes

  Log('Str - NewStr() and OverAllocatedStr()')
  print(string_lengths %>% head(16))
  print(string_lengths %>% tail(5))
  Log('')

  Log('%s string allocations, total length = %s, total bytes = %s', commas(num_strings),
      commas(sum(strings$str_len)), commas(total_string_bytes))
  Log('')
  Log('%.2f%% of allocs are strings', num_strings * 100 / num_allocs)
  Log('%.2f%% of bytes are strings', total_string_bytes * 100 / total_bytes)
  Log('')

  #
  # Slabs
  #

  Log('NewSlab()')

  num_slabs = nrow(slabs)
  # Cumulative distribution of slab lengths.
  slabs %>% group_by(slab_len) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_slabs) ->
    slab_lengths

  # Slab counts per allocating type, most common first.
  slabs %>% group_by(func_name) %>% count() %>% ungroup() %>%
    arrange(desc(n)) -> slab_types

  Log(' Lengths')
  print(slab_lengths %>% head())
  print(slab_lengths %>% tail(5))
  Log('')

  Log(' Slab Types')
  print(slab_types %>% head())
  print(slab_types %>% tail(5))
  Log('')

  # NOTE(review): computed but unused — the Log() below recomputes the sum.
  total_slab_items = sum(slabs$slab_len)

  Log('%s slabs, total items = %s', commas(num_slabs),
      commas(sum(slabs$slab_len)))
  Log('%.2f%% of allocs are slabs', num_slabs * 100 / num_allocs)
  Log('')

  #
  # reserve() calls
  #

  # There should be strictly more List::reserve() calls than NewSlab

  Log('::reserve(int n)')
  Log('')

  num_reserve = nrow(reserve)
  # Cumulative distribution of reserve() argument values.
  reserve %>% group_by(num_items) %>% count() %>% ungroup() %>%
    mutate(n_less_than = cumsum(n),
           percent = n_less_than * 100.0 / num_reserve) ->
    reserve_args

  Log(' Num Items')
  print(reserve_args %>% head(15))
  print(reserve_args %>% tail(5))
  Log('')

  Log('%s reserve() calls, total items = %s', commas(num_reserve),
      commas(sum(reserve$num_items)))
  Log('')

  # Accounting for all allocations!
  Log('Untyped: %s', commas(num_allocs))
  Log('Typed + Str + Slab: %s', commas(num_typed + num_strings + num_slabs))
  Log('')

  num_other_typed = num_typed - num_lists

  # Summary table
  # One row per task; bind_rows() in UftraceReport concatenates them.
  # Values are pre-formatted strings for PrettyPrintLong().
  stats = tibble(task = task_name,
                 total_bytes_ = commas(total_bytes),
                 num_allocs_ = commas(num_allocs),
                 sum_typed_strs_slabs = commas(num_typed + num_strings + num_slabs),
                 num_reserve_calls = commas(num_reserve),

                 percent_list_allocs = Percent(num_lists, num_allocs),
                 percent_slab_allocs = Percent(num_slabs, num_allocs),
                 percent_string_allocs = Percent(num_strings, num_allocs),
                 percent_other_typed_allocs = Percent(num_other_typed, num_allocs),

                 percent_list_bytes = Percent(total_list_bytes, total_bytes),
                 percent_string_bytes = Percent(total_string_bytes, total_bytes),

                 allocs_24_bytes_or_less = sprintf('%.1f%%', allocs_24_bytes_or_less),
                 allocs_48_bytes_or_less = sprintf('%.1f%%', allocs_48_bytes_or_less),
                 allocs_96_bytes_or_less = sprintf('%.1f%%', allocs_96_bytes_or_less),

                 strs_6_bytes_or_less = sprintf('%.1f%%', strs_6_bytes_or_less),
                 strs_14_bytes_or_less = sprintf('%.1f%%', strs_14_bytes_or_less),
                 )
  summaries$stats[[task_name]] = stats

  summaries$most_common_types[[task_name]] = most_common_types
}
1230
LoadUftraceTsv = function(in_dir, env) {
  ### Load per-task uftrace TSV files into child environments of 'env'.
  ###
  ### For each subdirectory of in_dir (one per task), creates env[[task]]
  ### holding the five allocation tables, and prints a summary of each.

  for (task in list.files(in_dir)) {
    Log('Loading data for task %s', task)
    base_dir = file.path(in_dir, task)

    task_env = new.env()
    env[[task]] = task_env

    # TSV file, not CSV
    tables = c(untyped = 'all-untyped.tsv',
               typed = 'typed.tsv',
               strings = 'strings.tsv',
               slabs = 'slabs.tsv',
               reserve = 'reserve.tsv')
    for (name in names(tables)) {
      task_env[[name]] = readTsv(file.path(base_dir, tables[[name]]))
    }

    # median string length is 4, mean is 9.5!
    labels = c(untyped = 'UNTYPED',
               typed = 'TYPED',
               strings = 'STRINGS',
               slabs = 'SLABS',
               reserve = 'RESERVE')
    for (name in names(labels)) {
      Log(labels[[name]])
      print(summary(task_env[[name]]))
      Log('')
    }
  }
}
1268
Percent = function(n, total) {
  ### Format the ratio n / total as a percentage string, e.g. '25.0%'.
  ratio = n / total
  sprintf('%.1f%%', ratio * 100.0)
}
1272
PrettyPrintLong = function(d) {
  ### Print a wide 1-row data frame as a long label/value listing.
  ###
  ### Transposes d so each column becomes a labeled row, then prints
  ### '<name> <value...>' lines, with a blank line after certain rows to
  ### group related stats visually.

  tr = t(d) # transpose

  row_names = rownames(tr)

  # Rows after which an extra blank line is printed, for visual grouping.
  spacer_after = c('num_reserve_calls',
                   'percent_string_bytes',
                   'percent_other_typed_allocs',
                   'allocs_96_bytes_or_less')

  # seq_len() handles the empty case: 1:nrow(tr) would iterate c(1, 0) on a
  # zero-row transpose and then fail on tr[1, ].
  for (i in seq_len(nrow(tr))) {
    row_name = row_names[i]
    cat(sprintf('%26s', row_name)) # calculated min width manually
    cat(sprintf('%20s', tr[i, ]))
    cat('\n')

    # Extra spacing
    if (row_name %in% spacer_after) {
      cat('\n')
    }
  }
}
1293
1294
UftraceReport = function(env, out_dir) {
  ### Write one allocation report per task, then an overall summary.
  #
  # summaries$stats: list of 1-row data frames, one per task
  # summaries$most_common_types: list of per-task type count tables
  summaries = new.env()

  for (task in names(env)) {
    out_path = file.path(out_dir, sprintf('%s.txt', task))

    Log('Making report for task %s -> %s', task, out_path)

    # Redirect everything the task report prints into its own file.
    sink(file = out_path)
    UftraceTaskReport(env, task, summaries)
    sink() # restore normal output
  }
  Log('')

  # Concatenate the per-task 1-row data frames into one summary table.
  stats = bind_rows(as.list(summaries$stats))

  sink(file = file.path(out_dir, 'summary.txt'))

  PrettyPrintLong(stats)
  Log('')

  mct = summaries$most_common_types
  for (task in names(mct)) {
    Log('Common types in workload %s', task)
    Log('')

    print(head(mct[[task]], 5))
    Log('')
  }
  sink()

  # Returned for inspection from the REPL.
  list(stats = stats)
}
1334
main = function(argv) {
  ### Entry point: dispatch to a report function based on the action name.
  ###
  ### argv: character vector of (action, input dir, output dir).
  action = argv[[1]]
  in_dir = argv[[2]]
  out_dir = argv[[3]]

  # switch() on a string: the unnamed final element is the default branch.
  switch(action,
         'osh-parser' = ParserReport(in_dir, out_dir),
         'osh-runtime' = RuntimeReport(in_dir, out_dir),
         'vm-baseline' = VmBaselineReport(in_dir, out_dir),
         'ovm-build' = OvmBuildReport(in_dir, out_dir),
         'compute' = ComputeReport(in_dir, out_dir),
         'gc' = GcReport(in_dir, out_dir),
         'gc-cachegrind' = GcCachegrindReport(in_dir, out_dir),
         'mycpp' = MyCppReport(in_dir, out_dir),
         'uftrace' = {
           d = new.env()
           LoadUftraceTsv(in_dir, d)
           UftraceReport(d, out_dir)
         },
         {
           Log("Invalid action '%s'", action)
           quit(status = 1)
         })
  Log('PID %d done', Sys.getpid())
}
1375
# An empty call stack means this file is being executed as a script (via
# Rscript), not source()'d from another file or a REPL; only then run main().
if (length(sys.frames()) == 0) {
  # increase ggplot font size globally
  #theme_set(theme_grey(base_size = 20))

  main(commandArgs(TRUE))
}