deps/wedge-report.sh

OILS / deps / wedge-report.sh View on Github | oils.pub

109 lines, 33 significant

#!/usr/bin/env bash
#
# Report on wedge sizes for CI
#
# Usage:
#   deps/wedge-report.sh <function name>
#
# Examples:
#   deps/wedge-report.sh show
10
# Location of the shared OSH shell library.  The '=' form only assigns the
# default when LIB_OSH is unset, so callers can override it through the
# environment.  Quoted so the value is never word-split or globbed.
: "${LIB_OSH=stdlib/osh}"

# NOTE(review): bash-strict.sh presumably turns on errexit/nounset/pipefail,
# and task-five.sh presumably defines the task-five dispatcher called at the
# bottom of this file -- confirm against those files.
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"
14
# Filter: copy stdin to stdout, inserting thousands separators into each
# run of digits.
#
# Example:
#   echo '1234567 TOTAL' | commas   # => 1,234,567 TOTAL
commas() {
  # ':a' defines a label and 'ta' branches back to it whenever the preceding
  # substitution changed something.  Each pass puts one comma in front of the
  # last three digits of a number, so the loop repeats until no run of four
  # or more digits is left.  A single regex substitution can't express this.
  #
  # The two \( \) groups are: the leading digits, and the final 3 digits.
  # Note: \b and \+ in a basic regex are GNU sed extensions.
  #
  # https://shallowsky.com/blog/linux/cmdline/sed-improve-comma-insertion.html
  sed ':a;s/\b\([0-9]\+\)\([0-9]\{3\}\)\b/\1,\2/;ta'
}
25
# Print the size in bytes of every entry matching ../oils.DEPS/wedge/*/*,
# followed by a TOTAL line, as right-justified columns with thousands
# separators.
#
# Paths are relative to the current directory.  The raw listing (du output
# plus the TOTAL line) is left in _tmp/wedge-sizes.txt.
wedge-sizes() {
  local tmp=_tmp/wedge-sizes.txt

  # Make sure the scratch directory exists before redirecting into it
  mkdir -p _tmp

  # -b is --bytes, but use short flag for busybox compat
  du -s -b ../oils.DEPS/wedge/*/* | awk '
  { print $0           # print the line
    total_bytes += $1  # accumulate
  }
  END { print total_bytes " TOTAL" }
  ' > "$tmp"

  # printf justifies du output.  xargs -n 2 hands printf one (size, name)
  # pair at a time.
  commas < "$tmp" | xargs -n 2 printf '%15s %s\n'
  echo

  #du -s --si /wedge/*/*/* ~/wedge/*/*/*
  #echo
}
44
# Print a report about ../oils.DEPS:
#
#   1. a directory tree, when the 'tree' command is installed
#   2. per-wedge sizes (wedge-sizes)
#   3. the 20 biggest files under ../oils.DEPS/wedge
#   4. the most common file extensions, by file count and by total bytes
#
# Paths are relative to the current directory.  The file manifest
# ("<size> <relative path>" per line) is left in _tmp/wedge-manifest.txt.
# Returns early after printing a message when 'find' lacks -printf.
show() {
  # 4 levels deep shows the package
  if command -v tree > /dev/null; then
    tree -L 4 ../oils.DEPS
    echo
  fi

  wedge-sizes

  local tmp=_tmp/wedge-manifest.txt

  # Make sure the scratch directory exists before redirecting into it
  mkdir -p _tmp

  echo 'Biggest files'
  if ! find ../oils.DEPS/wedge -type f -a -printf '%10s %P\n' > "$tmp"; then
    # busybox find doesn't have -printf
    echo 'find -printf failed'
    return
  fi

  # head exits after 20 lines, so sort may fail while writing to the closed
  # pipe; don't let that abort the report.
  set +o errexit  # ignore SIGPIPE
  sort -n --reverse "$tmp" | head -n 20 | commas
  set -o errexit

  echo

  # Show the most common file extensions
  #
  # I feel like we should be able to get rid of .a files?  That's 92 MB, second
  # most common
  #
  # There are also duplicate .a files for Python -- should look at how distros
  # get rid of those

  # The Python program is passed in single quotes, so it must not contain any
  # single quote characters itself.
  python3 -c '
import os, sys, collections

bytes_by_ext = collections.Counter()
files_by_ext = collections.Counter()

for line in sys.stdin:
  # Each line is "<size> <path>"; split once so paths may contain spaces
  size, path = line.split(None, 1)
  path = path.strip()  # remove newline
  _, ext = os.path.splitext(path)
  size = int(size)

  bytes_by_ext[ext] += size
  files_by_ext[ext] += 1

#print(bytes_by_ext)
#print(files_by_ext)

n = 20

print("Most common file types")
for ext, count in files_by_ext.most_common()[:n]:
  print("%10d %s" % (count, ext))

print()

print("Total bytes by file type")
for ext, total_bytes in bytes_by_ext.most_common()[:n]:
  print("%10d %s" % (total_bytes, ext))
' < "$tmp" | commas
}
108
# Hand the command line to task-five.
# NOTE(review): task-five is not defined in this file; presumably it comes
# from $LIB_OSH/task-five.sh and runs the function named by the first
# argument -- confirm there.
task-five "$@"