OILS / deps / wedge-report.sh View on Github | oils.pub

109 lines, 33 significant
1#!/usr/bin/env bash
2#
3# Report on wedge sizes for CI
4#
5# Usage:
6# deps/wedge-report.sh <function name>
7#
8# Examples:
9# deps/wedge-report.sh show
10
# Directory that holds the shared shell libraries.  The '=' form assigns the
# default only when LIB_OSH is unset, so a value from the caller's environment
# wins.  The expansions are quoted so a directory name containing spaces or
# glob characters is neither split nor expanded.
: "${LIB_OSH=stdlib/osh}"
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"
14
commas() {
  # Filter: copy stdin to stdout, inserting thousands separators into
  # word-bounded runs of digits, e.g. 1234567 -> 1,234,567.
  #
  # A single regex substitution can't do this.  Each pass splits the last
  # three digits off a number of four or more digits, and 't' branches back
  # to the ':a' label for as long as the previous substitution changed
  # something.
  #
  # https://shallowsky.com/blog/linux/cmdline/sed-improve-comma-insertion.html
  sed \
    -e ':a' \
    -e 's/\b\([0-9]\+\)\([0-9]\{3\}\)\b/\1,\2/' \
    -e 'ta'
}
25
wedge-sizes() {
  # Print the size in bytes of every <root>/<name>/<version> directory,
  # followed by a grand TOTAL line, then a blank line.
  #
  # Args:
  #   $1 - optional root directory to scan (default: ../oils.DEPS/wedge)
  # Output:
  #   one "<size with commas> <path>" line per directory, sizes right-justified
  #   in 15 columns
  # Side effects:
  #   creates _tmp/ if needed and overwrites _tmp/wedge-sizes.txt with the raw
  #   du lines plus the total

  local wedge_root=${1:-../oils.DEPS/wedge}
  local tmp=_tmp/wedge-sizes.txt

  # The redirect below fails outright when _tmp/ doesn't exist yet
  mkdir -p "${tmp%/*}"

  # -b is --bytes, but use short flag for busybox compat
  du -s -b "$wedge_root"/*/* | awk '
    { print $0             # pass the du line through
      total_bytes += $1    # accumulate
    }
    END { print total_bytes " TOTAL" }
  ' > "$tmp"

  # printf justifies du output.  Reading line by line keeps everything after
  # the size as one path, so a path containing whitespace or quotes is printed
  # intact instead of being re-tokenized.
  local size path
  commas < "$tmp" | while read -r size path; do
    printf '%15s %s\n' "$size" "$path"
  done
  echo

  #du -s --si /wedge/*/*/* ~/wedge/*/*/*
  #echo
}
44
show() {
  # Print a report on ../oils.DEPS to stdout:
  #   - a directory tree, when the 'tree' command is installed
  #   - per-wedge sizes (wedge-sizes)
  #   - the 20 biggest files under ../oils.DEPS/wedge
  #   - the 20 most common file extensions, by file count and by total bytes
  #
  # Side effects: creates _tmp/ if needed and overwrites
  # _tmp/wedge-manifest.txt with one "<size> <relative path>" line per file.
  #
  # Returns early, after printing a message, when find has no -printf.

  # 4 levels deep shows the package
  if command -v tree > /dev/null; then
    tree -L 4 ../oils.DEPS
    echo
  fi

  wedge-sizes

  local tmp=_tmp/wedge-manifest.txt

  # The redirect below fails outright when _tmp/ doesn't exist yet
  mkdir -p "${tmp%/*}"

  echo 'Biggest files'
  if ! find ../oils.DEPS/wedge -type f -a -printf '%10s %P\n' > "$tmp"; then
    # busybox find doesn't have -printf
    echo 'find -printf failed'
    return
  fi

  # head exits after 20 lines, so sort can be killed by SIGPIPE, which fails
  # the pipeline under pipefail.  '|| true' ignores that for this pipeline
  # only, without switching the errexit option off and on for the whole shell.
  sort -n --reverse "$tmp" | head -n 20 | commas || true

  echo

  # Show the most common file extensions
  #
  # I feel like we should be able to get rid of .a files?  That's 92 MB, second
  # most common
  #
  # There are also duplicate .a files for Python -- should look at how distros
  # get rid of those

  python3 -c '
import os, sys, collections

# Keyed by file extension ("" when the name has none)
bytes_by_ext = collections.Counter()
files_by_ext = collections.Counter()

for line in sys.stdin:
  # Each line is "<size> <path>"; split once so spaces in the path survive
  size, path = line.split(None, 1)
  path = path.strip()  # remove newline
  _, ext = os.path.splitext(path)

  bytes_by_ext[ext] += int(size)
  files_by_ext[ext] += 1

n = 20

print("Most common file types")
for ext, count in files_by_ext.most_common(n):
  print("%10d %s" % (count, ext))

print()

print("Total bytes by file type")
for ext, total_bytes in bytes_by_ext.most_common(n):
  print("%10d %s" % (total_bytes, ext))
' < "$tmp" | commas
}
108
109task-five "$@"