| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Report on wedge sizes for CI
|
| 4 | #
|
| 5 | # Usage:
|
| 6 | # deps/wedge-report.sh <function name>
|
| 7 | #
|
| 8 | # Examples:
|
| 9 | # deps/wedge-report.sh show
|
| 10 |
|
# Directory of the shared shell library.  The default applies only when
# LIB_OSH is unset, so callers can override it from the environment.
: "${LIB_OSH=stdlib/osh}"

# Quoted so that a LIB_OSH containing spaces or glob characters still works.
# bash-strict.sh presumably turns on strict mode, and task-five.sh presumably
# defines the task-five dispatcher called at the bottom of this file -- neither
# file is visible here.
source "$LIB_OSH/bash-strict.sh"
source "$LIB_OSH/task-five.sh"
|
| 14 |
|
# Filter: copy stdin to stdout, inserting thousands separators into numbers,
# e.g. 1234567 -> 1,234,567.
#
# This can't be done with a single regex substitution.  Each pass splits only
# the last three digits off a run of digits, so sed loops: ':a' is a label, and
# 'ta' jumps back to it for as long as the substitution changed something.
#
# Every word-delimited run of 4 or more digits on a line is rewritten, not
# just the first column.
#
# https://shallowsky.com/blog/linux/cmdline/sed-improve-comma-insertion.html
commas() {
  # One -e per command: POSIX sed reads a label up to the end of the line, so
  # the one-liner ':a;s/.../;ta' only works where ';' ends a label (GNU sed).
  # [0-9][0-9]* is the portable spelling of [0-9]\+.
  # \b (word boundary) is still an extension, but a widely supported one.
  sed -e ':a' \
      -e 's/\b\([0-9][0-9]*\)\([0-9]\{3\}\)\b/\1,\2/' \
      -e 'ta'
}
|
| 25 |
|
# Print the size in bytes of every directory matching ../oils.DEPS/wedge/*/*,
# one per line, followed by a TOTAL line.  Sizes are right-justified and get
# thousands separators.
#
# Side effect: the raw report (no commas, no justification) is written to
# _tmp/wedge-sizes.txt.
wedge-sizes() {
  local tmp=_tmp/wedge-sizes.txt

  # Don't rely on an earlier step having created the scratch directory
  mkdir -p _tmp

  # -b is --bytes, but use short flag for busybox compat
  du -s -b ../oils.DEPS/wedge/*/* | awk '
  { print $0            # print the line
    total_bytes += $1   # accumulate
  }
  # "+ 0" makes the total print as 0, not an empty string, on empty input
  END { print total_bytes + 0 " TOTAL" }
  ' > "$tmp"

  # printf justifies du output.  A read loop is used instead of xargs, which
  # would re-tokenize each line and mangle paths containing spaces or quotes.
  local size path
  commas < "$tmp" | while IFS=$' \t' read -r size path; do
    printf '%15s %s\n' "$size" "$path"
  done
  echo

  #du -s --si /wedge/*/*/* ~/wedge/*/*/*
  #echo
}
|
| 44 |
|
# Print the full wedge report:
#
#   1. the ../oils.DEPS directory tree, if `tree` is installed
#   2. per-wedge sizes (wedge-sizes)
#   3. the 20 biggest files
#   4. the 20 most common file extensions, by file count and by total bytes
#
# Side effect: a "<size> <path relative to ../oils.DEPS/wedge>" line for every
# regular file is written to _tmp/wedge-manifest.txt.
#
# If find fails (e.g. busybox find, which lacks -printf), a message is printed
# and the function returns 0 without sections 3 and 4.
show() {
  # 4 levels deep shows the package
  if command -v tree > /dev/null; then
    tree -L 4 ../oils.DEPS
    echo
  fi

  wedge-sizes

  local tmp=_tmp/wedge-manifest.txt

  # Don't rely on an earlier step having created the scratch directory
  mkdir -p _tmp

  echo 'Biggest files'
  if ! find ../oils.DEPS/wedge -type f -a -printf '%10s %P\n' > "$tmp"; then
    # busybox find doesn't have -printf
    echo 'find -printf failed'
    return 0
  fi

  # Unlike head, awk reads all of its input, so sort can't be killed by
  # SIGPIPE.  That removes the need to switch errexit off and on around this
  # pipeline, which also hid real failures.
  sort -n -r "$tmp" | awk 'NR <= 20' | commas

  echo

  # Show the most common file extensions
  #
  # I feel like we should be able to get rid of .a files?  That's 92 MB, second
  # most common
  #
  # There are also duplicate .a files for Python -- should look at how distros
  # get rid of those

  python3 -c '
import collections
import os
import sys

bytes_by_ext = collections.Counter()
files_by_ext = collections.Counter()

for line in sys.stdin:
    fields = line.split(None, 1)
    if len(fields) != 2 or not fields[0].isdigit():
        # Not a "<size> <path>" record, e.g. the tail of a file name that
        # contains a newline
        continue
    size, path = fields
    path = path.strip()  # remove newline
    _, ext = os.path.splitext(path)

    bytes_by_ext[ext] += int(size)
    files_by_ext[ext] += 1

n = 20

print("Most common file types")
for ext, count in files_by_ext.most_common(n):
    print("%10d %s" % (count, ext))

print()

print("Total bytes by file type")
for ext, total_bytes in bytes_by_ext.most_common(n):
    print("%10d %s" % (total_bytes, ext))
' < "$tmp" | commas
}
|
| 108 |
|
# Hand the whole command line to task-five.  It is not defined in this file
# (presumably it comes from $LIB_OSH/task-five.sh); per the usage note in the
# header, the first argument names the function to run, e.g. `show`.
task-five "$@"
|