OILS / metrics / bytecode.sh View on Github | oils.pub

143 lines, 56 significant
1#!/usr/bin/env bash
2#
3# Metrics for Oil bytecode produced by the OPy compiler.
4#
5# This is more like a metric than a benchmark. In particular, we do NOT need
6# to run it on multiple machines! It doesn't need the provenance of binaries
7# and so forth.
8#
9# But it IS like a benchmark in that we use R to analyze data and want HTML
10# reports.
11#
12# NOTE: We will eventually have benchmarks for OPy compile time.
13#
14# Usage:
15# ./bytecode.sh <function name>
16
# Strict mode: error on unset variables, fail a pipeline when any stage fails,
# and exit on an unhandled non-zero status.
set -o nounset -o pipefail -o errexit

source build/dev-shell.sh  # R_LIBS_USER
source test/common.sh  # log

# Directory under which the disassembly tables and text reports are written.
BASE_DIR=_tmp/metrics/bytecode
readonly BASE_DIR
25
# Write the sorted keys of opy.lib.opcode's opmap to _tmp/opcodes-defined.txt,
# one per line, then print that file's line count.
#
# Needs python2 on PATH. PYTHONPATH=. is what makes 'opy.lib' importable, so
# this is presumably meant to be run from the repo root.
#
# Outputs: the 'wc -l' line for the generated file on stdout.
write-opcodes() {
  local out=_tmp/opcodes-defined.txt

  # run-for-release calls this before anything else has created _tmp, so make
  # sure the directory exists instead of failing on the redirect.
  mkdir -p _tmp

  # 119 ops?
  PYTHONPATH=. python2 -c '
from opy.lib import opcode
names = sorted(opcode.opmap)
for n in names:
  print(n)
' > "$out"

  wc -l "$out"  # 119 defined
}
36
# NOTE: We analyze ~76 bytecode files. This produces 5 TSV2 files that
# are ~131K rows in ~8.5 MB altogether. The biggest table is the 'ops' table.
39
# Pass the .pyc files listed in the app manifest to 'opyc dis-tables', writing
# its tables under $BASE_DIR/opy-dis-tables, then print the line count of each
# resulting .tsv2 file.
#
# Globals: BASE_DIR (read)
# Reads:   _build/oil/opy-app-deps.txt
opy-dis-tables() {
  local out_dir=$BASE_DIR/opy-dis-tables
  mkdir -p "$out_dir"

  # Pass the .pyc files in the bytecode-opy.zip file to 'opyc dis'

  # Only the first manifest column is used; entries ending in .pyc look like
  # _build/oil/bytecode-opy/os.pyc
  time awk '$1 ~ /\.pyc$/ { print $1 }' _build/oil/opy-app-deps.txt \
    | xargs -- bin/opyc dis-tables "$out_dir"

  # Quote the directory but leave the glob outside the quotes so it expands.
  wc -l "$out_dir"/*.tsv2
}
53
# Hm it seems like build/prepare.sh build-python is necessary for this?
#
# For every .py file listed in the app manifest, pass the matching .pyc path
# (the same path with "c" appended) to 'opyc dis-tables', writing its tables
# under $BASE_DIR/cpython-dis-tables, then print the line count of each
# resulting .tsv2 file.
#
# Globals: BASE_DIR (read)
# Reads:   _build/oil/opy-app-deps.txt
cpython-dis-tables() {
  local out_dir=$BASE_DIR/cpython-dis-tables
  mkdir -p "$out_dir"

  # The .py files look like /home/andy/git/oilshell/oil/Python-2.7.13/Lib/os.py
  # Only the first manifest column is used.
  time awk '$1 ~ /\.py$/ { print $1 "c" }' _build/oil/opy-app-deps.txt \
    | xargs -- bin/opyc dis-tables "$out_dir"

  # Quote the directory but leave the glob outside the quotes so it expands.
  wc -l "$out_dir"/*.tsv2
}
65
66# CPython:
67#
68# 9143 _tmp/metrics/bytecode/cpython/consts.tsv2
69# 3956 _tmp/metrics/bytecode/cpython/flags.tsv2
70# 1858 _tmp/metrics/bytecode/cpython/frames.tsv2
71# 19808 _tmp/metrics/bytecode/cpython/names.tsv2
72# 76504 _tmp/metrics/bytecode/cpython/ops.tsv2
73# 111269 total
74#
75# OPy:
76# 8338 _tmp/metrics/bytecode/consts.tsv2 # fewer docstrings?
77# 3909 _tmp/metrics/bytecode/flags.tsv2
78# 1857 _tmp/metrics/bytecode/frames.tsv2
79# 35609 _tmp/metrics/bytecode/names.tsv2
80# 80396 _tmp/metrics/bytecode/ops.tsv2
81# 130109 total
82#
# Yes, I see there is a bug in the names.
84# Frames are almost exactly the same, which I expected.
85
86
# Run the R analysis script, forwarding every argument unchanged.
# Callers in this file pass an action name first (metrics, compare,
# src-bin-ratio), followed by the paths that action works on.
report() {
  local script=metrics/bytecode.R
  "$script" "$@"
}
90
# Reads the 5 tables in $BASE_DIR/opy-dis-tables and produces some metrics.
#
# Globals: BASE_DIR (read)
metrics-opy() {
  # Quoted so the directory always reaches 'report' as a single argument.
  report metrics "$BASE_DIR/opy-dis-tables"
}
95
# Run the 'compare' report over the CPython tables and the OPy tables, in that
# order.
#
# Globals: BASE_DIR (read)
compare() {
  # Quoted so each directory always reaches 'report' as a single argument.
  report compare "$BASE_DIR/cpython-dis-tables" "$BASE_DIR/opy-dis-tables"
}
99
# Computes the ratio of input/output file sizes from a .py / .pyc manifest.
src-bin-ratio() {
  local manifest=_build/oil/all-deps-py.txt
  local pyc_dir=_build/oil/bytecode-opy  # base directory of the .pyc files

  report src-bin-ratio "$manifest" "$pyc_dir"
}
106
# Regenerate all bytecode metrics: the opcode list, both sets of disassembly
# tables, and four text reports.
#
# Globals: BASE_DIR (read)
# Writes:  oil-with-opy.txt, oil-with-cpython.txt, src-bin-ratio-with-opy.txt
#          and overview.txt, all in $BASE_DIR
run-for-release() {
  write-opcodes  # _tmp/opcodes-defined.txt, for analysis

  opy-dis-tables
  cpython-dis-tables

  local out

  # Redirect targets are quoted: an unquoted target that word-splits makes bash
  # abort with "ambiguous redirect".
  out=$BASE_DIR/oil-with-opy.txt
  report metrics "$BASE_DIR/opy-dis-tables" > "$out"
  log "Wrote $out"

  out=$BASE_DIR/oil-with-cpython.txt
  report metrics "$BASE_DIR/cpython-dis-tables" > "$out"
  log "Wrote $out"

  out=$BASE_DIR/src-bin-ratio-with-opy.txt
  src-bin-ratio > "$out"
  log "Wrote $out"

  out=$BASE_DIR/overview.txt
  compare > "$out"
  log "Wrote $out"
}
131
132# TODO:
133# - opy/callgraph.py should output a table too
134# - then take the difference to find which ones are unused
135# - problem: it doesn't have unique names? Should we add (name, firstlineno)
136# to the key? That is only stable for the exact same version.
137# - compare bytecode vs CPython
138# - I think there is a bug with 'names' ?
139
140# maybe:
141# - analyze native code for OVM from GCC/Clang output?
142
143"$@"