#!/usr/bin/env bash
#
# demo/old/benchmarks-oheap.sh
#
# Measure the size of oheap-encoded files, and the encoding and decoding speed.
#
# Usage (run from the repository root: the 'source' paths and data paths below
# are relative to the current directory):
#   demo/old/benchmarks-oheap.sh <function name>

# Strict mode: fail on unset variables, on a failure in any pipeline stage, and
# on any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# NOTE(review): helpers used below but not defined in this file (log, csv2html,
# $OSH_OVM) are presumably provided by these two files -- confirm.
source test/common.sh
source benchmarks/common.sh

# All generated artifacts are written under this directory.
readonly BASE_DIR=_tmp/oheap

# Encode a single shell script into the oheap format.
#
# Arguments:
#   $1 - path of the shell script to parse
#   $2 - path of the oheap file to write (overwritten)
# Globals:
#   OSH_OVM (read) - command used to run osh; not defined in this file.
encode-one() {
  local script=$1
  local oheap_out=$2

  # $OSH_OVM is left unquoted exactly as before, in case it is ever set to a
  # command plus arguments.  The redirect target is quoted so that an output
  # path containing whitespace no longer causes an "ambiguous redirect".
  $OSH_OVM -n --ast-format oheap "$script" > "$oheap_out"
}

# Print one "<source path> <oheap output path>" pair per line, for every path
# listed in benchmarks/osh-parser-files.txt.  encode-all feeds this to
# 'xargs -n 2', so every row must contain exactly two whitespace-free tokens.
#
# Globals:
#   BASE_DIR (read) - directory that will hold the encoded files
task-spec() {
  local path
  # -r keeps backslashes literal; the '|| [[ -n ... ]]' clause also processes a
  # final line that lacks a trailing newline.
  while read -r path || [[ -n $path ]]; do
    # A blank line would produce a one-token row and shift the xargs pairing.
    [[ -n $path ]] || continue
    printf '%s %s/%s__oheap\n' "$path" "$BASE_DIR" "$(basename -- "$path")"
  done < benchmarks/osh-parser-files.txt
}

# Encode every file listed by task-spec, timing each run.
#
# Writes $BASE_DIR/times.csv, starting with the header 'status,elapsed_secs',
# and passes that file to benchmarks/time.py via --output for each task.
#
# Globals:
#   BASE_DIR (read) - directory for the encoded files and times.csv
encode-all() {
  mkdir -p "$BASE_DIR"

  local times_csv=$BASE_DIR/times.csv
  echo 'status,elapsed_secs' > "$times_csv"

  # xargs takes two tokens at a time (source path, output path) and re-invokes
  # this same script ($0) with the 'encode-one' function under time.py.
  task-spec | xargs -n 2 --verbose -- \
    benchmarks/time.py --output "$times_csv" -- \
    "$0" encode-one
}

# Out of curiosity, compress oheap and originals.

# Compress every $BASE_DIR/*__oheap file with both gzip and xz, writing
# <name>.gz and <name>.xz into $BASE_DIR/oheap-compressed.
#
# Globals:
#   BASE_DIR (read) - directory holding the encoded *__oheap files
# Uses 'log', which is not defined in this file.
compress-oheap() {
  local c_dir=$BASE_DIR/oheap-compressed
  mkdir -p "$c_dir"

  # Declared local so the loop variables do not leak into the caller's scope;
  # the assignment is kept separate from 'local' so a basename failure is not
  # masked.
  local bin name
  # If nothing matches, the glob stays literal and gzip reports the missing
  # file, as it did before.
  for bin in "$BASE_DIR"/*__oheap; do
    name=$(basename -- "$bin")
    log "Compressing $name"
    gzip --stdout "$bin" > "$c_dir/$name.gz"
    xz --stdout "$bin" > "$c_dir/$name.xz"
  done
}

# Compress the original source text of every file listed in
# benchmarks/osh-parser-files.txt with both gzip and xz, writing
# <name>__text.gz and <name>__text.xz into $BASE_DIR/src-compressed.
#
# Globals:
#   BASE_DIR (read) - parent directory for the compressed output
# Uses 'log', which is not defined in this file.
compress-text() {
  local c_dir=$BASE_DIR/src-compressed
  mkdir -p "$c_dir"

  local src name
  # -r keeps backslashes literal; the '|| [[ -n ... ]]' clause also processes a
  # final line that lacks a trailing newline.
  while read -r src || [[ -n $src ]]; do
    # Skip blank lines instead of handing an empty path to gzip/xz.
    [[ -n $src ]] || continue
    name=$(basename -- "$src")
    log "Compressing $name"
    gzip --stdout "$src" > "$c_dir/${name}__text.gz"
    xz --stdout "$src" > "$c_dir/${name}__text.xz"
  done < benchmarks/osh-parser-files.txt
}

# Print one CSV row "<size in bytes>,<c1>,<c2>,<path>" for each given path.
#
# Arguments:
#   $1    - value for the second CSV column (e.g. the format)
#   $2    - value for the third CSV column (e.g. the compression)
#   $3... - paths to measure
# Requires a find that supports -printf (GNU findutils).
print-size() {
  local c1=$1
  local c2=$2
  shift 2

  # With no paths, find would default to '.' and emit a bogus row for the
  # current directory.
  (( $# > 0 )) || return 0

  # The labels are spliced into find's -printf format string, so escape the two
  # characters that are special there (backslash first, then percent).
  c1=${c1//\\/\\\\}
  c1=${c1//'%'/%%}
  c2=${c2//\\/\\\\}
  c2=${c2//'%'/%%}

  # depth 0: just the filename itself.
  find "$@" -maxdepth 0 -printf "%s,${c1},${c2},%p\n"
}

# Print the sizes CSV on stdout: a header row, then one print-size row per file
# for each (format, compression) combination.
#
# Globals:
#   BASE_DIR (read) - directory holding the encoded and compressed files
print-csv() {
  echo 'num_bytes,format,compression,path'
  # TODO
  # NOTE(review): the uncompressed text rows come from benchmarks/testdata/*,
  # while compress-text reads benchmarks/osh-parser-files.txt -- possibly what
  # the TODO above refers to; confirm.
  print-size text none benchmarks/testdata/*
  print-size text gz "$BASE_DIR"/src-compressed/*.gz
  print-size text xz "$BASE_DIR"/src-compressed/*.xz

  print-size oheap none "$BASE_DIR"/*__oheap
  print-size oheap gz "$BASE_DIR"/oheap-compressed/*.gz
  print-size oheap xz "$BASE_DIR"/oheap-compressed/*.xz
}

# This can be done on any host.
#
# Runs the whole measurement step in order: encode every file, then compress
# the encoded files, then compress the original sources.
measure() {
  encode-all
  compress-oheap
  compress-text
}

# Write the output of print-csv to $BASE_DIR/stage1/sizes.csv, creating the
# directory if needed.
#
# Globals:
#   BASE_DIR (read) - parent of the stage1 output directory
stage1() {
  local out_dir=$BASE_DIR/stage1
  mkdir -p "$out_dir"
  print-csv > "$out_dir/sizes.csv"
}

# Print the "OHeap Encoding" HTML report on stdout.
#
# Arguments:
#   $1 - directory containing encoding_size.csv and encoding_ratios.csv
# Uses 'csv2html', which is not defined in this file, to render each CSV.
print-report() {
  local in_dir=$1
  # Prefix for the JS/CSS assets referenced in <head>; expanded inside the
  # unquoted heredoc below.
  local base_url='../../web'

  cat <<EOF
<!DOCTYPE html>
<html>
  <head>
    <title>OHeap Encoding</title>
    <script type="text/javascript" src="$base_url/table/table-sort.js"></script>
    <link rel="stylesheet" type="text/css" href="$base_url/table/table-sort.css" />
    <link rel="stylesheet" type="text/css" href="$base_url/benchmarks.css" />

  </head>
  <body>
    <p id="home-link">
      <a href="/">oilshell.org</a>
    </p>
    <h2>OHeap Encoding</h2>

    <h3>Encoding Size (KB)</h3>

    <p>Sizes are in KB (powers of 10), not KiB (powers of 2).</p>
EOF
  csv2html "$in_dir/encoding_size.csv"

  cat <<EOF
    <h3>Encoding Ratios</h3>
EOF
  csv2html "$in_dir/encoding_ratios.csv"

  cat <<EOF
  </body>
</html>
EOF
}


# TODO: instead of running osh_demo, we should generate a C++ program that
# visits every node and counts it.  The output might look like:
#
# - It can also print out the depth of the tree.
# - Summary: number of different types used
# - another option: decode/validate utf-8.  See Visitor Use Cases.
#
# # 500 instances
# line_span = (...)
# # 455 instances
# token = (
#   id id,
#   string val,  # lengths: min 0, max 20, avg 30
#   int? span_id,
# )
#
# command =
#   # 20 instances
#   NoOp
#   -- TODO: respect order
#   # 20 instances
#   | SimpleCommand(
#       word* words,  # min length: 0, max: 10, mean: 3.3 ?
#       redir* redirects,  # min length 0, max: 2, mean: 4.4
#       env_pair* more_env)
#   | Sentence(command child, token terminator)
#
# This might help with encoding things inline?
# You will definitely need to append to ASDL arrays.  I don't think you'll need
# to append to strings.  But you might want to store strings inline with
# structs.
# I guess it wouldn't hurt to print out a table of EVERY node and array, along
# with the type.
# parent_type,field_name,type,subtype,length
# token,val,Str,-,5
# SimpleCommand,redirects,Array,redirect,10
#
# This lets you figure out what the common types are, as well as the common
# lengths.

# For every encoded file in $BASE_DIR, print its path, then run
# _tmp/osh_demo on it under 'time' and print the number of output lines.
#
# Globals:
#   BASE_DIR (read) - directory holding the encoded *__oheap files
decode-all() {
  # Local so the loop variable does not leak into the caller's scope.
  local bin
  for bin in "$BASE_DIR"/*__oheap; do
    printf '%s\n' "$bin"
    # 'time' is the shell keyword here, so it times the whole pipeline.
    time _tmp/osh_demo "$bin" | wc -l
  done
}

# Dispatch: run the function named by the first command-line argument, passing
# the remaining arguments to it.  With no arguments this is a no-op.
"$@"