# OILS / pea / TEST.sh — View on Github | oilshell.org
#
# 344 lines, 128 significant
1#!/usr/bin/env bash
2#
3# Quick test for a potential rewrite of mycpp.
4#
5# Usage:
6# pea/TEST.sh <function name>
7
8: ${LIB_OSH=stdlib/osh}
9source $LIB_OSH/bash-strict.sh
10source $LIB_OSH/no-quotes.sh
11
12source test/common.sh # run-test-funcs
13source devtools/common.sh
14
15source build/dev-shell.sh # find python3 in /wedge PATH component
16
17# This is just like the yapf problem in devtools/format.sh !
18# Pea needs a newer version of MyPy -- one that supports 'math'
19
20# 2024-09 - there is a conflict between:
21# parse-all - 'import mypy' for mycpp/pass_state.py
22# check-types - uses a newer version of MyPy
23#
24# The problem is importing MyPy as a LIBRARY vs. using it as a TOOL
25
26unset PYTHONPATH
27export PYTHONPATH=.
28
29readonly MYPY_VENV='_tmp/mypy-venv'
30
install-mypy() {
  ### Build a fresh virtualenv with MyPy installed into it

  local venv_dir=$MYPY_VENV

  # Start from scratch every time
  rm -r -f -v "$venv_dir"
  python3 -m venv "$venv_dir"

  . "$venv_dir/bin/activate"
  python3 -m pip install mypy

  # Says 1.5.1 (compiled: yes)
  mypy-version
}
45
mypy-version() {
  ### Report the MyPy version installed in the venv
  local activate=$MYPY_VENV/bin/activate
  . "$activate"
  python3 -m mypy --version
}
50
51#
52# Run Pea
53#
54
pea-main() {
  ### Single entry point: every Pea operation goes through this script
  local entry=pea/pea_main.py
  "$entry" "$@"
}
58
parse-one() {
  ### Parse one batch of files with Pea
  local subcommand=parse
  pea-main "$subcommand" "$@"
}
62
translate-cpp() {
  ### Used by mycpp/NINJA-steps.sh

  local subcommand=cpp
  pea-main "$subcommand" "$@"
}
68
all-files() {
  ### Print the list of files Pea operates on, one path per line

  # Can't run this on Soil because we only have build/py.sh py-source, not
  # 'minimal'

  # Update this file with build/dynamic-deps.sh pea-hack
  cat pea/oils-typecheck.txt

  # Type stubs too.
  # Fix: loop var is now 'local', and the path is printed quoted so a name
  # containing spaces or glob characters can't be re-split or re-expanded.
  local path
  for path in */*.pyi; do
    printf '%s\n' "$path"
  done
}
81
parse-all() {
  ### Parse every file in one serial (timed) xargs invocation

  #source $MYPY_VENV/bin/activate
  local -a cmd=( $0 pea-main parse )
  time all-files | xargs --verbose -- "${cmd[@]}"
}
86
87# Good illustration of "distributing your overhead"
88#
89# Total work goes up, while latency goes down. To a point. Then it goes back
90# up.
91
92# batch size 30
93#
94# real 0m0.342s
95# user 0m0.735s
96# sys 0m0.059s
97#
98# batch size 20
99#
100# real 0m0.305s
101# user 0m0.993s
102# sys 0m0.081s
103#
104# batch size 15
105#
106# real 0m0.299s
107# user 0m1.110s
108# sys 0m0.123s
109#
110# batch size 10
111#
112# real 0m0.272s
113# user 0m1.362s
114# sys 0m0.145s
115
batch-size() {
  ### Compute "num_procs files_per_process" for a parallel xargs run.
  # $1 - total number of files to divide into batches
  local num_files=$1

  local num_procs
  num_procs=$(nproc)

  # Use (p-1) as a fudge so we don't end up with more batches than processors.
  # Fix: guard the divisor so a single-core machine (nproc == 1) doesn't
  # cause a division-by-zero arithmetic error.
  local divisor=$(( num_procs > 1 ? num_procs - 1 : 1 ))
  local files_per_process=$(( num_files / divisor ))

  echo "$num_procs $files_per_process"
}
127
demo-par() {
  ### Demo parallelism of Python processes

  # NOTE(review): declares 'local files' but assigns 'num_files', so
  # num_files leaks into the global scope — looks like a latent bug
  local files
  num_files=$(all-files | wc -l)

  # 103 files

  # lastpipe runs the final 'read' in this shell, so the vars survive
  shopt -s lastpipe
  batch-size $num_files | read num_procs optimal

  echo "Parsing $num_files files with $num_procs parallel processes"
  echo "Optimal batch size is $optimal"

  echo

  echo 'All at once:'
  time parse-all > /dev/null 2>&1
  echo

  # 5 is meant to be suboptimal
  local n
  for n in 50 30 20 10 5 $optimal; do
    echo "batch size $n"
    time all-files | xargs --verbose -P $num_procs -n $n -- \
      $0 parse-one > /dev/null 2>&1
    echo
  done
}
156
157# - 0.40 secs to parse
158# - 0.56 secs pickle, so that's 160 ms
159# Then
160#
161# - 0.39 secs load pickle
162#
163# That's definitely slower than I want. It's 6.6 MB of data.
164#
165# So
166# - parallel parsing can be done in <300 ms
167# - parallel pickling
168# - serial unpickling (reduce) in 390 ms
169#
170# So now we're at ~700 ms or so. Can we type check in 300 ms in pure Python?
171#
172# What if we compress the generated ASDL? Those are very repetitive.
173
174# Problem statement:
175
_serial-pickle() {
  ### Pickle all files in one process, then time loading the result back
  mkdir -p _tmp
  local out=_tmp/serial

  time all-files | xargs --verbose -- $0 pea-main dump-pickles > "$out"

  ls -l -h "$out"

  echo 'loading'
  time pea-main load-pickles < "$out"
}
187
188# 1.07 seconds
serial-pickle() {
  ### Timed wrapper; re-invokes this script in a child shell
  time $0 _serial-pickle
}
190
pickle-one() {
  ### Pickle one batch; each worker writes a file named by its own PID
  pea-main dump-pickles "$@" > "_tmp/p/$$"
}
194
_par-pickle() {
  ### Pickle in parallel batches, then time a serial load of all the pickles

  # Fix: the original declared 'local files' but assigned 'num_files',
  # leaving an unused local and leaking num_files into the global scope.
  local num_files
  num_files=$(all-files | wc -l)

  # lastpipe makes the final 'read' run in this shell so the vars survive
  local num_procs optimal
  shopt -s lastpipe
  batch-size $num_files | read num_procs optimal

  local dir=_tmp/p
  rm -r -f -v $dir
  mkdir -p $dir

  time all-files | xargs --verbose -P $num_procs -n $optimal -- $0 pickle-one

  ls -l -h $dir

  # This takes 410-430 ms? Wow that's slow.
  time cat $dir/* | pea-main load-pickles
}
213
214# Can get this down to ~700 ms
215#
216# Note parsing serially in a single process is 410 ms !!! So this is NOT a win
217# unless we have more work besides parsing to parallelize.
218#
219# We can extract constants and forward declarations in parallel I suppose.
220#
# 221 BUT immutable string constants have to be de-duplicated! Though I guess that
222# is a natural 'reduce' step.
223#
224# And we can even do implementation and prototypes in parallel too?
225#
226# I think the entire algorithm can be OPTIMISTIC without serialized type
227# checking?
228#
229# I think
230#
231# a = 5
232# b = a # do not know the type without a global algorithm
233#
234# Or I guess you can do type checking within a function. Functions require
235# signatures. So yes let's do that in parallel.
236#
237# --
238#
239# The ideal way to do this would be to split Oils up into MODULES, like
240#
241# _debuild/
242# builtin/
243# core/
244# data_lang/
245# frontend/
246# osh/
247# ysh/
248# Smaller: pgen2/ pylib/ tools/
249#
250# And modules are acyclic, and can compile on their own with dependencies. If
251# you pick random .py files and spit out header files, I think they won't compile.
252# The forward declarations and constants will work, but the prototype won't.
253
par-pickle() {
  ### Timed wrapper; re-invokes this script in a child shell
  time $0 _par-pickle
}
255
sum1() {
  ### Sum the first column of stdin and print the total
  local prog='{ sum += $1 } END { print sum }'
  awk "$prog"
}
259
sum-sizes() {
  # Read one path per line on stdin; print the total byte size of all files
  # found under those paths.  Note: find -printf is GNU-specific.
  xargs -I {} -- find {} -printf '%s %p\n' | sum1
}
263
size-ratio() {
  ### Compare the total size of Python sources vs. generated pickle files

  # all-files
  # echo _tmp/p/*

  # 1.96 MB of source code
  all-files | sum-sizes

  # 7.13 MB of pickle files
  # Weirdly echo _tmp/p/* doesn't work here -- it prints everything on one
  # line, while sum-sizes wants one path per line, hence printf
  printf '%s\n' _tmp/p/* | sum-sizes
}
275
276# Only 47 ms!
277# I want the overhead to be less than 1 second:
278# 1. parallel parsing + pickle
279# 2. serial unpickle + type check
280# 3. starting the process
281#
282# So unpickling is slow.
283
osh-overhead() {
  ### Time running a trivial command under bin/osh
  local -a cmd=( bin/osh -c 'echo hi' )
  time "${cmd[@]}"
}
287
288
289# MyPy dev version takes 10.2 seconds the first time (without their mypyc
290# speedups)
291#
292# 0.150 seconds the second time, WITHOUT code changes
293# 0.136 seconds
294
295# 4.1 seconds: whitespace change
296# 3.9 seconds: again, and this is on my fast hoover machine
297
298# 5.0 seconds - Invalid type!
299# 4.9 seconds - again invalid
300
301
mypy-compare() {
  # Baseline comparison: type-check all of Oils with the repo's standard
  # MyPy setup (see the timing notes above)
  devtools/types.sh check-oils
}
305
check-types() {
  ### Type-check pea_main.py with MyPy in --strict mode

  # install-mypy creates this venv; it may not be present on a CI machine.
  local activate=$MYPY_VENV/bin/activate
  if [[ -f $activate ]]; then
    . $activate
  fi

  time python3 -m mypy --strict pea/pea_main.py
}
316
test-translate() {
  ### Smoke test: translate the Oils main program to C++
  local main_py=bin/oils_for_unix.py
  translate-cpp "$main_py"
}
320
test-syntax-error() {
  ### Each kind of bad input should make the parser exit with status 1
  set +o errexit

  # py_err: bad Python syntax; sig_err: bad signature; assign_err: bad
  # assignment.  Same three parse-one / nq-assert pairs as before, as a loop.
  local case_name
  for case_name in py_err sig_err assign_err; do
    parse-one "pea/testdata/${case_name}.py"
    nq-assert $? -eq 1
  done
}
336
run-tests() {
  ### Run all the test-* functions
  # Making this separate for soil/worker.sh

  echo 'Running test functions'
  run-test-funcs
}
343
344"$@"