#!/usr/bin/env bash
#
# Quick test for a potential rewrite of mycpp.
#
# Usage:
#   pea/TEST.sh <function name>

: ${LIB_OSH=stdlib/osh}
source $LIB_OSH/bash-strict.sh
source $LIB_OSH/task-five.sh
source $LIB_OSH/no-quotes.sh

source devtools/common.sh

source build/dev-shell.sh  # find python3 in /wedge PATH component

readonly MYPY_VENV='_tmp/mypy-venv'

show-python-config() {
  which python3
  echo

  python3 -V
  echo

  echo PYTHONPATH=$PYTHONPATH
  echo
}

install-latest-mypy() {
  local venv=$MYPY_VENV

  export PYTHONPATH=.

  rm -r -f -v $venv

  show-python-config

  echo "Creating venv in $venv"
  python3 -m venv $venv

  . $venv/bin/activate

  echo "venv $venv is activated"
  show-python-config

  python3 -m pip install mypy

  # 2022: 1.5.1 (compiled: yes)
  # 2024-12 Debian desktop: 1.13.0 (compiled: yes)
  # 2024-12 Soil CI image: 1.10.0
  python3 -m mypy --version
}

pea-files() {
  for f in pea/*.py; do
    case $f in
      *NINJA_subgraph.py)
        continue
        ;;
    esac

    echo $f
  done
}

count-lines() {
  pea-files | xargs wc -l
}

_check-types() {
  echo PYTHONPATH=$PYTHONPATH
  echo

  python3 -m mypy --version
  echo

  time pea-files | xargs python3 -m mypy --strict
}

check-with-our-mypy() {
  _check-types
}

check-with-latest-mypy() {
  ### soil/worker.sh calls this

  # This disables the MyPy wedge and uses the latest MyPy installed above
  export PYTHONPATH=.

  # install-latest-mypy creates this; it may not be present on the CI machine
  local activate=$MYPY_VENV/bin/activate
  if test -f $activate; then
    . $activate
  fi

  _check-types
}

#
# Run Pea
#

pea-main() {
  pea/pea_main.py "$@"
}

parse-one() {
  pea-main parse "$@"
}

translate-cpp() {
  ### Used by mycpp/NINJA-steps.sh

  pea-main cpp "$@"
}

all-files() {
  # Can't run this on Soil because we only have build/py.sh py-source, not
  # 'minimal'

  # Update this file with build/dynamic-deps.sh pea-hack

  cat pea/oils-typecheck.txt

  for path in */*.pyi; do
    echo $path
  done
}

parse-all() {
  ### soil/worker.sh calls this

  time all-files | xargs --verbose -- $0 pea-main parse
}

# Good illustration of "distributing your overhead"
#
# Total work goes up, while latency goes down. To a point. Then it goes back
# up.

# batch size 30
#
# real    0m0.342s
# user    0m0.735s
# sys     0m0.059s
#
# batch size 20
#
# real    0m0.305s
# user    0m0.993s
# sys     0m0.081s
#
# batch size 15
#
# real    0m0.299s
# user    0m1.110s
# sys     0m0.123s
#
# batch size 10
#
# real    0m0.272s
# user    0m1.362s
# sys     0m0.145s

batch-size() {
  local num_files=$1

  local num_procs
  num_procs=$(nproc)

  # Use (p-1) as a fudge so we don't end up with more batches than processors
  local files_per_process=$(( num_files / (num_procs - 1) ))

  echo "$num_procs $files_per_process"
}
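
# A worked example of the arithmetic above, on a hypothetical 8-core machine
# (nproc = 8), with the 103 files mentioned below:
#
#   files_per_process = 103 / (8 - 1) = 14
#
# so 'xargs -n 14' creates ceil(103 / 14) = 8 batches, one per processor.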

demo-par() {
  ### Demo parallelism of Python processes

  local num_files
  num_files=$(all-files | wc -l)

  # 103 files

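  # lastpipe makes the 'read' below (the last stage of a pipeline) run in this
  # shell rather than a subshell, so num_procs and optimal stay set afterward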
  shopt -s lastpipe
  batch-size $num_files | read num_procs optimal

  echo "Parsing $num_files files with $num_procs parallel processes"
  echo "Optimal batch size is $optimal"

  echo

  echo 'All at once:'
  time parse-all > /dev/null 2>&1
  echo

  # 5 is meant to be suboptimal
  for n in 50 30 20 10 5 $optimal; do
    echo "batch size $n"
    time all-files | xargs --verbose -P $num_procs -n $n -- \
      $0 parse-one > /dev/null 2>&1
    echo
  done
}

# - 0.40 secs to parse
# - 0.56 secs pickle, so that's 160 ms
# Then
#
# - 0.39 secs load pickle
#
# That's definitely slower than I want. It's 6.6 MB of data.
#
# So
# - parallel parsing can be done in <300 ms
# - parallel pickling
# - serial unpickling (reduce) in 390 ms
#
# So now we're at ~700 ms or so. Can we type check in 300 ms in pure Python?
#
# What if we compress the generated ASDL? Those are very repetitive.

# Problem statement:
_serial-pickle() {
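  ### Baseline: dump pickles for all files serially, then load them back
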
  mkdir -p _tmp
  local tmp=_tmp/serial

  time all-files | xargs --verbose -- $0 pea-main dump-pickles > $tmp

  ls -l -h $tmp

  echo 'loading'
  time pea-main load-pickles < $tmp
}

# 1.07 seconds
serial-pickle() { time $0 _serial-pickle; }

pickle-one() {
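  # Each xargs batch runs this in its own $0 process, so $$ gives every batch
  # a distinct output file under _tmp/p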
  pea-main dump-pickles "$@" > _tmp/p/$$
}

_par-pickle() {
  local num_files
  num_files=$(all-files | wc -l)

  shopt -s lastpipe
  batch-size $num_files | read num_procs optimal

  local dir=_tmp/p
  rm -r -f -v $dir
  mkdir -p $dir

  time all-files | xargs --verbose -P $num_procs -n $optimal -- $0 pickle-one

  ls -l -h $dir

  # This takes 410-430 ms? Wow, that's slow.
  time cat $dir/* | pea-main load-pickles
}

# Can get this down to ~700 ms
#
# Note parsing serially in a single process is 410 ms !!! So this is NOT a win
# unless we have more work besides parsing to parallelize.
#
# We can extract constants and forward declarations in parallel I suppose.
#
# BUT immutable string constants have to be de-duplicated! Though I guess that
# is a natural 'reduce' step.
#
# And we can even do implementation and prototypes in parallel too?
#
# I think the entire algorithm can be OPTIMISTIC without serialized type
# checking?
#
# I think:
#
#   a = 5
#   b = a  # do not know the type without a global algorithm
#
# Or I guess you can do type checking within a function. Functions require
# signatures. So yes, let's do that in parallel.
#
# --
#
# The ideal way to do this would be to split Oils up into MODULES, like
#
#   _debuild/
#   builtin/
#   core/
#   data_lang/
#   frontend/
#   osh/
#   ysh/
#   Smaller: pgen2/ pylib/ tools/
#
# And modules are acyclic, and can compile on their own with dependencies. If
# you pick random .py files and spit out header files, I think they won't
# compile. The forward declarations and constants will work, but the
# prototypes won't.

par-pickle() { time $0 _par-pickle; }

sum1() {
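  # Sum the first column of stdin, e.g. printf '3\n4\n' | sum1 prints 7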
  awk '{ sum += $1 } END { print sum }'
}

sum-sizes() {
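  # For each path read from stdin, print the size of every file under it, then
  # total the sizes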
  xargs -I {} -- find {} -printf '%s %p\n' | sum1
}

size-ratio() {
  # all-files
  # echo _tmp/p/*

  # 1.96 MB of source code
  all-files | sum-sizes

  # 7.13 MB of pickle files
  # Weirdly, 'echo _tmp/p/*' doesn't work here: xargs -I {} takes one path per
  # line, and echo puts them all on one line
  for f in _tmp/p/*; do echo $f; done | sum-sizes
}
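
# A hypothetical sketch (not called anywhere else) for the "What if we compress
# the generated ASDL?" question above: measure how much the pickled data
# shrinks under gzip. Assumes _par-pickle has already populated _tmp/p.
pickle-compression-ratio() {
  local raw compressed
  raw=$(cat _tmp/p/* | wc -c)
  compressed=$(cat _tmp/p/* | gzip -c | wc -c)
  echo "raw bytes: $raw"
  echo "gzip bytes: $compressed"
}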

# Only 47 ms!
# I want the overhead to be less than 1 second:
# 1. parallel parsing + pickle
# 2. serial unpickle + type check
# 3. starting the process
#
# So unpickling is slow.

osh-overhead() {
  time bin/osh -c 'echo hi'
}

# MyPy dev version takes 10.2 seconds the first time (without their mypyc
# speedups)
#
# 0.150 seconds the second time, WITHOUT code changes
# 0.136 seconds

# 4.1 seconds: whitespace change
# 3.9 seconds: again, and this is on my fast hoover machine

# 5.0 seconds - Invalid type!
# 4.9 seconds - again invalid

mypy-compare() {
  devtools/types.sh check-oils
}

test-translate() {
  translate-cpp bin/oils_for_unix.py
}

test-syntax-error() {
  local status stdout

  # error in Python syntax
  nq-capture status stdout \
    parse-one pea/testdata/py_err.py
  nq-assert 1 = $status

  # error in signature
  nq-capture status stdout \
    parse-one pea/testdata/sig_err.py
  nq-assert 1 = $status

  # error in assignment
  nq-capture status stdout \
    parse-one pea/testdata/assign_err.py
  nq-assert 1 = $status
}

test-mycpp-integration() {
  # In Soil CI, we are importing a compiled MyPy?
  # We don't have the WEDGE
  # OK I can just add that
  #return

  # Works
  echo ---
  pea-main mycpp

  echo ---
  pea-main mycpp mycpp/examples/test_small_str.py
}

test-example-hello() {
  local bin=_bin/cxx-asan/mycpp/examples/pea_hello.pea
  ninja $bin

  local status stdout
  nq-capture status stdout \
    $bin
  nq-assert 42 = $status
}

run-tests() {
  ### soil/worker.sh calls this

  devtools/byo.sh test $0
}

task-five "$@"