# OILS / pea / TEST.sh — View on Github | oilshell.org
#
# 344 lines, 128 significant
1#!/usr/bin/env bash
2#
3# Quick test for a potential rewrite of mycpp.
4#
5# Usage:
6# pea/TEST.sh <function name>
7
8: ${LIB_OSH=stdlib/osh}
9source $LIB_OSH/bash-strict.sh
10source $LIB_OSH/no-quotes.sh
11
12source test/common.sh # run-test-funcs
13source devtools/common.sh
14
15source build/dev-shell.sh # find python3 in /wedge PATH component
16
17# This is just like the yapf problem in devtools/format.sh !
18# Pea needs a newer version of MyPy -- one that supports 'math'
19
20# 2024-09 - there is a conflict between:
21# parse-all - 'import mypy' for mycpp/pass_state.py
22# check-types - uses a newer version of MyPy
23#
24# The problem is importing MyPy as a LIBRARY vs. using it as a TOOL
25
26unset PYTHONPATH
27export PYTHONPATH=.
28
29readonly MYPY_VENV='_tmp/mypy-venv'
30
install-mypy() {
  ### Build a fresh virtualenv with MyPy installed into it

  local venv_dir=$MYPY_VENV

  # Start from scratch every time
  rm -r -f -v "$venv_dir"
  python3 -m venv "$venv_dir"

  . "$venv_dir/bin/activate"
  python3 -m pip install mypy

  # Says 1.5.1 (compiled: yes)
  mypy-version
}
45
mypy-version() {
  ### Report the MyPy version installed in the venv
  local activate=$MYPY_VENV/bin/activate
  . "$activate"
  python3 -m mypy --version
}
50
51#
52# Run Pea
53#
54
pea-main() {
  ### Single entry point: every Pea operation goes through this script
  local entry=pea/pea_main.py
  "$entry" "$@"
}
58
parse-one() {
  ### Parse one batch of files with Pea
  local subcommand=parse
  pea-main "$subcommand" "$@"
}
62
translate-cpp() {
  ### Used by mycpp/NINJA-steps.sh

  local subcommand=cpp
  pea-main "$subcommand" "$@"
}
68
all-files() {
  ### Print the list of files Pea operates on, one path per line

  # Can't run this on Soil because we only have build/py.sh py-source, not
  # 'minimal'

  # Update this file with build/dynamic-deps.sh pea-hack
  cat pea/oils-typecheck.txt

  # Type stubs too.
  # Fix: loop var is now 'local', and the path is printed quoted so a name
  # containing spaces or glob characters can't be re-split or re-expanded.
  local path
  for path in */*.pyi; do
    printf '%s\n' "$path"
  done
}
81
parse-all() {
  ### Parse every file in one serial (timed) xargs invocation

  #source $MYPY_VENV/bin/activate
  local -a cmd=( $0 pea-main parse )
  time all-files | xargs --verbose -- "${cmd[@]}"
}
86
87# Good illustration of "distributing your overhead"
88#
89# Total work goes up, while latency goes down. To a point. Then it goes back
90# up.
91
92# batch size 30
93#
94# real 0m0.342s
95# user 0m0.735s
96# sys 0m0.059s
97#
98# batch size 20
99#
100# real 0m0.305s
101# user 0m0.993s
102# sys 0m0.081s
103#
104# batch size 15
105#
106# real 0m0.299s
107# user 0m1.110s
108# sys 0m0.123s
109#
110# batch size 10
111#
112# real 0m0.272s
113# user 0m1.362s
114# sys 0m0.145s
115
batch-size() {
  ### Compute "num_procs files_per_process" for a parallel xargs run.
  # $1 - total number of files to divide into batches
  local num_files=$1

  local num_procs
  num_procs=$(nproc)

  # Use (p-1) as a fudge so we don't end up with more batches than processors.
  # Fix: guard the divisor so a single-core machine (nproc == 1) doesn't
  # cause a division-by-zero arithmetic error.
  local divisor=$(( num_procs > 1 ? num_procs - 1 : 1 ))
  local files_per_process=$(( num_files / divisor ))

  echo "$num_procs $files_per_process"
}
127
demo-par() {
  ### Demo parallelism of Python processes

  # NOTE(review): declares 'local files' but assigns 'num_files', so
  # num_files leaks into the global scope — looks like a latent bug
  local files
  num_files=$(all-files | wc -l)

  # 103 files

  # lastpipe runs the final 'read' in this shell, so the vars survive
  shopt -s lastpipe
  batch-size $num_files | read num_procs optimal

  echo "Parsing $num_files files with $num_procs parallel processes"
  echo "Optimal batch size is $optimal"

  echo

  echo 'All at once:'
  time parse-all > /dev/null 2>&1
  echo

  # 5 is meant to be suboptimal
  local n
  for n in 50 30 20 10 5 $optimal; do
    echo "batch size $n"
    time all-files | xargs --verbose -P $num_procs -n $n -- \
      $0 parse-one > /dev/null 2>&1
    echo
  done
}
156
157# - 0.40 secs to parse
158# - 0.56 secs pickle, so that's 160 ms
159# Then
160#
161# - 0.39 secs load pickle
162#
163# That's definitely slower than I want. It's 6.6 MB of data.
164#
165# So
166# - parallel parsing can be done in <300 ms
167# - parallel pickling
168# - serial unpickling (reduce) in 390 ms
169#
170# So now we're at ~700 ms or so. Can we type check in 300 ms in pure Python?
171#
172# What if we compress the generated ASDL? Those are very repetitive.
173
174# Problem statement:
175
_serial-pickle() {
  ### Pickle all files in one process, then time loading the result back
  mkdir -p _tmp
  local out=_tmp/serial

  time all-files | xargs --verbose -- $0 pea-main dump-pickles > "$out"

  ls -l -h "$out"

  echo 'loading'
  time pea-main load-pickles < "$out"
}
187
188# 1.07 seconds
serial-pickle() {
  ### Timed wrapper; re-invokes this script in a child shell
  time $0 _serial-pickle
}
190
pickle-one() {
  ### Pickle one batch; each worker writes a file named by its own PID
  pea-main dump-pickles "$@" > "_tmp/p/$$"
}
194
_par-pickle() {
  ### Pickle in parallel batches, then time a serial load of all the pickles

  # Fix: the original declared 'local files' but assigned 'num_files',
  # leaving an unused local and leaking num_files into the global scope.
  local num_files
  num_files=$(all-files | wc -l)

  # lastpipe makes the final 'read' run in this shell so the vars survive
  local num_procs optimal
  shopt -s lastpipe
  batch-size $num_files | read num_procs optimal

  local dir=_tmp/p
  rm -r -f -v $dir
  mkdir -p $dir

  time all-files | xargs --verbose -P $num_procs -n $optimal -- $0 pickle-one

  ls -l -h $dir

  # This takes 410-430 ms? Wow that's slow.
  time cat $dir/* | pea-main load-pickles
}
213
214# Can get this down to ~700 ms
215#
216# Note parsing serially in a single process is 410 ms !!! So this is NOT a win
217# unless we have more work besides parsing to parallelize.
218#
219# We can extract constants and forward declarations in parallel I suppose.
220#
# 221 BUT immutable string constants have to be de-duplicated! Though I guess that
222# is a natural 'reduce' step.
223#
224# And we can even do implementation and prototypes in parallel too?
225#
226# I think the entire algorithm can be OPTIMISTIC without serialized type
227# checking?
228#
229# I think
230#
231# a = 5
232# b = a # do not know the type without a global algorithm
233#
234# Or I guess you can do type checking within a function. Functions require
235# signatures. So yes let's do that in parallel.
236#
237# --
238#
239# The ideal way to do this would be to split Oils up into MODULES, like
240#
241# _debuild/
242# builtin/
243# core/
244# data_lang/
245# frontend/
246# osh/
247# ysh/
248# Smaller: pgen2/ pylib/ tools/
249#
250# And modules are acyclic, and can compile on their own with dependencies. If
251# you pick random .py files and spit out header files, I think they won't compile.
252# The forward declarations and constants will work, but the prototype won't.
253
par-pickle() {
  ### Timed wrapper; re-invokes this script in a child shell
  time $0 _par-pickle
}
255
sum1() {
  ### Sum the first column of stdin and print the total
  local prog='{ sum += $1 } END { print sum }'
  awk "$prog"
}
259
sum-sizes() {
  # Read one path per line on stdin; print the total byte size of all files
  # found under those paths.  Note: find -printf is GNU-specific.
  xargs -I {} -- find {} -printf '%s %p\n' | sum1
}
263
size-ratio() {
  ### Compare the total size of Python sources vs. generated pickle files

  # all-files
  # echo _tmp/p/*

  # 1.96 MB of source code
  all-files | sum-sizes

  # 7.13 MB of pickle files
  # Weirdly echo _tmp/p/* doesn't work here -- it prints everything on one
  # line, while sum-sizes wants one path per line, hence printf
  printf '%s\n' _tmp/p/* | sum-sizes
}
275
276# Only 47 ms!
277# I want the overhead to be less than 1 second:
278# 1. parallel parsing + pickle
279# 2. serial unpickle + type check
280# 3. starting the process
281#
282# So unpickling is slow.
283
osh-overhead() {
  ### Time running a trivial command under bin/osh
  local -a cmd=( bin/osh -c 'echo hi' )
  time "${cmd[@]}"
}
287
288
289# MyPy dev version takes 10.2 seconds the first time (without their mypyc
290# speedups)
291#
292# 0.150 seconds the second time, WITHOUT code changes
293# 0.136 seconds
294
295# 4.1 seconds: whitespace change
296# 3.9 seconds: again, and this is on my fast hoover machine
297
298# 5.0 seconds - Invalid type!
299# 4.9 seconds - again invalid
300
301
mypy-compare() {
  # Baseline comparison: type-check all of Oils with the repo's standard
  # MyPy setup (see the timing notes above)
  devtools/types.sh check-oils
}
305
check-types() {
  ### Type-check pea_main.py with MyPy in --strict mode

  # install-mypy creates this venv; it may not be present on a CI machine.
  local activate=$MYPY_VENV/bin/activate
  if [[ -f $activate ]]; then
    . $activate
  fi

  time python3 -m mypy --strict pea/pea_main.py
}
316
test-translate() {
  ### Smoke test: translate the Oils main program to C++
  local main_py=bin/oils_for_unix.py
  translate-cpp "$main_py"
}
320
test-syntax-error() {
  ### Each kind of bad input should make the parser exit with status 1
  set +o errexit

  # py_err: bad Python syntax; sig_err: bad signature; assign_err: bad
  # assignment.  Same three parse-one / nq-assert pairs as before, as a loop.
  local case_name
  for case_name in py_err sig_err assign_err; do
    parse-one "pea/testdata/${case_name}.py"
    nq-assert $? -eq 1
  done
}
336
run-tests() {
  ### Run all the test-* functions
  # Making this separate for soil/worker.sh

  echo 'Running test functions'
  run-test-funcs
}
343
344"$@"