stdlib/ysh/stream.ysh

OILS / stdlib / ysh / stream.ysh View on Github | oilshell.org

310 lines, 118 significant

1	# stream.ysh
2	#
3	# Usage:
4	# source --builtin stream.ysh
5	#
6	# For reading lines, decoding, extracting, splitting
7
8	# make this file a test server
9	source $LIB_OSH/byo-server.sh
10
11	source $LIB_YSH/args.ysh
12
proc slurp-by (; num_lines) {
  ### Batch stdin lines into lists of num_lines; print each list as one JSON line.

  var batch = []
  for line in (io.stdin) {
    call batch->append(line)

    # Keep collecting until the batch holds exactly num_lines entries.
    if (len(batch) !== num_lines) {
      continue
    }
    json write (batch, space=0)

    # TODO: reuse the list via batch->clear() instead of allocating a new one
    setvar batch = []
  }

  # Flush whatever is left over when stdin did not end on a batch boundary.
  if (len(batch) !== 0) {
    json write (batch, space=0)
  }
}
29
proc test-slurp-by {
  ### Feed the 8 lines printed by 'seq 8' through slurp-by in batches of 3.

  # A real pipe is required here: an escaped '\|' would be handed to seq as a
  # literal argument and slurp-by would never run.
  seq 8 | slurp-by (3)
}
33
34	### Awk
35
36	# Naming
37	#
38	# TEXT INPUT
39	# each-word # this doesn't go by lines, it does a global regex split or something?
40	#
41	# LINE INPUT
42	# each-line --j8 { echo "-- $_line" } # similar to @()
43	# each-line --j8 (^"-- $_line") # is this superfluous?
44	#
45	# each-split name1 name2
46	# (delim=' ')
47	# (ifs=' ')
48	# (pat=/d+/)
49	# # also assign names for each part?
50	#
51	# each-match # regex match
52	# must-match # assert that every line matches
53	#
54	# TABLE INPUT
55	# each-row # TSV and TSV8 input?
56	#
57	# They all take templates or blocks?
58
proc each-line (...words; template=null; ; block=null) {
  ### Placeholder: prints TODO once for every line read from stdin.
  #
  # words    - rest word arguments; not used yet (flags are to be parsed here)
  # template - optional positional argument; not used yet
  # block    - optional block; not used yet

  # TODO:
  # parse --j8 --max-jobs flag

  # parse template_str as string
  # TODO: this is dangerous though ... because you can execute code
  # I think you need a SAFE version

  # evaluate template string expression - I guess that allows $(echo hi) and so
  # forth

  # evaluate block with _line binding
  # block: execute in parallel with --max-jobs

  # Read through io.stdin, like every other proc in this file.
  for line in (io.stdin) {
    echo TODO
  }
}
77
proc test-each-line {
  ### Placeholder test for each-line; it only prints a reminder for now.

  # Intended invocation, once a test runner exists:
  #   ysh-tool test stream.ysh
  #
  # Col
  echo 'TODO: need basic test runner'
}
85
proc each-j8-line (; ; ; block) {
  ### Evaluate block once per stdin line, with the line bound to _line.

  for line in (io.stdin) {
    # TODO: fromJ8Line() toJ8Line()
    # The line is passed through as-is for now; no J8 decoding happens yet.
    var bindings = {_line: line}
    call io->eval(block, vars=bindings)
  }
}
93
proc test-each-j8-line {
  ### Check that each-j8-line binds _line and that the block sees caller variables.

  var lines = []
  var prefix = 'z'

  # unquoted
  # The block reads 'prefix' and mutates 'lines', both defined in this proc.
  seq 3 | each-j8-line {
    call lines->append(prefix ++ _line)
  }
  pp test_ (lines)

  # Note: no trailing new lines, since they aren't significant in Unix
  var expected = ['z1', 'z2', 'z3']
  assert [expected === lines]
}
108
proc each-row (; ; block) {
  ### Placeholder: not implemented yet, only prints TODO.

  # NOTE(review): with two semicolons, 'block' lands in the named-parameter
  # group rather than the block group that each-j8-line uses (three
  # semicolons) - confirm which one is intended.
  echo 'TODO'
}
112
proc split-by (; delim; ifs=null; block) {
  ### Split each stdin line on delim and evaluate block with the pieces.
  #
  # For every line, block runs with $0 set to the whole line and the pieces
  # passed as positional arguments.  'ifs' is accepted but not used yet.

  # TODO: provide the option to bind names? Or is that a separate thing?
  # The output of this is "ragged"

  for line in (io.stdin) {
    var fields = line.split(delim)

    # Debug output: the pieces are printed for every line.
    pp (fields)

    # variable number
    call io->eval(block, pos_args=fields, dollar0=line)
  }
}
127
proc chop () {
  ### alias for split-by (placeholder: only prints TODO for now)
  echo 'TODO'
}
132
proc test-split-by {
  ### Run split-by over line-data, exercising block scoping and mutation.

  var z = 'z'    # test out scoping
  var count = 0  # test out mutation

  # TODO: need split by space
  # Where the leading and trailing are split
  # if-split-by(' ') doesn't work well

  # Real pipes and plain '|' separators: the backslash-escaped form turned
  # the pipeline into literal arguments to line-data.
  line-data | split-by (/s+/) {

    # how do we deal with nonexistent?
    # should we also bind _parts or _words?

    echo "$z | $0 | $1 | $z"

    setvar count += 1
  }
  echo "count = $count"
}
152
proc must-split-by (; ; ifs=null; block) {
  ### like if-split-by (placeholder: only prints TODO for now)

  # Neither 'ifs' nor 'block' is used yet.
  echo 'TODO'
}
158
159	# Naming: each-match, each-split?
160
proc if-match (; pattern, template=null; ; block=null) {
  ### like 'grep' but with submatches
  #
  # For each stdin line that matches pattern:
  # - with a block: evaluate it with $0 set to the whole match
  # - else with a template: print the placeholder TEMPLATE (not implemented)
  # - otherwise: print the placeholder TSV (not implemented)

  for line in (io.stdin) {
    var found = line.search(pattern)
    if (not found) {
      continue  # lines that do not match produce no output
    }

    # Should we also pass _line?
    if (block) {
      call io->eval(block, dollar0=found.group(0))
    } elif (template) {
      echo TEMPLATE
    } else {
      echo TSV
    }
  }

  # always succeeds - I think must-match is the one that can fail
}
184
proc must-match (; pattern; block) {
  ### like if-match (placeholder: only prints TODO for now)

  # NOTE(review): with two semicolons, 'block' lands in the named-parameter
  # group, unlike if-match where it is the block parameter - confirm intent.
  echo 'TODO'
}
190
proc line-data {
  ### Print three fixed sample lines used as input by the tests in this file.

  # note: trailing ''' issue, I should probably get rid of the last line
  var sample = '''
    prefix 30 foo
    oils
    /// 42 bar
    '''

  # --end '' stops 'write' from appending its own terminator after the text.
  write --end '' -- $sample
}
200
# Shared test pattern: two captured groups, d+ then w+, separated by s+.
const pat = /<capture d+> s+ <capture w+>/
202
proc test-if-match {
  ### Run if-match with a block over line-data, checking scoping and mutation.

  var z = 'z'    # test out scoping
  var count = 0  # test out mutation

  # Test cases should be like:
  # grep: print the matches, or just count them
  # sed: print a new line based on submatches
  # awk: re-arrange the cols, and also accumulate counters

  # A real pipe is needed; an escaped '\|' is just a literal argument.
  line-data | if-match (pat) {
    echo "$z $0 $z"
    # TODO: need pos_args

    #echo "-- $2 $1 --"

    setvar count += 1
  }
  echo "count = $count"
}
222
proc test-if-match-2 {
  ### Run if-match with a template expression instead of a block.

  # If there's no block or template, it should print out a TSV with:
  #
  # $0 ...
  # $1 $2
  # $_line maybe?

  #line-data | if-match (pat)

  var z = 'z'  # scoping

  # Real pipes are needed; an escaped '\|' is just a literal argument.
  line-data | if-match (pat, ^"$z $0 $z")
  line-data | if-match (pat, ^"-- $0 --")
}
236
237	# might be a nice way to write it, not sure if byo.sh can discover it
# Disabled sketch ('if false' never runs) of a grouped test syntax.
if false {
  tests 'if-match' {
    proc case-block {
      echo 'TODO'
    }

    proc case-template {
      echo 'TODO'
    }
  }
}
248
249	# Protocol:
250	#
# - The file lists its tests as the "actions"
252	# - Then the test harness runs them
253	# - But should it be ENV vars
254	#
255	# - BYO_LIST_TESTS=1
256	# - BYO_RUN_TEST=foo
257	# - $PWD is a CLEAN temp dir, the process doesn't have to do anything
258
259	# - silent on success, but prints file on output
260	# - OK this makes sense
261	#
262	# The trivial test in Python:
263	#
264	# from test import byo
265	# byo.maybe_main()
266	#
267	# bash library:
268	# source --builtin byo-server.sh
269	#
270	# byo-maybe-main # reads env variables, and then exits
271	#
272	# source --builtin assertions.ysh
273	#
274	# assert-ok 'echo hi'
275	# assert-stdout 'hi' 'echo -n hi'
276	#
277	# "$@"
278	#
279	# Run all tests
280	# util/byo-client.sh run-tests $YSH stdlib/table.ysh
281	# util/byo-client.sh run-tests -f x $YSH stdlib/table.ysh
282
283	# Clean process
284	# Clean working dir
285
286	#
287	# Stream Protocol:
288	# #.byo - is this she-dot, that's for a file
289	# Do we need metadata?
290	#
291
292	# The harness
293	#
294	# It's process based testing.
295	#
296	# Test runner process: bash or OSH (unlike sharness!)
297	# Tested process: any language - bash,
298	#
299	# Key point: you don't have to quote shell code?
300
# Placeholder: listing tests is not implemented yet; prints TODO.
list-byo-tests() {
  echo 'TODO'
}
304
# Placeholder: running tests is not implemented yet; prints TODO.
run-byo-tests() {
  # Plan: source the file under test here.
  echo 'TODO'
}
309
# Entry point for the test-server mode.
# NOTE(review): byo-maybe-run is presumably provided by byo-server.sh, which
# is sourced at the top of this file - confirm.
byo-maybe-run