OILS / stdlib / ysh / stream.ysh View on Github | oils.pub

310 lines, 118 significant
1# stream.ysh
2#
3# Usage:
4# source --builtin stream.ysh
5#
6# For reading lines, decoding, extracting, splitting
7
8# make this file a test server
9source $LIB_OSH/byo-server.sh
10
11source $LIB_YSH/args.ysh
12
proc slurp-by (; num_lines) {
  ### Batch stdin lines into groups of num_lines; write each group as one JSON array

  var batch = []
  for line in (io.stdin) {
    call batch->append(line)
    if (len(batch) !== num_lines) {
      continue  # batch not full yet
    }

    # Batch is full: emit it compactly, then start a new one
    json write (batch, space=0)

    # TODO:
    #call batch->clear()
    setvar batch = []
  }

  # Flush the trailing partial batch, if there is one
  if (len(batch) > 0) {
    json write (batch, space=0)
  }
}
29
proc test-slurp-by {
  ### Smoke test: feed 8 lines to slurp-by in groups of 3
  seq 1 8 | slurp-by (3)
}
33
34### Awk
35
36# Naming
37#
38# TEXT INPUT
39# each-word # this doesn't go by lines, it does a global regex split or something?
40#
41# LINE INPUT
42# each-line --j8 { echo "-- $_line" } # similar to @()
43# each-line --j8 (^"-- $_line") # is this superfluous?
44#
45# each-split name1 name2
46# (delim=' ')
47# (ifs=' ')
48# (pat=/d+/)
49# # also assign names for each part?
50#
51# each-match # regex match
52# must-match # assert that every line matches
53#
54# TABLE INPUT
55# each-row # TSV and TSV8 input?
56#
57# They all take templates or blocks?
58
proc each-line (...words; template=null; ; block=null) {
  ### Stub: intended to run a template or block per stdin line; prints TODO per line
  #
  # Args:
  #   words:    rest words; not parsed yet (see TODO about --j8 / --max-jobs)
  #   template: not used yet
  #   block:    not used yet

  # TODO:
  # parse --j8 --max-jobs flag

  # parse template_str as string
  # TODO: this is dangerous though ... because you can execute code
  # I think you need a SAFE version

  # evaluate template string expression - I guess that allows $(echo hi) and so
  # forth

  # evaluate block with _line binding
  # block: execute in parallel with --max-jobs

  # Read from io.stdin like every other proc in this file; the bare name
  # 'stdin' is not defined anywhere here.
  for line in (io.stdin) {
    echo TODO
  }
}
77
proc test-each-line {
  ### Placeholder test for each-line; only prints a reminder
  echo 'TODO: need basic test runner'

  # Possible invocation:
  #   ysh-tool test stream.ysh
  #
  # Col
}
85
proc each-j8-line (; ; ; block) {
  ### Evaluate block once per stdin line, with the line bound to _line

  for line in (io.stdin) {
    # TODO: fromJ8Line() toJ8Line()
    # var line = fromJson(line)
    var bindings = {_line: line}
    call io->eval(block, vars=bindings)
  }
}
93
proc test-each-j8-line {
  ### The block passed to each-j8-line reads an outer variable and appends to an outer list
  var tag = 'z'
  var seen = []

  # unquoted
  seq 3 | each-j8-line {
    call seen->append(tag ++ _line)
  }
  pp test_ (seen)

  # Note: no trailing newlines, since they aren't significant in Unix
  assert [['z1', 'z2', 'z3'] === seen]
}
108
proc each-row (; ; block) {
  ### Stub for row-oriented input; currently only prints TODO
  #
  # NOTE(review): 'block' is declared in the third parameter group here, while
  # each-j8-line declares it in the fourth - confirm which one is intended.
  echo TODO
}
112
proc split-by (; delim; ifs=null; block) {
  ### Split each stdin line on delim and evaluate block with the pieces as $1, $2, ...
  #
  # Args:
  #   delim: separator passed to line.split()
  #   ifs:   accepted but not used yet
  #   block: evaluated once per line, with $0 set to the whole line
  #
  # TODO: provide the option to bind names? Or is that a separate thing?
  # The output of this is "ragged"

  for line in (io.stdin) {
    #pp (line)
    var parts = line.split(delim)
    # Debug aid, disabled so that only the block's own output is emitted
    #pp (parts)

    # variable number
    call io->eval(block, dollar0=line, pos_args=parts)
  }
}
127
proc chop () {
  ### alias for split-by (not implemented yet; prints TODO)
  echo TODO
}
132
proc test-split-by {
  ### Run split-by over line-data with a block that reads z and increments count
  var count = 0  # test out mutation
  var z = 'z'    # test out scoping

  # TODO: need split by space
  #   Where the leading and trailing are split
  #   if-split-by(' ') doesn't work well

  line-data | split-by (/s+/) {
    # how do we deal with nonexistent?
    # should we also bind _parts or _words?

    echo "$z | $0 | $1 | $z"
    setvar count += 1
  }

  echo "count = $count"
}
152
proc must-split-by (; ; ifs=null; block) {
  ### like if-split-by (not implemented yet; prints TODO)
  echo TODO
}
158
159# Naming: each-match, each-split?
160
proc if-match (; pattern, template=null; ; block=null) {
  ### like 'grep' but with submatches
  #
  # For every stdin line in which pattern is found:
  #   - if a block was given, evaluate it with $0 set to the whole match
  #   - else if a template was given, print TEMPLATE (placeholder)
  #   - otherwise print TSV (placeholder)

  for line in (io.stdin) {
    var m = line.search(pattern)
    if (not m) {
      continue  # lines without a match produce nothing
    }

    #pp asdl_ (m)
    #var groups = m.groups()

    # Should we also pass _line?

    if (block) {
      var whole = m.group(0)
      call io->eval(block, dollar0=whole)
    } elif (template) {
      echo TEMPLATE
    } else {
      echo TSV
    }
  }

  # always succeeds - I think must-match is the one that can fail
}
184
proc must-match (; pattern; block) {
  ### like if-match (not implemented yet; prints TODO)
  #
  # NOTE(review): 'block' is declared in the third parameter group here, while
  # if-match declares it in the fourth - confirm which one is intended.
  echo TODO
}
190
proc line-data {
  ### Print the three-line fixture used by the split-by and if-match tests

  # note: trailing ''' issue, I should probably get rid of the last line
  var text = '''
  prefix 30 foo
  oils
  /// 42 bar
  '''

  # --end '' because the text already ends with a newline
  write --end '' -- $text
}
200
# Shared pattern for the if-match tests: two captures separated by s+
# (first d+, then w+)
const pat = /<capture d+> s+ <capture w+>/
202
proc test-if-match {
  ### Run if-match over line-data with a block that reads z and increments count
  var count = 0  # test out mutation
  var z = 'z'    # test out scoping

  # Test cases should be like:
  #   grep: print the matches, or just count them
  #   sed: print a new line based on submatches
  #   awk: re-arrange the cols, and also accumulate counters

  line-data | if-match (pat) {
    echo "$z $0 $z"

    # TODO: need pos_args
    #echo "-- $2 $1 --"

    setvar count += 1
  }

  echo "count = $count"
}
222
proc test-if-match-2 {
  ### Run if-match over line-data with a template expression instead of a block
  #
  # If there's no block or template, it should print out a TSV with:
  #
  #   $0 ...
  #   $1 $2
  #   $_line maybe?

  #line-data | if-match (pat)

  var z = 'z'  # scoping

  line-data | if-match (pat, ^"$z $0 $z")
  line-data | if-match (pat, ^"-- $0 --")
}
236
237# might be a nice way to write it, not sure if byo.sh can discover it
if false {
  # Disabled sketch: test cases grouped under a named 'tests' block
  tests 'if-match' {
    proc case-block {
      echo TODO
    }

    proc case-template {
      echo TODO
    }
  }
}
248
249# Protocol:
250#
# - The file lists its tests (the "actions")
252# - Then the test harness runs them
253# - But should it be ENV vars
254#
255# - BYO_LIST_TESTS=1
256# - BYO_RUN_TEST=foo
257# - $PWD is a CLEAN temp dir, the process doesn't have to do anything
258
259# - silent on success, but prints file on output
260# - OK this makes sense
261#
262# The trivial test in Python:
263#
264# from test import byo
265# byo.maybe_main()
266#
267# bash library:
268# source --builtin byo-server.sh
269#
270# byo-maybe-main # reads env variables, and then exits
271#
272# source --builtin assertions.ysh
273#
274# assert-ok 'echo hi'
275# assert-stdout 'hi' 'echo -n hi'
276#
277# "$@"
278#
279# Run all tests
280# util/byo-client.sh run-tests $YSH stdlib/table.ysh
281# util/byo-client.sh run-tests -f x $YSH stdlib/table.ysh
282
283# Clean process
284# Clean working dir
285
286#
287# Stream Protocol:
288# #.byo - is this she-dot, that's for a file
289# Do we need metadata?
290#
291
292# The harness
293#
294# It's process based testing.
295#
296# Test runner process: bash or OSH (unlike sharness!)
297# Tested process: any language - bash,
298#
299# Key point: you don't have to quote shell code?
300
proc list-byo-tests {
  ### Stub for the harness step that lists tests; currently only prints TODO
  #
  # Declared with 'proc' like every other definition in this file.
  echo TODO
}
304
proc run-byo-tests {
  ### Stub for the harness step that runs tests; currently only prints TODO
  #
  # Declared with 'proc' like every other definition in this file.

  # source it
  echo TODO
}
309
# Entry point for the test-server behaviour; byo-maybe-run is not defined in
# this file (presumably it comes from the byo-server.sh sourced at the top - confirm)
byo-maybe-run