OILS / demo / url-search-params.ysh View on Github | oilshell.org

271 lines, 121 significant
1#!bin/ysh
2#
3# Usage:
4# demo/url-search-params.ysh <function name>
5#
# Tested against JavaScript's URLSearchParams.  Differences:
#
# - JS strings can't represent bytes, so %ff turns into the Unicode replacement char.
#   - YSH turns this into the 0xff byte, denoted as b'\yff'
# - JS accepts '==' as key="" value="="
#   - In YSH, this is a syntax error.
# - On the other hand, both JS and YSH agree that =&=&= is 3 empty key value pairs:
#   [["", ""],
#    ["", ""],
#    ["", ""]]
16#
17# Evaluation of "the YSH experience":
18#
19# GOOD:
20#
21# - Eggex is elegant
22# - This code is structured better than the Python stdlib urlparse.py!
23# - This problem is also hard/ugly in JavaScript. They use an extra
24# s=>replace() on top of decodeURIComponent()!
25# - Task files in YSH basically work!
26# - I think this file has a nice structure
27# - It's nice to mix INTERIOR YSH testing and EXTERIOR comparison to node.js
28# - Triple quoted multiline strings are nice!
29#
30# NEEDS WORK:
31#
# - need Vim syntax highlighting!
#   - e.g. multiline '' strings aren't highlighted
# - task files need completion
35#
# - Eggex could use a multiline /// syntax, though you can use \ for line continuation
# - Eggex could use "which" match
# - An alternative to printf -v is probably needed, or at least wrap it in the YSH
#   stdlib
40#
# - ERROR messages for URL parsing should bubble up to the user!
#   - USER code should be able to point to location info for bad escapes
#     like %f or %0z
#   - I guess we just need an idiom for this?
45
46source $LIB_OSH/task-five.sh
47#source $LIB_YSH/yblocks.ysh
48
func strFromTwoHex(two_hex) {
  ### Return the string for one byte written as two hex digits, e.g. '41' -> 'A'

  # TODO: replace this old OSH idiom with a native YSH one.  A Python-style
  # version would start from something like:
  #   var i = int(two_hex, 16)

  # The format string is \xHH; printf expands that escape and -v stores the
  # result in the named variable instead of printing it.
  var byte
  printf -v byte "\\x$two_hex"
  return (byte)
}
59
# A single hexadecimal digit, either case.
const Hex = / [0-9 a-f A-F] /

# One unit of a percent-encoded string.  Exactly one named capture is set per
# match:
#   lit      a run of characters that are neither '%' nor '+'
#   plus     a single '+'
#   two_hex  the two hex digits following a '%'
const Quoted = / \
  <capture !['%+']+ as lit> \
  | <capture '+' as plus> \
  | '%' <capture Hex Hex as two_hex> \
  /
67
func unquote (s) {
  ### Decode a percent-encoded string: '+' becomes a space, %XX becomes a byte
  #
  # Fails with an error if some part of s does not match Quoted, i.e. if a
  # '%' is not followed by two hex digits (like %f or %0z).  The error message
  # reports the byte offset where decoding stopped.

  var pos = 0
  var parts = []
  while (true) {
    # Match exactly one unit (literal run, '+', or %XX) at the current offset.
    var m = s.leftMatch(Quoted, pos=pos)
    if (not m) {
      break
    }

    # Only one of the named groups is set for any given match.
    var lit = m.group('lit')
    var two_hex = m.group('two_hex')

    if (lit) {
      call parts->append(lit)
    } elif (m.group('plus')) {
      call parts->append(' ')
    } elif (two_hex) {
      call parts->append(strFromTwoHex(two_hex))
    }

    setvar pos = m.end(0)
  }

  # The loop stops at the first offset that does not match; stopping short of
  # the end means the rest of the input is malformed.
  if (pos !== len(s)) {
    error "Unexpected trailing input in unquote at byte $pos of $[len(s)]"
  }

  return (join(parts))
}
110
proc js-decode-part(s) {
  ### Print the JSON-encoded result of decoding s with node.js, for comparison with unquote()

  # The argument after the script text arrives in JS as process.argv[1].
  nodejs -e '''

  const raw = process.argv[1];

  // decodeURIComponent() leaves + alone, presumably because + only means
  // space in query params, not in URI components.  So replace it first.
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent
  const encoded = raw.replace(/\+/g, " ");

  const j = JSON.stringify(decodeURIComponent(encoded));
  process.stdout.write(j);

  ''' $s
}
125
# Inputs for test-part: each one is decoded by both unquote() and node.js.
# TODO: add empty key, empty value, invalid % , etc.
const PART_CASES = ['foo+bar', 'foo%23%40']
131
proc test-part() {
  ### Check that unquote() agrees with node.js on every entry of PART_CASES

  echo hi

  for encoded in (PART_CASES) {
    # Reference answer: node.js prints JSON, which is parsed into 'expected'.
    js-decode-part $encoded | json read (&expected)
    echo 'JS'
    pp test_ (expected)

    # Answer from the YSH implementation in this file.
    echo 'YSH'
    var actual = unquote(encoded)
    pp test_ (actual)

    assert [actual === expected]

    echo
  }
}
152
#
# Query
#

# A key or a value: any run of characters other than '&', '=' and space.
# The run may be empty, because JavaScript allows either side of k=v to be
# empty and this parser matches that.
const Tok = / !['&= ']* /

# key=value, with both sides captured by name.
const Pair = / <capture Tok as key> '=' <capture Tok as value> /

# One pair plus the '&' that may follow it.  The 'sep' capture is set only
# when that '&' is present.
const Pairs = / Pair <capture '&' as sep>? /
163
func URLSearchParams(s) {
  ### Parse a query string like k=v&foo=spam+eggs&k=v into a list of [key, value] pairs

  var pairs = []
  var pos = 0

  # Each iteration consumes one key=value pair, and the '&' after it if there
  # is one, starting exactly at the current offset.
  while (true) {
    var m = s.leftMatch(Pairs, pos=pos)
    if (not m) {
      break
    }

    # Keys and values are split first and percent-decoded afterwards.
    call pairs->append([unquote(m.group('key')), unquote(m.group('value'))])

    setvar pos = m.end(0)

    # Without a '&' after this pair, no further pair can follow.
    if (not m.group('sep')) {
      break
    }
  }

  # Whatever was not consumed did not parse as key=value.
  if (pos !== len(s)) {
    error "Unexpected trailing input in URLSearchParams $pos != $[len(s)]"
  }

  return (pairs)
}
201
proc js-decode-query(s) {
  ### Print the JSON-encoded list of [key, value] pairs node.js URLSearchParams yields for s

  # The argument after the script text arrives in JS as process.argv[1].
  nodejs -e '''

  const u = new URLSearchParams(process.argv[1]);

  // The loop variable must be declared: a bare "pair" would become an
  // implicit global, and is a ReferenceError in strict mode.
  const pairs = [];
  for (const pair of u) {
    pairs.push(pair);
  }

  const j = JSON.stringify(pairs);
  process.stdout.write(j);
  ''' $s
}
219
# Query strings that test-query feeds to both URLSearchParams() and node.js.
const QUERY_CASES = [
  # '+' and %XX escapes in a value
  'k=foo+bar',
  'key=foo%23%40',

  # several pairs, a repeated key, escapes in a key
  'k=v&foo%23=bar+baz+%24%25&k=v',
  'foo+bar=z',

  # empty values
  'missing_val=&k=',

  # empty keys
  '=missing_key&=m2',

  # Both sides empty is valid, with or without a trailing '&'
  '=&=',
  '=&=&',
]

# Query strings on which YSH and JavaScript are known to differ.
const OTHER_CASES = [
  # JavaScript converts %ff to the Unicode replacement char - its strings
  # can't represent bytes
  'foo%ffbar=z',

  # JavaScript treats = as literal - that seems wrong.
  # YSH treating this as an error seems right.
  '==',
]
245
246
proc test-query() {
  ### Check that URLSearchParams() agrees with node.js on every entry of QUERY_CASES

  # To look at the known differences instead, loop over OTHER_CASES here.
  for query in (QUERY_CASES) {
    echo 'INPUT'
    echo " $query"

    # Reference answer: node.js prints JSON, which is parsed into 'expected'.
    js-decode-query $query | json read (&expected)
    echo 'JS'
    pp test_ (expected)

    # Answer from the YSH implementation in this file.
    echo 'YSH'
    var actual = URLSearchParams(query)
    pp test_ (actual)

    assert [actual === expected]

    echo
  }
}
266
proc run-tests() {
  ### Hand this file to the devtools/byo.sh test runner

  devtools/byo.sh test $0
}

# Dispatch to the proc named on the command line (see Usage at the top);
# task-five comes from the task-five.sh library sourced above.
task-five "$@"