OILS / data_lang / json-survey.sh View on Github | oilshell.org

395 lines, 216 significant
#!/usr/bin/env bash
#
# Each function below runs one small JSON encode/decode experiment against
# python2, python3, nodejs, and in a few cases Oils.
#
# Usage:
#   data_lang/json-survey.sh <function name>

# Strict mode: unset variables, failed pipeline stages, and failed commands
# all abort the script.
set -o nounset -o pipefail -o errexit

source build/dev-shell.sh  # python3 in $PATH
11
# Show which type each JSON number literal decodes to in Python 2, Python 3
# and node.js.
decode-int-float() {
  # Python 2: the first three literals are floats, the last one is an int.
  local -a py2_snippets=(
    'import json; val = json.loads("1e6"); print(type(val)); print(val)'
    'import json; val = json.loads("1e-6"); print(type(val)); print(val)'
    'import json; val = json.loads("0.5"); print(type(val)); print(val)'
    'import json; val = json.loads("42"); print(type(val)); print(val)'
  )
  local snippet
  for snippet in "${py2_snippets[@]}"; do
    python2 -c "$snippet"
  done

  python3 -c 'import json; val = json.loads("1e6"); print(type(val)); print(val)'

  printf '\n\n'

  # JavaScript only has 'number', no Int and Float
  nodejs -e 'var val = JSON.parse("1e6"); console.log(typeof(val)); console.log(val)'
}
29
# Print a very long decimal integer literal to stdout, with no trailing
# newline: the ten digits 1234567890 repeated back to back.
#
# Arguments:
#   $1 - number of repetitions (optional; default 1000, i.e. 10,000 digits)
big-int() {
  local reps=${1:-1000}
  local i  # keep the loop counter out of the caller's scope

  for (( i = 0; i < reps; i++ )); do
    printf '%s' 1234567890
  done
}
35
36# Hm, decoding integers and floats doesn't have overflow cases
37
# Feed the integer printed by big-int to the Python 3 and node.js JSON
# decoders on stdin, after echoing it.
decode-huge-int() {
  local digits
  digits=$(big-int)
  echo "$digits"

  # A really big integer caused 100% CPU usage in Python 3.  Python 3 releases
  # that enforce the integer string conversion length limit raise ValueError
  # instead, so tolerate a failure here (as the other decode-* functions do);
  # otherwise errexit would stop us before the node.js case below.
  echo "$digits" | python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)' || true

  # decodes to "Infinity"
  echo "$digits" | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'
}
49
# Feed a huge float literal (the big-int digits followed by ".99") to the
# Python 3 and node.js JSON decoders on stdin, after printing it.
decode-huge-float() {
  local literal
  literal="$(big-int).99"
  printf '%s\n' "$literal"

  # decodes to "inf"
  python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)' \
    < <(echo "$literal")

  # decodes to "Infinity"
  nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));' \
    < <(echo "$literal")
}
61
# Show the error reporting of Python 2 and node.js on syntactically invalid
# JSON.  Every decoder call is expected to fail, so failures are ignored.
decode-syntax-errors() {
  local py_obj='import json; val = json.loads("{3:4}"); print(type(val)); print(val)'
  local py_arr='import json; val = json.loads("[3:4]"); print(type(val)); print(val)'

  python2 -c "$py_obj" || true
  echo
  python2 -c "$py_arr" || true

  printf '\n\n'

  # This has good position information
  # It prints the line number, the line, and points to the token in the line
  # where the problem happened
  local -a js_cases=(
    'var val = JSON.parse("{3: 4}"); console.log(typeof(val)); console.log(val)'
    'var val = JSON.parse("[\n 3: 4\n]"); console.log(typeof(val)); console.log(val)'
    'var val = JSON.parse("[\n\n \"hello "); console.log(typeof(val)); console.log(val)'
  )
  local js
  for js in "${js_cases[@]}"; do
    nodejs -e "$js" || true
  done
}
81
# Decode the empty string with Python 3 and node.js; failures are ignored.
decode-empty-input() {
  local py='import json; val = json.loads(""); print(type(val)); print(val)'
  local js='var val = JSON.parse(""); console.log(typeof(val)); console.log(val)'

  python3 -c "$py" || true

  printf '\n\n'

  nodejs -e "$js" || true
}
90
# Decode a valid JSON value followed by extra data ("[]]") with Python 3 and
# node.js; failures are ignored.
decode-trailing-data() {
  local py='import json; val = json.loads("[]]"); print(type(val)); print(val)'
  local js='var val = JSON.parse("[]]"); console.log(typeof(val)); console.log(val)'

  # Extra data
  python3 -c "$py" || true

  printf '\n\n'

  nodejs -e "$js" || true
}
100
101
# Decode a JSON string containing the escape \' (backslash + single quote)
# with Python 3 and node.js; failures are ignored.
#
# Side effect: writes the test input to _tmp/json.txt (relative to the
# current directory), creating _tmp/ if necessary.
decode-invalid-escape() {
  # The redirect below fails if the scratch directory is missing.
  mkdir -p _tmp

  # single quoted escape not valid
  # A quoted heredoc is used so the backslash and quotes need no shell escaping.
  cat >_tmp/json.txt <<'EOF'
"\'"
EOF
  local json
  json=$(cat _tmp/json.txt)

  python3 -c 'import json, sys; val = json.loads(sys.argv[1]); print(type(val)); print(val)' \
    "$json" || true

  echo
  echo

  nodejs -e 'var val = JSON.parse(process.argv[1]); console.log(typeof(val)); console.log(val)' \
    "$json" || true
}
119
# Decode an object with a carriage return between ':' and the value, using
# Python 3 and node.js; failures are ignored.
decode-whitespace() {
  # e.g. is carriage return whitespace? Yes, it is allowed
  local cr=$'\r'
  local json="{\"age\":${cr}42}"

  # neither \f nor \v is allowed
  #local json=$'{"age":\f42}'
  #local json=$'{"age":\v42}'

  local py='import json, sys; val = json.loads(sys.argv[1]); print(type(val)); print(val)'
  local js='var val = JSON.parse(process.argv[1]); console.log(typeof(val)); console.log(val)'

  python3 -c "$py" "$json" || true

  printf '\n\n'

  nodejs -e "$js" "$json" || true
}
137
# Encode a handful of small dicts and lists with a 4-space indent in Python 3
# and node.js.  The same value literals are valid in both languages.
encode-list-dict-indent() {
  local -a literals=(
    '{}'
    '{"a": 42}'
    '{"a": 42, "b": 43}'
    '[]'
    '[42]'
  )
  local lit

  echo 'PYTHON'
  for lit in "${literals[@]}"; do
    python3 -c "import json; val = ${lit}; print(json.dumps(val, indent=4))"
  done
  echo

  echo 'JS'
  for lit in "${literals[@]}"; do
    nodejs -e "var val = ${lit}; console.log(JSON.stringify(val, null, 4))"
  done
  echo
}
155
# Encode one nested value with each library's default settings, to compare
# the default whitespace.
encode-default() {
  local val='{"a": 42, "b": [1, 2, 3]}'

  echo 'PYTHON'
  python3 -c "import json; val = ${val}; print(json.dumps(val))"
  echo

  echo 'JS'
  nodejs -e "var val = ${val}; console.log(JSON.stringify(val))"
  echo

  # Observed output:
  #
  # PYTHON
  # {"a": 42, "b": [1, 2, 3]}
  #
  # JS
  # {"a":42,"b":[1,2,3]}
  #
  # Hm we indent by default, maybe we should change this
  #
  # I think the = operator indents by default, but json/json8 don't?
  #
  # Single knob design:
  #
  # json write (x) # space=2 by default
  # json write (x, space=0) # like JS
}
180
# Compare the "no indentation" knobs of Python 3 json.dumps and node.js
# JSON.stringify on one nested value.
encode-no-indent() {
  local val='{"a": 42, "b": [1, 2, 3]}'
  local kwarg space

  echo 'PYTHON'

  # indent=None has a space after separators; separators= is how you control it
  for kwarg in 'indent=None' 'separators=[",", ":"]'; do
    python3 -c "import json; val = ${val}; print(json.dumps(val, ${kwarg}))"
  done
  echo

  # Python: -1 and 0 both mean zero indent, but MULTIPLE lines
  for kwarg in 'indent=-1' 'indent=0'; do
    python3 -c "import json; val = ${val}; print(json.dumps(val, ${kwarg}))"
    echo
  done

  echo 'JS'

  # JS: -1 and 0 both print on ONE LINE
  # Second arg is "replacer", which I don't think we need
  # The third arg can be a string too, e.g. "\t"
  for space in '-1' '0' '"\t"'; do
    nodejs -e "var val = ${val}; console.log(JSON.stringify(val, null, ${space}))"
  done

  # Python has indent=0 vs indent=None, and it has separators=[",", ":"]
  # JS has indent=1 and indent="\t" etc.
  # - it also clamps strings/indents to 10 chars or less

  # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify
  # indent less than 1 means "no space"
  #
  # Which behavior should OSH have for 0 and -1?
  #
  # Does indent=0 and indent=null and indent=" " make sense?
  # I think it could
}
217
# Try to encode a self-referential dict and list in Python 3 and node.js.
# Every encoder call is expected to fail, so failures are ignored.
encode-obj-cycles() {
  local -a py_cases=(
    'import json; val = {}; val["k"] = val; print(json.dumps(val))'
    'import json; val = []; val.append(val); print(json.dumps(val))'
  )
  local -a js_cases=(
    'var val = {}; val["k"] = val; console.log(JSON.stringify(val))'
    'var val = []; val.push(val); console.log(JSON.stringify(val))'
  )
  local code

  for code in "${py_cases[@]}"; do
    python3 -c "$code" || true
    echo
  done

  # Better error message than Python!
  # TypeError: Converting circular structure to JSON
  # --> starting at object with constructor 'Object'
  # --- property 'k' closes the circle
  for code in "${js_cases[@]}"; do
    nodejs -e "$code" || true
    echo
  done
}
235
# Encode a list that references the same inner list twice (shared, but not
# cyclic) in Python 3, node.js and Oils.
multiple-refs() {
  local py='import json; mylist = [1,2,3]; val = [mylist, mylist]; print(repr(val)); print(json.dumps(val))'
  local js='var mylist = [1,2,3]; var val = [mylist, mylist]; console.log(val); console.log(JSON.stringify(val))'
  local oils='var mylist = [1,2,3]; var val = [mylist, mylist]; = val; json write (val); pp asdl_ (val)'

  # Python prints a tree
  python3 -c "$py"
  echo

  # Same with node.js
  nodejs -e "$js"
  echo

  # Same with Oils
  bin/osh -c "$oils"
  echo
}
249
# Build a dict that contains itself under "key" in YSH, then print it four
# ways: the = operator, pp test_, pp asdl_ and json write.
oils-cycles() {
  local program='var d = {}; setvar d.key = d; = d; pp test_ (d); pp asdl_ (d); json write (d)'
  bin/ysh -c "$program"
}
253
# Round-trip a JSON string through decode + encode in Python 2, Python 3 and
# node.js, printing the re-encoded result of each.
#
# Arguments:
#   $1 - JSON text to decode (optional; defaults to a string holding the
#        escaped surrogate pair \ud83e\udd26)
#
# The JSON text is handed to each interpreter as a command-line argument
# rather than spliced into the program source.  That way it is never subject
# to shell word splitting or globbing, may contain single quotes, and - in
# the node.js case - the \u escapes reach JSON.parse intact instead of being
# decoded first by the JavaScript string literal.
surrogate-pair() {
  local json=${1:-'"\ud83e\udd26"'}

  # Hm it actually escapes. I thought it would use raw UTF-8
  python2 -c 'import json, sys; s = json.loads(sys.argv[1]); print(json.dumps(s))' "$json"
  echo

  python3 -c 'import json, sys; s = json.loads(sys.argv[1]); print(json.dumps(s))' "$json"
  echo

  # This doesn't escape
  nodejs -e 'var s = JSON.parse(process.argv[1]); console.log(JSON.stringify(s))' "$json"
  echo
}
268
# Run the surrogate-pair round trip on a string holding only the first half
# of a surrogate pair (\ud83e).
surrogate-half() {
  # Round trips correctly!
  surrogate-pair '"\ud83e"'
}
275
# Encode NaN, then decode the result again, in Python 2, Python 3 and
# node.js.  Failures are ignored.
encode-nan() {
  # Wow Python doesn't conform to spec!!
  # https://docs.python.org/3.8/library/json.html#infinite-and-nan-number-values
  #
  # allow_nan=False and parse_constant alter the behavior
  local py_default='import json; val = float("nan"); s = json.dumps(val); print(s); print(json.loads(s))'
  local py_strict='import json; val = float("nan"); s = json.dumps(val, allow_nan=False); print(s); print(json.loads(s))'
  local js='var val = NaN; var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));'

  local py
  for py in python2 python3; do
    "$py" -c "$py_default" || true
    echo
  done

  # raises error
  python3 -c "$py_strict" || true
  echo

  # nodejs uses null
  nodejs -e "$js" || true
  echo
}
296
# Encode negative infinity, then decode the result again, in Python 2,
# Python 3 and node.js.  Failures are ignored.
encode-inf() {
  # Again, Python doesn't conform to spec
  local py_default='import json; val = float("-inf"); print(val); s = json.dumps(val); print(s); print(json.loads(s))'
  local py_strict='import json; val = float("-inf"); print(val); s = json.dumps(val, allow_nan=False); print(s); print(json.loads(s))'
  local js='var val = Number.NEGATIVE_INFINITY; console.log(val); var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));'

  local py
  for py in python2 python3; do
    "$py" -c "$py_default" || true
    echo
  done

  # raises error
  python3 -c "$py_strict" || true
  echo

  # nodejs uses null again
  nodejs -e "$js" || true
  echo
}
314
# Try to encode values that are not JSON data: a module in Python 3, and the
# JSON object and a function in node.js.  Failures are ignored.
encode-bad-type() {
  python3 -c 'import json; print(json.dumps(json))' || true
  echo

  # {} or undefined - BAD!
  local js
  for js in \
    'console.log(JSON.stringify(JSON));' \
    'function f() { return 42; }; console.log(JSON.stringify(f));'
  do
    nodejs -e "$js" || true
  done
  echo
}
324
# Try to encode the single byte 0xff in Python 2, Python 3 and node.js.
# Failures are ignored.
encode-binary-data() {
  local py='import json; print(json.dumps(b"\xff"))'

  # utf-8 codec can't decode byte -- so it does UTF-8 decoding during encoding,
  # which makes sense
  python2 -c "$py" || true
  echo

  # can't serialize bytes type
  python3 -c "$py" || true
  echo

  # there is no bytes type? \xff is a code point in JS
  local js
  for js in \
    'console.log(JSON.stringify("\xff"));' \
    'console.log(JSON.stringify("\u{ff}"));'
  do
    nodejs -e "$js" || true
  done
  echo
}
340
# Decode the bytes ed a0 be as UTF-8 in Python 2, Python 3 and node.js, plus
# the bytes ce bc in node.js for comparison.
decode-utf8-in-surrogate-range() {
  python2 -c 'b = "\xed\xa0\xbe"; print(repr(b.decode("utf-8")))'
  echo

  # Hm Python 3 gives an error here!
  python3 -c 'b = b"\xed\xa0\xbe"; print(repr(b.decode("utf-8")))' || true
  echo

  # First byte sequence: valid.  Second one: can't decode!
  local bytes
  for bytes in '0xce, 0xbc' '0xed, 0xa0, 0xbe'; do
    nodejs -e "var u = new Uint8Array([${bytes}]); var string = new TextDecoder(\"utf-8\").decode(u); console.log(string);"
    echo
  done
}
357
# Print a deeply nested JSON array to stdout, with no trailing newline.
#
# The output is '[' followed by $1 copies of '[42,', then '43]', then $1
# copies of ']'.  For example, `pairs 2` prints [[42,[42,43]]] and `pairs 0`
# prints [43].
#
# Arguments:
#   $1 - number of '[42,' ... ']' layers to emit
pairs() {
  local depth=$1
  local i  # keep the loop counter out of the caller's scope

  printf '%s' '['
  for (( i = 0; i < depth; i++ )); do
    printf '%s' '[42,'
  done
  printf '%s' '43]'
  for (( i = 0; i < depth; i++ )); do
    printf '%s' ']'
  done
}
371
# Decode a deeply nested array (built by pairs) with Python 2, Python 3,
# node.js, bin/osh, and the C++ build of osh.  Decoder failures are ignored;
# a failure to build the C++ binary with ninja is not.
#
# Arguments:
#   $1 - nesting depth passed to pairs (optional; default 40200)
decode-deeply-nested() {
  local depth=${1:-40200}
  local msg
  msg=$(pairs "$depth")

  # RuntimeError
  echo "$msg" | python2 -c 'import json, sys; print(repr(json.load(sys.stdin)))' || true

  # RecursionError
  echo "$msg" | python3 -c 'import json, sys; print(repr(json.load(sys.stdin)))' || true

  # Hm node.js handles it fine? Probably doesn't have a stackful parser.
  # [ [ [ [Array] ] ] ]
  echo "$msg" | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));' || true

  echo "$msg" | bin/osh -c 'json read; = _reply' || true

  # Hm this works past 40K in C++! Then segmentation fault. We could put an
  # artificial limit on it.
  local osh=_bin/cxx-opt/osh
  ninja "$osh"
  echo "$msg" | "$osh" -c 'json read; = _reply; echo $[len(_reply)]' || true
}
394
# Dispatch: run the function named by the first argument, passing the
# remaining arguments through to it.
"${@}"