1 | #!/usr/bin/env bash
|
2 | #
|
3 | # Usage:
|
4 | # data_lang/json-survey.sh <function name>
|
5 |
|
# Strict mode: abort on use of an unset variable, on a failure anywhere in a
# pipeline, and on any command that exits non-zero.
set -o nounset -o pipefail -o errexit

source build/dev-shell.sh  # python3 in $PATH
|
11 |
|
decode-int-float() {
  # Print the decoded type and value of several JSON number literals.
  local py_show='print(type(val)); print(val)'
  local literal

  # Exponent and fraction forms: noted as floats
  for literal in 1e6 1e-6 0.5; do
    python2 -c "import json; val = json.loads(\"$literal\"); $py_show"
  done

  # Int
  python2 -c "import json; val = json.loads(\"42\"); $py_show"

  python3 -c "import json; val = json.loads(\"1e6\"); $py_show"

  echo
  echo

  # JavaScript only has 'number', no Int and Float
  nodejs -e 'var val = JSON.parse("1e6"); console.log(typeof(val)); console.log(val)'
}
|
29 |
|
big-int() {
  # Print the digits 1234567890 repeated back to back, with no trailing
  # newline.
  #
  # Arguments: $1 - number of repetitions (optional, default 1000)
  # Outputs:   the digit string on stdout
  local count=${1:-1000}
  local i  # keep the loop counter out of the caller's scope

  for ((i = 0; i < count; i++)); do
    printf '%s' 1234567890
  done
}
|
35 |
|
36 | # Hm, decoding integers and floats doesn't have overflow cases
|
37 |
|
decode-huge-int() {
  # Feed the huge integer literal produced by big-int to the Python 3 and
  # node.js JSON decoders, after printing the literal itself.
  local i
  i=$(big-int)
  # Quoted so the digits are printed verbatim, whatever IFS is set to
  printf '%s\n' "$i"

  # really big integer causes 100% CPU usage in Python 3
  printf '%s\n' "$i" \
    | python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)'

  # decodes to "Infinity"
  printf '%s\n' "$i" \
    | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'
}
|
49 |
|
decode-huge-float() {
  # Feed a huge float literal (the output of big-int followed by ".99") to
  # the Python 3 and node.js JSON decoders, after printing the literal itself.
  local f
  f=$(big-int).99
  # Quoted so the literal is printed verbatim, whatever IFS is set to
  printf '%s\n' "$f"

  # decodes to "inf"
  printf '%s\n' "$f" \
    | python3 -c 'import json, sys; val = json.load(sys.stdin); print(type(val)); print(val)'

  # decodes to "Infinity"
  printf '%s\n' "$f" \
    | nodejs -e 'var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'
}
|
61 |
|
decode-syntax-errors() {
  # Show how each decoder reports malformed JSON.  Every command's exit
  # status is ignored with '|| true', so a failing decoder does not stop the
  # survey.
  local doc

  for doc in '{3:4}' '[3:4]'; do
    python2 -c "import json; val = json.loads(\"$doc\"); print(type(val)); print(val)" || true
    echo
  done
  echo

  # This has good position information
  # It prints the line number, the line, and points to the token in the line
  # where the problem happened
  for doc in '{3: 4}' '[\n 3: 4\n]' '[\n\n \"hello '; do
    nodejs -e "var val = JSON.parse(\"$doc\"); console.log(typeof(val)); console.log(val)" || true
  done
}
|
81 |
|
decode-empty-input() {
  # Decode the empty string with Python 3 and node.js; a failing decoder is
  # tolerated.
  local py_code='import json; val = json.loads(""); print(type(val)); print(val)'
  local js_code='var val = JSON.parse(""); console.log(typeof(val)); console.log(val)'

  python3 -c "$py_code" || true

  echo
  echo

  nodejs -e "$js_code" || true
}
|
90 |
|
decode-trailing-data() {
  # Decode "[]]" -- a complete value followed by an extra bracket -- with
  # Python 3 and node.js; a failing decoder is tolerated.
  local doc='[]]'

  # Extra data
  python3 -c "import json; val = json.loads(\"$doc\"); print(type(val)); print(val)" || true

  echo
  echo

  nodejs -e "var val = JSON.parse(\"$doc\"); console.log(typeof(val)); console.log(val)" || true
}
|
100 |
|
101 |
|
decode-invalid-escape() {
  # Decode a JSON string containing the escape \' with Python 3 and node.js.
  # The document is written to _tmp/json.txt, read back, and handed to each
  # decoder as a command-line argument; a failing decoder is tolerated.

  # Make sure the scratch directory exists so the redirect below cannot fail
  mkdir -p _tmp

  # single quoted escape not valid
  cat >_tmp/json.txt <<'EOF'
"\'"
EOF
  local json
  json=$(cat _tmp/json.txt)

  python3 -c 'import json, sys; val = json.loads(sys.argv[1]); print(type(val)); print(val)' \
    "$json" || true

  echo
  echo

  nodejs -e 'var val = JSON.parse(process.argv[1]); console.log(typeof(val)); console.log(val)' \
    "$json" || true
}
|
119 |
|
decode-whitespace() {
  # Check which characters the decoders accept as whitespace between tokens.
  # The document is passed to each decoder as a command-line argument.
  local py_code='import json, sys; val = json.loads(sys.argv[1]); print(type(val)); print(val)'
  local js_code='var val = JSON.parse(process.argv[1]); console.log(typeof(val)); console.log(val)'
  local json

  # e.g. is carriage return whitespace?  Yes, it is allowed
  json=$'{"age":\r42}'

  # neither \f nor \v is allowed
  #json=$'{"age":\f42}'
  #json=$'{"age":\v42}'

  python3 -c "$py_code" "$json" || true

  echo
  echo

  nodejs -e "$js_code" "$json" || true
}
|
137 |
|
encode-list-dict-indent() {
  # Encode the same small dicts and lists with indent=4 in Python 3 and in
  # node.js.  The literals below are spliced into both languages unchanged.
  local -a samples=('{}' '{"a": 42}' '{"a": 42, "b": 43}' '[]' '[42]')
  local sample

  echo 'PYTHON'
  for sample in "${samples[@]}"; do
    python3 -c "import json; val = $sample; print(json.dumps(val, indent=4))"
  done
  echo

  echo 'JS'
  for sample in "${samples[@]}"; do
    nodejs -e "var val = $sample; console.log(JSON.stringify(val, null, 4))"
  done
  echo
}
|
155 |
|
encode-default() {
  # Encode one nested value with each encoder's default settings.
  local value='{"a": 42, "b": [1, 2, 3]}'

  echo 'PYTHON'
  python3 -c "import json; val = $value; print(json.dumps(val))"
  echo

  echo 'JS'
  nodejs -e "var val = $value; console.log(JSON.stringify(val))"
  echo

  # Hm we indent by default, maybe we should change this
  #
  # I think the = operator indents by default, but json/json8 don't?
  #
  # Output noted for the two encoders above:
  #
  #   PYTHON
  #   {"a": 42, "b": [1, 2, 3]}
  #
  #   JS
  #   {"a":42,"b":[1,2,3]}

  # Single knob design:
  #
  #   json write (x)           # space=2 by default
  #   json write (x, space=0)  # like JS
}
|
180 |
|
encode-no-indent() {
  # Compare the "no indentation" settings of the Python 3 and node.js
  # encoders on one nested value.
  local py_setup='import json; val = {"a": 42, "b": [1, 2, 3]}; '
  local js_setup='var val = {"a": 42, "b": [1, 2, 3]}; '
  local indent

  echo 'PYTHON'

  # has a space
  python3 -c "${py_setup}print(json.dumps(val, indent=None))"
  # you control it like this
  python3 -c "$py_setup"'print(json.dumps(val, separators=[",", ":"]))'
  echo

  # Python: -1 and 0 both mean zero indent, but MULTIPLE lines
  for indent in -1 0; do
    python3 -c "${py_setup}print(json.dumps(val, indent=$indent))"
    echo
  done

  echo 'JS'

  # JS: -1 and 0 both print on ONE LINE
  # Second arg is "replacer", which I don't think we need
  for indent in -1 0; do
    nodejs -e "${js_setup}console.log(JSON.stringify(val, null, $indent))"
  done
  # third arg can be a string too
  nodejs -e "$js_setup"'console.log(JSON.stringify(val, null, "\t"))'

  # Python has indent=0 vs indent=None, and it has separators=[",", ":"]
  # JS has indent=1 and indent="\t" etc.
  # - it also clamps strings/indents to 10 chars or less

  # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/stringify
  # indent less than 1 means "no space"
  #
  # Which behavior should OSH have for 0 and -1?
  #
  # Does indent=0 and indent=null and indent=" " make sense?
  # I think it could
}
|
217 |
|
encode-obj-cycles() {
  # Try to encode a dict and a list that contain themselves.  A failing
  # encoder is tolerated; a blank line follows each attempt.
  local snippet

  for snippet in \
    'import json; val = {}; val["k"] = val; print(json.dumps(val))' \
    'import json; val = []; val.append(val); print(json.dumps(val))'; do
    python3 -c "$snippet" || true
    echo
  done

  # Better error message than Python!
  # TypeError: Converting circular structure to JSON
  #   --> starting at object with constructor 'Object'
  #   --- property 'k' closes the circle
  for snippet in \
    'var val = {}; val["k"] = val; console.log(JSON.stringify(val))' \
    'var val = []; val.push(val); console.log(JSON.stringify(val))'; do
    nodejs -e "$snippet" || true
    echo
  done
}
|
235 |
|
multiple-refs() {
  # Encode a list that references the same inner list twice, in Python 3,
  # node.js and bin/osh.
  local py_code='import json; mylist = [1,2,3]; val = [mylist, mylist]; print(repr(val)); print(json.dumps(val))'
  local js_code='var mylist = [1,2,3]; var val = [mylist, mylist]; console.log(val); console.log(JSON.stringify(val))'
  local osh_code='var mylist = [1,2,3]; var val = [mylist, mylist]; = val; json write (val); pp asdl_ (val)'

  # Python prints a tree
  python3 -c "$py_code"
  echo

  # Same with node.js
  nodejs -e "$js_code"
  echo

  # Same with Oils
  bin/osh -c "$osh_code"
  echo
}
|
249 |
|
oils-cycles() {
  # Build a dict that contains itself, then run it through bin/ysh's '=',
  # 'pp test_', 'pp asdl_' and 'json write'.
  local ysh_code='var d = {}; setvar d.key = d; = d; pp test_ (d); pp asdl_ (d); json write (d)'

  bin/ysh -c "$ysh_code"
}
|
253 |
|
surrogate-pair() {
  # Decode a JSON document and re-encode it with Python 2, Python 3 and
  # node.js, printing a blank line after each.
  #
  # Arguments: $1 - JSON text (optional; defaults to a string holding an
  #                 escaped surrogate pair)
  #
  # The text is spliced into a single-quoted literal inside each program, so
  # it must not itself contain a single quote.  The expansion sits inside
  # double quotes so that a document containing whitespace still reaches the
  # interpreter as one argument.
  local json=${1:-'"\ud83e\udd26"'}

  # Hm it actually escapes.  I thought it would use raw UTF-8
  python2 -c "import json; s = json.loads(r'$json'); print(json.dumps(s))"
  echo

  python3 -c "import json; s = json.loads(r'$json'); print(json.dumps(s))"
  echo

  # This doesn't escape
  nodejs -e "var s = JSON.parse('$json'); console.log(JSON.stringify(s))"
  echo
}
|
268 |
|
surrogate-half() {
  # Run the surrogate-pair survey on a string holding only the first half of
  # an escaped surrogate pair.

  # Round trips correctly!
  surrogate-pair '"\ud83e"'
}
|
275 |
|
encode-nan() {
  # Encode NaN and decode the result again, in Python 2/3 and node.js.  A
  # failing interpreter is tolerated; a blank line follows each attempt.
  #
  # Wow Python doesn't conform to spec!!
  # https://docs.python.org/3.8/library/json.html#infinite-and-nan-number-values
  #
  # allow_nan=False and parse_constant alter the behavior
  local py_value='import json; val = float("nan"); '
  local py_round_trip='print(s); print(json.loads(s))'
  local py

  for py in python2 python3; do
    "$py" -c "${py_value}s = json.dumps(val); $py_round_trip" || true
    echo
  done

  # raises error
  python3 -c "${py_value}s = json.dumps(val, allow_nan=False); $py_round_trip" || true
  echo

  # nodejs uses null
  nodejs -e 'var val = NaN; var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));' || true
  echo
}
|
296 |
|
encode-inf() {
  # Encode negative infinity and decode the result again, in Python 2/3 and
  # node.js.  A failing interpreter is tolerated; a blank line follows each
  # attempt.
  #
  # Again, Python doesn't conform to spec
  local py_value='import json; val = float("-inf"); print(val); '
  local py_round_trip='print(s); print(json.loads(s))'
  local py

  for py in python2 python3; do
    "$py" -c "${py_value}s = json.dumps(val); $py_round_trip" || true
    echo
  done

  # raises error
  python3 -c "${py_value}s = json.dumps(val, allow_nan=False); $py_round_trip" || true
  echo

  # nodejs uses null again
  nodejs -e 'var val = Number.NEGATIVE_INFINITY; console.log(val); var s = JSON.stringify(val); console.log(s); console.log(JSON.parse(s));' || true
  echo
}
|
314 |
|
encode-bad-type() {
  # Ask each encoder to serialize a value that is not plain data (a module,
  # the JSON object, a function).  A failing interpreter is tolerated.
  local js_code

  python3 -c 'import json; print(json.dumps(json))' || true
  echo

  # {} or undefined - BAD!
  for js_code in \
    'console.log(JSON.stringify(JSON));' \
    'function f() { return 42; }; console.log(JSON.stringify(f));'; do
    nodejs -e "$js_code" || true
  done
  echo
}
|
324 |
|
encode-binary-data() {
  # Try to encode the single byte 0xff.  The same Python program is given to
  # both Python versions; a failing interpreter is tolerated.
  local py_code='import json; print(json.dumps(b"\xff"))'
  local js_code

  # utf-8 codec can't decode byte -- so it does UTF-8 decoding during encoding,
  # which makes sense
  python2 -c "$py_code" || true
  echo

  # can't serialize bytes type
  python3 -c "$py_code" || true
  echo

  # there is no bytes type?  \xff is a code point in JS
  for js_code in \
    'console.log(JSON.stringify("\xff"));' \
    'console.log(JSON.stringify("\u{ff}"));'; do
    nodejs -e "$js_code" || true
  done
  echo
}
|
340 |
|
decode-utf8-in-surrogate-range() {
  # UTF-8-decode the bytes ed a0 be in Python 2, Python 3 and node.js, plus
  # the bytes ce bc in node.js for comparison.  Only the Python 3 call
  # tolerates failure.
  local py_show='print(repr(b.decode("utf-8")))'
  local js_show='var string = new TextDecoder("utf-8").decode(u); console.log(string);'

  python2 -c 'b = "\xed\xa0\xbe"; '"$py_show"
  echo

  # Hm Python 3 gives an error here!
  python3 -c 'b = b"\xed\xa0\xbe"; '"$py_show" || true
  echo

  # valid
  nodejs -e "var u = new Uint8Array([0xce, 0xbc]); $js_show"
  echo

  # can't decode!
  nodejs -e "var u = new Uint8Array([0xed, 0xa0, 0xbe]); $js_show"
  echo
}
|
357 |
|
pairs() {
  # Print a JSON document of nested two-element lists, with no trailing
  # newline: '[' + '[42,' * N + '43]' + ']' * N.
  #
  # Arguments: $1 - N, the number of '[42,' levels (integer)
  # Outputs:   the JSON text on stdout, e.g. N=2 gives [[42,[42,43]]]
  local depth=$1
  local i  # keep the loop counter out of the caller's scope

  printf '%s' '['
  for ((i = 0; i < depth; i++)); do
    printf '%s' '[42,'
  done
  printf '%s' '43]'
  for ((i = 0; i < depth; i++)); do
    printf '%s' ']'
  done
}
|
371 |
|
decode-deeply-nested() {
  # Pipe the document from 'pairs 40200' into each decoder in turn.  Decoder
  # failures are tolerated; the ninja build step is not.
  local msg
  msg=$(pairs 40200)

  local py_code='import json, sys; print(repr(json.load(sys.stdin)))'
  local js_code='var fs = require("fs"); var stdin = fs.readFileSync(0, "utf-8"); console.log(JSON.parse(stdin));'

  # RuntimeError
  printf '%s\n' "$msg" | python2 -c "$py_code" || true

  # RecursionError
  printf '%s\n' "$msg" | python3 -c "$py_code" || true

  # Hm node.js handles it fine?  Probably doesn't have a stackful parser.
  #   [ [ [ [Array] ] ] ]
  printf '%s\n' "$msg" | nodejs -e "$js_code" || true

  printf '%s\n' "$msg" | bin/osh -c 'json read; = _reply' || true

  # Hm this works past 40K in C++!  Then segmentation fault.  We could put an
  # artificial limit on it.
  local osh=_bin/cxx-opt/osh
  ninja "$osh"
  printf '%s\n' "$msg" | "$osh" -c 'json read; = _reply; echo $[len(_reply)]' || true
}
|
394 |
|
395 | "$@"
|