OILS / spec / ysh-unicode.test.sh View on Github | oils.pub

185 lines, 69 significant
## oils_failures_allowed: 1
2
#### ${#s} and len(s)
# Compares two ways of measuring the same strings: ${#s} and the YSH len()
# function.  Per the expected output they disagree (4 vs 15, 5 vs 17), which
# is consistent with ${#s} counting code points and len() counting UTF-8
# bytes.

# $[...] is used below to call len(); presumably this option is what enables
# that syntax - confirm against the shopt documentation.
shopt --set parse_ysh_expr_sub

# Expected to define the `farmer` and `facepalm` strings (file not shown here).
source $REPO_ROOT/spec/testdata/unicode.sh

# bash agrees
echo "farmer scalars =" ${#farmer}

echo "facepalm scalars =" ${#facepalm}

echo "farmer len =" $[len(farmer)]

echo "facepalm len =" $[len(facepalm)]

## STDOUT:
farmer scalars = 4
facepalm scalars = 5
farmer len = 15
facepalm len = 17
## END
23
24
#### JSON \uXXXX\uYYYY as max code point - can't go above
26
# Reference decoder: parse one JSON document from stdin with Python 2's json
# module and print the decoded value encoded as UTF-8 (print appends a
# newline).
py-decode() {
  python2 -c 'import json, sys; print json.load(sys.stdin).encode("utf-8")'
}
30
# Dump stdin as hex, one byte per field, without the address column
# (-A n suppresses offsets, -t x1 selects one-byte hexadecimal units).
to-hex() {
  od -A n -t x1
}
34
# JSON text holding one string: the surrogate pair \udbff\udfff.  Both
# decoders below are expected to emit f4 8f bf bf, the UTF-8 encoding of
# U+10FFFF, followed by a newline (0a).
max='"\udbff\udfff"'

# incrementing by one gives invalid surrogates
# the encoding is "tight"
# too_big='"\udc00\udfff"'

# Reference result from Python 2.
echo "$max" | py-decode | to-hex

# Same input through the json builtin; the decoded value is read from $_reply.
echo "$max" | json read
echo "$_reply" | to-hex

## STDOUT:
 f4 8f bf bf 0a
 f4 8f bf bf 0a
## END
50
51
52
#### Parsing data - J8 rejects \u{110000}

# 0x110000 is one past 0x10ffff, the maximum used elsewhere in this file, so
# the read is expected to fail with status 1.
json8 read <<EOF
u'\u{110000}'
EOF
echo status=$?

## STDOUT:
status=1
## END
63
64
#### Parsing source code - YSH rejects \u{110000}

# Sanity check first: Python interpreter DOES check big code points,
# whereas shells don't

# The maximum code point: python2 is expected to exit 0 (see STDOUT).
max=$(python2 -c 'print u"\U0010ffff".encode("utf-8")')
echo status max=$?

# One past the maximum: python2 is expected to exit 1 (see STDOUT).
too_big=$(python2 -c 'print u"\U00110000".encode("utf-8")')
echo status too_big=$?

#echo py max=$max
#echo py too_big=$too_big

# python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$max"
# python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$too_big"

# The maximum code point is accepted in a u'' literal and printed.
var max = u'\u{10ffff}'
pp test_ (max)

# One past the maximum: expected to be rejected, which accounts for the
# overall status 2 and for nothing further appearing on stdout.
var too_big = u'\u{110000}'
pp test_ (too_big) # should not get here

# These are errors too
var max = b'\u{10ffff}'
var too_big = b'\u{110000}'

## status: 2
## STDOUT:
status max=0
status too_big=1
(Str) "􏿿"
## END
98
99
#### Parsing source code - YSH source code rejects encoded string

# Use bash to produce the raw encoded bytes for each code point, so the
# program text fed to $SH contains the bytes themselves, not an escape.
max=$(bash <<'EOF'
echo $'\U0010ffff'
EOF
)

# bash allows the bad one
too_big=$(bash <<'EOF'
echo $'\U00110000'
EOF
)

# Splice the bytes into a u'' literal and run the resulting program in $SH.
echo "var x = u'"$max"'; = x" | $SH
echo status=$?
#pp test_ (_reply)

echo "var x = u'"$too_big"'; = x" | $SH
echo status=$?
#pp test_ (_reply)

# NOTE(review): the expected STDOUT below is empty although this case echoes
# two status= lines, so it cannot pass as written; presumably this is the
# failure permitted by oils_failures_allowed at the top of the file - confirm.
## STDOUT:
## END
123
124
#### JSON and J8 reject encoded string above max code point

# Use bash to produce the raw encoded bytes for each code point; they are
# placed directly inside the JSON / J8 string literals below.
max=$(bash <<'EOF'
echo $'\U0010ffff'
EOF
)

# bash allows the bad one
too_big=$(bash <<'EOF'
echo $'\U00110000'
EOF
)

# JSON string

# Maximum code point: expected status 0.
echo '"'$max'"' | json read
echo status=$?
#pp test_ (_reply)

# Need to propagate the reason here

# Above the maximum: expected status 1.
echo '"'$too_big'"' | json read
echo status=$?
#pp test_ (_reply)


# J8 string

# Same pair of inputs in a u'' literal: expected status 0, then 1.
echo "u'"$max"'" | json8 read
echo status=$?
#pp test_ (_reply)

echo "u'"$too_big"'" | json8 read
echo status=$?
#pp test_ (_reply)

## STDOUT:
status=0
status=1
status=0
status=1
## END
167
#### Max code point: json, json8, = keyword, pp test_

var max = u'\u{10ffff}'

# Print the same one-character string through each of the four printers named
# in the title.  Per the expected output, all of them emit the character
# itself rather than an escape; `=` uses single quotes, the rest double.
json write (max)
json8 write (max)

= max
pp test_ (max)

#echo "var x = u'"$max"'; = x" | $SH

## STDOUT:
"􏿿"
"􏿿"
(Str) '􏿿'
(Str) "􏿿"
## END