## oils_failures_allowed: 1

#### ${#s} and len(s)

# Contrast two length measures on the same strings: shell ${#s} (the
# "scalars" lines) and the YSH builtin len(s) (the "len" lines).  The expected
# output shows that they disagree for these values.
# NOTE(review): presumably ${#s} counts code points and len() counts UTF-8
# bytes -- confirm against the Oils reference.

# Needed so that $[...] below is parsed as a YSH expression substitution.
shopt --set parse_ysh_expr_sub

# Fixture file; the code below relies on it defining $farmer and $facepalm.
source "$REPO_ROOT/spec/testdata/unicode.sh"

# bash agrees
echo "farmer scalars =" ${#farmer}

echo "facepalm scalars =" ${#facepalm}

echo "farmer len =" $[len(farmer)]

echo "facepalm len =" $[len(facepalm)]

## STDOUT:
farmer scalars = 4
facepalm scalars = 5
farmer len = 15
facepalm len = 17
## END


#### JSON \uXXXX\uYYYY as max code point - can't go above

# Decode one JSON document from stdin with Python's json module and print the
# decoded value encoded as UTF-8 (print appends a trailing newline).
# Requires python2: the one-liner uses the Python 2 print statement.
py-decode() {
  python2 -c 'import json, sys; print json.load(sys.stdin).encode("utf-8")'
}

# Dump stdin as two-digit hex bytes (-t x1) without the offset column (-A n),
# so that byte sequences can be compared as text.
to-hex() {
  od -A n -t x1
}

# JSON text holding the surrogate pair \udbff\udfff.  The expected bytes below
# (f4 8f bf bf) are the UTF-8 encoding of U+10FFFF.
max='"\udbff\udfff"'

# incrementing by one gives invalid surrogates
# the encoding is "tight"
# too_big='"\udc00\udfff"'

# Reference result: decode with Python's json module.
echo "$max" | py-decode | to-hex

# Oils result: the value decoded by 'json read' is then read back from $_reply.
echo "$max" | json read
echo "$_reply" | to-hex

# Both decoders must produce the same bytes; the final 0a is the newline added
# by print / echo.  Each expected line begins with one space because
# 'od -A n' starts its output line with a space.
## STDOUT:
 f4 8f bf bf 0a
 f4 8f bf bf 0a
## END



#### Parsing data - J8 rejects \u{110000}

# U+110000 is one past U+10FFFF, the largest code point used elsewhere in this
# file; the J8 decoder is expected to fail with status 1.
# The heredoc delimiter is quoted so the body is passed through literally,
# matching the other heredocs in this file.
json8 read <<'EOF'
u'\u{110000}'
EOF
echo status=$?

## STDOUT:
status=1
## END


#### Parsing source code - YSH rejects \u{110000}

# Sanity check first: Python interpreter DOES check big code points,
# whereas shells don't

# Expected output below: python2 succeeds for U+10FFFF (status 0) ...
max=$(python2 -c 'print u"\U0010ffff".encode("utf-8")')
echo status max=$?

# ... and fails for U+110000 (status 1).
too_big=$(python2 -c 'print u"\U00110000".encode("utf-8")')
echo status too_big=$?

#echo py max=$max
#echo py too_big=$too_big

# python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$max"
# python2 -c 'import sys; c = sys.argv[1].decode("utf-8"); print len(c)' "$too_big"

# The largest code point is accepted in a YSH u'' literal and printed.
var max = u'\u{10ffff}'
pp test_ (max)

# One past the maximum.  The expected status 2 and the single (Str) line in
# the expected output show that nothing from here on produces output.
var too_big = u'\u{110000}'
pp test_ (too_big) # should not get here

# These are errors too
var max = b'\u{10ffff}'
var too_big = b'\u{110000}'

## status: 2
## STDOUT:
status max=0
status too_big=1
(Str) "􏿿"
## END


#### Parsing source code - YSH source code rejects encoded string

# Use bash to produce the raw encoded bytes for U+10FFFF and for the
# out-of-range value 0x110000, then embed those bytes in YSH source text that
# is piped to the shell under test ($SH).

max=$(bash <<'EOF'
echo $'\U0010ffff'
EOF
)

# bash allows the bad one
too_big=$(bash <<'EOF'
echo $'\U00110000'
EOF
)

# The variables are expanded inside the double quotes so the generated source
# line is a single word, whatever bytes bash produced.
echo "var x = u'$max'; = x" | $SH
echo status=$?
#pp test_ (_reply)

echo "var x = u'$too_big'; = x" | $SH
echo status=$?
#pp test_ (_reply)

# NOTE(review): the script always prints two status= lines, so the empty
# expectation below cannot match.  Presumably this is the case covered by the
# file-level failure allowance -- confirm, and fill in the expected output
# once the intended behavior is decided.
## STDOUT:
## END


#### JSON and J8 reject encoded string above max code point

# Feed raw encoded bytes (not \u escapes) to both decoders.  Per the expected
# output, the U+10FFFF input is accepted (status 0) and the 0x110000 input is
# rejected (status 1), for JSON and for J8 alike.

max=$(bash <<'EOF'
echo $'\U0010ffff'
EOF
)

# bash allows the bad one
too_big=$(bash <<'EOF'
echo $'\U00110000'
EOF
)

# JSON string

echo "\"$max\"" | json read
echo status=$?
#pp test_ (_reply)

# Need to propagate the reason here

echo "\"$too_big\"" | json read
echo status=$?
#pp test_ (_reply)


# J8 string

echo "u'$max'" | json8 read
echo status=$?
#pp test_ (_reply)

echo "u'$too_big'" | json8 read
echo status=$?
#pp test_ (_reply)

## STDOUT:
status=0
status=1
status=0
status=1
## END

#### Max code point: json, json8, = keyword, pp test_

# Print the same one-code-point string, U+10FFFF, through four printers.
# Per the expected output, each emits the character literally instead of as
# an escape; '=' wraps it in single quotes, the other three in double quotes.

var max = u'\u{10ffff}'

json write (max)
json8 write (max)

= max
pp test_ (max)

#echo "var x = u'"$max"'; = x" | $SH

## STDOUT:
"􏿿"
"􏿿"
(Str) '􏿿'
(Str) "􏿿"
## END