OILS / spec / var-op-len.test.sh View on Github | oils.pub

269 lines, 173 significant
1## compare_shells: dash bash mksh zsh
2## oils_failures_allowed: 1
3
4# Test the length operator, which dash supports. Dash doesn't support most
5# other ops.
6
7#### String length
8v=foo
9echo ${#v} # ${#v} expands to the length of $v; foo has 3 characters
10## stdout: 3
11
12#### Unicode string length (UTF-8)
13v=$'_\u03bc_' # underscore, U+03BC, underscore
14echo ${#v} # expected 3 (code points); mksh is expected to print 4 and dash 9
15## stdout: 3
16## N-I dash stdout: 9
17## N-I mksh stdout: 4
18
19#### Unicode string length (spec/testdata/utf8-chars.txt)
20v=$(cat $REPO_ROOT/spec/testdata/utf8-chars.txt) # whole fixture file; command substitution drops trailing newlines
21echo ${#v} # expected 7; dash and mksh are expected to print 13
22## stdout: 7
23## N-I dash stdout: 13
24## N-I mksh stdout: 13
25
26#### String length with incomplete utf-8
27for num_bytes in 0 1 2 3 4 5 6 7 8 9 10 11 12 13; do # prefix lengths from 0 to 13 bytes
28 s=$(head -c $num_bytes $REPO_ROOT/spec/testdata/utf8-chars.txt) # first $num_bytes bytes of the fixture; may end mid-character
29 echo ${#s} # NOTE: the expected warnings report line 3, presumably this statement - keep line positions stable
30done 2> $TMP/err.txt # capture the loop's stderr so the warnings can be replayed below
31
32grep 'warning:' $TMP/err.txt # print only the warning lines to stdout, after the lengths
33true # exit 0 even when grep finds no warnings
34
35## STDOUT:
360
371
382
39-1
403
414
42-1
43-1
445
456
46-1
47-1
48-1
497
50[ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 2 in string of 3 bytes
51[ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 5 in string of 6 bytes
52[ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 5 in string of 7 bytes
53[ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 9 in string of 10 bytes
54[ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 9 in string of 11 bytes
55[ stdin ]:3: warning: UTF-8 decode: Truncated bytes at offset 9 in string of 12 bytes
56## END
57# zsh behavior actually matches bash!
58## BUG bash/zsh stderr-json: ""
59## BUG bash/zsh STDOUT:
600
611
622
633
643
654
665
676
685
696
707
718
729
737
74## END
75## N-I dash/mksh stderr-json: ""
76## N-I dash/mksh STDOUT:
770
781
792
803
814
825
836
847
858
869
8710
8811
8912
9013
91## END
92
93#### String length with invalid utf-8 continuation bytes
94for num_bytes in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14; do # one step past 13; the last two expected results are identical
95 s=$(head -c $num_bytes $REPO_ROOT/spec/testdata/utf8-chars.txt)$(echo -e "\xFF") # fixture prefix followed by whatever echo -e "\xFF" prints in this shell
96 echo ${#s} # NOTE: the expected warnings report line 3, presumably this statement - keep line positions stable
97done 2> $TMP/err.txt # capture the loop's stderr so the warnings can be replayed below
98
99grep 'warning:' $TMP/err.txt # print only the warning lines to stdout, after the lengths
100true # exit 0 even when grep finds no warnings
101
102## STDOUT:
103-1
104-1
105-1
106-1
107-1
108-1
109-1
110-1
111-1
112-1
113-1
114-1
115-1
116-1
117-1
118[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 0 in string of 1 bytes
119[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 1 in string of 2 bytes
120[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 2 in string of 3 bytes
121[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 2 in string of 4 bytes
122[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 4 in string of 5 bytes
123[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 5 in string of 6 bytes
124[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 5 in string of 7 bytes
125[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 5 in string of 8 bytes
126[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 8 in string of 9 bytes
127[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 9 in string of 10 bytes
128[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 9 in string of 11 bytes
129[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 9 in string of 12 bytes
130[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 9 in string of 13 bytes
131[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 13 in string of 14 bytes
132[ stdin ]:3: warning: UTF-8 decode: Bad encoding at offset 13 in string of 14 bytes
133## END
134## BUG bash/zsh stderr-json: ""
135## BUG bash/zsh STDOUT:
1361
1372
1383
1394
1404
1415
1426
1437
1446
1457
1468
1479
14810
1498
1508
151## N-I dash stderr-json: ""
152## N-I dash STDOUT:
1537
1548
1559
15610
15711
15812
15913
16014
16115
16216
16317
16418
16519
16620
16720
168## END
169## N-I mksh stderr-json: ""
170## N-I mksh STDOUT:
1711
1722
1733
1744
1755
1766
1777
1788
1799
18010
18111
18212
18313
18414
18514
186## END
187
188#### Length of undefined variable
189echo ${#undef} # undef is never assigned in this case; expected length is 0
190## stdout: 0
191
192#### Length of undefined variable with nounset
193set -o nounset # treat expansion of an unset variable as an error
194echo ${#undef} # expected to fail: status 1, or status 2 under dash
195## status: 1
196## OK dash status: 2
197
198#### Length operator can't be followed by test operator
199echo ${#x-default} # x is unset here; zsh is expected to print 7 and dash 0
200
201x='' # x set but empty
202echo ${#x-default} # zsh and dash are both expected to print 0
203
204x='foo' # x set and non-empty
205echo ${#x-default} # zsh and dash are both expected to print 3
206
207## status: 2
208## OK bash/mksh status: 1
209## stdout-json: ""
210## BUG zsh status: 0
211## BUG zsh STDOUT:
2127
2130
2143
215## END
216## BUG dash status: 0
217## BUG dash STDOUT:
2180
2190
2203
221## END
222
223#### ${#s} respects LC_ALL - length in bytes or code points
224case $SH in dash) exit ;; esac # skip under dash; its expected stdout is empty
225
226# This test case is sorta "infected" because spec-common.sh sets LC_ALL=C.UTF-8
227#
228# For some reason mksh behaves differently
229#
230# See demo/04-unicode.sh
231
232#echo $LC_ALL
233unset LC_ALL # drop the LC_ALL value mentioned above before testing
234
235# note: this may depend on the CI machine config
236LANG=en_US.UTF-8
237
238#LC_ALL=en_US.UTF-8
239
240for s in $'\u03bc' $'\U00010000'; do # U+03BC, then U+10000
241 LC_ALL= # empty LC_ALL; expected length is 1 for both strings
242 echo "len=${#s}"
243
244 LC_ALL=C # C locale; expected lengths are 2 and 4
245 echo "len=${#s}"
246
247 echo # blank line separating the output for each string
248done
249
250## STDOUT:
251len=1
252len=2
253
254len=1
255len=4
256
257## END
258
259## N-I dash STDOUT:
260## END
261
262## BUG mksh STDOUT:
263len=2
264len=2
265
266len=3
267len=3
268
269## END