1 ## oils_failures_allowed: 0
2 ## compare_shells: bash-4.4 zsh
3
4 #
5 # Only bash and zsh seem to implement [[ foo =~ '' ]]
6 #
7 # ^(a b)$ is a regex that should match 'a b' in a group.
8 #
9 # Not sure what bash is doing here... I think I have to just be empirical.
10 # Might need "compat" switch for parsing the regex. It should be an opaque
11 # string like zsh, not sure why it isn't.
12 #
13 # I think this is just papering over bugs...
14 # The text below is quoted from the bash manual: https://www.gnu.org/software/bash/manual/bash.html#Conditional-Constructs
15 #
16 # Storing the regular expression in a shell variable is often a useful way to
17 # avoid problems with quoting characters that are special to the shell. It is
18 # sometimes difficult to specify a regular expression literally without using
19 # quotes, or to keep track of the quoting used by regular expressions while
20 # paying attention to the shell’s quote removal. Using a shell variable to
21 # store the pattern decreases these problems. For example, the following is
22 # equivalent to the above:
23 #
24 # pattern='[[:space:]]*(a)?b'
25 # [[ $line =~ $pattern ]]
26 #
27 # If you want to match a character that’s special to the regular expression
28 # grammar, it has to be quoted to remove its special meaning. This means that in
29 # the pattern ‘xxx.txt’, the ‘.’ matches any character in the string (its usual
30 # regular expression meaning), but in the pattern ‘"xxx.txt"’ it can only match a
31 # literal ‘.’. Shell programmers should take special care with backslashes, since
32 # backslashes are used both by the shell and regular expressions to remove the
33 # special meaning from the following character. The following two sets of
34 # commands are not equivalent: [the manual's example commands are not reproduced here]
35 #
36 # From the bash source code: ( | ) are treated specially. Normally they must be quoted, but
37 # they can be UNQUOTED in the BASH_REGEX state. In fact they can't be quoted!
38
39 #### BASH_REMATCH
40 [[ foo123 =~ ([a-z]+)([0-9]+) ]]
41 echo status=$?
42 argv.py "${BASH_REMATCH[@]}"  # whole match first, then one element per ( ) group
43
44 [[ failed =~ ([a-z]+)([0-9]+) ]]
45 echo status=$?
46 argv.py "${BASH_REMATCH[@]}" # cleared after a failed match: the expected output below is []
47
48 ## STDOUT:
49 status=0
50 ['foo123', 'foo', '123']
51 status=1
52 []
53 ## END
54 ## N-I zsh STDOUT:
55 status=0
56 ['']
57 status=1
58 ['']
59 ## END
60
61 #### Match is unanchored at both ends
62 [[ 'bar' =~ a ]] && echo true
63 ## stdout: true
64
65 #### Failed match
66 [[ 'bar' =~ X ]] && echo true
67 ## status: 1
68 ## stdout-json: ""
69
70 #### Regex quoted with \ -- preferred in bash
71 [[ 'a b' =~ ^(a\ b)$ ]] && echo true
72 ## stdout: true
73
74 #### Regex quoted with single quotes
75 # bash matches a quoted pattern as a literal string, so this fails; zsh still treats it as a regex.
76 [[ 'a b' =~ '^(a b)$' ]] && echo true
77 ## stdout-json: ""
78 ## status: 1
79 ## OK zsh stdout: true
80 ## OK zsh status: 0
81
82 #### Regex quoted with double quotes
83 # Same as with single quotes: bash takes the quoted text literally and fails; zsh matches it as a regex.
84 [[ 'a b' =~ "^(a b)$" ]] && echo true
85 ## stdout-json: ""
86 ## status: 1
87 ## OK zsh stdout: true
88 ## OK zsh status: 0
89
90 #### Fix single quotes by storing in variable
91 pat='^(a b)$'
92 [[ 'a b' =~ $pat ]] && echo true
93 ## stdout: true
94
95 #### Fix double quotes by storing in variable
96 pat="^(a b)$"
97 [[ 'a b' =~ $pat ]] && echo true  # the unquoted expansion is used as a regex, not as a literal
98 ## stdout: true
99
100 #### Double quoting pat variable -- again bash doesn't like it.
101 pat="^(a b)$"
102 [[ 'a b' =~ "$pat" ]] && echo true  # quoted expansion: expected to fail except in zsh
103 ## stdout-json: ""
104 ## status: 1
105 ## OK zsh stdout: true
106 ## OK zsh status: 0
107
108 #### Mixing quoted and unquoted parts
109 [[ 'a b' =~ 'a 'b ]] && echo true
110 [[ "a b" =~ "a "'b' ]] && echo true
111 ## STDOUT:
112 true
113 true
114 ## END
115
116 #### Regex with == and not =~ is parse error, different lexer mode required
117 # bash and zsh both reject this with a syntax error: the unquoted ( ) that work after =~ do not work after ==.
118 [[ '^(a b)$' == ^(a\ b)$ ]] && echo true
119 ## status: 2
120 ## OK zsh status: 1
121
122 #### Omitting ( )
123 [[ '^a b$' == ^a\ b$ ]] && echo true
124 ## stdout: true
125
126 #### Malformed regex
127 # Open question: do the shells try to PARSE the regex themselves, or do they
128 # feed the buffer directly to regcomp()?
129 [[ 'a b' =~ ^)a\ b($ ]] && echo true
130 ## stdout-json: ""
131 ## status: 2
132 ## OK zsh status: 1
133
134 #### Regex with |
135 [[ 'bar' =~ foo|bar ]] && echo true
136 ## stdout: true
137 ## N-I zsh stdout-json: ""
138 ## N-I zsh status: 1
139
140 #### Regex to match literal brackets []
141
142 # bash-completion relies on this, so we're making it match bash.
143 # zsh understandably differs.
144 [[ '[]' =~ \[\] ]] && echo true
145
146 # Another way to write this.
147 pat='\[\]'
148 [[ '[]' =~ $pat ]] && echo true
149 ## STDOUT:
150 true
151 true
152 ## END
153 ## OK zsh STDOUT:
154 true
155 ## END
156
157 #### Regex to match literals . ^ $ etc.
158 [[ 'x' =~ \. ]] || echo false
159 [[ '.' =~ \. ]] && echo true
160
161 [[ 'xx' =~ \^\$ ]] || echo false
162 [[ '^$' =~ \^\$ ]] && echo true
163
164 [[ 'xxx' =~ \+\*\? ]] || echo false
165 [[ '*+?' =~ \*\+\? ]] && echo true
166
167 [[ 'xx' =~ \{\} ]] || echo false
168 [[ '{}' =~ \{\} ]] && echo true
169 ## STDOUT:
170 false
171 true
172 false
173 true
174 false
175 true
176 false
177 true
178 ## END
179 ## BUG zsh STDOUT:
180 true
181 false
182 false
183 false
184 ## END
185 ## BUG zsh status: 1
186
187 #### Unquoted { is a regex parse error
188 [[ { =~ { ]] && echo true
189 echo status=$?  # printed only by bash and zsh; the default expectation is no output and status 2
190 ## stdout-json: ""
191 ## status: 2
192 ## BUG bash STDOUT:
193 status=2
194 ## END
195 ## BUG bash status: 0
196 ## BUG zsh STDOUT:
197 status=1
198 ## END
199 ## BUG zsh status: 0
200
201 #### Fatal error inside [[ =~ ]]
202
203 # zsh and osh are stricter than bash: the division by zero is fatal, so the
204 # echo never runs. bash treats [[ like a command and carries on with status 1.
205 [[ a =~ $(( 1 / 0 )) ]]
206 echo status=$?
207 ## stdout-json: ""
208 ## status: 1
209 ## BUG bash stdout: status=1
210 ## BUG bash status: 0
211
212 #### Quoted { and +
213 [[ { =~ "{" ]] && echo 'yes {'
214 [[ + =~ "+" ]] && echo 'yes +'
215 [[ * =~ "*" ]] && echo 'yes *'
216 [[ ? =~ "?" ]] && echo 'yes ?'
217 [[ ^ =~ "^" ]] && echo 'yes ^'
218 [[ $ =~ "$" ]] && echo 'yes $'
219 [[ '(' =~ '(' ]] && echo 'yes ('
220 [[ ')' =~ ')' ]] && echo 'yes )'
221 [[ '|' =~ '|' ]] && echo 'yes |'
222 [[ '\' =~ '\' ]] && echo 'yes \'
223 echo ---
224
225 [[ . =~ "." ]] && echo 'yes .'
226 [[ z =~ "." ]] || echo 'no .'
227 echo ---
228
229 # This rule is odd, but all shells agree: quoting a-z inside a bracket
230 # expression does not make the - literal; it still behaves like the range a-z.
231 [[ a =~ ["a-z"] ]]; echo "a $?"
232 [[ - =~ ["a-z"] ]]; echo "- $?"
233 [[ b =~ ['a-z'] ]]; echo "b $?"
234 [[ z =~ ['a-z'] ]]; echo "z $?"
235
236 echo status=$?  # $? here is the status of the preceding echo
237 ## STDOUT:
238 yes {
239 yes +
240 yes *
241 yes ?
242 yes ^
243 yes $
244 yes (
245 yes )
246 yes |
247 yes \
248 ---
249 yes .
250 no .
251 ---
252 a 0
253 - 1
254 b 0
255 z 0
256 status=0
257 ## END
258 ## N-I zsh STDOUT:
259 yes ^
260 yes $
261 yes )
262 yes |
263 ---
264 yes .
265 ---
266 a 0
267 - 1
268 b 0
269 z 0
270 status=0
271 ## END
272
273 #### Escaped {
274 # from bash-completion
275 [[ '$PA' =~ ^(\$\{?)([A-Za-z0-9_]*)$ ]] && argv.py "${BASH_REMATCH[@]}"
276 ## STDOUT:
277 ['$PA', '$', 'PA']
278 ## END
279 ## BUG zsh stdout-json: ""
280 ## BUG zsh status: 1
281
282 #### Escaped { stored in variable first
283 # from bash-completion
284 pat='^(\$\{?)([A-Za-z0-9_]*)$'
285 [[ '$PA' =~ $pat ]] && argv.py "${BASH_REMATCH[@]}"
286 ## STDOUT:
287 ['$PA', '$', 'PA']
288 ## END
289 ## BUG zsh STDOUT:
290 ['']
291 ## END
292
293 #### regex with ?
294 [[ 'c' =~ c? ]] && echo true
295 [[ '' =~ c? ]] && echo true
296 ## STDOUT:
297 true
298 true
299 ## END
300
301 #### regex with unprintable characters
302 # Uses the bytes 0x01 and 0x02; a NUL byte can't be used.
303
304 # $'...' expands the \x escapes, so the bracket expression holds the raw bytes themselves.
305 pat=$'^[\x01\x02]+$'
306
307 [[ $'\x01\x02\x01' =~ $pat ]]; echo status=$?
308 [[ $'a\x01' =~ $pat ]]; echo status=$?
309
310 # NOTE: There doesn't appear to be any way to spell these bytes as escapes inside the regex itself!
311 pat2='^[\x01\x02]+$'  # single-quoted, so \x01 stays as literal text; no check in this case uses pat2
312
313 ## STDOUT:
314 status=0
315 status=1
316 ## END
317
318 #### pattern $f(x) -- regression
319 f=fff
320 [[ fffx =~ $f(x) ]]
321 echo status=$?
322 [[ ffx =~ $f(x) ]]
323 echo status=$?
324 ## STDOUT:
325 status=0
326 status=1
327 ## END
328
329 #### pattern a=(1)
330 [[ a=x =~ a=(x) ]]
331 echo status=$?
332 [[ =x =~ a=(x) ]]
333 echo status=$?
334 ## STDOUT:
335 status=0
336 status=1
337 ## END
338 ## BUG zsh status: 1
339 ## BUG zsh STDOUT:
340 status=0
341 ## END
342
343 #### pattern @f(x)
344 shopt -s parse_at
345 [[ @fx =~ @f(x) ]]
346 echo status=$?
347 [[ fx =~ @f(x) ]]
348 echo status=$?
349 ## STDOUT:
350 status=0
351 status=1
352 ## END
353
354
355 #### Bug: Nix idiom with closing ) next to pattern
356
357 if [[ ! (" ${params[*]} " =~ " -shared " || " ${params[*]} " =~ " -static ") ]]; then
358 echo one
359 fi
360
361 # Reduced idiom
362 if [[ (foo =~ foo) ]]; then
363 echo two
364 fi
365
366 ## STDOUT:
367 one
368 two
369 ## END
370
371 #### unquoted (a b) as pattern, (a b|c)
372
373 if [[ 'a b' =~ (a b) ]]; then
374 echo one
375 fi
376 # Negative check: the subject has TWO spaces, so the one-space pattern must not match.
377 if [[ 'a  b' =~ (a b) ]]; then
378 echo BAD
379 fi
380
381 if [[ 'a b' =~ (a b|c) ]]; then
382 echo two
383 fi
384
385 # I think spaces are only allowed within ()
386
387 if [[ ' c' =~ (a| c) ]]; then
388 echo three
389 fi
390
391 ## STDOUT:
392 one
393 two
394 three
395 ## END
396
397 #### Multiple adjacent () groups
398
399 if [[ 'a-b-c-d' =~ a-(b| >>)-c-( ;|[de])|ff|gg ]]; then
400 argv.py "${BASH_REMATCH[@]}"
401 fi
402
403 if [[ ff =~ a-(b| >>)-c-( ;|[de])|ff|gg ]]; then
404 argv.py "${BASH_REMATCH[@]}"
405 fi
406
407 # empty group ()
408
409 if [[ zz =~ ([a-z]+)() ]]; then
410 argv.py "${BASH_REMATCH[@]}"
411 fi
412
413 # nested empty group
414 if [[ zz =~ ([a-z]+)(()z) ]]; then
415 argv.py "${BASH_REMATCH[@]}"
416 fi
417
418 ## STDOUT:
419 ['a-b-c-d', 'b', 'd']
420 ['ff', '', '']
421 ['zz', 'zz', '']
422 ['zz', 'z', 'z', '']
423 ## END
424
425 ## BUG zsh status: 1
426 ## BUG zsh STDOUT:
427 ['']
428 ['']
429 ['']
430 ['']
431 ## END
432
433 #### unquoted [a b] as pattern, [a b|c]
434 # Each snippet runs in a child $SH, so a parse error only fails that one snippet.
435 $SH <<'EOF'
436 [[ a =~ [ab] ]] && echo yes
437 EOF
438 echo "[ab]=$?"
439 # A space inside a bare bracket expression is expected to be a parse error.
440 $SH <<'EOF'
441 [[ a =~ [a b] ]] && echo yes
442 EOF
443 echo "[a b]=$?"
444 # Inside ( ) the same bracket expression with a space is accepted.
445 $SH <<'EOF'
446 [[ a =~ ([a b]) ]] && echo yes
447 EOF
448 echo "([a b])=$?"
449
450 ## STDOUT:
451 yes
452 [ab]=0
453 [a b]=2
454 yes
455 ([a b])=0
456 ## END
457
458 ## OK zsh STDOUT:
459 yes
460 [ab]=0
461 [a b]=1
462 yes
463 ([a b])=0
464 ## END
465
466 #### c|a unquoted
467
468 if [[ a =~ c|a ]]; then
469 echo one
470 fi
471
472 ## STDOUT:
473 one
474 ## END
475 ## N-I zsh status: 1
476
477 #### Operator chars ; & but not |
478
479 # Hm semicolon is still an operator in bash
480 $SH <<'EOF'
481 [[ ';' =~ ; ]] && echo semi
482 EOF
483 echo semi=$?
484
485 $SH <<'EOF'
486 [[ ';' =~ (;) ]] && echo semi paren
487 EOF
488 echo semi paren=$?
489
490 echo
491
492 $SH <<'EOF'
493 [[ '&' =~ & ]] && echo amp
494 EOF
495 echo amp=$?
496
497 # Oh I guess this is not a bug? regcomp doesn't reject this trivial regex?
498 $SH <<'EOF'
499 [[ '|' =~ | ]] && echo pipe1
500 [[ 'a' =~ | ]] && echo pipe2
501 EOF
502 echo pipe=$?
503
504 $SH <<'EOF'
505 [[ '|' =~ a| ]] && echo four
506 EOF
507 echo pipe=$?
508
509 # This is probably special because > operator is inside foo [[ a > b ]]
510 $SH <<'EOF'
511 [[ '<>' =~ <> ]] && echo angle
512 EOF
513 echo angle=$?
514
515 # Bug: OSH allowed this!
516 $SH <<'EOF'
517 [[ $'a\nb' =~ a
518 b ]] && echo newline
519 EOF
520 echo newline=$?
521
522 ## STDOUT:
523 semi=2
524 semi paren
525 semi paren=0
526
527 amp=2
528 pipe1
529 pipe2
530 pipe=0
531 four
532 pipe=0
533 angle=2
534 newline=2
535 ## END
536
537 ## BUG zsh STDOUT:
538 semi=1
539 semi paren=1
540
541 amp=1
542 pipe=1
543 pipe=1
544 angle=1
545 newline=1
546 ## END
547
548
549
550 #### Quotes '' "" $'' $"" in pattern
551
552 $SH <<'EOF'
553 [[ '|' =~ '|' ]] && echo sq
554 EOF
555 echo sq=$?
556
557 $SH <<'EOF'
558 [[ '|' =~ "|" ]] && echo dq
559 EOF
560 echo dq=$?
561
562 $SH <<'EOF'
563 [[ '|' =~ $'|' ]] && echo dollar-sq
564 EOF
565 echo dollar-sq=$?
566
567 $SH <<'EOF'
568 [[ '|' =~ $"|" ]] && echo dollar-dq
569 EOF
570 echo dollar-dq=$?
571
572 ## STDOUT:
573 sq
574 sq=0
575 dq
576 dq=0
577 dollar-sq
578 dollar-sq=0
579 dollar-dq
580 dollar-dq=0
581 ## END
582
583
584 #### Unicode in pattern
585
586 $SH <<'EOF'
587 [[ μ =~ μ ]] && echo mu
588 EOF
589 echo mu=$?
590
591 ## STDOUT:
592 mu
593 mu=0
594 ## END
595
596 #### Parse error with 2 words
597
598 if [[ a =~ c a ]]; then
599 echo one
600 fi
601
602 ## status: 2
603 ## STDOUT:
604 ## END
605
606 ## BUG zsh status: 1
607 ## BUG zsh STDOUT:
608 one
609 ## END
610
611 #### make a lisp example
612
613 str='(hi)'
614 [[ "${str}" =~ ^^([][{}\(\)^@])|^(~@)|(\"(\\.|[^\\\"])*\")|^(;[^$'\n']*)|^([~\'\`])|^([^][ ~\`\'\";{}\(\)^@\,]+)|^[,]|^[[:space:]]+ ]]
615 echo status=$?
616
617 m=${BASH_REMATCH[0]}
618 echo m=$m
619
620 ## STDOUT:
621 status=0
622 m=(
623 ## END
624
625 ## BUG zsh STDOUT:
626 status=1
627 m=
628 ## END
629
630 #### Operators and space lose meaning inside ()
631 [[ '< >' =~ (< >) ]] && echo true
632 ## stdout: true
633 ## N-I zsh stdout-json: ""
634 ## N-I zsh status: 1
635