| 1 | #!/usr/bin/env bash
|
| 2 | #
|
| 3 | # Survey string APIs
|
| 4 | #
|
| 5 | # Usage:
|
| 6 | # demo/survey-str-api.sh <function name>
|
| 7 |
|
# Strict mode: abort on use of an unset variable, on a failing stage of a
# pipeline, and on any unhandled non-zero exit status.
set -o errexit -o nounset -o pipefail

# python3 in $PATH
. build/dev-shell.sh
|
| 13 |
|
| 14 | # Python and JS string and regex replacement APIs
|
| 15 |
|
# Survey plain-string replacement: Python's str.replace() vs. JavaScript's
# String.prototype.replace() with a string (non-regex) pattern.
#
# Prints a labelled transcript to stdout.  Invokes python3 and nodejs.
string-replace() {
  local -a py=(python3 -c)
  local -a js=(nodejs -e)

  printf '%s\n\n' 'STRING PYTHON'

  # No count argument
  "${py[@]}" 'print("oils-for-unix".replace("i", "++"))'

  # replace none
  printf '%s\n' 'count=0'
  "${py[@]}" 'print("oils-for-unix".replace("i", "++", 0))'

  # replace all
  printf '\n%s\n' 'count=-1'
  "${py[@]}" 'print("oils-for-unix".replace("i", "++", -1))'
  printf '\n'

  # Very weird empty string behavior -- it finds one between every char
  "${py[@]}" 'print("oils-for-unix".replace("", "++"))'
  "${py[@]}" 'print("oils-for-unix".replace("", "++", 1))'

  printf '\n%s\n\n' 'STRING JS'

  # Only replaces first occurrence!
  # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
  "${js[@]}" 'console.log("oils-for-unix".replace("i", "++"))'

  "${js[@]}" 'console.log("oils-for-unix".replace("", "++"))'
}
|
| 46 |
|
# Survey regex substitution: Python's compiled-pattern sub() vs. JavaScript's
# String.prototype.replace() with a RegExp.
#
# Prints a labelled transcript to stdout.  Invokes python3 and nodejs.
regex-replace() {
  echo 'REGEX PYTHON'
  echo

  python3 -c 'import re; p = re.compile("[i]"); print(p.sub("++", "oils-for-unix"))'

  echo 'count=0 INCONSISTENT, replaces all'
  python3 -c 'import re; p = re.compile("[i]"); print(p.sub("++", "oils-for-unix", count=0))'
  echo

  echo 'count=-1'
  python3 -c 'import re; p = re.compile("[i]"); print(p.sub("++", "oils-for-unix", count=-1))'
  echo

  # empty string?
  # It's consistent, it finds empty string between every char
  python3 -c 'import re; p = re.compile(""); print(p.sub("++", "oils-for-unix"))'
  echo

  # supports equivalent of $0 and $1 ?
  #
  # The replacement templates are raw strings (r"..."): "\g" is not a valid
  # escape sequence in an ordinary Python string literal, so the non-raw
  # spelling triggers an invalid-escape warning on newer Pythons.  The string
  # value passed to sub() is the same either way.
  python3 -c 'import re; p = re.compile("[i](.)"); print(p.sub(r"[\g<0>]", "oils-for-unix"))'
  python3 -c 'import re; p = re.compile("[i](.)"); print(p.sub(r"[\g<1>]", "oils-for-unix"))'
  echo

  # Three variants on a two-line subject ("\n" in the subject is deliberately
  # NOT raw -- it must be a real newline):
  #   1. no anchor
  #   2. ^ means that only one replacement occurs
  #   3. one replacement per line with re.MULTILINE!
  python3 -c 'import re; p = re.compile(r"(\d+)"); print(p.sub(r"[\g<1>]", "9-16-25\n100-200"))'
  echo
  python3 -c 'import re; p = re.compile(r"^(\d+)"); print(p.sub(r"[\g<1>]", "9-16-25\n100-200"))'
  echo
  python3 -c 'import re; p = re.compile(r"^(\d+)", re.MULTILINE); print(p.sub(r"[\g<1>]", "9-16-25\n100-200"))'
  echo

  echo 'REGEX JS'
  echo

  # Replaces first one
  nodejs -e 'console.log("oils-for-unix".replace(/[i]/, "++"))'

  # Replaces all
  # no count param?
  nodejs -e 'console.log("oils-for-unix".replace(/[i]/g, "++"))'

  # Empty regex
  nodejs -e 'console.log("oils-for-unix".replace(new RegExp(""), "++"))'

  # Hm this is inconsistent -- empty string gets replaced everywhere
  nodejs -e 'console.log("oils-for-unix".replace(new RegExp("", "g"), "++"))'

  # Hm JavaScript does not support $0 for the whole match -- it has $& instead
  # https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace
  nodejs -e 'console.log("oils-for-unix".replace(new RegExp("[i](.)", "g"), "[$0]"))'
  nodejs -e 'console.log("oils-for-unix".replace(new RegExp("[i](.)", "g"), "[$&]"))'
  nodejs -e 'console.log("oils-for-unix".replace(new RegExp("[i](.)", "g"), "[$1]"))'
  echo

  # Same three variants as the Python section: no anchor, then ^ (only one
  # replacement occurs), then ^ with the m flag.
  nodejs -e 'console.log("9-16-25\n100-200".replace(new RegExp("(\\d+)", "g"), "[$&]"))'
  echo
  nodejs -e 'console.log("9-16-25\n100-200".replace(new RegExp("^(\\d+)", "g"), "[$1]"))'
  echo
  # m flag is like re.MULTILINE
  nodejs -e 'console.log("9-16-25\n100-200".replace(new RegExp("^(\\d+)", "gm"), "[$1]"))'
  echo
}
|
| 112 |
|
# Survey whitespace trimming: Python's str.strip() vs. JavaScript's
# String.prototype.trim().  Each interpreter receives the sample string as its
# first script argument and prints "[original] [trimmed]".
survey-trim() {
  # TODO: Test other unicode chars
  local sample=' hi '

  local py_snippet='import sys; s = sys.argv[1]; print("[%s] [%s]" % (s, s.strip()))'
  local js_snippet='var s = process.argv[1]; var t = s.trim(); console.log(`[${s}] [${t}]`);'

  printf '%s\n\n' 'PYTHON'

  python3 -c "$py_snippet" "$sample"

  nodejs -e "$js_snippet" "$sample"
}
|
| 124 |
|
# Survey string splitting -- by a literal separator and by a regex -- in
# Python, Node, and YSH.  The same cases are fed to each interpreter on stdin.
#
# Prints a labelled transcript to stdout.  Invokes python3, node, and bin/ysh
# (the latter relative to the current directory).
#
# All heredoc delimiters are quoted ('EOF') so the shell passes each program
# through verbatim.  With an unquoted delimiter the shell collapses "\\" to
# "\", so Python would receive '\s*' -- an invalid escape sequence in a
# non-raw string literal -- instead of the '\\s*' written here.
survey-split() {
  echo '============== PYTHON'
  echo

  python3 << 'EOF'
print('a,b,c'.split(','))
print('aa'.split('a'))
print('a<>b<>c<d'.split('<>'))
print('a;b;;c'.split(';'))
print(''.split('foo'))

import re

print(re.split(',|;', 'a,b;c'))
print(re.split('.*', 'aa'))
print(re.split('.', 'aa'))
print(re.split('<>|@@', 'a<>b@@c<d'))
print(re.split('\\s*', 'a b cd'))
print(re.split('\\s+', 'a b cd'))
print(re.split('.', ''))
EOF

  echo
  echo '============== NODE'
  echo

  node << 'EOF'
console.log('a,b,c'.split(','))
console.log('aa'.split('a'))
console.log('a<>b<>c<d'.split('<>'))
console.log('a;b;;c'.split(';'))
console.log(''.split('foo'))

console.log('a,b;c'.split(/,|;/))
console.log('aa'.split(/.*/))
console.log('aa'.split(/./))
console.log('a<>b@@c<d'.split(/<>|@@/))
console.log('a b cd'.split(/\s*/))
console.log('a b cd'.split(/\s+/))
console.log(''.split(/./))
EOF

  echo
  echo '============== YSH'
  echo

  bin/ysh << 'EOF'
pp test_ ('a,b,c'.split(','))
pp test_ ('aa'.split('a'))
pp test_ ('a<>b<>c<d'.split('<>'))
pp test_ ('a;b;;c'.split(';'))
pp test_ (''.split('foo'))

pp test_ ('a,b;c'.split(/ ',' | ';' /))
pp test_ ('aa'.split(/ dot* /))
pp test_ ('aa'.split(/ dot /))
pp test_ ('a<>b@@c<d'.split(/ '<>' | '@@' /))
pp test_ ('a b cd'.split(/ space* /))
pp test_ ('a b cd'.split(/ space+ /))
pp test_ (''.split(/ dot /))
EOF
}
|
| 187 |
|
# Compare "match at the given position" with "search from the given position":
# match() / search() on a compiled Python pattern, and the leftMatch() /
# search() string methods in YSH.  Each pair is run once unanchored and once
# with a start-anchored pattern and pos=1.
#
# Prints a labelled transcript to stdout.  Invokes python3 and bin/ysh (the
# latter relative to the current directory).
regex-match-vs-search() {
  # Both programs are handed to their interpreter verbatim via -c.
  local py_prog='
import re

vowels = re.compile("[aeiou]")
print(vowels.match("hi"))
print(vowels.search("hi"))

vowelsLeft = re.compile("^[aeiou]")

print(vowelsLeft.match("hi", pos=1))
print(vowelsLeft.search("hi", pos=1))
'

  local ysh_prog='
var vowels = / [a e i o u] /
echo $vowels

= "hi".leftMatch(vowels)
= "hi".search(vowels)

var vowelsLeft = / %start [a e i o u] /
= "hi".leftMatch(vowelsLeft, pos=1)
= "hi".search(vowelsLeft, pos=1)
'

  printf '%s\n\n' 'REGEX PYTHON'
  python3 -c "$py_prog"

  printf '\n%s\n\n' 'REGEX YSH'
  bin/ysh -c "$ysh_prog"

  # does JS have match vs. search? I think it might use ^
}
|
| 224 |
|
# Survey substring containment: Python's `in` / `not in` operators vs.
# JavaScript's String.prototype.includes() (with and without a start index).
#
# Prints a labelled transcript to stdout.  Invokes python3 and nodejs.
string-contains() {
  # Flat list of (label, Python expression) pairs.
  # Python sugar for 'contains' ('in' calls generic __contains__)
  local -a py_cases=(
    'contains'               '"ils" in "oils-for-unix"'
    'contains empty string'  '"" in "oils-for-unix"'
    'does not contain'       '"xoxo" not in "oils-for-unix"'
  )
  local i js_args

  printf '%s\n' 'STRING CONTAINS PYTHON'

  for (( i = 0; i < ${#py_cases[@]}; i += 2 )); do
    printf '%s\n' "${py_cases[i]}"
    python3 -c "print(${py_cases[i+1]})"
    printf '\n'
  done

  printf '%s\n' 'STRING CONTAINS JS'

  # Non-empty needle: each result is followed by a blank line.
  for js_args in '"ils"' '"ils", 2'; do
    nodejs -e "console.log(\"oils-for-unix\".includes(${js_args}))"
    printf '\n'
  done

  # Empty needle, without and with a start index past the end: the two
  # results are printed back to back.
  for js_args in '""' '"", 100'; do
    nodejs -e "console.log(\"oils-for-unix\".includes(${js_args}))"
  done
}
|
| 251 |
|
# Survey forward substring search: Python's str.find() vs. JavaScript's
# String.prototype.indexOf(), including start/end arguments, negative and
# out-of-range positions, and the empty needle.
#
# Prints a labelled transcript to stdout.  Invokes python3 and nodejs.
string-find() {
  local -a py=(python3 -c)
  local -a js=(nodejs -e)

  printf '%s\n\n' 'STRING FIND PYTHON'

  # Returns int of the index of the substring
  "${py[@]}" 'print("oils-for-unix".find("i"))'

  printf '\n%s\n' 'start=2'
  "${py[@]}" 'print("oils-for-unix".find("i", 2))'

  printf '\n%s\n' 'substring does not occur'
  "${py[@]}" 'print("oils-for-unix".find("y"))'

  printf '\n%s\n' 'start=-1'
  "${py[@]}" 'print("oils-for-unix".find("x", -1))'

  # also works for longer substrings
  printf '\n%s\n' 'longer substrings'
  "${py[@]}" 'print("oils-for-unix".find("r-"))'

  # empty string is always immediately found
  printf '\n%s\n' 'empty string'
  "${py[@]}" 'print("oils-for-unix".find(""))'
  printf '%s\n' 'empty string with start past the end of the string'
  "${py[@]}" 'print("oils-for-unix".find("", 15))'

  # start and end are interpreted as in the slice notation [x:y]
  printf '\n%s\n' 'start=5,end=8'
  "${py[@]}" 'print("oils-for-unix".find("-", 5, 8))'

  printf '\n%s\n' 'start=5,end=9'
  "${py[@]}" 'print("oils-for-unix".find("-", 5, 9))'

  printf '\n%s\n' 'start=5,end=100'
  "${py[@]}" 'print("oils-for-unix".find("-", 5, 100))'

  printf '\n%s\n' 'start=100'
  "${py[@]}" 'print("oils-for-unix".find("-", 100))'
  "${py[@]}" 'print("oils-for-unix".find("-", 100, 9))'

  printf '\n%s\n\n' 'STRING FIND JS'

  "${js[@]}" 'console.log("oils-for-unix".indexOf("i"))'

  printf '\n%s\n' 'start=2'
  "${js[@]}" 'console.log("oils-for-unix".indexOf("i", 2))'

  printf '\n%s\n' 'substring does not occur'
  "${js[@]}" 'console.log("oils-for-unix".indexOf("y", 2))'

  # Behaves as if start=0!
  printf '\n%s\n' 'start=-1'
  "${js[@]}" 'console.log("oils-for-unix".indexOf("i", -1))'
  printf '\n'

  "${js[@]}" 'console.log("oils-for-unix".indexOf("r-"))'

  # empty string is always immediately found
  printf '\n%s\n' 'empty string'
  "${js[@]}" 'console.log("oils-for-unix".indexOf(""))'

  # returns the length of the string!
  printf '\n%s\n' 'empty string with start past the string'
  "${js[@]}" 'console.log("oils-for-unix".indexOf("", 100))'
  printf '\n'
}
|
| 334 |
|
# Survey reverse substring search: Python's str.rfind() vs. JavaScript's
# String.prototype.lastIndexOf(), including position arguments, negative and
# out-of-range positions, and the empty needle.
#
# Prints a labelled transcript to stdout.  Invokes python3 and nodejs.
string-last-find() {
  echo 'STRING LAST FIND PYTHON'
  echo

  # Returns int of the index of the substring
  python3 -c 'print("oils-for-unix".rfind("i"))'
  echo

  # Label states the arguments actually passed below (start is 1, not 2).
  echo 'start=1, end=-2'
  python3 -c 'print("oils-for-unix".rfind("i", 1, -2))'
  echo

  echo 'substring does not occur'
  python3 -c 'print("oils-for-unix".rfind("y"))'
  echo

  echo 'start=-2'
  python3 -c 'print("oils-for-unix".rfind("x", -2))'
  echo

  # also works for longer substrings
  echo 'longer substrings'
  python3 -c 'print("oils-for-unix".rfind("r-"))'
  echo

  # empty string is always immediately found
  echo 'empty string'
  python3 -c 'print("oils-for-unix".rfind(""))'
  echo 'empty string with start past the string'
  python3 -c 'print("oils-for-unix".rfind("", 15))'
  echo

  # start and end are interpreted as in the slice notation [x:y]
  echo 'start=4,end=8'
  python3 -c 'print("oils-for-unix".rfind("-", 4, 8))'
  echo

  echo 'start=4,end=9'
  python3 -c 'print("oils-for-unix".rfind("-", 4, 9))'
  echo

  echo 'start=100'
  python3 -c 'print("oils-for-unix".rfind("-", 100))'
  python3 -c 'print("oils-for-unix".rfind("-", 100, 9))'
  echo

  # Header names this survey (last-find), matching the Python header above.
  echo 'STRING LAST FIND JS'
  echo

  nodejs -e 'console.log("oils-for-unix".lastIndexOf("i"))'
  echo

  # the index taken is that of the end position, not start!
  echo 'end=2'
  nodejs -e 'console.log("oils-for-unix".lastIndexOf("i", 2))'
  echo

  # end is inclusive
  echo 'end=1'
  nodejs -e 'console.log("oils-for-unix".lastIndexOf("i", 1))'
  echo

  echo 'substring does not occur'
  nodejs -e 'console.log("oils-for-unix".lastIndexOf("y"))'
  echo

  # Behaves as if end=0!
  echo 'end=-1'
  nodejs -e 'console.log("oils-for-unix".lastIndexOf("i", -1))'
  echo

  nodejs -e 'console.log("oils-for-unix".lastIndexOf("o", -1))'
  echo

  nodejs -e 'console.log("oils-for-unix".lastIndexOf("r-"))'
  echo

  # empty string is always immediately found
  echo 'empty string'
  nodejs -e 'console.log("oils-for-unix".lastIndexOf(""))'
  echo

  echo 'empty string with end past the string'
  nodejs -e 'console.log("oils-for-unix".lastIndexOf("", 100))'
  echo

  echo 'empty string with end=0'
  nodejs -e 'console.log("oils-for-unix".lastIndexOf("", 0))'
  echo
}
|
| 425 |
|
# Dispatch: run the script's arguments as a command line, so that e.g.
# `demo/survey-str-api.sh string-find` invokes the string-find function
# defined in this file.  With no arguments this expands to nothing and no
# survey is run.
"$@"
|