#!/usr/bin/env bash
#
# word_freq.sh - word-frequency benchmark (Oils: benchmarks/compute/word_freq.sh).
#
# Usage:
#   word_freq.sh [ITERS] < text
#
# Reads text from stdin and prints a "<count> <word>" line for each distinct
# whitespace-separated word.

# Words are obtained by expanding $text unquoted.  Pathname expansion must be
# off so that words such as '*' or 'a?' stay literal instead of being replaced
# by matching file names.
set -o noglob # for unquoted $text splitting
4
# Print every whitespace-separated word of stdin on its own line.
#
# Input:   stdin, slurped in a single read (up to the first NUL byte or EOF).
# Output:  one word per line on stdout.
# Note:    the unquoted $text expansion is intentional; it relies on noglob
#          being in effect (the script enables it at top level) so that
#          words containing glob characters are not expanded to file names.
tokenize() {
  local text word

  # read it once.  With -d '' read normally hits EOF before finding a
  # delimiter and returns non-zero, but $text is still filled in, so the
  # status is deliberately ignored.
  read -r -d '' text

  for word in $text; do # relies on word splitting
    # printf instead of echo: echo would treat words such as -n or -e as
    # options and print nothing for them.
    printf '%s\n' "$word"
  done
}
13
# Count how often each whitespace-separated word occurs on stdin.
#
# Arguments: $1 - number of passes over the text (default 100).  Every pass
#                 adds to the same table, so each reported count is the
#                 word's per-pass count multiplied by $1.
# Input:     stdin, slurped in a single read (up to the first NUL byte or EOF).
# Output:    one "<count> <word>" line per distinct word on stdout, in the
#            order the associative array yields its keys (not sorted).
# Note:      the unquoted $text expansion is intentional; it relies on noglob
#            being in effect (the script enables it at top level).
main() {
  local iters=${1:-100}
  local text word count i
  local -A words=()

  # read it once.  read returns non-zero when it reaches EOF without seeing
  # the (NUL) delimiter; $text is still filled in, so the status is ignored.
  read -r -d '' text

  # do it a bunch of times
  for (( i = 0; i < iters; ++i )); do

    # Relies on unquoted IFS splitting. Difference with Python: Python will
    # give you \, but IFS splitting won't.
    for word in $text; do

      # Two-step read/modify/write; a word not seen yet counts as 0.
      # NOTE (original author): "this isn't correct in bash" - the exact
      # failing case is not recorded here.
      count=${words["$word"]:-0}
      words["$word"]=$(( count + 1 ))

      # The in-place arithmetic forms are avoided on purpose:
      # BUG in bash, see spec/assoc case #37
      #(( words["$word"] += 1 ))
      #(( words[\$word] += 1 ))
    done
  done

  # note: we can sort the output in the benchmark and assert that it's the same?

  for word in "${!words[@]}"; do
    printf '%s %s\n' "${words["$word"]}" "$word"
  done
}
45
# Entry point: run the word count with the script's arguments.  The
# commented-out call below swaps in the tokenizer instead.
main "$@"
#tokenize "$@"