OILS / demo / 04-unicode.sh View on Github | oils.pub

107 lines, 41 significant
#!/usr/bin/env bash
#
# Usage:
#   demo/04-unicode.sh <function name>
#
# Each function below is a small, self-contained demo; the last line of the
# file runs whichever function is named on the command line.
#
# TODO: Test what happens if you read binary data into a $(command sub)
# - internal NUL
# - invalid utf-8 sequence
#
# It would be nice to move some of this into test/gold?  It depends on the
# locale.

# Strict mode: unset variables are errors, a pipeline fails if any stage
# fails, and the script exits on the first unhandled non-zero status.
set -o nounset
set -o pipefail
set -o errexit

# https://www.gnu.org/software/bash/manual/bash.html#Shell-Parameter-Expansion
#
# See doc/unicode.txt.

# Print "[μ]" (U+03BC between square brackets) as UTF-8, followed by a
# newline.
#
# The two bytes of the UTF-8 encoding (0xCE 0xBC) are spelled out with \x
# escapes, so the output does not depend on the current locale or on an
# external interpreter being installed.
#
# Outputs: the 5 bytes '[', 0xCE, 0xBC, ']', '\n' on stdout.
unicode-char() {
  printf '[\xce\xbc]\n'
}

# http://stackoverflow.com/questions/602912/how-do-you-echo-a-4-digit-unicode-character-in-bash
#
# Print U+2620 three different ways, one per line, to compare how the bash
# builtin echo and the external /bin/echo treat backslash escapes.
#
# Outputs: three lines on stdout; what the \u lines look like depends on the
#          echo implementation (see the comments below).
echo-char() {
  # The UTF-8 encoding of U+2620, given byte by byte.
  echo -e "\xE2\x98\xA0"

  #echo -e "\x03\xbc"

  # Woah bash has this!  Interesting.  Not documented in "help echo" though.
  echo -e '\u2620'

  # GNU echo does not have it.
  /bin/echo -e '\u2620'
}

# Print "[μ]" using a literal (unquoted) UTF-8 character typed directly into
# this source file.
#
# Outputs: the line "[μ]" on stdout.
raw-char() {
  # Use vim to put utf-8 in this source file:
  # 1. i to enter Insert mode
  # 2. Ctrl-V
  # 3. u
  # 4. 03bc -- 4 hex digits
  #
  # The brackets are backslash-escaped so that the word is not treated as a
  # glob pattern: an unescaped [μ] would be replaced by a file named "μ" if
  # one existed in the current directory.  The μ itself stays unquoted.
  echo \[μ\]
}

# Print the same bracketed character through each of the shell's quoting
# forms, one per line, followed by a locale-translated string.
#
# Outputs: four lines on stdout.
quoted-chars() {
  echo '[μ]'        # single-quoted string
  echo "[μ]"        # double-quoted string
  echo $'[\u03bc]'  # C-escaped string

  # Not implementing this
  # https://www.gnu.org/software/bash/manual/html_node/Locale-Translation.html
  echo $"hello"
}

# Show the current locale, then show how bash measures and slices a string
# containing a multi-byte character.
#
# Calls unicode-char and raw-char (defined in this file) to obtain the string.
#
# Outputs: the `locale` report, a blank line, $LANG (empty if unset), the
#          string itself, its length, and two lines of slices.
test-unicode() {
  locale  # displays state
  echo
  # LANG may be unset; the default keeps `set -o nounset` from aborting here.
  echo "${LANG:-}"

  unicode-char

  # Declared separately from the assignment so that a failing command
  # substitution is not hidden behind the exit status of `local`.
  local u
  u=$(unicode-char)
  # Every expansion of $u is quoted: its value is "[μ]", which unquoted would
  # be a glob pattern.
  echo "$u"

  # This changes bash behavior!

  #LANG=C
  echo "${#u}"  # three chars

  # OK bash respect utf-8 when doing string slicing.  Does it have its own
  # unicode support, or does it use libc?
  echo "${u:0}" "${u:1}" "${u:2}"

  u=$(raw-char)
  echo "${u:0}" "${u:1}" "${u:2}"
}

# Feed JSON string literals containing \u escapes to Python's json module and
# print what it decodes them to.
#
# The JSON text is emitted with printf '%s', which passes the backslash
# escapes through untouched, so the decoder sees the six-character sequences
# \u03bc and \u0000 rather than raw characters.  The Python snippets call
# print(...) with a single argument, which is valid in both Python 2 and
# Python 3, so they work whichever version `python` resolves to.
#
# Outputs: three lines on stdout, one per decoder invocation.
json() {
  printf '%s\n' '"\u03bc"' | python -c '
import sys, json
print(json.loads(sys.stdin.read()))
'

  # \u0000 code point seems to be representable
  printf '%s\n' '"[\u0000]"' | python -c '
import sys, json
print(repr(json.loads(sys.stdin.read())))
'
  # Works in python3 too.
  printf '%s\n' '"[\u0000]"' | python3 -c '
import sys, json
print(repr(json.loads(sys.stdin.read())))
'
}

# Right now it's split into (Lit_Other '\xce') and (Lit_Other '\xbc').  This is
# fine for most purposes, although we could probably simplify this.
#
# Runs bin/osh twice on a command containing a raw multi-byte character.
# bin/osh is a relative path, so this must be run from a directory that
# contains it.
osh-literal() {
  # NOTE(review): -n presumably makes osh parse without executing, which is
  # what the token comment above refers to -- confirm.
  bin/osh -n -c 'echo [μ]'
  # This works fine
  bin/osh -c 'echo [μ]'
}

# Run the function named by the first argument, passing the remaining
# arguments to it.  With no arguments this expands to nothing and is a no-op.
"$@"