OILS / cpp / unicode_demo.cc View on Github | oils.pub

105 lines, 45 significant
1#include <locale.h>
2#include <regex.h>
3#include <wctype.h> // towupper()
4
5#include "mycpp/common.h"
6#include "vendor/greatest.h"
7
8TEST casefold_demo() {
9#if 0
10 // Turkish
11 if (setlocale(LC_CTYPE, "tr_TR.utf8") == NULL) {
12 log("Couldn't set locale to tr_TR.utf8");
13 FAIL();
14 }
15#endif
16
17 // LC_CTYPE_MASK instead of LC_CTYPE
18 locale_t turkish = newlocale(LC_CTYPE_MASK, "tr_TR.utf8", NULL);
19
20 int u = toupper('i');
21 int wu = towupper('i');
22 int wul = towupper_l('i', turkish);
23
24 // Regular: upper case i is I, 73
25 // Turkish: upper case is 304
26 log("upper = %d", u);
27 log("wide upper = %d", wu);
28 log("wide upper locale = %d", wul);
29
30 freelocale(turkish);
31
32 PASS();
33}
34
35TEST upper_lower_demo() {
36 int x = strcmp("a", "b");
37 log("strcmp = %d", x);
38 x = strcmp("a", "a");
39 log("strcmp = %d", x);
40
41 // Functions to test with:
42 // - different LANG settings
43 // - musl libc vs. GNU libc, etc.
44 //
45 // glob() and fnmatch()
46 // regexec()
47 // strcoll()
48 // int toupper(), tolower(), toupper_l() can be passed locale
49 //
50 // See doc/unicode.md
51
52 // We could have pyext/libc.c wrappers for this, rather than using Python
53 // str.upper(). Maybe remove Str::{upper,lower}() from the Yaks language,
54 // since it depends on Unicode.
55
56 int c;
57 c = toupper((unsigned char)'a');
58 log("toupper %c", c);
59
60 c = tolower((unsigned char)c);
61 log("tolower %c", c);
62}
63
64TEST isspace_demo() {
65 int x;
66
67 // 0xa0 from
68 // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#white_space
69 //
70 // Somehow it's false
71 //
72 // In Python we have
73 // >>> '\u00a0'.isspace()
74 // True
75
76 // U+00A0 is non-breaking space
77 // U+FEFF is zero-width no break space - this is true
78 int cases[] = {'\0', '\t', '\v', '\f', ' ', 'a', 0xa0, 0xfeff};
79 int n = sizeof(cases) / sizeof(cases[0]);
80
81 for (int i = 0; i < n; ++i) {
82 int x = isspace(cases[i]);
83 log("isspace %x %d", cases[i], x);
84 }
85 PASS();
86}
87
88GREATEST_MAIN_DEFS();
89
90int main(int argc, char** argv) {
91 // gHeap.Init();
92
93 GREATEST_MAIN_BEGIN();
94
95 RUN_TEST(upper_lower_demo);
96 RUN_TEST(isspace_demo);
97
98 // Crashes in CI? Because of Turkish locale?
99 // RUN_TEST(casefold_test);
100
101 // gHeap.CleanProcessExit();
102
103 GREATEST_MAIN_END();
104 return 0;
105}