cpp/unicode_demo.cc

OILS / cpp / unicode_demo.cc View on Github | oils.pub

105 lines, 45 significant

1	#include <locale.h>
2	#include <regex.h>
3	#include <wctype.h> // towupper()
4
5	#include "mycpp/common.h"
6	#include "vendor/greatest.h"
7
8	TEST casefold_demo() {
9	#if 0
10	// Turkish
11	if (setlocale(LC_CTYPE, "tr_TR.utf8") == NULL) {
12	log("Couldn't set locale to tr_TR.utf8");
13	FAIL();
14	}
15	#endif
16
17	// LC_CTYPE_MASK instead of LC_CTYPE
18	locale_t turkish = newlocale(LC_CTYPE_MASK, "tr_TR.utf8", NULL);
19
20	int u = toupper('i');
21	int wu = towupper('i');
22	int wul = towupper_l('i', turkish);
23
24	// Regular: upper case i is I, 73
25	// Turkish: upper case is 304
26	log("upper = %d", u);
27	log("wide upper = %d", wu);
28	log("wide upper locale = %d", wul);
29
30	freelocale(turkish);
31
32	PASS();
33	}
34
35	TEST upper_lower_demo() {
36	int x = strcmp("a", "b");
37	log("strcmp = %d", x);
38	x = strcmp("a", "a");
39	log("strcmp = %d", x);
40
41	// Functions to test with:
42	// - different LANG settings
43	// - musl libc vs. GNU libc, etc.
44	//
45	// glob() and fnmatch()
46	// regexec()
47	// strcoll()
48	// int toupper(), tolower(), toupper_l() can be passed locale
49	//
50	// See doc/unicode.md
51
52	// We could have pyext/libc.c wrappers for this, rather than using Python
53	// str.upper(). Maybe remove Str::{upper,lower}() from the Yaks language,
54	// since it depends on Unicode.
55
56	int c;
57	c = toupper((unsigned char)'a');
58	log("toupper %c", c);
59
60	c = tolower((unsigned char)c);
61	log("tolower %c", c);
62	}
63
64	TEST isspace_demo() {
65	int x;
66
67	// 0xa0 from
68	// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#white_space
69	//
70	// Somehow it's false
71	//
72	// In Python we have
73	// >>> '\u00a0'.isspace()
74	// True
75
76	// U+00A0 is non-breaking space
77	// U+FEFF is zero-width no break space - this is true
78	int cases[] = {'\0', '\t', '\v', '\f', ' ', 'a', 0xa0, 0xfeff};
79	int n = sizeof(cases) / sizeof(cases[0]);
80
81	for (int i = 0; i < n; ++i) {
82	int x = isspace(cases[i]);
83	log("isspace %x %d", cases[i], x);
84	}
85	PASS();
86	}
87
88	GREATEST_MAIN_DEFS();
89
90	int main(int argc, char** argv) {
91	// gHeap.Init();
92
93	GREATEST_MAIN_BEGIN();
94
95	RUN_TEST(upper_lower_demo);
96	RUN_TEST(isspace_demo);
97
98	// Crashes in CI? Because of Turkish locale?
99	// RUN_TEST(casefold_test);
100
101	// gHeap.CleanProcessExit();
102
103	GREATEST_MAIN_END();
104	return 0;
105	}