| 1 | #include <locale.h>
|
| 2 | #include <regex.h>
|
| 3 | #include <wctype.h> // towupper()
|
| 4 |
|
| 5 | #include "mycpp/common.h"
|
| 6 | #include "vendor/greatest.h"
|
| 7 |
|
| 8 | TEST casefold_demo() {
|
| 9 | #if 0
|
| 10 | // Turkish
|
| 11 | if (setlocale(LC_CTYPE, "tr_TR.utf8") == NULL) {
|
| 12 | log("Couldn't set locale to tr_TR.utf8");
|
| 13 | FAIL();
|
| 14 | }
|
| 15 | #endif
|
| 16 |
|
| 17 | // LC_CTYPE_MASK instead of LC_CTYPE
|
| 18 | locale_t turkish = newlocale(LC_CTYPE_MASK, "tr_TR.utf8", NULL);
|
| 19 |
|
| 20 | int u = toupper('i');
|
| 21 | int wu = towupper('i');
|
| 22 | int wul = towupper_l('i', turkish);
|
| 23 |
|
| 24 | // Regular: upper case i is I, 73
|
| 25 | // Turkish: upper case is 304
|
| 26 | log("upper = %d", u);
|
| 27 | log("wide upper = %d", wu);
|
| 28 | log("wide upper locale = %d", wul);
|
| 29 |
|
| 30 | freelocale(turkish);
|
| 31 |
|
| 32 | PASS();
|
| 33 | }
|
| 34 |
|
| 35 | TEST upper_lower_demo() {
|
| 36 | int x = strcmp("a", "b");
|
| 37 | log("strcmp = %d", x);
|
| 38 | x = strcmp("a", "a");
|
| 39 | log("strcmp = %d", x);
|
| 40 |
|
| 41 | // Functions to test with:
|
| 42 | // - different LANG settings
|
| 43 | // - musl libc vs. GNU libc, etc.
|
| 44 | //
|
| 45 | // glob() and fnmatch()
|
| 46 | // regexec()
|
| 47 | // strcoll()
|
| 48 | // int toupper(), tolower(), toupper_l() can be passed locale
|
| 49 | //
|
| 50 | // See doc/unicode.md
|
| 51 |
|
| 52 | // We could have pyext/libc.c wrappers for this, rather than using Python
|
| 53 | // str.upper(). Maybe remove Str::{upper,lower}() from the Yaks language,
|
| 54 | // since it depends on Unicode.
|
| 55 |
|
| 56 | int c;
|
| 57 | c = toupper((unsigned char)'a');
|
| 58 | log("toupper %c", c);
|
| 59 |
|
| 60 | c = tolower((unsigned char)c);
|
| 61 | log("tolower %c", c);
|
| 62 | }
|
| 63 |
|
| 64 | TEST isspace_demo() {
|
| 65 | int x;
|
| 66 |
|
| 67 | // 0xa0 from
|
| 68 | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Lexical_grammar#white_space
|
| 69 | //
|
| 70 | // Somehow it's false
|
| 71 | //
|
| 72 | // In Python we have
|
| 73 | // >>> '\u00a0'.isspace()
|
| 74 | // True
|
| 75 |
|
| 76 | // U+00A0 is non-breaking space
|
| 77 | // U+FEFF is zero-width no break space - this is true
|
| 78 | int cases[] = {'\0', '\t', '\v', '\f', ' ', 'a', 0xa0, 0xfeff};
|
| 79 | int n = sizeof(cases) / sizeof(cases[0]);
|
| 80 |
|
| 81 | for (int i = 0; i < n; ++i) {
|
| 82 | int x = isspace(cases[i]);
|
| 83 | log("isspace %x %d", cases[i], x);
|
| 84 | }
|
| 85 | PASS();
|
| 86 | }
|
| 87 |
|
| 88 | GREATEST_MAIN_DEFS();
|
| 89 |
|
| 90 | int main(int argc, char** argv) {
|
| 91 | // gHeap.Init();
|
| 92 |
|
| 93 | GREATEST_MAIN_BEGIN();
|
| 94 |
|
| 95 | RUN_TEST(upper_lower_demo);
|
| 96 | RUN_TEST(isspace_demo);
|
| 97 |
|
| 98 | // Crashes in CI? Because of Turkish locale?
|
| 99 | // RUN_TEST(casefold_test);
|
| 100 |
|
| 101 | // gHeap.CleanProcessExit();
|
| 102 |
|
| 103 | GREATEST_MAIN_END();
|
| 104 | return 0;
|
| 105 | }
|