OILS / mycpp / gc_builtins.cc View on Github | oils.pub

469 lines, 255 significant
1#include <errno.h> // errno
2#include <float.h> // DBL_MIN, DBL_MAX
3#include <math.h> // INFINITY
4#include <stdio.h> // required for readline/readline.h (man readline)
5
6#include "_build/detected-cpp-config.h"
7#include "mycpp/gc_list.h"
8#include "mycpp/gc_str.h"
9
10// Translation of Python's print().
11void print(BigStr* s) {
12 fputs(s->data_, stdout); // print until first NUL
13 fputc('\n', stdout);
14}
15
16BigStr* str(int i) {
17 BigStr* s = OverAllocatedStr(kIntBufSize);
18 int length = snprintf(s->data(), kIntBufSize, "%d", i);
19 s->MaybeShrink(length);
20 return s;
21}
22
23BigStr* str(double d) {
24 char buf[64]; // overestimate, but we use snprintf() to be safe
25
26 int n = sizeof(buf) - 2; // in case we add '.0'
27
28 // The round tripping test in mycpp/float_test.cc tells us:
29 // %.9g - FLOAT round trip
30 // %.17g - DOUBLE round trip
31 // But this causes problems in practice, e.g. for 3.14, or 1/3
32 // int length = snprintf(buf, n, "%.17g", d);
33
34 // So use 1 less digit, which happens to match Python 3 and node.js (but not
35 // Python 2)
36 // Note: snprintf() %g depends on LC_NUMERIC env var!
37 int length = snprintf(buf, n, "%.16g", d);
38
39 // Problem:
40 // %f prints 3.0000000 and 3.500000
41 // %g prints 3 and 3.5
42 //
43 // We want 3.0 and 3.5, so add '.0' in some cases
44 bool all_digits = true;
45 for (int i = 0; i < length; ++i) {
46 int ch = buf[i];
47 // allow -12345 ; disallow 3,5
48 if (!(('0' <= ch && ch <= '9') || ch == '-')) {
49 all_digits = false;
50 break;
51 }
52 }
53
54 if (all_digits) { // -12345 -> -12345.0
55 buf[length] = '.';
56 buf[length + 1] = '0';
57 buf[length + 2] = '\0';
58 }
59
60 return StrFromC(buf);
61}
62// %a is a hexfloat form, probably don't need that
63// int length = snprintf(buf, n, "%a", d);
64
65// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
66// better for getting values out of Token.line without allocating?
67//
68// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
69//
70// Also for SmallStr, we don't care about interning. Only for HeapStr.
71
72BigStr* intern(BigStr* s) {
73 // TODO: put in table gHeap.interned_
74 return s;
75}
76
77// Print quoted string. Called by StrFormat('%r').
78// TODO: consider using J8 notation instead, since error messages show that
79// string.
80BigStr* repr(BigStr* s) {
81 // Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
82 int n = len(s);
83 int upper_bound = n * 4 + 2;
84
85 BigStr* result = OverAllocatedStr(upper_bound);
86
87 // Single quote by default.
88 char quote = '\'';
89 if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
90 quote = '"';
91 }
92 char* p = result->data_;
93
94 // From PyString_Repr()
95 *p++ = quote;
96 for (int i = 0; i < n; ++i) {
97 unsigned char c = static_cast<unsigned char>(s->data_[i]);
98 if (c == quote || c == '\\') {
99 *p++ = '\\';
100 *p++ = c;
101 } else if (c == '\t') {
102 *p++ = '\\';
103 *p++ = 't';
104 } else if (c == '\n') {
105 *p++ = '\\';
106 *p++ = 'n';
107 } else if (c == '\r') {
108 *p++ = '\\';
109 *p++ = 'r';
110 } else if (0x20 <= c && c < 0x80) {
111 *p++ = c;
112 } else {
113 // Unprintable becomes \xff.
114 // TODO: Consider \yff. This is similar to J8 strings, but we don't
115 // decode UTF-8.
116 sprintf(p, "\\x%02x", c & 0xff);
117 p += 4;
118 }
119 }
120 *p++ = quote;
121 *p = '\0';
122
123 int length = p - result->data_;
124 result->MaybeShrink(length);
125 return result;
126}
127
128// Helper functions that don't use exceptions.
129
130bool StringToInt(const char* s, int length, int base, int* result) {
131 if (length == 0) {
132 return false; // empty string isn't a valid integer
133 }
134
135 // Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
136 // sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
137 // static_assert(sizeof(long) == 8);
138
139 char* pos; // mutated by strtol
140
141 errno = 0;
142 long v = strtol(s, &pos, base);
143
144 if (errno == ERANGE) {
145 switch (v) {
146 case LONG_MIN:
147 return false; // underflow of long, which may be 64 bits
148 case LONG_MAX:
149 return false; // overflow of long
150 }
151 }
152
153 // It should ALSO fit in an int, not just a long
154 if (v > INT_MAX) {
155 return false;
156 }
157 if (v < INT_MIN) {
158 return false;
159 }
160
161 const char* end = s + length;
162 if (pos == end) {
163 *result = v;
164 return true; // strtol() consumed ALL characters.
165 }
166
167 while (pos < end) {
168 if (!IsAsciiWhitespace(*pos)) {
169 return false; // Trailing non-space
170 }
171 pos++;
172 }
173
174 *result = v;
175 return true; // Trailing space is OK
176}
177
178bool StringToInt64(const char* s, int length, int base, int64_t* result) {
179 if (length == 0) {
180 return false; // empty string isn't a valid integer
181 }
182
183 // These should be the same type
184 static_assert(sizeof(long long) == sizeof(int64_t), "");
185
186 char* pos; // mutated by strtol
187
188 errno = 0;
189 long long v = strtoll(s, &pos, base);
190
191 if (errno == ERANGE) {
192 switch (v) {
193 case LLONG_MIN:
194 return false; // underflow
195 case LLONG_MAX:
196 return false; // overflow
197 }
198 }
199
200 const char* end = s + length;
201 if (pos == end) {
202 *result = v;
203 return true; // strtol() consumed ALL characters.
204 }
205
206 while (pos < end) {
207 if (!IsAsciiWhitespace(*pos)) {
208 return false; // Trailing non-space
209 }
210 pos++;
211 }
212
213 *result = v;
214 return true; // Trailing space is OK
215}
216
217int to_int(BigStr* s, int base) {
218 int i;
219 if (StringToInt(s->data_, len(s), base, &i)) {
220 return i; // truncated to int
221 } else {
222 throw Alloc<ValueError>();
223 }
224}
225
226BigStr* chr(int i) {
227 // NOTE: i should be less than 256, in which we could return an object from
228 // GLOBAL_STR() pool, like StrIter
229 auto result = NewStr(1);
230 result->data_[0] = i;
231 return result;
232}
233
234int ord(BigStr* s) {
235 assert(len(s) == 1);
236 // signed to unsigned conversion, so we don't get values like -127
237 uint8_t c = static_cast<uint8_t>(s->data_[0]);
238 return c;
239}
240
241bool to_bool(BigStr* s) {
242 return len(s) != 0;
243}
244
245double to_float(int i) {
246 return static_cast<double>(i);
247}
248
249double to_float(BigStr* s) {
250 char* begin = s->data_;
251 char* end_pos = begin + len(s);
252 char* orig_end = end_pos;
253
254 errno = 0;
255 double result = strtod(begin, &end_pos);
256
257 if (errno == ERANGE) { // error: overflow or underflow
258 if (result >= HUGE_VAL) {
259 return INFINITY;
260 } else if (result <= -HUGE_VAL) {
261 return -INFINITY;
262 } else if (-DBL_MIN <= result && result <= DBL_MIN) {
263 return 0.0;
264 } else {
265 FAIL("Invalid value after ERANGE");
266 }
267 }
268 if (end_pos == begin) { // error: not a floating point number
269 throw Alloc<ValueError>();
270 }
271 if (end_pos != orig_end) { // trailing data like '5s' not alowed
272 while (end_pos < orig_end) {
273 if (!IsAsciiWhitespace(*end_pos)) {
274 throw Alloc<ValueError>(); // Trailing non-space
275 }
276 end_pos++;
277 }
278 }
279
280 return result;
281}
282
283// e.g. ('a' in 'abc')
284bool str_contains(BigStr* haystack, BigStr* needle) {
285 // Common case
286 if (len(needle) == 1) {
287 return memchr(haystack->data_, needle->data_[0], len(haystack));
288 }
289
290 if (len(needle) > len(haystack)) {
291 return false;
292 }
293
294 // General case. TODO: We could use a smarter substring algorithm.
295
296 const char* end = haystack->data_ + len(haystack);
297 const char* last_possible = end - len(needle);
298 const char* p = haystack->data_;
299
300 while (p <= last_possible) {
301 if (memcmp(p, needle->data_, len(needle)) == 0) {
302 return true;
303 }
304 p++;
305 }
306 return false;
307}
308
309BigStr* str_repeat(BigStr* s, int times) {
310 // Python allows -1 too, and Oil used that
311 if (times <= 0) {
312 return kEmptyString;
313 }
314 int len_ = len(s);
315 int new_len = len_ * times;
316 BigStr* result = NewStr(new_len);
317
318 char* dest = result->data_;
319 for (int i = 0; i < times; i++) {
320 memcpy(dest, s->data_, len_);
321 dest += len_;
322 }
323 return result;
324}
325
326// for os_path.join()
327// NOTE(Jesse): Perfect candidate for BoundedBuffer
328BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
329 int a_len = len(a);
330 int b_len = len(b);
331 int c_len = len(c);
332
333 int new_len = a_len + b_len + c_len;
334 BigStr* result = NewStr(new_len);
335 char* pos = result->data_;
336
337 memcpy(pos, a->data_, a_len);
338 pos += a_len;
339
340 memcpy(pos, b->data_, b_len);
341 pos += b_len;
342
343 memcpy(pos, c->data_, c_len);
344
345 assert(pos + c_len == result->data_ + new_len);
346
347 return result;
348}
349
350BigStr* str_concat(BigStr* a, BigStr* b) {
351 int a_len = len(a);
352 int b_len = len(b);
353 int new_len = a_len + b_len;
354 BigStr* result = NewStr(new_len);
355 char* buf = result->data_;
356
357 memcpy(buf, a->data_, a_len);
358 memcpy(buf + a_len, b->data_, b_len);
359
360 return result;
361}
362
363//
364// Comparators
365//
366
367bool str_equals(BigStr* left, BigStr* right) {
368 // Fast path for identical strings. String deduplication during GC could
369 // make this more likely. String interning could guarantee it, allowing us
370 // to remove memcmp().
371 if (left == right) {
372 return true;
373 }
374
375 // TODO: It would be nice to remove this condition, but I think we need MyPy
376 // strict None checking for it
377 if (left == nullptr || right == nullptr) {
378 return false;
379 }
380
381 if (left->len_ != right->len_) {
382 return false;
383 }
384
385 return memcmp(left->data_, right->data_, left->len_) == 0;
386}
387
388bool maybe_str_equals(BigStr* left, BigStr* right) {
389 if (left && right) {
390 return str_equals(left, right);
391 }
392
393 if (!left && !right) {
394 return true; // None == None
395 }
396
397 return false; // one is None and one is a BigStr*
398}
399
400bool items_equal(BigStr* left, BigStr* right) {
401 return str_equals(left, right);
402}
403
404bool keys_equal(BigStr* left, BigStr* right) {
405 return items_equal(left, right);
406}
407
408bool items_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
409 return (t1->at0() == t2->at0()) && (t1->at1() == t2->at1());
410}
411
412bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
413 return items_equal(t1, t2);
414}
415
416bool items_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
417 return items_equal(t1->at0(), t2->at0()) && (t1->at1() == t2->at1());
418}
419
420bool keys_equal(Tuple2<BigStr*, int>* t1, Tuple2<BigStr*, int>* t2) {
421 return items_equal(t1, t2);
422}
423
424bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
425 // Needs SmallStr change
426 if (len(s) == c_len) {
427 return memcmp(s->data_, c_string, c_len) == 0;
428 } else {
429 return false;
430 }
431}
432
433bool str_equals0(const char* c_string, BigStr* s) {
434 int n = strlen(c_string);
435 if (len(s) == n) {
436 return memcmp(s->data_, c_string, n) == 0;
437 } else {
438 return false;
439 }
440}
441
442int hash(BigStr* s) {
443 return s->hash(fnv1);
444}
445
446int max(int a, int b) {
447 return std::max(a, b);
448}
449
450int min(int a, int b) {
451 return std::min(a, b);
452}
453
454int max(List<int>* elems) {
455 int n = len(elems);
456 if (n < 1) {
457 throw Alloc<ValueError>();
458 }
459
460 int ret = elems->at(0);
461 for (int i = 0; i < n; ++i) {
462 int cand = elems->at(i);
463 if (cand > ret) {
464 ret = cand;
465 }
466 }
467
468 return ret;
469}