mycpp/gc_builtins.cc

OILS / mycpp / gc_builtins.cc View on Github | oils.pub

469 lines, 255 significant

1	#include <errno.h> // errno
2	#include <float.h> // DBL_MIN, DBL_MAX
3	#include <math.h> // INFINITY
4	#include <stdio.h> // required for readline/readline.h (man readline)
5
6	#include "_build/detected-cpp-config.h"
7	#include "mycpp/gc_list.h"
8	#include "mycpp/gc_str.h"
9
10	// Translation of Python's print().
11	void print(BigStr* s) {
12	fputs(s->data_, stdout); // print until first NUL
13	fputc('\n', stdout);
14	}
15
16	BigStr* str(int i) {
17	BigStr* s = OverAllocatedStr(kIntBufSize);
18	int length = snprintf(s->data(), kIntBufSize, "%d", i);
19	s->MaybeShrink(length);
20	return s;
21	}
22
23	BigStr* str(double d) {
24	char buf[64]; // overestimate, but we use snprintf() to be safe
25
26	int n = sizeof(buf) - 2; // in case we add '.0'
27
28	// The round tripping test in mycpp/float_test.cc tells us:
29	// %.9g - FLOAT round trip
30	// %.17g - DOUBLE round trip
31	// But this causes problems in practice, e.g. for 3.14, or 1/3
32	// int length = snprintf(buf, n, "%.17g", d);
33
34	// So use 1 less digit, which happens to match Python 3 and node.js (but not
35	// Python 2)
36	// Note: snprintf() %g depends on LC_NUMERIC env var!
37	int length = snprintf(buf, n, "%.16g", d);
38
39	// Problem:
40	// %f prints 3.0000000 and 3.500000
41	// %g prints 3 and 3.5
42	//
43	// We want 3.0 and 3.5, so add '.0' in some cases
44	bool all_digits = true;
45	for (int i = 0; i < length; ++i) {
46	int ch = buf[i];
47	// allow -12345 ; disallow 3,5
48	if (!(('0' <= ch && ch <= '9') \|\| ch == '-')) {
49	all_digits = false;
50	break;
51	}
52	}
53
54	if (all_digits) { // -12345 -> -12345.0
55	buf[length] = '.';
56	buf[length + 1] = '0';
57	buf[length + 2] = '\0';
58	}
59
60	return StrFromC(buf);
61	}
62	// %a is a hexfloat form, probably don't need that
63	// int length = snprintf(buf, n, "%a", d);
64
65	// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
66	// better for getting values out of Token.line without allocating?
67	//
68	// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
69	//
70	// Also for SmallStr, we don't care about interning. Only for HeapStr.
71
72	BigStr* intern(BigStr* s) {
73	// TODO: put in table gHeap.interned_
74	return s;
75	}
76
77	// Print quoted string. Called by StrFormat('%r').
78	// TODO: consider using J8 notation instead, since error messages show that
79	// string.
80	BigStr* repr(BigStr* s) {
81	// Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
82	int n = len(s);
83	int upper_bound = n * 4 + 2;
84
85	BigStr* result = OverAllocatedStr(upper_bound);
86
87	// Single quote by default.
88	char quote = '\'';
89	if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
90	quote = '"';
91	}
92	char* p = result->data_;
93
94	// From PyString_Repr()
95	*p++ = quote;
96	for (int i = 0; i < n; ++i) {
97	unsigned char c = static_cast<unsigned char>(s->data_[i]);
98	if (c == quote \|\| c == '\\') {
99	*p++ = '\\';
100	*p++ = c;
101	} else if (c == '\t') {
102	*p++ = '\\';
103	*p++ = 't';
104	} else if (c == '\n') {
105	*p++ = '\\';
106	*p++ = 'n';
107	} else if (c == '\r') {
108	*p++ = '\\';
109	*p++ = 'r';
110	} else if (0x20 <= c && c < 0x80) {
111	*p++ = c;
112	} else {
113	// Unprintable becomes \xff.
114	// TODO: Consider \yff. This is similar to J8 strings, but we don't
115	// decode UTF-8.
116	sprintf(p, "\\x%02x", c & 0xff);
117	p += 4;
118	}
119	}
120	*p++ = quote;
121	*p = '\0';
122
123	int length = p - result->data_;
124	result->MaybeShrink(length);
125	return result;
126	}
127
128	// Helper functions that don't use exceptions.
129
130	bool StringToInt(const char* s, int length, int base, int* result) {
131	if (length == 0) {
132	return false; // empty string isn't a valid integer
133	}
134
135	// Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
136	// sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
137	// static_assert(sizeof(long) == 8);
138
139	char* pos; // mutated by strtol
140
141	errno = 0;
142	long v = strtol(s, &pos, base);
143
144	if (errno == ERANGE) {
145	switch (v) {
146	case LONG_MIN:
147	return false; // underflow of long, which may be 64 bits
148	case LONG_MAX:
149	return false; // overflow of long
150	}
151	}
152
153	// It should ALSO fit in an int, not just a long
154	if (v > INT_MAX) {
155	return false;
156	}
157	if (v < INT_MIN) {
158	return false;
159	}
160
161	const char* end = s + length;
162	if (pos == end) {
163	*result = v;
164	return true; // strtol() consumed ALL characters.
165	}
166
167	while (pos < end) {
168	if (!IsAsciiWhitespace(*pos)) {
169	return false; // Trailing non-space
170	}
171	pos++;
172	}
173
174	*result = v;
175	return true; // Trailing space is OK
176	}
177
178	bool StringToInt64(const char* s, int length, int base, int64_t* result) {
179	if (length == 0) {
180	return false; // empty string isn't a valid integer
181	}
182
183	// These should be the same type
184	static_assert(sizeof(long long) == sizeof(int64_t), "");
185
186	char* pos; // mutated by strtol
187
188	errno = 0;
189	long long v = strtoll(s, &pos, base);
190
191	if (errno == ERANGE) {
192	switch (v) {
193	case LLONG_MIN:
194	return false; // underflow
195	case LLONG_MAX:
196	return false; // overflow
197	}
198	}
199
200	const char* end = s + length;
201	if (pos == end) {
202	*result = v;
203	return true; // strtol() consumed ALL characters.
204	}
205
206	while (pos < end) {
207	if (!IsAsciiWhitespace(*pos)) {
208	return false; // Trailing non-space
209	}
210	pos++;
211	}
212
213	*result = v;
214	return true; // Trailing space is OK
215	}
216
217	int to_int(BigStr* s, int base) {
218	int i;
219	if (StringToInt(s->data_, len(s), base, &i)) {
220	return i; // truncated to int
221	} else {
222	throw Alloc<ValueError>();
223	}
224	}
225
226	BigStr* chr(int i) {
227	// NOTE: i should be less than 256, in which we could return an object from
228	// GLOBAL_STR() pool, like StrIter
229	auto result = NewStr(1);
230	result->data_[0] = i;
231	return result;
232	}
233
234	int ord(BigStr* s) {
235	assert(len(s) == 1);
236	// signed to unsigned conversion, so we don't get values like -127
237	uint8_t c = static_cast<uint8_t>(s->data_[0]);
238	return c;
239	}
240
241	bool to_bool(BigStr* s) {
242	return len(s) != 0;
243	}
244
245	double to_float(int i) {
246	return static_cast<double>(i);
247	}
248
249	double to_float(BigStr* s) {
250	char* begin = s->data_;
251	char* end_pos = begin + len(s);
252	char* orig_end = end_pos;
253
254	errno = 0;
255	double result = strtod(begin, &end_pos);
256
257	if (errno == ERANGE) { // error: overflow or underflow
258	if (result >= HUGE_VAL) {
259	return INFINITY;
260	} else if (result <= -HUGE_VAL) {
261	return -INFINITY;
262	} else if (-DBL_MIN <= result && result <= DBL_MIN) {
263	return 0.0;
264	} else {
265	FAIL("Invalid value after ERANGE");
266	}
267	}
268	if (end_pos == begin) { // error: not a floating point number
269	throw Alloc<ValueError>();
270	}
271	if (end_pos != orig_end) { // trailing data like '5s' not alowed
272	while (end_pos < orig_end) {
273	if (!IsAsciiWhitespace(*end_pos)) {
274	throw Alloc<ValueError>(); // Trailing non-space
275	}
276	end_pos++;
277	}
278	}
279
280	return result;
281	}
282
283	// e.g. ('a' in 'abc')
284	bool str_contains(BigStr* haystack, BigStr* needle) {
285	// Common case
286	if (len(needle) == 1) {
287	return memchr(haystack->data_, needle->data_[0], len(haystack));
288	}
289
290	if (len(needle) > len(haystack)) {
291	return false;
292	}
293
294	// General case. TODO: We could use a smarter substring algorithm.
295
296	const char* end = haystack->data_ + len(haystack);
297	const char* last_possible = end - len(needle);
298	const char* p = haystack->data_;
299
300	while (p <= last_possible) {
301	if (memcmp(p, needle->data_, len(needle)) == 0) {
302	return true;
303	}
304	p++;
305	}
306	return false;
307	}
308
309	BigStr* str_repeat(BigStr* s, int times) {
310	// Python allows -1 too, and Oil used that
311	if (times <= 0) {
312	return kEmptyString;
313	}
314	int len_ = len(s);
315	int new_len = len_ * times;
316	BigStr* result = NewStr(new_len);
317
318	char* dest = result->data_;
319	for (int i = 0; i < times; i++) {
320	memcpy(dest, s->data_, len_);
321	dest += len_;
322	}
323	return result;
324	}
325
326	// for os_path.join()
327	// NOTE(Jesse): Perfect candidate for BoundedBuffer
328	BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
329	int a_len = len(a);
330	int b_len = len(b);
331	int c_len = len(c);
332
333	int new_len = a_len + b_len + c_len;
334	BigStr* result = NewStr(new_len);
335	char* pos = result->data_;
336
337	memcpy(pos, a->data_, a_len);
338	pos += a_len;
339
340	memcpy(pos, b->data_, b_len);
341	pos += b_len;
342
343	memcpy(pos, c->data_, c_len);
344
345	assert(pos + c_len == result->data_ + new_len);
346
347	return result;
348	}
349
350	BigStr* str_concat(BigStr* a, BigStr* b) {
351	int a_len = len(a);
352	int b_len = len(b);
353	int new_len = a_len + b_len;
354	BigStr* result = NewStr(new_len);
355	char* buf = result->data_;
356
357	memcpy(buf, a->data_, a_len);
358	memcpy(buf + a_len, b->data_, b_len);
359
360	return result;
361	}
362
363	//
364	// Comparators
365	//
366
367	bool str_equals(BigStr* left, BigStr* right) {
368	// Fast path for identical strings. String deduplication during GC could
369	// make this more likely. String interning could guarantee it, allowing us
370	// to remove memcmp().
371	if (left == right) {
372	return true;
373	}
374
375	// TODO: It would be nice to remove this condition, but I think we need MyPy
376	// strict None checking for it
377	if (left == nullptr \|\| right == nullptr) {
378	return false;
379	}
380
381	if (left->len_ != right->len_) {
382	return false;
383	}
384
385	return memcmp(left->data_, right->data_, left->len_) == 0;
386	}
387
388	bool maybe_str_equals(BigStr* left, BigStr* right) {
389	if (left && right) {
390	return str_equals(left, right);
391	}
392
393	if (!left && !right) {
394	return true; // None == None
395	}
396
397	return false; // one is None and one is a BigStr*
398	}
399
400	bool items_equal(BigStr* left, BigStr* right) {
401	return str_equals(left, right);
402	}
403
404	bool keys_equal(BigStr* left, BigStr* right) {
405	return items_equal(left, right);
406	}
407
408	bool items_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
409	return (t1->at0() == t2->at0()) && (t1->at1() == t2->at1());
410	}
411
412	bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
413	return items_equal(t1, t2);
414	}
415
416	bool items_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
417	return items_equal(t1->at0(), t2->at0()) && (t1->at1() == t2->at1());
418	}
419
420	bool keys_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
421	return items_equal(t1, t2);
422	}
423
424	bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
425	// Needs SmallStr change
426	if (len(s) == c_len) {
427	return memcmp(s->data_, c_string, c_len) == 0;
428	} else {
429	return false;
430	}
431	}
432
433	bool str_equals0(const char* c_string, BigStr* s) {
434	int n = strlen(c_string);
435	if (len(s) == n) {
436	return memcmp(s->data_, c_string, n) == 0;
437	} else {
438	return false;
439	}
440	}
441
442	int hash(BigStr* s) {
443	return s->hash(fnv1);
444	}
445
446	int max(int a, int b) {
447	return std::max(a, b);
448	}
449
450	int min(int a, int b) {
451	return std::min(a, b);
452	}
453
454	int max(List<int>* elems) {
455	int n = len(elems);
456	if (n < 1) {
457	throw Alloc<ValueError>();
458	}
459
460	int ret = elems->at(0);
461	for (int i = 0; i < n; ++i) {
462	int cand = elems->at(i);
463	if (cand > ret) {
464	ret = cand;
465	}
466	}
467
468	return ret;
469	}