mycpp/gc_builtins.cc

OILS / mycpp / gc_builtins.cc View on Github | oils.pub

478 lines, 259 significant

1	#include <errno.h> // errno
2	#include <float.h> // DBL_MIN, DBL_MAX
3	#include <math.h> // INFINITY
4	#include <stdio.h> // required for readline/readline.h (man readline)
5
6	#include "_build/detected-cpp-config.h"
7	#include "mycpp/gc_list.h"
8	#include "mycpp/gc_str.h"
9
10	// Translation of Python's print().
11	void print(BigStr* s) {
12	fputs(s->data_, stdout); // print until first NUL
13	fputc('\n', stdout);
14	}
15
16	BigStr* str(int i) {
17	BigStr* s = OverAllocatedStr(kIntBufSize);
18	int length = snprintf(s->data(), kIntBufSize, "%d", i);
19	s->MaybeShrink(length);
20	return s;
21	}
22
23	BigStr* str(double d) {
24	char buf[64]; // overestimate, but we use snprintf() to be safe
25
26	int n = sizeof(buf) - 2; // in case we add '.0'
27
28	// The round tripping test in mycpp/float_test.cc tells us:
29	// %.9g - FLOAT round trip
30	// %.17g - DOUBLE round trip
31	// But this causes problems in practice, e.g. for 3.14, or 1/3
32	// int length = snprintf(buf, n, "%.17g", d);
33
34	// So use 1 less digit, which happens to match Python 3 and node.js (but not
35	// Python 2)
36	// Note: snprintf() %g depends on LC_NUMERIC env var!
37	int length = snprintf(buf, n, "%.16g", d);
38
39	// Problem:
40	// %f prints 3.0000000 and 3.500000
41	// %g prints 3 and 3.5
42	//
43	// We want 3.0 and 3.5, so add '.0' in some cases
44	bool all_digits = true;
45	for (int i = 0; i < length; ++i) {
46	int ch = buf[i];
47	// allow -12345 ; disallow 3,5
48	if (!(('0' <= ch && ch <= '9') \|\| ch == '-')) {
49	all_digits = false;
50	break;
51	}
52	}
53
54	if (all_digits) { // -12345 -> -12345.0
55	buf[length] = '.';
56	buf[length + 1] = '0';
57	buf[length + 2] = '\0';
58	}
59
60	return StrFromC(buf);
61	}
62	// %a is a hexfloat form, probably don't need that
63	// int length = snprintf(buf, n, "%a", d);
64
65	// Do we need this API? Or is mylib.InternedStr(BigStr* s, int start, int end)
66	// better for getting values out of Token.line without allocating?
67	//
68	// e.g. mylib.InternedStr(tok.line, tok.start, tok.start+1)
69	//
70	// Also for SmallStr, we don't care about interning. Only for HeapStr.
71
72	BigStr* intern(BigStr* s) {
73	// TODO: put in table gHeap.interned_
74	return s;
75	}
76
77	// Print quoted string. Called by StrFormat('%r').
78	// TODO: consider using J8 notation instead, since error messages show that
79	// string.
80	BigStr* repr(BigStr* s) {
81	// Worst case: \0 becomes 4 bytes as '\\x00', and then two quote bytes.
82	int n = len(s);
83	int upper_bound = n * 4 + 2;
84
85	BigStr* result = OverAllocatedStr(upper_bound);
86
87	// Single quote by default.
88	char quote = '\'';
89	if (memchr(s->data_, '\'', n) && !memchr(s->data_, '"', n)) {
90	quote = '"';
91	}
92	char* p = result->data_;
93
94	// From PyString_Repr()
95	*p++ = quote;
96	for (int i = 0; i < n; ++i) {
97	unsigned char c = static_cast<unsigned char>(s->data_[i]);
98	if (c == quote \|\| c == '\\') {
99	*p++ = '\\';
100	*p++ = c;
101	} else if (c == '\t') {
102	*p++ = '\\';
103	*p++ = 't';
104	} else if (c == '\n') {
105	*p++ = '\\';
106	*p++ = 'n';
107	} else if (c == '\r') {
108	*p++ = '\\';
109	*p++ = 'r';
110	} else if (0x20 <= c && c < 0x80) {
111	*p++ = c;
112	} else {
113	// Unprintable becomes \xff.
114	// TODO: Consider \yff. This is similar to J8 strings, but we don't
115	// decode UTF-8.
116	sprintf(p, "\\x%02x", c & 0xff);
117	p += 4;
118	}
119	}
120	*p++ = quote;
121	*p = '\0';
122
123	int length = p - result->data_;
124	result->MaybeShrink(length);
125	return result;
126	}
127
128	// Helper functions that don't use exceptions.
129
130	bool StringToInt(const char* s, int length, int base, int* result) {
131	if (length == 0) {
132	return false; // empty string isn't a valid integer
133	}
134
135	// Note: sizeof(int) is often 4 bytes on both 32-bit and 64-bit
136	// sizeof(long) is often 4 bytes on both 32-bit but 8 bytes on 64-bit
137	// static_assert(sizeof(long) == 8);
138
139	char* pos; // mutated by strtol
140
141	errno = 0;
142	long v = strtol(s, &pos, base);
143
144	if (pos == s) {
145	return false; // nothing consumed
146	}
147
148	if (errno == ERANGE) {
149	switch (v) {
150	case LONG_MIN:
151	return false; // underflow of long, which may be 64 bits
152	case LONG_MAX:
153	return false; // overflow of long
154	}
155	}
156
157	// It should ALSO fit in an int, not just a long
158	if (v > INT_MAX) {
159	return false;
160	}
161	if (v < INT_MIN) {
162	return false;
163	}
164
165	const char* end = s + length;
166	// log("s %p, pos %p, end %p, length %d", s, pos, end, length);
167	if (pos == end) {
168	*result = v;
169	return true; // strtol() consumed ALL characters.
170	}
171
172	while (pos < end) {
173	if (!IsAsciiWhitespace(*pos)) {
174	return false; // Trailing non-space
175	}
176	pos++;
177	}
178
179	*result = v;
180	return true; // Trailing space is OK
181	}
182
183	bool StringToInt64(const char* s, int length, int base, int64_t* result) {
184	if (length == 0) {
185	return false; // empty string isn't a valid integer
186	}
187
188	// These should be the same type
189	static_assert(sizeof(long long) == sizeof(int64_t), "");
190
191	char* pos; // mutated by strtol
192
193	errno = 0;
194	long long v = strtoll(s, &pos, base);
195
196	if (pos == s) {
197	return false; // nothing consumed
198	}
199
200	if (errno == ERANGE) {
201	switch (v) {
202	case LLONG_MIN:
203	return false; // underflow
204	case LLONG_MAX:
205	return false; // overflow
206	}
207	}
208
209	const char* end = s + length;
210	if (pos == end) {
211	*result = v;
212	return true; // strtol() consumed ALL characters.
213	}
214
215	while (pos < end) {
216	if (!IsAsciiWhitespace(*pos)) {
217	return false; // Trailing non-space
218	}
219	pos++;
220	}
221
222	*result = v;
223	return true; // Trailing space is OK
224	}
225
226	int to_int(BigStr* s, int base) {
227	int i;
228	if (StringToInt(s->data_, len(s), base, &i)) {
229	return i; // truncated to int
230	} else {
231	throw Alloc<ValueError>();
232	}
233	}
234
235	BigStr* chr(int i) {
236	// NOTE: i should be less than 256, in which we could return an object from
237	// GLOBAL_STR() pool, like StrIter
238	auto result = NewStr(1);
239	result->data_[0] = i;
240	return result;
241	}
242
243	int ord(BigStr* s) {
244	assert(len(s) == 1);
245	// signed to unsigned conversion, so we don't get values like -127
246	uint8_t c = static_cast<uint8_t>(s->data_[0]);
247	return c;
248	}
249
250	bool to_bool(BigStr* s) {
251	return len(s) != 0;
252	}
253
// Convert an int to the double with the same value.
double to_float(int i) {
  double widened = i;
  return widened;
}
257
258	double to_float(BigStr* s) {
259	char* begin = s->data_;
260	char* end_pos = begin + len(s);
261	char* orig_end = end_pos;
262
263	errno = 0;
264	double result = strtod(begin, &end_pos);
265
266	if (errno == ERANGE) { // error: overflow or underflow
267	if (result >= HUGE_VAL) {
268	return INFINITY;
269	} else if (result <= -HUGE_VAL) {
270	return -INFINITY;
271	} else if (-DBL_MIN <= result && result <= DBL_MIN) {
272	return 0.0;
273	} else {
274	FAIL("Invalid value after ERANGE");
275	}
276	}
277	if (end_pos == begin) { // error: not a floating point number
278	throw Alloc<ValueError>();
279	}
280	if (end_pos != orig_end) { // trailing data like '5s' not alowed
281	while (end_pos < orig_end) {
282	if (!IsAsciiWhitespace(*end_pos)) {
283	throw Alloc<ValueError>(); // Trailing non-space
284	}
285	end_pos++;
286	}
287	}
288
289	return result;
290	}
291
292	// e.g. ('a' in 'abc')
293	bool str_contains(BigStr* haystack, BigStr* needle) {
294	// Common case
295	if (len(needle) == 1) {
296	return memchr(haystack->data_, needle->data_[0], len(haystack));
297	}
298
299	if (len(needle) > len(haystack)) {
300	return false;
301	}
302
303	// General case. TODO: We could use a smarter substring algorithm.
304
305	const char* end = haystack->data_ + len(haystack);
306	const char* last_possible = end - len(needle);
307	const char* p = haystack->data_;
308
309	while (p <= last_possible) {
310	if (memcmp(p, needle->data_, len(needle)) == 0) {
311	return true;
312	}
313	p++;
314	}
315	return false;
316	}
317
318	BigStr* str_repeat(BigStr* s, int times) {
319	// Python allows -1 too, and Oil used that
320	if (times <= 0) {
321	return kEmptyString;
322	}
323	int len_ = len(s);
324	int new_len = len_ * times;
325	BigStr* result = NewStr(new_len);
326
327	char* dest = result->data_;
328	for (int i = 0; i < times; i++) {
329	memcpy(dest, s->data_, len_);
330	dest += len_;
331	}
332	return result;
333	}
334
335	// for os_path.join()
336	// NOTE(Jesse): Perfect candidate for BoundedBuffer
337	BigStr* str_concat3(BigStr* a, BigStr* b, BigStr* c) {
338	int a_len = len(a);
339	int b_len = len(b);
340	int c_len = len(c);
341
342	int new_len = a_len + b_len + c_len;
343	BigStr* result = NewStr(new_len);
344	char* pos = result->data_;
345
346	memcpy(pos, a->data_, a_len);
347	pos += a_len;
348
349	memcpy(pos, b->data_, b_len);
350	pos += b_len;
351
352	memcpy(pos, c->data_, c_len);
353
354	assert(pos + c_len == result->data_ + new_len);
355
356	return result;
357	}
358
359	BigStr* str_concat(BigStr* a, BigStr* b) {
360	int a_len = len(a);
361	int b_len = len(b);
362	int new_len = a_len + b_len;
363	BigStr* result = NewStr(new_len);
364	char* buf = result->data_;
365
366	memcpy(buf, a->data_, a_len);
367	memcpy(buf + a_len, b->data_, b_len);
368
369	return result;
370	}
371
372	//
373	// Comparators
374	//
375
376	bool str_equals(BigStr* left, BigStr* right) {
377	// Fast path for identical strings. String deduplication during GC could
378	// make this more likely. String interning could guarantee it, allowing us
379	// to remove memcmp().
380	if (left == right) {
381	return true;
382	}
383
384	// TODO: It would be nice to remove this condition, but I think we need MyPy
385	// strict None checking for it
386	if (left == nullptr \|\| right == nullptr) {
387	return false;
388	}
389
390	if (left->len_ != right->len_) {
391	return false;
392	}
393
394	return memcmp(left->data_, right->data_, left->len_) == 0;
395	}
396
397	bool maybe_str_equals(BigStr* left, BigStr* right) {
398	if (left && right) {
399	return str_equals(left, right);
400	}
401
402	if (!left && !right) {
403	return true; // None == None
404	}
405
406	return false; // one is None and one is a BigStr*
407	}
408
409	bool items_equal(BigStr* left, BigStr* right) {
410	return str_equals(left, right);
411	}
412
413	bool keys_equal(BigStr* left, BigStr* right) {
414	return items_equal(left, right);
415	}
416
417	bool items_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
418	return (t1->at0() == t2->at0()) && (t1->at1() == t2->at1());
419	}
420
421	bool keys_equal(Tuple2<int, int>* t1, Tuple2<int, int>* t2) {
422	return items_equal(t1, t2);
423	}
424
425	bool items_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
426	return items_equal(t1->at0(), t2->at0()) && (t1->at1() == t2->at1());
427	}
428
429	bool keys_equal(Tuple2<BigStr, int> t1, Tuple2<BigStr, int> t2) {
430	return items_equal(t1, t2);
431	}
432
433	bool str_equals_c(BigStr* s, const char* c_string, int c_len) {
434	// Needs SmallStr change
435	if (len(s) == c_len) {
436	return memcmp(s->data_, c_string, c_len) == 0;
437	} else {
438	return false;
439	}
440	}
441
442	bool str_equals0(const char* c_string, BigStr* s) {
443	int n = strlen(c_string);
444	if (len(s) == n) {
445	return memcmp(s->data_, c_string, n) == 0;
446	} else {
447	return false;
448	}
449	}
450
451	int hash(BigStr* s) {
452	return s->hash(fnv1);
453	}
454
// Larger of two ints.
int max(int a, int b) {
  return (a < b) ? b : a;
}
458
// Smaller of two ints.
int min(int a, int b) {
  return (b < a) ? b : a;
}
462
463	int max(List<int>* elems) {
464	int n = len(elems);
465	if (n < 1) {
466	throw Alloc<ValueError>();
467	}
468
469	int ret = elems->at(0);
470	for (int i = 0; i < n; ++i) {
471	int cand = elems->at(i);
472	if (cand > ret) {
473	ret = cand;
474	}
475	}
476
477	return ret;
478	}