mycpp/gc_list.h

OILS / mycpp / gc_list.h View on Github | oils.pub

540 lines, 295 significant

1	#ifndef MYCPP_GC_LIST_H
2	#define MYCPP_GC_LIST_H
3
4	#include <string.h> // memcpy
5
6	#include <algorithm> // sort() is templated
7
8	#include "mycpp/common.h" // DCHECK
9	#include "mycpp/comparators.h"
10	#include "mycpp/gc_alloc.h" // Alloc
11	#include "mycpp/gc_builtins.h" // ValueError
12	#include "mycpp/gc_mops.h" // BigInt
13	#include "mycpp/gc_slab.h"
14
15	// GlobalList is layout-compatible with List (unit tests assert this), and it
16	// can be a true C global (incurs zero startup time)
17
18	template <typename T, int N>
19	class GlobalList {
20	public:
21	int len_;
22	int capacity_;
23	GlobalSlab<T, N>* slab_;
24	};
25
26	#define GLOBAL_LIST(name, T, N, array) \
27	GcGlobal<GlobalSlab<T, N>> _slab_##name = {ObjHeader::Global(TypeTag::Slab), \
28	{.items_ = array}}; \
29	GcGlobal<GlobalList<T, N>> _list_##name = { \
30	ObjHeader::Global(TypeTag::List), \
31	{.len_ = N, .capacity_ = N, .slab_ = &_slab_##name.obj}}; \
32	List<T>* name = reinterpret_cast<List<T>*>(&_list_##name.obj);
33
34	template <typename T>
35	class List {
36	public:
37	List() : len_(0), capacity_(0), slab_(nullptr) {
38	}
39
40	protected:
41	// Used for ASDL subtypes with <. NOT even a shallow copy - it ALIASES thes
42	// slab.
43	explicit List(List* other)
44	: len_(other->len_), capacity_(other->capacity_), slab_(other->slab_) {
45	}
46
47	public:
48	// Implements L[i]
49	T at(int i);
50
51	// returns index of the element
52	int index(T element);
53
54	// Implements L[i] = item
55	void set(int i, T item);
56
57	// L[begin:]
58	List* slice(int begin);
59
60	// L[begin:end]
61	List* slice(int begin, int end);
62
63	// Should we have a separate API that doesn't return it?
64	// https://stackoverflow.com/questions/12600330/pop-back-return-value
65	T pop();
66
67	// Used in osh/word_parse.py to remove from front
68	T pop(int i);
69
70	// Remove the first occourence of x from the list.
71	void remove(T x);
72
73	void clear();
74
75	// Used in osh/string_ops.py
76	void reverse();
77
78	// Templated function
79	void sort();
80
81	// Ensure that there's space for at LEAST this many items
82	void reserve(int num_desired);
83
84	// Append a single element to this list.
85	void append(T item);
86
87	// Extend this list with multiple elements.
88	void extend(List<T>* other);
89
90	static constexpr ObjHeader obj_header() {
91	return ObjHeader::ClassFixed(field_mask(), sizeof(List<T>));
92	}
93
94	// Used by ASDL
95	void SetTaken();
96
97	int len_; // number of entries
98	int capacity_; // max entries before resizing
99
100	// The container may be resized, so this field isn't in-line.
101	Slab<T>* slab_;
102
103	// A list has one Slab pointer which we need to follow.
104	static constexpr uint32_t field_mask() {
105	return maskbit(offsetof(List, slab_));
106	}
107
108	DISALLOW_COPY_AND_ASSIGN(List)
109
110	static_assert(sizeof(ObjHeader) % sizeof(T) == 0,
111	"ObjHeader size should be multiple of item size");
112	static constexpr int kHeaderFudge = sizeof(ObjHeader) / sizeof(T);
113
114	#if 0
115	// 24-byte pool comes from very common List header, and Token
116	static constexpr int kPoolBytes1 = 24 - sizeof(ObjHeader);
117	static_assert(kPoolBytes1 % sizeof(T) == 0,
118	"An integral number of items should fit in first pool");
119	static constexpr int kNumItems1 = kPoolBytes1 / sizeof(T);
120	#endif
121
122	// Matches mark_sweep_heap.h
123	static constexpr int kPoolBytes2 = 48 - sizeof(ObjHeader);
124	static_assert(kPoolBytes2 % sizeof(T) == 0,
125	"An integral number of items should fit in second pool");
126	static constexpr int kNumItems2 = kPoolBytes2 / sizeof(T);
127
128	#if 0
129	static constexpr int kMinBytes2 = 128 - sizeof(ObjHeader);
130	static_assert(kMinBytes2 % sizeof(T) == 0,
131	"An integral number of items should fit");
132	static constexpr int kMinItems2 = kMinBytes2 / sizeof(T);
133	#endif
134
135	// Given the number of items desired, return the number items we should
136	// reserve room for, according to our growth policy.
137	int HowManyItems(int num_desired) {
138	// Using the 24-byte pool leads to too much GC of tiny slab objects! So
139	// just use the larger 48 byte pool.
140	#if 0
141	if (num_desired <= kNumItems1) { // use full cell in pool 1
142	return kNumItems1;
143	}
144	#endif
145	if (num_desired <= kNumItems2) { // use full cell in pool 2
146	return kNumItems2;
147	}
148	#if 0
149	if (num_desired <= kMinItems2) { // 48 -> 128, not 48 -> 64
150	return kMinItems2;
151	}
152	#endif
153
154	// Make sure the total allocation is a power of 2. TODO: consider using
155	// slightly less than power of 2, to account for malloc() headers, and
156	// reduce fragmentation.
157	// Example:
158	// - ask for 11 integers
159	// - round up 11+2 == 13 up to 16 items
160	// - return 14 items
161	// - 14 integers is 56 bytes, plus 8 byte GC header => 64 byte alloc.
162	return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
163	}
164	};
165
166	// "Constructors" as free functions since we can't allocate within a
167	// constructor. Allocation may cause garbage collection, which interferes with
168	// placement new.
169
170	// This is not really necessary, only syntactic sugar.
171	template <typename T>
172	List<T>* NewList() {
173	return Alloc<List<T>>();
174	}
175
176	// Literal ['foo', 'bar']
177	// This seems to allow better template argument type deduction than a
178	// constructor.
179	template <typename T>
180	List<T>* NewList(std::initializer_list<T> init) {
181	auto self = Alloc<List<T>>();
182
183	int n = init.size();
184	self->reserve(n);
185
186	int i = 0;
187	for (auto item : init) {
188	self->slab_->items_[i] = item;
189	++i;
190	}
191	self->len_ = n;
192	return self;
193	}
194
195	// ['foo'] * 3
196	template <typename T>
197	List<T>* NewList(T item, int times) {
198	auto self = Alloc<List<T>>();
199
200	self->reserve(times);
201	self->len_ = times;
202	for (int i = 0; i < times; ++i) {
203	self->set(i, item);
204	}
205	return self;
206	}
207
208	template <typename T>
209	void List<T>::append(T item) {
210	reserve(len_ + 1);
211	slab_->items_[len_] = item;
212	++len_;
213	}
214
215	template <typename T>
216	int len(const List<T>* L) {
217	return L->len_;
218	}
219
220	template <typename T>
221	List<T>* list_repeat(T item, int times);
222
223	template <typename T>
224	inline bool list_contains(List<T>* haystack, T needle);
225
226	template <typename K, typename V>
227	class Dict; // forward decl
228
229	template <typename V>
230	List<BigStr> sorted(Dict<BigStr, V> d);
231
232	template <typename T>
233	List<T>* sorted(List<T>* l);
234
235	// L[begin:]
236	template <typename T>
237	List<T>* List<T>::slice(int begin) {
238	return slice(begin, len_);
239	}
240
241	// L[begin:end]
242	template <typename T>
243	List<T>* List<T>::slice(int begin, int end) {
244	SLICE_ADJUST(begin, end, len_);
245
246	DCHECK(0 <= begin && begin <= len_);
247	DCHECK(0 <= end && end <= len_);
248
249	int new_len = end - begin;
250	DCHECK(0 <= new_len && new_len <= len_);
251
252	List<T>* result = NewList<T>();
253	result->reserve(new_len);
254
255	// Faster than append() in a loop
256	memcpy(result->slab_->items_, slab_->items_ + begin, new_len * sizeof(T));
257	result->len_ = new_len;
258
259	return result;
260	}
261
262	// Ensure that there's space for a number of items
263	template <typename T>
264	void List<T>::reserve(int num_desired) {
265	// log("reserve capacity = %d, n = %d", capacity_, n);
266
267	// Don't do anything if there's already enough space.
268	if (capacity_ >= num_desired) {
269	return;
270	}
271
272	// Slabs should be a total of 2^N bytes. kCapacityAdjust is the number of
273	// items that the 8 byte header takes up: 1 for List<T*>, and 2 for
274	// List<int>.
275	//
276	// Example: the user reserves space for 3 integers. The minimum number of
277	// items would be 5, which is rounded up to 8. Subtract 2 again, giving 6,
278	// which leads to 8 + 6*4 = 32 byte Slab.
279
280	capacity_ = HowManyItems(num_desired);
281	auto new_slab = NewSlab<T>(capacity_);
282
283	if (len_ > 0) {
284	// log("Copying %d bytes", len_ * sizeof(T));
285	memcpy(new_slab->items_, slab_->items_, len_ * sizeof(T));
286	}
287	slab_ = new_slab;
288	}
289
290	// Implements L[i] = item
291	template <typename T>
292	void List<T>::set(int i, T item) {
293	if (i < 0) {
294	i = len_ + i;
295	}
296
297	if (0 > i \|\| i >= len_) {
298	throw Alloc<IndexError>();
299	}
300
301	slab_->items_[i] = item;
302	}
303
304	// Implements L[i]
305	template <typename T>
306	T List<T>::at(int i) {
307	if (i < 0) {
308	i = len_ + i;
309	}
310
311	if (0 > i \|\| i >= len_) {
312	throw Alloc<IndexError>();
313	}
314	return slab_->items_[i];
315	}
316
317	// L.index(i) -- Python method
318	template <typename T>
319	int List<T>::index(T value) {
320	int element_count = len(this);
321	for (int i = 0; i < element_count; i++) {
322	if (items_equal(slab_->items_[i], value)) {
323	return i;
324	}
325	}
326	throw Alloc<ValueError>();
327	}
328
329	// Should we have a separate API that doesn't return it?
330	// https://stackoverflow.com/questions/12600330/pop-back-return-value
331	template <typename T>
332	T List<T>::pop() {
333	if (len_ == 0) {
334	throw Alloc<IndexError>();
335	}
336	len_--;
337	T result = slab_->items_[len_];
338	slab_->items_[len_] = 0; // zero for GC scan
339	return result;
340	}
341
342	// Used in osh/word_parse.py to remove from front
343	template <typename T>
344	T List<T>::pop(int i) {
345	if (len_ < i) {
346	throw Alloc<IndexError>();
347	}
348
349	T result = at(i);
350	len_--;
351
352	// Shift everything by one
353	memmove(slab_->items_ + i, slab_->items_ + (i + 1), (len_ - i) * sizeof(T));
354
355	/*
356	for (int j = 0; j < len_; j++) {
357	slab_->items_[j] = slab_->items_[j+1];
358	}
359	*/
360
361	slab_->items_[len_] = 0; // zero for GC scan
362	return result;
363	}
364
365	template <typename T>
366	void List<T>::remove(T x) {
367	int idx = this->index(x);
368	this->pop(idx); // unused
369	}
370
371	template <typename T>
372	void List<T>::clear() {
373	if (slab_) {
374	memset(slab_->items_, 0, len_ * sizeof(T)); // zero for GC scan
375	}
376	len_ = 0;
377	}
378
379	// used by ASDL
380	template <typename T>
381	void List<T>::SetTaken() {
382	slab_ = nullptr;
383	len_ = 0;
384	capacity_ = 0;
385	}
386
387	// Used in osh/string_ops.py
388	template <typename T>
389	void List<T>::reverse() {
390	for (int i = 0; i < len_ / 2; ++i) {
391	// log("swapping %d and %d", i, n-i);
392	T tmp = slab_->items_[i];
393	int j = len_ - 1 - i;
394	slab_->items_[i] = slab_->items_[j];
395	slab_->items_[j] = tmp;
396	}
397	}
398
399	// Extend this list with multiple elements.
400	template <typename T>
401	void List<T>::extend(List<T>* other) {
402	int n = other->len_;
403	int new_len = len_ + n;
404	reserve(new_len);
405
406	for (int i = 0; i < n; ++i) {
407	slab_->items_[len_ + i] = other->slab_->items_[i];
408	}
409	len_ = new_len;
410	}
411
412	inline bool CompareBigStr(BigStr* a, BigStr* b) {
413	return mylib::str_cmp(a, b) < 0;
414	}
415
416	template <>
417	inline void List<BigStr*>::sort() {
418	std::sort(slab_->items_, slab_->items_ + len_, CompareBigStr);
419	}
420
421	inline bool CompareBigInt(mops::BigInt a, mops::BigInt b) {
422	return a < b;
423	}
424
425	template <>
426	inline void List<mops::BigInt>::sort() {
427	std::sort(slab_->items_, slab_->items_ + len_, CompareBigInt);
428	}
429
430	// TODO: mycpp can just generate the constructor instead?
431	// e.g. [None] * 3
432	template <typename T>
433	List<T>* list_repeat(T item, int times) {
434	return NewList<T>(item, times);
435	}
436
437	// e.g. 'a' in ['a', 'b', 'c']
438	template <typename T>
439	inline bool list_contains(List<T>* haystack, T needle) {
440	int n = len(haystack);
441	for (int i = 0; i < n; ++i) {
442	if (items_equal(haystack->at(i), needle)) {
443	return true;
444	}
445	}
446	return false;
447	}
448
449	template <typename V>
450	List<BigStr> sorted(Dict<BigStr, V> d) {
451	auto keys = d->keys();
452	keys->sort();
453	return keys;
454	}
455
456	template <typename T>
457	List<T>* sorted(List<T>* l) {
458	auto ret = list(l);
459	ret->sort();
460	return ret;
461	}
462
463	// list(L) copies the list
464	template <typename T>
465	List<T>* list(List<T>* other) {
466	auto result = NewList<T>();
467	result->extend(other);
468	return result;
469	}
470
471	template <class T>
472	class ListIter {
473	public:
474	explicit ListIter(List<T>* L) : L_(L), i_(0) {
475	// Cheney only: L_ could be moved during iteration.
476	// gHeap.PushRoot(reinterpret_cast<RawObject**>(&L_));
477	}
478
479	~ListIter() {
480	// gHeap.PopRoot();
481	}
482	void Next() {
483	i_++;
484	}
485	bool Done() {
486	// "unsigned size_t was a mistake"
487	return i_ >= static_cast<int>(L_->len_);
488	}
489	T Value() {
490	return L_->slab_->items_[i_];
491	}
492	T iterNext() {
493	if (Done()) {
494	throw Alloc<StopIteration>();
495	}
496	T ret = L_->slab_->items_[i_];
497	Next();
498	return ret;
499	}
500
501	// only for use with generators
502	List<T>* GetList() {
503	return L_;
504	}
505
506	private:
507	List<T>* L_;
508	int i_;
509	};
510
511	// list(it) returns the iterator's backing list
512	template <typename T>
513	List<T>* list(ListIter<T> it) {
514	return list(it.GetList());
515	}
516
517	// TODO: Does using pointers rather than indices make this more efficient?
518	template <class T>
519	class ReverseListIter {
520	public:
521	explicit ReverseListIter(List<T>* L) : L_(L), i_(L_->len_ - 1) {
522	}
523	void Next() {
524	i_--;
525	}
526	bool Done() {
527	return i_ < 0;
528	}
529	T Value() {
530	return L_->slab_->items_[i_];
531	}
532
533	private:
534	List<T>* L_;
535	int i_;
536	};
537
538	int max(List<int>* elems);
539
540	#endif // MYCPP_GC_LIST_H