OILS / mycpp / gc_list.h View on Github | oils.pub

556 lines, 304 significant
1#ifndef MYCPP_GC_LIST_H
2#define MYCPP_GC_LIST_H
3
4#include <string.h> // memcpy
5
6#include <algorithm> // sort() is templated
7
8#include "mycpp/common.h" // DCHECK
9#include "mycpp/comparators.h"
10#include "mycpp/gc_alloc.h" // Alloc
11#include "mycpp/gc_builtins.h" // ValueError
12#include "mycpp/gc_mops.h" // BigInt
13#include "mycpp/gc_slab.h"
14
15// GlobalList is layout-compatible with List (unit tests assert this), and it
16// can be a true C global (incurs zero startup time)
17
18template <typename T, int N>
19class GlobalList {
20 public:
21 int len_;
22 int capacity_;
23 GlobalSlab<T, N>* slab_;
24};
25
26#define GLOBAL_LIST(name, T, N, array) \
27 GcGlobal<GlobalSlab<T, N>> _slab_##name = {ObjHeader::Global(TypeTag::Slab), \
28 {.items_ = array}}; \
29 GcGlobal<GlobalList<T, N>> _list_##name = { \
30 ObjHeader::Global(TypeTag::List), \
31 {.len_ = N, .capacity_ = N, .slab_ = &_slab_##name.obj}}; \
32 List<T>* name = reinterpret_cast<List<T>*>(&_list_##name.obj);
33
34template <typename T>
35class List : public Walkable {
36 public:
37 List() : len_(0), capacity_(0), slab_(nullptr) {
38 }
39
40 // Walkable::type_id()
41 int type_id() const {
42 // Only walk when it it's List<T*>
43 if (std::is_pointer<T>()) {
44 return TypeTag::List;
45 } else {
46 return kDoNotWalk; // opaque slab doesn't need to be walked
47 }
48 }
49
50 protected:
51 // Used for ASDL subtypes with <. NOT even a shallow copy - it ALIASES thes
52 // slab.
53 explicit List(List* other)
54 : len_(other->len_), capacity_(other->capacity_), slab_(other->slab_) {
55 }
56
57 public:
58 // Implements L[i]
59 T at(int i);
60
61 // returns index of the element
62 int index(T element);
63
64 // Implements L[i] = item
65 void set(int i, T item);
66
67 // L[begin:]
68 List* slice(int begin);
69
70 // L[begin:end]
71 List* slice(int begin, int end);
72
73 // Should we have a separate API that doesn't return it?
74 // https://stackoverflow.com/questions/12600330/pop-back-return-value
75 T pop();
76
77 // Used in osh/word_parse.py to remove from front
78 T pop(int i);
79
80 // Remove the first occurence of x from the list.
81 void remove(T x);
82
83 void clear();
84
85 // Used in osh/string_ops.py
86 void reverse();
87
88 // Templated function
89 void sort();
90
91 // Ensure that there's space for at LEAST this many items
92 void reserve(int num_desired);
93
94 // Append a single element to this list.
95 void append(T item);
96
97 // Extend this list with multiple elements.
98 void extend(List<T>* other);
99
100 static constexpr ObjHeader obj_header() {
101 return ObjHeader::ClassFixed(field_mask(), sizeof(List<T>));
102 }
103
104 // Used by ASDL
105 void SetTaken();
106
107 int len_; // number of entries
108 int capacity_; // max entries before resizing
109
110 // The container may be resized, so this field isn't in-line.
111 Slab<T>* slab_;
112
113 // A list has one Slab pointer which we need to follow.
114 static constexpr uint32_t field_mask() {
115 return maskbit(offsetof(List, slab_));
116 }
117
118 DISALLOW_COPY_AND_ASSIGN(List)
119
120 static_assert(sizeof(ObjHeader) % sizeof(T) == 0,
121 "ObjHeader size should be multiple of item size");
122 static constexpr int kHeaderFudge = sizeof(ObjHeader) / sizeof(T);
123
124#if 0
125 // 24-byte pool comes from very common List header, and Token
126 static constexpr int kPoolBytes1 = 24 - sizeof(ObjHeader);
127 static_assert(kPoolBytes1 % sizeof(T) == 0,
128 "An integral number of items should fit in first pool");
129 static constexpr int kNumItems1 = kPoolBytes1 / sizeof(T);
130#endif
131
132 // Matches mark_sweep_heap.h
133 static constexpr int kPoolBytes2 = 48 - sizeof(ObjHeader);
134 static_assert(kPoolBytes2 % sizeof(T) == 0,
135 "An integral number of items should fit in second pool");
136 static constexpr int kNumItems2 = kPoolBytes2 / sizeof(T);
137
138#if 0
139 static constexpr int kMinBytes2 = 128 - sizeof(ObjHeader);
140 static_assert(kMinBytes2 % sizeof(T) == 0,
141 "An integral number of items should fit");
142 static constexpr int kMinItems2 = kMinBytes2 / sizeof(T);
143#endif
144
145 // Given the number of items desired, return the number items we should
146 // reserve room for, according to our growth policy.
147 int HowManyItems(int num_desired) {
148 // Using the 24-byte pool leads to too much GC of tiny slab objects! So
149 // just use the larger 48 byte pool.
150#if 0
151 if (num_desired <= kNumItems1) { // use full cell in pool 1
152 return kNumItems1;
153 }
154#endif
155 if (num_desired <= kNumItems2) { // use full cell in pool 2
156 return kNumItems2;
157 }
158#if 0
159 if (num_desired <= kMinItems2) { // 48 -> 128, not 48 -> 64
160 return kMinItems2;
161 }
162#endif
163
164 // Make sure the total allocation is a power of 2. TODO: consider using
165 // slightly less than power of 2, to account for malloc() headers, and
166 // reduce fragmentation.
167 // Example:
168 // - ask for 11 integers
169 // - round up 11+2 == 13 up to 16 items
170 // - return 14 items
171 // - 14 integers is 56 bytes, plus 8 byte GC header => 64 byte alloc.
172 return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
173 }
174};
175
176// "Constructors" as free functions since we can't allocate within a
177// constructor. Allocation may cause garbage collection, which interferes with
178// placement new.
179
180// This is not really necessary, only syntactic sugar.
181template <typename T>
182List<T>* NewList() {
183 return Alloc<List<T>>();
184}
185
186// Literal ['foo', 'bar']
187// This seems to allow better template argument type deduction than a
188// constructor.
189template <typename T>
190List<T>* NewList(std::initializer_list<T> init) {
191 auto self = Alloc<List<T>>();
192
193 int n = init.size();
194 self->reserve(n);
195
196 int i = 0;
197 for (auto item : init) {
198 self->slab_->items_[i] = item;
199 ++i;
200 }
201 self->len_ = n;
202 return self;
203}
204
205// ['foo'] * 3
206template <typename T>
207List<T>* NewList(T item, int times) {
208 auto self = Alloc<List<T>>();
209
210 self->reserve(times);
211 self->len_ = times;
212 for (int i = 0; i < times; ++i) {
213 self->set(i, item);
214 }
215 return self;
216}
217
218template <typename T>
219void List<T>::append(T item) {
220 reserve(len_ + 1);
221 slab_->items_[len_] = item;
222 ++len_;
223}
224
225template <typename T>
226int len(const List<T>* L) {
227 return L->len_;
228}
229
230template <typename T>
231List<T>* list_repeat(T item, int times);
232
233template <typename T>
234inline bool list_contains(List<T>* haystack, T needle);
235
236template <typename K, typename V>
237class Dict; // forward decl
238
239template <typename V>
240List<BigStr*>* sorted(Dict<BigStr*, V>* d);
241
242template <typename T>
243List<T>* sorted(List<T>* l);
244
245// L[begin:]
246template <typename T>
247List<T>* List<T>::slice(int begin) {
248 return slice(begin, len_);
249}
250
251// L[begin:end]
252template <typename T>
253List<T>* List<T>::slice(int begin, int end) {
254 SLICE_ADJUST(begin, end, len_);
255
256 DCHECK(0 <= begin && begin <= len_);
257 DCHECK(0 <= end && end <= len_);
258
259 int new_len = end - begin;
260 DCHECK(0 <= new_len && new_len <= len_);
261
262 List<T>* result = NewList<T>();
263 if (new_len == 0) { // empty slice
264 return result;
265 }
266
267 result->reserve(new_len);
268 DCHECK(result->slab_);
269 // Faster than append() in a loop
270 memcpy(result->slab_->items_, slab_->items_ + begin, new_len * sizeof(T));
271 result->len_ = new_len;
272
273 return result;
274}
275
276// Ensure that there's space for a number of items
277template <typename T>
278void List<T>::reserve(int num_desired) {
279 // log("reserve capacity = %d, n = %d", capacity_, n);
280
281 // Don't do anything if there's already enough space.
282 if (capacity_ >= num_desired) {
283 return;
284 }
285
286 // Slabs should be a total of 2^N bytes. kCapacityAdjust is the number of
287 // items that the 8 byte header takes up: 1 for List<T*>, and 2 for
288 // List<int>.
289 //
290 // Example: the user reserves space for 3 integers. The minimum number of
291 // items would be 5, which is rounded up to 8. Subtract 2 again, giving 6,
292 // which leads to 8 + 6*4 = 32 byte Slab.
293
294 capacity_ = HowManyItems(num_desired);
295 auto new_slab = NewSlab<T>(capacity_);
296
297 if (len_ > 0) {
298 // log("Copying %d bytes", len_ * sizeof(T));
299 memcpy(new_slab->items_, slab_->items_, len_ * sizeof(T));
300 }
301 slab_ = new_slab;
302}
303
304// Implements L[i] = item
305template <typename T>
306void List<T>::set(int i, T item) {
307 if (i < 0) {
308 i = len_ + i;
309 }
310
311 if (0 > i || i >= len_) {
312 throw Alloc<IndexError>();
313 }
314
315 slab_->items_[i] = item;
316}
317
318// Implements L[i]
319template <typename T>
320T List<T>::at(int i) {
321 if (i < 0) {
322 i = len_ + i;
323 }
324
325 if (0 > i || i >= len_) {
326 throw Alloc<IndexError>();
327 }
328 return slab_->items_[i];
329}
330
331// L.index(i) -- Python method
332template <typename T>
333int List<T>::index(T value) {
334 int element_count = len(this);
335 for (int i = 0; i < element_count; i++) {
336 if (items_equal(slab_->items_[i], value)) {
337 return i;
338 }
339 }
340 throw Alloc<ValueError>();
341}
342
343// Should we have a separate API that doesn't return it?
344// https://stackoverflow.com/questions/12600330/pop-back-return-value
345template <typename T>
346T List<T>::pop() {
347 if (len_ == 0) {
348 throw Alloc<IndexError>();
349 }
350 len_--;
351 T result = slab_->items_[len_];
352 slab_->items_[len_] = 0; // zero for GC scan
353 return result;
354}
355
356// Used in osh/word_parse.py to remove from front
357template <typename T>
358T List<T>::pop(int i) {
359 if (len_ < i) {
360 throw Alloc<IndexError>();
361 }
362
363 T result = at(i);
364 len_--;
365
366 // Shift everything by one
367 memmove(slab_->items_ + i, slab_->items_ + (i + 1), (len_ - i) * sizeof(T));
368
369 /*
370 for (int j = 0; j < len_; j++) {
371 slab_->items_[j] = slab_->items_[j+1];
372 }
373 */
374
375 slab_->items_[len_] = 0; // zero for GC scan
376 return result;
377}
378
379template <typename T>
380void List<T>::remove(T x) {
381 int idx = this->index(x);
382 this->pop(idx); // unused
383}
384
385template <typename T>
386void List<T>::clear() {
387 if (slab_) {
388 memset(slab_->items_, 0, len_ * sizeof(T)); // zero for GC scan
389 }
390 len_ = 0;
391}
392
393// used by ASDL
394template <typename T>
395void List<T>::SetTaken() {
396 slab_ = nullptr;
397 len_ = 0;
398 capacity_ = 0;
399}
400
401// Used in osh/string_ops.py
402template <typename T>
403void List<T>::reverse() {
404 for (int i = 0; i < len_ / 2; ++i) {
405 // log("swapping %d and %d", i, n-i);
406 T tmp = slab_->items_[i];
407 int j = len_ - 1 - i;
408 slab_->items_[i] = slab_->items_[j];
409 slab_->items_[j] = tmp;
410 }
411}
412
413// Extend this list with multiple elements.
414template <typename T>
415void List<T>::extend(List<T>* other) {
416 int n = other->len_;
417 int new_len = len_ + n;
418 reserve(new_len);
419
420 for (int i = 0; i < n; ++i) {
421 slab_->items_[len_ + i] = other->slab_->items_[i];
422 }
423 len_ = new_len;
424}
425
426inline bool CompareBigStr(BigStr* a, BigStr* b) {
427 return mylib::str_cmp(a, b) < 0;
428}
429
430template <>
431inline void List<BigStr*>::sort() {
432 if (slab_) {
433 std::sort(slab_->items_, slab_->items_ + len_, CompareBigStr);
434 }
435}
436
437inline bool CompareBigInt(mops::BigInt a, mops::BigInt b) {
438 return a < b;
439}
440
441template <>
442inline void List<mops::BigInt>::sort() {
443 std::sort(slab_->items_, slab_->items_ + len_, CompareBigInt);
444}
445
446// TODO: mycpp can just generate the constructor instead?
447// e.g. [None] * 3
448template <typename T>
449List<T>* list_repeat(T item, int times) {
450 return NewList<T>(item, times);
451}
452
453// e.g. 'a' in ['a', 'b', 'c']
454template <typename T>
455inline bool list_contains(List<T>* haystack, T needle) {
456 int n = len(haystack);
457 for (int i = 0; i < n; ++i) {
458 if (items_equal(haystack->at(i), needle)) {
459 return true;
460 }
461 }
462 return false;
463}
464
465template <typename V>
466List<BigStr*>* sorted(Dict<BigStr*, V>* d) {
467 auto keys = d->keys();
468 keys->sort();
469 return keys;
470}
471
472template <typename T>
473List<T>* sorted(List<T>* l) {
474 auto ret = list(l);
475 ret->sort();
476 return ret;
477}
478
479// list(L) copies the list
480template <typename T>
481List<T>* list(List<T>* other) {
482 auto result = NewList<T>();
483 result->extend(other);
484 return result;
485}
486
487template <class T>
488class ListIter {
489 public:
490 explicit ListIter(List<T>* L) : L_(L), i_(0) {
491 // Cheney only: L_ could be moved during iteration.
492 // gHeap.PushRoot(reinterpret_cast<RawObject**>(&L_));
493 }
494
495 ~ListIter() {
496 // gHeap.PopRoot();
497 }
498 void Next() {
499 i_++;
500 }
501 bool Done() {
502 // "unsigned size_t was a mistake"
503 return i_ >= static_cast<int>(L_->len_);
504 }
505 T Value() {
506 return L_->slab_->items_[i_];
507 }
508 T iterNext() {
509 if (Done()) {
510 throw Alloc<StopIteration>();
511 }
512 T ret = L_->slab_->items_[i_];
513 Next();
514 return ret;
515 }
516
517 // only for use with generators
518 List<T>* GetList() {
519 return L_;
520 }
521
522 private:
523 List<T>* L_;
524 int i_;
525};
526
527// list(it) returns the iterator's backing list
528template <typename T>
529List<T>* list(ListIter<T> it) {
530 return list(it.GetList());
531}
532
533// TODO: Does using pointers rather than indices make this more efficient?
534template <class T>
535class ReverseListIter {
536 public:
537 explicit ReverseListIter(List<T>* L) : L_(L), i_(L_->len_ - 1) {
538 }
539 void Next() {
540 i_--;
541 }
542 bool Done() {
543 return i_ < 0;
544 }
545 T Value() {
546 return L_->slab_->items_[i_];
547 }
548
549 private:
550 List<T>* L_;
551 int i_;
552};
553
554int max(List<int>* elems);
555
556#endif // MYCPP_GC_LIST_H