OILS / mycpp / gc_list.h View on Github | oilshell.org

540 lines, 295 significant
1#ifndef MYCPP_GC_LIST_H
2#define MYCPP_GC_LIST_H
3
4#include <string.h> // memcpy
5
6#include <algorithm> // sort() is templated
7
8#include "mycpp/common.h" // DCHECK
9#include "mycpp/comparators.h"
10#include "mycpp/gc_alloc.h" // Alloc
11#include "mycpp/gc_builtins.h" // ValueError
12#include "mycpp/gc_mops.h" // BigInt
13#include "mycpp/gc_slab.h"
14
15// GlobalList is layout-compatible with List (unit tests assert this), and it
16// can be a true C global (incurs zero startup time)
17
18template <typename T, int N>
19class GlobalList {
20 public:
21 int len_;
22 int capacity_;
23 GlobalSlab<T, N>* slab_;
24};
25
26#define GLOBAL_LIST(name, T, N, array) \
27 GcGlobal<GlobalSlab<T, N>> _slab_##name = {ObjHeader::Global(TypeTag::Slab), \
28 {.items_ = array}}; \
29 GcGlobal<GlobalList<T, N>> _list_##name = { \
30 ObjHeader::Global(TypeTag::List), \
31 {.len_ = N, .capacity_ = N, .slab_ = &_slab_##name.obj}}; \
32 List<T>* name = reinterpret_cast<List<T>*>(&_list_##name.obj);
33
34template <typename T>
35class List {
36 public:
37 List() : len_(0), capacity_(0), slab_(nullptr) {
38 }
39
40 protected:
41 // Used for ASDL subtypes with <. NOT even a shallow copy - it ALIASES thes
42 // slab.
43 explicit List(List* other)
44 : len_(other->len_), capacity_(other->capacity_), slab_(other->slab_) {
45 }
46
47 public:
48 // Implements L[i]
49 T at(int i);
50
51 // returns index of the element
52 int index(T element);
53
54 // Implements L[i] = item
55 void set(int i, T item);
56
57 // L[begin:]
58 List* slice(int begin);
59
60 // L[begin:end]
61 List* slice(int begin, int end);
62
63 // Should we have a separate API that doesn't return it?
64 // https://stackoverflow.com/questions/12600330/pop-back-return-value
65 T pop();
66
67 // Used in osh/word_parse.py to remove from front
68 T pop(int i);
69
70 // Remove the first occourence of x from the list.
71 void remove(T x);
72
73 void clear();
74
75 // Used in osh/string_ops.py
76 void reverse();
77
78 // Templated function
79 void sort();
80
81 // Ensure that there's space for at LEAST this many items
82 void reserve(int num_desired);
83
84 // Append a single element to this list.
85 void append(T item);
86
87 // Extend this list with multiple elements.
88 void extend(List<T>* other);
89
90 static constexpr ObjHeader obj_header() {
91 return ObjHeader::ClassFixed(field_mask(), sizeof(List<T>));
92 }
93
94 // Used by ASDL
95 void SetTaken();
96
97 int len_; // number of entries
98 int capacity_; // max entries before resizing
99
100 // The container may be resized, so this field isn't in-line.
101 Slab<T>* slab_;
102
103 // A list has one Slab pointer which we need to follow.
104 static constexpr uint32_t field_mask() {
105 return maskbit(offsetof(List, slab_));
106 }
107
108 DISALLOW_COPY_AND_ASSIGN(List)
109
110 static_assert(sizeof(ObjHeader) % sizeof(T) == 0,
111 "ObjHeader size should be multiple of item size");
112 static constexpr int kHeaderFudge = sizeof(ObjHeader) / sizeof(T);
113
114#if 0
115 // 24-byte pool comes from very common List header, and Token
116 static constexpr int kPoolBytes1 = 24 - sizeof(ObjHeader);
117 static_assert(kPoolBytes1 % sizeof(T) == 0,
118 "An integral number of items should fit in first pool");
119 static constexpr int kNumItems1 = kPoolBytes1 / sizeof(T);
120#endif
121
122 // Matches mark_sweep_heap.h
123 static constexpr int kPoolBytes2 = 48 - sizeof(ObjHeader);
124 static_assert(kPoolBytes2 % sizeof(T) == 0,
125 "An integral number of items should fit in second pool");
126 static constexpr int kNumItems2 = kPoolBytes2 / sizeof(T);
127
128#if 0
129 static constexpr int kMinBytes2 = 128 - sizeof(ObjHeader);
130 static_assert(kMinBytes2 % sizeof(T) == 0,
131 "An integral number of items should fit");
132 static constexpr int kMinItems2 = kMinBytes2 / sizeof(T);
133#endif
134
135 // Given the number of items desired, return the number items we should
136 // reserve room for, according to our growth policy.
137 int HowManyItems(int num_desired) {
138 // Using the 24-byte pool leads to too much GC of tiny slab objects! So
139 // just use the larger 48 byte pool.
140#if 0
141 if (num_desired <= kNumItems1) { // use full cell in pool 1
142 return kNumItems1;
143 }
144#endif
145 if (num_desired <= kNumItems2) { // use full cell in pool 2
146 return kNumItems2;
147 }
148#if 0
149 if (num_desired <= kMinItems2) { // 48 -> 128, not 48 -> 64
150 return kMinItems2;
151 }
152#endif
153
154 // Make sure the total allocation is a power of 2. TODO: consider using
155 // slightly less than power of 2, to account for malloc() headers, and
156 // reduce fragmentation.
157 // Example:
158 // - ask for 11 integers
159 // - round up 11+2 == 13 up to 16 items
160 // - return 14 items
161 // - 14 integers is 56 bytes, plus 8 byte GC header => 64 byte alloc.
162 return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
163 }
164};
165
166// "Constructors" as free functions since we can't allocate within a
167// constructor. Allocation may cause garbage collection, which interferes with
168// placement new.
169
170// This is not really necessary, only syntactic sugar.
171template <typename T>
172List<T>* NewList() {
173 return Alloc<List<T>>();
174}
175
176// Literal ['foo', 'bar']
177// This seems to allow better template argument type deduction than a
178// constructor.
179template <typename T>
180List<T>* NewList(std::initializer_list<T> init) {
181 auto self = Alloc<List<T>>();
182
183 int n = init.size();
184 self->reserve(n);
185
186 int i = 0;
187 for (auto item : init) {
188 self->slab_->items_[i] = item;
189 ++i;
190 }
191 self->len_ = n;
192 return self;
193}
194
195// ['foo'] * 3
196template <typename T>
197List<T>* NewList(T item, int times) {
198 auto self = Alloc<List<T>>();
199
200 self->reserve(times);
201 self->len_ = times;
202 for (int i = 0; i < times; ++i) {
203 self->set(i, item);
204 }
205 return self;
206}
207
208template <typename T>
209void List<T>::append(T item) {
210 reserve(len_ + 1);
211 slab_->items_[len_] = item;
212 ++len_;
213}
214
215template <typename T>
216int len(const List<T>* L) {
217 return L->len_;
218}
219
220template <typename T>
221List<T>* list_repeat(T item, int times);
222
223template <typename T>
224inline bool list_contains(List<T>* haystack, T needle);
225
226template <typename K, typename V>
227class Dict; // forward decl
228
229template <typename V>
230List<BigStr*>* sorted(Dict<BigStr*, V>* d);
231
232template <typename T>
233List<T>* sorted(List<T>* l);
234
235// L[begin:]
236template <typename T>
237List<T>* List<T>::slice(int begin) {
238 return slice(begin, len_);
239}
240
241// L[begin:end]
242template <typename T>
243List<T>* List<T>::slice(int begin, int end) {
244 SLICE_ADJUST(begin, end, len_);
245
246 DCHECK(0 <= begin && begin <= len_);
247 DCHECK(0 <= end && end <= len_);
248
249 int new_len = end - begin;
250 DCHECK(0 <= new_len && new_len <= len_);
251
252 List<T>* result = NewList<T>();
253 result->reserve(new_len);
254
255 // Faster than append() in a loop
256 memcpy(result->slab_->items_, slab_->items_ + begin, new_len * sizeof(T));
257 result->len_ = new_len;
258
259 return result;
260}
261
262// Ensure that there's space for a number of items
263template <typename T>
264void List<T>::reserve(int num_desired) {
265 // log("reserve capacity = %d, n = %d", capacity_, n);
266
267 // Don't do anything if there's already enough space.
268 if (capacity_ >= num_desired) {
269 return;
270 }
271
272 // Slabs should be a total of 2^N bytes. kCapacityAdjust is the number of
273 // items that the 8 byte header takes up: 1 for List<T*>, and 2 for
274 // List<int>.
275 //
276 // Example: the user reserves space for 3 integers. The minimum number of
277 // items would be 5, which is rounded up to 8. Subtract 2 again, giving 6,
278 // which leads to 8 + 6*4 = 32 byte Slab.
279
280 capacity_ = HowManyItems(num_desired);
281 auto new_slab = NewSlab<T>(capacity_);
282
283 if (len_ > 0) {
284 // log("Copying %d bytes", len_ * sizeof(T));
285 memcpy(new_slab->items_, slab_->items_, len_ * sizeof(T));
286 }
287 slab_ = new_slab;
288}
289
290// Implements L[i] = item
291template <typename T>
292void List<T>::set(int i, T item) {
293 if (i < 0) {
294 i = len_ + i;
295 }
296
297 if (0 > i || i >= len_) {
298 throw Alloc<IndexError>();
299 }
300
301 slab_->items_[i] = item;
302}
303
304// Implements L[i]
305template <typename T>
306T List<T>::at(int i) {
307 if (i < 0) {
308 i = len_ + i;
309 }
310
311 if (0 > i || i >= len_) {
312 throw Alloc<IndexError>();
313 }
314 return slab_->items_[i];
315}
316
317// L.index(i) -- Python method
318template <typename T>
319int List<T>::index(T value) {
320 int element_count = len(this);
321 for (int i = 0; i < element_count; i++) {
322 if (items_equal(slab_->items_[i], value)) {
323 return i;
324 }
325 }
326 throw Alloc<ValueError>();
327}
328
329// Should we have a separate API that doesn't return it?
330// https://stackoverflow.com/questions/12600330/pop-back-return-value
331template <typename T>
332T List<T>::pop() {
333 if (len_ == 0) {
334 throw Alloc<IndexError>();
335 }
336 len_--;
337 T result = slab_->items_[len_];
338 slab_->items_[len_] = 0; // zero for GC scan
339 return result;
340}
341
342// Used in osh/word_parse.py to remove from front
343template <typename T>
344T List<T>::pop(int i) {
345 if (len_ < i) {
346 throw Alloc<IndexError>();
347 }
348
349 T result = at(i);
350 len_--;
351
352 // Shift everything by one
353 memmove(slab_->items_ + i, slab_->items_ + (i + 1), (len_ - i) * sizeof(T));
354
355 /*
356 for (int j = 0; j < len_; j++) {
357 slab_->items_[j] = slab_->items_[j+1];
358 }
359 */
360
361 slab_->items_[len_] = 0; // zero for GC scan
362 return result;
363}
364
365template <typename T>
366void List<T>::remove(T x) {
367 int idx = this->index(x);
368 this->pop(idx); // unused
369}
370
371template <typename T>
372void List<T>::clear() {
373 if (slab_) {
374 memset(slab_->items_, 0, len_ * sizeof(T)); // zero for GC scan
375 }
376 len_ = 0;
377}
378
379// used by ASDL
380template <typename T>
381void List<T>::SetTaken() {
382 slab_ = nullptr;
383 len_ = 0;
384 capacity_ = 0;
385}
386
387// Used in osh/string_ops.py
388template <typename T>
389void List<T>::reverse() {
390 for (int i = 0; i < len_ / 2; ++i) {
391 // log("swapping %d and %d", i, n-i);
392 T tmp = slab_->items_[i];
393 int j = len_ - 1 - i;
394 slab_->items_[i] = slab_->items_[j];
395 slab_->items_[j] = tmp;
396 }
397}
398
399// Extend this list with multiple elements.
400template <typename T>
401void List<T>::extend(List<T>* other) {
402 int n = other->len_;
403 int new_len = len_ + n;
404 reserve(new_len);
405
406 for (int i = 0; i < n; ++i) {
407 slab_->items_[len_ + i] = other->slab_->items_[i];
408 }
409 len_ = new_len;
410}
411
412inline bool CompareBigStr(BigStr* a, BigStr* b) {
413 return mylib::str_cmp(a, b) < 0;
414}
415
416template <>
417inline void List<BigStr*>::sort() {
418 std::sort(slab_->items_, slab_->items_ + len_, CompareBigStr);
419}
420
421inline bool CompareBigInt(mops::BigInt a, mops::BigInt b) {
422 return a < b;
423}
424
425template <>
426inline void List<mops::BigInt>::sort() {
427 std::sort(slab_->items_, slab_->items_ + len_, CompareBigInt);
428}
429
430// TODO: mycpp can just generate the constructor instead?
431// e.g. [None] * 3
432template <typename T>
433List<T>* list_repeat(T item, int times) {
434 return NewList<T>(item, times);
435}
436
437// e.g. 'a' in ['a', 'b', 'c']
438template <typename T>
439inline bool list_contains(List<T>* haystack, T needle) {
440 int n = len(haystack);
441 for (int i = 0; i < n; ++i) {
442 if (items_equal(haystack->at(i), needle)) {
443 return true;
444 }
445 }
446 return false;
447}
448
449template <typename V>
450List<BigStr*>* sorted(Dict<BigStr*, V>* d) {
451 auto keys = d->keys();
452 keys->sort();
453 return keys;
454}
455
456template <typename T>
457List<T>* sorted(List<T>* l) {
458 auto ret = list(l);
459 ret->sort();
460 return ret;
461}
462
463// list(L) copies the list
464template <typename T>
465List<T>* list(List<T>* other) {
466 auto result = NewList<T>();
467 result->extend(other);
468 return result;
469}
470
471template <class T>
472class ListIter {
473 public:
474 explicit ListIter(List<T>* L) : L_(L), i_(0) {
475 // Cheney only: L_ could be moved during iteration.
476 // gHeap.PushRoot(reinterpret_cast<RawObject**>(&L_));
477 }
478
479 ~ListIter() {
480 // gHeap.PopRoot();
481 }
482 void Next() {
483 i_++;
484 }
485 bool Done() {
486 // "unsigned size_t was a mistake"
487 return i_ >= static_cast<int>(L_->len_);
488 }
489 T Value() {
490 return L_->slab_->items_[i_];
491 }
492 T iterNext() {
493 if (Done()) {
494 throw Alloc<StopIteration>();
495 }
496 T ret = L_->slab_->items_[i_];
497 Next();
498 return ret;
499 }
500
501 // only for use with generators
502 List<T>* GetList() {
503 return L_;
504 }
505
506 private:
507 List<T>* L_;
508 int i_;
509};
510
511// list(it) returns the iterator's backing list
512template <typename T>
513List<T>* list(ListIter<T> it) {
514 return list(it.GetList());
515}
516
517// TODO: Does using pointers rather than indices make this more efficient?
518template <class T>
519class ReverseListIter {
520 public:
521 explicit ReverseListIter(List<T>* L) : L_(L), i_(L_->len_ - 1) {
522 }
523 void Next() {
524 i_--;
525 }
526 bool Done() {
527 return i_ < 0;
528 }
529 T Value() {
530 return L_->slab_->items_[i_];
531 }
532
533 private:
534 List<T>* L_;
535 int i_;
536};
537
538int max(List<int>* elems);
539
540#endif // MYCPP_GC_LIST_H