OILS / mycpp / gc_list.h View on Github | oils.pub

558 lines, 306 significant
1#ifndef MYCPP_GC_LIST_H
2#define MYCPP_GC_LIST_H
3
4#include <string.h> // memcpy
5
6#include <algorithm> // sort() is templated
7
8#include "mycpp/common.h" // DCHECK
9#include "mycpp/comparators.h"
10#include "mycpp/gc_alloc.h" // Alloc
11#include "mycpp/gc_builtins.h" // ValueError
12#include "mycpp/gc_mops.h" // BigInt
13#include "mycpp/gc_slab.h"
14
15// GlobalList is layout-compatible with List (unit tests assert this), and it
16// can be a true C global (incurs zero startup time)
17
18template <typename T, int N>
19class GlobalList {
20 public:
21 int len_;
22 int capacity_;
23 GlobalSlab<T, N>* slab_;
24};
25
26#define GLOBAL_LIST(name, T, N, array) \
27 GcGlobal<GlobalSlab<T, N>> _slab_##name = {ObjHeader::Global(TypeTag::Slab), \
28 {.items_ = array}}; \
29 GcGlobal<GlobalList<T, N>> _list_##name = { \
30 ObjHeader::Global(TypeTag::List), \
31 {.len_ = N, .capacity_ = N, .slab_ = &_slab_##name.obj}}; \
32 List<T>* name = reinterpret_cast<List<T>*>(&_list_##name.obj);
33
34template <typename T>
35class List {
36 public:
37 List() : len_(0), capacity_(0), slab_(nullptr) {
38 }
39
40#if 0
41 // Walkable::type_id()
42 int type_id() const {
43 // Only walk when it it's List<T*>
44 if (std::is_pointer<T>()) {
45 return TypeTag::List;
46 } else {
47 return kDoNotWalk; // opaque slab doesn't need to be walked
48 }
49 }
50#endif
51
52 protected:
53 // Used for ASDL subtypes with <. NOT even a shallow copy - it ALIASES thes
54 // slab.
55 explicit List(List* other)
56 : len_(other->len_), capacity_(other->capacity_), slab_(other->slab_) {
57 }
58
59 public:
60 // Implements L[i]
61 T at(int i);
62
63 // returns index of the element
64 int index(T element);
65
66 // Implements L[i] = item
67 void set(int i, T item);
68
69 // L[begin:]
70 List* slice(int begin);
71
72 // L[begin:end]
73 List* slice(int begin, int end);
74
75 // Should we have a separate API that doesn't return it?
76 // https://stackoverflow.com/questions/12600330/pop-back-return-value
77 T pop();
78
79 // Used in osh/word_parse.py to remove from front
80 T pop(int i);
81
82 // Remove the first occurence of x from the list.
83 void remove(T x);
84
85 void clear();
86
87 // Used in osh/string_ops.py
88 void reverse();
89
90 // Templated function
91 void sort();
92
93 // Ensure that there's space for at LEAST this many items
94 void reserve(int num_desired);
95
96 // Append a single element to this list.
97 void append(T item);
98
99 // Extend this list with multiple elements.
100 void extend(List<T>* other);
101
102 static constexpr ObjHeader obj_header() {
103 return ObjHeader::ClassFixed(field_mask(), sizeof(List<T>));
104 }
105
106 // Used by ASDL
107 void SetTaken();
108
109 int len_; // number of entries
110 int capacity_; // max entries before resizing
111
112 // The container may be resized, so this field isn't in-line.
113 Slab<T>* slab_;
114
115 // A list has one Slab pointer which we need to follow.
116 static constexpr uint32_t field_mask() {
117 return maskbit(offsetof(List, slab_));
118 }
119
120 DISALLOW_COPY_AND_ASSIGN(List)
121
122 static_assert(sizeof(ObjHeader) % sizeof(T) == 0,
123 "ObjHeader size should be multiple of item size");
124 static constexpr int kHeaderFudge = sizeof(ObjHeader) / sizeof(T);
125
126#if 0
127 // 24-byte pool comes from very common List header, and Token
128 static constexpr int kPoolBytes1 = 24 - sizeof(ObjHeader);
129 static_assert(kPoolBytes1 % sizeof(T) == 0,
130 "An integral number of items should fit in first pool");
131 static constexpr int kNumItems1 = kPoolBytes1 / sizeof(T);
132#endif
133
134 // Matches mark_sweep_heap.h
135 static constexpr int kPoolBytes2 = 48 - sizeof(ObjHeader);
136 static_assert(kPoolBytes2 % sizeof(T) == 0,
137 "An integral number of items should fit in second pool");
138 static constexpr int kNumItems2 = kPoolBytes2 / sizeof(T);
139
140#if 0
141 static constexpr int kMinBytes2 = 128 - sizeof(ObjHeader);
142 static_assert(kMinBytes2 % sizeof(T) == 0,
143 "An integral number of items should fit");
144 static constexpr int kMinItems2 = kMinBytes2 / sizeof(T);
145#endif
146
147 // Given the number of items desired, return the number items we should
148 // reserve room for, according to our growth policy.
149 int HowManyItems(int num_desired) {
150 // Using the 24-byte pool leads to too much GC of tiny slab objects! So
151 // just use the larger 48 byte pool.
152#if 0
153 if (num_desired <= kNumItems1) { // use full cell in pool 1
154 return kNumItems1;
155 }
156#endif
157 if (num_desired <= kNumItems2) { // use full cell in pool 2
158 return kNumItems2;
159 }
160#if 0
161 if (num_desired <= kMinItems2) { // 48 -> 128, not 48 -> 64
162 return kMinItems2;
163 }
164#endif
165
166 // Make sure the total allocation is a power of 2. TODO: consider using
167 // slightly less than power of 2, to account for malloc() headers, and
168 // reduce fragmentation.
169 // Example:
170 // - ask for 11 integers
171 // - round up 11+2 == 13 up to 16 items
172 // - return 14 items
173 // - 14 integers is 56 bytes, plus 8 byte GC header => 64 byte alloc.
174 return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
175 }
176};
177
178// "Constructors" as free functions since we can't allocate within a
179// constructor. Allocation may cause garbage collection, which interferes with
180// placement new.
181
182// This is not really necessary, only syntactic sugar.
183template <typename T>
184List<T>* NewList() {
185 return Alloc<List<T>>();
186}
187
188// Literal ['foo', 'bar']
189// This seems to allow better template argument type deduction than a
190// constructor.
191template <typename T>
192List<T>* NewList(std::initializer_list<T> init) {
193 auto self = Alloc<List<T>>();
194
195 int n = init.size();
196 self->reserve(n);
197
198 int i = 0;
199 for (auto item : init) {
200 self->slab_->items_[i] = item;
201 ++i;
202 }
203 self->len_ = n;
204 return self;
205}
206
207// ['foo'] * 3
208template <typename T>
209List<T>* NewList(T item, int times) {
210 auto self = Alloc<List<T>>();
211
212 self->reserve(times);
213 self->len_ = times;
214 for (int i = 0; i < times; ++i) {
215 self->set(i, item);
216 }
217 return self;
218}
219
220template <typename T>
221void List<T>::append(T item) {
222 reserve(len_ + 1);
223 slab_->items_[len_] = item;
224 ++len_;
225}
226
227template <typename T>
228int len(const List<T>* L) {
229 return L->len_;
230}
231
232template <typename T>
233List<T>* list_repeat(T item, int times);
234
235template <typename T>
236inline bool list_contains(List<T>* haystack, T needle);
237
238template <typename K, typename V>
239class Dict; // forward decl
240
241template <typename V>
242List<BigStr*>* sorted(Dict<BigStr*, V>* d);
243
244template <typename T>
245List<T>* sorted(List<T>* l);
246
247// L[begin:]
248template <typename T>
249List<T>* List<T>::slice(int begin) {
250 return slice(begin, len_);
251}
252
253// L[begin:end]
254template <typename T>
255List<T>* List<T>::slice(int begin, int end) {
256 SLICE_ADJUST(begin, end, len_);
257
258 DCHECK(0 <= begin && begin <= len_);
259 DCHECK(0 <= end && end <= len_);
260
261 int new_len = end - begin;
262 DCHECK(0 <= new_len && new_len <= len_);
263
264 List<T>* result = NewList<T>();
265 if (new_len == 0) { // empty slice
266 return result;
267 }
268
269 result->reserve(new_len);
270 DCHECK(result->slab_);
271 // Faster than append() in a loop
272 memcpy(result->slab_->items_, slab_->items_ + begin, new_len * sizeof(T));
273 result->len_ = new_len;
274
275 return result;
276}
277
278// Ensure that there's space for a number of items
279template <typename T>
280void List<T>::reserve(int num_desired) {
281 // log("reserve capacity = %d, n = %d", capacity_, n);
282
283 // Don't do anything if there's already enough space.
284 if (capacity_ >= num_desired) {
285 return;
286 }
287
288 // Slabs should be a total of 2^N bytes. kCapacityAdjust is the number of
289 // items that the 8 byte header takes up: 1 for List<T*>, and 2 for
290 // List<int>.
291 //
292 // Example: the user reserves space for 3 integers. The minimum number of
293 // items would be 5, which is rounded up to 8. Subtract 2 again, giving 6,
294 // which leads to 8 + 6*4 = 32 byte Slab.
295
296 capacity_ = HowManyItems(num_desired);
297 auto new_slab = NewSlab<T>(capacity_);
298
299 if (len_ > 0) {
300 // log("Copying %d bytes", len_ * sizeof(T));
301 memcpy(new_slab->items_, slab_->items_, len_ * sizeof(T));
302 }
303 slab_ = new_slab;
304}
305
306// Implements L[i] = item
307template <typename T>
308void List<T>::set(int i, T item) {
309 if (i < 0) {
310 i = len_ + i;
311 }
312
313 if (0 > i || i >= len_) {
314 throw Alloc<IndexError>();
315 }
316
317 slab_->items_[i] = item;
318}
319
320// Implements L[i]
321template <typename T>
322T List<T>::at(int i) {
323 if (i < 0) {
324 i = len_ + i;
325 }
326
327 if (0 > i || i >= len_) {
328 throw Alloc<IndexError>();
329 }
330 return slab_->items_[i];
331}
332
333// L.index(i) -- Python method
334template <typename T>
335int List<T>::index(T value) {
336 int element_count = len(this);
337 for (int i = 0; i < element_count; i++) {
338 if (items_equal(slab_->items_[i], value)) {
339 return i;
340 }
341 }
342 throw Alloc<ValueError>();
343}
344
345// Should we have a separate API that doesn't return it?
346// https://stackoverflow.com/questions/12600330/pop-back-return-value
347template <typename T>
348T List<T>::pop() {
349 if (len_ == 0) {
350 throw Alloc<IndexError>();
351 }
352 len_--;
353 T result = slab_->items_[len_];
354 slab_->items_[len_] = 0; // zero for GC scan
355 return result;
356}
357
358// Used in osh/word_parse.py to remove from front
359template <typename T>
360T List<T>::pop(int i) {
361 if (len_ < i) {
362 throw Alloc<IndexError>();
363 }
364
365 T result = at(i);
366 len_--;
367
368 // Shift everything by one
369 memmove(slab_->items_ + i, slab_->items_ + (i + 1), (len_ - i) * sizeof(T));
370
371 /*
372 for (int j = 0; j < len_; j++) {
373 slab_->items_[j] = slab_->items_[j+1];
374 }
375 */
376
377 slab_->items_[len_] = 0; // zero for GC scan
378 return result;
379}
380
381template <typename T>
382void List<T>::remove(T x) {
383 int idx = this->index(x);
384 this->pop(idx); // unused
385}
386
387template <typename T>
388void List<T>::clear() {
389 if (slab_) {
390 memset(slab_->items_, 0, len_ * sizeof(T)); // zero for GC scan
391 }
392 len_ = 0;
393}
394
395// used by ASDL
396template <typename T>
397void List<T>::SetTaken() {
398 slab_ = nullptr;
399 len_ = 0;
400 capacity_ = 0;
401}
402
403// Used in osh/string_ops.py
404template <typename T>
405void List<T>::reverse() {
406 for (int i = 0; i < len_ / 2; ++i) {
407 // log("swapping %d and %d", i, n-i);
408 T tmp = slab_->items_[i];
409 int j = len_ - 1 - i;
410 slab_->items_[i] = slab_->items_[j];
411 slab_->items_[j] = tmp;
412 }
413}
414
415// Extend this list with multiple elements.
416template <typename T>
417void List<T>::extend(List<T>* other) {
418 int n = other->len_;
419 int new_len = len_ + n;
420 reserve(new_len);
421
422 for (int i = 0; i < n; ++i) {
423 slab_->items_[len_ + i] = other->slab_->items_[i];
424 }
425 len_ = new_len;
426}
427
428inline bool CompareBigStr(BigStr* a, BigStr* b) {
429 return mylib::str_cmp(a, b) < 0;
430}
431
432template <>
433inline void List<BigStr*>::sort() {
434 if (slab_) {
435 std::sort(slab_->items_, slab_->items_ + len_, CompareBigStr);
436 }
437}
438
439inline bool CompareBigInt(mops::BigInt a, mops::BigInt b) {
440 return a < b;
441}
442
443template <>
444inline void List<mops::BigInt>::sort() {
445 std::sort(slab_->items_, slab_->items_ + len_, CompareBigInt);
446}
447
448// TODO: mycpp can just generate the constructor instead?
449// e.g. [None] * 3
450template <typename T>
451List<T>* list_repeat(T item, int times) {
452 return NewList<T>(item, times);
453}
454
455// e.g. 'a' in ['a', 'b', 'c']
456template <typename T>
457inline bool list_contains(List<T>* haystack, T needle) {
458 int n = len(haystack);
459 for (int i = 0; i < n; ++i) {
460 if (items_equal(haystack->at(i), needle)) {
461 return true;
462 }
463 }
464 return false;
465}
466
467template <typename V>
468List<BigStr*>* sorted(Dict<BigStr*, V>* d) {
469 auto keys = d->keys();
470 keys->sort();
471 return keys;
472}
473
474template <typename T>
475List<T>* sorted(List<T>* l) {
476 auto ret = list(l);
477 ret->sort();
478 return ret;
479}
480
481// list(L) copies the list
482template <typename T>
483List<T>* list(List<T>* other) {
484 auto result = NewList<T>();
485 result->extend(other);
486 return result;
487}
488
489template <class T>
490class ListIter {
491 public:
492 explicit ListIter(List<T>* L) : L_(L), i_(0) {
493 // Cheney only: L_ could be moved during iteration.
494 // gHeap.PushRoot(reinterpret_cast<RawObject**>(&L_));
495 }
496
497 ~ListIter() {
498 // gHeap.PopRoot();
499 }
500 void Next() {
501 i_++;
502 }
503 bool Done() {
504 // "unsigned size_t was a mistake"
505 return i_ >= static_cast<int>(L_->len_);
506 }
507 T Value() {
508 return L_->slab_->items_[i_];
509 }
510 T iterNext() {
511 if (Done()) {
512 throw Alloc<StopIteration>();
513 }
514 T ret = L_->slab_->items_[i_];
515 Next();
516 return ret;
517 }
518
519 // only for use with generators
520 List<T>* GetList() {
521 return L_;
522 }
523
524 private:
525 List<T>* L_;
526 int i_;
527};
528
529// list(it) returns the iterator's backing list
530template <typename T>
531List<T>* list(ListIter<T> it) {
532 return list(it.GetList());
533}
534
535// TODO: Does using pointers rather than indices make this more efficient?
536template <class T>
537class ReverseListIter {
538 public:
539 explicit ReverseListIter(List<T>* L) : L_(L), i_(L_->len_ - 1) {
540 }
541 void Next() {
542 i_--;
543 }
544 bool Done() {
545 return i_ < 0;
546 }
547 T Value() {
548 return L_->slab_->items_[i_];
549 }
550
551 private:
552 List<T>* L_;
553 int i_;
554};
555
556int max(List<int>* elems);
557
558#endif // MYCPP_GC_LIST_H