OILS / mycpp / gc_list.h View on Github | oilshell.org

521 lines, 285 significant
1#ifndef MYCPP_GC_LIST_H
2#define MYCPP_GC_LIST_H
3
4#include <string.h> // memcpy
5
6#include <algorithm> // sort() is templated
7
8#include "mycpp/common.h" // DCHECK
9#include "mycpp/comparators.h"
10#include "mycpp/gc_alloc.h" // Alloc
11#include "mycpp/gc_builtins.h" // ValueError
12#include "mycpp/gc_mops.h" // BigInt
13#include "mycpp/gc_slab.h"
14
15// GlobalList is layout-compatible with List (unit tests assert this), and it
16// can be a true C global (incurs zero startup time)
17
18template <typename T, int N>
19class GlobalList {
20 public:
21 int len_;
22 int capacity_;
23 GlobalSlab<T, N>* slab_;
24};
25
26#define GLOBAL_LIST(name, T, N, array) \
27 GcGlobal<GlobalSlab<T, N>> _slab_##name = {ObjHeader::Global(TypeTag::Slab), \
28 {.items_ = array}}; \
29 GcGlobal<GlobalList<T, N>> _list_##name = { \
30 ObjHeader::Global(TypeTag::List), \
31 {.len_ = N, .capacity_ = N, .slab_ = &_slab_##name.obj}}; \
32 List<T>* name = reinterpret_cast<List<T>*>(&_list_##name.obj);
33
34template <typename T>
35class List {
36 public:
37 List() : len_(0), capacity_(0), slab_(nullptr) {
38 }
39
40 // Implements L[i]
41 T at(int i);
42
43 // returns index of the element
44 int index(T element);
45
46 // Implements L[i] = item
47 void set(int i, T item);
48
49 // L[begin:]
50 List* slice(int begin);
51
52 // L[begin:end]
53 List* slice(int begin, int end);
54
55 // Should we have a separate API that doesn't return it?
56 // https://stackoverflow.com/questions/12600330/pop-back-return-value
57 T pop();
58
59 // Used in osh/word_parse.py to remove from front
60 T pop(int i);
61
62 // Remove the first occourence of x from the list.
63 void remove(T x);
64
65 void clear();
66
67 // Used in osh/string_ops.py
68 void reverse();
69
70 // Templated function
71 void sort();
72
73 // Ensure that there's space for at LEAST this many items
74 void reserve(int num_desired);
75
76 // Append a single element to this list.
77 void append(T item);
78
79 // Extend this list with multiple elements.
80 void extend(List<T>* other);
81
82 static constexpr ObjHeader obj_header() {
83 return ObjHeader::ClassFixed(field_mask(), sizeof(List<T>));
84 }
85
86 int len_; // number of entries
87 int capacity_; // max entries before resizing
88
89 // The container may be resized, so this field isn't in-line.
90 Slab<T>* slab_;
91
92 // A list has one Slab pointer which we need to follow.
93 static constexpr uint32_t field_mask() {
94 return maskbit(offsetof(List, slab_));
95 }
96
97 DISALLOW_COPY_AND_ASSIGN(List)
98
99 static_assert(sizeof(ObjHeader) % sizeof(T) == 0,
100 "ObjHeader size should be multiple of item size");
101 static constexpr int kHeaderFudge = sizeof(ObjHeader) / sizeof(T);
102
103#if 0
104 // 24-byte pool comes from very common List header, and Token
105 static constexpr int kPoolBytes1 = 24 - sizeof(ObjHeader);
106 static_assert(kPoolBytes1 % sizeof(T) == 0,
107 "An integral number of items should fit in first pool");
108 static constexpr int kNumItems1 = kPoolBytes1 / sizeof(T);
109#endif
110
111 // Matches mark_sweep_heap.h
112 static constexpr int kPoolBytes2 = 48 - sizeof(ObjHeader);
113 static_assert(kPoolBytes2 % sizeof(T) == 0,
114 "An integral number of items should fit in second pool");
115 static constexpr int kNumItems2 = kPoolBytes2 / sizeof(T);
116
117#if 0
118 static constexpr int kMinBytes2 = 128 - sizeof(ObjHeader);
119 static_assert(kMinBytes2 % sizeof(T) == 0,
120 "An integral number of items should fit");
121 static constexpr int kMinItems2 = kMinBytes2 / sizeof(T);
122#endif
123
124 // Given the number of items desired, return the number items we should
125 // reserve room for, according to our growth policy.
126 int HowManyItems(int num_desired) {
127 // Using the 24-byte pool leads to too much GC of tiny slab objects! So
128 // just use the larger 48 byte pool.
129#if 0
130 if (num_desired <= kNumItems1) { // use full cell in pool 1
131 return kNumItems1;
132 }
133#endif
134 if (num_desired <= kNumItems2) { // use full cell in pool 2
135 return kNumItems2;
136 }
137#if 0
138 if (num_desired <= kMinItems2) { // 48 -> 128, not 48 -> 64
139 return kMinItems2;
140 }
141#endif
142
143 // Make sure the total allocation is a power of 2. TODO: consider using
144 // slightly less than power of 2, to account for malloc() headers, and
145 // reduce fragmentation.
146 // Example:
147 // - ask for 11 integers
148 // - round up 11+2 == 13 up to 16 items
149 // - return 14 items
150 // - 14 integers is 56 bytes, plus 8 byte GC header => 64 byte alloc.
151 return RoundUp(num_desired + kHeaderFudge) - kHeaderFudge;
152 }
153};
154
155// "Constructors" as free functions since we can't allocate within a
156// constructor. Allocation may cause garbage collection, which interferes with
157// placement new.
158
159// This is not really necessary, only syntactic sugar.
160template <typename T>
161List<T>* NewList() {
162 return Alloc<List<T>>();
163}
164
165// Literal ['foo', 'bar']
166// This seems to allow better template argument type deduction than a
167// constructor.
168template <typename T>
169List<T>* NewList(std::initializer_list<T> init) {
170 auto self = Alloc<List<T>>();
171
172 int n = init.size();
173 self->reserve(n);
174
175 int i = 0;
176 for (auto item : init) {
177 self->slab_->items_[i] = item;
178 ++i;
179 }
180 self->len_ = n;
181 return self;
182}
183
184// ['foo'] * 3
185template <typename T>
186List<T>* NewList(T item, int times) {
187 auto self = Alloc<List<T>>();
188
189 self->reserve(times);
190 self->len_ = times;
191 for (int i = 0; i < times; ++i) {
192 self->set(i, item);
193 }
194 return self;
195}
196
197template <typename T>
198void List<T>::append(T item) {
199 reserve(len_ + 1);
200 slab_->items_[len_] = item;
201 ++len_;
202}
203
204template <typename T>
205int len(const List<T>* L) {
206 return L->len_;
207}
208
209template <typename T>
210List<T>* list_repeat(T item, int times);
211
212template <typename T>
213inline bool list_contains(List<T>* haystack, T needle);
214
215template <typename K, typename V>
216class Dict; // forward decl
217
218template <typename V>
219List<BigStr*>* sorted(Dict<BigStr*, V>* d);
220
221template <typename T>
222List<T>* sorted(List<T>* l);
223
224// L[begin:]
225template <typename T>
226List<T>* List<T>::slice(int begin) {
227 return slice(begin, len_);
228}
229
230// L[begin:end]
231template <typename T>
232List<T>* List<T>::slice(int begin, int end) {
233 SLICE_ADJUST(begin, end, len_);
234
235 DCHECK(0 <= begin && begin <= len_);
236 DCHECK(0 <= end && end <= len_);
237
238 int new_len = end - begin;
239 DCHECK(0 <= new_len && new_len <= len_);
240
241 List<T>* result = NewList<T>();
242 result->reserve(new_len);
243
244 // Faster than append() in a loop
245 memcpy(result->slab_->items_, slab_->items_ + begin, new_len * sizeof(T));
246 result->len_ = new_len;
247
248 return result;
249}
250
251// Ensure that there's space for a number of items
252template <typename T>
253void List<T>::reserve(int num_desired) {
254 // log("reserve capacity = %d, n = %d", capacity_, n);
255
256 // Don't do anything if there's already enough space.
257 if (capacity_ >= num_desired) {
258 return;
259 }
260
261 // Slabs should be a total of 2^N bytes. kCapacityAdjust is the number of
262 // items that the 8 byte header takes up: 1 for List<T*>, and 2 for
263 // List<int>.
264 //
265 // Example: the user reserves space for 3 integers. The minimum number of
266 // items would be 5, which is rounded up to 8. Subtract 2 again, giving 6,
267 // which leads to 8 + 6*4 = 32 byte Slab.
268
269 capacity_ = HowManyItems(num_desired);
270 auto new_slab = NewSlab<T>(capacity_);
271
272 if (len_ > 0) {
273 // log("Copying %d bytes", len_ * sizeof(T));
274 memcpy(new_slab->items_, slab_->items_, len_ * sizeof(T));
275 }
276 slab_ = new_slab;
277}
278
279// Implements L[i] = item
280template <typename T>
281void List<T>::set(int i, T item) {
282 if (i < 0) {
283 i = len_ + i;
284 }
285
286 if (0 > i || i >= len_) {
287 throw Alloc<IndexError>();
288 }
289
290 slab_->items_[i] = item;
291}
292
293// Implements L[i]
294template <typename T>
295T List<T>::at(int i) {
296 if (i < 0) {
297 i = len_ + i;
298 }
299
300 if (0 > i || i >= len_) {
301 throw Alloc<IndexError>();
302 }
303 return slab_->items_[i];
304}
305
306// L.index(i) -- Python method
307template <typename T>
308int List<T>::index(T value) {
309 int element_count = len(this);
310 for (int i = 0; i < element_count; i++) {
311 if (items_equal(slab_->items_[i], value)) {
312 return i;
313 }
314 }
315 throw Alloc<ValueError>();
316}
317
318// Should we have a separate API that doesn't return it?
319// https://stackoverflow.com/questions/12600330/pop-back-return-value
320template <typename T>
321T List<T>::pop() {
322 if (len_ == 0) {
323 throw Alloc<IndexError>();
324 }
325 len_--;
326 T result = slab_->items_[len_];
327 slab_->items_[len_] = 0; // zero for GC scan
328 return result;
329}
330
331// Used in osh/word_parse.py to remove from front
332template <typename T>
333T List<T>::pop(int i) {
334 if (len_ < i) {
335 throw Alloc<IndexError>();
336 }
337
338 T result = at(i);
339 len_--;
340
341 // Shift everything by one
342 memmove(slab_->items_ + i, slab_->items_ + (i + 1), (len_ - i) * sizeof(T));
343
344 /*
345 for (int j = 0; j < len_; j++) {
346 slab_->items_[j] = slab_->items_[j+1];
347 }
348 */
349
350 slab_->items_[len_] = 0; // zero for GC scan
351 return result;
352}
353
354template <typename T>
355void List<T>::remove(T x) {
356 int idx = this->index(x);
357 this->pop(idx); // unused
358}
359
360template <typename T>
361void List<T>::clear() {
362 if (slab_) {
363 memset(slab_->items_, 0, len_ * sizeof(T)); // zero for GC scan
364 }
365 len_ = 0;
366}
367
368// Used in osh/string_ops.py
369template <typename T>
370void List<T>::reverse() {
371 for (int i = 0; i < len_ / 2; ++i) {
372 // log("swapping %d and %d", i, n-i);
373 T tmp = slab_->items_[i];
374 int j = len_ - 1 - i;
375 slab_->items_[i] = slab_->items_[j];
376 slab_->items_[j] = tmp;
377 }
378}
379
380// Extend this list with multiple elements.
381template <typename T>
382void List<T>::extend(List<T>* other) {
383 int n = other->len_;
384 int new_len = len_ + n;
385 reserve(new_len);
386
387 for (int i = 0; i < n; ++i) {
388 slab_->items_[len_ + i] = other->slab_->items_[i];
389 }
390 len_ = new_len;
391}
392
393inline bool CompareBigStr(BigStr* a, BigStr* b) {
394 return mylib::str_cmp(a, b) < 0;
395}
396
397template <>
398inline void List<BigStr*>::sort() {
399 std::sort(slab_->items_, slab_->items_ + len_, CompareBigStr);
400}
401
402inline bool CompareBigInt(mops::BigInt a, mops::BigInt b) {
403 return a < b;
404}
405
406template <>
407inline void List<mops::BigInt>::sort() {
408 std::sort(slab_->items_, slab_->items_ + len_, CompareBigInt);
409}
410
411// TODO: mycpp can just generate the constructor instead?
412// e.g. [None] * 3
413template <typename T>
414List<T>* list_repeat(T item, int times) {
415 return NewList<T>(item, times);
416}
417
418// e.g. 'a' in ['a', 'b', 'c']
419template <typename T>
420inline bool list_contains(List<T>* haystack, T needle) {
421 int n = len(haystack);
422 for (int i = 0; i < n; ++i) {
423 if (items_equal(haystack->at(i), needle)) {
424 return true;
425 }
426 }
427 return false;
428}
429
430template <typename V>
431List<BigStr*>* sorted(Dict<BigStr*, V>* d) {
432 auto keys = d->keys();
433 keys->sort();
434 return keys;
435}
436
437template <typename T>
438List<T>* sorted(List<T>* l) {
439 auto ret = list(l);
440 ret->sort();
441 return ret;
442}
443
444// list(L) copies the list
445template <typename T>
446List<T>* list(List<T>* other) {
447 auto result = NewList<T>();
448 result->extend(other);
449 return result;
450}
451
452template <class T>
453class ListIter {
454 public:
455 explicit ListIter(List<T>* L) : L_(L), i_(0) {
456 // Cheney only: L_ could be moved during iteration.
457 // gHeap.PushRoot(reinterpret_cast<RawObject**>(&L_));
458 }
459
460 ~ListIter() {
461 // gHeap.PopRoot();
462 }
463 void Next() {
464 i_++;
465 }
466 bool Done() {
467 // "unsigned size_t was a mistake"
468 return i_ >= static_cast<int>(L_->len_);
469 }
470 T Value() {
471 return L_->slab_->items_[i_];
472 }
473 T iterNext() {
474 if (Done()) {
475 throw Alloc<StopIteration>();
476 }
477 T ret = L_->slab_->items_[i_];
478 Next();
479 return ret;
480 }
481
482 // only for use with generators
483 List<T>* GetList() {
484 return L_;
485 }
486
487 private:
488 List<T>* L_;
489 int i_;
490};
491
492// list(it) returns the iterator's backing list
493template <typename T>
494List<T>* list(ListIter<T> it) {
495 return list(it.GetList());
496}
497
498// TODO: Does using pointers rather than indices make this more efficient?
499template <class T>
500class ReverseListIter {
501 public:
502 explicit ReverseListIter(List<T>* L) : L_(L), i_(L_->len_ - 1) {
503 }
504 void Next() {
505 i_--;
506 }
507 bool Done() {
508 return i_ < 0;
509 }
510 T Value() {
511 return L_->slab_->items_[i_];
512 }
513
514 private:
515 List<T>* L_;
516 int i_;
517};
518
519int max(List<int>* elems);
520
521#endif // MYCPP_GC_LIST_H