OILS / mycpp / mark_sweep_heap.h View on Github | oilshell.org

301 lines, 181 significant
1#ifndef MARKSWEEP_HEAP_H
2#define MARKSWEEP_HEAP_H
3
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#include <algorithm>
#include <vector>

#include "mycpp/common.h"
#include "mycpp/gc_obj.h"
10
11class MarkSet {
12 public:
13 MarkSet() : bits_() {
14 }
15
16 // ReInit() must be called at the start of MarkObjects(). Allocate() should
17 // keep track of the maximum object ID.
18 void ReInit(int max_obj_id) {
19 // https://stackoverflow.com/questions/8848575/fastest-way-to-reset-every-value-of-stdvectorint-to-0
20 std::fill(bits_.begin(), bits_.end(), 0);
21 int max_byte_index = (max_obj_id >> 3) + 1; // round up
22 // log("ReInit max_byte_index %d", max_byte_index);
23 bits_.resize(max_byte_index);
24 }
25
26 // Called by MarkObjects()
27 void Mark(int obj_id) {
28 DCHECK(obj_id >= 0);
29 // log("obj id %d", obj_id);
30 DCHECK(!IsMarked(obj_id));
31 int byte_index = obj_id >> 3; // 8 bits per byte
32 int bit_index = obj_id & 0b111;
33 // log("byte_index %d %d", byte_index, bit_index);
34 bits_[byte_index] |= (1 << bit_index);
35 }
36
37 // Called by Sweep()
38 bool IsMarked(int obj_id) {
39 DCHECK(obj_id >= 0);
40 int byte_index = obj_id >> 3;
41 int bit_index = obj_id & 0b111;
42 return bits_[byte_index] & (1 << bit_index);
43 }
44
45 void Debug() {
46 int n = bits_.size();
47 dprintf(2, "[ ");
48 for (int i = 0; i < n; ++i) {
49 dprintf(2, "%02x ", bits_[i]);
50 }
51 dprintf(2, "] (%d bytes) \n", n);
52 dprintf(2, "[ ");
53 int num_bits = 0;
54 for (int i = 0; i < n; ++i) {
55 for (int j = 0; j < 8; ++j) {
56 int bit = (bits_[i] & (1 << j)) != 0;
57 dprintf(2, "%d", bit);
58 num_bits += bit;
59 }
60 }
61 dprintf(2, " ] (%d bits set)\n", num_bits);
62 }
63
64 std::vector<uint8_t> bits_; // bit vector indexed by obj_id
65};
66
67// A simple Pool allocator for allocating small objects. It maintains an ever
68// growing number of Blocks each consisting of a number of fixed size Cells.
69// Memory is handed out one Cell at a time.
70// Note: within the context of the Pool allocator we refer to object IDs as cell
71// IDs because in addition to identifying an object they're also used to index
72// into the Cell storage.
template <int CellsPerBlock, size_t CellSize>
class Pool {
 public:
  // Largest object this pool can hold: exactly one Cell.
  static constexpr size_t kMaxObjSize = CellSize;
  static constexpr int kBlockSize = CellSize * CellsPerBlock;

  Pool() = default;

  // Hand out one Cell, writing its cell ID to *obj_id. If the free list is
  // empty, a whole new Block is allocated and all of its Cells are pushed
  // onto the free list first.
  void* Allocate(int* obj_id) {
    num_allocated_++;

    if (!free_list_) {
      // Allocate a new Block and add every new Cell to the free list.
      // NOTE(review): malloc() result is not checked for nullptr — presumably
      // allocation failure is fatal anyway; confirm project policy.
      Block* block = static_cast<Block*>(malloc(sizeof(Block)));
      blocks_.push_back(block);
      bytes_allocated_ += kBlockSize;
      num_free_ += CellsPerBlock;

      // The starting cell_id for Cells in this block.
      // Cell IDs are global across blocks: block index * CellsPerBlock + offset.
      int cell_id = (blocks_.size() - 1) * CellsPerBlock;
      for (Cell& cell : block->cells) {
        // The FreeCell bookkeeping lives inside the unused Cell itself.
        FreeCell* free_cell = reinterpret_cast<FreeCell*>(cell);
        free_cell->id = cell_id++;
        free_cell->next = free_list_;
        free_list_ = free_cell;
      }
    }

    // Pop the head of the free list and return it as raw storage.
    FreeCell* cell = free_list_;
    free_list_ = free_list_->next;
    num_free_--;
    *obj_id = cell->id;
    return cell;
  }

  // Start a collection: clear the mark set, sized to cover every cell ID
  // this pool has ever handed out.
  void PrepareForGc() {
    DCHECK(!gc_underway_);
    gc_underway_ = true;
    mark_set_.ReInit(blocks_.size() * CellsPerBlock);
  }

  // Only valid between PrepareForGc() and Sweep().
  bool IsMarked(int cell_id) {
    DCHECK(gc_underway_);
    return mark_set_.IsMarked(cell_id);
  }

  // Only valid between PrepareForGc() and Sweep().
  void Mark(int cell_id) {
    DCHECK(gc_underway_);
    mark_set_.Mark(cell_id);
  }

  // End a collection: rebuild the free list from every unmarked Cell.
  void Sweep() {
    DCHECK(gc_underway_);
    // Iterate over every Cell linking the free ones into a new free list.
    num_free_ = 0;
    free_list_ = nullptr;
    int cell_id = 0;
    for (Block* block : blocks_) {
      for (Cell& cell : block->cells) {
        if (!mark_set_.IsMarked(cell_id)) {
          num_free_++;
          FreeCell* free_cell = reinterpret_cast<FreeCell*>(cell);
          free_cell->id = cell_id;
          free_cell->next = free_list_;
          free_list_ = free_cell;
        }
        cell_id++;
      }
    }
    gc_underway_ = false;
  }

  // Release all Blocks back to the OS. Live objects, if any, are NOT
  // destructed here — this frees raw storage only.
  void Free() {
    for (Block* block : blocks_) {
      free(block);
    }
    blocks_.clear();
    num_free_ = 0;
  }

  // Cumulative count of Allocate() calls (never decremented).
  int num_allocated() {
    return num_allocated_;
  }

  // Cumulative bytes of Block storage ever malloc'd.
  int64_t bytes_allocated() {
    return bytes_allocated_;
  }

  // Cells currently in use = total capacity minus cells on the free list.
  int num_live() {
#ifndef OPTIMIZED
    int capacity = blocks_.size() * CellsPerBlock;
    // log("Pool capacity = %d", capacity);
    // log("Pool num_free_ = %d", num_free_);
    DCHECK(num_free_ <= capacity);
#endif
    return blocks_.size() * CellsPerBlock - num_free_;
  }

 private:
  // Raw, untyped storage for one object.
  using Cell = uint8_t[CellSize];

  struct Block {
    Cell cells[CellsPerBlock];
  };

  // Unused/free cells are tracked via a linked list of FreeCells. The FreeCells
  // are stored in the unused Cells, so it takes no extra memory to track them.
  struct FreeCell {
    int id;
    FreeCell* next;
  };
  static_assert(CellSize >= sizeof(FreeCell), "CellSize is too small");

  // Whether a GC is underway, for asserting that calls are in order.
  bool gc_underway_ = false;

  FreeCell* free_list_ = nullptr;  // head of the intrusive free list
  int num_free_ = 0;               // length of free_list_
  int num_allocated_ = 0;          // cumulative, never reset
  int64_t bytes_allocated_ = 0;    // cumulative Block bytes
  std::vector<Block*> blocks_;     // owned; freed in Free()
  MarkSet mark_set_;               // valid only while gc_underway_

  DISALLOW_COPY_AND_ASSIGN(Pool<CellsPerBlock COMMA CellSize>);
};
198
199class MarkSweepHeap {
200 public:
201 // reserve 32 frames to start
202 MarkSweepHeap() {
203 }
204
205 void Init(); // use default threshold
206 void Init(int gc_threshold);
207
208 void PushRoot(RawObject** p) {
209 roots_.push_back(p);
210 }
211
212 void PopRoot() {
213 roots_.pop_back();
214 }
215
216 void RootGlobalVar(void* root) {
217 global_roots_.push_back(reinterpret_cast<RawObject*>(root));
218 }
219
220 void* Allocate(size_t num_bytes, int* obj_id, int* pool_id);
221
222#if 0
223 void* Reallocate(void* p, size_t num_bytes);
224#endif
225 int MaybeCollect();
226 int Collect();
227
228 void MaybeMarkAndPush(RawObject* obj);
229 void TraceChildren();
230
231 void Sweep();
232
233 void PrintStats(int fd); // public for testing
234
235 void CleanProcessExit(); // do one last GC, used in unit tests
236 void ProcessExit(); // main() lets OS clean up, except ASAN variant
237
238 int num_live() {
239 return num_live_
240#ifndef NO_POOL_ALLOC
241 + pool1_.num_live() + pool2_.num_live()
242#endif
243 ;
244 }
245
246 bool is_initialized_ = true; // mark/sweep doesn't need to be initialized
247
248 // Runtime params
249
250 // Threshold is a number of live objects, since we aren't keeping track of
251 // total bytes
252 int gc_threshold_;
253
254 // Show debug logging
255 bool gc_verbose_ = false;
256
257 // Current stats
258 int num_live_ = 0;
259 // Should we keep track of sizes?
260 // int64_t bytes_live_ = 0;
261
262 // Cumulative stats
263 int max_survived_ = 0; // max # live after a collection
264 int num_allocated_ = 0;
265 int64_t bytes_allocated_ = 0; // avoid overflow
266 int num_gc_points_ = 0; // manual collection points
267 int num_collections_ = 0;
268 int num_growths_;
269 double max_gc_millis_ = 0.0;
270 double total_gc_millis_ = 0.0;
271
272#ifndef NO_POOL_ALLOC
273 // 16,384 / 24 bytes = 682 cells (rounded), 16,368 bytes
274 // 16,384 / 48 bytes = 341 cells (rounded), 16,368 bytes
275 // Conveniently, the glibc malloc header is 16 bytes, giving exactly 16 Ki
276 // differences
277 Pool<682, 24> pool1_;
278 Pool<341, 48> pool2_;
279#endif
280
281 std::vector<RawObject**> roots_;
282 std::vector<RawObject*> global_roots_;
283
284 // Allocate() appends live objects, and Sweep() compacts it
285 std::vector<ObjHeader*> live_objs_;
286 // Allocate lazily frees these, and Sweep() replenishes it
287 std::vector<ObjHeader*> to_free_;
288
289 std::vector<ObjHeader*> gray_stack_;
290 MarkSet mark_set_;
291
292 int greatest_obj_id_ = 0;
293
294 private:
295 void FreeEverything();
296 void MaybePrintStats();
297
298 DISALLOW_COPY_AND_ASSIGN(MarkSweepHeap);
299};
300
301#endif // MARKSWEEP_HEAP_H