examples

Coverage Report

Created: 2025-05-31 14:43

/home/uke/oil/mycpp/mark_sweep_heap.cc
Line
Count
Source (jump to first uncovered line)
1
#include "mycpp/mark_sweep_heap.h"
2
3
#include <inttypes.h>  // PRId64
4
#include <stdio.h>     // dprintf()
5
#include <stdlib.h>    // getenv()
6
#include <string.h>    // strlen()
7
#include <sys/time.h>  // gettimeofday()
8
#include <time.h>      // clock_gettime(), CLOCK_PROCESS_CPUTIME_ID
9
#include <unistd.h>    // STDERR_FILENO
10
11
#include "_build/detected-cpp-config.h"  // for GC_TIMING
12
#include "mycpp/gc_builtins.h"           // StringToInt()
13
#include "mycpp/gc_slab.h"
14
15
// TODO: Remove this guard when we have separate binaries
16
#if MARK_SWEEP
17
18
32
void MarkSweepHeap::Init() {
19
32
  Init(1000);  // collect at 1000 objects in tests
20
32
}
21
22
32
void MarkSweepHeap::Init(int gc_threshold) {
23
32
  gc_threshold_ = gc_threshold;
24
25
32
  char* e;
26
32
  e = getenv("OILS_GC_THRESHOLD");
27
32
  if (e) {
28
0
    int result;
29
0
    if (StringToInt(e, strlen(e), 10, &result)) {
30
      // Override collection threshold
31
0
      gc_threshold_ = result;
32
0
    }
33
0
  }
34
35
  // only for developers
36
32
  e = getenv("_OILS_GC_VERBOSE");
37
32
  if (e && strcmp(e, "1") == 0) {
38
0
    gc_verbose_ = true;
39
0
  }
40
41
32
  live_objs_.reserve(KiB(10));
42
32
  roots_.reserve(KiB(1));  // prevent resizing in common case
43
32
}
44
45
2.07k
int MarkSweepHeap::MaybeCollect() {
46
  // Maybe collect BEFORE allocation, because the new object won't be rooted
47
  #if GC_ALWAYS
48
  int result = Collect();
49
  #else
50
2.07k
  int result = -1;
51
2.07k
  if (num_live() > gc_threshold_) {
52
9
    result = Collect();
53
9
  }
54
2.07k
  #endif
55
56
2.07k
  num_gc_points_++;  // this is a manual collection point
57
2.07k
  return result;
58
2.07k
}
59
60
  #if defined(BUMP_SMALL)
61
    #include "mycpp/bump_leak_heap.h"
62
63
BumpLeakHeap gBumpLeak;
64
  #endif
65
66
// Allocate and update stats
67
// TODO: Make this interface nicer.
68
10.3k
void* MarkSweepHeap::Allocate(size_t num_bytes, int* obj_id, int* pool_id) {
69
  // log("Allocate %d", num_bytes);
70
10.3k
  #ifndef NO_POOL_ALLOC
71
10.3k
  if (num_bytes <= pool1_.kMaxObjSize) {
72
6.94k
    *pool_id = 1;
73
6.94k
    return pool1_.Allocate(obj_id);
74
6.94k
  }
75
3.41k
  if (num_bytes <= pool2_.kMaxObjSize) {
76
3.15k
    *pool_id = 2;
77
3.15k
    return pool2_.Allocate(obj_id);
78
3.15k
  }
79
261
  *pool_id = 0;  // malloc(), not a pool
80
261
  #endif
81
82
  // Does the pool allocator approximate a bump allocator?  Use pool2_
83
  // threshold of 48 bytes.
84
  // These only work with GC off -- OILS_GC_THRESHOLD=[big]
85
  #ifdef BUMP_SMALL
86
  if (num_bytes <= 48) {
87
    return gBumpLeak.Allocate(num_bytes);
88
  }
89
  #endif
90
91
261
  if (to_free_.empty()) {
92
    // Use higher object IDs
93
97
    *obj_id = greatest_obj_id_;
94
97
    greatest_obj_id_++;
95
96
    // This check is ON in release mode
97
97
    CHECK(greatest_obj_id_ <= kMaxObjId);
98
164
  } else {
99
164
    ObjHeader* dead = to_free_.back();
100
164
    to_free_.pop_back();
101
102
164
    *obj_id = dead->obj_id;  // reuse the dead object's ID
103
104
164
    free(dead);
105
164
  }
106
107
0
  void* result = malloc(num_bytes);
108
261
  DCHECK(result != nullptr);
109
110
0
  live_objs_.push_back(static_cast<ObjHeader*>(result));
111
112
261
  num_live_++;
113
261
  num_allocated_++;
114
261
  bytes_allocated_ += num_bytes;
115
116
261
  return result;
117
3.41k
}
118
119
  #if 0
120
void* MarkSweepHeap::Reallocate(void* p, size_t num_bytes) {
121
  FAIL(kNotImplemented);
122
  // This causes a double-free in the GC!
123
  // return realloc(p, num_bytes);
124
}
125
  #endif
126
127
// "Leaf" for marking / TraceChildren
128
//
129
// - Abort if nullptr
130
// - Find the header (get rid of this when remove ObjHeader member)
131
// - Tag::{Opaque,FixedSized,Scanned} have their mark bits set
132
// - Tag::{FixedSize,Scanned} are also pushed on the gray stack
133
134
2.10k
void MarkSweepHeap::MaybeMarkAndPush(RawObject* obj) {
135
2.10k
  ObjHeader* header = ObjHeader::FromObject(obj);
136
2.10k
  if (header->heap_tag == HeapTag::Global) {  // don't mark or push
137
314
    return;
138
314
  }
139
140
1.78k
  int obj_id = header->obj_id;
141
1.78k
  #ifndef NO_POOL_ALLOC
142
1.78k
  if (header->pool_id == 1) {
143
1.50k
    if (pool1_.IsMarked(obj_id)) {
144
117
      return;
145
117
    }
146
1.38k
    pool1_.Mark(obj_id);
147
1.38k
  } else if (header->pool_id == 2) {
148
266
    if (pool2_.IsMarked(obj_id)) {
149
56
      return;
150
56
    }
151
210
    pool2_.Mark(obj_id);
152
210
  } else
153
19
  #endif
154
19
  {
155
19
    if (mark_set_.IsMarked(obj_id)) {
156
0
      return;
157
0
    }
158
19
    mark_set_.Mark(obj_id);
159
19
  }
160
161
1.61k
  switch (header->heap_tag) {
162
203
  case HeapTag::Opaque:  // e.g. strings have no children
163
203
    break;
164
165
1.29k
  case HeapTag::Scanned:  // these 2 types have children
166
1.41k
  case HeapTag::FixedSize:
167
1.41k
    gray_stack_.push_back(header);  // Push the header, not the object!
168
1.41k
    break;
169
170
0
  default:
171
0
    FAIL(kShouldNotGetHere);
172
1.61k
  }
173
1.61k
}
174
175
41
void MarkSweepHeap::TraceChildren() {
176
1.45k
  while (!gray_stack_.empty()) {
177
1.41k
    ObjHeader* header = gray_stack_.back();
178
1.41k
    gray_stack_.pop_back();
179
180
1.41k
    switch (header->heap_tag) {
181
117
    case HeapTag::FixedSize: {
182
117
      auto fixed = reinterpret_cast<LayoutFixed*>(header->ObjectAddress());
183
117
      int mask = FIELD_MASK(*header);
184
185
2.92k
      for (int i = 0; i < kFieldMaskBits; ++i) {
186
2.80k
        if (mask & (1 << i)) {
187
139
          RawObject* child = fixed->children_[i];
188
139
          if (child) {
189
117
            MaybeMarkAndPush(child);
190
117
          }
191
139
        }
192
2.80k
      }
193
117
      break;
194
0
    }
195
196
1.29k
    case HeapTag::Scanned: {
197
1.29k
      auto slab = reinterpret_cast<Slab<RawObject*>*>(header->ObjectAddress());
198
199
1.29k
      int n = NUM_POINTERS(*header);
200
3.38k
      for (int i = 0; i < n; ++i) {
201
2.09k
        RawObject* child = slab->items_[i];
202
2.09k
        if (child) {
203
1.87k
          MaybeMarkAndPush(child);
204
1.87k
        }
205
2.09k
      }
206
1.29k
      break;
207
0
    }
208
0
    default:
209
      // Only FixedSize and Scanned are pushed
210
0
      FAIL(kShouldNotGetHere);
211
1.41k
    }
212
1.41k
  }
213
41
}
214
215
41
void MarkSweepHeap::Sweep() {
216
41
  #ifndef NO_POOL_ALLOC
217
41
  pool1_.Sweep();
218
41
  pool2_.Sweep();
219
41
  #endif
220
221
41
  int last_live_index = 0;
222
41
  int num_objs = live_objs_.size();
223
321
  for (int i = 0; i < num_objs; ++i) {
224
280
    ObjHeader* obj = live_objs_[i];
225
280
    DCHECK(obj);  // malloc() shouldn't have returned nullptr
226
227
0
    bool is_live = mark_set_.IsMarked(obj->obj_id);
228
229
    // Compact live_objs_ and populate to_free_.  Note: doing the reverse could
230
    // be more efficient when many objects are dead.
231
280
    if (is_live) {
232
19
      live_objs_[last_live_index++] = obj;
233
261
    } else {
234
261
      to_free_.push_back(obj);
235
      // free(obj);
236
261
      num_live_--;
237
261
    }
238
280
  }
239
41
  live_objs_.resize(last_live_index);  // remove dangling objects
240
241
41
  num_collections_++;
242
41
  max_survived_ = std::max(max_survived_, num_live());
243
41
}
244
245
41
int MarkSweepHeap::Collect() {
246
41
  #ifdef GC_TIMING
247
41
  struct timespec start, end;
248
41
  if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start) < 0) {
249
0
    FAIL("clock_gettime failed");
250
0
  }
251
0
  #endif
252
253
0
  int num_roots = roots_.size();
254
41
  int num_globals = global_roots_.size();
255
256
41
  if (gc_verbose_) {
257
0
    log("");
258
0
    log("%2d. GC with %d roots (%d global) and %d live objects",
259
0
        num_collections_, num_roots + num_globals, num_globals, num_live());
260
0
  }
261
262
  // Resize it
263
41
  mark_set_.ReInit(greatest_obj_id_);
264
41
  #ifndef NO_POOL_ALLOC
265
41
  pool1_.PrepareForGc();
266
41
  pool2_.PrepareForGc();
267
41
  #endif
268
269
  // Mark roots.
270
  // Note: It might be nice to get rid of double pointers
271
169
  for (int i = 0; i < num_roots; ++i) {
272
128
    RawObject* root = *(roots_[i]);
273
128
    if (root) {
274
102
      MaybeMarkAndPush(root);
275
102
    }
276
128
  }
277
278
49
  for (int i = 0; i < num_globals; ++i) {
279
8
    RawObject* root = global_roots_[i];
280
8
    if (root) {
281
8
      MaybeMarkAndPush(root);
282
8
    }
283
8
  }
284
285
  // Traverse object graph.
286
41
  TraceChildren();
287
288
41
  Sweep();
289
290
41
  if (gc_verbose_) {
291
0
    log("    %d live after sweep", num_live());
292
0
  }
293
294
  // We know how many are live.  If the number of objects is close to the
295
  // threshold (above 75%), then set the threshold to 2 times the number of
296
  // live objects.  This is an ad hoc policy that removes observed "thrashing"
297
  // -- being at 99% of the threshold and doing FUTILE mark and sweep.
298
299
41
  int water_mark = (gc_threshold_ * 3) / 4;
300
41
  if (num_live() > water_mark) {
301
0
    gc_threshold_ = num_live() * 2;
302
0
    num_growths_++;
303
0
    if (gc_verbose_) {
304
0
      log("    exceeded %d live objects; gc_threshold set to %d", water_mark,
305
0
          gc_threshold_);
306
0
    }
307
0
  }
308
309
41
  #ifdef GC_TIMING
310
41
  if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end) < 0) {
311
0
    FAIL("clock_gettime failed");
312
0
  }
313
314
0
  double start_secs = start.tv_sec + start.tv_nsec / 1e9;
315
41
  double end_secs = end.tv_sec + end.tv_nsec / 1e9;
316
41
  double gc_millis = (end_secs - start_secs) * 1000.0;
317
318
41
  if (gc_verbose_) {
319
0
    log("    %.1f ms GC", gc_millis);
320
0
  }
321
322
41
  total_gc_millis_ += gc_millis;
323
41
  if (gc_millis > max_gc_millis_) {
324
33
    max_gc_millis_ = gc_millis;
325
33
  }
326
41
  #endif
327
328
41
  return num_live();  // for unit tests only
329
41
}
330
331
0
void MarkSweepHeap::PrintShortStats() {
332
  // TODO: should use feature detection of dprintf
333
0
  #ifndef OILS_WIN32
334
0
    #ifndef NO_POOL_ALLOC
335
0
  int fd = 2;
336
0
  dprintf(fd, "  num allocated    = %10d\n",
337
0
          num_allocated_ + pool1_.num_allocated() + pool2_.num_allocated());
338
0
  dprintf(
339
0
      fd, "bytes allocated    = %10" PRId64 "\n",
340
0
      bytes_allocated_ + pool1_.bytes_allocated() + pool2_.bytes_allocated());
341
0
    #endif
342
0
  #endif
343
0
}
344
345
0
void MarkSweepHeap::PrintStats(int fd) {
346
  // TODO: should use feature detection of dprintf
347
0
  #ifndef OILS_WIN32
348
0
  dprintf(fd, "  num live         = %10d\n", num_live());
349
  // max survived_ can be less than num_live(), because leave off the last GC
350
0
  dprintf(fd, "  max survived     = %10d\n", max_survived_);
351
0
  dprintf(fd, "\n");
352
353
0
    #ifndef NO_POOL_ALLOC
354
0
  dprintf(fd, "  num allocated    = %10d\n",
355
0
          num_allocated_ + pool1_.num_allocated() + pool2_.num_allocated());
356
0
  dprintf(fd, "  num in heap      = %10d\n", num_allocated_);
357
    #else
358
  dprintf(fd, "  num allocated    = %10d\n", num_allocated_);
359
    #endif
360
361
0
    #ifndef NO_POOL_ALLOC
362
0
  dprintf(fd, "  num in pool 1    = %10d\n", pool1_.num_allocated());
363
0
  dprintf(fd, "  num in pool 2    = %10d\n", pool2_.num_allocated());
364
0
  dprintf(
365
0
      fd, "bytes allocated    = %10" PRId64 "\n",
366
0
      bytes_allocated_ + pool1_.bytes_allocated() + pool2_.bytes_allocated());
367
    #else
368
  dprintf(fd, "bytes allocated    = %10" PRId64 "\n", bytes_allocated_);
369
    #endif
370
371
0
  dprintf(fd, "\n");
372
0
  dprintf(fd, "  num gc points    = %10d\n", num_gc_points_);
373
0
  dprintf(fd, "  num collections  = %10d\n", num_collections_);
374
0
  dprintf(fd, "\n");
375
0
  dprintf(fd, "   gc threshold    = %10d\n", gc_threshold_);
376
0
  dprintf(fd, "  num growths      = %10d\n", num_growths_);
377
0
  dprintf(fd, "\n");
378
0
  dprintf(fd, "  max gc millis    = %10.1f\n", max_gc_millis_);
379
0
  dprintf(fd, "total gc millis    = %10.1f\n", total_gc_millis_);
380
0
  dprintf(fd, "\n");
381
0
  dprintf(fd, "roots capacity     = %10d\n",
382
0
          static_cast<int>(roots_.capacity()));
383
0
  dprintf(fd, " objs capacity     = %10d\n",
384
0
          static_cast<int>(live_objs_.capacity()));
385
0
  #endif
386
0
}
387
388
// Cleanup at the end of main() to remain ASAN-safe
389
32
void MarkSweepHeap::MaybePrintStats() {
390
32
  int stats_fd = -1;
391
32
  char* e = getenv("OILS_GC_STATS");
392
32
  if (e && strlen(e)) {  // env var set and non-empty
393
0
    stats_fd = STDERR_FILENO;
394
32
  } else {
395
    // A raw file descriptor lets benchmarks extract stats even if the script
396
    // writes to stdout and stderr.  Shells can't use open() without potential
397
    // conflicts.
398
399
32
    e = getenv("OILS_GC_STATS_FD");
400
32
    if (e && strlen(e)) {
401
      // Try setting 'stats_fd'.  If there's an error, it will be unchanged, and
402
      // we don't PrintStats();
403
0
      StringToInt(e, strlen(e), 10, &stats_fd);
404
0
    }
405
32
  }
406
407
32
  if (stats_fd != -1) {
408
0
    PrintStats(stats_fd);
409
0
  }
410
32
}
411
412
32
void MarkSweepHeap::FreeEverything() {
413
32
  roots_.clear();
414
32
  global_roots_.clear();
415
416
32
  Collect();
417
418
  // Collect() told us what to free()
419
97
  for (auto obj : to_free_) {
420
97
    free(obj);
421
97
  }
422
32
  #ifndef NO_POOL_ALLOC
423
32
  pool1_.Free();
424
32
  pool2_.Free();
425
32
  #endif
426
32
}
427
428
32
void MarkSweepHeap::CleanProcessExit() {
429
32
  MaybePrintStats();
430
431
32
  char* e = getenv("OILS_GC_ON_EXIT");
432
  // collect by default; OILS_GC_ON_EXIT=0 overrides
433
32
  if (e && strcmp(e, "0") == 0) {
434
0
    ;
435
32
  } else {
436
32
    FreeEverything();
437
32
  }
438
32
}
439
440
// for the main binary
441
0
void MarkSweepHeap::ProcessExit() {
442
0
  MaybePrintStats();
443
444
  #ifdef CLEAN_PROCESS_EXIT
445
  FreeEverything();
446
  #else
447
0
  char* e = getenv("OILS_GC_ON_EXIT");
448
  // don't collect by default; OILS_GC_ON_EXIT=1 overrides
449
0
  if (e && strcmp(e, "1") == 0) {
450
0
    FreeEverything();
451
0
  }
452
0
  #endif
453
0
}
454
455
MarkSweepHeap gHeap;
456
457
#endif  // MARK_SWEEP