OILS / mycpp / gc_mylib.h View on Github | oilshell.org

385 lines, 198 significant
1// gc_mylib.h - corresponds to mycpp/mylib.py
2
3#ifndef MYCPP_GC_MYLIB_H
4#define MYCPP_GC_MYLIB_H
5
6#include <limits.h> // CHAR_BIT
7
8#include "mycpp/gc_alloc.h" // gHeap
9#include "mycpp/gc_dict.h" // for dict_erase()
10#include "mycpp/gc_mops.h"
11#include "mycpp/gc_tuple.h"
12
13template <class K, class V>
14class Dict;
15
16// https://stackoverflow.com/questions/3919995/determining-sprintf-buffer-size-whats-the-standard/11092994#11092994
17// Notes:
18// - Python 2.7's intobject.c has an erroneous +6
19// - This is 13, but len('-2147483648') is 11, which means we only need 12?
20// - This formula is valid for octal(), because 2^(3 bits) = 8
21
22const int kIntBufSize = CHAR_BIT * sizeof(int) / 3 + 3;
23
24namespace mylib {
25
26void InitCppOnly();
27
28// Wrappers around our C++ APIs
29
// Forwards to gHeap.MaybeCollect(), exposing the global heap's collection
// entry point under the mylib namespace.
inline void MaybeCollect() {
  gHeap.MaybeCollect();
}
33
// Forwards to gHeap.PrintShortStats() on the global heap.
inline void PrintGcStats() {
  gHeap.PrintShortStats(); // print to stderr
}
37
38void print_stderr(BigStr* s);
39
40inline int ByteAt(BigStr* s, int i) {
41 DCHECK(0 <= i);
42 DCHECK(i <= len(s));
43
44 return static_cast<unsigned char>(s->data_[i]);
45}
46
47inline int ByteEquals(int byte, BigStr* ch) {
48 DCHECK(0 <= byte);
49 DCHECK(byte < 256);
50
51 DCHECK(len(ch) == 1);
52
53 return byte == static_cast<unsigned char>(ch->data_[0]);
54}
55
56inline int ByteInSet(int byte, BigStr* byte_set) {
57 DCHECK(0 <= byte);
58 DCHECK(byte < 256);
59
60 int n = len(byte_set);
61 for (int i = 0; i < n; ++i) {
62 int b = static_cast<unsigned char>(byte_set->data_[i]);
63 if (byte == b) {
64 return true;
65 }
66 }
67 return false;
68}
69
70BigStr* JoinBytes(List<int>* byte_list);
71
72void BigIntSort(List<mops::BigInt>* keys);
73
74// const int kStdout = 1;
75// const int kStderr = 2;
76
77// void writeln(BigStr* s, int fd = kStdout);
78
79Tuple2<BigStr*, BigStr*> split_once(BigStr* s, BigStr* delim);
80
81template <typename K, typename V>
82void dict_erase(Dict<K, V>* haystack, K needle) {
83 DCHECK(haystack->obj_header().heap_tag != HeapTag::Global);
84
85 int pos = haystack->hash_and_probe(needle);
86 if (pos == kTooSmall) {
87 return;
88 }
89 DCHECK(pos >= 0);
90 int kv_index = haystack->index_->items_[pos];
91 if (kv_index < 0) {
92 return;
93 }
94
95 int last_kv_index = haystack->len_ - 1;
96 DCHECK(kv_index <= last_kv_index);
97
98 // Swap the target entry with the most recently inserted one before removing
99 // it. This has two benefits.
100 // (1) It keeps the entry arrays compact. All valid entries occupy a
101 // contiguous region in memory.
102 // (2) It prevents holes in the entry arrays. This makes iterating over
103 // entries (e.g. in keys() or DictIter()) trivial and doesn't require
104 // any extra validity state (like a bitset of unusable slots). This is
105 // important because keys and values wont't always be pointers, so we
106 // can't rely on NULL checks for validity. We also can't wrap the slab
107 // entry types in some other type without modifying the garbage
108 // collector to trace through unmanaged types (or paying the extra
109 // allocations for the outer type).
110 if (kv_index != last_kv_index) {
111 K last_key = haystack->keys_->items_[last_kv_index];
112 V last_val = haystack->values_->items_[last_kv_index];
113 int last_pos = haystack->hash_and_probe(last_key);
114 DCHECK(last_pos != kNotFound);
115 haystack->keys_->items_[kv_index] = last_key;
116 haystack->values_->items_[kv_index] = last_val;
117 haystack->index_->items_[last_pos] = kv_index;
118 }
119
120 // Zero out for GC. These could be nullptr or 0
121 haystack->keys_->items_[last_kv_index] = 0;
122 haystack->values_->items_[last_kv_index] = 0;
123 haystack->index_->items_[pos] = kDeletedEntry;
124 haystack->len_--;
125 DCHECK(haystack->len_ < haystack->capacity_);
126}
127
128inline BigStr* hex_lower(int i) {
129 // Note: Could also use OverAllocatedStr, but most strings are small?
130 char buf[kIntBufSize];
131 int len = snprintf(buf, kIntBufSize, "%x", i);
132 return ::StrFromC(buf, len);
133}
134
// Abstract type: Union of LineReader and Writer
//
// Every operation is pure virtual, so File cannot be instantiated directly;
// the subclasses below provide the implementations.
class File {
 public:
  File() {
  }
  // Writer
  virtual void write(BigStr* s) = 0;
  virtual void flush() = 0;

  // Reader
  virtual BigStr* readline() = 0;

  // Both
  virtual bool isatty() = 0;
  virtual void close() = 0;

  // Object header built from this class's field mask and size.
  static constexpr ObjHeader obj_header() {
    return ObjHeader::ClassFixed(field_mask(), sizeof(File));
  }

  // File declares no data members, so the mask is kZeroMask.
  static constexpr uint32_t field_mask() {
    return kZeroMask;
  }
};
159
// Wrap a FILE* for read and write
//
// Overrides all five File operations; their definitions are not in this
// header.
class CFile : public File {
 public:
  // Stores `f` in f_; nothing else happens at construction.
  explicit CFile(FILE* f) : File(), f_(f) {
  }
  // Writer
  void write(BigStr* s) override;
  void flush() override;

  // Reader
  BigStr* readline() override;

  // Both
  bool isatty() override;
  void close() override;

  // Object header built from this class's field mask and size.
  static constexpr ObjHeader obj_header() {
    return ObjHeader::ClassFixed(field_mask(), sizeof(CFile));
  }

  static constexpr uint32_t field_mask() {
    // not mutating field_mask because FILE* isn't a GC object
    return File::field_mask();
  }

 private:
  FILE* f_;  // the wrapped stream, as passed to the constructor

  DISALLOW_COPY_AND_ASSIGN(CFile)
};
190
// Abstract File we can only read from.
// TODO: can we get rid of DCHECK() and reinterpret_cast?
//
// The writer half of the File interface is implemented here as CHECK(false).
// readline(), isatty() and close() are not overridden, so this class remains
// abstract.
class LineReader : public File {
 public:
  LineReader() : File() {
  }
  // Writing to a reader is a programming error.
  void write(BigStr* s) override {
    CHECK(false); // should not happen
  }
  // Flushing a reader is a programming error.
  void flush() override {
    CHECK(false); // should not happen
  }

  // Object header built from this class's field mask and size.
  static constexpr ObjHeader obj_header() {
    return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
  }

  // LineReader declares no data members, so the mask is kZeroMask.
  static constexpr uint32_t field_mask() {
    return kZeroMask;
  }
};
212
213class BufLineReader : public LineReader {
214 public:
215 explicit BufLineReader(BigStr* s) : LineReader(), s_(s), pos_(0) {
216 }
217 virtual BigStr* readline();
218 virtual bool isatty() {
219 return false;
220 }
221 virtual void close() {
222 }
223
224 BigStr* s_;
225 int pos_;
226
227 static constexpr ObjHeader obj_header() {
228 return ObjHeader::ClassFixed(field_mask(), sizeof(LineReader));
229 }
230
231 static constexpr uint32_t field_mask() {
232 return LineReader::field_mask() | maskbit(offsetof(BufLineReader, s_));
233 }
234
235 DISALLOW_COPY_AND_ASSIGN(BufLineReader)
236};
237
238extern LineReader* gStdin;
239
240inline LineReader* Stdin() {
241 if (gStdin == nullptr) {
242 gStdin = reinterpret_cast<LineReader*>(Alloc<CFile>(stdin));
243 }
244 return gStdin;
245}
246
247LineReader* open(BigStr* path);
248
249// Abstract File we can only write to.
250// TODO: can we get rid of DCHECK() and reinterpret_cast?
251class Writer : public File {
252 public:
253 Writer() : File() {
254 }
255 BigStr* readline() override {
256 CHECK(false); // should not happen
257 }
258
259 static constexpr ObjHeader obj_header() {
260 return ObjHeader::ClassFixed(field_mask(), sizeof(Writer));
261 }
262
263 static constexpr uint32_t field_mask() {
264 return kZeroMask;
265 }
266};
267
268class MutableStr;
269
270class BufWriter : public Writer {
271 public:
272 BufWriter() : Writer(), str_(nullptr), len_(0) {
273 }
274 void write(BigStr* s) override;
275 void write_spaces(int n);
276 void clear() { // Reuse this instance
277 str_ = nullptr;
278 len_ = 0;
279 is_valid_ = true;
280 }
281 void close() override {
282 }
283 void flush() override {
284 }
285 bool isatty() override {
286 return false;
287 }
288 BigStr* getvalue(); // part of cStringIO API
289
290 //
291 // Low Level API for C++ usage only
292 //
293
294 // Convenient API that avoids BigStr*
295 void WriteConst(const char* c_string);
296
297 // Potentially resizes the buffer.
298 void EnsureMoreSpace(int n);
299 // After EnsureMoreSpace(42), you can write 42 more bytes safely.
300 //
301 // Note that if you call EnsureMoreSpace(42), write 5 byte, and then
302 // EnsureMoreSpace(42) again, the amount of additional space reserved is 47.
303
304 // (Similar to vector::reserve(n), but it takes an integer to ADD to the
305 // capacity.)
306
307 uint8_t* LengthPointer(); // start + length
308 uint8_t* CapacityPointer(); // start + capacity
309 void SetLengthFrom(uint8_t* length_ptr);
310
311 int Length() {
312 return len_;
313 }
314
315 // Rewind to earlier position, future writes start there
316 void Truncate(int length);
317
318 static constexpr ObjHeader obj_header() {
319 return ObjHeader::ClassFixed(field_mask(), sizeof(BufWriter));
320 }
321
322 static constexpr unsigned field_mask() {
323 // maskvit_v() because BufWriter has virtual methods
324 return Writer::field_mask() | maskbit(offsetof(BufWriter, str_));
325 }
326
327 private:
328 void WriteRaw(char* s, int n);
329
330 MutableStr* str_; // getvalue() turns this directly into Str*, no copying
331 int len_; // how many bytes have been written so far
332 bool is_valid_ = true; // It becomes invalid after getvalue() is called
333};
334
335extern Writer* gStdout;
336
337inline Writer* Stdout() {
338 if (gStdout == nullptr) {
339 gStdout = reinterpret_cast<Writer*>(Alloc<CFile>(stdout));
340 gHeap.RootGlobalVar(gStdout);
341 }
342 return gStdout;
343}
344
345extern Writer* gStderr;
346
347inline Writer* Stderr() {
348 if (gStderr == nullptr) {
349 gStderr = reinterpret_cast<Writer*>(Alloc<CFile>(stderr));
350 gHeap.RootGlobalVar(gStderr);
351 }
352 return gStderr;
353}
354
// Intended to map object addresses to small integer IDs (see addresses_).
//
// NOTE(review): as written in this header, Add() has an empty body and Get()
// always returns -1, so addresses_ is never read or written here.
class UniqueObjects {
  // Can't be expressed in typed Python because we don't have uint64_t for
  // addresses

 public:
  UniqueObjects() {
  }
  // Currently a no-op: `obj` is ignored.
  void Add(void* obj) {
  }
  // Currently returns -1 regardless of `obj`.
  int Get(void* obj) {
    return -1;
  }

  // Object header built from this class's field mask and size.
  static constexpr ObjHeader obj_header() {
    return ObjHeader::ClassFixed(field_mask(), sizeof(UniqueObjects));
  }

  // SPECIAL CASE? We should never have a unique reference to an object? So
  // don't bother tracing
  static constexpr uint32_t field_mask() {
    return kZeroMask;
  }

 private:
  // address -> small integer ID
  Dict<void*, int> addresses_;
};
382
383} // namespace mylib
384
385#endif // MYCPP_GC_MYLIB_H