OILS / vendor / souffle / datastructure / BTree.h View on Github | oils.pub

2147 lines, 1005 significant
1/*
2 * Souffle - A Datalog Compiler
3 * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved
4 * Licensed under the Universal Permissive License v 1.0 as shown at:
5 * - https://opensource.org/licenses/UPL
6 * - <souffle root>/licenses/SOUFFLE-UPL.txt
7 */
8
9/************************************************************************
10 *
11 * @file BTree.h
12 *
13 * An implementation of a generic B-tree data structure including
14 * interfaces for utilizing instances as set or multiset containers.
15 *
16 ***********************************************************************/
17
18#pragma once
19
20#include "souffle/datastructure/BTreeUtil.h"
21#include "souffle/utility/CacheUtil.h"
22#include "souffle/utility/ContainerUtil.h"
23#include "souffle/utility/MiscUtil.h"
24#include "souffle/utility/ParallelUtil.h"
25#include <algorithm>
26#include <cassert>
27#include <cstddef>
28#include <cstdint>
29#include <iostream>
30#include <iterator>
31#include <string>
32#include <tuple>
33#include <type_traits>
34#include <typeinfo>
35#include <vector>
36
37namespace souffle {
38
39namespace detail {
40
41/**
42 * The actual implementation of a b-tree data structure.
43 *
44 * @tparam Key .. the element type to be stored in this tree
45 * @tparam Comparator .. a class defining an order on the stored elements
46 * @tparam Allocator .. utilized for allocating memory for required nodes
47 * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
48 * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
49 * @tparam isSet .. true = set, false = multiset
50 */
51template <typename Key, typename Comparator,
52 typename Allocator, // is ignored so far - TODO: add support
53 unsigned blockSize, typename SearchStrategy, bool isSet, typename WeakComparator = Comparator,
54 typename Updater = detail::updater<Key>>
55class btree {
56public:
57 class iterator;
58 using const_iterator = iterator;
59
60 using key_type = Key;
61 using element_type = Key;
62 using chunk = range<iterator>;
63
64protected:
65 /* ------------- static utilities ----------------- */
66
67 const static SearchStrategy search;
68
69 /* ---------- comparison utilities ---------------- */
70
71 mutable Comparator comp;
72
73 bool less(const Key& a, const Key& b) const {
74 return comp.less(a, b);
75 }
76
77 bool equal(const Key& a, const Key& b) const {
78 return comp.equal(a, b);
79 }
80
81 mutable WeakComparator weak_comp;
82
83 bool weak_less(const Key& a, const Key& b) const {
84 return weak_comp.less(a, b);
85 }
86
87 bool weak_equal(const Key& a, const Key& b) const {
88 return weak_comp.equal(a, b);
89 }
90
91 /* -------------- updater utilities ------------- */
92
93 mutable Updater upd;
94 bool update(Key& old_k, const Key& new_k) {
95 return upd.update(old_k, new_k);
96 }
97
98 /* -------------- the node type ----------------- */
99
100 using size_type = std::size_t;
101 using field_index_type = uint8_t;
102 using lock_type = OptimisticReadWriteLock;
103
104 struct node;
105
106 /**
107 * The base type of all node types containing essential
108 * book-keeping information.
109 */
110 struct base {
111#ifdef IS_PARALLEL
112
113 // the parent node
114 node* volatile parent;
115
116 // a lock for synchronizing parallel operations on this node
117 lock_type lock;
118
119 // the number of keys in this node
120 volatile size_type numElements;
121
122 // the position in the parent node
123 volatile field_index_type position;
124#else
125 // the parent node
126 node* parent;
127
128 // the number of keys in this node
129 size_type numElements;
130
131 // the position in the parent node
132 field_index_type position;
133#endif
134
135 // a flag indicating whether this is a inner node or not
136 const bool inner;
137
138 /**
139 * A simple constructor for nodes
140 */
141 base(bool inner) : parent(nullptr), numElements(0), position(0), inner(inner) {}
142
143 bool isLeaf() const {
144 return !inner;
145 }
146
147 bool isInner() const {
148 return inner;
149 }
150
151 node* getParent() const {
152 return parent;
153 }
154
155 field_index_type getPositionInParent() const {
156 return position;
157 }
158
159 size_type getNumElements() const {
160 return numElements;
161 }
162 };
163
164 struct inner_node;
165
166 /**
167 * The actual, generic node implementation covering the operations
168 * for both, inner and leaf nodes.
169 */
170 struct node : public base {
171 /**
172 * The number of keys/node desired by the user.
173 */
174 static constexpr std::size_t desiredNumKeys =
175 ((blockSize > sizeof(base)) ? blockSize - sizeof(base) : 0) / sizeof(Key);
176
177 /**
178 * The actual number of keys/node corrected by functional requirements.
179 */
180 static constexpr std::size_t maxKeys = (desiredNumKeys > 3) ? desiredNumKeys : 3;
181
182 // the keys stored in this node
183 Key keys[maxKeys];
184
185 // a simple constructor
186 node(bool inner) : base(inner) {}
187
188 /**
189 * A deep-copy operation creating a clone of this node.
190 */
191 node* clone() const {
192 // create a clone of this node
193 node* res = (this->isInner()) ? static_cast<node*>(new inner_node())
194 : static_cast<node*>(new leaf_node());
195
196 // copy basic fields
197 res->position = this->position;
198 res->numElements = this->numElements;
199
200 for (size_type i = 0; i < this->numElements; ++i) {
201 res->keys[i] = this->keys[i];
202 }
203
204 // if this is a leaf we are done
205 if (this->isLeaf()) {
206 return res;
207 }
208
209 // copy child nodes recursively
210 auto* ires = (inner_node*)res;
211 for (size_type i = 0; i <= this->numElements; ++i) {
212 ires->children[i] = this->getChild(i)->clone();
213 ires->children[i]->parent = res;
214 }
215
216 // that's it
217 return res;
218 }
219
220 /**
221 * A utility function providing a reference to this node as
222 * an inner node.
223 */
224 inner_node& asInnerNode() {
225 assert(this->inner && "Invalid cast!");
226 return *static_cast<inner_node*>(this);
227 }
228
229 /**
230 * A utility function providing a reference to this node as
231 * a const inner node.
232 */
233 const inner_node& asInnerNode() const {
234 assert(this->inner && "Invalid cast!");
235 return *static_cast<const inner_node*>(this);
236 }
237
238 /**
239 * Computes the number of nested levels of the tree rooted
240 * by this node.
241 */
242 size_type getDepth() const {
243 if (this->isLeaf()) {
244 return 1;
245 }
246 return getChild(0)->getDepth() + 1;
247 }
248
249 /**
250 * Counts the number of nodes contained in the sub-tree rooted
251 * by this node.
252 */
253 size_type countNodes() const {
254 if (this->isLeaf()) {
255 return 1;
256 }
257 size_type sum = 1;
258 for (unsigned i = 0; i <= this->numElements; ++i) {
259 sum += getChild(i)->countNodes();
260 }
261 return sum;
262 }
263
264 /**
265 * Counts the number of entries contained in the sub-tree rooted
266 * by this node.
267 */
268 size_type countEntries() const {
269 if (this->isLeaf()) {
270 return this->numElements;
271 }
272 size_type sum = this->numElements;
273 for (unsigned i = 0; i <= this->numElements; ++i) {
274 sum += getChild(i)->countEntries();
275 }
276 return sum;
277 }
278
279 /**
280 * Determines the amount of memory used by the sub-tree rooted
281 * by this node.
282 */
283 size_type getMemoryUsage() const {
284 if (this->isLeaf()) {
285 return sizeof(leaf_node);
286 }
287 size_type res = sizeof(inner_node);
288 for (unsigned i = 0; i <= this->numElements; ++i) {
289 res += getChild(i)->getMemoryUsage();
290 }
291 return res;
292 }
293
294 /**
295 * Obtains a pointer to the array of child-pointers
296 * of this node -- if it is an inner node.
297 */
298 node** getChildren() {
299 return asInnerNode().children;
300 }
301
302 /**
303 * Obtains a pointer to the array of const child-pointers
304 * of this node -- if it is an inner node.
305 */
306 node* const* getChildren() const {
307 return asInnerNode().children;
308 }
309
310 /**
311 * Obtains a reference to the child of the given index.
312 */
313 node* getChild(size_type s) const {
314 return asInnerNode().children[s];
315 }
316
317 /**
318 * Checks whether this node is empty -- can happen due to biased insertion.
319 */
320 bool isEmpty() const {
321 return this->numElements == 0;
322 }
323
324 /**
325 * Checks whether this node is full.
326 */
327 bool isFull() const {
328 return this->numElements == maxKeys;
329 }
330
331 /**
332 * Obtains the point at which full nodes should be split.
333 * Conventional b-trees always split in half. However, in cases
334 * where in-order insertions are frequent, a split assigning
335 * larger portions to the right fragment provide higher performance
336 * and a better node-filling rate.
337 */
338 int getSplitPoint(int /*unused*/) {
339 return static_cast<int>(std::min(3 * maxKeys / 4, maxKeys - 2));
340 }
341
342 /**
343 * Splits this node.
344 *
345 * @param root .. a pointer to the root-pointer of the enclosing b-tree
346 * (might have to be updated if the root-node needs to be split)
347 * @param idx .. the position of the insert causing the split
348 */
349#ifdef IS_PARALLEL
350 void split(node** root, lock_type& root_lock, int idx, std::vector<node*>& locked_nodes) {
351 assert(this->lock.is_write_locked());
352 assert(!this->parent || this->parent->lock.is_write_locked());
353 assert((this->parent != nullptr) || root_lock.is_write_locked());
354 assert(this->isLeaf() || souffle::contains(locked_nodes, this));
355 assert(!this->parent || souffle::contains(locked_nodes, const_cast<node*>(this->parent)));
356#else
357 void split(node** root, lock_type& root_lock, int idx) {
358#endif
359 assert(this->numElements == maxKeys);
360
361 // get middle element
362 int split_point = getSplitPoint(idx);
363
364 // create a new sibling node
365 node* sibling = (this->inner) ? static_cast<node*>(new inner_node())
366 : static_cast<node*>(new leaf_node());
367
368#ifdef IS_PARALLEL
369 // lock sibling
370 sibling->lock.start_write();
371 locked_nodes.push_back(sibling);
372#endif
373
374 // move data over to the new node
375 for (unsigned i = split_point + 1, j = 0; i < maxKeys; ++i, ++j) {
376 sibling->keys[j] = keys[i];
377 }
378
379 // move child pointers
380 if (this->inner) {
381 // move pointers to sibling
382 auto* other = static_cast<inner_node*>(sibling);
383 for (unsigned i = split_point + 1, j = 0; i <= maxKeys; ++i, ++j) {
384 other->children[j] = getChildren()[i];
385 other->children[j]->parent = other;
386 other->children[j]->position = static_cast<field_index_type>(j);
387 }
388 }
389
390 // update number of elements
391 this->numElements = split_point;
392 sibling->numElements = maxKeys - split_point - 1;
393
394 // update parent
395#ifdef IS_PARALLEL
396 grow_parent(root, root_lock, sibling, locked_nodes);
397#else
398 grow_parent(root, root_lock, sibling);
399#endif
400 }
401
402 /**
403 * Moves keys from this node to one of its siblings or splits
404 * this node to make some space for the insertion of an element at
405 * position idx.
406 *
407 * Returns the number of elements moved to the left side, 0 in case
408 * of a split. The number of moved elements will be <= the given idx.
409 *
410 * @param root .. the root node of the b-tree being part of
411 * @param idx .. the position of the insert triggering this operation
412 */
413 // TODO: remove root_lock ... no longer needed
414#ifdef IS_PARALLEL
415 int rebalance_or_split(node** root, lock_type& root_lock, int idx, std::vector<node*>& locked_nodes) {
416 assert(this->lock.is_write_locked());
417 assert(!this->parent || this->parent->lock.is_write_locked());
418 assert((this->parent != nullptr) || root_lock.is_write_locked());
419 assert(this->isLeaf() || souffle::contains(locked_nodes, this));
420 assert(!this->parent || souffle::contains(locked_nodes, const_cast<node*>(this->parent)));
421#else
422 int rebalance_or_split(node** root, lock_type& root_lock, int idx) {
423#endif
424
425 // this node is full ... and needs some space
426 assert(this->numElements == maxKeys);
427
428 // get snap-shot of parent
429 auto parent = this->parent;
430 auto pos = this->position;
431
432 // Option A) re-balance data
433 if (parent && pos > 0) {
434 node* left = parent->getChild(pos - 1);
435
436#ifdef IS_PARALLEL
437 // lock access to left sibling
438 if (!left->lock.try_start_write()) {
439 // left node is currently updated => skip balancing and split
440 split(root, root_lock, idx, locked_nodes);
441 return 0;
442 }
443#endif
444
445 // compute number of elements to be movable to left
446 // space available in left vs. insertion index
447 size_type num = static_cast<size_type>(
448 std::min<int>(static_cast<int>(maxKeys - left->numElements), idx));
449
450 // if there are elements to move ..
451 if (num > 0) {
452 Key* splitter = &(parent->keys[this->position - 1]);
453
454 // .. move keys to left node
455 left->keys[left->numElements] = *splitter;
456 for (size_type i = 0; i < num - 1; ++i) {
457 left->keys[left->numElements + 1 + i] = keys[i];
458 }
459 *splitter = keys[num - 1];
460
461 // shift keys in this node to the left
462 for (size_type i = 0; i < this->numElements - num; ++i) {
463 keys[i] = keys[i + num];
464 }
465
466 // .. and children if necessary
467 if (this->isInner()) {
468 auto* ileft = static_cast<inner_node*>(left);
469 auto* iright = static_cast<inner_node*>(this);
470
471 // move children
472 for (field_index_type i = 0; i < num; ++i) {
473 ileft->children[left->numElements + i + 1] = iright->children[i];
474 }
475
476 // update moved children
477 for (size_type i = 0; i < num; ++i) {
478 iright->children[i]->parent = ileft;
479 iright->children[i]->position =
480 static_cast<field_index_type>(left->numElements + i) + 1;
481 }
482
483 // shift child-pointer to the left
484 for (size_type i = 0; i < this->numElements - num + 1; ++i) {
485 iright->children[i] = iright->children[i + num];
486 }
487
488 // update position of children
489 for (size_type i = 0; i < this->numElements - num + 1; ++i) {
490 iright->children[i]->position = static_cast<field_index_type>(i);
491 }
492 }
493
494 // update node sizes
495 left->numElements += num;
496 this->numElements -= num;
497
498#ifdef IS_PARALLEL
499 left->lock.end_write();
500#endif
501
502 // done
503 return static_cast<int>(num);
504 }
505
506#ifdef IS_PARALLEL
507 left->lock.abort_write();
508#endif
509 }
510
511 // Option B) split node
512#ifdef IS_PARALLEL
513 split(root, root_lock, idx, locked_nodes);
514#else
515 split(root, root_lock, idx);
516#endif
517 return 0; // = no re-balancing
518 }
519
520 private:
521 /**
522 * Inserts a new sibling into the parent of this node utilizing
523 * the last key of this node as a separation key. (for internal
524 * use only)
525 *
526 * @param root .. a pointer to the root-pointer of the containing tree
527 * @param sibling .. the new right-sibling to be add to the parent node
528 */
529#ifdef IS_PARALLEL
530 void grow_parent(node** root, lock_type& root_lock, node* sibling, std::vector<node*>& locked_nodes) {
531 assert(this->lock.is_write_locked());
532 assert(!this->parent || this->parent->lock.is_write_locked());
533 assert((this->parent != nullptr) || root_lock.is_write_locked());
534 assert(this->isLeaf() || souffle::contains(locked_nodes, this));
535 assert(!this->parent || souffle::contains(locked_nodes, const_cast<node*>(this->parent)));
536#else
537 void grow_parent(node** root, lock_type& root_lock, node* sibling) {
538#endif
539
540 if (this->parent == nullptr) {
541 assert(*root == this);
542
543 // create a new root node
544 auto* new_root = new inner_node();
545 new_root->numElements = 1;
546 new_root->keys[0] = keys[this->numElements];
547
548 new_root->children[0] = this;
549 new_root->children[1] = sibling;
550
551 // link this and the sibling node to new root
552 this->parent = new_root;
553 sibling->parent = new_root;
554 sibling->position = 1;
555
556 // switch root node
557 *root = new_root;
558
559 } else {
560 // insert new element in parent element
561 auto parent = this->parent;
562 auto pos = this->position;
563
564#ifdef IS_PARALLEL
565 parent->insert_inner(
566 root, root_lock, pos, this, keys[this->numElements], sibling, locked_nodes);
567#else
568 parent->insert_inner(root, root_lock, pos, this, keys[this->numElements], sibling);
569#endif
570 }
571 }
572
573 /**
574 * Inserts a new element into an inner node (for internal use only).
575 *
576 * @param root .. a pointer to the root-pointer of the containing tree
577 * @param pos .. the position to insert the new key
578 * @param key .. the key to insert
579 * @param newNode .. the new right-child of the inserted key
580 */
581#ifdef IS_PARALLEL
582 void insert_inner(node** root, lock_type& root_lock, unsigned pos, node* predecessor, const Key& key,
583 node* newNode, std::vector<node*>& locked_nodes) {
584 assert(this->lock.is_write_locked());
585 assert(souffle::contains(locked_nodes, this));
586#else
587 void insert_inner(node** root, lock_type& root_lock, unsigned pos, node* predecessor, const Key& key,
588 node* newNode) {
589#endif
590
591 // check capacity
592 if (this->numElements >= maxKeys) {
593#ifdef IS_PARALLEL
594 assert(!this->parent || this->parent->lock.is_write_locked());
595 assert((this->parent) || root_lock.is_write_locked());
596 assert(!this->parent || souffle::contains(locked_nodes, const_cast<node*>(this->parent)));
597#endif
598
599 // split this node
600#ifdef IS_PARALLEL
601 pos -= rebalance_or_split(root, root_lock, pos, locked_nodes);
602#else
603 pos -= rebalance_or_split(root, root_lock, pos);
604#endif
605
606 // complete insertion within new sibling if necessary
607 if (pos > this->numElements) {
608 // correct position
609 pos = pos - static_cast<unsigned int>(this->numElements) - 1;
610
611 // get new sibling
612 auto other = this->parent->getChild(this->position + 1);
613
614#ifdef IS_PARALLEL
615 // make sure other side is write locked
616 assert(other->lock.is_write_locked());
617 assert(souffle::contains(locked_nodes, other));
618
619 // search for new position (since other may have been altered in the meanwhile)
620 size_type i = 0;
621 for (; i <= other->numElements; ++i) {
622 if (other->getChild(i) == predecessor) {
623 break;
624 }
625 }
626
627 pos = (i > static_cast<unsigned>(other->numElements)) ? 0 : static_cast<unsigned>(i);
628 other->insert_inner(root, root_lock, pos, predecessor, key, newNode, locked_nodes);
629#else
630 other->insert_inner(root, root_lock, pos, predecessor, key, newNode);
631#endif
632 return;
633 }
634 }
635
636 // move bigger keys one forward
637 for (int i = static_cast<int>(this->numElements) - 1; i >= (int)pos; --i) {
638 keys[i + 1] = keys[i];
639 getChildren()[i + 2] = getChildren()[i + 1];
640 ++getChildren()[i + 2]->position;
641 }
642
643 // ensure proper position
644 assert(getChild(pos) == predecessor);
645
646 // insert new element
647 keys[pos] = key;
648 getChildren()[pos + 1] = newNode;
649 newNode->parent = this;
650 newNode->position = static_cast<field_index_type>(pos) + 1;
651 ++this->numElements;
652 }
653
654 public:
655 /**
656 * Prints a textual representation of this tree to the given output stream.
657 * This feature is mainly intended for debugging and tuning purposes.
658 *
659 * @see btree::printTree
660 */
661 void printTree(std::ostream& out, const std::string& prefix) const {
662 // print the header
663 out << prefix << "@" << this << "[" << ((int)(this->position)) << "] - "
664 << (this->inner ? "i" : "") << "node : " << this->numElements << "/" << maxKeys << " [";
665
666 // print the keys
667 for (unsigned i = 0; i < this->numElements; i++) {
668 out << keys[i];
669 if (i != this->numElements - 1) {
670 out << ",";
671 }
672 }
673 out << "]";
674
675 // print references to children
676 if (this->inner) {
677 out << " - [";
678 for (unsigned i = 0; i <= this->numElements; i++) {
679 out << getChildren()[i];
680 if (i != this->numElements) {
681 out << ",";
682 }
683 }
684 out << "]";
685 }
686
687#ifdef IS_PARALLEL
688 // print the lock state
689 if (this->lock.is_write_locked()) {
690 std::cout << " locked";
691 }
692#endif
693
694 out << "\n";
695
696 // print the children recursively
697 if (this->inner) {
698 for (unsigned i = 0; i < this->numElements + 1; ++i) {
699 static_cast<const inner_node*>(this)->children[i]->printTree(out, prefix + " ");
700 }
701 }
702 }
703
704 /**
705 * A function decomposing the sub-tree rooted by this node into approximately equally
706 * sized chunks. To minimize computational overhead, no strict load balance nor limit
707 * on the number of actual chunks is given.
708 *
709 * @see btree::getChunks()
710 *
711 * @param res .. the list of chunks to be extended
712 * @param num .. the number of chunks to be produced
713 * @param begin .. the iterator to start the first chunk with
714 * @param end .. the iterator to end the last chunk with
715 * @return the handed in list of chunks extended by generated chunks
716 */
717 std::vector<chunk>& collectChunks(
718 std::vector<chunk>& res, size_type num, const iterator& begin, const iterator& end) const {
719 assert(num > 0);
720
721 // special case: this node is empty
722 if (isEmpty()) {
723 if (begin != end) {
724 res.push_back(chunk(begin, end));
725 }
726 return res;
727 }
728
729 // special case: a single chunk is requested
730 if (num == 1) {
731 res.push_back(chunk(begin, end));
732 return res;
733 }
734
735 // cut-off
736 if (this->isLeaf() || num < (this->numElements + 1)) {
737 auto step = this->numElements / num;
738 if (step == 0) {
739 step = 1;
740 }
741
742 size_type i = 0;
743
744 // the first chunk starts at the begin
745 res.push_back(chunk(begin, iterator(this, static_cast<field_index_type>(step) - 1)));
746
747 // split up the main part
748 for (i = step - 1; i < this->numElements - step; i += step) {
749 res.push_back(chunk(iterator(this, static_cast<field_index_type>(i)),
750 iterator(this, static_cast<field_index_type>(i + step))));
751 }
752
753 // the last chunk runs to the end
754 res.push_back(chunk(iterator(this, static_cast<field_index_type>(i)), end));
755
756 // done
757 return res;
758 }
759
760 // else: collect chunks of sub-set elements
761
762 auto part = num / (this->numElements + 1);
763 assert(part > 0);
764 getChild(0)->collectChunks(res, part, begin, iterator(this, 0));
765 for (size_type i = 1; i < this->numElements; i++) {
766 getChild(i)->collectChunks(res, part, iterator(this, static_cast<field_index_type>(i - 1)),
767 iterator(this, static_cast<field_index_type>(i)));
768 }
769 getChild(this->numElements)
770 ->collectChunks(res, num - (part * this->numElements),
771 iterator(this, static_cast<field_index_type>(this->numElements) - 1), end);
772
773 // done
774 return res;
775 }
776
777 /**
778 * A function to verify the consistency of this node.
779 *
780 * @param root ... a reference to the root of the enclosing tree.
781 * @return true if valid, false otherwise
782 */
783 template <typename Comp>
784 bool check(Comp& comp, const node* root) const {
785 bool valid = true;
786
787 // check fill-state
788 if (this->numElements > maxKeys) {
789 std::cout << "Node with " << this->numElements << "/" << maxKeys << " encountered!\n";
790 valid = false;
791 }
792
793 // check root state
794 if (root == this) {
795 if (this->parent != nullptr) {
796 std::cout << "Root not properly linked!\n";
797 valid = false;
798 }
799 } else {
800 // check parent relation
801 if (!this->parent) {
802 std::cout << "Invalid null-parent!\n";
803 valid = false;
804 } else {
805 if (this->parent->getChildren()[this->position] != this) {
806 std::cout << "Parent reference invalid!\n";
807 std::cout << " Node: " << this << "\n";
808 std::cout << " Parent: " << this->parent << "\n";
809 std::cout << " Position: " << ((int)this->position) << "\n";
810 valid = false;
811 }
812
813 // check parent key
814 if (valid && this->position != 0 &&
815 !(comp(this->parent->keys[this->position - 1], keys[0]) < ((isSet) ? 0 : 1))) {
816 std::cout << "Left parent key not lower bound!\n";
817 std::cout << " Node: " << this << "\n";
818 std::cout << " Parent: " << this->parent << "\n";
819 std::cout << " Position: " << ((int)this->position) << "\n";
820 std::cout << " Key: " << (this->parent->keys[this->position]) << "\n";
821 std::cout << " Lower: " << (keys[0]) << "\n";
822 valid = false;
823 }
824
825 // check parent key
826 if (valid && this->position != this->parent->numElements &&
827 !(comp(keys[this->numElements - 1], this->parent->keys[this->position]) <
828 ((isSet) ? 0 : 1))) {
829 std::cout << "Right parent key not lower bound!\n";
830 std::cout << " Node: " << this << "\n";
831 std::cout << " Parent: " << this->parent << "\n";
832 std::cout << " Position: " << ((int)this->position) << "\n";
833 std::cout << " Key: " << (this->parent->keys[this->position]) << "\n";
834 std::cout << " Upper: " << (keys[0]) << "\n";
835 valid = false;
836 }
837 }
838 }
839
840 // check element order
841 if (this->numElements > 0) {
842 for (unsigned i = 0; i < this->numElements - 1; i++) {
843 if (valid && !(comp(keys[i], keys[i + 1]) < ((isSet) ? 0 : 1))) {
844 std::cout << "Element order invalid!\n";
845 std::cout << " @" << this << " key " << i << " is " << keys[i] << " vs "
846 << keys[i + 1] << "\n";
847 valid = false;
848 }
849 }
850 }
851
852 // check state of sub-nodes
853 if (this->inner) {
854 for (unsigned i = 0; i <= this->numElements; i++) {
855 valid &= getChildren()[i]->check(comp, root);
856 }
857 }
858
859 return valid;
860 }
861 }; // namespace detail
862
863 /**
864 * The data type representing inner nodes of the b-tree. It extends
865 * the generic implementation of a node by the storage locations
866 * of child pointers.
867 */
868 struct inner_node : public node {
869 // references to child nodes owned by this node
870 node* children[node::maxKeys + 1];
871
872 // a simple default constructor initializing member fields
873 inner_node() : node(true) {}
874
875 // clear up child nodes recursively
876 ~inner_node() {
877 for (unsigned i = 0; i <= this->numElements; ++i) {
878 if (children[i] != nullptr) {
879 if (children[i]->isLeaf()) {
880 delete static_cast<leaf_node*>(children[i]);
881 } else {
882 delete static_cast<inner_node*>(children[i]);
883 }
884 }
885 }
886 }
887 };
888
889 /**
890 * The data type representing leaf nodes of the b-tree. It does not
891 * add any capabilities to the generic node type.
892 */
893 struct leaf_node : public node {
894 // a simple default constructor initializing member fields
895 leaf_node() : node(false) {}
896 };
897
898 // ------------------- iterators ------------------------
899
900public:
901 /**
902 * The iterator type to be utilized for scanning through btree instances.
903 */
904 class iterator {
905 // a pointer to the node currently referred to
906 node const* cur;
907
908 // the index of the element currently addressed within the referenced node
909 field_index_type pos = 0;
910
911 public:
912 using iterator_category = std::forward_iterator_tag;
913 using value_type = Key;
914 using difference_type = ptrdiff_t;
915 using pointer = value_type*;
916 using reference = value_type&;
917
918 // default constructor -- creating an end-iterator
919 iterator() : cur(nullptr) {}
920
921 // creates an iterator referencing a specific element within a given node
922 iterator(node const* cur, field_index_type pos) : cur(cur), pos(pos) {}
923
924 // a copy constructor
925 iterator(const iterator& other) : cur(other.cur), pos(other.pos) {}
926
927 // an assignment operator
928 iterator& operator=(const iterator& other) {
929 cur = other.cur;
930 pos = other.pos;
931 return *this;
932 }
933
934 // the equality operator as required by the iterator concept
935 bool operator==(const iterator& other) const {
936 return cur == other.cur && pos == other.pos;
937 }
938
939 // the not-equality operator as required by the iterator concept
940 bool operator!=(const iterator& other) const {
941 return !(*this == other);
942 }
943
944 // the deref operator as required by the iterator concept
945 const Key& operator*() const {
946 return cur->keys[pos];
947 }
948
949 // the increment operator as required by the iterator concept
950 iterator& operator++() {
951 // the quick mode -- if in a leaf and there are elements left
952 if (cur->isLeaf() && ++pos < cur->getNumElements()) {
953 return *this;
954 }
955
956 // otherwise it is a bit more tricky
957
958 // A) currently in an inner node => go to the left-most child
959 if (cur->isInner()) {
960 cur = cur->getChildren()[pos + 1];
961 while (!cur->isLeaf()) {
962 cur = cur->getChildren()[0];
963 }
964 pos = 0;
965
966 // nodes may be empty due to biased insertion
967 if (!cur->isEmpty()) {
968 return *this;
969 }
970 }
971
972 // B) we are at the right-most element of a leaf => go to next inner node
973 assert(cur->isLeaf());
974 assert(pos == cur->getNumElements());
975
976 while (cur != nullptr && pos == cur->getNumElements()) {
977 pos = cur->getPositionInParent();
978 cur = cur->getParent();
979 }
980 return *this;
981 }
982
983 // prints a textual representation of this iterator to the given stream (mainly for debugging)
984 void print(std::ostream& out = std::cout) const {
985 out << cur << "[" << (int)pos << "]";
986 }
987 };
988
989 /**
990 * A collection of operation hints speeding up some of the involved operations
991 * by exploiting temporal locality.
992 */
993 template <unsigned size = 1>
994 struct btree_operation_hints {
995 using node_cache = LRUCache<node*, size>;
996
997 // the node where the last insertion terminated
998 node_cache last_insert;
999
1000 // the node where the last find-operation terminated
1001 node_cache last_find_end;
1002
1003 // the node where the last lower-bound operation terminated
1004 node_cache last_lower_bound_end;
1005
1006 // the node where the last upper-bound operation terminated
1007 node_cache last_upper_bound_end;
1008
1009 // default constructor
1010 btree_operation_hints() = default;
1011
1012 // resets all hints (to be triggered e.g. when deleting nodes)
1013 void clear() {
1014 last_insert.clear(nullptr);
1015 last_find_end.clear(nullptr);
1016 last_lower_bound_end.clear(nullptr);
1017 last_upper_bound_end.clear(nullptr);
1018 }
1019 };
1020
1021 using operation_hints = btree_operation_hints<1>;
1022
1023protected:
1024#ifdef IS_PARALLEL
1025 // a pointer to the root node of this tree
1026 node* volatile root;
1027
1028 // a lock to synchronize update operations on the root pointer
1029 lock_type root_lock;
1030#else
1031 // a pointer to the root node of this tree
1032 node* root;
1033
1034 // required to not duplicate too much code
1035 lock_type root_lock;
1036#endif
1037
1038 // a pointer to the left-most node of this tree (initial note for iteration)
1039 leaf_node* leftmost;
1040
1041 /* -------------- operator hint statistics ----------------- */
1042
1043 // an aggregation of statistical values of the hint utilization
1044 struct hint_statistics {
1045 // the counter for insertion operations
1046 CacheAccessCounter inserts;
1047
1048 // the counter for contains operations
1049 CacheAccessCounter contains;
1050
1051 // the counter for lower_bound operations
1052 CacheAccessCounter lower_bound;
1053
1054 // the counter for upper_bound operations
1055 CacheAccessCounter upper_bound;
1056 };
1057
1058 // the hint statistic of this b-tree instance
1059 mutable hint_statistics hint_stats;
1060
1061public:
1062 // the maximum number of keys stored per node
1063 static constexpr std::size_t max_keys_per_node = node::maxKeys;
1064
1065 // -- ctors / dtors --
1066
1067 // the default constructor creating an empty tree
1068 btree(Comparator comp = Comparator(), WeakComparator weak_comp = WeakComparator())
1069 : comp(std::move(comp)), weak_comp(std::move(weak_comp)), root(nullptr), leftmost(nullptr) {}
1070
1071 // a constructor creating a tree from the given iterator range
1072 template <typename Iter>
1073 btree(const Iter& a, const Iter& b) : root(nullptr), leftmost(nullptr) {
1074 insert(a, b);
1075 }
1076
1077 // a move constructor
1078 btree(btree&& other)
1079 : comp(other.comp), weak_comp(other.weak_comp), root(other.root), leftmost(other.leftmost) {
1080 other.root = nullptr;
1081 other.leftmost = nullptr;
1082 }
1083
1084 // a copy constructor
1085 btree(const btree& set) : comp(set.comp), weak_comp(set.weak_comp), root(nullptr), leftmost(nullptr) {
1086 // use assignment operator for a deep copy
1087 *this = set;
1088 }
1089
1090protected:
1091 /**
1092 * An internal constructor enabling the specific creation of a tree
1093 * based on internal parameters.
1094 */
1095 btree(size_type /* size */, node* root, leaf_node* leftmost) : root(root), leftmost(leftmost) {}
1096
1097public:
1098 // the destructor freeing all contained nodes
1099 ~btree() {
1100 clear();
1101 }
1102
1103 // -- mutators and observers --
1104
1105 // emptiness check
1106 bool empty() const {
1107 return root == nullptr;
1108 }
1109
1110 // determines the number of elements in this tree
1111 size_type size() const {
1112 return (root) ? root->countEntries() : 0;
1113 }
1114
1115 /**
1116 * Inserts the given key into this tree.
1117 */
1118 bool insert(const Key& k) {
1119 operation_hints hints;
1120 return insert(k, hints);
1121 }
1122
1123 /**
1124 * Inserts the given key into this tree.
1125 */
1126 bool insert(const Key& k, operation_hints& hints) {
1127#ifdef IS_PARALLEL
1128
1129 // special handling for inserting first element
1130 while (root == nullptr) {
1131 // try obtaining root-lock
1132 if (!root_lock.try_start_write()) {
1133 // somebody else was faster => re-check
1134 continue;
1135 }
1136
1137 // check loop condition again
1138 if (root != nullptr) {
1139 // somebody else was faster => normal insert
1140 root_lock.end_write();
1141 break;
1142 }
1143
1144 // create new node
1145 leftmost = new leaf_node();
1146 leftmost->numElements = 1;
1147 leftmost->keys[0] = k;
1148 root = leftmost;
1149
1150 // operation complete => we can release the root lock
1151 root_lock.end_write();
1152
1153 hints.last_insert.access(leftmost);
1154
1155 return true;
1156 }
1157
1158 // insert using iterative implementation
1159
1160 node* cur = nullptr;
1161
1162 // test last insert hints
1163 lock_type::Lease cur_lease;
1164
1165 auto checkHint = [&](node* last_insert) {
1166 // ignore null pointer
1167 if (!last_insert) return false;
1168 // get a read lease on indicated node
1169 auto hint_lease = last_insert->lock.start_read();
1170 // check whether it covers the key
1171 if (!weak_covers(last_insert, k)) return false;
1172 // and if there was no concurrent modification
1173 if (!last_insert->lock.validate(hint_lease)) return false;
1174 // use hinted location
1175 cur = last_insert;
1176 // and keep lease
1177 cur_lease = hint_lease;
1178 // we found a hit
1179 return true;
1180 };
1181
1182 if (hints.last_insert.any(checkHint)) {
1183 // register this as a hit
1184 hint_stats.inserts.addHit();
1185 } else {
1186 // register this as a miss
1187 hint_stats.inserts.addMiss();
1188 }
1189
1190 // if there is no valid hint ..
1191 if (!cur) {
1192 do {
1193 // get root - access lock
1194 auto root_lease = root_lock.start_read();
1195
1196 // start with root
1197 cur = root;
1198
1199 // get lease of the next node to be accessed
1200 cur_lease = cur->lock.start_read();
1201
1202 // check validity of root pointer
1203 if (root_lock.end_read(root_lease)) {
1204 break;
1205 }
1206
1207 } while (true);
1208 }
1209
1210 while (true) {
1211 // handle inner nodes
1212 if (cur->inner) {
1213 auto a = &(cur->keys[0]);
1214 auto b = &(cur->keys[cur->numElements]);
1215
1216 auto pos = search.lower_bound(k, a, b, weak_comp);
1217 auto idx = pos - a;
1218
1219 // early exit for sets
1220 if (isSet && pos != b && weak_equal(*pos, k)) {
1221 // validate results
1222 if (!cur->lock.validate(cur_lease)) {
1223 // start over again
1224 return insert(k, hints);
1225 }
1226
1227 // update provenance information
1228 if (typeid(Comparator) != typeid(WeakComparator)) {
1229 if (!cur->lock.try_upgrade_to_write(cur_lease)) {
1230 // start again
1231 return insert(k, hints);
1232 }
1233 bool updated = update(*pos, k);
1234 cur->lock.end_write();
1235 return updated;
1236 }
1237
1238 // we found the element => no check of lock necessary
1239 return false;
1240 }
1241
1242 // get next pointer
1243 auto next = cur->getChild(idx);
1244
1245 // get lease on next level
1246 auto next_lease = next->lock.start_read();
1247
1248 // check whether there was a write
1249 if (!cur->lock.end_read(cur_lease)) {
1250 // start over
1251 return insert(k, hints);
1252 }
1253
1254 // go to next
1255 cur = next;
1256
1257 // move on lease
1258 cur_lease = next_lease;
1259
1260 continue;
1261 }
1262
1263 // the rest is for leaf nodes
1264 assert(!cur->inner);
1265
1266 // -- insert node in leaf node --
1267
1268 auto a = &(cur->keys[0]);
1269 auto b = &(cur->keys[cur->numElements]);
1270
1271 auto pos = search.upper_bound(k, a, b, weak_comp);
1272 auto idx = pos - a;
1273
1274 // early exit for sets
1275 if (isSet && pos != a && weak_equal(*(pos - 1), k)) {
1276 // validate result
1277 if (!cur->lock.validate(cur_lease)) {
1278 // start over again
1279 return insert(k, hints);
1280 }
1281
1282 // update provenance information
1283 if (typeid(Comparator) != typeid(WeakComparator)) {
1284 if (!cur->lock.try_upgrade_to_write(cur_lease)) {
1285 // start again
1286 return insert(k, hints);
1287 }
1288 bool updated = update(*(pos - 1), k);
1289 cur->lock.end_write();
1290 return updated;
1291 }
1292
1293 // we found the element => done
1294 return false;
1295 }
1296
1297 // upgrade to write-permission
1298 if (!cur->lock.try_upgrade_to_write(cur_lease)) {
1299 // something has changed => restart
1300 hints.last_insert.access(cur);
1301 return insert(k, hints);
1302 }
1303
1304 if (cur->numElements >= node::maxKeys) {
1305 // -- lock parents --
1306 auto priv = cur;
1307 auto parent = priv->parent;
1308 std::vector<node*> parents;
1309 do {
1310 if (parent) {
1311 parent->lock.start_write();
1312 while (true) {
1313 // check whether parent is correct
1314 if (parent == priv->parent) {
1315 break;
1316 }
1317 // switch parent
1318 parent->lock.abort_write();
1319 parent = priv->parent;
1320 parent->lock.start_write();
1321 }
1322 } else {
1323 // lock root lock => since cur is root
1324 root_lock.start_write();
1325 }
1326
1327 // record locked node
1328 parents.push_back(parent);
1329
1330 // stop at "sphere of influence"
1331 if (!parent || !parent->isFull()) {
1332 break;
1333 }
1334
1335 // go one step higher
1336 priv = parent;
1337 parent = parent->parent;
1338
1339 } while (true);
1340
1341 // split this node
1342 auto old_root = root;
1343 idx -= cur->rebalance_or_split(
1344 const_cast<node**>(&root), root_lock, static_cast<int>(idx), parents);
1345
1346 // release parent lock
1347 for (auto it = parents.rbegin(); it != parents.rend(); ++it) {
1348 auto parent = *it;
1349
1350 // release this lock
1351 if (parent) {
1352 parent->lock.end_write();
1353 } else {
1354 if (old_root != root) {
1355 root_lock.end_write();
1356 } else {
1357 root_lock.abort_write();
1358 }
1359 }
1360 }
1361
1362 // insert element in right fragment
1363 if (((size_type)idx) > cur->numElements) {
1364 // release current lock
1365 cur->lock.end_write();
1366
1367 // insert in sibling
1368 return insert(k, hints);
1369 }
1370 }
1371
1372 // ok - no split necessary
1373 assert(cur->numElements < node::maxKeys && "Split required!");
1374
1375 // move keys
1376 for (int j = static_cast<int>(cur->numElements); j > static_cast<int>(idx); --j) {
1377 cur->keys[j] = cur->keys[j - 1];
1378 }
1379
1380 // insert new element
1381 cur->keys[idx] = k;
1382 cur->numElements++;
1383
1384 // release lock on current node
1385 cur->lock.end_write();
1386
1387 // remember last insertion position
1388 hints.last_insert.access(cur);
1389 return true;
1390 }
1391
1392#else
1393 // special handling for inserting first element
1394 if (empty()) {
1395 // create new node
1396 leftmost = new leaf_node();
1397 leftmost->numElements = 1;
1398 leftmost->keys[0] = k;
1399 root = leftmost;
1400
1401 hints.last_insert.access(leftmost);
1402
1403 return true;
1404 }
1405
1406 // insert using iterative implementation
1407 node* cur = root;
1408
1409 auto checkHints = [&](node* last_insert) {
1410 if (!last_insert) return false;
1411 if (!weak_covers(last_insert, k)) return false;
1412 cur = last_insert;
1413 return true;
1414 };
1415
1416 // test last insert
1417 if (hints.last_insert.any(checkHints)) {
1418 hint_stats.inserts.addHit();
1419 } else {
1420 hint_stats.inserts.addMiss();
1421 }
1422
1423 while (true) {
1424 // handle inner nodes
1425 if (cur->inner) {
1426 auto a = &(cur->keys[0]);
1427 auto b = &(cur->keys[cur->numElements]);
1428
1429 auto pos = search.lower_bound(k, a, b, weak_comp);
1430 auto idx = pos - a;
1431
1432 // early exit for sets
1433 if (isSet && pos != b && weak_equal(*pos, k)) {
1434 // update provenance information
1435 if (typeid(Comparator) != typeid(WeakComparator)) {
1436 return update(*pos, k);
1437 }
1438
1439 return false;
1440 }
1441
1442 cur = cur->getChild(idx);
1443 continue;
1444 }
1445
1446 // the rest is for leaf nodes
1447 assert(!cur->inner);
1448
1449 // -- insert node in leaf node --
1450
1451 auto a = &(cur->keys[0]);
1452 auto b = &(cur->keys[cur->numElements]);
1453
1454 auto pos = search.upper_bound(k, a, b, weak_comp);
1455 auto idx = pos - a;
1456
1457 // early exit for sets
1458 if (isSet && pos != a && weak_equal(*(pos - 1), k)) {
1459 // update provenance information
1460 if (typeid(Comparator) != typeid(WeakComparator)) {
1461 return update(*(pos - 1), k);
1462 }
1463
1464 return false;
1465 }
1466
1467 if (cur->numElements >= node::maxKeys) {
1468 // split this node
1469 idx -= cur->rebalance_or_split(&root, root_lock, static_cast<int>(idx));
1470
1471 // insert element in right fragment
1472 if (((size_type)idx) > cur->numElements) {
1473 idx -= cur->numElements + 1;
1474 cur = cur->parent->getChild(cur->position + 1);
1475 }
1476 }
1477
1478 // ok - no split necessary
1479 assert(cur->numElements < node::maxKeys && "Split required!");
1480
1481 // move keys
1482 for (int j = static_cast<int>(cur->numElements); j > idx; --j) {
1483 cur->keys[j] = cur->keys[j - 1];
1484 }
1485
1486 // insert new element
1487 cur->keys[idx] = k;
1488 cur->numElements++;
1489
1490 // remember last insertion position
1491 hints.last_insert.access(cur);
1492
1493 return true;
1494 }
1495#endif
1496 }
1497
1498 /**
1499 * Inserts the given range of elements into this tree.
1500 */
1501 template <typename Iter>
1502 void insert(const Iter& a, const Iter& b) {
1503 // TODO: improve this beyond a naive insert
1504 operation_hints hints;
1505 // a naive insert so far .. seems to work fine
1506 for (auto it = a; it != b; ++it) {
1507 // use insert with hint
1508 insert(*it, hints);
1509 }
1510 }
1511
1512 // Obtains an iterator referencing the first element of the tree.
1513 iterator begin() const {
1514 return iterator(leftmost, 0);
1515 }
1516
1517 // Obtains an iterator referencing the position after the last element of the tree.
1518 iterator end() const {
1519 return iterator();
1520 }
1521
1522 /**
1523 * Partitions the full range of this set into up to a given number of chunks.
1524 * The chunks will cover approximately the same number of elements. Also, the
1525 * number of chunks will only approximate the desired number of chunks.
1526 *
1527 * @param num .. the number of chunks requested
1528 * @return a list of chunks partitioning this tree
1529 */
1530 std::vector<chunk> partition(size_type num) const {
1531 return getChunks(num);
1532 }
1533
1534 std::vector<chunk> getChunks(size_type num) const {
1535 std::vector<chunk> res;
1536 if (empty()) {
1537 return res;
1538 }
1539 return root->collectChunks(res, num, begin(), end());
1540 }
1541
1542 /**
1543 * Determines whether the given element is a member of this tree.
1544 */
1545 bool contains(const Key& k) const {
1546 operation_hints hints;
1547 return contains(k, hints);
1548 }
1549
1550 /**
1551 * Determines whether the given element is a member of this tree.
1552 */
1553 bool contains(const Key& k, operation_hints& hints) const {
1554 return find(k, hints) != end();
1555 }
1556
1557 /**
1558 * Locates the given key within this tree and returns an iterator
1559 * referencing its position. If not found, an end-iterator will be returned.
1560 */
1561 iterator find(const Key& k) const {
1562 operation_hints hints;
1563 return find(k, hints);
1564 }
1565
1566 /**
1567 * Locates the given key within this tree and returns an iterator
1568 * referencing its position. If not found, an end-iterator will be returned.
1569 */
1570 iterator find(const Key& k, operation_hints& hints) const {
1571 if (empty()) {
1572 return end();
1573 }
1574
1575 node* cur = root;
1576
1577 auto checkHints = [&](node* last_find_end) {
1578 if (!last_find_end) return false;
1579 if (!covers(last_find_end, k)) return false;
1580 cur = last_find_end;
1581 return true;
1582 };
1583
1584 // test last location searched (temporal locality)
1585 if (hints.last_find_end.any(checkHints)) {
1586 // register it as a hit
1587 hint_stats.contains.addHit();
1588 } else {
1589 // register it as a miss
1590 hint_stats.contains.addMiss();
1591 }
1592
1593 // an iterative implementation (since 2/7 faster than recursive)
1594
1595 while (true) {
1596 auto a = &(cur->keys[0]);
1597 auto b = &(cur->keys[cur->numElements]);
1598
1599 auto pos = search(k, a, b, comp);
1600
1601 if (pos < b && equal(*pos, k)) {
1602 hints.last_find_end.access(cur);
1603 return iterator(cur, static_cast<field_index_type>(pos - a));
1604 }
1605
1606 if (!cur->inner) {
1607 hints.last_find_end.access(cur);
1608 return end();
1609 }
1610
1611 // continue search in child node
1612 cur = cur->getChild(pos - a);
1613 }
1614 }
1615
1616 /**
1617 * Obtains a lower boundary for the given key -- hence an iterator referencing
1618 * the smallest value that is not less the given key. If there is no such element,
1619 * an end-iterator will be returned.
1620 */
1621 iterator lower_bound(const Key& k) const {
1622 operation_hints hints;
1623 return lower_bound(k, hints);
1624 }
1625
1626 /**
1627 * Obtains a lower boundary for the given key -- hence an iterator referencing
1628 * the smallest value that is not less the given key. If there is no such element,
1629 * an end-iterator will be returned.
1630 */
1631 iterator lower_bound(const Key& k, operation_hints& hints) const {
1632 if (empty()) {
1633 return end();
1634 }
1635
1636 node* cur = root;
1637
1638 auto checkHints = [&](node* last_lower_bound_end) {
1639 if (!last_lower_bound_end) return false;
1640 if (!covers(last_lower_bound_end, k)) return false;
1641 cur = last_lower_bound_end;
1642 return true;
1643 };
1644
1645 // test last searched node
1646 if (hints.last_lower_bound_end.any(checkHints)) {
1647 hint_stats.lower_bound.addHit();
1648 } else {
1649 hint_stats.lower_bound.addMiss();
1650 }
1651
1652 iterator res = end();
1653 while (true) {
1654 auto a = &(cur->keys[0]);
1655 auto b = &(cur->keys[cur->numElements]);
1656
1657 auto pos = search.lower_bound(k, a, b, comp);
1658 auto idx = static_cast<field_index_type>(pos - a);
1659
1660 if (!cur->inner) {
1661 hints.last_lower_bound_end.access(cur);
1662 return (pos != b) ? iterator(cur, idx) : res;
1663 }
1664
1665 if (isSet && pos != b && equal(*pos, k)) {
1666 return iterator(cur, idx);
1667 }
1668
1669 if (pos != b) {
1670 res = iterator(cur, idx);
1671 }
1672
1673 cur = cur->getChild(idx);
1674 }
1675 }
1676
1677 /**
1678 * Obtains an upper boundary for the given key -- hence an iterator referencing
1679 * the first element that the given key is less than the referenced value. If
1680 * there is no such element, an end-iterator will be returned.
1681 */
1682 iterator upper_bound(const Key& k) const {
1683 operation_hints hints;
1684 return upper_bound(k, hints);
1685 }
1686
1687 /**
1688 * Obtains an upper boundary for the given key -- hence an iterator referencing
1689 * the first element that the given key is less than the referenced value. If
1690 * there is no such element, an end-iterator will be returned.
1691 */
1692 iterator upper_bound(const Key& k, operation_hints& hints) const {
1693 if (empty()) {
1694 return end();
1695 }
1696
1697 node* cur = root;
1698
1699 auto checkHints = [&](node* last_upper_bound_end) {
1700 if (!last_upper_bound_end) return false;
1701 if (!coversUpperBound(last_upper_bound_end, k)) return false;
1702 cur = last_upper_bound_end;
1703 return true;
1704 };
1705
1706 // test last search node
1707 if (hints.last_upper_bound_end.any(checkHints)) {
1708 hint_stats.upper_bound.addHit();
1709 } else {
1710 hint_stats.upper_bound.addMiss();
1711 }
1712
1713 iterator res = end();
1714 while (true) {
1715 auto a = &(cur->keys[0]);
1716 auto b = &(cur->keys[cur->numElements]);
1717
1718 auto pos = search.upper_bound(k, a, b, comp);
1719 auto idx = static_cast<field_index_type>(pos - a);
1720
1721 if (!cur->inner) {
1722 hints.last_upper_bound_end.access(cur);
1723 return (pos != b) ? iterator(cur, idx) : res;
1724 }
1725
1726 if (pos != b) {
1727 res = iterator(cur, idx);
1728 }
1729
1730 cur = cur->getChild(idx);
1731 }
1732 }
1733
1734 /**
1735 * Clears this tree.
1736 */
1737 void clear() {
1738 if (root != nullptr) {
1739 if (root->isLeaf()) {
1740 delete static_cast<leaf_node*>(root);
1741 } else {
1742 delete static_cast<inner_node*>(root);
1743 }
1744 }
1745 root = nullptr;
1746 leftmost = nullptr;
1747 }
1748
1749 /**
1750 * Swaps the content of this tree with the given tree. This
1751 * is a much more efficient operation than creating a copy and
1752 * realizing the swap utilizing assignment operations.
1753 */
1754 void swap(btree& other) {
1755 // swap the content
1756 std::swap(root, other.root);
1757 std::swap(leftmost, other.leftmost);
1758 }
1759
1760 // Implementation of the assignment operation for trees.
1761 btree& operator=(const btree& other) {
1762 // check identity
1763 if (this == &other) {
1764 return *this;
1765 }
1766
1767 // create a deep-copy of the content of the other tree
1768 // shortcut for empty sets
1769 if (other.empty()) {
1770 return *this;
1771 }
1772
1773 // clone content (deep copy)
1774 root = other.root->clone();
1775
1776 // update leftmost reference
1777 auto tmp = root;
1778 while (!tmp->isLeaf()) {
1779 tmp = tmp->getChild(0);
1780 }
1781 leftmost = static_cast<leaf_node*>(tmp);
1782
1783 // done
1784 return *this;
1785 }
1786
1787 // Implementation of an equality operation for trees.
1788 bool operator==(const btree& other) const {
1789 // check identity
1790 if (this == &other) {
1791 return true;
1792 }
1793
1794 // check size
1795 if (size() != other.size()) {
1796 return false;
1797 }
1798 if (size() < other.size()) {
1799 return other == *this;
1800 }
1801
1802 // check content
1803 for (const auto& key : other) {
1804 if (!contains(key)) {
1805 return false;
1806 }
1807 }
1808 return true;
1809 }
1810
1811 // Implementation of an inequality operation for trees.
1812 bool operator!=(const btree& other) const {
1813 return !(*this == other);
1814 }
1815
1816 // -- for debugging --
1817
1818 // Determines the number of levels contained in this tree.
1819 size_type getDepth() const {
1820 return (empty()) ? 0 : root->getDepth();
1821 }
1822
1823 // Determines the number of nodes contained in this tree.
1824 size_type getNumNodes() const {
1825 return (empty()) ? 0 : root->countNodes();
1826 }
1827
1828 // Determines the amount of memory used by this data structure
1829 size_type getMemoryUsage() const {
1830 return sizeof(*this) + (empty() ? 0 : root->getMemoryUsage());
1831 }
1832
1833 /*
1834 * Prints a textual representation of this tree to the given
1835 * output stream (mostly for debugging and tuning).
1836 */
1837 void printTree(std::ostream& out = std::cout) const {
1838 out << "B-Tree with " << size() << " elements:\n";
1839 if (empty()) {
1840 out << " - empty - \n";
1841 } else {
1842 root->printTree(out, "");
1843 }
1844 }
1845
1846 /**
1847 * Prints a textual summary of statistical properties of this
1848 * tree to the given output stream (for debugging and tuning).
1849 */
1850 void printStats(std::ostream& out = std::cout) const {
1851 auto nodes = getNumNodes();
1852 out << " ---------------------------------\n";
1853 out << " Elements: " << size() << "\n";
1854 out << " Depth: " << (empty() ? 0 : root->getDepth()) << "\n";
1855 out << " Nodes: " << nodes << "\n";
1856 out << " ---------------------------------\n";
1857 out << " Size of inner node: " << sizeof(inner_node) << "\n";
1858 out << " Size of leaf node: " << sizeof(leaf_node) << "\n";
1859 out << " Size of Key: " << sizeof(Key) << "\n";
1860 out << " max keys / node: " << node::maxKeys << "\n";
1861 out << " avg keys / node: " << (size() / (double)nodes) << "\n";
1862 out << " avg filling rate: " << ((size() / (double)nodes) / node::maxKeys) << "\n";
1863 out << " ---------------------------------\n";
1864 out << " insert-hint (hits/misses/total): " << hint_stats.inserts.getHits() << "/"
1865 << hint_stats.inserts.getMisses() << "/" << hint_stats.inserts.getAccesses() << "\n";
1866 out << " contains-hint(hits/misses/total):" << hint_stats.contains.getHits() << "/"
1867 << hint_stats.contains.getMisses() << "/" << hint_stats.contains.getAccesses() << "\n";
1868 out << " lower-bound-hint (hits/misses/total):" << hint_stats.lower_bound.getHits() << "/"
1869 << hint_stats.lower_bound.getMisses() << "/" << hint_stats.lower_bound.getAccesses() << "\n";
1870 out << " upper-bound-hint (hits/misses/total):" << hint_stats.upper_bound.getHits() << "/"
1871 << hint_stats.upper_bound.getMisses() << "/" << hint_stats.upper_bound.getAccesses() << "\n";
1872 out << " ---------------------------------\n";
1873 }
1874
1875 /**
1876 * Checks the consistency of this tree.
1877 */
1878 bool check() {
1879 auto ok = empty() || root->check(comp, root);
1880 if (!ok) {
1881 printTree();
1882 }
1883 return ok;
1884 }
1885
1886 /**
1887 * A static member enabling the bulk-load of ordered data into an empty
1888 * tree. This function is much more efficient in creating a index over
1889 * an ordered set of elements than an iterative insertion of values.
1890 *
1891 * @tparam Iter .. the type of iterator specifying the range
1892 * it must be a random-access iterator
1893 */
1894 template <typename R, typename Iter>
1895 static typename std::enable_if<std::is_same<typename std::iterator_traits<Iter>::iterator_category,
1896 std::random_access_iterator_tag>::value,
1897 R>::type
1898 load(const Iter& a, const Iter& b) {
1899 // quick exit - empty range
1900 if (a == b) {
1901 return R();
1902 }
1903
1904 // resolve tree recursively
1905 auto root = buildSubTree(a, b - 1);
1906
1907 // find leftmost node
1908 node* leftmost = root;
1909 while (!leftmost->isLeaf()) {
1910 leftmost = leftmost->getChild(0);
1911 }
1912
1913 // build result
1914 return R(b - a, root, static_cast<leaf_node*>(leftmost));
1915 }
1916
1917protected:
1918 /**
1919 * Determines whether the range covered by the given node is also
1920 * covering the given key value.
1921 */
1922 bool covers(const node* node, const Key& k) const {
1923 if (isSet) {
1924 // in sets we can include the ends as covered elements
1925 return !node->isEmpty() && !less(k, node->keys[0]) && !less(node->keys[node->numElements - 1], k);
1926 }
1927 // in multi-sets the ends may not be completely covered
1928 return !node->isEmpty() && less(node->keys[0], k) && less(k, node->keys[node->numElements - 1]);
1929 }
1930
1931 /**
1932 * Determines whether the range covered by the given node is also
1933 * covering the given key value.
1934 */
1935 bool weak_covers(const node* node, const Key& k) const {
1936 if (isSet) {
1937 // in sets we can include the ends as covered elements
1938 return !node->isEmpty() && !weak_less(k, node->keys[0]) &&
1939 !weak_less(node->keys[node->numElements - 1], k);
1940 }
1941 // in multi-sets the ends may not be completely covered
1942 return !node->isEmpty() && weak_less(node->keys[0], k) &&
1943 weak_less(k, node->keys[node->numElements - 1]);
1944 }
1945
1946private:
1947 /**
1948 * Determines whether the range covered by this node covers
1949 * the upper bound of the given key.
1950 */
1951 bool coversUpperBound(const node* node, const Key& k) const {
1952 // ignore edges
1953 return !node->isEmpty() && !less(k, node->keys[0]) && less(k, node->keys[node->numElements - 1]);
1954 }
1955
1956 // Utility function for the load operation above.
1957 template <typename Iter>
1958 static node* buildSubTree(const Iter& a, const Iter& b) {
1959 const int N = node::maxKeys;
1960
1961 // divide range in N+1 sub-ranges
1962 int64_t length = (b - a) + 1;
1963
1964 // terminal case: length is less then maxKeys
1965 if (length <= N) {
1966 // create a leaf node
1967 node* res = new leaf_node();
1968 res->numElements = length;
1969
1970 for (int i = 0; i < length; ++i) {
1971 res->keys[i] = a[i];
1972 }
1973
1974 return res;
1975 }
1976
1977 // recursive case - compute step size
1978 int numKeys = N;
1979 int64_t step = ((length - numKeys) / (numKeys + 1));
1980
1981 while (numKeys > 1 && (step < N / 2)) {
1982 numKeys--;
1983 step = ((length - numKeys) / (numKeys + 1));
1984 }
1985
1986 // create inner node
1987 node* res = new inner_node();
1988 res->numElements = numKeys;
1989
1990 Iter c = a;
1991 for (int i = 0; i < numKeys; i++) {
1992 // get dividing key
1993 res->keys[i] = c[step];
1994
1995 // get sub-tree
1996 auto child = buildSubTree(c, c + (step - 1));
1997 child->parent = res;
1998 child->position = i;
1999 res->getChildren()[i] = child;
2000
2001 c = c + (step + 1);
2002 }
2003
2004 // and the remaining part
2005 auto child = buildSubTree(c, b);
2006 child->parent = res;
2007 child->position = numKeys;
2008 res->getChildren()[numKeys] = child;
2009
2010 // done
2011 return res;
2012 }
2013}; // namespace souffle
2014
2015// Instantiation of static member search.
2016template <typename Key, typename Comparator, typename Allocator, unsigned blockSize, typename SearchStrategy,
2017 bool isSet, typename WeakComparator, typename Updater>
2018const SearchStrategy
2019 btree<Key, Comparator, Allocator, blockSize, SearchStrategy, isSet, WeakComparator, Updater>::search;
2020
2021} // end namespace detail
2022
2023/**
2024 * A b-tree based set implementation.
2025 *
2026 * @tparam Key .. the element type to be stored in this set
2027 * @tparam Comparator .. a class defining an order on the stored elements
2028 * @tparam Allocator .. utilized for allocating memory for required nodes
2029 * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
2030 * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
2031 */
2032template <typename Key, typename Comparator = detail::comparator<Key>,
2033 typename Allocator = std::allocator<Key>, // is ignored so far
2034 unsigned blockSize = 256,
2035 typename SearchStrategy = typename souffle::detail::default_strategy<Key>::type,
2036 typename WeakComparator = Comparator, typename Updater = souffle::detail::updater<Key>>
2037class btree_set : public souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, true,
2038 WeakComparator, Updater> {
2039 using super = souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, true,
2040 WeakComparator, Updater>;
2041
2042 friend class souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, true,
2043 WeakComparator, Updater>;
2044
2045public:
2046 /**
2047 * A default constructor creating an empty set.
2048 */
2049 btree_set(const Comparator& comp = Comparator(), const WeakComparator& weak_comp = WeakComparator())
2050 : super(comp, weak_comp) {}
2051
2052 /**
2053 * A constructor creating a set based on the given range.
2054 */
2055 template <typename Iter>
2056 btree_set(const Iter& a, const Iter& b) {
2057 this->insert(a, b);
2058 }
2059
2060 // A copy constructor.
2061 btree_set(const btree_set& other) : super(other) {}
2062
2063 // A move constructor.
2064 btree_set(btree_set&& other) : super(std::move(other)) {}
2065
2066private:
2067 // A constructor required by the bulk-load facility.
2068 template <typename s, typename n, typename l>
2069 btree_set(s size, n* root, l* leftmost) : super(size, root, leftmost) {}
2070
2071public:
2072 // Support for the assignment operator.
2073 btree_set& operator=(const btree_set& other) {
2074 super::operator=(other);
2075 return *this;
2076 }
2077
2078 // Support for the bulk-load operator.
2079 template <typename Iter>
2080 static btree_set load(const Iter& a, const Iter& b) {
2081 return super::template load<btree_set>(a, b);
2082 }
2083};
2084
2085/**
2086 * A b-tree based multi-set implementation.
2087 *
2088 * @tparam Key .. the element type to be stored in this set
2089 * @tparam Comparator .. a class defining an order on the stored elements
2090 * @tparam Allocator .. utilized for allocating memory for required nodes
2091 * @tparam blockSize .. determines the number of bytes/block utilized by leaf nodes
2092 * @tparam SearchStrategy .. enables switching between linear, binary or any other search strategy
2093 */
2094template <typename Key, typename Comparator = detail::comparator<Key>,
2095 typename Allocator = std::allocator<Key>, // is ignored so far
2096 unsigned blockSize = 256,
2097 typename SearchStrategy = typename souffle::detail::default_strategy<Key>::type,
2098 typename WeakComparator = Comparator, typename Updater = souffle::detail::updater<Key>>
2099class btree_multiset : public souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy,
2100 false, WeakComparator, Updater> {
2101 using super = souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, false,
2102 WeakComparator, Updater>;
2103
2104 friend class souffle::detail::btree<Key, Comparator, Allocator, blockSize, SearchStrategy, false,
2105 WeakComparator, Updater>;
2106
2107public:
2108 /**
2109 * A default constructor creating an empty set.
2110 */
2111 btree_multiset(const Comparator& comp = Comparator(), const WeakComparator& weak_comp = WeakComparator())
2112 : super(comp, weak_comp) {}
2113
2114 /**
2115 * A constructor creating a set based on the given range.
2116 */
2117 template <typename Iter>
2118 btree_multiset(const Iter& a, const Iter& b) {
2119 this->insert(a, b);
2120 }
2121
2122 // A copy constructor.
2123 btree_multiset(const btree_multiset& other) : super(other) {}
2124
2125 // A move constructor.
2126 btree_multiset(btree_multiset&& other) : super(std::move(other)) {}
2127
2128private:
2129 // A constructor required by the bulk-load facility.
2130 template <typename s, typename n, typename l>
2131 btree_multiset(s size, n* root, l* leftmost) : super(size, root, leftmost) {}
2132
2133public:
2134 // Support for the assignment operator.
2135 btree_multiset& operator=(const btree_multiset& other) {
2136 super::operator=(other);
2137 return *this;
2138 }
2139
2140 // Support for the bulk-load operator.
2141 template <typename Iter>
2142 static btree_multiset load(const Iter& a, const Iter& b) {
2143 return super::template load<btree_multiset>(a, b);
2144 }
2145};
2146
2147} // end of namespace souffle