package org.apache.cassandra.utils.btree; import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import com.google.common.base.Function; import com.google.common.collect.Collections2; public class BTree { /** * Leaf Nodes are a raw array of values: Object[V1, V1, ...,]. * * Branch Nodes: Object[V1, V2, ..., child[<V1.key], child[<V2.key], ..., child[< Inf]], where * each child is another node, i.e., an Object[]. Thus, the value elements in a branch node are the * first half of the array, rounding down. In our implementation, each value must include its own key; * we access these via Comparator, rather than directly. * * So we can quickly distinguish between leaves and branches, we require that leaf nodes are always even number * of elements (padded with a null, if necessary), and branches are always an odd number of elements. * * BTrees are immutable; updating one returns a new tree that reuses unmodified nodes. * * There are no references back to a parent node from its children. (This would make it impossible to re-use * subtrees when modifying the tree, since the modified tree would need new parent references.) * Instead, we store these references in a Path as needed when navigating the tree. */ // The maximum fan factor used for B-Trees static final int FAN_SHIFT; static { int fanfactor = 32; if (System.getProperty("cassandra.btree.fanfactor") != null) fanfactor = Integer.parseInt(System.getProperty("cassandra.btree.fanfactor")); int shift = 1; while (1 << shift < fanfactor) shift += 1; FAN_SHIFT = shift; } // NB we encode Path indexes as Bytes, so this needs to be less than Byte.MAX_VALUE / 2 static final int FAN_FACTOR = 1 << FAN_SHIFT; static final int QUICK_MERGE_LIMIT = Math.min(FAN_FACTOR, 16) * 2; // Maximum depth of any B-Tree. In reality this is just an arbitrary limit, and is currently imposed on iterators only, // but a maximum depth sufficient to store at worst Integer.MAX_VALUE items seems reasonable // 2^n = (2^k).(2^(n/k)) => 2^31 <= 2^(FAN_SHIFT-1) . 2^ceil(31 / (FAN_SHIFT - 1)) static final int MAX_DEPTH = (int) Math.ceil(31d / (FAN_SHIFT - 1)); // An empty BTree Leaf - which is the same as an empty BTree static final Object[] EMPTY_LEAF = new Object[0]; // An empty BTree branch - used only for internal purposes in Modifier static final Object[] EMPTY_BRANCH = new Object[1]; /** * Returns an empty BTree * * @return */ public static Object[] empty() { return EMPTY_LEAF; } /** * Creates a BTree containing all of the objects in the provided collection * * @param source the items to build the tree with * @param comparator the comparator that defines the ordering over the items in the tree * @param sorted if false, the collection will be copied and sorted to facilitate construction * @param <V> * @return */ public static <V> Object[] build(Collection<V> source, Comparator<V> comparator, boolean sorted) { int size = source.size(); if (size < FAN_FACTOR) { // pad to even length to match contract that all leaf nodes are even V[] values = source.toArray((V[]) new Object[size + (size & 1)]); // inline sorting since we're already calling toArray if (!sorted) Arrays.sort(values, 0, size, comparator); return values; } if (!sorted) source = sorted(source, comparator, size); return modifier.get().build(source, size); } /** * Returns a new BTree with the provided set inserting/replacing as necessary any equal items * * @param btree the tree to update * @param comparator the comparator that defines the ordering over the items in the tree * @param updateWith the items to either insert / update * @param updateWithIsSorted if false, updateWith will be copied and sorted to facilitate construction * @param <V> * @return */ public static <V> Object[] update(Object[] btree, Comparator<V> comparator, Collection<V> updateWith, boolean updateWithIsSorted) { return update(btree, comparator, updateWith, updateWithIsSorted, null, null); } /** * Returns a new BTree with the provided set inserting/replacing as necessary any equal items * * @param btree the tree to update * @param comparator the comparator that defines the ordering over the items in the tree * @param updateWith the items to either insert / update * @param updateWithIsSorted if false, updateWith will be copied and sorted to facilitate construction * @param replaceF a function to apply to a pair we are swapping * @param terminateEarly a function that returns Boolean.TRUE if we should terminate before finishing our work. * the argument to terminateEarly is ignored. * @param <V> * @return */ public static <V> Object[] update(Object[] btree, Comparator<V> comparator, Collection<V> updateWith, boolean updateWithIsSorted, ReplaceFunction<V> replaceF, Function<?, Boolean> terminateEarly) { if (btree.length == 0) { if (replaceF != null) updateWith = Collections2.transform(updateWith, replaceF); return build(updateWith, comparator, updateWithIsSorted); } if (!updateWithIsSorted) updateWith = sorted(updateWith, comparator, updateWith.size()); // if the b-tree is just a single root node, we can try a quick in-place merge if (isLeaf(btree) && btree.length + updateWith.size() < QUICK_MERGE_LIMIT) { // since updateWith is sorted, we can skip elements from earlier iterations tracked by this offset int btreeOffset = 0; int keyEnd = getLeafKeyEnd(btree); Object[] merged = new Object[QUICK_MERGE_LIMIT]; int mergedCount = 0; for (V v : updateWith) { // find the index i where v would belong in the original btree int i = find(comparator, v, btree, btreeOffset, keyEnd); boolean found = i >= 0; if (!found) i = -i - 1; // copy original elements up to i into the merged array int count = i - btreeOffset; if (count > 0) { System.arraycopy(btree, btreeOffset, merged, mergedCount, count); mergedCount += count; btreeOffset = i; } if (found) { // apply replaceF if it matches an existing element btreeOffset++; if (replaceF != null) v = replaceF.apply((V) btree[i], v); } else if (replaceF != null) { // new element but still need to apply replaceF to handle indexing and size-tracking v = replaceF.apply(v); } merged[mergedCount++] = v; } // copy any remaining original elements if (btreeOffset < keyEnd) { int count = keyEnd - btreeOffset; System.arraycopy(btree, btreeOffset, merged, mergedCount, count); mergedCount += count; } if (mergedCount > FAN_FACTOR) { // TODO this code will never execute since QUICK_MERGE_LIMIT == FAN_FACTOR int mid = (mergedCount >> 1) & ~1; // divide by two, rounding down to an even number return new Object[] { merged[mid], Arrays.copyOfRange(merged, 0, mid), Arrays.copyOfRange(merged, 1 + mid, mergedCount + ((mergedCount + 1) & 1)), }; } return Arrays.copyOfRange(merged, 0, mergedCount + (mergedCount & 1)); } return modifier.get().update(btree, comparator, updateWith, replaceF, terminateEarly); } /** * Returns an Iterator over the entire tree * * @param btree the tree to iterate over * @param forwards if false, the iterator will start at the end and move backwards * @param <V> * @return */ public static <V> Cursor<V> slice(Object[] btree, boolean forwards) { Cursor<V> r = Cursor.newCursor(); r.reset(btree, forwards); return r; } /** * Returns an Iterator over a sub-range of the tree * * @param btree the tree to iterate over * @param comparator the comparator that defines the ordering over the items in the tree * @param start the first item to include * @param end the last item to include * @param forwards if false, the iterator will start at end and move backwards * @param <V> * @return */ public static <V> Cursor<V> slice(Object[] btree, Comparator<V> comparator, V start, V end, boolean forwards) { Cursor<V> r = Cursor.newCursor(); r.reset(btree, comparator, start, end, forwards); return r; } /** * Returns an Iterator over a sub-range of the tree * * @param btree the tree to iterate over * @param comparator the comparator that defines the ordering over the items in the tree * @param start the first item to include * @param end the last item to include * @param forwards if false, the iterator will start at end and move backwards * @param <V> * @return */ public static <V> Cursor<V> slice(Object[] btree, Comparator<V> comparator, V start, boolean startInclusive, V end, boolean endInclusive, boolean forwards) { Cursor<V> r = Cursor.newCursor(); r.reset(btree, comparator, start, startInclusive, end, endInclusive, forwards); return r; } public static <V> V find(Object[] node, Comparator<V> comparator, V find) { while (true) { int keyEnd = getKeyEnd(node); int i = BTree.find(comparator, find, node, 0, keyEnd); if (i >= 0) { return (V) node[i]; } else if (!isLeaf(node)) { i = -i - 1; node = (Object[]) node[keyEnd + i]; } else { return null; } } } // UTILITY METHODS // same basic semantics as Arrays.binarySearch, but delegates to compare() method to avoid // wrapping generic Comparator with support for Special +/- infinity sentinels static <V> int find(Comparator<V> comparator, Object key, Object[] a, final int fromIndex, final int toIndex) { // attempt to terminate quickly by checking the first element, // as many uses of this class will (probably) be updating identical sets if (fromIndex >= toIndex) return -(fromIndex + 1); int c = compare(comparator, key, a[fromIndex]); if (c <= 0) { if (c == 0) return fromIndex; else return -(fromIndex + 1); } int low = fromIndex + 1; int high = toIndex - 1; while (low <= high) { int mid = (low + high) / 2; int cmp = compare(comparator, key, a[mid]); if (cmp > 0) low = mid + 1; else if (cmp < 0) high = mid - 1; else return mid; // key found } return -(low + 1); // key not found. } // get the upper bound we should search in for keys in the node static int getKeyEnd(Object[] node) { if (isLeaf(node)) return getLeafKeyEnd(node); else return getBranchKeyEnd(node); } // get the last index that is non-null in the leaf node static int getLeafKeyEnd(Object[] node) { int len = node.length; if (len == 0) return 0; else if (node[len - 1] == null) return len - 1; else return len; } // return the boundary position between keys/children for the branch node static int getBranchKeyEnd(Object[] node) { return node.length / 2; } // returns true if the provided node is a leaf, false if it is a branch static boolean isLeaf(Object[] node) { return (node.length & 1) == 0; } // Special class for making certain operations easier, so we can define a +/- Inf private static interface Special extends Comparable<Object> { } static final Special POSITIVE_INFINITY = new Special() { public int compareTo(Object o) { return o == this ? 0 : 1; } }; static final Special NEGATIVE_INFINITY = new Special() { public int compareTo(Object o) { return o == this ? 0 : -1; } }; private static final ThreadLocal<Builder> modifier = new ThreadLocal<Builder>() { @Override protected Builder initialValue() { return new Builder(); } }; // return a sorted collection private static <V> Collection<V> sorted(Collection<V> collection, Comparator<V> comparator, int size) { V[] vs = collection.toArray((V[]) new Object[size]); Arrays.sort(vs, comparator); return Arrays.asList(vs); } /** simple static wrapper to calls to cmp.compare() which checks if either a or b are Special (i.e. represent an infinity) */ // TODO : cheaper to check for POSITIVE/NEGATIVE infinity in callers, rather than here static <V> int compare(Comparator<V> cmp, Object a, Object b) { if (a instanceof Special) return ((Special) a).compareTo(b); if (b instanceof Special) return -((Special) b).compareTo(a); return cmp.compare((V) a, (V) b); } public static boolean isWellFormed(Object[] btree) { return isWellFormed(null, btree, true, NEGATIVE_INFINITY, POSITIVE_INFINITY); } public static boolean isWellFormed(Object[] btree, Comparator<? extends Object> cmp) { return isWellFormed(cmp, btree, true, NEGATIVE_INFINITY, POSITIVE_INFINITY); } private static boolean isWellFormed(Comparator<?> cmp, Object[] node, boolean isRoot, Object min, Object max) { if (cmp != null && !isNodeWellFormed(cmp, node, min, max)) return false; if (isLeaf(node)) { if (isRoot) return node.length <= FAN_FACTOR; return node.length >= FAN_FACTOR / 2 && node.length <= FAN_FACTOR; } int type = 0; int childOffset = getBranchKeyEnd(node); // compare each child node with the branch element at the head of this node it corresponds with for (int i = childOffset; i < node.length; i++) { Object[] child = (Object[]) node[i]; Object localmax = i < node.length - 1 ? node[i - childOffset] : max; if (!isWellFormed(cmp, child, false, min, localmax)) return false; type |= isLeaf(child) ? 1 : 2; min = localmax; } return type < 3; // either all leaves or all branches but not a mix } private static boolean isNodeWellFormed(Comparator<?> cmp, Object[] node, Object min, Object max) { Object previous = min; int end = getKeyEnd(node); for (int i = 0; i < end; i++) { Object current = node[i]; if (compare(cmp, previous, current) >= 0) return false; previous = current; } return compare(cmp, previous, max) < 0; } }