// BTree.java example
//
// Explorer
// Electric8-master
// - com
//   - sun
//     - electric
/* -*- tab-width: 4 -*-
 *
 * Electric(tm) VLSI Design System
 *
 * File: BTree.java
 *
 * Copyright (c) 2009 Sun Microsystems and Static Free Software
 *
 * Electric(tm) is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Electric(tm) is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Electric(tm); see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, Mass 02111-1307, USA.
 */
package com.sun.electric.database.geometry.btree;

import java.io.*;
import java.util.*;
import com.sun.electric.database.geometry.btree.unboxed.*;
import com.sun.electric.database.geometry.btree.CachingPageStorage.CachedPage;

/**
 *  A <a href=http://www.youtube.com/watch?v=coRJrcIYbF4>B+Tree</a>
 *  implemented using {@link PageStorage}.<p>
 *
 *  This is a B-Plus-Tree; values are stored only in leaf nodes.<p>
 *
 *  <h3>Usage Notes</h3>
 *
 *  Each element in a BTree is conceptually a triple
 *  &lt;ordinal,key,value&gt; where "key" is a user-supplied key
 *  (belonging to a type that is {@link Comparable}), "value" is a
 *  user-supplied value (no restrictions) and "ordinal" is an integer
 *  indicating the number of keys in the tree less than this one.
 *  Note that the ordinal is not actually stored in the tree, and
 *  inserting a new value can potentially modify the ordinals of all
 *  preexisting elements!  Each of the getXXX() methods takes one of
 *  these three coordinates (<tt>Ord</tt>, <tt>Key</tt>, or
 *  <tt>Val</tt>) and returns one of the others, or else a count
 *  (<tt>Num</tt>).  Additionally, the getXXXFromKey() methods include
 *  floor/ceiling versions that take an upper/lower bound and search
 *  for the largest/smallest key which is less/greater than the one
 *  supplied.<p>
 *
 *  The BTree supports appending a new element (that is, inserting a value
 *  with a key greater than any key in the table) in near-constant time
 *  (Actually log<sup>*</sup>(n)) and all other queries in log(n) time.
 *  All operations are done in a <i>single pass</i> down the BTree from
 *  the root to the leaves; this brings two benefits: the data structure
 *  can be made concurrent with very little lock contention and it can
 *  support copy-on-write shadow versions.<p>
 *  
 *  You must distinguish between insert() and replace() ahead of time;
 *  you can't call insert() on a key that is already in the tree or
 *  replace() on one that isn't.  In order to replace() on a BTree
 *  with a summary the summary product operation must be commutative
 *  and invertible, and you must know the old value which you are
 *  replacing.  This lets us update the interior node invariants as we
 *  walk down the tree and avoid having to walk back up afterwards.
 *  If you're not sure if a key is in the tree, just do a get() -- the
 *  net result is two passes over the tree, which is what we'd have to
 *  do anyways if the user didn't distinguish insert() from replace().
 *  We're just offering the option to double the performance in the
 *  case where the user already knows if the key is in the tree or
 *  not.<p>
 *
 *  You can associate a <i>summary</i> with each leaf node of the BTree.
 *  In order to do this, you must provide an instance of {@link
 *  com.sun.electric.database.geometry.btree.unboxed.AssociativeOperation}
 *  to the BTree when you construct it.  The AssociativeOperation knows
 *  the key and value type of the BTree, and must know two things:
 *  
 *  <ul>
 *    <li> How to calculate the summary of a single (key,value) pair.
 *    <li> How to merge two summaries.
 *  </ul>
 *  
 *  The process of merging two summaries must be associative; if we want
 *  to merge three summaries (ABC) it must not matter if we merge them as
 *  ((AB)C) or (A(BC)).  Technically this makes the merge operation
 *  a <i><a href=http://en.wikipedia.org/wiki/Semigroup>semigroup</a></i>.
 *  In exchange for providing all of this information
 *  you can ask the BTree to calculate the summary of any contiguous
 *  region of the keyspace in log(n) time.  For example, this can be used
 *  to answer "min/max over this range" queries very efficiently.<p>
 *
 *  <h3>Implementation Notes</h3>
 *
 *  We proactively split nodes as soon as they become full rather than
 *  waiting for them to become overfull.  This has a space overhead of
 *  1/NUM_KEYS_PER_PAGE, but puts an O(1) bound on the number of pages
 *  written per operation (number of pages read is still O(log n)).
 *  It also makes the walk routine tail-recursive.<p>
 *
 *  Each node of the BTree uses one page of the PageStorage; we don't
 *  yet support situations where a single page is too small for one
 *  key and one value.<p>
 *
 *  The coding style in this file is pretty unusual; it looks a lot
 *  like "Java as a better C".  This is mainly because Hotspot is
 *  remarkably good at inlining methods, but remarkably bad (still,
 *  even in Java 1.7) at figuring out when it's safe to unbox values.
 *  So I expend a lot of effort trying not to create boxed values, but
 *  don't worry at all about the overhead of method calls,
 *  particularly when made via "final" references (nearly always
 *  inlined).  I don't code this way very often -- I reserve this for
 *  the 2% of my code that bears 98% of the performance burden.  There
 *  is anecdotal evidence that using the server JVM rather than the
 *  client JVM yields a dramatic increase in performance; this coding
 *  style is especially friendly to the server JVM's optimization
 *  techniques.<p>
 *
 *  Keys, values, and summary elements are stored in <i>unboxed</i>
 *  form.  This means that each of these types must know how to
 *  serialize itself to and deserialize itself from a sequence of
 *  bytes, and <i>it must be possible to perform the important
 *  operations (comparison for keys, product for summary values)
 *  on these values in unboxed form.</i> The reasons for this are set
 *  out in the previous paragraph.  See the package btree.unboxed for
 *  further details.
 *
 *  @author Adam Megacz <adam.megacz@sun.com>
 */
public class BTree
    <K extends Serializable & Comparable,
     V extends Serializable,
     S extends Serializable> {

    /** Page storage that holds the pages of this tree; supplied to the constructor. */
    final CachingPageStorage   ps;
    /** Unboxed key type; used to serialize keys and to compare them in serialized form. */
    final UnboxedComparable<K> uk;
    /** Operation that combines two summaries, or null when the tree was built without one. */
    final AssociativeOperation<S> ao;
    /** Function which summarizes a single (key,value) pair; stored here but not invoked by the code in this class. */
    final UnboxedFunction<Pair<K,V>,S> summarize;
    /** Unboxed value type; supplied to the constructor. */
    final Unboxed<V>           uv;
    /** Shared UnboxedInt instance; not referenced elsewhere in this class. */
    final UnboxedInt           ui = UnboxedInt.instance;

    // Cursors created once in the constructor and reused by every walk().
    // The two interior cursors alternate between the "current node" and
    // "parent node" roles as walk() descends.
    private LeafNodeCursor<K,V,S>       leafNodeCursor;
    private InteriorNodeCursor<K,V,S>   interiorNodeCursor1;
    private InteriorNodeCursor<K,V,S>   interiorNodeCursor2;

    /**
     *  Page id at which every walk() starts; initialised from ps.createPage().
     *  It is never reassigned in this class -- presumably a cursor updates it
     *  when the root splits (TODO confirm against InteriorNodeCursor.initRoot()).
     */
    int                  rootpage;
    /** Scratch buffer receiving the serialized key of the current public call. */
    private final byte[] keybuf;
    /** Scratch buffer receiving the serialized second key (max) in getSummaryFromKeys(). */
    private final byte[] keybuf2;
    /** Buffer passed to walk() and deserialized by getSummaryFromKeys(); null when ao is null. */
    private final byte[] sbuf;

    /** Serialized key that the insertion fast path in walk() compares new keys against. */
    private final byte[] largestKey;
    private       int    largestKeyPage = -1;  // or -1 if unknown

    /** Number of entries; incremented by insert(). */
    private       int    size = 0;

    /**
     *  Builds a BTree on top of the given page storage.  A new page is
     *  allocated for the root, initialised through the leaf cursor and
     *  written back.
     *
     *  @param ps the PageStorage to hold the underlying bytes
     *  @param uk the unboxed type for keys (must be comparable)
     *  @param uv the unboxed type for values
     *  @param summarize the function which summarizes a single (key,value) pair
     *  @param combine the function which associatively combines two summaries;
     *         may be null, in which case no summary buffer is allocated
     *  @throws RuntimeException if combine is non-null but is not an
     *          AssociativeCommutativeOperation
     */
    public BTree(CachingPageStorage ps,
                 UnboxedComparable<K> uk,
                 Unboxed<V> uv,
                 UnboxedFunction<Pair<K,V>,S> summarize,
                 AssociativeOperation<S> combine) {
        this.summarize = summarize;

        // Guard: a summary operation is optional, but if present it must be commutative.
        if (combine != null && !(combine instanceof AssociativeCommutativeOperation))
            throw new RuntimeException("Only commutative summary operations are supported (allows one-pass insertion)");
        // FIXME: if the summary is not invertible (ie a group) and commutative, we cannot do DELETE in a single pass
        // I don't think we can ever do REPLACE in one pass unless we knew the previous value

        // The cursors below are handed `this`, so the core fields are assigned first.
        this.ps = ps;
        this.uk = uk;
        this.ao = combine;
        this.uv = uv;

        this.leafNodeCursor      = new LeafNodeCursor<K,V,S>(this);
        this.interiorNodeCursor1 = new InteriorNodeCursor<K,V,S>(this);
        this.interiorNodeCursor2 = new InteriorNodeCursor<K,V,S>(this);

        this.rootpage   = ps.createPage();
        this.keybuf     = new byte[uk.getSize()];
        this.keybuf2    = new byte[uk.getSize()];
        this.sbuf       = (combine != null) ? new byte[combine.getSize()] : null;
        this.largestKey = new byte[uk.getSize()];

        // Initialise the root page via the leaf cursor and persist it.
        leafNodeCursor.initBuf(ps.getPage(rootpage, false), true);
        leafNodeCursor.writeBack();
    }

    /**
     *  Returns the number of entries in the tree with a key between
     *  min and max inclusive; if either min or max is null it is treated
     *  as negative or positive infinity (respectively).
     *
     *  Only the unbounded query (both arguments null) is implemented; it
     *  returns the total entry count.
     *
     *  @throws UnsupportedOperationException if min or max is non-null
     */
    public int getNumFromKeys(K min, K max) {
        if (min != null || max != null)
            throw new UnsupportedOperationException("not implemented");
        return size;
    }

    /**
     *  Total number of entries in the tree.
     *  @return the result of getNumFromKeys(null,null)
     */
    public int size() {
        return getNumFromKeys(null, null);
    }

    /**
     *  Looks up a key.
     *  @param key the key to search for
     *  @return the value stored under key, or null if not found
     */
    public V getValFromKey(K key) {
        uk.serialize(key, keybuf, 0);
        final Object found = walk(keybuf, 0, null, Op.GET_VAL_FROM_KEY, 0);
        return (V)found;
    }

    /**
     *  Floor lookup by key.
     *  @param key the upper bound to search for
     *  @return the value of the largest key less than or equal to the one supplied
     */
    public V getValFromKeyFloor(K key) {
        uk.serialize(key, keybuf, 0);
        final Object found = walk(keybuf, 0, null, Op.GET_VAL_FROM_KEY_FLOOR, 0);
        return (V)found;
    }

    /**
     *  Ceiling lookup by key: intended to return the value of the smallest
     *  key greater than or equal to the one supplied.  Note that walk()
     *  currently throws a RuntimeException ("not implemented") when it
     *  reaches a leaf for this operation.
     *  @param key the lower bound to search for
     */
    public V getValFromKeyCeiling(K key) {
        uk.serialize(key, keybuf, 0);
        final Object found = walk(keybuf, 0, null, Op.GET_VAL_FROM_KEY_CEIL, 0);
        return (V)found;
    }

    /**
     *  Finds the ordinal of a key.
     *  @param key the key to search for
     *  @return the ordinal of the given key, or -1 if not found
     */
    public int getOrdFromKey(K key) {
        uk.serialize(key, keybuf, 0);
        final Integer boxedOrd = (Integer)walk(keybuf, 0, null, Op.GET_ORD_FROM_KEY, 0);
        return boxedOrd.intValue();
    }

    /**
     *  Floor lookup returning an ordinal.
     *  @param key the upper bound to search for
     *  @return the ordinal of the largest key less than or equal to the one supplied
     */
    public int getOrdFromKeyFloor(K key) {
        uk.serialize(key, keybuf, 0);
        final Integer boxedOrd = (Integer)walk(keybuf, 0, null, Op.GET_ORD_FROM_KEY_FLOOR, 0);
        return boxedOrd.intValue();
    }

    /**
     *  Ceiling lookup returning an ordinal.
     *  @param key the lower bound to search for
     *  @return the ordinal of the smallest key greater than or equal to the one supplied
     */
    public int getOrdFromKeyCeiling(K key) {
        uk.serialize(key, keybuf, 0);
        final Integer boxedOrd = (Integer)walk(keybuf, 0, null, Op.GET_ORD_FROM_KEY_CEIL, 0);
        return boxedOrd.intValue();
    }

    /**
     *  Intended to return the least key <i>strictly</i> greater than the
     *  argument.  Not implemented yet.
     *
     *  NOTE(review): the declared return type is V although the method is
     *  described as returning a key -- confirm the intended signature.
     *
     *  @throws UnsupportedOperationException always
     */
    public V getKeyFromKeyNext(K key) {
        throw new UnsupportedOperationException("not implemented");
    }

    /**
     *  Intended to return the greatest key <i>strictly</i> less than the
     *  argument.  Not implemented yet.
     *
     *  NOTE(review): the declared return type is V although the method is
     *  described as returning a key -- confirm the intended signature.
     *
     *  @throws UnsupportedOperationException always
     */
    public V getKeyFromKeyPrev(K key) {
        throw new UnsupportedOperationException("not implemented");
    }

    /**
     *  Positional lookup of a value.
     *  @param ord the ordinal to look up
     *  @return the i^th value in the tree
     */
    public V getValFromOrd(int ord) {
        final Object found = walk(null, 0, null, Op.GET_VAL_FROM_ORD, ord);
        return (V)found;
    }

    /**
     *  Positional lookup of a key.
     *  @param ord the ordinal to look up
     *  @return the i^th key in the tree
     */
    public K getKeyFromOrd(int ord) {
        final Object found = walk(null, 0, null, Op.GET_KEY_FROM_ORD, ord);
        return (K)found;
    }

    /**
     *  Adds a new (key,value) entry and bumps the entry count.
     *  Will throw an exception if the key is already in the tree.
     *  @param key the key to add
     *  @param val the value to store under key
     */
    public void insert(K key, V val) {
        uk.serialize(key, keybuf, 0);
        walk(keybuf, 0, val, Op.INSERT, 0);
        // only reached when walk() did not throw
        size += 1;
    }

    /**
     *  Overwrites the value stored under an existing key.
     *  Will throw an exception if the key is not already in the tree.
     *  @param key the key whose value is replaced
     *  @param val the new value
     *  @return value previously in the tree
     */
    public V replace(K key, V val) {
        uk.serialize(key, keybuf, 0);
        final Object previous = walk(keybuf, 0, val, Op.REPLACE, 0);
        return (V)previous;
    }

    /**
     *  Intended to remove a key and return the value previously in the
     *  tree.  Not implemented yet; an implementation will also have to
     *  decrement the entry count.
     *
     *  @throws UnsupportedOperationException always
     */
    public V remove(K key) {
        throw new UnsupportedOperationException("not implemented");
        // size--;
    }

    /**
     *  Intended to remove all entries.  Not implemented yet.
     *
     *  @throws UnsupportedOperationException always
     */
    public void clear() {
        throw new UnsupportedOperationException("not implemented");
    }

    /**
     *  Compute the summary of all (key,value) pairs between min and max,
     *  inclusive.  Both keys are serialized into scratch buffers, three
     *  walks (left / mid / right) are issued, and the summary buffer is
     *  deserialized.
     *
     *  NOTE(review): the walk routine visible in this file has no dedicated
     *  handling for the SUMMARIZE_* operations -- confirm that this query is
     *  actually supported before relying on the result.
     *
     *  @param min lower bound of the key range
     *  @param max upper bound of the key range
     *  @return the deserialized contents of the summary buffer
     *  @throws IllegalStateException if this tree was constructed without a
     *          summary operation (previously this surfaced as a
     *          NullPointerException only after the walks had already run)
     */
    public S getSummaryFromKeys(K min, K max) {
        // Fail fast, before touching the tree: without an operation there is no summary buffer.
        if (ao == null)
            throw new IllegalStateException("this BTree was constructed without a summary operation");
        uk.serialize(min, keybuf, 0);
        uk.serialize(max, keybuf2, 0);
        walk(keybuf, 0, null, Op.SUMMARIZE_LEFT,  0, keybuf2, 0, sbuf, 0);
        walk(keybuf, 0, null, Op.SUMMARIZE_MID,   0, keybuf2, 0, sbuf, 0);
        walk(keybuf, 0, null, Op.SUMMARIZE_RIGHT, 0, keybuf2, 0, sbuf, 0);
        return (S)ao.deserialize(sbuf, 0);
    }
    
    /**
     *  The operations understood by walk(), plus predicates which
     *  classify them.
     */
    private static enum Op {
        GET_VAL_FROM_KEY,
        GET_VAL_FROM_KEY_FLOOR,
        GET_VAL_FROM_KEY_CEIL,
        GET_ORD_FROM_KEY,
        GET_ORD_FROM_KEY_FLOOR,
        GET_ORD_FROM_KEY_CEIL,
        GET_VAL_FROM_ORD,
        GET_KEY_FROM_ORD,
        GET_NEXT,
        GET_PREV,
        REMOVE,
        INSERT,
        REPLACE,
        SUMMARIZE_LEFT,
        SUMMARIZE_MID,
        SUMMARIZE_RIGHT;

        /** true for the lookups that are addressed by ordinal */
        public boolean isGetFromOrd() {
            return this == GET_VAL_FROM_ORD
                || this == GET_KEY_FROM_ORD;
        }

        /** true for the lookups whose result is an ordinal */
        public boolean isGetOrd() {
            return this == GET_ORD_FROM_KEY
                || this == GET_ORD_FROM_KEY_FLOOR
                || this == GET_ORD_FROM_KEY_CEIL;
        }

        /** true for every lookup that is addressed by key (exact, floor or ceiling) */
        public boolean isGetFromKey() {
            return this == GET_VAL_FROM_KEY
                || this == GET_VAL_FROM_KEY_FLOOR
                || this == GET_VAL_FROM_KEY_CEIL
                || this == GET_ORD_FROM_KEY
                || this == GET_ORD_FROM_KEY_FLOOR
                || this == GET_ORD_FROM_KEY_CEIL;
        }

        /** true for the floor variants of the by-key lookups */
        public boolean isGetFromKeyFloor() {
            return this == GET_VAL_FROM_KEY_FLOOR
                || this == GET_ORD_FROM_KEY_FLOOR;
        }

        /** true for the ceiling variants of the by-key lookups */
        public boolean isGetFromKeyCeil() {
            return this == GET_VAL_FROM_KEY_CEIL
                || this == GET_ORD_FROM_KEY_CEIL;
        }
    }
    

    /**
     *  Convenience form of walk() for operations which need neither a
     *  second key nor a result buffer; both are passed as null with
     *  offset 0.
     */
    private Object walk(byte[] key, int key_ofs, V val, Op op, int ord) {
        final byte[] noSecondKey = null;
        final byte[] noResultBuf = null;
        return walk(key, key_ofs, val, op, ord, noSecondKey, 0, noResultBuf, 0);
    }

    /**
     *  B+Tree walking routine.
     *
     *  This is the hairiest part, so I arranged things to share a single
     *  codepath across the implemented operations (the by-key and
     *  by-ordinal lookups, insert and replace).
     *
     *  The routine is implemented using a loop rather than recursive
     *  calls because the JVM does not support tail recursion.
     *
     *  Lookups return their result (a value, a key, or a boxed Integer
     *  ordinal).  INSERT returns null.  REPLACE returns whatever
     *  LeafNodeCursor.setVal() returns (replace() documents this as the
     *  previous value).
     *
     *  @param key      serialized key (may be null for the by-ordinal lookups)
     *  @param key_ofs  offset of the key within {@code key}
     *  @param val      value for INSERT/REPLACE, otherwise null
     *  @param op       the operation to perform
     *  @param ord      ordinal for the by-ordinal lookups
     *  @param key2     currently unused
     *  @param key2_ofs currently unused
     *  @param ret      currently unused
     *  @param ret_ofs  currently unused
     *  @throws UnsupportedOperationException for operations that have no
     *          code path here (GET_NEXT, GET_PREV, REMOVE, SUMMARIZE_*,
     *          GET_VAL_FROM_KEY_CEIL, and INSERT into a summarized tree
     *          where an interior summary would need updating)
     *  @throws RuntimeException on INSERT of an existing key, REPLACE of a
     *          missing key, or REPLACE with a null value
     */
    private Object walk(byte[] key, int key_ofs, V val, Op op, int ord, byte[] key2, int key2_ofs, byte[] ret, int ret_ofs) {
        // Reject operations that have no handling below.  Without this guard
        // they would reach the leaf and fall into the INSERT/REPLACE code,
        // modifying the tree.
        switch(op) {
            case GET_NEXT:
            case GET_PREV:
            case REMOVE:  // would need to adjust 'least value under X' on the way down for deletions
            case SUMMARIZE_LEFT:
            case SUMMARIZE_MID:
            case SUMMARIZE_RIGHT:
                throw new UnsupportedOperationException("not implemented");
            default:
                break;
        }

        int pageid = rootpage;
        int idx = -1;
        int global_ord = 0;

        LeafNodeCursor<K,V,S>       leafNodeCursor = this.leafNodeCursor;
        InteriorNodeCursor<K,V,S>   interiorNodeCursor = this.interiorNodeCursor1;
        InteriorNodeCursor<K,V,S>   parentNodeCursor = this.interiorNodeCursor2;
        NodeCursor cur = null;

        boolean rightEdge = true;
        boolean cheat = false;
        // true when an INSERT key is strictly greater than the remembered largest key
        boolean beyondLargest = false;
        int comp = 0;

        // Fast path for appends: if the new key is strictly greater than the
        // largest key and that key's leaf still has room, skip the descent.
        // A key *equal* to the largest one must take the normal path so that
        // the duplicate is detected and rejected.
        if (largestKeyPage != -1 && op==Op.INSERT) {
            leafNodeCursor.setBuf(ps.getPage(largestKeyPage, true));
            comp = uk.compare(key, key_ofs, largestKey, 0);
            beyondLargest = comp > 0;
            if (beyondLargest && !leafNodeCursor.isFull()) {
                pageid = largestKeyPage;
                parentNodeCursor.forgetCachedPage();
                cheat = true;
                cur = leafNodeCursor;
            }
        }

        while(true) {
            // (re)load the cursor if it is not already positioned on the page we want
            if (cur==null || cur.getCachedPage()==null || cur.getPageId() != pageid) {
                CachedPage cp = ps.getPage(pageid, true);
                cur = LeafNodeCursor.isLeafNode(cp) ? leafNodeCursor : interiorNodeCursor;
                cur.setBuf(cp);
            }

            // Proactive split: a full node met on the way down is split
            // before descending further, then the walk restarts at the root.
            if ((op==Op.INSERT || op==Op.REPLACE) && cur.isFull()) {
                assert cur!=parentNodeCursor;
                int old;

                // is the node we're splitting the last child of its parent or the root node?
                boolean splitting_last_or_root = false;
                if (pageid == rootpage) {
                    parentNodeCursor.initRoot();
                    parentNodeCursor.setBucketPageId(0, pageid);
                    idx = 0;
                    old = size;
                    splitting_last_or_root = true;
                } else {
                    assert !parentNodeCursor.isFull();
                    splitting_last_or_root = idx>=parentNodeCursor.getNumBuckets()-1;
                    old = splitting_last_or_root ? -1 : parentNodeCursor.getNumValsBelowBucket(idx);
                }
                // on INSERT the parent's count was already bumped on the way down; undo that here
                if (op==Op.INSERT && old!=-1) old -= 1;
                int ofs = parentNodeCursor.insertNewBucketAt(idx+1);
                int oldpage = cur.getPageId();

                // optimization: if we're splitting a node on the
                // "right edge" of the tree, make the split uneven --
                // put everything on the left side.

                int splitPoint = rightEdge ? cur.getNumBuckets()-1 : cur.getMaxBuckets()/2;
                if (rightEdge) splitUnEven++; else splitEven++;

                if (ao!=null) {
                    byte[] monbuf = new byte[ao.getSize()];
                    // NOTE(review): the summary fetched for bucket 0 is overwritten
                    // by the first loop iteration before it is used -- confirm intended.
                    cur.getSummary(0, monbuf, 0);
                    for(int i=1; i<splitPoint; i++) {
                        cur.getSummary(i, monbuf, 0);
                        parentNodeCursor.multiplySummaryCommutative(idx, monbuf, 0);
                    }
                }
                int num = cur.split(parentNodeCursor.getBuf(), ofs, splitPoint);
                parentNodeCursor.setNumValsBelowBucket(idx, num);
                int newpage = cur.getPageId();
                if (largestKeyPage==oldpage) largestKeyPage = newpage;
                parentNodeCursor.setBucketPageId(idx+1, newpage);
                if (!splitting_last_or_root)
                    parentNodeCursor.setNumValsBelowBucket(idx+1, old-num);
                if (ao!=null && (!parentNodeCursor.isRightMost() || idx+1<parentNodeCursor.getNumBuckets()-1)) {
                    byte[] monbuf = new byte[ao.getSize()];
                    // NOTE(review): same pattern as above -- bucket 0's summary is never multiplied in.
                    cur.getSummary(0, monbuf, 0);
                    for(int i=1; i<cur.getNumBuckets() - (cur.isRightMost() ? 1 : 0); i++) {
                        cur.getSummary(i, monbuf, 0);
                        parentNodeCursor.multiplySummaryCommutative(idx+1, monbuf, 0);
                    }
                }

                cur.writeBack();
                parentNodeCursor.writeBack();
                // NOTE(review): restarting at the root re-runs the INSERT count
                // increments for nodes above the parent -- confirm that counts
                // are not incremented twice in trees of depth three or more.
                pageid = rootpage;
                cheat = false;
                continue;
            }


            // Locate the bucket of interest within the current node.
            if (cheat) {
                // fast path: the key goes after the last entry of the leaf
                idx = leafNodeCursor.getNumBuckets()-1;
                comp = 1;
            } else if (!op.isGetFromOrd()) {
                idx = cur.search(key, key_ofs);
                comp = cur.compare(key, key_ofs, idx);
            } else if (!cur.isLeafNode()) {
                // FIXME: linear scan => bad
                for(idx = 0; idx < interiorNodeCursor.getNumBuckets()-1; idx++) {
                    int k = interiorNodeCursor.getNumValsBelowBucket(idx);
                    if (ord < k) break;
                    ord -= k;
                }
            }
            if (cur.isLeafNode()) {
                switch(op) {
                    case GET_VAL_FROM_ORD:       return ord >= leafNodeCursor.getNumBuckets() ? null : leafNodeCursor.getVal(ord);
                    case GET_KEY_FROM_ORD:       return ord >= leafNodeCursor.getNumBuckets() ? null : leafNodeCursor.getKey(ord);
                    case GET_VAL_FROM_KEY:       return comp==0 ? leafNodeCursor.getVal(idx) : null;
                    case GET_VAL_FROM_KEY_FLOOR: return leafNodeCursor.getVal(idx);
                    case GET_VAL_FROM_KEY_CEIL:  /* FIXME: might need to backtrack one step */ throw new UnsupportedOperationException("not implemented");
                    case GET_ORD_FROM_KEY:       return comp==0 ? Integer.valueOf(idx+global_ord) : Integer.valueOf(-1);
                    case GET_ORD_FROM_KEY_FLOOR: return Integer.valueOf(idx+global_ord /*FIXME: off the end?*/);
                    case GET_ORD_FROM_KEY_CEIL:  return comp==0 ? Integer.valueOf(idx+global_ord) : Integer.valueOf(idx+global_ord+1 /*FIXME: off the end?*/);
                    default: /* INSERT or REPLACE; handled below */
                        break;
                }
                if (op==Op.INSERT && comp==0) throw new RuntimeException("attempt to re-insert a value at key " + leafNodeCursor.getKey(idx));
                if (op==Op.REPLACE && comp!=0) throw new RuntimeException("attempt to replace a value that did not exist");
                if (op==Op.INSERT) { if (cheat) insertionFastPath++; else insertionSlowPath++; }
                // Remember the largest key and its leaf whenever this insert
                // establishes a new maximum: the very first insert, a fast-path
                // append, or a slow-path append (taken when the rightmost leaf
                // was full).  Skipping the last case would leave largestKey
                // stale and let a later, smaller key be appended out of order.
                if (largestKeyPage==-1 || cheat || beyondLargest) {
                    System.arraycopy(key, key_ofs, largestKey, 0, largestKey.length);
                    largestKeyPage = pageid;
                }
                if (comp==0) {
                    // REPLACE of an existing key
                    if (val==null) throw new RuntimeException("deletion is not yet implemented");
                    return leafNodeCursor.setVal(idx, val);
                }
                leafNodeCursor.insertVal(idx+1, key, key_ofs, val);
                return null;
            } else {
                if (op==Op.INSERT) {
                    // bump the entry count of the bucket we are about to descend
                    // into (this code keeps no count for a node's last bucket)
                    boolean wb = false;
                    if (idx < interiorNodeCursor.getNumBuckets()-1) {
                        interiorNodeCursor.setNumValsBelowBucket(idx, interiorNodeCursor.getNumValsBelowBucket(idx)+1);
                        wb = true;
                    }
                    if (ao != null && (idx < interiorNodeCursor.getNumBuckets()-1 || !interiorNodeCursor.isRightMost())) {
                        throw new UnsupportedOperationException("not implemented");
                        /*
                          // FIXME
                        byte[] monbuf = new byte[ao.getSize()];
                        byte[] vbuf = new byte[uv.getSize()];
                        uv.serialize(val, vbuf, 0);
                        summarize.call(key, key_ofs, vbuf, 0, monbuf, 0);
                        interiorNodeCursor.multiplySummaryCommutative(idx, monbuf, 0);
                        */
                    }
                    if (wb) interiorNodeCursor.writeBack();
                }
                // ordinal lookups accumulate the sizes of all buckets to the left
                if (op.isGetOrd())
                    for(int i = 0; i < idx; i++)
                        global_ord += interiorNodeCursor.getNumValsBelowBucket(i);
                rightEdge &= idx==interiorNodeCursor.getNumBuckets()-1;
                pageid = interiorNodeCursor.getBucketPageId(idx);
                // the node just visited becomes the parent for the next iteration
                InteriorNodeCursor<K,V,S> ic = interiorNodeCursor; interiorNodeCursor = parentNodeCursor; parentNodeCursor = ic;
                assert interiorNodeCursor!=parentNodeCursor;
                continue;
            }
        }
    }

    // Debugging counters shared by every BTree instance; updated by walk(),
    // reset by clearStats() and reported by dumpStats().

    /** number of INSERTs that took the append fast path */
    static long insertionFastPath = 0;
    /** number of INSERTs that walked the tree from the root */
    static long insertionSlowPath = 0;
    /** number of splits made at getMaxBuckets()/2 (node not on the right edge) */
    static long splitEven = 0;
    /** number of uneven splits (node on the right edge of the tree) */
    static long splitUnEven = 0;

    /**
     *  Debugging method; may go away in future releases.
     *  Resets all four statistics counters to zero.
     */
    public static void clearStats() {
        insertionFastPath = insertionSlowPath = 0;
        splitEven = splitUnEven = 0;
    }

    /**
     *  Debugging method; may go away in future releases.
     *  Prints two lines: the share of insertions that took the fast path
     *  and the share of splits that were uneven.  When a total is zero the
     *  float division yields NaN, which the int cast turns into 0.
     *
     *  @param pw the stream to print to
     */
    public static void dumpStats(PrintStream pw) {
        final long fast       = insertionFastPath;
        final long insertions = fast + insertionSlowPath;
        final int  fastPct    = (int)((fast * 100) / (float)insertions);
        pw.println("BTree stats: insertion fastpath = " + fast + "/" + insertions + " = " + fastPct + "%");

        final long uneven    = splitUnEven;
        final long splits    = uneven + splitEven;
        final int  unevenPct = (int)((uneven * 100) / (float)splits);
        pw.println("             intelligent splits = " + uneven + "/" + splits + " = " + unevenPct + "%");
    }

}